├── lib
    ├── selector
    │   ├── parser
    │   │   ├── pseudo
    │   │   │   ├── custom_ident.ex
    │   │   │   ├── element_type.ex
    │   │   │   ├── compound_selector.ex
    │   │   │   ├── part_name_list.ex
    │   │   │   ├── pt_name_selector.ex
    │   │   │   ├── relative_selector_list.ex
    │   │   │   ├── selector_list.ex
    │   │   │   ├── selector.ex
    │   │   │   ├── direction.ex
    │   │   │   ├── direction_type.ex
    │   │   │   ├── name.ex
    │   │   │   ├── language_code.ex
    │   │   │   └── nth_formula.ex
    │   │   ├── utils.ex
    │   │   ├── combinator.ex
    │   │   ├── class.ex
    │   │   ├── tag_name.ex
    │   │   ├── hex.ex
    │   │   ├── id.ex
    │   │   ├── selector.ex
    │   │   ├── pseudo.ex
    │   │   ├── attribute.ex
    │   │   └── guards.ex
    │   ├── parser.ex
    │   └── renderer.ex
    └── selector.ex
├── test
    ├── test_helper.exs
    ├── selector_test.exs
    └── selector
    │   ├── render_test.exs
    │   ├── parser_test.exs
    │   └── parser
    │       └── guards_test.exs
├── assets
    └── logo.png
├── .formatter.exs
├── .gitignore
├── LICENSE.md
├── mix.exs
├── mix.lock
├── test_selector_guard.exs
└── README.md


/lib/selector/parser/pseudo/custom_ident.ex:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/element_type.ex:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | # Start the ExUnit runner for this test suite.
2 | ExUnit.start()
3 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/compound_selector.ex:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/part_name_list.ex:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/pt_name_selector.ex:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/relative_selector_list.ex:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liveview-native/selector/HEAD/assets/logo.png


--------------------------------------------------------------------------------
/test/selector_test.exs:
--------------------------------------------------------------------------------
1 | defmodule SelectorTest do
2 |   # No shared state is set up in this module, so it can run concurrently
3 |   # with the other test modules.
4 |   use ExUnit.Case, async: true
5 | 
6 |   # Runs the `iex>` examples found in the Selector module docs as tests.
7 |   doctest Selector
8 | end
9 | 


--------------------------------------------------------------------------------
/.formatter.exs:
--------------------------------------------------------------------------------
1 | # Used by "mix format"; `inputs` lists the file globs the formatter processes.
2 | [
3 |   inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
4 | ]
5 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/selector_list.ex:
--------------------------------------------------------------------------------
1 | defmodule Selector.Parser.Pseudo.SelectorList do
2 |   @moduledoc false
3 | 
4 |   alias Selector.Parser.Selector, as: SelectorParser
5 | 
6 |   # Hands the argument to the selector parser, with `[]` as its second argument.
7 |   def parse(selectors, opts), do: SelectorParser.parse(selectors, [], opts)
8 | end
9 | 


--------------------------------------------------------------------------------
/lib/selector/parser/utils.ex:
--------------------------------------------------------------------------------
1 | defmodule Selector.Parser.Utils do
2 |   @moduledoc false
3 |   import Selector.Parser.Guards
4 |   # Strips leading characters that pass `is_whitespace/1`; other input is returned as is.
5 |   def drain_whitespace(selectors) do
6 |     with <<c::utf8, rest::binary>> when is_whitespace(c) <- selectors, do: drain_whitespace(rest)
7 |   end
8 | end
9 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/selector.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.Pseudo.Selector do
 2 |   @moduledoc false
 3 | 
 4 |   # Parses a pseudo argument that must consist of exactly one selector.
 5 |   #
 6 |   # Delegates to `Selector.Parser.Selector.parse/3` and returns
 7 |   # `{[param], rest}` when exactly one selector was parsed. Raises
 8 |   # `ArgumentError` when the parser yields several selectors or none.
 9 |   def parse(selectors, opts) do
10 |     case Selector.Parser.Selector.parse(selectors, [], opts) do
11 |       {[param], rest} ->
12 |         {[param], rest}
13 | 
14 |       {[_ | _], _rest} ->
15 |         raise ArgumentError, "Pseudo type only accepts a single selector as a param."
16 | 
17 |       # An empty result used to fall through the `case` and surface as an
18 |       # opaque CaseClauseError; report it like the other argument errors.
19 |       {[], _rest} ->
20 |         raise ArgumentError, "Pseudo type requires a selector as a param."
21 |     end
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/direction.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.Pseudo.Direction do
 2 |   @moduledoc false
 3 | 
 4 |   # Accepts the keyword `ltr` or `rtl` at the head of the input and returns
 5 |   # `{keyword, rest}`. Any other input raises `ArgumentError`.
 6 |   def parse("ltr" <> rest, _opts), do: {"ltr", rest}
 7 |   def parse("rtl" <> rest, _opts), do: {"rtl", rest}
 8 | 
 9 |   def parse(_selectors, _opts) do
10 |     raise ArgumentError, "Invalid argument for Direction."
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/direction_type.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.Pseudo.DirectionType do
 2 |   @moduledoc false
 3 | 
 4 |   # Keywords accepted as the argument; one `parse/2` clause is generated for
 5 |   # each of them at compile time.
 6 |   @keywords ["up", "down", "left", "right", "*"]
 7 | 
 8 |   # Matches a keyword at the head of the input and returns `{keyword, rest}`.
 9 |   for keyword <- @keywords do
10 |     def parse(unquote(keyword) <> rest, _opts), do: {unquote(keyword), rest}
11 |   end
12 | 
13 |   # Anything else is rejected.
14 |   def parse(_selectors, _opts) do
15 |     raise ArgumentError, "Invalid argument for DirectionType."
16 |   end
17 | end
18 | 


--------------------------------------------------------------------------------
/lib/selector/parser.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser do
 2 |   @moduledoc """
 3 |   Parser for CSS selectors.
 4 |   """
 5 | 
 6 |   alias Selector.Parser.Selector, as: SelectorParser
 7 | 
 8 |   @doc """
 9 |   Turns a CSS selector string into its AST, tagged as `{:selectors, list}`.
10 | 
11 |   `opts` is an optional keyword list that is forwarded to the selector parser.
12 |   Raises `ArgumentError` when part of the input is left unparsed.
13 |   """
14 |   def parse(selectors, opts \\ []) when is_binary(selectors) do
15 |     case SelectorParser.parse(selectors, [], opts) do
16 |       {parsed, ""} ->
17 |         {:selectors, parsed}
18 | 
19 |       {_parsed, leftover} ->
20 |         raise ArgumentError, "Cannot parse: #{leftover}"
21 |     end
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/name.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.Pseudo.Name do
 2 |   @moduledoc false
 3 |   import Selector.Parser.Guards
 4 | 
 5 |   # Reads a name from the head of `selectors`, accumulating characters in
 6 |   # `name` (chardata), and returns `{name_string, rest}`. The result is `""`
 7 |   # when the input does not begin with an identifier start character.
 8 | 
 9 |   # First character: must be an identifier start character.
10 |   def parse(<<char::utf8, selectors::binary>>, [], opts) when is_identifier_start_char(char) do
11 |     parse(selectors, [char], opts)
12 |   end
13 | 
14 |   # Following characters: any identifier character, the same continuation
15 |   # guard `Selector.Parser.ID` uses. This clause previously reused the
16 |   # start-character guard, which made it indistinguishable from the clause
17 |   # above and ended the name at the first non-start character.
18 |   def parse(<<char::utf8, selectors::binary>>, name, opts)
19 |       when name != [] and is_identifier_char(char) do
20 |     parse(selectors, [name, char], opts)
21 |   end
22 | 
23 |   def parse(selectors, name, _opts) do
24 |     {List.to_string(name), selectors}
25 |   end
26 | end
27 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/language_code.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.Pseudo.LanguageCode do
 2 |   @moduledoc false
 3 | 
 4 |   import Selector.Parser.Guards
 5 | 
 6 |   # Accumulates a language code from the head of `selectors` into `lang`
 7 |   # (chardata) and returns `{code, rest}` at the first character that fits
 8 |   # neither guard.
 9 |   #
10 |   # NOTE(review): the second clause has no `lang != []` check, so a first
11 |   # character that passes `is_lang_char/1` is accepted even if it fails
12 |   # `is_lang_start_char/1` — confirm whether that is intended.
13 |   def parse(<<first::utf8, rest::binary>>, [], opts) when is_lang_start_char(first),
14 |     do: parse(rest, [first], opts)
15 | 
16 |   def parse(<<next::utf8, rest::binary>>, lang, opts) when is_lang_char(next),
17 |     do: parse(rest, [lang, next], opts)
18 | 
19 |   def parse(selectors, lang, _opts), do: {List.to_string(lang), selectors}
20 | end
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # The directory Mix will write compiled artifacts to.
 2 | /_build/
 3 | 
 4 | # If you run "mix test --cover", coverage assets end up here.
 5 | /cover/
 6 | 
 7 | # Where third-party dependencies like ExDoc output generated docs.
 8 | /doc/
 9 | 
10 | # If the VM crashes, it generates a dump, let's ignore it too.
11 | erl_crash.dump
12 | 
13 | # Also ignore archive artifacts (built via "mix archive.build").
14 | *.ez
15 | 
16 | # Ignore package tarball (built via "mix hex.build").
17 | selector-*.tar
18 | 
19 | # Temporary files, for example, from tests.
20 | /tmp/
21 | 
22 | # Ignore build artifacts but track libcss source
23 | /deps/*
24 | !/deps/libcss
25 | /deps/libcss/build
26 | /deps/libcss/.git
27 | /priv/
28 | .coder
29 | .opencode
30 | 


--------------------------------------------------------------------------------
/lib/selector/parser/combinator.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.Combinator do
 2 |   @moduledoc false
 3 | 
 4 |   import Selector.Parser.Guards
 5 |   import Selector.Parser.Utils
 6 | 
 7 |   # Reads an optional combinator from the head of `selectors`.
 8 |   #
 9 |   # Leading whitespace is drained first. Returns `{[combinator: text], rest}`
10 |   # with the whitespace after the combinator drained as well, or `{[], rest}`
11 |   # when no combinator token follows.
12 |   def parse(selectors, _opts) do
13 |     case drain_whitespace(selectors) do
14 |       # Must be tried before the single-character case.
15 |       <<"||"::utf8, rest::binary>> ->
16 |         {[combinator: "||"], drain_whitespace(rest)}
17 | 
18 |       <<char::utf8, rest::binary>> when is_combinator_char(char) ->
19 |         {[combinator: <<char::utf8>>], drain_whitespace(rest)}
20 | 
21 |       rest ->
22 |         {[], rest}
23 |     end
24 |   end
25 | end
26 | 


--------------------------------------------------------------------------------
/lib/selector/parser/class.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.Class do
 2 |   @moduledoc false
 3 | 
 4 |   import Selector.Parser.Guards
 5 | 
 6 |   # Accumulates a class name from the head of `selectors` into `class`
 7 |   # (chardata) and returns `{name, rest}` once no further character fits.
 8 |   # Raises `ArgumentError` if nothing was collected. Clause order matters.
 9 | 
10 |   # Backslash escape: keep the escaped character, drop the backslash.
11 |   def parse(<<?\\, escaped::utf8, rest::binary>>, class, opts) when is_escapable_char(escaped),
12 |     do: parse(rest, [class, escaped], opts)
13 | 
14 |   # First character of the name.
15 |   def parse(<<first::utf8, rest::binary>>, [], opts) when is_class_start_char(first),
16 |     do: parse(rest, [first], opts)
17 | 
18 |   # Any later character of the name.
19 |   def parse(<<next::utf8, rest::binary>>, class, opts)
20 |       when class != [] and is_class_char(next),
21 |       do: parse(rest, [class, next], opts)
22 | 
23 |   # Nothing collected and the input does not start a class name.
24 |   def parse(_selectors, [], _opts), do: raise(ArgumentError, "Expected class name.")
25 | 
26 |   # End of the name: flatten the collected chardata.
27 |   def parse(selectors, class, _opts), do: {List.to_string(class), selectors}
28 | end
29 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2024 DockYard, Inc.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
 1 | defmodule Selector.MixProject do
 2 |   use Mix.Project
 3 | 
 4 |   # Project definition read by Mix: app metadata, dependencies, Hex package
 5 |   # information and ExDoc settings.
 6 |   def project do
 7 |     [
 8 |       app: :selector,
 9 |       version: "0.0.1",
10 |       elixir: "~> 1.18",
11 |       start_permanent: Mix.env() == :prod,
12 |       deps: deps(),
13 |       package: package(),
14 |       description: "CSS Selector Parsing",
15 | 
16 |       # Docs
17 |       name: "Selector",
18 |       source_url: "https://github.com/liveview-native/selector",
19 |       homepage_url: "https://github.com/liveview-native/selector",
20 |       docs: [
21 |         main: "Selector",
22 |         logo: "assets/logo.png",
23 |         extras: ["README.md", "LICENSE.md"],
24 |         # Plain keyword keys: quoting them (`"Core":`) is redundant for these
25 |         # atoms and makes the compiler emit a warning.
26 |         # NOTE(review): no ast.ex is listed under lib/ — confirm Selector.AST exists.
27 |         groups_for_modules: [
28 |           Core: [Selector],
29 |           Parser: [Selector.Parser],
30 |           AST: [Selector.AST]
31 |         ],
32 |         groups_for_extras: [
33 |           Guides: ["README.md"],
34 |           Legal: ["LICENSE.md"]
35 |         ]
36 |       ]
37 |     ]
38 |   end
39 | 
40 |   # Metadata for the Hex package.
41 |   defp package do
42 |     [
43 |       maintainers: ["Brian Cardarella"],
44 |       licenses: ["MIT"],
45 |       links: %{"GitHub" => "https://github.com/liveview-native/selector"}
46 |     ]
47 |   end
48 | 
49 |   # OTP application settings.
50 |   def application do
51 |     [
52 |       extra_applications: [:logger]
53 |     ]
54 |   end
55 | 
56 |   # ExDoc is the only dependency, and only in the :dev environment.
57 |   defp deps do
58 |     [
59 |       {:ex_doc, "~> 0.38", only: :dev, runtime: false, warn_if_outdated: true}
60 |     ]
61 |   end
62 | end
63 | 


--------------------------------------------------------------------------------
/lib/selector/parser/tag_name.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.TagName do
 2 |   @moduledoc false
 3 | 
 4 |   import Selector.Parser.Guards
 5 | 
 6 |   # The only option keys handed back to the caller.
 7 |   @returned_opts [:namespace]
 8 | 
 9 |   # Accumulates a tag name from the head of `selectors` into `tag_name`
10 |   # (chardata) and returns `{name, rest, opts}`, where `opts` is reduced to
11 |   # the `:namespace` entry (if one was set). Clause order matters.
12 | 
13 |   # `\ `: an escaped space becomes part of the name.
14 |   def parse(<<?\\, ?\s, rest::binary>>, acc, opts), do: parse(rest, [acc, ?\s], opts)
15 | 
16 |   # `\c`: keep the escaped character, drop the backslash.
17 |   def parse(<<?\\, char::utf8, rest::binary>>, acc, opts) when is_escapable_char(char),
18 |     do: parse(rest, [acc, char], opts)
19 | 
20 |   # A single `|` (not `||`): what was accumulated so far is the namespace,
21 |   # and parsing restarts with an empty buffer on what follows the pipe.
22 |   def parse(<<?|, char::utf8, rest::binary>>, acc, opts) when char != ?| do
23 |     opts = Keyword.put(opts, :namespace, List.to_string(acc))
24 |     parse(<<char::utf8, rest::binary>>, [], opts)
25 |   end
26 | 
27 |   # `\*` or `*` with nothing accumulated: the universal selector.
28 |   def parse(<<?\\, ?*, rest::binary>>, [], opts),
29 |     do: {"*", rest, Keyword.take(opts, @returned_opts)}
30 | 
31 |   def parse(<<?*, rest::binary>>, [], opts),
32 |     do: {"*", rest, Keyword.take(opts, @returned_opts)}
33 | 
34 |   # The buffer is exactly `|`: the namespace is the empty string and the name
35 |   # starts with this character.
36 |   def parse(<<char::utf8, rest::binary>>, ~c"|", opts) when is_tag_name_char(char),
37 |     do: parse(rest, [char], Keyword.put(opts, :namespace, ""))
38 | 
39 |   # Ordinary tag-name character.
40 |   def parse(<<char::utf8, rest::binary>>, acc, opts) when is_tag_name_char(char),
41 |     do: parse(rest, [acc, char], opts)
42 | 
43 |   # End of the name: flatten the collected chardata.
44 |   def parse(selectors, tag_name, opts),
45 |     do: {List.to_string(tag_name), selectors, Keyword.take(opts, @returned_opts)}
46 | end
47 | 


--------------------------------------------------------------------------------
/lib/selector/parser/hex.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.Hex do
 2 |   @moduledoc false
 3 | 
 4 |   import Selector.Parser.Guards
 5 |   import Selector.Parser.Utils
 6 | 
 7 |   # Longest run of hex digits that is read as a single number.
 8 |   @max_digits 6
 9 | 
10 |   # Reads one to six leading hex digits from `selectors` and returns
11 |   # `{[value], rest}`, where `value` is the digits read as a base-16 integer
12 |   # and `rest` has its leading whitespace drained. When the input does not
13 |   # start with a hex digit, returns `{[], selectors}` untouched.
14 |   def parse(selectors, _opts) do
15 |     case take_hex_digits(selectors, [], @max_digits) do
16 |       {[], _rest} -> {[], selectors}
17 |       {digits, rest} -> {[List.to_integer(digits, 16)], drain_whitespace(rest)}
18 |     end
19 |   end
20 | 
21 |   # Collects up to `remaining` leading hex digits, preserving their order.
22 |   defp take_hex_digits(<<digit::utf8, rest::binary>>, acc, remaining)
23 |        when remaining > 0 and is_hex_digit(digit) do
24 |     take_hex_digits(rest, [digit | acc], remaining - 1)
25 |   end
26 | 
27 |   defp take_hex_digits(rest, acc, _remaining), do: {Enum.reverse(acc), rest}
28 | end
29 | 


--------------------------------------------------------------------------------
/mix.lock:
--------------------------------------------------------------------------------
 1 | %{
 2 |   "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"},
 3 |   "elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"},
 4 |   "ex_doc": {:hex, :ex_doc, "0.38.2", "504d25eef296b4dec3b8e33e810bc8b5344d565998cd83914ffe1b8503737c02", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "732f2d972e42c116a70802f9898c51b54916e542cc50968ac6980512ec90f42b"},
 5 |   "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"},
 6 |   "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"},
 7 |   "makeup_erlang": {:hex, :makeup_erlang, "1.0.2", "03e1804074b3aa64d5fad7aa64601ed0fb395337b982d9bcf04029d68d51b6a7", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "af33ff7ef368d5893e4a267933e7744e46ce3cf1f61e2dccf53a111ed3aa3727"},
 8 |   "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"},
 9 | }
10 | 


--------------------------------------------------------------------------------
/test_selector_guard.exs:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env elixir
 2 | 
 3 | # Manual smoke test for the `is_selector_char/1` guard: prints one line per
 4 | # case plus a summary. The guards file is loaded through a relative path.
 5 | 
 6 | # Load the guards module
 7 | Code.require_file("lib/selector/parser/guards.ex")
 8 | 
 9 | import Selector.Parser.Guards
10 | 
11 | IO.puts("Testing is_selector_char guard...")
12 | 
13 | # Test cases: {codepoint, expected result, description}
14 | test_cases = [
15 |   # Should pass
16 |   {?a, true, "letter 'a'"},
17 |   {?Z, true, "letter 'Z'"},
18 |   {?0, true, "digit '0'"},
19 |   {?#, true, "hash '#'"},
20 |   {?., true, "dot '.'"},
21 |   {?:, true, "colon ':'"},
22 |   {?,, true, "comma ','"},
23 |   {?>, true, "greater than '>'"},
24 |   {?+, true, "plus '+'"},
25 |   {?~, true, "tilde '~'"},
26 |   {?*, true, "asterisk '*'"},
27 |   {?|, true, "pipe '|'"},
28 |   {?[, true, "bracket '['"},
29 |   {?], true, "bracket ']'"},
30 |   {?", true, "double quote"},
31 |   {?', true, "single quote"},
32 |   {?\\, true, "backslash"},
33 |   {0x0020, true, "space"},
34 |   {0x4E2D, true, "Chinese character 中"},
35 |   {0x0391, true, "Greek letter Α"},
36 | 
37 |   # Should fail
38 |   {0x0000, false, "null character"},
39 |   {0x0001, false, "control character"},
40 |   {0x007F, false, "DEL character"},
41 |   {0xD800, false, "surrogate codepoint"}
42 | ]
43 | 
44 | # Run tests. Each iteration yields whether the case passed; the counts are
45 | # derived from that list afterwards, because variables rebound inside
46 | # `for`/`if` are not visible outside of them (the old counters stayed at 0).
47 | outcomes =
48 |   for {codepoint, expected, description} <- test_cases do
49 |     result = is_selector_char(codepoint)
50 |     passed? = result == expected
51 |     status = if passed?, do: "✓", else: "✗"
52 | 
53 |     IO.puts("#{status} #{description}: is_selector_char(#{inspect(codepoint)}) => #{result} (expected #{expected})")
54 | 
55 |     passed?
56 |   end
57 | 
58 | passed = Enum.count(outcomes, & &1)
59 | failed = length(outcomes) - passed
60 | 
61 | IO.puts("\nSummary: #{passed} passed, #{failed} failed")
62 | 
63 | # Test with actual selector strings
64 | IO.puts("\nTesting complete selector strings:")
65 | 
66 | selectors = [
67 |   "div.class#id",
68 |   "[data-value~=\"test\"]:nth-child(2n+1)",
69 |   ".クラス#标识符[атрибут=\"القيمة\"]",
70 |   "ns|element > .class + #id ~ [attr]",
71 |   "div, .class, #id"
72 | ]
73 | 
74 | # A selector passes when every one of its codepoints satisfies the guard.
75 | for selector <- selectors do
76 |   all_valid = Enum.all?(String.to_charlist(selector), &is_selector_char/1)
77 |   status = if all_valid, do: "✓", else: "✗"
78 |   IO.puts("#{status} \"#{selector}\" - all characters valid: #{all_valid}")
79 | end


--------------------------------------------------------------------------------
/lib/selector/parser/id.ex:
--------------------------------------------------------------------------------
 1 | defmodule Selector.Parser.ID do
 2 |   @moduledoc false
 3 | 
 4 |   import Selector.Parser.Guards
 5 | 
 6 |   # Accumulates an identifier from the head of `selectors` into `buffer`
 7 |   # (chardata) and returns `{identifier, rest}`. Raises `ArgumentError` for
 8 |   # an empty identifier, a lone hyphen, leading hyphens followed by a digit,
 9 |   # or a leading double hyphen while the `:strict` option is on (the
10 |   # default). Clause order matters.
11 | 
12 |   # Leading hyphen: validate what follows it before carrying on.
13 |   def parse(<<?-, rest::binary>>, [], opts) do
14 |     {hyphens, rest} = consume_leading_hyphens(rest, ~c"-", opts)
15 |     parse(rest, hyphens, opts)
16 |   end
17 | 
18 |   # First character of the identifier.
19 |   def parse(<<first::utf8, rest::binary>>, [], opts) when is_identifier_start_char(first),
20 |     do: parse(rest, [first], opts)
21 | 
22 |   # Any further identifier character.
23 |   def parse(<<next::utf8, rest::binary>>, buffer, opts) when is_identifier_char(next),
24 |     do: parse(rest, [buffer, next], opts)
25 | 
26 |   # `\` followed by a hex digit: hand the digits (without the backslash) to
27 |   # the hex parser and append the codepoint list it returns.
28 |   def parse(<<?\\, digit::utf8, rest::binary>>, buffer, opts) when is_hex_digit(digit) do
29 |     {codepoints, rest} = Selector.Parser.Hex.parse(<<digit::utf8, rest::binary>>, opts)
30 |     parse(rest, [buffer, codepoints], opts)
31 |   end
32 | 
33 |   # `\c`: keep the escaped character, drop the backslash.
34 |   def parse(<<?\\, escaped::utf8, rest::binary>>, buffer, opts) when is_escapable_char(escaped),
35 |     do: parse(rest, [buffer, escaped], opts)
36 | 
37 |   # NOTE(review): whitespace is dropped and parsing continues with the same
38 |   # buffer, so identifier characters after it are appended to this
39 |   # identifier — confirm this is intended.
40 |   def parse(<<space::utf8, rest::binary>>, buffer, opts) when is_whitespace(space),
41 |     do: parse(rest, buffer, opts)
42 | 
43 |   def parse(_selectors, ~c"-", _opts),
44 |     do: raise(ArgumentError, "Identifiers cannot consist of a single hyphen.")
45 | 
46 |   def parse(_selectors, [], _opts), do: raise(ArgumentError, "Expected identifier.")
47 | 
48 |   # End of the identifier: flatten the collected chardata.
49 |   def parse(selectors, buffer, _opts), do: {List.to_string(buffer), selectors}
50 | 
51 |   # Called right after the first hyphen with `hyphens` set to ~c"-". Further
52 |   # hyphens are only accepted when `:strict` is false, and a digit after the
53 |   # hyphen(s) is rejected.
54 |   defp consume_leading_hyphens(<<?-, rest::binary>>, hyphens, opts) do
55 |     strict = Keyword.get(opts, :strict, true)
56 | 
57 |     case strict do
58 |       true -> raise ArgumentError, "Identifiers cannot start with two hyphens with strict mode on."
59 |       false -> consume_leading_hyphens(rest, [hyphens, ?-], opts)
60 |     end
61 |   end
62 | 
63 |   defp consume_leading_hyphens(<<digit::utf8, _rest::binary>>, _hyphens, _opts)
64 |        when is_utf8_digit(digit) do
65 |     raise ArgumentError, "Identifiers cannot start with hyphens followed by digits."
66 |   end
67 | 
68 |   defp consume_leading_hyphens(rest, hyphens, _opts), do: {hyphens, rest}
69 | end
70 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo/nth_formula.ex:
--------------------------------------------------------------------------------
  1 | defmodule Selector.Parser.Pseudo.NthFormula do
  2 |   @moduledoc false
  3 | 
  4 |   import Selector.Parser.Guards
  5 |   import Selector.Parser.Utils
  6 | 
  7 |   # Parses an `An+B` style argument from the head of `selectors` and returns
  8 |   # `{[a: a, b: b], rest}`.
  9 |   #
 10 |   #   * `even` gives `[a: 2, b: 0]` and `odd` gives `[a: 2, b: 1]`
 11 |   #   * otherwise: an optional sign, optional digits, the variable `n`, and an
 12 |   #     optional `+`/`-` offset (whitespace is allowed around the operator)
 13 |   #   * when no `n` follows the coefficient, the coefficient is returned as
 14 |   #     the offset: `[a: 0, b: coefficient]`
 15 |   #
 16 |   # Input whose first character fails `is_nth_formula_starting_char/1` (and is
 17 |   # not `even`/`odd`) matches no clause and raises `FunctionClauseError`.
 18 |   def parse(<<"even"::utf8, selectors::binary>>, _opts) do
 19 |     {[a: 2, b: 0], selectors}
 20 |   end
 21 | 
 22 |   def parse(<<"odd"::utf8, selectors::binary>>, _opts) do
 23 |     {[a: 2, b: 1], selectors}
 24 |   end
 25 | 
 26 |   def parse(<<char::utf8, _selectors::binary>> = selectors, opts) when is_nth_formula_starting_char(char) do
 27 |     case parse_an_plus_b(selectors, opts) do
 28 |       # `Keyword.put/3` in parse_operator moves `:b` to the front of the list;
 29 |       # put the keys back in a-then-b order.
 30 |       {[b: b, a: a], selectors} -> {[a: a, b: b], selectors}
 31 |       result -> result
 32 |     end
 33 |   end
 34 | 
 35 |   # Starts from the defaults a = 1, b = 0.
 36 |   defp parse_an_plus_b(selectors, opts) do
 37 |     parse_coefficient(selectors, [a: 1, b: 0], opts)
 38 |   end
 39 | 
 40 |   # Reads the sign and the digits of the coefficient, then the variable.
 41 |   defp parse_coefficient(selectors, formula, opts) do
 42 |     {formula, selectors} = parse_coefficient_sign(selectors, formula, opts)
 43 |     {formula, selectors} = parse_coefficient_number(selectors, formula, opts)
 44 | 
 45 |     parse_variable(selectors, formula, opts)
 46 |   end
 47 | 
 48 |   # A leading `+` is consumed and changes nothing.
 49 |   defp parse_coefficient_sign(<<"+"::utf8, selectors::binary>>, formula, _opts) do
 50 |     {formula, selectors}
 51 |   end
 52 | 
 53 |   # A leading `-` flips the coefficient to -1.
 54 |   defp parse_coefficient_sign(<<"-"::utf8, selectors::binary>>, formula, _opts) do
 55 |     formula = Keyword.put(formula, :a, -1)
 56 |     {formula, selectors}
 57 |   end
 58 | 
 59 |   defp parse_coefficient_sign(selectors, formula, _opts) do
 60 |     {formula, selectors}
 61 |   end
 62 | 
 63 |   # Multiplies the signed coefficient by the digits read, or by 1 when there
 64 |   # are no digits (as in `n` or `-n`).
 65 |   defp parse_coefficient_number(selectors, formula, opts) do
 66 |     {number, selectors} = parse_number(selectors, nil, opts)
 67 |     formula = Keyword.update(formula, :a, 1, &(&1 * (number || 1)))
 68 |     {formula, selectors}
 69 |   end
 70 | 
 71 |   # Reads a run of decimal digits. `number` starts as nil so the caller can
 72 |   # tell "no digits" apart from an explicit 0.
 73 |   defp parse_number(<<char::utf8, selectors::binary>>, number, opts) when char in ?0..?9 do
 74 |     # The parentheses around `number || 0` are required: `number || 0 * 10`
 75 |     # parses as `number || (0 * 10)`, which skipped the decimal shift and
 76 |     # summed the digits instead (e.g. "12" became 3).
 77 |     number = (number || 0) * 10 + (char - ?0)
 78 |     parse_number(selectors, number, opts)
 79 |   end
 80 | 
 81 |   defp parse_number(selectors, number, _opts) do
 82 |     {number, selectors}
 83 |   end
 84 | 
 85 |   # The variable may be written as `\6e`, `\n` or a plain `n`.
 86 |   defp parse_variable(<<"\\6e"::utf8, selectors::binary>>, formula, opts) do
 87 |     parse_operator(selectors, formula, opts)
 88 |   end
 89 | 
 90 |   defp parse_variable(<<"\\n"::utf8, selectors::binary>>, formula, opts) do
 91 |     parse_operator(selectors, formula, opts)
 92 |   end
 93 | 
 94 |   defp parse_variable(<<"n"::utf8, selectors::binary>>, formula, opts) do
 95 |     parse_operator(selectors, formula, opts)
 96 |   end
 97 | 
 98 |   # No variable: what was read as the coefficient is the offset.
 99 |   defp parse_variable(selectors, formula, _opts) do
100 |     formula = [a: 0, b: Keyword.get(formula, :a)]
101 |     {formula, selectors}
102 |   end
103 | 
104 |   # Skips whitespace before the operator.
105 |   defp parse_operator(<<char::utf8, selectors::binary>>, formula, opts) when is_whitespace(char) do
106 |     selectors = drain_whitespace(selectors)
107 |     parse_operator(selectors, formula, opts)
108 |   end
109 | 
110 |   # The operator stores the sign of the offset in `:b`; parse_offset then
111 |   # multiplies it by the digits that follow.
112 |   defp parse_operator(<<"+"::utf8, selectors::binary>>, formula, opts) do
113 |     parse_offset(selectors, Keyword.put(formula, :b, 1), 0, opts)
114 |   end
115 | 
116 |   defp parse_operator(<<"-"::utf8, selectors::binary>>, formula, opts) do
117 |     parse_offset(selectors, Keyword.put(formula, :b, -1), 0, opts)
118 |   end
119 | 
120 |   defp parse_operator(selectors, formula, opts) do
121 |     parse_offset(selectors, formula, 0, opts)
122 |   end
123 | 
124 |   # Skips whitespace between the operator and the offset digits.
125 |   defp parse_offset(<<char::utf8, selectors::binary>>, formula, offset, opts) when is_whitespace(char) do
126 |     selectors = drain_whitespace(selectors)
127 |     parse_offset(selectors, formula, offset, opts)
128 |   end
129 | 
130 |   defp parse_offset(<<char::utf8, selectors::binary>>, formula, offset, opts) when char in ?0..?9 do
131 |     offset = (offset * 10) + (char - ?0)
132 |     parse_offset(selectors, formula, offset, opts)
133 |   end
134 | 
135 |   # End of the digits: apply the magnitude to the sign stored in `:b`.
136 |   defp parse_offset(selectors, formula, number, _opts) do
137 |     formula = Keyword.update(formula, :b, 1, &(&1 * number))
138 |     {formula, selectors}
139 |   end
140 | end
141 | 


--------------------------------------------------------------------------------
/lib/selector.ex:
--------------------------------------------------------------------------------
  1 | defmodule Selector do
  2 |   @moduledoc """
  3 |   A CSS selector parser and renderer for Elixir.
  4 | 
  5 |   This library provides functionality to parse CSS selector strings into an
  6 |   Abstract Syntax Tree (AST) and render them back to CSS strings. It supports
  7 |   CSS Selectors Level 1, 2, and 3 completely, with partial support for stable
  8 |   CSS Selectors Level 4 features.
  9 | 
 10 |   ## Features
 11 | 
 12 |   - Parse CSS selectors into a structured AST
 13 |   - Render AST back to CSS selector strings
 14 |   - Support for all CSS3 selectors and many CSS4 features
 15 |   - Namespace support for XML/SVG elements
 16 |   - Strict and non-strict parsing modes
 17 | 
 18 |   ## Basic Usage
 19 | 
 20 |       # Parse a CSS selector
 21 |       ast = Selector.parse("div#main > p.text")
 22 |       
 23 |       # Render AST back to CSS
 24 |       css = Selector.render(ast)
 25 | 
 26 |   ## Supported Selectors
 27 | 
 28 |   - Type selectors: `div`, `span`, `p`
 29 |   - Class selectors: `.class`, `.multiple.classes`
 30 |   - ID selectors: `#id`
 31 |   - Universal selector: `*`
 32 |   - Attribute selectors: `[attr]`, `[attr=value]`, `[attr^=prefix]`
 33 |   - Pseudo-classes: `:hover`, `:nth-child(2n+1)`, `:not(.active)`
 34 |   - Pseudo-elements: `::before`, `::after`, `::first-line`
 35 |   - Combinators: descendant (` `), child (`>`), adjacent (`+`), general sibling (`~`), column (`||`)
 36 |   - Namespaces: `svg|rect`, `*|*`, `|div`
 37 | 
 38 |   See the README for comprehensive documentation and examples.
 39 |   """
 40 | 
 41 |   alias Selector.{
 42 |     Parser,
 43 |     Renderer
 44 |   }
 45 | 
 46 |   @doc """
 47 |   Parses a CSS selector string into an Abstract Syntax Tree (AST).
 48 | 
 49 |   ## Parameters
 50 | 
 51 |     * `selector` - A CSS selector string to parse
 52 |     * `opts` - Optional keyword list of parsing options (default: `[]`)
 53 | 
 54 |   ## Options
 55 | 
 56 |     * `:strict` - When `true` (default), enforces strict CSS parsing rules.
 57 |       When `false`, allows some non-standard but commonly used patterns like 
 58 |       identifiers starting with double hyphens (`--`).
 59 | 
 60 |   ## Returns
 61 | 
 62 |   Returns a tuple `{:selectors, [selector_groups]}` representing the parsed selector AST.
 63 |   Each selector group is `{:rules, [rules]}` and each rule has the format 
 64 |   `{:rule, selectors, options}` where:
 65 | 
 66 |     * `selectors` is a list of selector components (tags, classes, IDs, etc.)
 67 |     * `options` is a keyword list containing combinator information
 68 | 
 69 |   ## Examples
 70 | 
 71 |   Basic selectors:
 72 | 
 73 |       iex> Selector.parse("div")
 74 |       {:selectors, [{:rules, [{:rule, [{:tag_name, "div", []}], []}]}]}
 75 | 
 76 |       iex> Selector.parse("#header")
 77 |       {:selectors, [{:rules, [{:rule, [{:id, "header"}], []}]}]}
 78 | 
 79 |       iex> Selector.parse(".button")
 80 |       {:selectors, [{:rules, [{:rule, [{:class, "button"}], []}]}]}
 81 | 
 82 |   Complex selectors:
 83 | 
 84 |       iex> Selector.parse("div#main.container[data-role='navigation']")
 85 |       {:selectors, [{:rules, [{:rule, [
 86 |         {:tag_name, "div", []},
 87 |         {:id, "main"},
 88 |         {:class, "container"},
 89 |         {:attribute, {:equal, "data-role", "navigation", []}}
 90 |       ], []}]}]}
 91 | 
 92 |   Multiple selectors:
 93 | 
 94 |       iex> Selector.parse("h1, h2, h3")
 95 |       {:selectors, [
 96 |         {:rules, [{:rule, [{:tag_name, "h1", []}], []}]},
 97 |         {:rules, [{:rule, [{:tag_name, "h2", []}], []}]},
 98 |         {:rules, [{:rule, [{:tag_name, "h3", []}], []}]}
 99 |       ]}
100 | 
101 |   Combinators:
102 | 
103 |       iex> Selector.parse("article > p")
104 |       {:selectors, [{:rules, [
105 |         {:rule, [{:tag_name, "article", []}], []},
106 |         {:rule, [{:tag_name, "p", []}], combinator: ">"}
107 |       ]}]}
108 | 
109 |   Pseudo-classes with arguments:
110 | 
111 |       iex> Selector.parse(":nth-child(2n+1)")
112 |       {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []}]}]}
113 | 
114 |       iex> Selector.parse(":not(.active)")
115 |       {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"not", [
116 |         [{:rules, [{:rule, [{:class, "active"}], []}]}]
117 |       ]}}], []}]}]}
118 | 
119 |   With options:
120 | 
121 |       iex> Selector.parse("#--custom-id", strict: false)
122 |       {:selectors, [{:rules, [{:rule, [{:id, "--custom-id"}], []}]}]}
123 | 
124 |   ## Error Handling
125 | 
126 |   Raises `ArgumentError` for invalid CSS selectors:
127 | 
128 |       iex> Selector.parse(".")
129 |       ** (ArgumentError) Expected class name.
130 | 
131 |       iex> Selector.parse("#")
132 |       ** (ArgumentError) Expected identifier.
133 | 
134 |       iex> Selector.parse("div >")
135 |       ** (ArgumentError) Expected rule but end of input reached.
136 | 
137 |   ## Supported CSS Features
138 | 
139 |   This parser supports CSS Selectors Level 3 completely and many stable 
140 |   features from CSS Selectors Level 4:
141 | 
142 |     * Basic selectors: type, class, ID, universal (`*`)
143 |     * Attribute selectors with all operators and case-sensitivity flags
144 |     * All combinators including the column combinator (`||`)
145 |     * Pseudo-classes including `:is()`, `:where()`, `:has()`, `:not()`
146 |     * Pseudo-elements with both `::` and legacy `:` syntax
147 |     * Namespaced selectors
148 |     * Complex nested selectors
149 |     * Escaped characters and Unicode
150 | 
151 |   See the project README for comprehensive examples and use cases.
152 |   """
153 |   @spec parse(String.t(), keyword()) :: {:selectors, [{:rules, list()}]}
154 |   def parse(selector, opts \\ []) do
155 |     Parser.parse(selector, opts)
156 |   end
156 | 
157 |   @doc """
158 |   Renders a selector AST back to a CSS selector string.
159 | 
160 |   ## Parameters
161 | 
162 |     * `selectors` - A selector AST, as returned by `parse/2`
163 |     * `opts` - Optional keyword list (default: `[]`), passed unchanged to
164 |       `Selector.Renderer.render/2`
165 | 
166 |   ## Example
167 | 
168 |       "tag1>tag2" |> Selector.parse() |> Selector.render()
169 |       #=> "tag1 > tag2"
170 |   """
171 |   @spec render(term(), keyword()) :: String.t()
172 |   def render(selectors, opts \\ []) do
173 |     Renderer.render(selectors, opts)
174 |   end
163 | end
164 | 


--------------------------------------------------------------------------------
/lib/selector/parser/selector.ex:
--------------------------------------------------------------------------------
  1 | defmodule Selector.Parser.Selector do
  2 |   @moduledoc false
  3 | 
  4 |   alias Selector.Parser.{
  5 |     Attribute,
  6 |     Class,
  7 |     Combinator,
  8 |     ID,
  9 |     Pseudo,
 10 |     TagName
 11 |   }
 12 | 
 13 |   import Selector.Parser.Guards
 14 |   import Selector.Parser.Utils
 15 | 
 16 |   # Parses a comma-separated selector list. Returns `{groups, rest}`, where
 17 |   # each group is `{:rules, rules}` and `rest` is whatever input was left
 18 |   # unconsumed (empty once everything has been parsed).
 19 |   def parse(<<char::utf8, rest::binary>>, selector_list, opts) when is_whitespace(char) do
 20 |     rest
 21 |     |> drain_whitespace()
 22 |     |> parse(selector_list, opts)
 23 |   end
 24 | 
 25 |   # A comma with nothing but whitespace after it is an error.
 26 |   def parse("," <> rest, selector_list, opts) do
 27 |     rest = drain_whitespace(rest)
 28 | 
 29 |     if rest == "" do
 30 |       raise ArgumentError, "Expected selector but end of input reached."
 31 |     end
 32 | 
 33 |     parse(rest, selector_list, opts)
 34 |   end
 35 | 
 36 |   def parse("", selector_list, _opts), do: {Enum.reverse(selector_list), ""}
 37 | 
 38 |   def parse(<<char::utf8, _::binary>> = input, selector_list, opts) when is_selector_start_char(char) do
 39 |     {rules, rest} = parse_rules(input, [], opts)
 40 |     parse(rest, [{:rules, rules} | selector_list], opts)
 41 |   end
 42 | 
 43 |   # Anything that cannot start a selector ends the list and is handed back.
 44 |   def parse(rest, selector_list, _opts), do: {Enum.reverse(selector_list), rest}
 40 |   
 41 |   # Parses one complex selector: compound selectors joined by combinators.
 42 |   # Returns `{rules, rest}` with the rules in source order.
 43 |   defp parse_rules("", [], _opts) do
 44 |     raise ArgumentError, "Expected rule but end of input reached."
 45 |   end
 46 | 
 47 |   defp parse_rules("", rules, _opts), do: {Enum.reverse(rules), ""}
 48 | 
 49 |   defp parse_rules(<<char::utf8, rest::binary>>, rules, _opts) when is_whitespace(char) do
 50 |     rest = drain_whitespace(rest)
 51 |     {Enum.reverse(rules), rest}
 52 |   end
 53 | 
 54 |   defp parse_rules(<<char::utf8, _::binary>> = input, rules, opts) when is_selector_start_char(char) do
 55 |     {compound, rest} = parse_rule(input, [], opts)
 56 | 
 57 |     # The :combinator entry carried in opts is attached to the rule just
 58 |     # parsed; whatever Combinator.parse/2 returns is merged into opts for
 59 |     # the rule that follows.
 60 |     {leading_combinator, base_opts} = Keyword.split(opts, [:combinator])
 61 |     {next_combinator, rest} = Combinator.parse(rest, base_opts)
 62 | 
 63 |     parse_rules(
 64 |       rest,
 65 |       [{:rule, compound, leading_combinator} | rules],
 66 |       Keyword.merge(base_opts, next_combinator)
 67 |     )
 68 |   end
 69 | 
 70 |   defp parse_rules(rest, rules, _opts), do: {Enum.reverse(rules), rest}
 68 | 
 69 |   # Pseudo-elements that CSS Level 2 allowed to be written with a single colon.
 70 |   @legacy_pseudo_elements ~w(before after first-line first-letter)
 71 | 
 72 |   # Parses one compound selector: a run of tag name, id, class, attribute and
 73 |   # pseudo parts. Returns `{parts, rest}` with the parts in source order; `rest`
 74 |   # starts at the first character no clause below recognises.
 75 |   # Raises ArgumentError for an invalid pseudo-class or pseudo-element.
 76 |   defp parse_rule(<<>>, rule, _opts) do
 77 |     {Enum.reverse(rule), ""}
 78 |   end
 79 | 
 80 |   defp parse_rule(<<"#"::utf8, selectors::binary>>, rule, opts) do
 81 |     {id, selectors} = ID.parse(selectors, [], opts)
 82 |     parse_rule(selectors, [{:id, id} | rule], opts)
 83 |   end
 84 | 
 85 |   defp parse_rule(<<"."::utf8, selectors::binary>>, rule, opts) do
 86 |     {class, selectors} = Class.parse(selectors, [], opts)
 87 |     parse_rule(selectors, [{:class, class} | rule], opts)
 88 |   end
 89 | 
 90 |   defp parse_rule(<<"*"::utf8, selectors::binary>>, rule, opts) do
 91 |     {tag_name, selectors, tag_opts} = TagName.parse(selectors, ["*"], opts)
 92 |     parse_rule(selectors, [{:tag_name, tag_name, tag_opts} | rule], opts)
 93 |   end
 94 | 
 95 |   defp parse_rule(<<"\\*"::utf8, selectors::binary>>, rule, opts) do
 96 |     {tag_name, selectors, tag_opts} = TagName.parse(selectors, ["*"], opts)
 97 |     parse_rule(selectors, [{:tag_name, tag_name, tag_opts} | rule], opts)
 98 |   end
 99 | 
100 |   # A leading "|" that is not the start of the "||" combinator begins a tag
101 |   # name such as `|div`. The input is passed on whole, "|" included, so the
102 |   # binary does not have to be rebuilt.
103 |   defp parse_rule(<<"|"::utf8, char::utf8, _rest::binary>> = selectors, rule, opts) when char != ?| do
104 |     {tag_name, selectors, tag_opts} = TagName.parse(selectors, [], opts)
105 |     parse_rule(selectors, [{:tag_name, tag_name, tag_opts} | rule], opts)
106 |   end
107 | 
108 |   # Same for an escaped pipe: TagName receives the input including `\|`.
109 |   defp parse_rule(<<"\\|"::utf8, _rest::binary>> = selectors, rule, opts) do
110 |     {tag_name, selectors, tag_opts} = TagName.parse(selectors, [], opts)
111 |     parse_rule(selectors, [{:tag_name, tag_name, tag_opts} | rule], opts)
112 |   end
113 | 
114 |   defp parse_rule(<<char::utf8, selectors::binary>>, rule, opts) when is_tag_name_start_char(char) do
115 |     {tag_name, selectors, tag_opts} = TagName.parse(selectors, [char], opts)
116 |     parse_rule(selectors, [{:tag_name, tag_name, tag_opts} | rule], opts)
117 |   end
118 | 
119 |   defp parse_rule(<<"["::utf8, selectors::binary>>, rule, opts) do
120 |     {attribute, selectors} = Attribute.parse(selectors, nil, opts)
121 |     parse_rule(selectors, [{:attribute, attribute} | rule], opts)
122 |   end
123 | 
124 |   defp parse_rule(<<"::"::utf8, selectors::binary>>, rule, opts) do
125 |     {{pseudo_name, _} = pseudo_element, remaining} = Pseudo.parse(selectors, opts)
126 | 
127 |     if pseudo_name not in Pseudo.elements() do
128 |       raise ArgumentError, "Invalid pseudo-element syntax."
129 |     end
130 | 
131 |     parse_rule(remaining, [{:pseudo_element, pseudo_element} | rule], opts)
132 |   end
133 | 
134 |   # A hyphen on its own is not a name. Vendor-prefixed pseudo-classes listed
135 |   # in Pseudo.classes/0 (e.g. `-moz-any-link`) also start with a hyphen, so
136 |   # the hyphen is rejected only when no name character follows it.
137 |   defp parse_rule(<<":-"::utf8>>, _rule, _opts) do
138 |     raise ArgumentError, "Identifiers cannot consist of a single hyphen."
139 |   end
140 | 
141 |   defp parse_rule(<<":-"::utf8, char::utf8, _rest::binary>>, _rule, _opts) when not is_pseudo_char(char) do
142 |     raise ArgumentError, "Identifiers cannot consist of a single hyphen."
143 |   end
144 | 
145 |   # Single-colon pseudo. The whole name is parsed before it is classified, so
146 |   # a legacy pseudo-element is recognised only on an exact name match and a
147 |   # longer name that merely starts with "before" is not split in two.
148 |   defp parse_rule(<<":"::utf8, selectors::binary>>, rule, opts) do
149 |     {{pseudo_name, _} = pseudo, selectors} = Pseudo.parse(selectors, opts)
150 | 
151 |     cond do
152 |       # Legacy CSS Level 2 support for single-colon pseudo-elements
153 |       pseudo_name in @legacy_pseudo_elements ->
154 |         parse_rule(selectors, [{:pseudo_element, pseudo} | rule], opts)
155 | 
156 |       pseudo_name in Pseudo.classes() ->
157 |         parse_rule(selectors, [{:pseudo_class, pseudo} | rule], opts)
158 | 
159 |       true ->
160 |         raise ArgumentError, "Invalid pseudo-class syntax."
161 |     end
162 |   end
163 | 
164 |   defp parse_rule(selectors, rule, _opts) do
165 |     {Enum.reverse(rule), selectors}
166 |   end
158 | end
159 | 
160 | 


--------------------------------------------------------------------------------
/test/selector/render_test.exs:
--------------------------------------------------------------------------------
  1 | defmodule Selector.RenderTest do
  2 |   # async: the tests only call Selector.parse/2 and Selector.render/2 on literals.
  3 |   use ExUnit.Case, async: true
  4 | 
  5 |   # Every assertion parses a selector, renders the resulting AST and compares
  6 |   # the output with the expected normalised CSS text.
  7 |   describe "render/1" do
  8 |     test "renders basic selectors" do
  9 |       assert Selector.parse(".class") |> Selector.render() == ".class"
 10 |       assert Selector.parse(".class1.class2") |> Selector.render() == ".class1.class2"
 11 |       assert Selector.parse("tag.class") |> Selector.render() == "tag.class"
 12 |       assert Selector.parse("tag#id.class") |> Selector.render() == "tag#id.class"
 13 |     end
 14 | 
 15 |     test "renders attribute selectors" do
 16 |       assert Selector.parse("tag#id.class[attr]") |> Selector.render() == "tag#id.class[attr]"
 17 |       assert Selector.parse("tag#id.class[attr=value]") |> Selector.render() == "tag#id.class[attr=\"value\"]"
 18 |       assert Selector.parse("tag#id.class[attr~=value]") |> Selector.render() == "tag#id.class[attr~=\"value\"]"
 19 |       assert Selector.parse("tag#id.class[attr*=value]") |> Selector.render() == "tag#id.class[attr*=\"value\"]"
 20 |       assert Selector.parse("tag#id.class[attr^=value]") |> Selector.render() == "tag#id.class[attr^=\"value\"]"
 21 |       assert Selector.parse("tag#id.class[attr$=value]") |> Selector.render() == "tag#id.class[attr$=\"value\"]"
 22 |     end
 23 | 
 24 |     test "handles attribute case sensitivity" do
 25 |       assert Selector.parse("tag#id.class[attr$=value i]") |> Selector.render() == "tag#id.class[attr$=\"value\" i]"
 26 |       # Parser normalizes case sensitivity flags to lowercase
 27 |       assert Selector.parse("tag#id.class[attr$=value I]") |> Selector.render() == "tag#id.class[attr$=\"value\" i]"
 28 |       assert Selector.parse("tag#id.class[attr$=value s]") |> Selector.render() == "tag#id.class[attr$=\"value\" s]"
 29 |       assert Selector.parse("tag#id.class[attr$=value S]") |> Selector.render() == "tag#id.class[attr$=\"value\" s]"
 30 |     end
 31 | 
 32 |     test "handles attribute escaping" do
 33 |       assert Selector.parse(~s(tagname[x="y"])) |> Selector.render() == ~s(tagname[x="y"])
 34 |       assert Selector.parse(~s(tagname[x='y'])) |> Selector.render() == ~s(tagname[x="y"])
 35 |       assert Selector.parse(~s(tagname[x="y "])) |> Selector.render() == ~s(tagname[x="y "])
 36 |       # This test has invalid CSS - unescaped quote in attribute value
 37 |       # assert Selector.parse(~s(tagname[x="y\\"])) |> Selector.render() == ~s(tagname[x="y\\"])
 38 |       assert Selector.parse(~s(tagname[x="y'"])) |> Selector.render() == ~s(tagname[x="y'"])
 39 |       # NOTE(review): inside ~s, `\0` and `\a` are Elixir escapes (NUL and BEL),
 40 |       # so the two inputs below contain no CSS backslash escape. ~S may have
 41 |       # been intended - confirm against the renderer's output before changing.
 42 |       assert Selector.parse(~s(div[role='a\00000ab'])) |> Selector.render() == ~s(div[role="a\a b"])
 43 |       assert Selector.parse(~s(div[role='\a'])) |> Selector.render() == ~s(div[role="\a"])
 44 |     end
 45 | 
 46 |     test "renders combinators" do
 47 |       assert Selector.parse("tag1 tag2") |> Selector.render() == "tag1 tag2"
 48 |       assert Selector.parse("ns1|tag1") |> Selector.render() == "ns1|tag1"
 49 |       assert Selector.parse("|tag1") |> Selector.render() == "|tag1"
 50 |       assert Selector.parse("*|tag1") |> Selector.render() == "*|tag1"
 51 |       assert Selector.parse("*|*") |> Selector.render() == "*|*"
 52 |       assert Selector.parse("*|*||*|*") |> Selector.render() == "*|* || *|*"
 53 |       assert Selector.parse("tag1>tag2") |> Selector.render() == "tag1 > tag2"
 54 |       assert Selector.parse("tag1+tag2") |> Selector.render() == "tag1 + tag2"
 55 |       assert Selector.parse("tag1~tag2") |> Selector.render() == "tag1 ~ tag2"
 56 |     end
 57 | 
 58 |     test "renders pseudo-classes and pseudo-elements" do
 59 |       assert Selector.parse("tag1:first") |> Selector.render() == "tag1:first"
 60 |       assert Selector.parse("tag1:lt(a3)") |> Selector.render() == "tag1:lt(a3)"
 61 |       assert Selector.parse("tag1:lt($var)") |> Selector.render() == "tag1:lt($var)"
 62 |       assert Selector.parse("tag1:lang(en\\))") |> Selector.render() == "tag1:lang(en\\))"
 63 |       assert Selector.parse("tag1:nth-child(odd)") |> Selector.render() == "tag1:nth-child(odd)"
 64 |       assert Selector.parse("tag1:nth-child(even)") |> Selector.render() == "tag1:nth-child(even)"
 65 |       assert Selector.parse("tag1:nth-child(-n+3)") |> Selector.render() == "tag1:nth-child(-n+3)"
 66 |       assert Selector.parse("tag1:nth-child(-1n+3)") |> Selector.render() == "tag1:nth-child(-n+3)"
 67 |       assert Selector.parse("tag1:nth-child(-5n+3)") |> Selector.render() == "tag1:nth-child(-5n+3)"
 68 |       assert Selector.parse("tag1:nth-child(-5n-3)") |> Selector.render() == "tag1:nth-child(-5n-3)"
 69 |       assert Selector.parse("tag1:nth-child(-5\\n-3)") |> Selector.render() == "tag1:nth-child(-5n-3)"
 70 |       assert Selector.parse("tag1:nth-child(-5\\6e-3)") |> Selector.render() == "tag1:nth-child(-5n-3)"
 71 |       assert Selector.parse("tag1:nth-child(-5n)") |> Selector.render() == "tag1:nth-child(-5n)"
 72 |       assert Selector.parse("tag1:nth-child(5)") |> Selector.render() == "tag1:nth-child(5)"
 73 |       assert Selector.parse("tag1:nth-child(-5)") |> Selector.render() == "tag1:nth-child(-5)"
 74 |       assert Selector.parse("tag1:nth-child(0)") |> Selector.render() == "tag1:nth-child(0)"
 75 |       assert Selector.parse("tag1:nth-child(n)") |> Selector.render() == "tag1:nth-child(n)"
 76 |       assert Selector.parse("tag1:nth-child(-n)") |> Selector.render() == "tag1:nth-child(-n)"
 77 |       assert Selector.parse("tag1:has(.class)") |> Selector.render() == "tag1:has(.class)"
 78 |       assert Selector.parse("tag1:has(.class,.class2)") |> Selector.render() == "tag1:has(.class, .class2)"
 79 |       assert Selector.parse("tag1:has(.class:has(.subcls),.class2)") |> Selector.render() == "tag1:has(.class:has(.subcls), .class2)"
 80 |       assert Selector.parse("tag1:has(> div)") |> Selector.render() == "tag1:has(> div)"
 81 |       assert Selector.parse("tag1:current(.class:has(.subcls),.class2)") |> Selector.render() == "tag1:current(.class:has(.subcls), .class2)"
 82 |       assert Selector.parse("tag1:current") |> Selector.render() == "tag1:current"
 83 |       assert Selector.parse("tag1::before") |> Selector.render() == "tag1::before"
 84 |       assert Selector.parse("tag1::hey(hello)") |> Selector.render() == "tag1::hey(hello)"
 85 |       assert Selector.parse("tag1::num(1)") |> Selector.render() == "tag1::num(\\31)"
 86 |       assert Selector.parse("tag1::num($var)") |> Selector.render() == "tag1::num($var)"
 87 |       assert Selector.parse("tag1::none") |> Selector.render() == "tag1::none"
 88 |     end
 89 | 
 90 |     test "handles special characters and escaping" do
 91 |       assert Selector.parse("tag\\/name") |> Selector.render() == "tag\\/name"
 92 |       assert Selector.parse(".class\\/name") |> Selector.render() == ".class\\/name"
 93 |       assert Selector.parse("#id\\/name") |> Selector.render() == "#id\\/name"
 94 |       assert Selector.parse(".\\30 wow") |> Selector.render() == ".\\30 wow"
 95 |       assert Selector.parse(".\\30wow") |> Selector.render() == ".\\30 wow"
 96 |       assert Selector.parse(".\\20wow") |> Selector.render() == ".\\20 wow"
 97 |       assert Selector.parse("tag\\n\\\\name\\.\\[") |> Selector.render() == "tagn\\\\name\\.\\["
 98 |       assert Selector.parse(".cls\\n\\\\name\\.\\[") |> Selector.render() == ".clsn\\\\name\\.\\["
 99 |       assert Selector.parse("[attr\\n\\\\name\\.\\[=a1]") |> Selector.render() == "[attrn\\\\name\\.\\[=\"a1\"]"
100 |       # Complex escaping edge case - parser handles escapes differently
101 |       # assert Selector.parse(":pseudo\\n\\\\name\\.\\[\\((123)") |> Selector.render() == ":pseudon\\\\name\\.\\[\\((\\31 23)"
102 |       assert Selector.parse("[attr=\"val\\nval\"]") |> Selector.render() == "[attr=\"val\\a val\"]"
103 |       assert Selector.parse("[attr=\"val\\\"val\"]") |> Selector.render() == "[attr=\"val\\\"val\"]"
104 |       assert Selector.parse("[attr=\"val\\00a0val\"]") |> Selector.render() == "[attr=\"val\ val\"]"
105 |       assert Selector.parse("tag\\00a0 tag") |> Selector.render() == "tag\\a0 tag"
106 |       assert Selector.parse(".class\\00a0 class") |> Selector.render() == ".class\\a0 class"
107 |       assert Selector.parse("[attr\\a0 attr]") |> Selector.render() == "[attr\\a0 attr]"
108 |       assert Selector.parse("[attr=$var]") |> Selector.render() == "[attr=$var]"
109 |       assert Selector.parse(".cls1.cls2#y .cls3+abc#def[x=y]>yy,ff") |> Selector.render() == ".cls1.cls2#y .cls3 + abc#def[x=\"y\"] > yy, ff"
110 |       assert Selector.parse("#google_ads_iframe_\\/100500\\/Pewpew_0") |> Selector.render() == "#google_ads_iframe_\\/100500\\/Pewpew_0"
111 |       assert Selector.parse("#\\3123") |> Selector.render() == "#\\3123"
112 |       assert Selector.parse("#\\31 23") |> Selector.render() == "#\\31 23"
113 |       assert Selector.parse("#\\00031 23") |> Selector.render() == "#\\31 23"
114 |     end
115 |   end
116 | end
113 | 


--------------------------------------------------------------------------------
/lib/selector/parser/pseudo.ex:
--------------------------------------------------------------------------------
  1 | defmodule Selector.Parser.Pseudo do
  2 |   @moduledoc false
  3 | 
  4 |   import Selector.Parser.Guards
  5 |   import Selector.Parser.Utils
  6 | 
  7 |   alias Selector.Parser.Pseudo.NthFormula
  8 |   alias Selector.Parser.Pseudo.{
  9 |     LanguageCode,
 10 |     Name,
 11 |     NthFormula,
 12 |     SelectorList
 13 |   }
 14 | 
 15 |   # Pseudo-classes whose argument is parsed by NthFormula. `nth-last-col` is
 16 |   # listed in @pseudo_classes and takes the same kind of argument as
 17 |   # `nth-col`, so it has to be accepted here as well.
 18 |   defguard is_nth_param(name) when name in ~w{
 19 |     nth-child
 20 |     nth-col
 21 |     nth-last-child
 22 |     nth-last-col
 23 |     nth-last-of-type
 24 |     nth-of-type
 25 |   }
 22 |   # Name groups for pseudos that take an argument. Each guard is true when
 23 |   # `name` is one of the listed strings.
 24 |   defguard is_selector_param(name) when name in ["cue", "cue-region"]
 25 | 
 26 |   defguard is_compound_selector_param(name)
 27 |            when name in ["host", "host-context", "slotted"]
 28 | 
 29 |   defguard is_relative_selector_param(name)
 30 |            when name in ["has", "host", "host-context", "slotted"]
 31 | 
 32 |   defguard is_selector_list_param(name)
 33 |            when name in ["is", "matches", "not", "where", "-webkit-any", "-moz-any"]
 34 | 
 35 |   defguard is_dir_keyword_param(name) when name in ["dir"]
 36 | 
 37 |   defguard is_dir_type_param(name) when name in ["scroll-button"]
 38 | 
 39 |   defguard is_lang_code_param(name) when name in ["lang"]
 40 | 
 41 |   defguard is_name_param(name)
 42 |            when name in ["active-view-transition-type", "highlight", "part", "picker", "state"]
 43 | 
 44 |   # True for every name that belongs to at least one of the groups above.
 45 |   defguard is_param_pseudo(name)
 46 |            when is_nth_param(name) or
 47 |                   is_selector_param(name) or
 48 |                   is_compound_selector_param(name) or
 49 |                   is_relative_selector_param(name) or
 50 |                   is_selector_list_param(name) or
 51 |                   is_dir_keyword_param(name) or
 52 |                   is_dir_type_param(name) or
 53 |                   is_lang_code_param(name) or
 54 |                   is_name_param(name)
 72 | 
 73 |   # Names accepted after a single ":". Vendor-prefixed names follow the
 74 |   # standard ones. `-moz-any` and `-webkit-any` are included because
 75 |   # is_selector_list_param/1 accepts them as functional pseudo-classes.
 76 |   @pseudo_classes ~w{
 77 |     active
 78 |     active-view-transition
 79 |     active-view-transition-type
 80 |     any-link
 81 |     autofill
 82 |     blank
 83 |     buffering
 84 |     checked
 85 |     current
 86 |     default
 87 |     defined
 88 |     dir
 89 |     disabled
 90 |     empty
 91 |     enabled
 92 |     first
 93 |     first-child
 94 |     first-of-type
 95 |     focus
 96 |     focus-visible
 97 |     focus-within
 98 |     fullscreen
 99 |     future
100 |     has
101 |     host
102 |     host-context
103 |     hover
104 |     in-range
105 |     indeterminate
106 |     invalid
107 |     is
108 |     lang
109 |     last-child
110 |     last-of-type
111 |     left
112 |     link
113 |     local-link
114 |     matches
115 |     modal
116 |     muted
117 |     not
118 |     nth-child
119 |     nth-col
120 |     nth-last-child
121 |     nth-last-col
122 |     nth-last-of-type
123 |     nth-of-type
124 |     only-child
125 |     only-of-type
126 |     open
127 |     optional
128 |     out-of-range
129 |     past
130 |     paused
131 |     picture-in-picture
132 |     placeholder-shown
133 |     playing
134 |     popover-open
135 |     read-only
136 |     read-write
137 |     required
138 |     right
139 |     root
140 |     scope
141 |     seeking
142 |     stalled
143 |     state
144 |     target
145 |     target-current
146 |     target-within
147 |     user-invalid
148 |     user-valid
149 |     valid
150 |     visited
151 |     volume-locked
152 |     where
153 |     -moz-any
154 |     -moz-any-link
155 |     -moz-broken
156 |     -moz-drag-over
157 |     -moz-first-node
158 |     -moz-focusring
159 |     -moz-full-screen
160 |     -moz-last-node
161 |     -moz-loading
162 |     -moz-only-whitespace
163 |     -moz-range-progress
164 |     -moz-range-thumb
165 |     -moz-range-track
166 |     -moz-read-only
167 |     -moz-read-write
168 |     -moz-suppressed
169 |     -moz-ui-invalid
170 |     -moz-ui-valid
171 |     -moz-user-disabled
172 |     -moz-window-inactive
173 |     -ms-accelerator
174 |     -ms-alt
175 |     -ms-checked
176 |     -ms-disabled
177 |     -ms-enabled
178 |     -ms-expand
179 |     -ms-fill
180 |     -ms-first-child
181 |     -ms-fullscreen
182 |     -ms-hover
183 |     -ms-indeterminate
184 |     -ms-keyboard-active
185 |     -ms-keyboard-select
186 |     -ms-link
187 |     -ms-link-visited
188 |     -ms-logical
189 |     -ms-middle
190 |     -ms-read-only
191 |     -ms-read-write
192 |     -ms-selected
193 |     -ms-user-select-contain
194 |     -ms-user-select-text
195 |     -webkit-any
196 |     -webkit-any-link
197 |     -webkit-autofill
198 |     -webkit-full-screen
199 |   }
200 | 
201 |   def classes, do: @pseudo_classes
197 | 
198 |   # Names accepted after "::". `highlight` is included because
199 |   # is_name_param/1 accepts it as a functional pseudo and it appears in
200 |   # neither this list nor @pseudo_classes otherwise.
201 |   @pseudo_elements ~w{
202 |     after
203 |     backdrop
204 |     before
205 |     checkmark
206 |     column
207 |     cue
208 |     cue-region
209 |     details-content
210 |     file-selector-button
211 |     first-letter
212 |     first-line
213 |     grammar-error
214 |     highlight
215 |     marker
216 |     part
217 |     picker
218 |     picker-icon
219 |     placeholder
220 |     postfix
221 |     prefix
222 |     scroll-button
223 |     scroll-marker
224 |     scroll-marker-group
225 |     selection
226 |     slotted
227 |     spelling-error
228 |     target-text
229 |     view-transition
230 |     view-transition-group
231 |     view-transition-image-pair
232 |     view-transition-new
233 |     view-transition-old
234 |     -moz-focus-inner
235 |     -moz-focus-outer
236 |     -moz-list-bullet
237 |     -moz-list-number
238 |     -moz-placeholder
239 |     -moz-progress-bar
240 |     -moz-range-progress
241 |     -moz-range-thumb
242 |     -moz-range-track
243 |     -moz-selection
244 |     -ms-browse
245 |     -ms-check
246 |     -ms-clear
247 |     -ms-content-zoom-factor
248 |     -ms-content-zoom-snap
249 |     -ms-content-zoom-snap-points
250 |     -ms-content-zooming
251 |     -ms-expand
252 |     -ms-fill
253 |     -ms-fill-lower
254 |     -ms-fill-upper
255 |     -ms-input-placeholder
256 |     -ms-reveal
257 |     -ms-thumb
258 |     -ms-ticks-after
259 |     -ms-ticks-before
260 |     -ms-tooltip
261 |     -ms-track
262 |     -ms-value
263 |     -webkit-input-placeholder
264 |     -webkit-progress-bar
265 |     -webkit-progress-inner-element
266 |     -webkit-progress-value
267 |     -webkit-scrollbar
268 |     -webkit-scrollbar-button
269 |     -webkit-scrollbar-thumb
270 |     -webkit-scrollbar-track
271 |     -webkit-scrollbar-track-piece
272 |     -webkit-scroll-corner
273 |     -webkit-slider-runnable-track
274 |     -webkit-slider-thumb
275 |   }
276 | 
277 |   def elements, do: @pseudo_elements
274 | 
275 |   # Parses a pseudo name and its optional parenthesised argument; the leading
276 |   # colon(s) have already been consumed by the caller.
277 |   # Returns `{{name, params}, rest}`; `params` is `[]` when there is no argument.
278 |   def parse(<<char::utf8, rest::binary>>, opts) when is_pseudo_start_char(char) do
279 |     parse_name(rest, [char], opts)
280 |   end
281 | 
282 |   # Empty input, or a character that cannot start a name: raise ArgumentError
283 |   # like the rest of the parser instead of failing with a FunctionClauseError.
284 |   def parse(_selectors, _opts) do
285 |     raise ArgumentError, "Expected pseudo-class or pseudo-element name."
286 |   end
278 | 
279 |   # An opening parenthesis ends the name and starts the argument.
280 |   defp parse_name("(" <> rest, name_chars, opts) do
281 |     name = List.to_string(name_chars)
282 |     {param, rest} = rest |> drain_whitespace() |> parse_param(name, opts)
283 | 
284 |     {{name, param}, rest}
285 |   end
286 | 
287 |   # Name characters are collected as nested chardata and flattened by
288 |   # List.to_string/1 once the name is complete.
289 |   defp parse_name(<<char::utf8, rest::binary>>, name_chars, opts) when is_pseudo_char(char) do
290 |     parse_name(rest, [name_chars, char], opts)
291 |   end
292 | 
293 |   # End of the name with no argument list. Every name that takes an argument
294 |   # must have one, except "cue".
295 |   defp parse_name(rest, name_chars, _opts) do
296 |     name = List.to_string(name_chars)
297 | 
298 |     if is_param_pseudo(name) and name != "cue" do
299 |       raise ArgumentError, ~s(Argument is required for pseudo-class "#{name}".)
300 |     end
301 | 
302 |     {{name, []}, rest}
303 |   end
300 |   
301 |   # Skips whitespace after a pseudo argument and consumes the closing ")".
302 |   # Returns `{[param], rest}`. Raises ArgumentError when ")" is missing.
303 |   defp parse_param_close(<<char::utf8, selectors::binary>>, param, opts) when is_whitespace(char) do
304 |     selectors = drain_whitespace(selectors)
305 |     parse_param_close(selectors, param, opts)
306 |   end
307 | 
308 |   defp parse_param_close(<<")"::utf8, selectors::binary>>, param, _opts) do
309 |     {[param], selectors}
310 |   end
311 | 
312 |   # End of input or any other character: report it as a selector syntax
313 |   # error instead of failing with a FunctionClauseError.
314 |   defp parse_param_close(_selectors, _param, _opts) do
315 |     raise ArgumentError, "Expected closing parenthesis."
316 |   end
309 | 
310 |   # Picks the argument parser from the pseudo name, then expects ")".
311 |   # NOTE(review): no clause uses is_selector_param, is_compound_selector_param,
312 |   # is_dir_keyword_param or is_dir_type_param. host/host-context/slotted are
313 |   # caught by is_relative_selector_param, but cue, cue-region, dir and
314 |   # scroll-button with an argument reach the last clause and raise.
315 |   defp parse_param(input, name, opts) when is_nth_param(name) do
316 |     {formula, rest} = NthFormula.parse(input, opts)
317 |     parse_param_close(rest, formula, opts)
318 |   end
319 | 
320 |   defp parse_param(input, name, opts) when is_relative_selector_param(name) do
321 |     # Combinator.parse/2 runs first; its result is merged into the opts given
322 |     # to the inner selector parser.
323 |     {combinator_opts, rest} = Selector.Parser.Combinator.parse(input, opts)
324 |     inner_opts = Keyword.merge(opts, combinator_opts)
325 |     {selector, rest} = Selector.Parser.Pseudo.Selector.parse(rest, inner_opts)
326 |     parse_param_close(rest, selector, opts)
327 |   end
328 | 
329 |   defp parse_param(input, name, opts) when is_selector_list_param(name) do
330 |     {selector_list, rest} = SelectorList.parse(input, opts)
331 |     parse_param_close(rest, selector_list, opts)
332 |   end
333 | 
334 |   defp parse_param(input, name, opts) when is_lang_code_param(name) do
335 |     {language, rest} = LanguageCode.parse(input, [], opts)
336 |     parse_param_close(rest, language, opts)
337 |   end
338 | 
339 |   defp parse_param(input, name, opts) when is_name_param(name) do
340 |     {ident, rest} = Name.parse(input, [], opts)
341 |     parse_param_close(rest, ident, opts)
342 |   end
343 | 
344 |   defp parse_param(_input, name, _opts) do
345 |     raise ArgumentError, "Pseudo #{name} cannot take param"
346 |   end
339 | end
340 | 


--------------------------------------------------------------------------------
/lib/selector/parser/attribute.ex:
--------------------------------------------------------------------------------
  1 | defmodule Selector.Parser.Attribute do
  2 |   @moduledoc false
  3 | 
  4 |   import Selector.Parser.Guards
  5 | 
  6 |   # Parses the inside of an attribute selector; the caller has already
  7 |   # consumed the opening "[". `rule` is nil until the attribute name has been
  8 |   # parsed. Returns `{rule, rest}` once the closing "]" is reached.
  9 |   def parse("", _rule, _opts) do
 10 |     raise ArgumentError, "Expected closing bracket."
 11 |   end
 12 | 
 13 |   def parse("]" <> _rest, nil, _opts) do
 14 |     raise ArgumentError, "Expected attribute name."
 15 |   end
 16 | 
 17 |   def parse("]" <> rest, rule, _opts), do: {rule, rest}
 18 | 
 19 |   def parse(<<char::utf8, rest::binary>>, rule, opts) when is_whitespace(char) do
 20 |     parse(rest, rule, opts)
 21 |   end
 22 | 
 23 |   def parse("=" <> _rest, _rule, _opts) do
 24 |     raise ArgumentError, "Expected attribute name."
 25 |   end
 26 | 
 27 |   # A leading "|" records an empty namespace in opts before the name is parsed.
 28 |   def parse("|" <> rest, nil, opts) do
 29 |     parse(rest, nil, Keyword.put(opts, :namespace, ""))
 30 |   end
 31 | 
 32 |   def parse("*" <> rest, nil, opts) do
 33 |     {rule, rest} = parse_wildcard_namespace_then_name(rest, opts)
 34 |     parse(rest, rule, opts)
 35 |   end
 36 | 
 37 |   def parse(<<char::utf8, rest::binary>>, nil, opts) when is_attribute_name_start_char(char) do
 38 |     {rule, rest} = parse_attribute_exists(rest, [char], opts)
 39 |     parse(rest, rule, opts)
 40 |   end
 41 | 
 42 |   # Case-sensitivity flag after the value, optionally written with a leading
 43 |   # backslash: i/I stores `case_sensitive: false`, s/S stores `true`.
 44 |   def parse(<<"\\"::utf8, flag::utf8, rest::binary>>, {type, name, value, modifiers}, opts)
 45 |       when flag in [?i, ?I, ?s, ?S] do
 46 |     modifiers = Keyword.put(modifiers, :case_sensitive, flag in [?s, ?S])
 47 |     parse(rest, {type, name, value, modifiers}, opts)
 48 |   end
 49 | 
 50 |   def parse(<<flag::utf8, rest::binary>>, {type, name, value, modifiers}, opts)
 51 |       when flag in [?i, ?I, ?s, ?S] do
 52 |     modifiers = Keyword.put(modifiers, :case_sensitive, flag in [?s, ?S])
 53 |     parse(rest, {type, name, value, modifiers}, opts)
 54 |   end
 55 | 
 56 |   def parse(_selectors, _rule, _opts) do
 57 |     raise ArgumentError, "Expected attribute name."
 58 |   end
 65 | 
 66 |   defp parse_attribute_exists(<<>>, _buffer, _opts) do
 67 |     raise ArgumentError, "Expected closing bracket."
 68 |   end
 69 | 
 70 |   defp parse_attribute_exists(<<"^="::utf8, selectors::binary>>, name, opts) do
 71 |     {value, selectors, opts} = parse_attribute_value_outter(selectors, opts)
 72 |     {{:prefix, name, value, extract_valid_opts(opts)}, selectors}
 73 |   end
 74 | 
 75 |   defp parse_attribute_exists(<<"$="::utf8, selectors::binary>>, name, opts) do
 76 |     {value, selectors, opts} = parse_attribute_value_outter(selectors, opts)
 77 |     {{:suffix, name, value, extract_valid_opts(opts)}, selectors}
 78 |   end
 79 | 
 80 |   defp parse_attribute_exists(<<"*="::utf8, selectors::binary>>, name, opts) do
 81 |     {value, selectors, opts} = parse_attribute_value_outter(selectors, opts)
 82 |     {{:substring, name, value, extract_valid_opts(opts)}, selectors}
 83 |   end
 84 | 
 85 |   defp parse_attribute_exists(<<"~="::utf8, selectors::binary>>, name, opts) do
 86 |     {value, selectors, opts} = parse_attribute_value_outter(selectors, opts)
 87 |     {{:includes, name, value, extract_valid_opts(opts)}, selectors}
 88 |   end
 89 | 
 90 |   defp parse_attribute_exists(<<"|="::utf8, selectors::binary>>, name, opts) do
 91 |     {value, selectors, opts} = parse_attribute_value_outter(selectors, opts)
 92 |     {{:dash_match, name, value, extract_valid_opts(opts)}, selectors}
 93 |   end
 94 | 
 95 |   defp parse_attribute_exists(<<"="::utf8, selectors::binary>>, name, opts) do
 96 |     {value, selectors, opts} = parse_attribute_value_outter(selectors, opts)
 97 |     {{:equal, name, value, extract_valid_opts(opts)}, selectors}
 98 |   end
 99 | 
100 |   defp parse_attribute_exists(<<char::utf8, selectors::binary>>, ~c"|", opts) do
101 |     {name, selectors, opts} = parse_attribute_name(selectors, [char], Keyword.put(opts, :namespace, ""))
102 |     parse_attribute_exists(selectors, name, opts)
103 |   end
104 | 
105 |   defp parse_attribute_exists(<<char::utf8, selectors::binary>>, name, opts) when is_attribute_name_char(char) do
106 |     {name, selectors, opts} = parse_attribute_name(selectors, [name, char], opts)
107 |     parse_attribute_exists(selectors, name, opts)
108 |   end
109 | 
110 |   defp parse_attribute_exists(selectors, buffer, opts) do
111 |     {{:exists, buffer, nil, extract_valid_opts(opts)}, selectors}
112 |   end
113 | 
114 |   defp parse_wildcard_namespace_then_name(<<char::utf8, selectors::binary>>, opts) when is_whitespace(char) do
115 |     parse_wildcard_namespace_then_name(selectors, opts)
116 |   end
117 | 
118 |   defp parse_wildcard_namespace_then_name(<<"|"::utf8, selectors::binary>>, opts) do
119 |     parse_attribute_exists(selectors, [], Keyword.put(opts, :namespace, "*"))
120 |   end
121 | 
122 |   defp parse_attribute_name(<<"|="::utf8, _selectors::binary>>, [], _opts) do
123 |     raise ArgumentError, "Expected attributed name."
124 |   end
125 | 
126 |   defp parse_attribute_name(<<"|="::utf8, _selectors::binary>> = selectors, name, opts) do
127 |     {List.to_string(name), selectors, opts}
128 |   end
129 | 
130 |   defp parse_attribute_name(<<"|"::utf8, selectors::binary>>, namespace, opts) do
131 |     parse_attribute_name(selectors, [], Keyword.put(opts, :namespace, List.to_string(namespace)))
132 |   end
133 | 
134 |   defp parse_attribute_name(<<"\\"::utf8, char::utf8, selectors::binary>>, name, opts) when is_escapable_char(char) do
135 |     parse_attribute_name(selectors, [name, char], opts)
136 |   end
137 | 
138 |   defp parse_attribute_name(<<char::utf8, selectors::binary>>, name, opts) when is_whitespace(char) do
139 |     parse_attribute_name(selectors, name, opts)
140 |   end
141 | 
142 |   defp parse_attribute_name(<<char::utf8, selectors::binary>>, name, opts) when is_attribute_name_char(char) do
143 |     parse_attribute_name(selectors, [name, char], opts)
144 |   end
145 | 
146 |   defp parse_attribute_name(selectors, name, opts) do
147 |     {List.to_string(name), selectors, opts}
148 |   end
149 | 
150 |   defp parse_attribute_value_outter(<<"]"::utf8, _selectors::binary>>, _opts) do
151 |     raise ArgumentError, "Expected attribute value."
152 |   end
153 | 
154 |   defp parse_attribute_value_outter(<<>>, _opts) do
155 |     raise ArgumentError, "Expected closing bracket."
156 |   end
157 | 
158 |   defp parse_attribute_value_outter(<<char::utf8, selectors::binary>>, opts) when is_attribute_value_char(char) do
159 |     parse_attribute_value_inner(selectors, [char], ?\s, opts)
160 |   end
161 | 
162 |   defp parse_attribute_value_outter(<<char::utf8, selectors::binary>>, opts) when char in [?', ?"] do
163 |     parse_attribute_value_inner(selectors, [], char, opts)
164 |   end
165 | 
166 |   defp parse_attribute_value_outter(<<char::utf8, selectors::binary>>, opts) when is_whitespace(char) do
167 |     parse_attribute_value_outter(selectors, opts)
168 |   end
169 | 
170 |   defp parse_attribute_value_inner(<<>>, _value, _delim, _opts) do
171 |     raise ArgumentError, "Expected closing deliminator"
172 |   end
173 | 
174 |   defp parse_attribute_value_inner(<<delim::utf8, selectors::binary>>, value, delim, opts) when delim in [?', ?", ?\s] do
175 |     {List.to_string(value), selectors, opts}
176 |   end
177 | 
178 |   defp parse_attribute_value_inner(<<char::utf8, selectors::binary>>, value, delim, opts) when is_whitespace(char) do
179 |     parse_attribute_value_inner(selectors, value, delim, opts)
180 |   end
181 | 
182 |   defp parse_attribute_value_inner(<<"\\"::utf8, char::utf8, selectors::binary>>, value, delim, opts) when is_hex_digit(char) do
183 |     {hex, selectors} = Selector.Parser.Hex.parse(List.to_string([char, selectors]), opts)
184 |     parse_attribute_value_inner(selectors, [value, hex], delim, opts)
185 |   end
186 | 
187 |   defp parse_attribute_value_inner(<<"\\"::utf8, "\n"::utf8, selectors::binary>>, value, delim, opts) do
188 |     parse_attribute_value_inner(selectors, value, delim, opts)
189 |   end
190 | 
191 |   defp parse_attribute_value_inner(<<"\\"::utf8, char::utf8, selectors::binary>>, value, delim, opts) when is_escapable_char(char) do
192 |     parse_attribute_value_inner(selectors, [value, char], delim, opts)
193 |   end
194 | 
195 |   defp parse_attribute_value_inner(<<char::utf8, selectors::binary>>, value, delim, opts) when is_attribute_value_char(char) do
196 |     parse_attribute_value_inner(selectors, [value, char], delim, opts)
197 |   end
198 | 
199 |   defp parse_attribute_value_inner(<<"]"::utf8, _selectors::binary>>, [], _delim, _opts) do
200 |     raise ArgumentError, "Expected attribute value."
201 |   end
202 | 
203 |   defp parse_attribute_value_inner(selectors, value, _delim, opts) do
204 |     {List.to_string(value), selectors, opts}
205 |   end
206 | 
207 |   defp extract_valid_opts(opts) do
208 |     Keyword.take(opts, [
209 |       :case_sensitive,
210 |       :namespace,
211 |     ])
212 |   end
213 | end
214 | 


--------------------------------------------------------------------------------
/lib/selector/renderer.ex:
--------------------------------------------------------------------------------
  1 | defmodule Selector.Renderer do
  2 |   @moduledoc """
  3 |   Handles rendering of CSS selector ASTs back to CSS selector strings.
  4 |   """
  5 | 
  6 |   @doc """
  7 |   Renders a list of selector groups to a CSS selector string.
  8 | 
  9 |   `selectors` is a list of groups, one per comma-separated selector. Each
 10 |   group is a list of `{:rule, components, opts}` tuples; the rendered groups
 11 |   are joined with `", "`.
 12 | 
 13 |   ## Options
 14 | 
 15 |   * `:format` - The output format (not currently used)
 16 |   """
 17 |   @spec render(list(), keyword()) :: String.t()
 18 |   def render(selectors, _opts \\ []) when is_list(selectors) do
 19 |     Enum.map_join(selectors, ", ", &render_selector_group/1)
 20 |   end
 21 | 
 22 |   # Renders one group of rules. The first rule is emitted as-is; every later
 23 |   # rule is preceded by its `:combinator` padded with spaces, or by a single
 24 |   # space (descendant combinator) when it has none.
 25 |   defp render_selector_group(rules) do
 26 |     rules
 27 |     |> Enum.with_index()
 28 |     |> Enum.map_join("", fn
 29 |       {{:rule, _sel, _opts} = rule, 0} ->
 30 |         render_rule(rule)
 31 | 
 32 |       {{:rule, _sel, opts} = rule, _index} ->
 33 |         case Keyword.get(opts, :combinator) do
 34 |           nil -> " " <> render_rule(rule)
 35 |           combinator -> " #{combinator} " <> render_rule(rule)
 36 |         end
 37 |     end)
 38 |   end
 39 | 
 40 |   # Renders a single rule by concatenating its components. Anything that is
 41 |   # not a `{:rule, components, opts}` tuple is rendered with `inspect/1`.
 42 |   defp render_rule({:rule, selectors, _opts}) do
 43 |     Enum.map_join(selectors, "", &render_selector/1)
 44 |   end
 45 | 
 46 |   defp render_rule(other), do: inspect(other)
 47 | 
 48 |   # Renders individual selector components.
 49 |   defp render_selector({:tag_name, name, []}) when is_binary(name), do: render_tag(name)
 50 | 
 51 |   defp render_selector({:tag_name, name, opts}) when is_binary(name) and is_list(opts) do
 52 |     # Namespaced tags render as `ns|name`.
 53 |     case Keyword.get(opts, :namespace) do
 54 |       nil -> render_tag(name)
 55 |       ns -> "#{render_tag(ns)}|#{render_tag(name)}"
 56 |     end
 57 |   end
 58 | 
 59 |   defp render_selector({:tag_name, name}) when is_binary(name), do: render_tag(name)
 60 |   defp render_selector({:tag_name, name}) when is_list(name), do: escape_name(to_string(name))
 61 | 
 62 |   defp render_selector({:id, id}) when is_binary(id), do: "##{escape_id(id)}"
 63 |   defp render_selector({:class, class}) when is_binary(class), do: ".#{escape_class(class)}"
 64 |   defp render_selector({:class, class}) when is_list(class), do: ".#{escape_class(to_string(class))}"
 65 | 
 66 |   # Pseudo-classes without arguments.
 67 |   defp render_selector({:pseudo_class, {name, []}}), do: ":#{atom_to_css_name(name)}"
 68 | 
 69 |   # Pseudo-classes with arguments: an `a`/`b` formula, one or more strings,
 70 |   # or nested selectors.
 71 |   defp render_selector({:pseudo_class, {name, args}}) when is_list(args) do
 72 |     rendered_args =
 73 |       case args do
 74 |         [a: a_val, b: b_val] -> format_nth(a_val, b_val)
 75 |         # A lone string argument has its closing parentheses escaped.
 76 |         [arg] when is_binary(arg) -> String.replace(arg, ")", "\\)")
 77 |         [first | _] when is_binary(first) -> Enum.join(args, " ")
 78 |         _ -> render_nested_rules(args)
 79 |       end
 80 | 
 81 |     ":#{atom_to_css_name(name)}(#{rendered_args})"
 82 |   end
 83 | 
 84 |   # Pseudo-elements.
 85 |   defp render_selector({:pseudo_element, {name, []}}), do: "::#{atom_to_css_name(name)}"
 86 | 
 87 |   defp render_selector({:pseudo_element, {name, [arg]}}) when is_binary(arg) do
 88 |     "::#{atom_to_css_name(name)}(#{arg})"
 89 |   end
 90 | 
 91 |   defp render_selector({:pseudo_element, {name, [nested_rules]}}) when is_list(nested_rules) do
 92 |     # Nested rules, e.g. `::part(button)`: flatten and join with spaces.
 93 |     inner =
 94 |       nested_rules
 95 |       |> List.flatten()
 96 |       |> Enum.map_join(" ", &render_rule/1)
 97 | 
 98 |     "::#{atom_to_css_name(name)}(#{String.trim(inner)})"
 99 |   end
100 | 
101 |   # Attribute selectors. The `:namespace` option is rendered as an `ns|`
102 |   # prefix, mirroring namespaced tag names. The case flag is only emitted
103 |   # when there is a value.
104 |   defp render_selector({:attribute, {op, name, value, opts}}) when is_list(opts) do
105 |     qualified_name = attr_namespace_prefix(opts) <> escape_attr(name)
106 | 
107 |     case value do
108 |       nil -> "[#{qualified_name}]"
109 |       _ -> "[#{qualified_name}#{attr_operator(op)}#{escape_attr_value(value)}#{case_flag(opts)}]"
110 |     end
111 |   end
112 | 
113 |   defp render_selector(other), do: inspect(other)
114 | 
115 |   # `*` is emitted verbatim; every other name is escaped.
116 |   defp render_tag("*"), do: "*"
117 |   defp render_tag(name), do: escape_name(name)
118 | 
119 |   # Maps an attribute match type to its CSS operator; unknown types are
120 |   # interpolated as-is.
121 |   defp attr_operator(:equal), do: "="
122 |   defp attr_operator(:includes), do: "~="
123 |   defp attr_operator(:dash_match), do: "|="
124 |   defp attr_operator(:prefix), do: "^="
125 |   defp attr_operator(:suffix), do: "$="
126 |   defp attr_operator(:substring), do: "*="
127 |   defp attr_operator(op), do: "#{op}"
128 | 
129 |   # `case_sensitive: false` renders the ` i` flag, `true` renders ` s`.
130 |   defp case_flag(opts) do
131 |     case Keyword.get(opts, :case_sensitive) do
132 |       false -> " i"
133 |       true -> " s"
134 |       _ -> ""
135 |     end
136 |   end
137 | 
138 |   # Returns the `ns|` prefix of an attribute, or "" without a namespace.
139 |   defp attr_namespace_prefix(opts) do
140 |     case Keyword.get(opts, :namespace) do
141 |       nil -> ""
142 |       "" -> "|"
143 |       "*" -> "*|"
144 |       ns -> escape_name(ns) <> "|"
145 |     end
146 |   end
147 | 
148 |   # Formats the `a`/`b` pair of an nth-style formula.
149 |   defp format_nth(0, b), do: "#{b}"
150 |   defp format_nth(2, 0), do: "even"
151 |   defp format_nth(2, 1), do: "odd"
152 |   defp format_nth(a, 0) when a == 1, do: "n"
153 |   defp format_nth(a, 0) when a == -1, do: "-n"
154 |   defp format_nth(a, 0), do: "#{a}n"
155 |   defp format_nth(a, b) when a == 1 and b > 0, do: "n+#{b}"
156 |   defp format_nth(a, b) when a == 1 and b < 0, do: "n#{b}"
157 |   defp format_nth(a, b) when a == -1 and b > 0, do: "-n+#{b}"
158 |   defp format_nth(a, b) when a == -1 and b < 0, do: "-n#{b}"
159 |   defp format_nth(a, b) when b > 0, do: "#{a}n+#{b}"
160 |   defp format_nth(a, b), do: "#{a}n#{b}"
161 | 
162 |   # Helper functions
163 |   # Returns true for single-character strings that are backslash-escaped.
164 |   defp escape_char?(char) when char in ~w(! " # $ % & ' ( \) * + , . / ; < = > ? @ [ \\ ] ^ ` { | } ~), do: true
165 |   defp escape_char?(":"), do: true
166 |   defp escape_char?(_), do: false
167 | 
168 |   # Escapes a tag, id, class, attribute or namespace name.
169 |   defp escape_name(name) when is_binary(name) do
170 |     case name do
171 |       # NOTE(review): names starting with "30"/"20" are rendered as the hex
172 |       # escapes `\30 `/`\20 ` instead of escaping the leading digit like the
173 |       # generic digit clause below - confirm this is what the parser emits.
174 |       "30" <> rest when rest != "" ->
175 |         "\\30 " <> escape_rest(rest)
176 | 
177 |       "30" ->
178 |         "\\30"
179 | 
180 |       "20" <> rest when rest != "" ->
181 |         "\\20 " <> escape_rest(rest)
182 | 
183 |       "20" ->
184 |         "\\20"
185 | 
186 |       <<digit, rest::binary>> when digit in ?0..?9 ->
187 |         # Escape leading digit as hex with trailing space
188 |         "\\3" <> <<digit>> <> " " <> escape_rest(rest)
189 | 
190 |       <<32, rest::binary>> ->
191 |         # Escape leading space
192 |         "\\20 " <> escape_rest(rest)
193 | 
194 |       _ ->
195 |         if String.match?(name, ~r/^[a-zA-Z][a-zA-Z0-9_-]*$/) do
196 |           name
197 |         else
198 |           # Escape special characters
199 |           escape_rest(name)
200 |         end
201 |     end
202 |   end
203 | 
204 |   # Backslash-escapes every grapheme for which `escape_char?/1` is true.
205 |   defp escape_rest(str) do
206 |     str
207 |     |> String.graphemes()
208 |     |> Enum.map_join(fn
209 |       char -> if escape_char?(char), do: "\\#{char}", else: char
210 |     end)
211 |   end
212 | 
213 |   defp escape_id(id), do: escape_name(id)
214 |   defp escape_class(class), do: escape_name(class)
215 |   defp escape_attr(name) when is_binary(name), do: escape_name(name)
216 |   defp escape_attr(name) when is_list(name), do: escape_name(to_string(name))
217 | 
218 |   # Renders an attribute value as a double-quoted string, escaping
219 |   # backslashes, double quotes, newlines, carriage returns and tabs.
220 |   defp escape_attr_value(value) when is_binary(value) do
221 |     escaped =
222 |       value
223 |       |> String.replace("\\", "\\\\")
224 |       |> String.replace("\"", "\\\"")
225 |       |> String.replace("\n", "\\a ")
226 |       |> String.replace("\r", "\\d ")
227 |       |> String.replace("\t", "\\9 ")
228 | 
229 |     "\"#{escaped}\""
230 |   end
231 | 
232 |   defp escape_attr_value(value) when is_list(value) do
233 |     # Handle charlist values
234 |     escape_attr_value(to_string(value))
235 |   end
236 | 
237 |   # Renders the arguments of a pseudo-class that takes nested selectors and
238 |   # joins them with ", ". A group is a list of rules and is rendered with
239 |   # `render_selector_group/1`; calling `render/2` here would treat each rule
240 |   # tuple as a group and crash. A combinator on a group's first rule (as in
241 |   # `:has(> img)`) is emitted in front of the group.
242 |   defp render_nested_rules(rules) when is_list(rules) do
243 |     Enum.map_join(rules, ", ", fn
244 |       [{:rule, _, opts} | _] = group ->
245 |         case Keyword.get(opts, :combinator) do
246 |           nil -> render_selector_group(group)
247 |           comb -> "#{comb} #{render_selector_group(group)}"
248 |         end
249 | 
250 |       {:rule, _, _} = rule ->
251 |         render_rule(rule)
252 | 
253 |       other ->
254 |         inspect(other)
255 |     end)
256 |   end
257 | 
258 |   # Convert atom names to CSS names (underscores to hyphens)
259 |   defp atom_to_css_name(atom) when is_atom(atom) do
260 |     atom
261 |     |> Atom.to_string()
262 |     |> String.replace("_", "-")
263 |   end
264 | 
265 |   defp atom_to_css_name(string) when is_binary(string) do
266 |     String.replace(string, "_", "-")
267 |   end
268 | end
269 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 🎯 Selector
  2 | 
  3 | A CSS selector parser library for Elixir. Parses CSS selector strings into an Abstract Syntax Tree (AST) that can be analyzed, manipulated, and rendered back to CSS.
  4 | 
  5 | ## ✨ Features
  6 | 
  7 | - **CSS Selectors Level 1** - Complete support
  8 | - **CSS Selectors Level 2** - Complete support
  9 | - **CSS Selectors Level 3** - Complete support
 10 | - **CSS Selectors Level 4** - Extensive support for stable features
 11 | 
 12 | ## 🎨 CSS Compatibility
 13 | 
 14 | ### CSS Selectors Level 1
 15 | 
 16 | | Feature | Status | Example |
 17 | |---------|--------|---------|
 18 | | Type selectors | ✅ | `h1`, `p`, `div` |
 19 | | Class selectors | ✅ | `.warning`, `.note` |
 20 | | ID selectors | ✅ | `#header`, `#footer` |
 21 | | Descendant combinator | ✅ | `div p`, `ul li` |
 22 | | `:link` pseudo-class | ✅ | `a:link` |
 23 | | `:visited` pseudo-class | ✅ | `a:visited` |
 24 | | `:active` pseudo-class | ✅ | `a:active` |
 25 | | `::first-line` pseudo-element | ✅ | `p::first-line` |
 26 | | `::first-letter` pseudo-element | ✅ | `p::first-letter` |
 27 | | Multiple selectors (grouping) | ✅ | `h1, h2, h3` |
 28 | 
 29 | ### CSS Selectors Level 2
 30 | 
 31 | | Feature | Status | Example |
 32 | |---------|--------|---------|
 33 | | Universal selector | ✅ | `*` |
 34 | | Attribute selectors | ✅ | `[title]`, `[class="example"]` |
 35 | | Attribute operators | ✅ | `[class~="warning"]`, `[lang\|="en"]` |
 36 | | Child combinator | ✅ | `body > p` |
 37 | | Adjacent sibling combinator | ✅ | `h1 + p` |
 38 | | `:hover` pseudo-class | ✅ | `a:hover` |
 39 | | `:focus` pseudo-class | ✅ | `input:focus` |
 40 | | `:before` pseudo-element | ✅ | `p:before` (legacy syntax) |
 41 | | `:after` pseudo-element | ✅ | `p:after` (legacy syntax) |
 42 | | `:first-child` pseudo-class | ✅ | `li:first-child` |
 43 | | `:lang()` pseudo-class | ✅ | `:lang(fr)` |
 44 | | Multiple attribute selectors | ✅ | `input[type="text"][required]` |
 45 | | Descendant combinator with universal | ✅ | `div *` |
 46 | 
 47 | ### CSS Selectors Level 3
 48 | 
 49 | | Feature | Status | Example |
 50 | |---------|--------|---------|
 51 | | Namespace selectors | ✅ | `svg\|rect`, `*\|*` |
 52 | | Substring matching attribute selectors | ✅ | `[href^="https"]`, `[src$=".png"]`, `[title*="hello"]` |
 53 | | General sibling combinator | ✅ | `h1 ~ p` |
 54 | | `:root` pseudo-class | ✅ | `:root` |
 55 | | `:nth-child()` pseudo-class | ✅ | `:nth-child(2n+1)` |
 56 | | `:nth-last-child()` pseudo-class | ✅ | `:nth-last-child(2)` |
 57 | | `:nth-of-type()` pseudo-class | ✅ | `p:nth-of-type(odd)` |
 58 | | `:nth-last-of-type()` pseudo-class | ✅ | `div:nth-last-of-type(2n)` |
 59 | | `:last-child` pseudo-class | ✅ | `li:last-child` |
 60 | | `:first-of-type` pseudo-class | ✅ | `p:first-of-type` |
 61 | | `:last-of-type` pseudo-class | ✅ | `h2:last-of-type` |
 62 | | `:only-child` pseudo-class | ✅ | `p:only-child` |
 63 | | `:only-of-type` pseudo-class | ✅ | `img:only-of-type` |
 64 | | `:empty` pseudo-class | ✅ | `div:empty` |
 65 | | `:target` pseudo-class | ✅ | `:target` |
 66 | | `:enabled` pseudo-class | ✅ | `input:enabled` |
 67 | | `:disabled` pseudo-class | ✅ | `input:disabled` |
 68 | | `:checked` pseudo-class | ✅ | `input:checked` |
 69 | | `:not()` pseudo-class | ✅ | `:not(.active)` |
 70 | | `::before` pseudo-element | ✅ | `div::before` |
 71 | | `::after` pseudo-element | ✅ | `div::after` |
 72 | | `::first-line` pseudo-element | ✅ | `p::first-line` |
 73 | | `::first-letter` pseudo-element | ✅ | `p::first-letter` |
 74 | 
 75 | ### CSS Selectors Level 4
 76 | 
 77 | | Feature | Status | Example |
 78 | |---------|--------|---------|
 79 | | Case-sensitivity flag | ✅ | `[attr=value i]`, `[attr=value s]` |
 80 | | Column combinator | ✅ | `col \|\| td` |
 81 | | `:is()` pseudo-class | ✅ | `:is(h1, h2, h3)` |
 82 | | `:where()` pseudo-class | ✅ | `:where(article, section) p` |
 83 | | `:has()` pseudo-class | ✅ | `:has(> img)` |
 84 | | `:not()` with complex selectors | ✅ | `:not(div.active)` |
 85 | | `:matches()` pseudo-class | ✅ | `:matches(h1, h2, h3)` |
 86 | | `:focus-within` | ✅ | `:focus-within` |
 87 | | `:focus-visible` | ✅ | `:focus-visible` |
 88 | | `:any-link` | ✅ | `:any-link` |
 89 | | `:read-write` pseudo-class | ✅ | `input:read-write` |
 90 | | `:read-only` pseudo-class | ✅ | `input:read-only` |
 91 | | `:placeholder-shown` pseudo-class | ✅ | `input:placeholder-shown` |
 92 | | `:default` pseudo-class | ✅ | `option:default` |
 93 | | `:valid` pseudo-class | ✅ | `input:valid` |
 94 | | `:invalid` pseudo-class | ✅ | `input:invalid` |
 95 | | `:in-range` pseudo-class | ✅ | `input:in-range` |
 96 | | `:out-of-range` pseudo-class | ✅ | `input:out-of-range` |
 97 | | `:required` pseudo-class | ✅ | `input:required` |
 98 | | `:optional` pseudo-class | ✅ | `input:optional` |
 99 | | `::placeholder` pseudo-element | ✅ | `input::placeholder` |
100 | | `::selection` pseudo-element | ✅ | `::selection` |
101 | | `::backdrop` pseudo-element | ✅ | `dialog::backdrop` |
102 | | `::marker` pseudo-element | ✅ | `li::marker` |
103 | | `::cue` pseudo-element | ✅ | `::cue` |
104 | | `::slotted()` pseudo-element | ✅ | `::slotted(span)` |
105 | | Vendor-specific pseudo-elements | ✅ | `::-webkit-input-placeholder` |
106 | | `:nth-child(An+B of S)` | ✅ | `:nth-child(2n of .important)` |
107 | | `:nth-col()` | ✅ | `:nth-col(2n+1)` |
108 | | `:nth-last-col()` | ✅ | `:nth-last-col(2n+1)` |
109 | | Attribute namespace wildcards | ❌ | `[*\|attr=value]` |
110 |   
111 | ## 📦 Installation
112 | 
113 | Add `selector` to your list of dependencies in `mix.exs`:
114 | 
115 | ```elixir
116 | def deps do
117 |   [
118 |     {:selector, "~> 0.1.0"}
119 |   ]
120 | end
121 | ```
122 | 
123 | ## 🚀 Usage
124 | 
125 | ### 📝 Basic Parsing
126 | 
127 | Parse CSS selectors into an AST:
128 | 
129 | ```elixir
130 | # Simple tag selector
131 | Selector.parse("div")
132 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "div", []}], []}]}]}
133 | 
134 | # ID selector
135 | Selector.parse("#header")
136 | # => {:selectors, [{:rules, [{:rule, [{:id, "header"}], []}]}]}
137 | 
138 | # Class selector
139 | Selector.parse(".button")
140 | # => {:selectors, [{:rules, [{:rule, [{:class, "button"}], []}]}]}
141 | 
142 | # Multiple selectors
143 | Selector.parse("div, .button")
144 | # => {:selectors, [
145 | #      {:rules, [{:rule, [{:tag_name, "div", []}], []}]},
146 | #      {:rules, [{:rule, [{:class, "button"}], []}]}
147 | #    ]}
148 | ```
149 | 
150 | ### 🔧 Complex Selectors
151 | 
152 | ```elixir
153 | # Combined selectors
154 | Selector.parse("div#main.container")
155 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "div", []}, {:id, "main"}, {:class, "container"}], []}]}]}
156 | 
157 | # Attribute selectors
158 | Selector.parse("input[type='text']")
159 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "input", []}, {:attribute, {:equal, "type", "text", []}}], []}]}]}
160 | 
161 | # Pseudo-classes
162 | Selector.parse("a:hover")
163 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "a", []}, {:pseudo_class, {"hover", []}}], []}]}]}
164 | 
165 | # Pseudo-elements
166 | Selector.parse("p::first-line")
167 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "p", []}, {:pseudo_element, {"first-line", []}}], []}]}]}
168 | ```
169 | 
170 | ### 🏷️ Namespaces
171 | 
172 | Namespaces are useful when working with XML documents or SVG elements within HTML:
173 | 
174 | ```elixir
175 | # Element with namespace prefix
176 | Selector.parse("svg|rect")
177 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "rect", namespace: "svg"}], []}]}]}
178 | 
179 | # Any namespace (wildcard)
180 | Selector.parse("*|circle")
181 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "circle", namespace: "*"}], []}]}]}
182 | 
183 | # No namespace (elements without namespace)
184 | Selector.parse("|path")
185 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "path", namespace: ""}], []}]}]}
186 | 
187 | # Any namespace with the universal selector (matches every element)
188 | Selector.parse("*|*")
189 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "*", namespace: "*"}], []}]}]}
190 | 
191 | # Namespace in attribute selectors
192 | Selector.parse("[xlink|href]")
193 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:exists, "href", nil, namespace: "xlink"}}], []}]}]}
194 | 
195 | # Namespace with attribute value
196 | Selector.parse("[xml|lang='en']")
197 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "lang", "en", namespace: "xml"}}], []}]}]}
198 | 
199 | # Complex example with SVG
200 | Selector.parse("svg|svg > svg|g svg|rect.highlight")
201 | # => {:selectors, [{:rules, [
202 | #      {:rule, [{:tag_name, "svg", namespace: "svg"}], []},
203 | #      {:rule, [{:tag_name, "g", namespace: "svg"}], combinator: ">"},
204 | #      {:rule, [{:tag_name, "rect", namespace: "svg"}, {:class, "highlight"}], []}
205 | #    ]}]}
206 | 
207 | # MathML namespace example
208 | Selector.parse("math|mrow > math|mi + math|mo")
209 | # => {:selectors, [{:rules, [
210 | #      {:rule, [{:tag_name, "mrow", namespace: "math"}], []},
211 | #      {:rule, [{:tag_name, "mi", namespace: "math"}], combinator: ">"},
212 | #      {:rule, [{:tag_name, "mo", namespace: "math"}], combinator: "+"}
213 | #    ]}]}
214 | ```
215 | 
216 | ### 🔗 Combinators
217 | 
218 | ```elixir
219 | # Descendant combinator (space)
220 | Selector.parse("article p")
221 | # => {:selectors, [{:rules, [
222 | #      {:rule, [{:tag_name, "article", []}], []}, 
223 | #      {:rule, [{:tag_name, "p", []}], []}
224 | #    ]}]}
225 | 
226 | # Child combinator (>)
227 | Selector.parse("ul > li")
228 | # => {:selectors, [{:rules, [
229 | #      {:rule, [{:tag_name, "ul", []}], []}, 
230 | #      {:rule, [{:tag_name, "li", []}], combinator: ">"}
231 | #    ]}]}
232 | 
233 | # Adjacent sibling combinator (+)
234 | Selector.parse("h1 + p")
235 | # => {:selectors, [{:rules, [
236 | #      {:rule, [{:tag_name, "h1", []}], []}, 
237 | #      {:rule, [{:tag_name, "p", []}], combinator: "+"}
238 | #    ]}]}
239 | 
240 | # General sibling combinator (~)
241 | Selector.parse("h1 ~ p")
242 | # => {:selectors, [{:rules, [
243 | #      {:rule, [{:tag_name, "h1", []}], []}, 
244 | #      {:rule, [{:tag_name, "p", []}], combinator: "~"}
245 | #    ]}]}
246 | 
247 | # Column combinator (||) - CSS Level 4
248 | Selector.parse("col || td")
249 | # => {:selectors, [{:rules, [
250 | #      {:rule, [{:tag_name, "col", []}], []}, 
251 | #      {:rule, [{:tag_name, "td", []}], combinator: "||"}
252 | #    ]}]}
253 | ```
254 | 
255 | ### 🏷️ Attribute Selectors
256 | 
257 | ```elixir
258 | # Existence
259 | Selector.parse("[disabled]")
260 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:exists, "disabled", nil, []}}], []}]}]}
261 | 
262 | # Exact match
263 | Selector.parse("[type=submit]")
264 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "type", "submit", []}}], []}]}]}
265 | 
266 | # Whitespace-separated list contains
267 | Selector.parse("[class~=primary]")
268 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:includes, "class", "primary", []}}], []}]}]}
269 | 
270 | # Dash-separated list starts with
271 | Selector.parse("[lang|=en]")
272 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:dash_match, "lang", "en", []}}], []}]}]}
273 | 
274 | # Starts with
275 | Selector.parse("[href^='https://']")
276 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:prefix, "href", "https://", []}}], []}]}]}
277 | 
278 | # Ends with
279 | Selector.parse("[src$='.png']")
280 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:suffix, "src", ".png", []}}], []}]}]}
281 | 
282 | # Contains substring
283 | Selector.parse("[title*='important']")
284 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:substring, "title", "important", []}}], []}]}]}
285 | 
286 | # Case-insensitive matching (CSS Level 4)
287 | Selector.parse("[type=email i]")
288 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "type", "email", case_sensitive: false}}], []}]}]}
289 | 
290 | # Case-sensitive matching (CSS Level 4)
291 | Selector.parse("[class=Button s]")
292 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "class", "Button", case_sensitive: true}}], []}]}]}
293 | ```
294 | 
295 | ### 🎭 Pseudo-classes
296 | 
297 | ```elixir
298 | # Simple pseudo-classes
299 | Selector.parse(":hover")
300 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"hover", []}}], []}]}]}
301 | 
302 | # Structural pseudo-classes
303 | Selector.parse(":first-child")
304 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"first-child", []}}], []}]}]}
305 | 
306 | # :nth-child with various formulas
307 | Selector.parse(":nth-child(2n+1)")
308 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []}]}]}
309 | 
310 | Selector.parse(":nth-child(odd)")
311 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []}]}]}
312 | 
313 | Selector.parse(":nth-child(even)")
314 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 0]]}}], []}]}]}
315 | 
316 | Selector.parse(":nth-child(5)")
317 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 0, b: 5]]}}], []}]}]}
318 | 
319 | # Language pseudo-class
320 | Selector.parse(":lang(en-US)")
321 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"lang", ["en-US"]}}], []}]}]}
322 | 
323 | # Negation pseudo-class
324 | Selector.parse(":not(.disabled)")
325 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"not", [
326 | #        [{:rules, [{:rule, [{:class, "disabled"}], []}]}]
327 | #      ]}}], []}]}]}
328 | 
329 | # CSS Level 4 pseudo-classes
330 | Selector.parse(":is(h1, h2, h3)")
331 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"is", [
332 | #        [
333 | #          {:rules, [{:rule, [{:tag_name, "h1", []}], []}]},
334 | #          {:rules, [{:rule, [{:tag_name, "h2", []}], []}]},
335 | #          {:rules, [{:rule, [{:tag_name, "h3", []}], []}]}
336 | #        ]
337 | #      ]}}], []}]}]}
338 | 
339 | Selector.parse(":where(article, section) > p")
340 | # => {:selectors, [{:rules, [
341 | #      {:rule, [{:pseudo_class, {"where", [
342 | #        [
343 | #          {:rules, [{:rule, [{:tag_name, "article", []}], []}]},
344 | #          {:rules, [{:rule, [{:tag_name, "section", []}], []}]}
345 | #        ]
346 | #      ]}}], []},
347 | #      {:rule, [{:tag_name, "p", []}], combinator: ">"}
348 | #    ]}]}
349 | 
350 | Selector.parse(":has(> img)")
351 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"has", [
352 | #        [{:rules, [{:rule, [{:tag_name, "img", []}], combinator: ">"}]}]
353 | #      ]}}], []}]}]}
354 | ```
355 | 
356 | ### 🎨 Pseudo-elements
357 | 
358 | ```elixir
359 | # Standard pseudo-elements
360 | Selector.parse("::before")
361 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"before", []}}], []}]}]}
362 | 
363 | Selector.parse("::after")
364 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"after", []}}], []}]}]}
365 | 
366 | Selector.parse("::first-line")
367 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"first-line", []}}], []}]}]}
368 | 
369 | Selector.parse("::first-letter")
370 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"first-letter", []}}], []}]}]}
371 | 
372 | # CSS Level 4 pseudo-elements
373 | Selector.parse("::placeholder")
374 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"placeholder", []}}], []}]}]}
375 | 
376 | Selector.parse("::selection")
377 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"selection", []}}], []}]}]}
378 | 
379 | # Pseudo-elements with parameters
380 | Selector.parse("::slotted(span)")
381 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"slotted", [[{:rules, [{:rule, [{:tag_name, "span", []}], []}]}]]}}], []}]}]}
382 | 
383 | # Legacy single-colon syntax (still supported)
384 | Selector.parse(":before")
385 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"before", []}}], []}]}]}
386 | 
387 | # Vendor-specific pseudo-elements
388 | Selector.parse("::-webkit-input-placeholder")
389 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"-webkit-input-placeholder", []}}], []}]}]}
390 | ```
391 | 
392 | ### 💪 Advanced Examples
393 | 
394 | ```elixir
395 | # Complex selector with multiple features
396 | Selector.parse("article.post:not(.draft) > h1 + p:first-of-type")
397 | # => {:selectors, [{:rules, [
398 | #      {:rule, [
399 | #        {:tag_name, "article", []},
400 | #        {:class, "post"},
401 | #        {:pseudo_class, {"not", [[{:rules, [{:rule, [{:class, "draft"}], []}]}]]}}
402 | #      ], []},
403 | #      {:rule, [{:tag_name, "h1", []}], combinator: ">"},
404 | #      {:rule, [
405 | #        {:tag_name, "p", []},
406 | #        {:pseudo_class, {"first-of-type", []}}
407 | #      ], combinator: "+"}
408 | #    ]}]}
409 | 
410 | # Multiple attribute selectors
411 | Selector.parse("input[type='email'][required][placeholder^='Enter']")
412 | # => {:selectors, [{:rules, [{:rule, [
413 | #      {:tag_name, "input", []},
414 | #      {:attribute, {:equal, "type", "email", []}},
415 | #      {:attribute, {:exists, "required", nil, []}},
416 | #      {:attribute, {:prefix, "placeholder", "Enter", []}}
417 | #    ], []}]}]}
418 | 
419 | # Nested pseudo-classes
420 | Selector.parse(":not(:first-child):not(:last-child)")
421 | # => {:selectors, [{:rules, [{:rule, [
422 | #      {:pseudo_class, {"not", [[{:rules, [{:rule, [{:pseudo_class, {"first-child", []}}], []}]}]]}},
423 | #      {:pseudo_class, {"not", [[{:rules, [{:rule, [{:pseudo_class, {"last-child", []}}], []}]}]]}}
424 | #    ], []}]}]}
425 | ```
426 | 
427 | ### 🔄 Rendering AST back to CSS
428 | 
429 | ```elixir
430 | ast = Selector.parse("div#main > p.text")
431 | Selector.render(ast)
432 | # => "div#main > p.text"
433 | ```
434 | 
435 | ### ⚙️ Parser Options
436 | 
437 | ```elixir
438 | # Strict mode (default: true) rejects identifiers that start with a double hyphen.
439 | # Pass `strict: false` to accept them:
440 | Selector.parse("#--custom-id", strict: false)
441 | # => {:selectors, [{:rules, [{:rule, [{:id, "--custom-id"}], []}]}]}
442 | ```
443 | 
444 | ## 🌳 AST Structure
445 | 
446 | The parser generates an AST with the following structure:
447 | 
448 | - The top-level structure is `{:selectors, [selector_groups]}`
449 | - Each selector group is `{:rules, [rules]}`
450 | - Each rule is `{:rule, selectors, options}`
451 | - Comma-separated selectors produce one `{:rules, [...]}` group each inside the `:selectors` list
452 | - Combinators are stored in the options of the following rule
453 | 
454 | ### 🎯 Selector Types
455 | 
456 | - `{:tag_name, "div", []}` - Element selector
457 | - `{:tag_name, "div", namespace: "svg"}` - Namespaced element
458 | - `{:id, "header"}` - ID selector
459 | - `{:class, "button"}` - Class selector
460 | - `{:attribute, {operation, name, value, options}}` - Attribute selector
461 | - `{:pseudo_class, {name, arguments}}` - Pseudo-class
462 | - `{:pseudo_element, {name, arguments}}` - Pseudo-element
463 | 
464 | ### 🔧 Attribute Operations
465 | 
466 | - `:exists` - `[attr]`
467 | - `:equal` - `[attr=value]`
468 | - `:includes` - `[attr~=value]`
469 | - `:dash_match` - `[attr|=value]`
470 | - `:prefix` - `[attr^=value]`
471 | - `:suffix` - `[attr$=value]`
472 | - `:substring` - `[attr*=value]`
473 | 
474 | ## ⚠️ Error Handling
475 | 
476 | The parser raises `ArgumentError` for invalid selectors:
477 | 
478 | ```elixir
479 | try do
480 |   Selector.parse(".")
481 | rescue
482 |   ArgumentError -> "Invalid selector"
483 | end
484 | # => "Invalid selector"
485 | ```
486 | 
487 | ## 📄 License
488 | 
489 | MIT License - Copyright (c) 2024 DockYard, Inc. See [LICENSE.md](LICENSE.md) for details.


--------------------------------------------------------------------------------
/lib/selector/parser/guards.ex:
--------------------------------------------------------------------------------
  1 |  defmodule Selector.Parser.Guards do
  2 |   @moduledoc """
  3 |   Provides defguards for validating Unicode code points according to CSS Selector
  4 |   specification rules for different parts of a CSS selector.
  5 | 
  6 |   Based on CSS Syntax Module Level 3 and CSS Selectors Level 4 specifications.
  7 |   Enhanced with full UTF-8/Unicode support.
  8 |   """
  9 | 
 10 |   #--------------------------------------------------------------------------------
 11 |   # Region: Module Attributes (Character Sets and Forbidden Codepoints)
 12 |   #--------------------------------------------------------------------------------
 13 | 
 14 |   @whitespace_chars [
 15 |     ?\t, # U+0009 tab
 16 |     ?\n, # U+000A line feed
 17 |     ?\f, # U+000C form feed
 18 |     ?\r, # U+000D carriage return
 19 |     ?\s  # U+0020 space
 20 |   ]
 21 | 
 22 |   @combinator_chars [
 23 |     ?>, # U+003E child combinator
 24 |     ?+, # U+002B adjacent sibling combinator
 25 |     ?~  # U+007E general sibling combinator
 26 |   ]
 27 | 
 28 |   @delimiter_chars [
 29 |     ?#,  # U+0023 hash (ID selector)
 30 |     ?.,  # U+002E dot (class selector)
 31 |     ?:,  # U+003A colon (pseudo-class/element)
 32 |     ?[,  # U+005B attribute selector start
 33 |     ?],  # U+005D attribute selector end
 34 |     ?(,  # U+0028 function start
 35 |     ?),  # U+0029 function end
 36 |     ?,,  # U+002C selector list separator
 37 |     ?",  # U+0022 string delimiter
 38 |     ?',  # U+0027 string delimiter
 39 |     ?\\  # U+005C escape character
 40 |   ]
 41 | 
 42 |   @attribute_operators [
 43 |     # Characters that appear in attribute match operators
 44 |     ?=, # U+003D exact match (=)
 45 |     ?~, # U+007E word match (~=)
 46 |     ?|, # U+007C language/dash match (|=)
 47 |     ?^, # U+005E prefix match (^=)
 48 |     ?$, # U+0024 suffix match ($=)
 49 |     ?*  # U+002A substring match (*=)
 50 |   ]
 51 | 
 52 |   #--------------------------------------------------------------------------------
 53 |   # Region: Private Helper Guards
 54 |   #--------------------------------------------------------------------------------
 55 | 
 56 |   # Letter check built from a hand-picked list of Unicode blocks rather than the
 57 |   # full Unicode letter category. Whole blocks are accepted, so non-letters that
 58 |   # live inside a listed block also match (for example the Arabic-Indic digits
 59 |   # U+0660..U+0669 sit inside the Arabic block).
 60 |   defguard is_utf8_letter(codepoint) when
 61 |     is_integer(codepoint) and
 62 |     (
 63 |       # Basic Latin
 64 |       codepoint in ?a..?z or
 65 |       codepoint in ?A..?Z or
 66 |       # Latin-1 Supplement (U+00D7 and U+00F7 are left out)
 67 |       codepoint in 0x00C0..0x00D6 or
 68 |       codepoint in 0x00D8..0x00F6 or
 69 |       codepoint in 0x00F8..0x00FF or
 70 |       # Latin Extended-A and Extended-B
 71 |       codepoint in 0x0100..0x017F or
 72 |       codepoint in 0x0180..0x024F or
 73 |       # Latin Extended Additional, Extended-C and Extended-D
 74 |       codepoint in 0x1E00..0x1EFF or
 75 |       codepoint in 0x2C60..0x2C7F or
 76 |       codepoint in 0xA720..0xA7FF or
 77 |       # Greek and Coptic, Greek Extended
 78 |       codepoint in 0x0370..0x03FF or
 79 |       codepoint in 0x1F00..0x1FFF or
 80 |       # Cyrillic
 81 |       codepoint in 0x0400..0x04FF or
 82 |       # Hebrew
 83 |       codepoint in 0x0590..0x05FF or
 84 |       # Arabic
 85 |       codepoint in 0x0600..0x06FF or
 86 |       # Hiragana and Katakana
 87 |       codepoint in 0x3040..0x309F or
 88 |       codepoint in 0x30A0..0x30FF or
 89 |       # CJK Unified Ideographs (common range)
 90 |       codepoint in 0x4E00..0x9FFF or
 91 |       # Hangul Syllables
 92 |       codepoint in 0xAC00..0xD7AF
 93 |     )
 94 | 
 95 |   defguard is_utf8_digit(codepoint) when
 96 |     is_integer(codepoint) and
 97 |     (
 98 |       # ASCII 0-9
 99 |       codepoint in ?0..?9 or
100 |       # Arabic-Indic
101 |       codepoint in 0x0660..0x0669 or
102 |       # Extended Arabic-Indic
103 |       codepoint in 0x06F0..0x06F9 or
104 |       # Devanagari
105 |       codepoint in 0x0966..0x096F or
106 |       # Bengali
107 |       codepoint in 0x09E6..0x09EF or
108 |       # Fullwidth forms
109 |       codepoint in 0xFF10..0xFF19
110 |     )
111 | 
112 |   # ASCII-only on purpose: CSS escape sequences accept no other hex digits.
113 |   defguardp is_utf8_hex_digit(codepoint) when
114 |     is_integer(codepoint) and
115 |     (
116 |       codepoint in ?0..?9 or
117 |       codepoint in ?a..?f or
118 |       codepoint in ?A..?F
119 |     )
120 | 
121 |   # Any integer at or above U+0080; no upper bound is applied here.
122 |   defguardp is_non_ascii(codepoint) when is_integer(codepoint) and codepoint >= 0x0080
123 | 
124 |   # Surrogate range U+D800..U+DFFF.
125 |   defguardp is_surrogate_codepoint(codepoint) when
126 |     is_integer(codepoint) and codepoint in 0xD800..0xDFFF
127 | 
128 |   # Line terminators: LF, FF and CR plus NEL and the Unicode line/paragraph separators.
129 |   defguardp is_newline(codepoint) when
130 |     is_integer(codepoint) and codepoint in [
131 |       0x000A, # Line Feed
132 |       0x000C, # Form Feed
133 |       0x000D, # Carriage Return
134 |       0x0085, # Next Line (NEL)
135 |       0x2028, # Line Separator
136 |       0x2029  # Paragraph Separator
137 |     ]
138 | 
139 |   # CSS whitespace plus the additional Unicode space and separator codepoints below.
140 |   defguardp is_unicode_whitespace(codepoint) when
141 |     is_integer(codepoint) and
142 |     (
143 |       codepoint in @whitespace_chars or
144 |       codepoint in 0x2000..0x200A or   # Various spaces
145 |       codepoint in [
146 |         0x0085, 0x00A0,                # Next Line (NEL), non-breaking space
147 |         0x1680,                        # Ogham space mark
148 |         0x2028, 0x2029,                # Line separator, paragraph separator
149 |         0x202F, 0x205F,                # Narrow no-break space, medium mathematical space
150 |         0x3000                         # Ideographic space
151 |       ]
152 |     )
153 | 
154 |   #--------------------------------------------------------------------------------
155 |   # Region: Public Guards for CSS Selector Components
156 |   #--------------------------------------------------------------------------------
157 | 
158 |   @doc """
159 |   Guard: true when `codepoint` is one of the five CSS whitespace characters
160 |   (tab, line feed, form feed, carriage return, space).
161 | 
162 |   Only these ASCII characters count; `is_unicode_whitespace_char/1` covers the wider set.
163 |   """
164 |   defguard is_whitespace(codepoint) when is_integer(codepoint) and codepoint in @whitespace_chars
165 | 
166 |   @doc """
167 |   Guard: true when `codepoint` is Unicode whitespace, a superset of CSS whitespace.
168 |   Adds NEL, no-break spaces and further Unicode space/separator codepoints.
169 |   """
170 |   defguard is_unicode_whitespace_char(codepoint)
171 |            when is_unicode_whitespace(codepoint)
172 | 
173 |   @doc """
174 |   Guard: true when `codepoint` may begin a CSS identifier.
175 |   Accepts letters recognised by `is_utf8_letter/1`, underscore and any non-ASCII codepoint,
176 |   but never a digit recognised by `is_utf8_digit/1`. Escapes are not handled by this guard.
177 |   """
178 |   defguard is_identifier_start_char(codepoint) when
179 |     is_integer(codepoint) and
180 |     not is_utf8_digit(codepoint) and       # digits can never start an identifier
181 |     (
182 |       codepoint == ?_ or
183 |       is_utf8_letter(codepoint) or is_non_ascii(codepoint)
184 |     )
185 | 
186 |   @doc """
187 |   Guard: true when `codepoint` may appear after the first character of a CSS identifier.
188 |   That is any identifier start character, any digit accepted by `is_utf8_digit/1`, or `-`.
189 |   """
190 |   defguard is_identifier_char(codepoint) when
191 |     is_integer(codepoint) and
192 |     (
193 |       codepoint == ?- or
194 |       is_utf8_digit(codepoint) or
195 |       is_identifier_start_char(codepoint)
196 |     )
197 | 
198 |   @doc """
199 |   Guard: true when `codepoint` may appear unescaped inside a CSS string.
200 | 
201 |   Both quote characters are rejected regardless of which one delimits the string,
202 |   together with the backslash and every newline codepoint.
203 |   """
204 |   defguard is_string_char(codepoint) when
205 |     is_integer(codepoint) and
206 |     codepoint != 0x0022 and                # double quote
207 |     codepoint != 0x0027 and                # single quote
208 |     codepoint != 0x005C and                # backslash
209 |     not is_newline(codepoint)
210 | 
211 |   @doc """
212 |   Guard: true for the single-character combinators `>`, `+` and `~`.
213 | 
214 |   The descendant combinator (whitespace) is covered by `is_whitespace/1`, and the
215 |   two-character column combinator `||` has to be recognised by the parser itself.
216 |   """
217 |   defguard is_combinator_char(codepoint)
218 |            when is_integer(codepoint) and codepoint in @combinator_chars
219 | 
220 |   @doc """
221 |   Guard: backward-compatible alias of `is_combinator_char/1`.
222 | 
223 |   True for `>`, `+` and `~`.
224 |   """
225 |   defguard is_combinator(codepoint) when is_combinator_char(codepoint)
226 | 
227 |   @doc """
228 |   Guard: true when `codepoint` is one of the delimiter characters
229 |   (`#`, `.`, `:`, `[`, `]`, `(`, `)`, `,`, either quote, or the backslash).
230 |   """
231 |   defguard is_delimiter(codepoint) when is_integer(codepoint) and codepoint in @delimiter_chars
232 | 
233 |   @doc """
234 |   Guard: true for `=`, `~`, `|`, `^`, `$` and `*`, the characters used in attribute operators.
235 |   """
236 |   defguard is_attribute_operator_char(codepoint)
237 |            when is_integer(codepoint) and codepoint in @attribute_operators
238 | 
239 |   @doc """
240 |   Guard: true when `codepoint` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
241 | 
242 |   These are the only digits accepted in CSS escape sequences.
243 |   """
244 |   defguard is_hex_digit(codepoint) when is_utf8_hex_digit(codepoint)
245 | 
246 |   @doc """
247 |   Guard: true when `codepoint` may follow a backslash in a CSS escape.
248 | 
249 |   Every integer codepoint qualifies except the newline codepoints.
250 |   """
251 |   defguard is_escapable_char(codepoint)
252 |            when is_integer(codepoint) and not is_newline(codepoint)
253 | 
254 |   @doc """
255 |   Guard: true when `codepoint` may appear in an ID selector name (the part after `#`).
256 | 
257 |   Same character set as `is_identifier_char/1`.
258 |   """
259 |   defguard is_id_char(codepoint) when is_identifier_char(codepoint)
260 | 
261 |   @doc """
262 |   Guard: true when `codepoint` may be the first character of an ID selector name (after `#`).
263 | 
264 |   Same character set as `is_identifier_start_char/1`.
265 |   """
266 |   defguard is_id_start_char(codepoint) when is_identifier_start_char(codepoint)
267 | 
268 |   @doc """
269 |   Guard: true when `codepoint` may appear in a class selector name (the part after `.`).
270 | 
271 |   Same character set as `is_identifier_char/1`.
272 |   """
273 |   defguard is_class_char(codepoint) when is_identifier_char(codepoint)
274 | 
275 |   @doc """
276 |   Guard: true when `codepoint` may be the first character of a class selector name (after `.`).
277 | 
278 |   Same character set as `is_identifier_start_char/1`.
279 |   """
280 |   defguard is_class_start_char(codepoint) when is_identifier_start_char(codepoint)
281 | 
282 |   @doc """
283 |   Guard: true when `codepoint` may appear in an element/type selector name.
284 | 
285 |   Same character set as `is_identifier_char/1`.
286 |   """
287 |   defguard is_tag_name_char(codepoint) when is_identifier_char(codepoint)
288 | 
289 |   @doc """
290 |   Guard: true when `codepoint` may be the first character of an element/type selector name.
291 | 
292 |   Same character set as `is_identifier_start_char/1`.
293 |   """
294 |   defguard is_tag_name_start_char(codepoint) when is_identifier_start_char(codepoint)
295 | 
296 |   @doc """
297 |   Guard: true when `codepoint` may appear in an attribute name.
298 | 
299 |   Same character set as `is_identifier_char/1`.
300 |   """
301 |   defguard is_attribute_name_char(codepoint) when is_identifier_char(codepoint)
302 | 
303 |   @doc """
304 |   Guard: true when `codepoint` may be the first character of an attribute name.
305 | 
306 |   Same character set as `is_identifier_start_char/1`.
307 |   """
308 |   defguard is_attribute_name_start_char(codepoint) when is_identifier_start_char(codepoint)
309 | 
310 |   @doc """
311 |   Guard: true when `codepoint` may appear in a pseudo-class or pseudo-element name.
312 | 
313 |   Same character set as `is_identifier_char/1`.
314 |   """
315 |   defguard is_pseudo_name_char(codepoint) when is_identifier_char(codepoint)
316 | 
317 |   @doc """
318 |   Guard: true when `codepoint` may be the first character of a pseudo-class/element name.
319 | 
320 |   Same character set as `is_identifier_start_char/1`, so a leading hyphen is not accepted.
321 |   """
322 |   defguard is_pseudo_name_start_char(codepoint) when is_identifier_start_char(codepoint)
323 | 
324 |   @doc """
325 |   Guard: true when `codepoint` may appear in a CSS function name.
326 | 
327 |   Same character set as `is_identifier_char/1`.
328 |   """
329 |   defguard is_function_name_char(codepoint) when is_identifier_char(codepoint)
330 | 
331 |   @doc """
332 |   Guard: true when `codepoint` may be the first character of a CSS function name.
333 | 
334 |   Same character set as `is_identifier_start_char/1`.
335 |   """
336 |   defguard is_function_name_start_char(codepoint) when is_identifier_start_char(codepoint)
337 | 
338 |   @doc """
339 |   Guard: true when `codepoint` may appear inside a CSS number.
340 | 
341 |   Accepts digits recognised by `is_utf8_digit/1`, the decimal point, `+`, `-`,
342 |   and `e`/`E` for scientific notation.
343 |   """
344 |   defguard is_number_char(codepoint) when
345 |     is_integer(codepoint) and
346 |     (
347 |       is_utf8_digit(codepoint) or
348 |       codepoint == ?. or                    # decimal point
349 |       codepoint in [?+, ?-] or              # plus / minus sign
350 |       codepoint in [?e, ?E]                 # scientific notation
351 |     )
352 | 
353 |   @doc """
354 |   Guard: true when `codepoint` may begin a CSS number.
355 | 
356 |   Accepts digits recognised by `is_utf8_digit/1`, the decimal point and either sign.
357 |   """
358 |   defguard is_number_start_char(codepoint) when
359 |     is_integer(codepoint) and
360 |     (
361 |       is_utf8_digit(codepoint) or
362 |       codepoint == ?. or                    # decimal point
363 |       codepoint in [?+, ?-]                 # plus / minus sign
364 |     )
365 | 
366 |   @doc """
367 |   Guard: true for any integer codepoint.
368 | 
369 |   No single character is invalid inside a CSS comment; spotting the closing `*/`
370 |   sequence is left to the parser.
371 |   """
372 |   defguard is_comment_char(codepoint)
373 |            when is_integer(codepoint)
374 | 
375 |   @doc """
376 |   Guard: true when `codepoint` may appear in an unquoted attribute value.
377 | 
378 |   Rejected characters:
379 |   - `]`, which closes the attribute selector
380 |   - both quote characters and the backslash
381 |   - newline codepoints and CSS whitespace
382 | 
383 |   Quoted values are not modelled here; the guard assumes an unquoted value.
384 |   """
385 |   defguard is_attribute_value_char(codepoint) when
386 |     is_integer(codepoint) and
387 |     codepoint != 0x005D and                # ] (attribute selector end)
388 |     codepoint != 0x0022 and                # " (double quote)
389 |     codepoint != 0x0027 and                # ' (single quote)
390 |     codepoint != 0x005C and                # \ (backslash - needs escaping)
391 |     not is_newline(codepoint) and
392 |     not is_whitespace(codepoint)
393 | 
394 |   @doc """
395 |   Guard: true when `codepoint` may be the first character of a selector.
396 | 
397 |   Accepts anything that can start a tag name, the punctuation that introduces a
398 |   class, ID, attribute or pseudo selector, `*`, `|`, a backslash, and CSS whitespace.
399 |   """
400 |   defguard is_selector_start_char(codepoint) when
401 |     is_integer(codepoint) and
402 |     (
403 |       is_tag_name_start_char(codepoint) or     # Element/type selectors (div, span, etc.)
404 |       is_whitespace(codepoint) or              # Whitespace before selector
405 |       codepoint in [
406 |         ?., ?#,                                # Class / ID selector start
407 |         ?[, ?:,                                # Attribute / pseudo-class/element start
408 |         ?*,                                    # Universal selector
409 |         ?|,                                    # Pipe (namespace separator)
410 |         ?\\                                    # Escape character (for escapes like \*)
411 |       ]
412 |     )
413 | 
414 |   #--------------------------------------------------------------------------------
415 |   # Region: Utility Guards for UTF-8 Character Classification
416 |   #--------------------------------------------------------------------------------
417 | 
418 |   @doc """
419 |   Guard: true when `codepoint` is a letter according to `is_utf8_letter/1`.
420 | 
421 |   Covers the Latin, Greek, Cyrillic, Hebrew, Arabic, CJK, kana and Hangul blocks listed there.
422 |   """
423 |   defguard is_utf8_letter_char(codepoint) when is_utf8_letter(codepoint)
424 | 
425 |   @doc """
426 |   Guard: true when `codepoint` is a digit according to `is_utf8_digit/1`.
427 | 
428 |   Covers ASCII digits and the other digit ranges listed there.
429 |   """
430 |   defguard is_utf8_digit_char(codepoint) when is_utf8_digit(codepoint)
431 | 
432 |   @doc """
433 |   Guard: true when `codepoint` may begin a pseudo-class or pseudo-element name.
434 | 
435 |   The rules match `is_identifier_start_char/1`, with one addition: a leading
436 |   hyphen is accepted so vendor-prefixed names such as `-webkit-scrollbar` work.
437 | 
438 |   Accepted starting characters:
439 |   - letters recognised by `is_utf8_letter/1`
440 |   - underscore (`_`)
441 |   - hyphen (`-`)
442 |   - any non-ASCII codepoint (>= U+0080) that is not a recognised digit
443 |   """
444 |   defguard is_pseudo_start_char(codepoint) when
445 |     codepoint == ?- or                     # hyphen for vendor prefixes
446 |     is_identifier_start_char(codepoint)
447 | 
448 |   @doc """
449 |   Guard: true when `codepoint` may appear inside a pseudo-class or pseudo-element name.
450 | 
451 |   Delegates to `is_identifier_char/1`, so the accepted characters are:
452 |   - letters recognised by `is_utf8_letter/1`
453 |   - digits recognised by `is_utf8_digit/1`
454 |   - underscore (`_`)
455 |   - hyphen (`-`)
456 |   - any other non-ASCII codepoint (>= U+0080)
457 | 
458 |   Parentheses, whitespace and other punctuation are not part of the name itself.
459 |   They belong to the functional notation around the name and are not matched
460 |   by this guard.
461 |   """
462 |   defguard is_pseudo_char(codepoint)
463 |            when is_identifier_char(codepoint)
464 | 
465 |   @doc """
466 |   Guard: true when `codepoint` may appear in a language tag.
467 | 
468 |   Language tags (BCP 47) consist of ASCII letters, ASCII digits and the hyphen
469 |   separator. They are the values passed to the `:lang()` pseudo-class, for
470 |   example `en`, `en-US` or `zh-Hans-CN`.
471 |   Non-ASCII letters and digits are rejected.
472 |   """
473 |   defguard is_lang_char(codepoint) when
474 |     is_integer(codepoint) and
475 |     (
476 |       codepoint == ?- or                    # hyphen separator
477 |       codepoint in ?a..?z or
478 |       codepoint in ?A..?Z or
479 |       codepoint in ?0..?9
480 |     )
481 | 
482 |   @doc """
483 |   Guard: true when `codepoint` may begin a language tag.
484 |   Only ASCII letters qualify; digits and the hyphen are rejected.
485 |   """
486 |   defguard is_lang_start_char(codepoint) when
487 |     is_integer(codepoint) and
488 |     (
489 |       codepoint in ?a..?z or
490 |       codepoint in ?A..?Z
491 |     )
492 | 
493 |   @doc """
494 |   Guard: true when `codepoint` is an integer in `0..0x10FFFF` that is not a
495 |   surrogate (`0xD800..0xDFFF`).
496 |   Surrogate codepoints are excluded because they are invalid in UTF-8.
497 |   """
498 |   defguard is_valid_utf8_codepoint(codepoint) when
499 |     is_integer(codepoint) and
500 |     codepoint in 0..0x10FFFF and
501 |     not is_surrogate_codepoint(codepoint)
502 | 
503 |   @doc """
504 |   Guard: true when `codepoint` may appear in an nth-formula.
505 | 
506 |   Nth-formulas are the arguments of `:nth-child()`, `:nth-of-type()` and similar
507 |   pseudo-classes, for example `2n+1`, `odd`, `even`, `3n-2` or `-n+5`.
508 | 
509 |   Accepts ASCII digits, the letters of `n`, `odd` and `even` in either case, `+`,
510 |   `-`, and CSS whitespace.
511 |   """
512 |   defguard is_nth_formula_char(codepoint) when
513 |     is_integer(codepoint) and
514 |     (
515 |       codepoint in ?0..?9 or                      # ASCII digits
516 |       codepoint in [?n, ?N] or                    # Variable n (case-insensitive)
517 |       codepoint in [?o, ?O, ?d, ?D] or            # Letters of the "odd" keyword
518 |       codepoint in [?e, ?E, ?v, ?V] or            # Letters of the "even" keyword
519 |       codepoint in [?+, ?-] or                    # Plus / minus operator or sign
520 |       is_whitespace(codepoint)                    # CSS whitespace
521 |     )
522 | 
523 |   @doc """
524 |   Guard: true when `codepoint` may begin an nth-formula.
525 | 
526 |   Accepts ASCII digits (`2n+1`), a sign (`+n`, `-n+3`), `n`/`N` (`n+1`), the first
527 |   letter of `odd` or `even` in either case, and leading CSS whitespace.
528 |   """
529 |   defguard is_nth_formula_starting_char(codepoint) when
530 |     is_integer(codepoint) and
531 |     (
532 |       codepoint in ?0..?9 or                      # ASCII digits (integers/coefficients)
533 |       codepoint in [?+, ?-] or                    # Explicit sign
534 |       codepoint in [?n, ?N] or                    # Variable n
535 |       codepoint in [?o, ?O] or                    # "odd" keyword
536 |       codepoint in [?e, ?E] or                    # "even" keyword
537 |       is_whitespace(codepoint)                    # Leading CSS whitespace allowed
538 |     )
539 | 
540 |   @doc """
541 |   Guard: Checks if a codepoint is any character that can appear in a CSS selector.
542 |   The clauses below cover, with considerable overlap:
543 |   - Identifier characters (letters, digits, underscore, hyphen, non-ASCII)
544 |   - Delimiter, combinator and attribute-operator characters
545 |   - CSS whitespace
546 |   - Quote characters, the escape character, `*`, `,` and `|`
547 |   - Punctuation that may show up in attribute values or strings
548 |   - Printable ASCII (U+0021..U+007E)
549 | 
550 |   NOTE(review): `is_non_ascii/1` accepts every integer >= 0x80, so surrogates and
551 |   values above 0x10FFFF pass as well; the final `is_valid_utf8_codepoint/1` clause
552 |   does not narrow the result. Confirm whether that is intended.
553 |   """
554 |   defguard is_selector_char(codepoint) when
555 |     is_integer(codepoint) and
556 |     (
557 |       # Identifier characters (covers element names, classes, IDs, attributes, pseudo-classes)
558 |       is_identifier_char(codepoint) or
559 |       
560 |       # Delimiter characters
561 |       codepoint in @delimiter_chars or
562 |       
563 |       # Combinator characters
564 |       codepoint in @combinator_chars or
565 |       
566 |       # Whitespace characters
567 |       codepoint in @whitespace_chars or
568 |       
569 |       # Attribute operators
570 |       codepoint in @attribute_operators or
571 |       
572 |       # Special selector characters
573 |       codepoint == ?| or      # Namespace separator (also in column combinator ||)
574 |       codepoint == ?* or      # Universal selector
575 |       codepoint == ?, or      # Selector list separator
576 |       codepoint == ?! or      # Exclamation mark (NOTE(review): :not() does not use it - confirm why it is listed)
577 |       codepoint == ?n or      # For nth-child formulas (already covered by identifier_char)
578 |       codepoint == ?+ or      # For nth-child formulas and adjacent sibling
579 |       
580 |       # Characters that can appear in strings and attribute values
581 |       is_utf8_letter(codepoint) or
582 |       is_utf8_digit(codepoint) or
583 |       is_non_ascii(codepoint) or
584 |       
585 |       # Common punctuation that might appear in attribute values or strings
586 |       codepoint == ?/ or      # URLs, paths
587 |       codepoint == ?. or      # Decimal points, URLs
588 |       codepoint == ?? or      # Query strings
589 |       codepoint == ?& or      # URLs
590 |       codepoint == ?% or      # Encoded characters
591 |       codepoint == ?@ or      # Emails, at-rules context
592 |       codepoint == ?; or      # Might appear in data attributes
593 |       codepoint == ?{ or      # Might appear in data attributes
594 |       codepoint == ?} or      # Might appear in data attributes
595 |       codepoint == ?< or      # Might appear in data attributes
596 |       codepoint == ?> or      # Also a combinator
597 |       codepoint == ?` or      # Template literals in data attributes
598 |       codepoint == ?~ or      # Also general sibling combinator
599 |       
600 |       # Escape sequences and special characters
601 |       codepoint == ?\\ or     # Escape character
602 |       
603 |       # Printable ASCII, then non-ASCII codepoints that pass is_valid_utf8_codepoint/1
604 |       (codepoint >= 0x0021 and codepoint <= 0x007E) or  # Printable ASCII
605 |       (codepoint >= 0x00A0 and is_valid_utf8_codepoint(codepoint))  # Non-ASCII Unicode
606 |     )
607 | 
608 | end
609 | 


--------------------------------------------------------------------------------
/test/selector/parser_test.exs:
--------------------------------------------------------------------------------
   1 | defmodule Selector.ParserTest do
   2 |   @moduledoc """
   3 |   Test suite for CSS selector parser.
   4 |   
   5 |   This parser aims to support:
   6 |   - CSS Selectors Level 3 (complete support)
   7 |   - CSS Selectors Level 4 (partial support for stable features)
   8 |   
   9 |   Notable CSS Level 4 features supported:
  10 |   - :is(), :where(), :has() pseudo-classes
  11 |   - :not() with complex selectors
  12 |   - Case sensitivity modifiers (i, s)
  13 |   - Column combinator (||)
  14 |   - :focus-within, :focus-visible pseudo-classes
  15 |   
  16 |   Features explicitly not supported:
  17 |   - :nth-child(An+B of selector) syntax
  18 |   - :nth-col(), :nth-last-col() pseudo-classes
  19 |   - Attribute selectors with namespace wildcards (NOTE: the "Attributes" tests below assert that "[*|href]" parses; confirm which is intended)
  20 |   """
  21 |   use ExUnit.Case, async: true
  22 | 
  23 |   describe "Identifiers" do # identifier rules, exercised through "#name" ID selectors
  24 |     test "should parse a regular valid identifier" do
  25 |       assert Selector.parse("#id") == {:selectors, [{:rules, [{:rule, [{:id, "id"}], []}]}]}
  26 |     end
  27 | 
  28 |     test "should parse an identifier starting with a hyphen" do
  29 |       assert Selector.parse("#-id") == {:selectors, [{:rules, [{:rule, [{:id, "-id"}], []}]}]}
  30 |     end
  31 | 
  32 |     test "should parse an identifier with hex-encoded characters" do
  33 |       ast_selector = {:selectors, [{:rules, [{:rule, [{:id, "hello\nworld"}], []}]}]} # "\a" (hex 0A) must decode to a newline
  34 | 
  35 |       assert Selector.parse("#hello\\aworld") == ast_selector # escape ends at the first non-hex character
  36 |       assert Selector.parse("#hello\\a world") == ast_selector # one space after the escape is consumed
  37 |       assert Selector.parse("#hello\\a\tworld") == ast_selector # same for a tab
  38 |       assert Selector.parse("#hello\\a\fworld") == ast_selector # same for a form feed
  39 |       assert Selector.parse("#hello\\a\nworld") == ast_selector # same for a line feed
  40 |       assert Selector.parse("#hello\\a\rworld") == ast_selector # same for a carriage return
  41 |       assert Selector.parse("#hello\\a\r\nworld") == ast_selector # CRLF is consumed as a single terminator
  42 |       assert Selector.parse("#hello\\00000aworld") == ast_selector # six hex digits: the next character is kept
  43 |     end
  44 | 
  45 |     test "should fail on an identifier starting with multiple hyphens" do # no options passed: strict mode applies
  46 |       assert_raise ArgumentError, "Identifiers cannot start with two hyphens with strict mode on.", fn ->
  47 |         Selector.parse("#--id")
  48 |       end
  49 |     end
  50 | 
  51 |     test "should fail on an identifier consisting of a single hyphen" do
  52 |       assert_raise ArgumentError, "Identifiers cannot consist of a single hyphen.", fn ->
  53 |         Selector.parse("#-")
  54 |       end
  55 |     end
  56 | 
  57 |     test "should parse an identifier starting with multiple hyphens in case of strict: false" do
  58 |       assert Selector.parse("#--id", strict: false) == {:selectors, [{:rules, [{:rule, [{:id, "--id"}], []}]}]}
  59 |     end
  60 | 
  61 |     test "should fail on an identifier starting with a hyphen and followed with a digit" do
  62 |       assert_raise ArgumentError, "Identifiers cannot start with hyphens followed by digits.", fn ->
  63 |         Selector.parse("#-1")
  64 |       end
  65 | 
  66 |       assert_raise ArgumentError, "Identifiers cannot start with hyphens followed by digits.", fn ->
  67 |         Selector.parse("#--1", strict: false) # the digit rule still applies with strict: false
  68 |       end
  69 |     end
  70 | 
  71 |     test "should parse an identifier consisting of unicode characters" do
  72 |       assert Selector.parse("#ÈÈ") == {:selectors, [{:rules, [{:rule, [{:id, "ÈÈ"}], []}]}]}
  73 |     end
  74 |   end
  75 | 
  76 |   describe "Tag Names" do # type selectors are expected as {:tag_name, name, opts} nodes
  77 |     test "should parse a tag name" do
  78 |       assert Selector.parse("div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "div", []}], []}]}]}
  79 |     end
  80 | 
  81 |     test "should parse a wildcard tag name" do
  82 |       assert Selector.parse("*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", []}], []}]}]}
  83 |     end
  84 | 
  85 |     test "should parse an escaped star" do # expected AST is identical to the unescaped "*" case above
  86 |       assert Selector.parse("\\*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", []}], []}]}]}
  87 |     end
  88 | 
  89 |     test "should properly parse an escaped tag name" do # backslash-escaped spaces stay inside the name
  90 |       assert Selector.parse("d\\ i\\ v") == {:selectors, [{:rules, [{:rule, [{:tag_name, "d i v", []}], []}]}]}
  91 |     end
  92 | 
  93 |     @tag :skip # NOTE(review): no skip reason recorded; presumably the parser does not raise here yet - confirm
  94 |     test "should not be parsed after an attribute" do
  95 |       assert_raise ArgumentError, "Unexpected tag/namespace start.", fn ->
  96 |         Selector.parse(~s([href="#"]a))
  97 |       end
  98 |     end
  99 | 
 100 |     @tag :skip # NOTE(review): skipped without a recorded reason
 101 |     test "should not be parsed after a pseudo-class" do
 102 |       assert_raise ArgumentError, "Unexpected tag/namespace start.", fn ->
 103 |         Selector.parse(":nth-child(2n)a")
 104 |       end
 105 |     end
 106 | 
 107 |     @tag :skip # NOTE(review): title says pseudo-element, but the input below uses single-colon syntax
 108 |     test "should not be parsed after a pseudo-element" do
 109 |       assert_raise ArgumentError, "Unexpected tag/namespace start.", fn ->
 110 |         Selector.parse(":unknown(hello)a")
 111 |       end
 112 |     end
 113 |   end
 114 | 
 115 |   describe "Namespaces" do # a "ns|" prefix is expected in the tag opts as namespace: "ns"
 116 |     test "should parse a namespace name" do
 117 |       assert Selector.parse("ns|div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "div", namespace: "ns"}], []}]}]}
 118 |     end
 119 | 
 120 |     test "should parse no namespace" do # a leading "|" maps to the empty-string namespace
 121 |       assert Selector.parse("|div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "div", namespace: ""}], []}]}]}
 122 |     end
 123 | 
 124 |     test "should parse wildcard namespace" do
 125 |       assert Selector.parse("*|div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "div", namespace: "*"}], []}]}]}
 126 |     end
 127 | 
 128 |     test "should parse a wildcard namespace with a wildcard tag name" do
 129 |       assert Selector.parse("*|*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", namespace: "*"}], []}]}]}
 130 |     end
 131 | 
 132 |     test "should parse an escaped star" do # escaped "*" prefix gives the same AST as "*|*"
 133 |       assert Selector.parse("\\*|*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", namespace: "*"}], []}]}]}
 134 |     end
 135 | 
 136 |     test "should parse an escaped pipe" do # an escaped "|" is part of the tag name, not a separator
 137 |       assert Selector.parse("\\|div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "|div", []}], []}]}]}
 138 |     end
 139 | 
 140 |     test "should parse two escaped stars" do
 141 |       assert Selector.parse("\\*|\\*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", namespace: "*"}], []}]}]}
 142 |     end
 143 | 
 144 |     test "should properly parse an escaped namespace name" do # escapes are decoded on both sides of the "|"
 145 |       assert Selector.parse("n\\ a\\ m|d\\ i\\ v") == {:selectors, [{:rules, [{:rule, [{:tag_name, "d i v", namespace: "n a m"}], []}]}]}
 146 |     end
 147 | 
 148 |     @tag :skip # NOTE(review): skipped without a recorded reason
 149 |     test "should not be parsed after an attribute" do
 150 |       assert_raise ArgumentError, "Unexpected tag/namespace start.", fn ->
 151 |         Selector.parse(~s([href="#"]a|b))
 152 |       end
 153 | 
 154 |       assert_raise ArgumentError, "Unexpected tag/namespace start.", fn ->
 155 |         Selector.parse(~s([href="#"]|b))
 156 |       end
 157 |     end
 158 | 
 159 |     @tag :skip # NOTE(review): the input "a - b" contains no namespace; this test may belong to another group
 160 |     test "should not accept a single hyphen" do
 161 |       assert_raise ArgumentError, fn ->
 162 |         Selector.parse("a - b")
 163 |       end
 164 |     end
 165 | 
 166 |     @tag :skip # NOTE(review): skipped without a recorded reason
 167 |     test "should not be parsed after a pseudo-class" do
 168 |       assert_raise ArgumentError, "Unexpected tag/namespace start.", fn ->
 169 |         Selector.parse(":nth-child(2n)a|b")
 170 |       end
 171 | 
 172 |       assert_raise ArgumentError, "Unexpected tag/namespace start.", fn ->
 173 |         Selector.parse(":nth-child(2n)|b")
 174 |       end
 175 |     end
 176 | 
 177 |     @tag :skip # NOTE(review): title says pseudo-element, but both inputs use single-colon syntax
 178 |     test "should not be parsed after a pseudo-element" do
 179 |       assert_raise ArgumentError, "Unexpected tag/namespace start.", fn ->
 180 |         Selector.parse(":unknown(hello)a|b")
 181 |       end
 182 | 
 183 |       assert_raise ArgumentError, "Unexpected tag/namespace start.", fn ->
 184 |         Selector.parse(":unknown(hello)|b")
 185 |       end
 186 |     end
 187 |   end
 188 | 
 189 |   describe "Class Names" do # each ".name" is expected as a {:class, name} node
 190 |     test "should parse a single class name" do
 191 |       assert Selector.parse(".class") == {:selectors, [{:rules, [{:rule, [{:class, "class"}], []}]}]}
 192 |     end
 193 | 
 194 |     test "should parse multiple class names" do # chained classes stay in source order in one rule
 195 |       assert Selector.parse(".class1.class2") == {:selectors, [{:rules, [
 196 |         {:rule, [{:class, "class1"}, {:class, "class2"}], []}
 197 |       ]}]}
 198 |     end
 199 | 
 200 |     test "should properly parse class names" do # escaped space and escaped "." stay inside the name
 201 |       assert Selector.parse(".cla\\ ss\\.name") == {:selectors, [{:rules, [{:rule, [{:class, "cla ss.name"}], []}]}]}
 202 |     end
 203 | 
 204 |     test "should parse after tag names" do
 205 |       assert Selector.parse("div.class") == {:selectors, [{:rules, [
 206 |         {:rule, [{:tag_name, "div", []}, {:class, "class"}], []}
 207 |       ]}]}
 208 |     end
 209 | 
 210 |     test "should parse after IDs" do
 211 |       assert Selector.parse("#id.class") == {:selectors, [{:rules, [
 212 |         {:rule, [{:id, "id"}, {:class, "class"}], []}
 213 |       ]}]}
 214 |     end
 215 | 
 216 |     test "should parse after an attribute" do # :exists attributes carry nil as their value
 217 |       assert Selector.parse("[href].class") == {:selectors, [{:rules, [
 218 |         {:rule, [{:attribute, {:exists, "href", nil, []}}, {:class, "class"}], []}
 219 |       ]}]}
 220 |     end
 221 | 
 222 |     test "should parse after a pseudo-class" do
 223 |       assert Selector.parse(":link.class") == {:selectors, [{:rules, [
 224 |         {:rule, [{:pseudo_class, {"link", []}}, {:class, "class"}], []}
 225 |       ]}]}
 226 |     end
 227 | 
 228 |     test "should parse after a pseudo-element" do # "::" yields a :pseudo_element node; a class may follow it
 229 |       assert Selector.parse("::before.class") == {:selectors, [{:rules, [
 230 |         {:rule, [{:pseudo_element, {"before", []}}, {:class, "class"}], []}
 231 |       ]}]}
 232 |     end
 233 | 
 234 |     test "should fail on empty class name" do
 235 |       assert_raise ArgumentError, "Expected class name.", fn ->
 236 |         Selector.parse(".")
 237 |       end
 238 | 
 239 |       assert_raise ArgumentError, "Expected class name.", fn ->
 240 |         Selector.parse(".1") # a leading digit is expected to raise the same error as an empty name
 241 |       end
 242 |     end
 243 | 
 244 |     test "should fail on a single hyphen" do
 245 |       assert_raise ArgumentError, "Expected class name.", fn ->
 246 |         Selector.parse(".-")
 247 |       end
 248 |     end
 249 |   end
 250 | 
 251 |   describe "IDs" do # each "#name" is expected as an {:id, name} node
 252 |     test "should parse a single ID" do
 253 |       assert Selector.parse("#id") == {:selectors, [{:rules, [{:rule, [{:id, "id"}], []}]}]}
 254 |     end
 255 | 
 256 |     test "should parse multiple IDs" do # several IDs on one compound selector are all kept
 257 |       assert Selector.parse("#id1#id2") == {:selectors, [{:rules, [
 258 |         {:rule, [{:id, "id1"}, {:id, "id2"}], []}
 259 |       ]}]}
 260 |     end
 261 | 
 262 |     test "should properly parse IDs" do # escaped spaces and an escaped "#" stay inside the ID
 263 |       assert Selector.parse("#id\\ name\\#\\ with\\ escapes") == {:selectors, [{:rules, [
 264 |         {:rule, [{:id, "id name# with escapes"}], []}
 265 |       ]}]}
 266 |     end
 267 | 
 268 |     test "should parse after a tag name" do
 269 |       assert Selector.parse("div#id") == {:selectors, [{:rules, [
 270 |         {:rule, [{:tag_name, "div", []}, {:id, "id"}], []}
 271 |       ]}]}
 272 |     end
 273 | 
 274 |     test "should parse after a class name" do
 275 |       assert Selector.parse(".class#id") == {:selectors, [{:rules, [
 276 |         {:rule, [{:class, "class"}, {:id, "id"}], []}
 277 |       ]}]}
 278 |     end
 279 | 
 280 |     test "should parse mix of classes and ids" do # nodes are expected in source order
 281 |       assert Selector.parse(".class1#id1.class2#id2") == {:selectors, [{:rules, [
 282 |         {:rule, [
 283 |           {:class, "class1"},
 284 |           {:id, "id1"},
 285 |           {:class, "class2"},
 286 |           {:id, "id2"}
 287 |         ], []}
 288 |       ]}]}
 289 |     end
 290 | 
 291 |     test "should parse after an attribute" do
 292 |       assert Selector.parse("[href]#id") == {:selectors, [{:rules, [
 293 |         {:rule, [{:attribute, {:exists, "href", nil, []}}, {:id, "id"}], []}
 294 |       ]}]}
 295 |     end
 296 | 
 297 |     test "should parse after a pseudo-class" do
 298 |       assert Selector.parse(":link#id") == {:selectors, [{:rules, [
 299 |         {:rule, [{:pseudo_class, {"link", []}}, {:id, "id"}], []}
 300 |       ]}]}
 301 |     end
 302 | 
 303 |     test "should parse after a pseudo-element" do
 304 |       assert Selector.parse("::before#id") == {:selectors, [{:rules, [
 305 |         {:rule, [{:pseudo_element, {"before", []}}, {:id, "id"}], []}
 306 |       ]}]}
 307 |     end
 308 | 
 309 |     test "should fail on empty ID" do # a bare "#" has no identifier after it
 310 |       assert_raise ArgumentError, "Expected identifier.", fn ->
 311 |         Selector.parse("#")
 312 |       end
 313 |     end
 314 |   end
 315 | 
 316 |   describe "Attributes" do # expected node shape: {:attribute, {operator, name, value, opts}}
 317 |     test "should parse an attribute" do
 318 |       assert Selector.parse("[attr]") == {:selectors, [{:rules, [
 319 |         {:rule, [{:attribute, {:exists, "attr", nil, []}}], []}
 320 |       ]}]}
 321 |     end
 322 | 
 323 |     test "should parse an attribute with comparison" do
 324 |       assert Selector.parse("[attr=val]") == {:selectors, [{:rules, [
 325 |         {:rule, [{:attribute, {:equal, "attr", "val", []}}], []}
 326 |       ]}]}
 327 |     end
 328 | 
 329 |     test "should parse an attribute with multi-character comparison" do
 330 |       assert Selector.parse("[attr|=val]") == {:selectors, [{:rules, [
 331 |         {:rule, [{:attribute, {:dash_match, "attr", "val", []}}], []}
 332 |       ]}]}
 333 |     end
 334 | 
 335 |     test "should parse multiple attributes" do
 336 |       assert Selector.parse("[attr1][attr2]") == {:selectors, [{:rules, [
 337 |         {:rule, [
 338 |           {:attribute, {:exists, "attr1", nil, []}},
 339 |           {:attribute, {:exists, "attr2", nil, []}}
 340 |         ], []}
 341 |       ]}]}
 342 |     end
 343 | 
 344 |     test "should properly parse attribute names" do # escaped space and escaped "." stay inside the name
 345 |       assert Selector.parse("[attr\\ \\.name]") == {:selectors, [{:rules, [
 346 |         {:rule, [{:attribute, {:exists, "attr .name", nil, []}}], []}
 347 |       ]}]}
 348 |     end
 349 | 
 350 |     test "should properly parse attribute values" do # "\u" is not a hex escape, so it decodes to a plain "u"
 351 |       assert Selector.parse("[attr=val\\ \\ue]") == {:selectors, [{:rules, [
 352 |         {:rule, [{:attribute, {:equal, "attr", "val ue", []}}], []}
 353 |       ]}]}
 354 |     end
 355 | 
 356 |     test "should properly parse case sensitivity modifiers" do # the modifier is written escaped ("\i") here
 357 |       assert Selector.parse("[attr=value \\i]") == {:selectors, [{:rules, [
 358 |         {:rule, [{:attribute, {:equal, "attr", "value", case_sensitive: false}}], []}
 359 |       ]}]}
 360 |     end
 361 | 
 362 |     test "should properly handle whitespace" do # padding around name, operator, value and modifier is dropped
 363 |       assert Selector.parse("[ attr = value i ]") == {:selectors, [{:rules, [
 364 |         {:rule, [{:attribute, {:equal, "attr", "value", case_sensitive: false}}], []}
 365 |       ]}]}
 366 |     end
 367 | 
 368 |     test "should properly parse double quotes" do
 369 |       # Testing escaped quote and literal backslashes (not escape sequences)
 370 |       assert Selector.parse(~s([ attr = "val\\"\\\\ue\\\\20" i ])) == {:selectors, [{:rules, [
 371 |         {:rule, [{:attribute, {:equal, "attr", "val\"\\ue\\20", case_sensitive: false}}], []}
 372 |       ]}]}
 373 |     end
 374 | 
 375 |     test "should properly parse escapes" do
 376 |       ast_selector = {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "attr", "hello\nworld", []}}], []}]}]} # "\a" must decode to a newline
 377 | 
 378 |       assert Selector.parse(~s([attr="hello\\aworld"])) == ast_selector # escape ends at the first non-hex character
 379 |       assert Selector.parse(~s([attr="hell\\o\\aworld"])) == ast_selector # "\o" decodes to a plain "o"
 380 |       assert Selector.parse(~s([attr="hell\\\no\\aworld"])) == ast_selector # backslash + newline in a string is dropped
 381 |       assert Selector.parse(~s([attr="hello\\a world"])) == ast_selector # one space after the escape is consumed
 382 |       assert Selector.parse(~s([attr="hello\\a\tworld"])) == ast_selector # same for a tab
 383 |       assert Selector.parse(~s([attr="hello\\a\fworld"])) == ast_selector # same for a form feed
 384 |       assert Selector.parse(~s([attr="hello\\a\nworld"])) == ast_selector # same for a line feed
 385 |       assert Selector.parse(~s([attr="hello\\a\rworld"])) == ast_selector # same for a carriage return
 386 |       assert Selector.parse(~s([attr="hello\\a\r\nworld"])) == ast_selector # CRLF is consumed as a single terminator
 387 |       assert Selector.parse(~s([attr="hello\\00000aworld"])) == ast_selector # six hex digits: the next character is kept
 388 |     end
 389 | 
 390 |     test "should properly parse single quotes" do # "\20" is the hex escape for a space
 391 |       assert Selector.parse("[ attr = 'val\\'\\ue\\20' i ]") == {:selectors, [{:rules, [
 392 |         {:rule, [{:attribute, {:equal, "attr", "val'ue ", case_sensitive: false}}], []}
 393 |       ]}]}
 394 |     end
 395 | 
 396 |     test "should fail if attribute name is empty" do
 397 |       assert_raise ArgumentError, "Expected attribute name.", fn ->
 398 |         Selector.parse("[=a1]")
 399 |       end
 400 | 
 401 |       assert_raise ArgumentError, "Expected attribute name.", fn ->
 402 |         Selector.parse("[1=a1]") # a leading digit is expected to raise the same error as a missing name
 403 |       end
 404 |     end
 405 | 
 406 |     test "should fail if attribute value is empty" do
 407 |       assert_raise ArgumentError, "Expected attribute value.", fn ->
 408 |         Selector.parse("[a=]")
 409 |       end
 410 |     end
 411 | 
 412 |     test "should parse empty attribute values in quotes" do # unlike "[a=]", a quoted empty value is accepted
 413 |       assert Selector.parse(~s([attr=""])) == {:selectors, [{:rules, [
 414 |         {:rule, [{:attribute, {:equal, "attr", "", []}}], []}
 415 |       ]}]}
 416 |       assert Selector.parse("[attr='']") == {:selectors, [{:rules, [
 417 |         {:rule, [{:attribute, {:equal, "attr", "", []}}], []}
 418 |       ]}]}
 419 |     end
 420 | 
 421 |     test "should parse case sensitivity modifier s" do # "s" maps to case_sensitive: true, "i" to false
 422 |       assert Selector.parse("[attr=value s]") == {:selectors, [{:rules, [
 423 |         {:rule, [{:attribute, {:equal, "attr", "value", case_sensitive: true}}], []}
 424 |       ]}]}
 425 |     end
 426 | 
 427 |     test "should parse after tag names" do
 428 |       assert Selector.parse("div[attr]") == {:selectors, [{:rules, [
 429 |         {:rule, [{:tag_name, "div", []}, {:attribute, {:exists, "attr", nil, []}}], []}
 430 |       ]}]}
 431 |     end
 432 | 
 433 |     test "should parse after IDs" do
 434 |       assert Selector.parse("#id[attr]") == {:selectors, [{:rules, [
 435 |         {:rule, [{:id, "id"}, {:attribute, {:exists, "attr", nil, []}}], []}
 436 |       ]}]}
 437 |     end
 438 | 
 439 |     test "should parse after classes" do
 440 |       assert Selector.parse(".class[attr]") == {:selectors, [{:rules, [
 441 |         {:rule, [{:class, "class"}, {:attribute, {:exists, "attr", nil, []}}], []}
 442 |       ]}]}
 443 |     end
 444 | 
 445 |     test "should parse after a pseudo-class" do
 446 |       assert Selector.parse(":link[attr]") == {:selectors, [{:rules, [
 447 |         {:rule, [{:pseudo_class, {"link", []}}, {:attribute, {:exists, "attr", nil, []}}], []}
 448 |       ]}]}
 449 |     end
 450 | 
 451 |     test "should parse after a pseudo-element" do
 452 |       assert Selector.parse("::before[attr]") == {:selectors, [{:rules, [
 453 |         {:rule, [{:pseudo_element, {"before", []}}, {:attribute, {:exists, "attr", nil, []}}], []}
 454 |       ]}]}
 455 |     end
 456 | 
 457 |     test "should parse a named namespace" do # the prefix is expected in the opts as namespace: "ns"
 458 |       assert Selector.parse("[ns|href]") == {:selectors, [{:rules, [
 459 |         {:rule, [{:attribute, {:exists, "href", nil, namespace: "ns"}}], []}
 460 |       ]}]}
 461 | 
 462 |       assert Selector.parse("[ns|href=value]") == {:selectors, [{:rules, [
 463 |         {:rule, [{:attribute, {:equal, "href", "value", namespace: "ns"}}], []}
 464 |       ]}]}
 465 |     end
 466 | 
 467 |     test "should parse a wildcard namespace" do
 468 |       assert Selector.parse("[*|href]") == {:selectors, [{:rules, [
 469 |         {:rule, [{:attribute, {:exists, "href", nil, namespace: "*"}}], []}
 470 |       ]}]}
 471 | 
 472 |       assert Selector.parse("[*|href=value]") == {:selectors, [{:rules, [
 473 |         {:rule, [{:attribute, {:equal, "href", "value", namespace: "*"}}], []}
 474 |       ]}]}
 475 |     end
 476 | 
 477 |     test "should parse an empty namespace" do # a leading "|" maps to the empty-string namespace
 478 |       assert Selector.parse("[|href]") == {:selectors, [{:rules, [
 479 |         {:rule, [{:attribute, {:exists, "href", nil, namespace: ""}}], []}
 480 |       ]}]}
 481 | 
 482 |       assert Selector.parse("[|href=value]") == {:selectors, [{:rules, [
 483 |         {:rule, [{:attribute, {:equal, "href", "value", namespace: ""}}], []}
 484 |       ]}]}
 485 |     end
 486 | 
 487 |     test "should fail on bracket mismatch" do
 488 |       assert_raise ArgumentError, "Expected closing bracket.", fn ->
 489 |         Selector.parse("[attr")
 490 |       end
 491 |     end
 492 | 
 493 |     test "should parse starting with match" do # "^=" maps to :prefix
 494 |       assert Selector.parse("[attr^=value]") == {:selectors, [{:rules, [
 495 |         {:rule, [{:attribute, {:prefix, "attr", "value", []}}], []}
 496 |       ]}]}
 497 |     end
 498 | 
 499 |     test "should parse ending with match" do # "$=" maps to :suffix
 500 |       assert Selector.parse("[attr$=value]") == {:selectors, [{:rules, [
 501 |         {:rule, [{:attribute, {:suffix, "attr", "value", []}}], []}
 502 |       ]}]}
 503 |     end
 504 | 
 505 |     test "should parse containing match" do # "*=" maps to :substring
 506 |       assert Selector.parse("[attr*=value]") == {:selectors, [{:rules, [
 507 |         {:rule, [{:attribute, {:substring, "attr", "value", []}}], []}
 508 |       ]}]}
 509 |     end
 510 | 
 511 |     test "should parse includes match" do # "~=" maps to :includes
 512 |       assert Selector.parse("[attr~=value]") == {:selectors, [{:rules, [
 513 |         {:rule, [{:attribute, {:includes, "attr", "value", []}}], []}
 514 |       ]}]}
 515 |     end
 516 |   end
 517 | 
 518 |   describe "Pseudo Classes" do
 519 |     test "should parse a pseudo-class" do
 520 |       # A bare pseudo-class carries an empty argument list.
 521 |       link_rule = {:rule, [{:pseudo_class, {"link", []}}], []}
 522 |       assert Selector.parse(":link") == {:selectors, [{:rules, [link_rule]}]}
 523 |     end
 524 | 
 525 |     test "should parse multiple pseudo classes" do
 526 |       # Chained pseudo-classes are expected in source order inside one compound rule.
 527 |       link = {:pseudo_class, {"link", []}}
 528 |       visited = {:pseudo_class, {"visited", []}}
 529 |       compound = {:rule, [link, visited], []}
 530 |       parsed = Selector.parse(":link:visited")
 531 |       assert parsed == {:selectors, [{:rules, [compound]}]}
 532 |     end
 533 | 
 534 |     @tag :skip
 535 |     test "should properly parse pseudo classes" do
 536 |       # Escaped spelling of "link": \l and \n are literal letters, \69 is "i", \6b is "k".
 537 |       link_rule = {:rule, [{:pseudo_class, {"link", []}}], []}
 538 |       assert Selector.parse(":\\l\\69\\n\\6b") == {:selectors, [{:rules, [link_rule]}]}
 539 |     end
 540 | 
 541 |     test "should properly parse with 0n" do
 542 |       # Every spelling below is expected to normalise to a = 0, b = 5.
 543 |       expected = {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 0, b: 5]]}}], []}]}]}
 544 |       formulas = [":nth-child(0n+5)", ":nth-child( 0n + 5 )", ":nth-child( 0n+5 )",
 545 |                   ":nth-child(5)", ":nth-child( 5 )", ":nth-child( +5 )"]
 546 | 
 547 |       Enum.each(formulas, fn formula -> assert Selector.parse(formula) == expected end)
 548 |     end
 549 | 
 550 |     test "should properly parse with 0n and negative B" do
 551 |       # Every spelling below is expected to normalise to a = 0, b = -5.
 552 |       expected = {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 0, b: -5]]}}], []}]}]}
 553 |       formulas = [":nth-child(0n-5)", ":nth-child( 0n - 5 )", ":nth-child( 0n-5 )",
 554 |                   ":nth-child(-5)", ":nth-child( -5 )"]
 555 | 
 556 |       Enum.each(formulas, fn formula -> assert Selector.parse(formula) == expected end)
 557 |     end
 558 | 
 559 |     test "should properly parse with 0 B" do
 560 |       # Includes escaped spellings of "n" (\n and \6e); all are expected to give a = 3, b = 0.
 561 |       expected = {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 3, b: 0]]}}], []}]}]}
 562 |       formulas = [":nth-child(3n+0)", ":nth-child( 3\\n + 0 )", ":nth-child( 3\\6e+0 )",
 563 |                   ":nth-child(3n)", ":nth-child( 3n )", ":nth-child( +3n )"]
 564 | 
 565 |       Enum.each(formulas, fn formula -> assert Selector.parse(formula) == expected end)
 566 |     end
 567 | 
 568 |     test "should properly parse even" do
 569 |       # The keyword "even" and the formula "2n" are expected to give the same a = 2, b = 0.
 570 |       expected = {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 0]]}}], []}]}]}
 571 |       formulas = [":nth-child(even)", ":nth-child( even )", ":nth-child( 2n )"]
 572 | 
 573 |       Enum.each(formulas, fn formula -> assert Selector.parse(formula) == expected end)
 574 |     end
 575 | 
 576 |     test "should properly parse odd" do
 577 |       # The keyword "odd" and the formula "2n + 1" are expected to give the same a = 2, b = 1.
 578 |       expected = {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []}]}]}
 579 |       formulas = [":nth-child( 2n + 1 )", ":nth-child( odd )"]
 580 | 
 581 |       Enum.each(formulas, fn formula -> assert Selector.parse(formula) == expected end)
 582 |     end
 583 | 
 584 |     test "should properly handle whitespace" do
 585 |       # Padding inside the parentheses is not part of the argument.
 586 |       lang_rule = {:rule, [{:pseudo_class, {"lang", ["en"]}}], []}
 587 |       assert Selector.parse(":lang( en )") == {:selectors, [{:rules, [lang_rule]}]}
 588 |     end
 589 | 
 590 |     test "should parse after tag names" do
 591 |       # The pseudo-class follows the tag name inside the same compound rule.
 592 |       compound = {:rule, [{:tag_name, "div", []}, {:pseudo_class, {"link", []}}], []}
 593 |       assert Selector.parse("div:link") == {:selectors, [{:rules, [compound]}]}
 594 |     end
 595 | 
 596 |     test "should parse after IDs" do
 597 |       # The pseudo-class follows the ID inside the same compound rule.
 598 |       compound = {:rule, [{:id, "id"}, {:pseudo_class, {"link", []}}], []}
 599 |       assert Selector.parse("#id:link") == {:selectors, [{:rules, [compound]}]}
 600 |     end
 601 | 
 602 |     test "should parse after classes" do
 603 |       # The pseudo-class follows the class inside the same compound rule.
 604 |       compound = {:rule, [{:class, "class"}, {:pseudo_class, {"link", []}}], []}
 605 |       assert Selector.parse(".class:link") == {:selectors, [{:rules, [compound]}]}
 606 |     end
 607 | 
 608 |     test "should parse nested selectors" do
 609 |       # :is() takes one argument: a list with a :rules entry per comma-separated selector.
 610 |       # The first entry is itself a functional pseudo-class.
 611 |       lang_branch = {:rules, [{:rule, [{:pseudo_class, {"lang", ["en"]}}], []}]}
 612 |       div_branch = {:rules, [{:rule, [{:tag_name, "div", []}], []}]}
 613 |       is_args = [[lang_branch, div_branch]]
 614 |       is_rule = {:rule, [{:pseudo_class, {"is", is_args}}], []}
 615 |       parsed = Selector.parse(":is(:lang(en), div)")
 616 |       assert parsed == {:selectors, [{:rules, [is_rule]}]}
 617 |     end
 618 | 
 619 |     test "should parse after an attribute" do
 620 |       # The pseudo-class follows the attribute selector inside the same compound rule.
 621 |       compound = {:rule, [{:attribute, {:exists, "href", nil, []}}, {:pseudo_class, {"link", []}}], []}
 622 |       assert Selector.parse("[href]:link") == {:selectors, [{:rules, [compound]}]}
 623 |     end
 624 | 
 625 |     test "should parse after a pseudo-element" do
 626 |       # A pseudo-class may follow a "::" pseudo-element in the same compound rule.
 627 |       compound = {:rule, [{:pseudo_element, {"before", []}}, {:pseudo_class, {"hover", []}}], []}
 628 |       assert Selector.parse("::before:hover") == {:selectors, [{:rules, [compound]}]}
 629 |     end
 630 | 
 631 |     test "should fail on a single hyphen" do
 632 |       # A lone "-" after the colon is rejected with the identifier error.
 633 |       parse_lone_hyphen = fn -> Selector.parse(":-") end
 634 |       assert_raise ArgumentError, "Identifiers cannot consist of a single hyphen.", parse_lone_hyphen
 635 |     end
 636 | 
 637 |     test "should fail if argument required but not provided" do
 638 |       # ":not" without parentheses has no argument to negate.
 639 |       parse_bare_not = fn -> Selector.parse(":not") end
 640 |       assert_raise ArgumentError, "Argument is required for pseudo-class \"not\".", parse_bare_not
 641 |     end
 642 | 
 643 |     test "should parse :nth functions" do
 644 |       # The An+B argument is expected as a keyword list wrapped in the argument list.
 645 |       nth_rule = {:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []}
 646 |       assert Selector.parse(":nth-child(2n+1)") == {:selectors, [{:rules, [nth_rule]}]}
 647 |     end
 648 | 
 649 |     test "should parse :nth-of-type functions" do
 650 |       # A formula without a B term is expected with b: 0.
 651 |       nth_rule = {:rule, [{:pseudo_class, {"nth-of-type", [[a: 2, b: 0]]}}], []}
 652 |       assert Selector.parse(":nth-of-type(2n)") == {:selectors, [{:rules, [nth_rule]}]}
 653 |     end
 654 | 
 655 |     test "should parse :nth-last-child functions" do
 656 |       # Same An+B argument shape as :nth-child.
 657 |       nth_rule = {:rule, [{:pseudo_class, {"nth-last-child", [[a: 2, b: 1]]}}], []}
 658 |       assert Selector.parse(":nth-last-child(2n+1)") == {:selectors, [{:rules, [nth_rule]}]}
 659 |     end
 660 | 
 661 |     test "should parse :nth-last-of-type functions" do
 662 |       # Same An+B argument shape as :nth-of-type.
 663 |       nth_rule = {:rule, [{:pseudo_class, {"nth-last-of-type", [[a: 2, b: 0]]}}], []}
 664 |       assert Selector.parse(":nth-last-of-type(2n)") == {:selectors, [{:rules, [nth_rule]}]}
 665 |     end
 666 | 
 667 |     test "should parse :not function with complex selectors" do
 668 |       # The argument is a selector list holding one compound selector (tag plus class).
 669 |       negated = {:rules, [{:rule, [{:tag_name, "div", []}, {:class, "class"}], []}]}
 670 |       not_rule = {:rule, [{:pseudo_class, {"not", [[negated]]}}], []}
 671 |       parsed = Selector.parse(":not(div.class)")
 672 |       assert parsed == {:selectors, [{:rules, [not_rule]}]}
 673 |     end
 674 | 
 675 |     test "should parse :is function" do
 676 |       # :is() takes one argument: a list with a :rules entry per comma-separated selector.
 677 |       # Entries are expected in source order.
 678 |       div_branch = {:rules, [{:rule, [{:tag_name, "div", []}], []}]}
 679 |       class_branch = {:rules, [{:rule, [{:class, "class"}], []}]}
 680 |       is_args = [[div_branch, class_branch]]
 681 |       is_rule = {:rule, [{:pseudo_class, {"is", is_args}}], []}
 682 |       parsed = Selector.parse(":is(div, .class)")
 683 |       assert parsed == {:selectors, [{:rules, [is_rule]}]}
 684 |     end
 685 | 
 686 |     test "should parse :where function" do
 687 |       # :where() is expected to use the same argument shape as :is():
 688 |       # one list with a :rules entry per comma-separated selector.
 689 |       div_branch = {:rules, [{:rule, [{:tag_name, "div", []}], []}]}
 690 |       class_branch = {:rules, [{:rule, [{:class, "class"}], []}]}
 691 |       where_args = [[div_branch, class_branch]]
 692 |       where_rule = {:rule, [{:pseudo_class, {"where", where_args}}], []}
 693 |       parsed = Selector.parse(":where(div, .class)")
 694 |       assert parsed == {:selectors, [{:rules, [where_rule]}]}
 695 |     end
 696 | 
 697 |     test "should parse :has function" do
 698 |       # A relative selector keeps its leading combinator in the rule's options.
 699 |       relative = {:rules, [{:rule, [{:tag_name, "div", []}], combinator: ">"}]}
 700 |       has_rule = {:rule, [{:pseudo_class, {"has", [[relative]]}}], []}
 701 |       parsed = Selector.parse(":has(> div)")
 702 |       assert parsed == {:selectors, [{:rules, [has_rule]}]}
 703 |     end
 704 | 
 705 |     test "should parse :matches function" do
 706 |       # :matches() is expected to use the same argument shape as :is():
 707 |       # one list with a :rules entry per comma-separated selector.
 708 |       div_branch = {:rules, [{:rule, [{:tag_name, "div", []}], []}]}
 709 |       class_branch = {:rules, [{:rule, [{:class, "class"}], []}]}
 710 |       matches_args = [[div_branch, class_branch]]
 711 |       matches_rule = {:rule, [{:pseudo_class, {"matches", matches_args}}], []}
 712 |       parsed = Selector.parse(":matches(div, .class)")
 713 |       assert parsed == {:selectors, [{:rules, [matches_rule]}]}
 714 |     end
 715 | 
 716 |     test "should parse language pseudo-class" do
 717 |       # The language code is expected verbatim, hyphen and case included.
 718 |       lang_rule = {:rule, [{:pseudo_class, {"lang", ["en-US"]}}], []}
 719 |       assert Selector.parse(":lang(en-US)") == {:selectors, [{:rules, [lang_rule]}]}
 720 |     end
 721 | 
 722 |     test "should parse structural pseudo-classes" do
 723 |       # Each structural pseudo-class is expected as a single rule with no arguments.
 724 |       first_child = {:rule, [{:pseudo_class, {"first-child", []}}], []}
 725 |       assert Selector.parse(":first-child") == {:selectors, [{:rules, [first_child]}]}
 726 | 
 727 |       last_child = {:rule, [{:pseudo_class, {"last-child", []}}], []}
 728 |       assert Selector.parse(":last-child") == {:selectors, [{:rules, [last_child]}]}
 729 | 
 730 |       only_child = {:rule, [{:pseudo_class, {"only-child", []}}], []}
 731 |       assert Selector.parse(":only-child") == {:selectors, [{:rules, [only_child]}]}
 732 | 
 733 |       first_of_type = {:rule, [{:pseudo_class, {"first-of-type", []}}], []}
 734 |       assert Selector.parse(":first-of-type") == {:selectors, [{:rules, [first_of_type]}]}
 735 | 
 736 |       last_of_type = {:rule, [{:pseudo_class, {"last-of-type", []}}], []}
 737 |       assert Selector.parse(":last-of-type") == {:selectors, [{:rules, [last_of_type]}]}
 738 | 
 739 |       only_of_type = {:rule, [{:pseudo_class, {"only-of-type", []}}], []}
 740 |       assert Selector.parse(":only-of-type") == {:selectors, [{:rules, [only_of_type]}]}
 741 |     end
 742 | 
 743 |     test "should parse tree-structural pseudo-classes" do
 744 |       # Both names take no arguments, so they share one expected AST shape.
 745 |       for name <- ["root", "empty"] do
 746 |         assert Selector.parse(":" <> name) == {:selectors, [{:rules, [
 747 |           {:rule, [{:pseudo_class, {name, []}}], []}
 748 |         ]}]}
 749 |       end
 750 |     end
 751 | 
 752 |     test "should parse UI state pseudo-classes" do  # each name is expected with an empty argument list
 753 |       assert {:selectors, [{:rules, [
 754 |         {:rule, [{:pseudo_class, {"checked", []}}], []}
 755 |       ]}]} = Selector.parse(":checked")
 756 |       assert {:selectors, [{:rules, [
 757 |         {:rule, [{:pseudo_class, {"enabled", []}}], []}
 758 |       ]}]} = Selector.parse(":enabled")
 759 |       assert {:selectors, [{:rules, [
 760 |         {:rule, [{:pseudo_class, {"disabled", []}}], []}
 761 |       ]}]} = Selector.parse(":disabled")
 762 |       assert {:selectors, [{:rules, [
 763 |         {:rule, [{:pseudo_class, {"required", []}}], []}
 764 |       ]}]} = Selector.parse(":required")
 765 |       assert {:selectors, [{:rules, [
 766 |         {:rule, [{:pseudo_class, {"optional", []}}], []}
 767 |       ]}]} = Selector.parse(":optional")
 768 |       assert {:selectors, [{:rules, [
 769 |         {:rule, [{:pseudo_class, {"read-only", []}}], []}
 770 |       ]}]} = Selector.parse(":read-only")
 771 |       assert {:selectors, [{:rules, [
 772 |         {:rule, [{:pseudo_class, {"read-write", []}}], []}
 773 |       ]}]} = Selector.parse(":read-write")
 774 |       assert {:selectors, [{:rules, [
 775 |         {:rule, [{:pseudo_class, {"valid", []}}], []}
 776 |       ]}]} = Selector.parse(":valid")
 777 |       assert {:selectors, [{:rules, [
 778 |         {:rule, [{:pseudo_class, {"invalid", []}}], []}
 779 |       ]}]} = Selector.parse(":invalid")
 780 |       assert {:selectors, [{:rules, [
 781 |         {:rule, [{:pseudo_class, {"in-range", []}}], []}
 782 |       ]}]} = Selector.parse(":in-range")
 783 |       assert {:selectors, [{:rules, [
 784 |         {:rule, [{:pseudo_class, {"out-of-range", []}}], []}
 785 |       ]}]} = Selector.parse(":out-of-range")
 786 |     end
 787 | 
 788 |     test "should parse target and link pseudo-classes" do
 789 |       # Every case has the same expected shape: the bare name with no arguments,
 790 |       # so the selectors are generated from a list of names.
 791 |       names = [
 792 |         "target",
 793 |         "link",
 794 |         "visited",
 795 |         "hover",
 796 |         "active",
 797 |         "focus"
 798 |       ]
 799 | 
 800 |       for name <- names do
 801 |         expected = {:selectors, [{:rules, [
 802 |           {:rule, [{:pseudo_class, {name, []}}], []}
 803 |         ]}]}
 804 | 
 805 |         assert Selector.parse(":" <> name) == expected
 806 |       end
 807 |     end
 808 | 
 809 |     test "should parse CSS Level 4 pseudo-classes" do
 810 |       # Hyphenated names are expected back verbatim with an empty argument list.
 811 |       names = ["any-link", "focus-within", "focus-visible"]
 812 | 
 813 |       for name <- names do
 814 |         expected = {:selectors, [{:rules, [
 815 |           {:rule, [{:pseudo_class, {name, []}}], []}
 816 |         ]}]}
 817 |         assert Selector.parse(":" <> name) == expected
 818 |       end
 819 |     end
 820 |   end
 821 | 
 822 |   describe "Pseudo Elements" do  # expected AST node: {:pseudo_element, {name, args}}
 823 |     test "should parse a pseudo-element" do
 824 |       assert Selector.parse("::before") == {:selectors, [{:rules, [
 825 |         {:rule, [{:pseudo_element, {"before", []}}], []}
 826 |       ]}]}
 827 |     end
 828 | 
 829 |     test "should parse a parametrized pseudo-element" do  # argument is expected as a nested selector list
 830 |       assert Selector.parse("::slotted(span)") == {:selectors, [{:rules, [
 831 |         {:rule, [{:pseudo_element, {"slotted", [[{:rules, [{:rule, [{:tag_name, "span", []}], []}]}]]}}], []}
 832 |       ]}]}
 833 |     end
 834 | 
 835 |     test "should parse pseudo-elements with content" do
 836 |       assert Selector.parse("::after") == {:selectors, [{:rules, [
 837 |         {:rule, [{:pseudo_element, {"after", []}}], []}
 838 |       ]}]}
 839 |     end
 840 | 
 841 |     test "should parse ::before and ::after" do
 842 |       assert Selector.parse("::before") == {:selectors, [{:rules, [
 843 |         {:rule, [{:pseudo_element, {"before", []}}], []}
 844 |       ]}]}
 845 | 
 846 |       assert Selector.parse("::after") == {:selectors, [{:rules, [
 847 |         {:rule, [{:pseudo_element, {"after", []}}], []}
 848 |       ]}]}
 849 |     end
 850 | 
 851 |     test "should parse ::first-line and ::first-letter" do
 852 |       assert Selector.parse("::first-line") == {:selectors, [{:rules, [
 853 |         {:rule, [{:pseudo_element, {"first-line", []}}], []}
 854 |       ]}]}
 855 | 
 856 |       assert Selector.parse("::first-letter") == {:selectors, [{:rules, [
 857 |         {:rule, [{:pseudo_element, {"first-letter", []}}], []}
 858 |       ]}]}
 859 |     end
 860 | 
 861 |     test "should parse modern double-colon syntax" do
 862 |       assert Selector.parse("::selection") == {:selectors, [{:rules, [
 863 |         {:rule, [{:pseudo_element, {"selection", []}}], []}
 864 |       ]}]}
 865 |     end
 866 | 
 867 |     test "should parse legacy single-colon syntax" do  # ":before" is expected as a pseudo-element, not a pseudo-class
 868 |       assert Selector.parse(":before") == {:selectors, [{:rules, [
 869 |         {:rule, [{:pseudo_element, {"before", []}}], []}
 870 |       ]}]}
 871 |     end
 872 | 
 873 |     test "should parse pseudo-elements with tag names" do
 874 |       assert Selector.parse("div::before") == {:selectors, [{:rules, [
 875 |         {:rule, [{:tag_name, "div", []}, {:pseudo_element, {"before", []}}], []}
 876 |       ]}]}
 877 |     end
 878 | 
 879 |     test "should parse pseudo-elements with class names" do
 880 |       assert Selector.parse(".class::before") == {:selectors, [{:rules, [
 881 |         {:rule, [{:class, "class"}, {:pseudo_element, {"before", []}}], []}
 882 |       ]}]}
 883 |     end
 884 | 
 885 |     test "should parse pseudo-elements with IDs" do
 886 |       assert Selector.parse("#id::before") == {:selectors, [{:rules, [
 887 |         {:rule, [{:id, "id"}, {:pseudo_element, {"before", []}}], []}
 888 |       ]}]}
 889 |     end
 890 | 
 891 |     test "should parse pseudo-elements with attributes" do
 892 |       assert Selector.parse("[attr]::before") == {:selectors, [{:rules, [
 893 |         {:rule, [{:attribute, {:exists, "attr", nil, []}}, {:pseudo_element, {"before", []}}], []}
 894 |       ]}]}
 895 |     end
 896 | 
 897 |     test "should fail on invalid pseudo-element syntax" do
 898 |       assert_raise ArgumentError, "Invalid pseudo-element syntax.", fn ->
 899 |         Selector.parse("::invalid-element")
 900 |       end
 901 |     end
 902 | 
 903 |     test "should handle vendor-specific pseudo-elements" do  # leading "-" prefix is expected to be kept in the name
 904 |       assert Selector.parse("::-webkit-input-placeholder") == {:selectors, [{:rules, [
 905 |         {:rule, [{:pseudo_element, {"-webkit-input-placeholder", []}}], []}
 906 |       ]}]}
 907 |     end
 908 | 
 909 |     test "should parse CSS Level 4 pseudo-elements" do
 910 |       assert Selector.parse("::placeholder") == {:selectors, [{:rules, [
 911 |         {:rule, [{:pseudo_element, {"placeholder", []}}], []}
 912 |       ]}]}
 913 |       assert Selector.parse("::backdrop") == {:selectors, [{:rules, [
 914 |         {:rule, [{:pseudo_element, {"backdrop", []}}], []}
 915 |       ]}]}
 916 |       assert Selector.parse("::marker") == {:selectors, [{:rules, [
 917 |         {:rule, [{:pseudo_element, {"marker", []}}], []}
 918 |       ]}]}
 919 |       assert Selector.parse("::cue") == {:selectors, [{:rules, [
 920 |         {:rule, [{:pseudo_element, {"cue", []}}], []}
 921 |       ]}]}
 922 |     end
 923 | 
 924 |     # Note: while CSS3 specifies that pseudo-elements should come last,
 925 |     # this parser allows selectors after pseudo-elements, for flexibility
 926 |     # and for forward compatibility with CSS4, where some pseudo-elements
 927 |     # can be followed by pseudo-classes.
 928 |   end
 929 | 
 930 |   describe "Multiple rules" do  # each comma-separated selector is expected as its own :rules entry
 931 |     test "should parse multiple rules" do
 932 |       assert Selector.parse("div,.class") == {:selectors, [
 933 |         {:rules, [{:rule, [{:tag_name, "div", []}], []}]},
 934 |         {:rules, [{:rule, [{:class, "class"}], []}]}
 935 |       ]}
 936 |     end
 937 | 
 938 |     test "should parse comma-separated selectors" do  # padding around selectors and commas must not change the result
 939 |       assert Selector.parse("  div  ,  .class  ") == {:selectors, [
 940 |         {:rules, [{:rule, [{:tag_name, "div", []}], []}]},
 941 |         {:rules, [{:rule, [{:class, "class"}], []}]}
 942 |       ]}
 943 |     end
 944 | 
 945 |     test "should handle whitespace in multiple rules" do
 946 |       assert Selector.parse("div, .class, #id") == {:selectors, [
 947 |         {:rules, [{:rule, [{:tag_name, "div", []}], []}]},
 948 |         {:rules, [{:rule, [{:class, "class"}], []}]},
 949 |         {:rules, [{:rule, [{:id, "id"}], []}]}
 950 |       ]}
 951 |     end
 952 | 
 953 |     test "should raise on a trailing comma or an unparseable selector in the list" do
 954 |       assert_raise ArgumentError, "Expected selector but end of input reached.", fn ->
 955 |         Selector.parse("div, .class,")
 956 |       end
 957 | 
 958 |       assert_raise ArgumentError, "Cannot parse: $", fn ->
 959 |         Selector.parse("div, .class, $")
 960 |       end
 961 |     end
 962 |   end
 963 | 
 964 |   describe "Complex selectors" do
 965 |     test "should parse selectors with all features combined" do
 966 |       # One compound selector: every part is expected in a single rule, in source order.
 967 |       parts = [
 968 |         {:tag_name, "tag", namespace: "ns"},
 969 |         {:id, "id"},
 970 |         {:class, "class1"},
 971 |         {:class, "class2"},
 972 |         {:attribute, {:equal, "attr", "value", []}},
 973 |         {:pseudo_class, {"hover", []}},
 974 |         {:pseudo_element, {"before", []}}
 975 |       ]
 976 |       assert Selector.parse("ns|tag#id.class1.class2[attr=value]:hover::before") == {:selectors, [{:rules, [{:rule, parts, []}]}]}
 977 |     end
 978 | 
 979 |     test "should parse complex selectors with multiple attributes" do
 980 |       # Three attribute selectors with different operators chained onto one tag.
 981 |       parts = [
 982 |         {:tag_name, "div", []},
 983 |         {:attribute, {:exists, "id", nil, []}},
 984 |         {:attribute, {:includes, "class", "test", []}},
 985 |         {:attribute, {:prefix, "data-value", "prefix", []}}
 986 |       ]
 987 |       assert Selector.parse("div[id][class~=test][data-value^=prefix]") == {:selectors, [{:rules, [{:rule, parts, []}]}]}
 988 |     end
 989 |   end
 990 | 
 991 |   describe "Nested rules" do  # a combinator is expected in the options of the rule on its right-hand side
 992 |     test "should parse nested rules" do  # whitespace-only separation: second rule has no :combinator option
 993 |       assert Selector.parse("div .class") == {:selectors, [{:rules, [
 994 |         {:rule, [{:tag_name, "div", []}], []},
 995 |         {:rule, [{:class, "class"}], []}
 996 |       ]}]}
 997 |     end
 998 | 
 999 |     test "should parse child combinators surrounded by whitespace" do
1000 |       assert Selector.parse("   div   >   .class   ") == {:selectors, [{:rules, [
1001 |         {:rule, [{:tag_name, "div", []}], []},
1002 |         {:rule, [{:class, "class"}], combinator: ">"}
1003 |       ]}]}
1004 |     end
1005 | 
1006 |     test "should parse child combinators" do
1007 |       assert Selector.parse("div>.class") == {:selectors, [{:rules, [
1008 |         {:rule, [{:tag_name, "div", []}], []},
1009 |         {:rule, [{:class, "class"}], combinator: ">"}
1010 |       ]}]}
1011 |     end
1012 | 
1013 |     test "should parse sibling combinators" do
1014 |       assert Selector.parse("div~.class") == {:selectors, [{:rules, [
1015 |         {:rule, [{:tag_name, "div", []}], []},
1016 |         {:rule, [{:class, "class"}], combinator: "~"}
1017 |       ]}]}
1018 |     end
1019 | 
1020 |     test "should parse adjacent sibling combinators" do
1021 |       assert Selector.parse("div+.class") == {:selectors, [{:rules, [
1022 |         {:rule, [{:tag_name, "div", []}], []},
1023 |         {:rule, [{:class, "class"}], combinator: "+"}
1024 |       ]}]}
1025 |     end
1026 | 
1027 |     test "should parse column combinators" do  # "||", with and without surrounding whitespace
1028 |       assert Selector.parse("div||.class") == {:selectors, [{:rules, [
1029 |         {:rule, [{:tag_name, "div", []}], []},
1030 |         {:rule, [{:class, "class"}], combinator: "||"}
1031 |       ]}]}
1032 | 
1033 |       assert Selector.parse("   div   ||   .class   ") == {:selectors, [{:rules, [
1034 |         {:rule, [{:tag_name, "div", []}], []},
1035 |         {:rule, [{:class, "class"}], combinator: "||"}
1036 |       ]}]}
1037 |     end
1038 |   end
1039 | 
1040 |   describe "Edge cases and error handling" do
1041 |     test "should handle various Unicode whitespace" do
1042 |       # Non-breaking space (U+00A0) is NOT treated as a combinator in CSS;
1043 |       # the expected AST keeps it inside the tag name ("div\u00A0")
1044 |       assert Selector.parse("div\u00A0.class") == {:selectors, [{:rules, [
1045 |         {:rule, [{:tag_name, "div\u00A0", []}, {:class, "class"}], []}
1046 |       ]}]}
1047 |     end
1048 | 
1049 |     test "should validate combinator placement" do
1050 |       assert_raise ArgumentError, fn ->
1051 |         Selector.parse("div > > span")  # two combinators in a row
1052 |       end
1053 |       
1054 |       assert_raise ArgumentError, fn ->
1055 |         Selector.parse("> div")  # combinator with nothing on its left
1056 |       end
1057 |     end
1058 | 
1059 |     test "should handle deeply nested selectors" do
1060 |       nested = ":not(:not(:not(:not(:not(.class)))))"  # five levels of :not around one class selector
1061 |       assert Selector.parse(nested) == {:selectors, [{:rules, [
1062 |         {:rule, [{:pseudo_class, {"not", [
1063 |           [{:rules, [{:rule, [{:pseudo_class, {"not", [
1064 |             [{:rules, [{:rule, [{:pseudo_class, {"not", [
1065 |               [{:rules, [{:rule, [{:pseudo_class, {"not", [
1066 |                 [{:rules, [{:rule, [{:pseudo_class, {"not", [
1067 |                   [{:rules, [{:rule, [{:class, "class"}], []}]}]
1068 |                 ]}}], []}]}]
1069 |               ]}}], []}]}]
1070 |             ]}}], []}]}]
1071 |           ]}}], []}]}]
1072 |         ]}}], []}
1073 |       ]}]}
1074 |     end
1075 | 
1076 |     test "should handle extremely long identifiers" do
1077 |       # No truncation is expected: all 1000 characters must come back intact
1078 |       id = String.duplicate("a", 1000)
1079 |       assert Selector.parse("##{id}") == {:selectors, [{:rules, [
1080 |         {:rule, [{:id, id}], []}
1081 |       ]}]}
1082 |     end
1083 | 
1084 |     test "should parse nth-child with negative coefficients" do
1085 |       assert Selector.parse(":nth-child(-n+3)") == {:selectors, [{:rules, [
1086 |         {:rule, [{:pseudo_class, {"nth-child", [[a: -1, b: 3]]}}], []}  # "-n+3" => a = -1, b = 3
1087 |       ]}]}
1088 |     end
1089 | 
1090 |     test "should handle escape sequences in different contexts" do
1091 |       # Hex escape in an ID: "\31 " (escape plus terminating space) is expected to decode to "1"
1092 |       assert Selector.parse("#\\31 23") == {:selectors, [{:rules, [
1093 |         {:rule, [{:id, "123"}], []}
1094 |       ]}]}
1095 |       
1096 |       # Escaped literal character in a class name: the backslash is dropped, "@" is kept
1097 |       assert Selector.parse(".\\@media") == {:selectors, [{:rules, [
1098 |         {:rule, [{:class, "@media"}], []}
1099 |       ]}]}
1100 |       
1101 |       # Escaped literal character in an attribute name
1102 |       assert Selector.parse("[data-\\@attr]") == {:selectors, [{:rules, [
1103 |         {:rule, [{:attribute, {:exists, "data-@attr", nil, []}}], []}
1104 |       ]}]}
1105 |     end
1106 |   end
1107 | end
1108 | 


--------------------------------------------------------------------------------
/test/selector/parser/guards_test.exs:
--------------------------------------------------------------------------------
   1 | defmodule Selector.Parser.GuardsTest do
   2 |   use ExUnit.Case, async: true
   3 |   import Selector.Parser.Guards
   4 | 
   5 |   describe "is_whitespace/1" do
   6 |     test "recognizes CSS whitespace characters" do
   7 |       assert is_whitespace(?\t)  # U+0009 Tab
   8 |       assert is_whitespace(?\n)  # U+000A Line Feed
   9 |       assert is_whitespace(?\f)  # U+000C Form Feed
  10 |       assert is_whitespace(?\r)  # U+000D Carriage Return
  11 |       assert is_whitespace(?\s)  # U+0020 Space
  12 |     end
  13 | 
  14 |     test "rejects non-whitespace characters" do
  15 |       refute is_whitespace(?a)  # letter
  16 |       refute is_whitespace(?1)  # digit
  17 |       refute is_whitespace(?.)  # punctuation
  18 |       refute is_whitespace(0x00A0)  # U+00A0 no-break space is not CSS whitespace
  19 |     end
  20 |   end
  21 | 
  22 |   describe "is_identifier_start_char/1" do  # pins which code points the guard accepts as an identifier's first character
  23 |     test "accepts ASCII letters" do  # boundary letters of both cases
  24 |       assert is_identifier_start_char(?a)
  25 |       assert is_identifier_start_char(?z)
  26 |       assert is_identifier_start_char(?A)
  27 |       assert is_identifier_start_char(?Z)
  28 |     end
  29 | 
  30 |     test "accepts underscore" do
  31 |       assert is_identifier_start_char(?_)
  32 |     end
  33 | 
  34 |     test "accepts non-ASCII characters" do
  35 |       assert is_identifier_start_char(0x00C0)  # À
  36 |       assert is_identifier_start_char(0x4E2D)  # 中 (Chinese character)
  37 |     end
  38 | 
  39 |     test "accepts UTF-8 letters from various scripts" do
  40 |       assert is_identifier_start_char(0x00E9)  # é (Latin-1 Supplement)
  41 |       assert is_identifier_start_char(0x0391)  # Α (Greek)
  42 |       assert is_identifier_start_char(0x0410)  # А (Cyrillic)
  43 |       assert is_identifier_start_char(0x05D0)  # א (Hebrew)
  44 |       assert is_identifier_start_char(0x0627)  # ا (Arabic)
  45 |       assert is_identifier_start_char(0x3042)  # あ (Hiragana)
  46 |       assert is_identifier_start_char(0x30A2)  # ア (Katakana)
  47 |       assert is_identifier_start_char(0xAC00)  # 가 (Hangul)
  48 |     end
  49 | 
  50 |     test "rejects digits" do  # ASCII and non-ASCII digits alike
  51 |       refute is_identifier_start_char(?0)
  52 |       refute is_identifier_start_char(?9)
  53 |       refute is_identifier_start_char(0x0660)  # Arabic-Indic digit
  54 |     end
  55 | 
  56 |     test "rejects hyphens" do
  57 |       refute is_identifier_start_char(?-)
  58 |     end
  59 | 
  60 |     test "rejects ASCII control characters" do
  61 |       refute is_identifier_start_char(0x001F)  # U+001F Unit Separator, the last C0 control code
  62 |     end
  63 |   end
  64 | 
  65 |   describe "is_identifier_char/1" do  # start characters plus digits and hyphen are expected to pass
  66 |     test "accepts identifier start characters" do
  67 |       assert is_identifier_char(?a)
  68 |       assert is_identifier_char(?_)
  69 |       assert is_identifier_char(0x00C0)  # À
  70 |     end
  71 | 
  72 |     test "accepts UTF-8 digits" do  # ASCII digits and digits from other scripts
  73 |       assert is_identifier_char(?0)
  74 |       assert is_identifier_char(?5)
  75 |       assert is_identifier_char(?9)
  76 |       assert is_identifier_char(0x0660)  # Arabic-Indic digit
  77 |       assert is_identifier_char(0x06F0)  # Extended Arabic-Indic digit
  78 |       assert is_identifier_char(0x0966)  # Devanagari digit
  79 |       assert is_identifier_char(0xFF10)  # Fullwidth digit
  80 |     end
  81 | 
  82 |     test "accepts hyphens" do
  83 |       assert is_identifier_char(?-)
  84 |     end
  85 | 
  86 |     test "rejects special characters" do  # characters that introduce other selector syntax
  87 |       refute is_identifier_char(?.)
  88 |       refute is_identifier_char(?#)
  89 |       refute is_identifier_char(?@)
  90 |     end
  91 |   end
  92 | 
  93 |   describe "is_string_char/1" do
  94 |     test "accepts regular characters" do
  95 |       assert is_string_char(?a)  # letter
  96 |       assert is_string_char(?1)  # digit
  97 |       assert is_string_char(?!)  # punctuation
  98 |       assert is_string_char(0x00C0)  # U+00C0, non-ASCII letter
  99 |     end
 100 | 
 101 |     test "accepts UTF-8 characters" do
 102 |       assert is_string_char(0x4E2D)  # U+4E2D 中 (Chinese)
 103 |       assert is_string_char(0x0391)  # U+0391 Α (Greek)
 104 |       assert is_string_char(0x0627)  # U+0627 ا (Arabic)
 105 |     end
 106 | 
 107 |     test "rejects quote characters" do
 108 |       refute is_string_char(?")  # U+0022 double quote
 109 |       refute is_string_char(?')  # U+0027 single quote
 110 |     end
 111 | 
 112 |     test "rejects backslash" do
 113 |       refute is_string_char(?\\)  # U+005C
 114 |     end
 115 | 
 116 |     test "rejects newlines including Unicode newlines" do
 117 |       refute is_string_char(?\n)  # U+000A Line Feed
 118 |       refute is_string_char(?\f)  # U+000C Form Feed
 119 |       refute is_string_char(?\r)  # U+000D Carriage Return
 120 |       refute is_string_char(0x0085)  # U+0085 Next Line (NEL)
 121 |       refute is_string_char(0x2028)  # U+2028 Line Separator
 122 |       refute is_string_char(0x2029)  # U+2029 Paragraph Separator
 123 |     end
 124 |   end
 125 | 
 126 |   describe "is_combinator_char/1" do
 127 |     test "recognizes single-character combinator characters" do
 128 |       assert is_combinator_char(?>)  # U+003E child combinator
 129 |       assert is_combinator_char(?+)  # U+002B adjacent sibling combinator
 130 |       assert is_combinator_char(?~)  # U+007E general sibling combinator
 131 |     end
 132 | 
 133 |     test "rejects non-combinator characters" do
 134 |       refute is_combinator_char(?a)
 135 |       refute is_combinator_char(?1)
 136 |       refute is_combinator_char(?\s)  # U+0020 space (descendant combinator handled separately)
 137 |       refute is_combinator_char(?|)   # single pipe (column combinator needs two: ||)
 138 |     end
 139 | 
 140 |     test "rejects other special characters" do
 141 |       refute is_combinator_char(?.)  # class prefix
 142 |       refute is_combinator_char(?#)  # id prefix
 143 |       refute is_combinator_char(?:)  # pseudo prefix
 144 |       refute is_combinator_char(?[)  # attribute open
 145 |       refute is_combinator_char(?])  # attribute close
 146 |       refute is_combinator_char(?=)  # attribute operator
 147 |     end
 148 |   end
 149 | 
 150 |   describe "is_combinator/1 (backward compatibility)" do
 151 |     test "recognizes combinator characters" do
 152 |       assert is_combinator(?>)  # U+003E
 153 |       assert is_combinator(?+)  # U+002B
 154 |       assert is_combinator(?~)  # U+007E
 155 |     end
 156 | 
 157 |     test "rejects non-combinator characters" do
 158 |       refute is_combinator(?a)
 159 |       refute is_combinator(?1)
 160 |       refute is_combinator(?\s)  # U+0020 space (descendant combinator handled separately)
 161 |     end
 162 |   end
 163 | 
 164 |   describe "is_delimiter/1" do
 165 |     test "recognizes delimiter characters" do
 166 |       assert is_delimiter(?#)  # U+0023
 167 |       assert is_delimiter(?.)  # U+002E
 168 |       assert is_delimiter(?:)  # U+003A
 169 |       assert is_delimiter(?[)  # U+005B
 170 |       assert is_delimiter(?])  # U+005D
 171 |       assert is_delimiter(?()  # U+0028
 172 |       assert is_delimiter(?))  # U+0029
 173 |       assert is_delimiter(?,)  # U+002C
 174 |       assert is_delimiter(?")  # U+0022 double quote
 175 |       assert is_delimiter(?')  # U+0027 single quote
 176 |       assert is_delimiter(?\\)  # U+005C backslash
 177 |     end
 178 | 
 179 |     test "rejects non-delimiter characters" do
 180 |       refute is_delimiter(?a)  # letter
 181 |       refute is_delimiter(?1)  # digit
 182 |       refute is_delimiter(?=)  # attribute operator, not a delimiter
 183 |     end
 184 |   end
 185 | 
 186 |   describe "is_attribute_operator_char/1" do
 187 |     test "recognizes attribute operator characters" do
 188 |       assert is_attribute_operator_char(?=)  # U+003D
 189 |       assert is_attribute_operator_char(?~)  # U+007E
 190 |       assert is_attribute_operator_char(?|)  # U+007C
 191 |       assert is_attribute_operator_char(?^)  # U+005E
 192 |       assert is_attribute_operator_char(?$)  # U+0024
 193 |       assert is_attribute_operator_char(?*)  # U+002A
 194 |     end
 195 | 
 196 |     test "rejects non-operator characters" do
 197 |       refute is_attribute_operator_char(?a)  # letter
 198 |       refute is_attribute_operator_char(?1)  # digit
 199 |       refute is_attribute_operator_char(?!)  # punctuation that is not an operator
 200 |     end
 201 |   end
 202 | 
 203 |   describe "is_hex_digit/1" do  # only ASCII 0-9, a-f, A-F are expected to pass
 204 |     test "recognizes ASCII hexadecimal digits" do  # both ends of each accepted range
 205 |       assert is_hex_digit(?0)
 206 |       assert is_hex_digit(?9)
 207 |       assert is_hex_digit(?a)
 208 |       assert is_hex_digit(?f)
 209 |       assert is_hex_digit(?A)
 210 |       assert is_hex_digit(?F)
 211 |     end
 212 | 
 213 |     test "rejects fullwidth hexadecimal digits" do  # look-alike code points must not count as hex digits
 214 |       refute is_hex_digit(0xFF10)  # Fullwidth 0
 215 |       refute is_hex_digit(0xFF19)  # Fullwidth 9
 216 |       refute is_hex_digit(0xFF21)  # Fullwidth A
 217 |       refute is_hex_digit(0xFF26)  # Fullwidth F
 218 |       refute is_hex_digit(0xFF41)  # Fullwidth a
 219 |       refute is_hex_digit(0xFF46)  # Fullwidth f
 220 |     end
 221 | 
 222 |     test "rejects non-hex characters" do
 223 |       refute is_hex_digit(?g)  # first letter past f
 224 |       refute is_hex_digit(?G)  # first letter past F
 225 |       refute is_hex_digit(?!)
 226 |     end
 227 |   end
 228 | 
 229 |   describe "is_escapable_char/1" do
 230 |     test "accepts most characters" do
 231 |       assert is_escapable_char(?a)
 232 |       assert is_escapable_char(?1)
 233 |       assert is_escapable_char(?!)
 234 |       assert is_escapable_char(?#)
 235 |       assert is_escapable_char(?\s)  # U+0020 Space
 236 |     end
 237 | 
 238 |     test "accepts UTF-8 characters" do
 239 |       assert is_escapable_char(0x4E2D)  # U+4E2D 中 (Chinese)
 240 |       assert is_escapable_char(0x0391)  # U+0391 Α (Greek)
 241 |     end
 242 | 
 243 |     test "rejects newlines including Unicode newlines" do
 244 |       refute is_escapable_char(?\n)  # U+000A Line Feed
 245 |       refute is_escapable_char(?\f)  # U+000C Form Feed
 246 |       refute is_escapable_char(?\r)  # U+000D Carriage Return
 247 |       refute is_escapable_char(0x0085)  # U+0085 Next Line (NEL)
 248 |       refute is_escapable_char(0x2028)  # U+2028 Line Separator
 249 |       refute is_escapable_char(0x2029)  # U+2029 Paragraph Separator
 250 |     end
 251 |   end
 252 | 
 253 |   describe "id selector guards" do  # is_id_start_char/1 and is_id_char/1, checked against identifier-style cases
 254 |     test "is_id_start_char/1 follows identifier start rules" do
 255 |       assert is_id_start_char(?a)
 256 |       assert is_id_start_char(?_)
 257 |       assert is_id_start_char(0x00C0)  # À
 258 |       assert is_id_start_char(0x4E2D)  # 中 (Chinese)
 259 |       refute is_id_start_char(?1)  # digit rejected as a first character
 260 |       refute is_id_start_char(?-)  # hyphen rejected as a first character
 261 |     end
 262 | 
 263 |     test "is_id_char/1 follows identifier rules" do
 264 |       assert is_id_char(?a)
 265 |       assert is_id_char(?1)  # digit accepted here, unlike in the start-character guard
 266 |       assert is_id_char(?-)  # hyphen accepted here, unlike in the start-character guard
 267 |       assert is_id_char(?_)
 268 |       assert is_id_char(0x4E2D)  # 中 (Chinese)
 269 |       assert is_id_char(0x0660)  # Arabic-Indic digit
 270 |       refute is_id_char(?.)
 271 |       refute is_id_char(?#)
 272 |     end
 273 |   end
 274 | 
 275 |   describe "class selector guards" do  # is_class_start_char/1 and is_class_char/1
 276 |     test "is_class_start_char/1 follows identifier start rules" do
 277 |       assert is_class_start_char(?a)
 278 |       assert is_class_start_char(?_)
 279 |       assert is_class_start_char(0x0391)  # Α (Greek)
 280 |       refute is_class_start_char(?1)  # digit rejected as a first character
 281 |       refute is_class_start_char(?-)  # hyphen rejected as a first character
 282 |     end
 283 | 
 284 |     test "is_class_char/1 follows identifier rules" do
 285 |       assert is_class_char(?a)
 286 |       assert is_class_char(?1)  # digit accepted after the first character
 287 |       assert is_class_char(?-)  # hyphen accepted after the first character
 288 |       assert is_class_char(0x3042)  # あ (Hiragana)
 289 |       refute is_class_char(?.)  # "." is rejected as a class-name character
 290 |     end
 291 |   end
 292 | 
 293 |   describe "element selector guards" do  # is_tag_name_start_char/1 and is_tag_name_char/1
 294 |     test "is_tag_name_start_char/1 follows identifier start rules" do
 295 |       assert is_tag_name_start_char(?d)  # div
 296 |       assert is_tag_name_start_char(?s)  # span
 297 |       assert is_tag_name_start_char(0x30A2)  # ア (Katakana)
 298 |       refute is_tag_name_start_char(?1)  # digit rejected as a first character
 299 |     end
 300 | 
 301 |     test "is_tag_name_char/1 follows identifier rules" do
 302 |       assert is_tag_name_char(?d)
 303 |       assert is_tag_name_char(?1)  # digit accepted after the first character
 304 |       assert is_tag_name_char(?-)  # hyphen accepted
 305 |       assert is_tag_name_char(0xAC00)  # 가 (Hangul)
 306 |     end
 307 |   end
 308 | 
 309 |   describe "attribute selector guards" do  # is_attribute_name_start_char/1 and is_attribute_name_char/1
 310 |     test "is_attribute_name_start_char/1 follows identifier start rules" do
 311 |       assert is_attribute_name_start_char(?c)  # class
 312 |       assert is_attribute_name_start_char(?d)  # data-*
 313 |       assert is_attribute_name_start_char(0x0627)  # ا (Arabic)
 314 |       refute is_attribute_name_start_char(?1)  # digit rejected as a first character
 315 |     end
 316 | 
 317 |     test "is_attribute_name_char/1 follows identifier rules" do
 318 |       assert is_attribute_name_char(?c)
 319 |       assert is_attribute_name_char(?1)  # digit accepted after the first character
 320 |       assert is_attribute_name_char(?-)  # data-attribute
 321 |       assert is_attribute_name_char(0x05D0)  # א (Hebrew)
 322 |     end
 323 |   end
 324 | 
 325 |   describe "pseudo selector guards" do  # is_pseudo_name_start_char/1 and is_pseudo_name_char/1
 326 |     test "is_pseudo_name_start_char/1 follows identifier start rules" do
 327 |       assert is_pseudo_name_start_char(?h)  # hover
 328 |       assert is_pseudo_name_start_char(?f)  # first-child
 329 |       assert is_pseudo_name_start_char(0x0410)  # А (Cyrillic)
 330 |       refute is_pseudo_name_start_char(?1)  # digit rejected as a first character
 331 |     end
 332 | 
 333 |     test "is_pseudo_name_char/1 follows identifier rules" do
 334 |       assert is_pseudo_name_char(?h)
 335 |       assert is_pseudo_name_char(?1)  # digit accepted after the first character
 336 |       assert is_pseudo_name_char(?-)  # first-child
 337 |       assert is_pseudo_name_char(0x4E2D)  # 中 (Chinese)
 338 |     end
 339 |   end
 340 | 
 341 |   describe "function guards" do  # is_function_name_start_char/1 and is_function_name_char/1
 342 |     test "is_function_name_start_char/1 follows identifier start rules" do
 343 |       assert is_function_name_start_char(?n)  # nth-child
 344 |       assert is_function_name_start_char(?u)  # url
 345 |       assert is_function_name_start_char(0x00E9)  # é
 346 |       refute is_function_name_start_char(?1)  # digit rejected as a first character
 347 |     end
 348 | 
 349 |     test "is_function_name_char/1 follows identifier rules" do
 350 |       assert is_function_name_char(?n)
 351 |       assert is_function_name_char(?1)  # digit accepted after the first character
 352 |       assert is_function_name_char(?-)  # nth-child
 353 |       assert is_function_name_char(0x0391)  # Α (Greek)
 354 |     end
 355 |   end
 356 | 
 357 |   describe "is_number_char/1" do  # expected to accept digits, ".", signs and the exponent letters e/E
 358 |     test "accepts UTF-8 digits" do  # ASCII digits and digits from other scripts
 359 |       assert is_number_char(?0)
 360 |       assert is_number_char(?5)
 361 |       assert is_number_char(?9)
 362 |       assert is_number_char(0x0660)  # Arabic-Indic digit
 363 |       assert is_number_char(0x0966)  # Devanagari digit
 364 |       assert is_number_char(0xFF10)  # Fullwidth digit
 365 |     end
 366 | 
 367 |     test "accepts decimal point" do
 368 |       assert is_number_char(?.)
 369 |     end
 370 | 
 371 |     test "accepts signs" do
 372 |       assert is_number_char(?+)
 373 |       assert is_number_char(?-)
 374 |     end
 375 | 
 376 |     test "accepts scientific notation" do  # the exponent marker in either case
 377 |       assert is_number_char(?e)
 378 |       assert is_number_char(?E)
 379 |     end
 380 | 
 381 |     test "rejects other characters" do
 382 |       refute is_number_char(?a)  # a letter other than e/E
 383 |       refute is_number_char(?!)
 384 |     end
 385 |   end
 386 | 
 387 |   describe "is_number_start_char/1" do
 388 |     test "accepts UTF-8 digits" do
 389 |       # ASCII 0 and 9, an Arabic-Indic digit (U+0660), a fullwidth digit (U+FF10).
 390 |       for digit <- [?0, ?9, 0x0660, 0xFF10] do
 391 |         assert is_number_start_char(digit)
 392 |       end
 393 |     end
 394 | 
 395 |     test "accepts decimal point", do: assert is_number_start_char(?.)
 396 | 
 397 |     test "accepts signs" do
 398 |       for sign <- [?+, ?-], do: assert is_number_start_char(sign)
 399 |     end
 400 | 
 401 |     test "rejects scientific notation at start" do
 402 |       # The exponent marker must not be accepted as a first character.
 403 |       for marker <- [?e, ?E], do: refute is_number_start_char(marker)
 404 |     end
 405 |   end
 409 | 
 410 |   describe "is_comment_char/1" do
 411 |     test "accepts most characters" do
 412 |       # A letter, a digit, a punctuation mark and the space character (U+0020).
 413 |       for codepoint <- [?a, ?1, ?!, 0x0020] do
 414 |         assert is_comment_char(codepoint)
 415 |       end
 416 |     end
 417 | 
 418 |     test "accepts UTF-8 characters" do
 419 |       # 中 (Chinese) and Α (Greek).
 420 |       for codepoint <- [0x4E2D, 0x0391], do: assert is_comment_char(codepoint)
 421 |     end
 422 | 
 423 |     test "accepts all characters (sequence detection happens at parser level)" do
 424 |       # "*" (U+002A) and "/" (U+002F) are each valid on their own; recognising
 425 |       # the "*/" sequence is left to the parser.
 426 |       for codepoint <- [0x002A, 0x002F], do: assert is_comment_char(codepoint)
 427 |     end
 428 |   end
 429 | 
 430 |   describe "is_attribute_value_char/1" do
 431 |     test "accepts regular characters" do
 432 |       # First and last member of the lowercase, uppercase and digit ranges.
 433 |       for codepoint <- [?a, ?z, ?A, ?Z, ?0, ?9] do
 434 |         assert is_attribute_value_char(codepoint)
 435 |       end
 436 |     end
 437 | 
 438 |     test "accepts special characters commonly used in attribute values" do
 439 |       # ASCII punctuation expected to be usable in an unquoted value.
 440 |       specials = String.to_charlist("!@#$%^&*()-_=+{}[|;:<>.,/?`~")
 441 | 
 442 |       for codepoint <- specials do
 443 |         assert is_attribute_value_char(codepoint)
 444 |       end
 445 |     end
 446 | 
 447 |     test "accepts UTF-8 characters" do
 448 |       # À, 中 (Chinese), Α (Greek), А (Cyrillic), א (Hebrew), ا (Arabic),
 449 |       # あ (Hiragana), ア (Katakana), 가 (Hangul)
 450 |       letters = [0x00C0, 0x4E2D, 0x0391, 0x0410, 0x05D0, 0x0627, 0x3042, 0x30A2, 0xAC00]
 451 | 
 452 |       for letter <- letters do
 453 |         assert is_attribute_value_char(letter)
 454 |       end
 455 |     end
 456 | 
 457 |     test "rejects attribute selector end bracket" do
 458 |       refute is_attribute_value_char(?])
 459 |     end
 460 | 
 461 |     test "rejects quote characters" do
 462 |       # Double quote (U+0022) and single quote (U+0027).
 463 |       for quote_mark <- [0x0022, 0x0027], do: refute is_attribute_value_char(quote_mark)
 464 |     end
 465 | 
 466 |     test "rejects backslash (needs escaping)" do
 467 |       refute is_attribute_value_char(?\\)
 468 |     end
 469 | 
 470 |     test "rejects newlines including Unicode newlines" do
 471 |       # LF, FF, CR, then NEL (U+0085), line separator (U+2028) and
 472 |       # paragraph separator (U+2029).
 473 |       newlines = [0x000A, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029]
 474 | 
 475 |       for newline <- newlines do
 476 |         refute is_attribute_value_char(newline)
 477 |       end
 478 |     end
 479 | 
 480 |     test "rejects whitespace (for unquoted values)" do
 481 |       # Tab, line feed, form feed, carriage return and space.
 482 |       for blank <- [?\t, ?\n, ?\f, ?\r, ?\s] do
 483 |         refute is_attribute_value_char(blank)
 484 |       end
 485 |     end
 486 |   end
 513 | 
 514 |   describe "is_selector_start_char/1" do
 515 |     test "accepts element name start characters" do
 516 |       # d (div), s (span), _ (custom elements), | (ns) and 中 (Chinese element name).
 517 |       for codepoint <- [?d, ?s, ?_, ?|, 0x4E2D] do
 518 |         assert is_selector_start_char(codepoint)
 519 |       end
 520 |     end
 521 | 
 522 |     test "accepts selector prefix characters" do
 523 |       # .class, #id, [attr], :pseudo (including :is(), :not(), etc.) and the
 524 |       # universal selector *.
 525 |       for prefix <- [?., ?#, ?[, ?:, ?*] do
 526 |         assert is_selector_start_char(prefix)
 527 |       end
 528 |     end
 529 | 
 530 |     test "accepts colon for pseudo-class selectors" do
 531 |       # Dedicated check so that selectors such as :is(div) are known to work.
 532 |       assert is_selector_start_char(?:)
 533 |     end
 534 | 
 535 |     test "accepts whitespace" do
 536 |       # Space, tab and line feed.
 537 |       for blank <- [?\s, ?\t, ?\n], do: assert is_selector_start_char(blank)
 538 |     end
 539 | 
 540 |     test "rejects invalid start characters" do
 541 |       # A digit, a hyphen, an invalid character and an operator.
 542 |       for codepoint <- [?1, ?-, ?!, ?=] do
 543 |         refute is_selector_start_char(codepoint)
 544 |       end
 545 |     end
 546 |   end
 549 | 
 550 |   # NOTE: there are no tests for UTF-8 utility guards because
 551 |   # Selector.Parser.Guards does not implement any.
 552 |   #
 553 |   # TODO: if that functionality is needed, implement the guards in
 554 |   # Selector.Parser.Guards first, then cover them here in their own block:
 555 |   #   describe "UTF-8 utility guards" do ... end
 556 | 
 557 |   describe "integration tests" do
 558 |     test "can validate common CSS selector patterns" do
 559 |       # Element selector: "div"
 560 |       assert is_selector_start_char(?d)
 561 |       for char <- String.to_charlist("iv"), do: assert is_tag_name_char(char)
 562 | 
 563 |       # Class selector: ".my-class"
 564 |       assert is_selector_start_char(?.)
 565 |       assert is_class_start_char(?m)
 566 |       for char <- String.to_charlist("y-c"), do: assert is_class_char(char)
 567 | 
 568 |       # ID selector: "#user_123"
 569 |       assert is_selector_start_char(?#)
 570 |       assert is_id_start_char(?u)
 571 |       for char <- String.to_charlist("ser_1"), do: assert is_id_char(char)
 572 | 
 573 |       # Pseudo-class: ":nth-child"
 574 |       assert is_selector_start_char(?:)
 575 |       assert is_pseudo_name_start_char(?n)
 576 |       for char <- String.to_charlist("th-c"), do: assert is_pseudo_name_char(char)
 577 | 
 578 |       # Attribute selector: "[data-value='test']"
 579 |       assert is_selector_start_char(?[)
 580 |       assert is_attribute_name_start_char(?d)
 581 |       for char <- String.to_charlist("ata-"), do: assert is_attribute_name_char(char)
 582 |       assert is_attribute_operator_char(?=)
 583 |       for char <- String.to_charlist("test"), do: assert is_string_char(char)
 584 |     end
 585 | 
 586 |     test "can validate international CSS selector patterns" do
 587 |       # Chinese element selector: "标题"
 588 |       assert is_selector_start_char(0x6807)  # 标
 589 |       assert is_tag_name_char(0x9898)  # 题
 590 | 
 591 |       # Greek class selector: ".Αλφα"
 592 |       assert is_selector_start_char(?.)
 593 |       assert is_class_start_char(0x0391)  # Α
 594 |       # λ, φ, α
 595 |       for char <- [0x03BB, 0x03C6, 0x03B1], do: assert is_class_char(char)
 596 | 
 597 |       # Arabic ID selector: "#مثال"
 598 |       assert is_selector_start_char(?#)
 599 |       assert is_id_start_char(0x0645)  # م
 600 |       # The three letters that follow the leading one.
 601 |       for char <- [0x062B, 0x0627, 0x0644], do: assert is_id_char(char)
 602 | 
 603 |       # Japanese attribute with Arabic-Indic numbers: "[データ-値='١٢٣']"
 604 |       assert is_selector_start_char(?[)
 605 |       assert is_attribute_name_start_char(0x30C7)  # デ
 606 |       # ー, タ, the hyphen and 値
 607 |       for char <- [0x30FC, 0x30BF, ?-, 0x5024], do: assert is_attribute_name_char(char)
 608 |       assert is_attribute_operator_char(?=)
 609 |       # Arabic-Indic digits 1 to 3
 610 |       for digit <- 0x0661..0x0663, do: assert is_string_char(digit)
 611 |     end
 612 |   end
 634 | 
 635 |   describe "is_pseudo_start_char/1" do
 636 |     test "accepts ASCII letters as first character" do
 637 |       for letter <- [?a, ?z, ?A, ?Z], do: assert is_pseudo_start_char(letter)
 638 |     end
 639 | 
 640 |     test "accepts underscore as first character", do: assert is_pseudo_start_char(?_)
 641 | 
 642 |     test "accepts hyphen as first character (for vendor prefixes)" do
 643 |       # Needed for names such as -webkit-scrollbar or -moz-placeholder.
 644 |       assert is_pseudo_start_char(?-)
 645 |     end
 646 | 
 647 |     test "accepts non-ASCII characters as first character" do
 648 |       # À, 中 (Chinese character) and ヒ (Katakana).
 649 |       for codepoint <- [0x00C0, 0x4E2D, 0x30D2] do
 650 |         assert is_pseudo_start_char(codepoint)
 651 |       end
 652 |     end
 653 | 
 654 |     test "rejects digits as first character" do
 655 |       for digit <- [?0, ?9], do: refute is_pseudo_start_char(digit)
 656 |     end
 657 | 
 658 |     test "rejects other special characters as first character" do
 659 |       # Both parentheses, a space, a dot and an at sign.
 660 |       for codepoint <- [?(, ?), ?\s, ?., ?@] do
 661 |         refute is_pseudo_start_char(codepoint)
 662 |       end
 663 |     end
 664 |   end
 670 | 
 671 |   describe "is_pseudo_char/1" do
 672 |     test "accepts all valid start characters" do
 673 |       # Letters, hyphen, underscore and 中 (Chinese character).
 674 |       for codepoint <- [?a, ?Z, ?-, ?_, 0x4E2D] do
 675 |         assert is_pseudo_char(codepoint)
 676 |       end
 677 |     end
 678 | 
 679 |     test "additionally accepts digits" do
 680 |       for digit <- [?0, ?9], do: assert is_pseudo_char(digit)
 681 |     end
 682 | 
 683 |     test "rejects parentheses (not part of pseudo-class name)" do
 684 |       for paren <- [?(, ?)], do: refute is_pseudo_char(paren)
 685 |     end
 686 | 
 687 |     test "rejects whitespace (not part of pseudo-class name)" do
 688 |       # Space and tab.
 689 |       for blank <- [?\s, ?\t], do: refute is_pseudo_char(blank)
 690 |     end
 691 | 
 692 |     test "rejects invalid characters" do
 693 |       # None of these may appear in a name; that includes the plus sign.
 694 |       for codepoint <- String.to_charlist("@[]{}=~+") do
 695 |         refute is_pseudo_char(codepoint)
 696 |       end
 697 |     end
 698 |   end
 706 | 
 707 |   describe "pseudo-class examples" do
 708 |     test ":hover example" do
 709 |       [first | rest] = String.to_charlist("hover")
 710 |       assert is_pseudo_start_char(first)
 711 |       for char <- rest, do: assert is_pseudo_char(char)
 712 |     end
 713 | 
 714 |     test ":nth-child(2n+1) example" do
 715 |       [first | rest] = String.to_charlist("nth-child")
 716 |       assert is_pseudo_start_char(first)
 717 |       for char <- rest, do: assert is_pseudo_char(char)
 718 | 
 719 |       # The parentheses and their content are NOT part of the pseudo-class name;
 720 |       # they would be parsed separately as functional notation, and the plus
 721 |       # sign belongs to the argument.
 722 |       for char <- [?(, ?+, ?)], do: refute is_pseudo_char(char)
 723 |     end
 724 | 
 725 |     test ":lang(fr) example" do
 726 |       [first | rest] = String.to_charlist("lang")
 727 |       assert is_pseudo_start_char(first)
 728 |       for char <- rest, do: assert is_pseudo_char(char)
 729 | 
 730 |       # Parentheses are not part of the pseudo-class name.
 731 |       for paren <- [?(, ?)], do: refute is_pseudo_char(paren)
 732 |     end
 733 |   end
 744 | 
 745 |   describe "is_lang_char/1" do
 746 |     test "accepts ASCII letters" do
 747 |       for letter <- [?a, ?z, ?A, ?Z], do: assert is_lang_char(letter)
 748 |     end
 749 | 
 750 |     test "accepts ASCII digits" do
 751 |       for digit <- [?0, ?9], do: assert is_lang_char(digit)
 752 |     end
 753 | 
 754 |     test "accepts hyphen as separator", do: assert is_lang_char(?-)
 755 | 
 756 |     test "rejects non-ASCII letters" do
 757 |       # é, 中 and Α (Greek).
 758 |       for letter <- [0x00E9, 0x4E2D, 0x0391], do: refute is_lang_char(letter)
 759 |     end
 760 | 
 761 |     test "rejects non-ASCII digits" do
 762 |       # Arabic-Indic digit and fullwidth digit.
 763 |       for digit <- [0x0660, 0xFF10], do: refute is_lang_char(digit)
 764 |     end
 765 | 
 766 |     test "rejects other characters" do
 767 |       for other <- [?_, ?., ?@, ?!, ?\s], do: refute is_lang_char(other)
 768 |     end
 769 |   end
 781 | 
 782 |   describe "is_lang_start_char/1" do
 783 |     test "accepts ASCII letters" do
 784 |       for letter <- [?a, ?z, ?A, ?Z], do: assert is_lang_start_char(letter)
 785 |     end
 786 | 
 787 |     test "rejects digits" do
 788 |       for digit <- [?0, ?9], do: refute is_lang_start_char(digit)
 789 |     end
 790 | 
 791 |     test "rejects hyphen", do: refute is_lang_start_char(?-)
 792 | 
 793 |     test "rejects non-ASCII characters" do
 794 |       # é and 中.
 795 |       for codepoint <- [0x00E9, 0x4E2D], do: refute is_lang_start_char(codepoint)
 796 |     end
 797 |   end
 804 | 
 805 |   # Each tag is checked the way it would be scanned: the first code point
 806 |   # against is_lang_start_char/1, the following ones against is_lang_char/1.
 807 |   describe "language tag examples" do
 808 |     test "simple language codes" do
 809 |       for tag <- ["en", "fr"] do
 810 |         [first | rest] = String.to_charlist(tag)
 811 |         assert is_lang_start_char(first)
 812 |         for char <- rest, do: assert is_lang_char(char)
 813 |       end
 814 |     end
 815 | 
 816 |     test "language with region codes" do
 817 |       for tag <- ["en-US", "pt-BR"] do
 818 |         [first | rest] = String.to_charlist(tag)
 819 |         assert is_lang_start_char(first)
 820 |         for char <- rest, do: assert is_lang_char(char)
 821 |       end
 822 |     end
 823 | 
 824 |     test "complex language tags" do
 825 |       # "zh-Hans-CN": Chinese, Simplified script, China
 826 |       # "en-GB-oed": English, Great Britain, Oxford English Dictionary spelling
 827 |       for tag <- ["zh-Hans-CN", "en-GB-oed"] do
 828 |         [first | _] = chars = String.to_charlist(tag)
 829 | 
 830 |         # The leading character must also pass the stricter start guard, which
 831 |         # (unlike is_lang_char/1) rejects digits and the hyphen.
 832 |         assert is_lang_start_char(first), "#{tag} has an invalid first character"
 833 | 
 834 |         # Every character, the first one included, must be a language character.
 835 |         for char <- chars do
 836 |           assert is_lang_char(char), "unexpected #{<<char::utf8>>} in #{tag}"
 837 |         end
 838 |       end
 839 |     end
 840 |   end
 844 | 
 845 |   # Acceptance and rejection cases for is_selector_char/1, grouped by category.
 846 |   describe "is_selector_char/1" do
 847 |     test "accepts all identifier characters" do
 848 |       # ASCII letters, digits, underscore, hyphen, 中 (Chinese) and Α (Greek).
 849 |       for char <- [?a, ?Z, ?0, ?9, ?_, ?-, 0x4E2D, 0x0391] do
 850 |         assert is_selector_char(char)
 851 |       end
 852 |     end
 853 | 
 854 |     test "accepts all delimiter characters" do
 855 |       assert is_selector_char(?#)  # ID selector
 856 |       assert is_selector_char(?.)  # Class selector
 857 |       assert is_selector_char(?:)  # Pseudo-class/element
 858 |       assert is_selector_char(?[)  # Attribute start
 859 |       assert is_selector_char(?])  # Attribute end
 860 |       assert is_selector_char(?()  # Function start
 861 |       assert is_selector_char(?))  # Function end
 862 |       assert is_selector_char(?,)  # Selector separator
 863 |       assert is_selector_char(?")  # String delimiter
 864 |       assert is_selector_char(?')  # String delimiter
 865 |       assert is_selector_char(?\\) # Escape character
 866 |     end
 867 | 
 868 |     test "accepts all combinator characters" do
 869 |       # Child (>), adjacent sibling (+) and general sibling (~).
 870 |       for char <- [?>, ?+, ?~], do: assert is_selector_char(char)
 871 |     end
 872 | 
 873 |     test "accepts whitespace characters" do
 874 |       # Tab, line feed, form feed, carriage return and space.
 875 |       for char <- [0x0009, 0x000A, 0x000C, 0x000D, 0x0020] do
 876 |         assert is_selector_char(char)
 877 |       end
 878 |     end
 879 | 
 880 |     test "accepts attribute operator characters" do
 881 |       # The characters that make up =, ~=, |=, ^=, $= and *=.
 882 |       for char <- [?=, ?~, ?|, ?^, ?$, ?*], do: assert is_selector_char(char)
 883 |     end
 884 | 
 885 |     test "accepts special selector characters" do
 886 |       # Universal selector (*), namespace separator (|) and ! (for :not()).
 887 |       for char <- [?*, ?|, ?!], do: assert is_selector_char(char)
 888 |     end
 889 | 
 890 |     test "accepts common punctuation for attribute values and strings" do
 891 |       for char <- String.to_charlist("/?&%@;{}<>`") do
 892 |         assert is_selector_char(char)
 893 |       end
 894 |     end
 895 | 
 896 |     test "accepts UTF-8 characters from various scripts" do
 897 |       # é (Latin-1 Supplement), А (Cyrillic), א (Hebrew), ا (Arabic), あ (Hiragana),
 898 |       # ア (Katakana), 가 (Hangul), ٠ (Arabic-Indic digit), ０ (Fullwidth digit)
 899 |       scripts = [0x00E9, 0x0410, 0x05D0, 0x0627, 0x3042, 0x30A2, 0xAC00, 0x0660, 0xFF10]
 900 | 
 901 |       for char <- scripts do
 902 |         assert is_selector_char(char)
 903 |       end
 904 |     end
 905 | 
 906 |     test "accepts printable ASCII characters" do
 907 |       for codepoint <- 0x0021..0x007E do
 908 |         assert is_selector_char(codepoint),
 909 |                "Failed for codepoint #{codepoint} (#{<<codepoint::utf8>>})"
 910 |       end
 911 |     end
 912 | 
 913 |     test "rejects null character" do
 914 |       refute is_selector_char(0x0000)
 915 |     end
 916 | 
 917 |     test "rejects control characters below space (except whitespace)" do
 918 |       # Includes bell (U+0007), backspace (U+0008) and vertical tab (U+000B),
 919 |       # the latter because it is not CSS whitespace.
 920 |       controls = [0x0001, 0x0002, 0x0007, 0x0008, 0x000B, 0x000E, 0x000F, 0x001F]
 921 | 
 922 |       for char <- controls do
 923 |         refute is_selector_char(char)
 924 |       end
 925 |     end
 926 | 
 927 |     test "rejects DEL character" do
 928 |       refute is_selector_char(0x007F)
 929 |     end
 930 | 
 931 |     test "rejects surrogate codepoints" do
 932 |       # First and last code point of the high and low surrogate ranges.
 933 |       for char <- [0xD800, 0xDBFF, 0xDC00, 0xDFFF] do
 934 |         refute is_selector_char(char)
 935 |       end
 936 |     end
 937 | 
 938 |     test "accepts non-breaking space and other Unicode spaces" do
 939 |       # Non-breaking space, en quad and ideographic space.
 940 |       for char <- [0x00A0, 0x2000, 0x3000], do: assert is_selector_char(char)
 941 |     end
 942 | 
 943 |     test "comprehensive selector examples" do
 944 |       examples = [
 945 |         # Simple selector
 946 |         "div.class#id",
 947 |         # Complex selector
 948 |         "[data-value~=\"test\"]:nth-child(2n+1)",
 949 |         # International selector
 950 |         ".クラス#标识符[атрибут=\"القيمة\"]",
 951 |         # Namespace and combinators
 952 |         "ns|element > .class + #id ~ [attr]"
 953 |       ]
 954 | 
 955 |       for example <- examples, char <- String.to_charlist(example) do
 956 |         assert is_selector_char(char), "unexpected #{<<char::utf8>>} in #{example}"
 957 |       end
 958 |     end
 959 |   end
 960 | 
 961 |   # Characters accepted inside an nth-formula such as "2n+1" or "odd".
 962 |   describe "is_nth_formula_char/1" do
 963 |     test "accepts ASCII digits" do
 964 |       for char <- [?0, ?5, ?9], do: assert is_nth_formula_char(char)
 965 |     end
 966 | 
 967 |     test "accepts variable n (case-insensitive)" do
 968 |       for char <- [?n, ?N], do: assert is_nth_formula_char(char)
 969 |     end
 970 | 
 971 |     test "accepts letters for odd/even keywords (case-insensitive)" do
 972 |       # o and d spell "odd"; e and v (together with n) spell "even".
 973 |       for char <- [?o, ?O, ?d, ?D, ?e, ?E, ?v, ?V] do
 974 |         assert is_nth_formula_char(char)
 975 |       end
 976 |     end
 977 | 
 978 |     test "accepts operators and signs" do
 979 |       for char <- [?+, ?-], do: assert is_nth_formula_char(char)
 980 |     end
 981 | 
 982 |     test "accepts CSS whitespace" do
 983 |       # Space, tab, line feed, form feed and carriage return.
 984 |       for char <- [0x0020, 0x0009, 0x000A, 0x000C, 0x000D] do
 985 |         assert is_nth_formula_char(char)
 986 |       end
 987 |     end
 988 | 
 989 |     test "rejects other letters" do
 990 |       for char <- [?a, ?z, ?A, ?Z, ?m, ?x], do: refute is_nth_formula_char(char)
 991 |     end
 992 | 
 993 |     test "rejects special characters not in nth-formulas" do
 994 |       for char <- String.to_charlist(".#*/=!()") do
 995 |         refute is_nth_formula_char(char)
 996 |       end
 997 |     end
 998 | 
 999 |     test "rejects non-ASCII digits" do
1000 |       # Arabic-Indic digit (U+0660) and fullwidth digit (U+FF10).
1001 |       for char <- [0x0660, 0xFF10], do: refute is_nth_formula_char(char)
1002 |     end
1003 | 
1004 |     test "rejects Unicode letters" do
1005 |       # é and 中.
1006 |       for char <- [0x00E9, 0x4E2D], do: refute is_nth_formula_char(char)
1007 |     end
1008 |   end
1009 | 
1010 |   # Characters accepted in the first position of an nth-formula.
1011 |   describe "is_nth_formula_starting_char/1" do
1012 |     test "accepts ASCII digits as starting characters" do
1013 |       for char <- [?0, ?1, ?9], do: assert is_nth_formula_starting_char(char)
1014 |     end
1015 | 
1016 |     test "accepts signs as starting characters" do
1017 |       # As in "+2n+1" or "-n+3".
1018 |       for char <- [?+, ?-], do: assert is_nth_formula_starting_char(char)
1019 |     end
1020 | 
1021 |     test "accepts variable n as starting character (case-insensitive)" do
1022 |       for char <- [?n, ?N], do: assert is_nth_formula_starting_char(char)
1023 |     end
1024 | 
1025 |     test "accepts keyword starting letters (case-insensitive)" do
1026 |       # o for "odd", e for "even".
1027 |       for char <- [?o, ?O, ?e, ?E], do: assert is_nth_formula_starting_char(char)
1028 |     end
1029 | 
1030 |     test "accepts leading CSS whitespace" do
1031 |       # Space, tab, line feed, form feed and carriage return.
1032 |       for char <- [0x0020, 0x0009, 0x000A, 0x000C, 0x000D] do
1033 |         assert is_nth_formula_starting_char(char)
1034 |       end
1035 |     end
1036 | 
1037 |     test "rejects letters that cannot start nth-formulas" do
1038 |       # d and v can appear inside "odd" and "even" but cannot start a formula.
1039 |       for char <- [?d, ?v, ?a, ?z, ?m], do: refute is_nth_formula_starting_char(char)
1040 |     end
1041 | 
1042 |     test "rejects special characters" do
1043 |       for char <- String.to_charlist(".#*()=") do
1044 |         refute is_nth_formula_starting_char(char)
1045 |       end
1046 |     end
1047 | 
1048 |     test "rejects non-ASCII digits" do
1049 |       # Arabic-Indic digit (U+0660) and fullwidth digit (U+FF10).
1050 |       for char <- [0x0660, 0xFF10], do: refute is_nth_formula_starting_char(char)
1051 |     end
1052 |   end
1053 | 
1054 |   # Each formula is checked the way it would be scanned: the first code point
1055 |   # against is_nth_formula_starting_char/1, every later one against
1056 |   # is_nth_formula_char/1.
1057 |   describe "nth-formula examples" do
1058 |     test "validates simple integer formulas" do
1059 |       # "5" and "0"
1060 |       for char <- [?5, ?0], do: assert is_nth_formula_starting_char(char)
1061 |     end
1062 | 
1063 |     test "validates keyword formulas" do
1064 |       for formula <- ["odd", "even"] do
1065 |         [first | rest] = String.to_charlist(formula)
1066 |         assert is_nth_formula_starting_char(first)
1067 |         for char <- rest, do: assert is_nth_formula_char(char)
1068 |       end
1069 |     end
1070 | 
1071 |     test "validates An+B formulas" do
1072 |       for formula <- ["2n+1", "-n+3", "3n-2"] do
1073 |         [first | rest] = String.to_charlist(formula)
1074 |         assert is_nth_formula_starting_char(first)
1075 |         for char <- rest, do: assert is_nth_formula_char(char)
1076 |       end
1077 |     end
1078 | 
1079 |     test "validates formulas with whitespace" do
1080 |       # Leading, inner and trailing spaces are all part of the checked input.
1081 |       [first | rest] = String.to_charlist(" 2n + 1 ")
1082 |       assert is_nth_formula_starting_char(first)
1083 |       for char <- rest, do: assert is_nth_formula_char(char)
1084 |     end
1085 | 
1086 |     test "validates n-only formulas" do
1087 |       for formula <- ["n", "+n", "-n"] do
1088 |         [first | rest] = String.to_charlist(formula)
1089 |         assert is_nth_formula_starting_char(first)
1090 |         for char <- rest, do: assert is_nth_formula_char(char)
1091 |       end
1092 |     end
1093 | 
1094 |     test "validates coefficient-only formulas" do
1095 |       for formula <- ["2n", "-3n"] do
1096 |         [first | rest] = String.to_charlist(formula)
1097 |         assert is_nth_formula_starting_char(first)
1098 |         for char <- rest, do: assert is_nth_formula_char(char)
1099 |       end
1100 |     end
1101 |   end
1181 | end
1182 | 


--------------------------------------------------------------------------------