├── .gitattributes ├── clss-logo.png ├── docs ├── clss-logo.png └── index.html ├── README.md ├── clss.asd ├── LICENSE ├── package.lisp ├── selector.lisp ├── pseudo-selectors.lisp ├── parser.lisp └── engine.lisp /.gitattributes: -------------------------------------------------------------------------------- 1 | doc/ linguist-vendored 2 | -------------------------------------------------------------------------------- /clss-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shinmera/CLSS/master/clss-logo.png -------------------------------------------------------------------------------- /docs/clss-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shinmera/CLSS/master/docs/clss-logo.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This repository has [moved](https://shinmera.com/projects/CLSS)! 2 | Due to Microsoft's continued enshittification of the platform this repository has been moved to [Codeberg](https://shinmera.com/projects/CLSS) in August of 2025. It will not receive further updates or patches. **Issues and pull requests will not be looked at here either**, please submit your patches and issue tickets on Codeberg, or send them directly via good old email patches to [shirakumo@tymoon.eu](mailto:shirakumo@tymoon.eu). 3 | 4 | Thanks. -------------------------------------------------------------------------------- /clss.asd: -------------------------------------------------------------------------------- 1 | (asdf:defsystem clss 2 | :name "CSS Like Simple Selectors" 3 | :version "0.3.1" 4 | :license "zlib" 5 | :author "Yukari Hafner " 6 | :maintainer "Yukari Hafner " 7 | :description "A DOM tree searching engine based on CSS selectors." 8 | :homepage "https://shinmera.com/docs/CLSS/" 9 | :bug-tracker "https://shinmera.com/project/CLSS/issues" 10 | :source-control (:git "https://shinmera.com/project/CLSS.git") 11 | :serial T 12 | :components ((:file "package") 13 | (:file "selector") 14 | (:file "parser") 15 | (:file "engine") 16 | (:file "pseudo-selectors")) 17 | :depends-on (:array-utils 18 | :plump)) 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Yukari Hafner 2 | 3 | This software is provided 'as-is', without any express or implied 4 | warranty. In no event will the authors be held liable for any damages 5 | arising from the use of this software. 6 | 7 | Permission is granted to anyone to use this software for any purpose, 8 | including commercial applications, and to alter it and redistribute it 9 | freely, subject to the following restrictions: 10 | 11 | 1. The origin of this software must not be misrepresented; you must not 12 | claim that you wrote the original software. If you use this software 13 | in a product, an acknowledgment in the product documentation would be 14 | appreciated but is not required. 15 | 2. Altered source versions must be plainly marked as such, and must not be 16 | misrepresented as being the original software. 17 | 3. This notice may not be removed or altered from any source distribution. 18 | -------------------------------------------------------------------------------- /package.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:cl) 2 | (defpackage #:CLSS 3 | (:nicknames #:org.shirakumo.clss) 4 | (:use #:cl #:plump) 5 | (:shadow #:read-name #:read-attribute #:read-attribute-value) 6 | ;; engine.lisp 7 | (:export 8 | #:pseudo-selector 9 | #:remove-pseudo-selector 10 | #:define-pseudo-selector 11 | 12 | #:pseudo-selector-not-available 13 | #:name 14 | 15 | #:undefined-pseudo-selector 16 | #:name 17 | 18 | #:selector-malformed 19 | #:selector 20 | 21 | #:css-escape 22 | #:css-unescape 23 | 24 | #:match-constraint 25 | #:match-matcher 26 | #:match-pair 27 | #:match-group 28 | #:match-selector 29 | 30 | #:select 31 | #:ordered-select 32 | 33 | #:match-group-backwards 34 | #:node-matches-p) 35 | ;; parser.lisp 36 | (:export 37 | #:parse-selector) 38 | ;; selector.lisp 39 | (:export 40 | #:make-selector 41 | #:make-clss-matcher 42 | #:make-any-constraint 43 | #:make-tag-constraint 44 | #:make-type-constraint 45 | #:make-id-constraint 46 | #:make-class-constraint 47 | #:make-attribute-constraint 48 | #:make-pseudo-constraint)) 49 | -------------------------------------------------------------------------------- /selector.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:org.shirakumo.clss) 2 | 3 | (defun make-selector (&rest groups) 4 | `(:selector ,@groups)) 5 | 6 | (defun make-group (&rest matches-and-ops) 7 | `(:group ,@matches-and-ops)) 8 | 9 | (defun make-clss-matcher (&rest constraints) 10 | `(:matcher ,@constraints)) 11 | 12 | (defun make-any-constraint () 13 | `(:c-any)) 14 | 15 | (defun make-tag-constraint (tag) 16 | `(:c-tag ,tag)) 17 | 18 | (defun make-type-constraint (name) 19 | (let ((type (or (find-symbol (string-upcase name) "PLUMP-DOM") 20 | (find-symbol (string-upcase name)) 21 | (error "No such PLUMP-DOM class: ~s" name)))) 22 | (or (subtypep type 'plump-dom:node) 23 | (error "~s is not a PLUMP-DOM:NODE subclass." name)) 24 | `(:c-type ,type))) 25 | 26 | (defun make-id-constraint (id) 27 | `(:c-id ,id)) 28 | 29 | (defun make-class-constraint (class) 30 | `(:c-class ,class)) 31 | 32 | (defun make-attribute-constraint (attribute &optional value (comparator :=)) 33 | (if (and value comparator) 34 | `(:c-attr-equals ,comparator ,attribute ,value) 35 | `(:c-attr-exists ,attribute))) 36 | 37 | (defun make-pseudo-constraint (function &rest args) 38 | `(:c-pseudo ,function ,@args)) 39 | -------------------------------------------------------------------------------- /pseudo-selectors.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:org.shirakumo.clss) 2 | 3 | (define-pseudo-selector root (node) 4 | (root-p node)) 5 | 6 | (defun match-nth (i n) 7 | (cond ((string-equal n "odd") (oddp i)) 8 | ((string-equal n "even") (evenp i)) 9 | ((find #\n n) 10 | (let* ((npos (position #\n n)) 11 | (mult (cond ((= 0 npos) 1) 12 | ((and (= 1 npos) (char= #\- (char n 0))) -1) 13 | (T (parse-integer n :end npos)))) 14 | (off (if (< (1+ npos) (length n)) 15 | (parse-integer n :start (1+ npos)) 16 | 0))) 17 | (multiple-value-bind (quot rem) (floor (- i off) mult) 18 | (and (= 0 rem) (<= 0 quot))))) 19 | (T (= (parse-integer n) i)))) 20 | 21 | (define-pseudo-selector nth-child (node n) 22 | (match-nth (1+ (element-position node)) n)) 23 | 24 | (define-pseudo-selector nth-last-child (node n) 25 | (match-nth (- (length (sibling-elements node)) 26 | (element-position node)) n)) 27 | 28 | (define-pseudo-selector nth-of-type (node n) 29 | (match-nth (loop with count = 0 30 | for sibling across (family node) 31 | when (and (element-p sibling) 32 | (string-equal (tag-name sibling) (tag-name node))) 33 | do (incf count) 34 | until (eq sibling node) 35 | finally (return count)) n)) 36 | 37 | (define-pseudo-selector nth-last-of-type (node n) 38 | (match-nth (loop with count = 0 39 | for i downfrom (1- (length (family node))) to 0 40 | for sibling = (aref (family node) i) 41 | when (and (element-p sibling) 42 | (string-equal (tag-name sibling) (tag-name node))) 43 | do (incf count) 44 | until (eq sibling node) 45 | finally (return count)) n)) 46 | 47 | (define-pseudo-selector first-child (node) 48 | (= (element-position node) 0)) 49 | 50 | (define-pseudo-selector last-child (node) 51 | (loop for i downfrom (1- (length (family node))) to 0 52 | for sibling = (aref (family node) i) 53 | when (element-p sibling) 54 | do (return (eq sibling node)))) 55 | 56 | (define-pseudo-selector first-of-type (node) 57 | (loop for sibling across (family node) 58 | when (and (element-p sibling) 59 | (string-equal (tag-name sibling) (tag-name node))) 60 | do (return (eq sibling node)))) 61 | 62 | (define-pseudo-selector last-of-type (node) 63 | (loop for i downfrom (1- (length (family node))) to 0 64 | for sibling = (aref (family node) i) 65 | when (and (element-p sibling) 66 | (string-equal (tag-name sibling) (tag-name node))) 67 | do (return (eq sibling node)))) 68 | 69 | (define-pseudo-selector only-child (node) 70 | (loop for sibling across (family node) 71 | always (or (eq sibling node) 72 | (not (element-p sibling))))) 73 | 74 | (define-pseudo-selector only-of-type (node) 75 | (loop for sibling across (family node) 76 | always (or (eq sibling node) 77 | (not (element-p sibling)) 78 | (not (string-equal (tag-name sibling) (tag-name node)))))) 79 | 80 | (define-pseudo-selector empty (node) 81 | (= (length (children node)) 0)) 82 | 83 | (define-pseudo-selector link (node) 84 | (error 'pseudo-selector-not-available :name "LINK")) 85 | 86 | (define-pseudo-selector visited (node) 87 | (error 'pseudo-selector-not-available :name "VISITED")) 88 | 89 | (define-pseudo-selector active (node) 90 | (error 'pseudo-selector-not-available :name "ACTIVE")) 91 | 92 | (define-pseudo-selector hover (node) 93 | (error 'pseudo-selector-not-available :name "HOVER")) 94 | 95 | (define-pseudo-selector focus (node) 96 | (error 'pseudo-selector-not-available :name "FOCUS")) 97 | 98 | (define-pseudo-selector target (node) 99 | (error 'pseudo-selector-not-available :name "TARGET")) 100 | 101 | (define-pseudo-selector lang (node language) 102 | (let ((languages (or (attribute node "lang") 103 | (attribute node "xml:lang")))) 104 | (when (and languages (find-substring language languages #\-)) 105 | language))) 106 | 107 | (define-pseudo-selector enabled (node) 108 | (has-attribute node "enabled")) 109 | 110 | (define-pseudo-selector disabled (node) 111 | (has-attribute node "disabled")) 112 | 113 | (define-pseudo-selector checked (node) 114 | (has-attribute node "checked")) 115 | 116 | (define-pseudo-selector first-line (node) 117 | (error 'pseudo-selector-not-available :name "FIRST-LINE")) 118 | 119 | (define-pseudo-selector first-letter (node) 120 | (error 'pseudo-selector-not-available :name "FIRST-LETTER")) 121 | 122 | (define-pseudo-selector before (node) 123 | (error 'pseudo-selector-not-available :name "BEFORE")) 124 | 125 | (define-pseudo-selector after (node) 126 | (error 'pseudo-selector-not-available :name "AFTER")) 127 | 128 | (define-pseudo-selector warning (node) 129 | (let ((classes (attribute node "class"))) 130 | (when (and classes (find-substring "warning" classes #\Space)) 131 | "warning"))) 132 | 133 | (define-pseudo-selector not (node selector) 134 | (not (match-matcher (third (second (parse-selector selector))) node))) 135 | 136 | ;;; Extra extensions specific to CLSS 137 | 138 | (define-pseudo-selector first-only (node) 139 | (signal 'complete-match-pair :value (make-array 1 :initial-element node :adjustable T :fill-pointer T))) 140 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | Clss
0.3.1

A DOM tree searching engine based on CSS selectors.

Table of Contents

About CLSS

CLSS is a DOM traversal engine based on CSS selectors. It makes use of the Plump-DOM and is used by lQuery.

How To

Load CLSS through Quicklisp or ASDF:

(ql:quickload :clss)
 2 | 

Using a standard CSS selector you can retrieve a vector of nodes from the DOM:

(clss:select "img" (plump:parse "<div><p>A beautiful image: <img src="//example.com/image.png" alt="image" /></p></div>"))
 3 | 

CLSS implements Level 3 selectors and offers most of the features from the spec. Some things were left out as they make no sense outside a CSS context. 4 | As Plump supports XML as well as HTML, it also includes special handling for a few nodes that are not elements and are thus not reachable by standard CSS selectors. In order to solve this problem, CLSS adds an extra operator, the ^ caret. The caret is followed by a Plump-DOM class-name and will then match any elements that conform to a typep test against it.

(clss:select "^CDATA" (plump:parse "<foo><![CDATA[bar]]></foo>"))
 5 | 

CSS selectors in themselves also don't support XML namespaces due to the ambiguity arising with pseudo-selectors. CLSS solves this by interpreting a double colon as a name. Thus, a tag with the name of foo:bar is selected by foo::bar.

CLSS attempts to be a fast engine and various parts of it have been tuned for this purpose, which limits the extensibility of CLSS a bit. However, it is still possible to f.e. programmatically construct a selector.

Extending CLSS

Using define-pseudo-selector you can add your own extensions to CLSS:

(clss:define-pseudo-selector outside-link (node)
 6 |   (let ((href (plump:attribute node "href")))
 7 |     (and href (cl-ppcre:scan "^(http|https)://" href))))
 8 | 
 9 | (clss:select "a:outside-link" (plump:parse "<foo><a href=\"/baloney\"/><a href=\"http://example.com\"/></foo>"))
10 | 

System Information

0.3.1
Yukari Hafner
zlib
64 | -------------------------------------------------------------------------------- /parser.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:org.shirakumo.clss) 2 | 3 | ;;; Selector grammar 4 | ;; SELECTOR ::= GROUP (, GROUP)* 5 | ;; GROUP ::= MATCHER (OPERATOR MATCHER)* 6 | ;; OPERATOR ::= #\> | #\+ | #\~ | #\Space 7 | ;; MATCHER ::= (#\* | TAG | TYPE | ID | CLASS | ATTRIBUTE) ID? CLASS* ATTRIBUTE* PSEUDO* 8 | ;; ID ::= #\# NAME 9 | ;; TAG ::= NAME 10 | ;; CLASS ::= #\. NAME 11 | ;; ATTRIBUTE ::= #\[ NAME ATTR-VALUE? #\] 12 | ;; ATTR-VALUE ::= ATTR-OPERATOR (NAME | STRING) 13 | ;; ATTR-OPERATOR ::= (#\~ | #\^ | #\$ | #\* | #\|)? #\= 14 | ;; PSEUDO ::= #\: NAME ARGUMENTS? 15 | ;; ARGUMENTS ::= #\( VALUE (#\, VALUE)* #\) 16 | 17 | ;; https://www.w3.org/TR/CSS21/syndata.html#value-def-identifier 18 | (define-matcher clss-name (or (in #\/ #\9) (in #\? #\Z) (in #\a #\z) (any #\- #\\ #\_ #\! #.(code-char #xB7)) 19 | (in #.(code-char #xA0) #.(code-char (1- char-code-limit))) 20 | (and (in #.(code-char #x1F) #.(code-char #xFF)) 21 | (prev (is #\\))))) 22 | (define-matcher clss-tag-name (or :clss-name (and (is #\:) (next (is #\:))) (and (is #\:) (prev (is #\:))))) 23 | (define-matcher combinator (any #\Space #\Newline #\> #\+ #\~)) 24 | (define-matcher grouper (is #\,)) 25 | (define-matcher attr-comparator (or (is #\=) (and (any #\~ #\^ #\$ #\* #\|) (next (is #\=))))) 26 | (defvar *valid-combinators* " >+~") 27 | 28 | (defun escapable (char) 29 | "A helper function to decide whether a part of identifier needs escaping." 30 | (let ((code (char-code char))) 31 | (or (<= code #x1f) 32 | (= code #x7f) 33 | (not (or (>= code #x80) 34 | (char= char #\-) 35 | (char= char #\_) 36 | (digit-char-p char) 37 | (char<= #\A char #\Z) 38 | (char<= #\a char #\z)))))) 39 | 40 | ;; https://drafts.csswg.org/cssom/#serialize-an-identifier 41 | (defun css-escape (string) 42 | "Escape all the invalid CSS characters to their safe counterparts." 43 | (declare (optimize speed) 44 | (type string string)) 45 | (if (or (some #'escapable string) 46 | (and (= 1 (length string)) 47 | (char= #\- (elt string 0))) 48 | (and (>= (length string) 1) 49 | (digit-char-p (elt string 0))) 50 | (and (>= (length string) 2) 51 | (char= #\- (elt string 0)) 52 | (digit-char-p (elt string 1)))) 53 | (with-output-to-string (s) 54 | (flet ((escape-regular-string (string) 55 | (dotimes (index (length string)) 56 | (let ((char (elt string index))) 57 | (cond 58 | ((char= #\Nul char) 59 | (write-string "\\fffd " s)) 60 | ((or (<= #x1 (char-code char) #x1f) 61 | (= (char-code char) #x7f)) 62 | (format s "\\~X " (char-code char))) 63 | ((escapable char) 64 | (write-char #\\ s) 65 | (write-char char s)) 66 | (t 67 | (write-char char s))))))) 68 | ;; Process first char. 69 | (cond 70 | ((and (= (length string) 1) 71 | (char= #\- (elt string 0))) 72 | (write-char #\\ s) 73 | (write-char (elt string 0) s)) 74 | ((and (= (length string) 1) 75 | (digit-char-p (elt string 0))) 76 | (format s "\\~X " (char-code (elt string 0)))) 77 | (t 78 | (escape-regular-string (subseq string 0 1)))) 79 | ;; Second char. 80 | (when (>= (length string) 2) 81 | (if (and (char= #\- (elt string 0)) 82 | (digit-char-p (elt string 1))) 83 | (format s "\\~X " (char-code (elt string 1))) 84 | (escape-regular-string (subseq string 1 2)))) 85 | ;; All the rest. 86 | (when (> (length string) 2) 87 | (escape-regular-string (subseq string 2))))) 88 | string)) 89 | 90 | (defun css-unescape (string) 91 | "Get the original contents of the escaped STRING." 92 | (declare (optimize speed) 93 | (type string string)) 94 | (if (search "\\" string) 95 | (with-output-to-string (s) 96 | (loop with len = (length string) 97 | for index below len 98 | for char = (elt string index) 99 | for next-char = (when (< index (1- len)) 100 | (elt string (1+ index))) 101 | do (cond 102 | ((search "\\fffd " string 103 | :start2 index :end2 (min len (+ index 6))) 104 | (write-char #\Nul s) 105 | (setf index (position #\Space string :start (1+ index)))) 106 | ((and (eql #\\ char) 107 | next-char 108 | (digit-char-p next-char 16)) 109 | (write-char 110 | (code-char (parse-integer string :radix 16 :start (1+ index))) s) 111 | (setf index (position #\Space string :start (1+ index)))) 112 | ((and (eql #\\ char) 113 | next-char) 114 | (write-char next-char s) 115 | (incf index)) 116 | (t 117 | (write-char char s))))) 118 | string)) 119 | 120 | (defun read-name () 121 | "Reads a CSS selector name-like string." 122 | (unless (funcall (make-matcher :clss-name)) 123 | (error "~s at position ~d is not a valid name char." (peek) *index*)) 124 | (consume-until (make-matcher (not :clss-name)))) 125 | 126 | (defun read-any-constraint () 127 | "Reads an any constraint and returns it." 128 | (make-any-constraint)) 129 | 130 | (defun read-type-constraint () 131 | "Reads a DOM type constraint and returns it." 132 | (make-type-constraint (read-name))) 133 | 134 | (defun read-tag-constraint () 135 | "Reads a tag constraint and returns it." 136 | (let ((out (make-string-output-stream))) 137 | (loop with matcher = (make-matcher :clss-tag-name) 138 | for prev = #\ then char 139 | for char = (peek) 140 | while (funcall matcher) 141 | do (unless (and (eql char #\:) (eql prev #\:)) 142 | (write-char char out)) 143 | (advance)) 144 | (let ((name (get-output-stream-string out))) 145 | (when (string= "" name) 146 | (error "The CSS selector ~s contains invalid characters around position ~d." 147 | plump-lexer:*string* plump-lexer:*index*)) 148 | (make-tag-constraint name)))) 149 | 150 | (defun read-id-constraint () 151 | "Reads an ID attribute constraint and returns it." 152 | (make-id-constraint (css-unescape (read-name)))) 153 | 154 | (defun read-class-constraint () 155 | "Reads a class constraint and returns it." 156 | (make-class-constraint (css-unescape (read-name)))) 157 | 158 | (defun read-attribute-comparator () 159 | "Reads an attribute comparator string and returns it if found." 160 | (let ((op (consume-until (make-matcher (not :attr-comparator))))) 161 | (when (< 0 (length op)) 162 | op))) 163 | 164 | (defun read-attribute-value () 165 | "Reads an attribute value and returns it." 166 | (case (peek) 167 | ((#\" #\') 168 | (prog2 169 | (consume) 170 | (consume-until (make-matcher (and (not (prev (is #\\))) 171 | (or (is #\") (is #\'))))) 172 | (consume))) 173 | (T (consume-until (make-matcher (is #\])))))) 174 | 175 | (defun read-attribute-constraint () 176 | "Reads a complete attribute constraint and returns it." 177 | (let ((name (css-unescape (read-name))) 178 | (oper (read-attribute-comparator)) 179 | (val (css-unescape (read-attribute-value)))) 180 | (prog1 181 | (make-attribute-constraint name val oper) 182 | (consume)))) 183 | 184 | (defun read-pseudo-args () 185 | "Reads an arguments list of a pseudo selector." 186 | (when (char= (or (peek) #\Space) #\() 187 | (consume) 188 | (loop with index = *index* 189 | with args = () 190 | for char = (consume) 191 | until (or (not char) (char= char #\))) 192 | do (when (char= char #\,) 193 | (push (string-trim " " (subseq *string* index (1- *index*))) args) 194 | (setf index *index*)) 195 | finally (progn 196 | (let ((arg (string-trim " " (subseq *string* index (1- *index*))))) 197 | (unless (string= arg "") 198 | (push arg args))) 199 | (return (nreverse args)))))) 200 | 201 | (defun read-pseudo-constraint () 202 | "Reads a complete pseudo constraint and returns it." 203 | (apply #'make-pseudo-constraint (read-name) (read-pseudo-args))) 204 | 205 | (defun read-constraint () 206 | "Read any constraint. Dispatches depending on the next character consumed." 207 | (case (consume) 208 | (#\* (read-any-constraint)) 209 | (#\# (read-id-constraint)) 210 | (#\. (read-class-constraint)) 211 | (#\[ (read-attribute-constraint)) 212 | (#\: (read-pseudo-constraint)) 213 | (#\^ (read-type-constraint)) 214 | (T (unread) (read-tag-constraint)))) 215 | 216 | (defun read-matcher () 217 | "Read a matcher (a sequence of constraints) and return it." 218 | (loop for peek = (peek) 219 | for loop-cond = (and peek (funcall (make-matcher (not (or :combinator :grouper))))) 220 | for constraint = (and loop-cond (read-constraint)) 221 | while loop-cond 222 | when constraint 223 | collect constraint into constraints 224 | finally (return (apply #'make-clss-matcher constraints)))) 225 | 226 | (defun read-combinator () 227 | "Reads the combinator between matchers and returns it." 228 | (let ((op (consume-until (make-matcher (not :combinator)))) 229 | (next (peek))) 230 | (unless (or (not next) (char= next #\,)) 231 | (let ((op (string-trim '(#\Space #\Newline) op))) 232 | (if (string= op "") 233 | #\Space 234 | (aref op 0)))))) 235 | 236 | (defun read-group () 237 | "Reads a selector group and returns it." 238 | (loop with list = () 239 | for combinator = (read-combinator) 240 | for matcher = (read-matcher) 241 | while combinator 242 | do (unless (find combinator *valid-combinators* :test #'char=) 243 | (error "Invalid combinator ~a." combinator)) 244 | (push combinator list) 245 | (push matcher list) 246 | finally (return (apply #'make-group (nreverse list))))) 247 | 248 | (defun read-selector () 249 | "Reads a complete selector and returns it." 250 | (loop for next = (peek) 251 | while next 252 | do (when (char= next #\,) 253 | (consume)) 254 | collect (read-group) into groups 255 | finally (return (apply #'make-selector groups)))) 256 | 257 | (defun parse-selector (string) 258 | "Parse a selector string into its \"compiled\" list form." 259 | (setf string (concatenate 'string " " string)) 260 | (with-lexer-environment (string) 261 | (read-selector))) 262 | 263 | (defun ensure-selector (thing) 264 | (etypecase thing 265 | (list thing) 266 | (string (parse-selector thing)))) 267 | -------------------------------------------------------------------------------- /engine.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:org.shirakumo.clss) 2 | 3 | (defvar *pseudo-selectors* (make-hash-table :test 'equalp) 4 | "Hash table for pseudo selector functions. 5 | Links string names to functions of one or more arguments.") 6 | 7 | (defun pseudo-selector (name) 8 | "Returns the pseudo-selector function associated with NAME, if any." 9 | (gethash (string name) *pseudo-selectors*)) 10 | 11 | (defun (setf pseudo-selector) (function name) 12 | "Sets FUNCTION as the pseudo-selector for NAME." 13 | (setf (gethash (string name) *pseudo-selectors*) 14 | function)) 15 | 16 | (defun remove-pseudo-selector (name) 17 | "Removes the pseudo-selector associated with NAME." 18 | (remhash (string name) *pseudo-selectors*)) 19 | 20 | (defmacro define-pseudo-selector (name (nodename &rest args-lambda) &body body) 21 | "Define a new pseudo-selector of NAME. 22 | 23 | NAME --- A symbol or string naming the selector (case insensitive always). 24 | NODENAME --- A variable symbol the matched node is bound to. 25 | ARGS-LAMBDA --- A lambda-list of the expected arguments for the pseudo-selector. 26 | Note that keyword arguments make no sense in this context. 27 | BODY ::= form*" 28 | `(setf (pseudo-selector ,(string name)) 29 | #'(lambda (,nodename ,@args-lambda) 30 | (declare (ignorable ,nodename)) 31 | ,@body))) 32 | 33 | (define-condition pseudo-selector-not-available (error) 34 | ((%name :initarg :name :initform (error "NAME required.") :accessor name)) 35 | (:report (lambda (c s) (format s "The ~a pseudo selector doesn't make sense in a node matching engine." (name c)))) 36 | (:documentation "Condition signalled when a pseudo selector is defined according to spec, 37 | but makes no sense in the context of CLSS and has thus been left 38 | unimplemented.")) 39 | 40 | (define-condition undefined-pseudo-selector (error) 41 | ((%name :initarg :name :initform (error "NAME required.") :accessor name)) 42 | (:report (lambda (c s) (format s "The ~a pseudo selector is not defined!" (name c)))) 43 | (:documentation "Condition signalled when trying to use a pseudo selector that has not 44 | been defined. This is signalled at match-time, rather than at 45 | selector-compile-time.")) 46 | 47 | (define-condition selector-malformed (error) 48 | ((%selector :initarg :selector :initform (error "Selector malformed.") :accessor selector)) 49 | (:report (lambda (c s) (format s "Selector is malformed: ~a" (selector c)))) 50 | (:documentation "Signalled when a selector or matcher has been found to be malformed. 51 | This really shouldn't happen unless you're passing raw lists 52 | for the selector to the matcher.")) 53 | 54 | (define-condition complete-match-pair (condition) 55 | ((%value :initarg :value :initform NIL :accessor value)) 56 | (:documentation "Condition signalled to immediately return from MATCH-PAIR.")) 57 | 58 | (defun find-substring (item string split) 59 | "Returns ITEM if it is an element of STRING split by the SPLIT character." 60 | (declare (optimize speed) 61 | (type string item string) 62 | (type character split)) 63 | (macrolet ((with-stringcase (var &body body) 64 | `(typecase ,var 65 | (simple-base-string ,@body) 66 | ((and simple-string (not simple-base-string)) ,@body) 67 | ((and string (not simple-string)) ,@body)))) 68 | (with-stringcase item 69 | (with-stringcase string 70 | (let ((start 0) (end 0)) 71 | (declare (type fixnum start end)) 72 | (flet ((test () 73 | (when (string= item string :start2 start :end2 end) 74 | (return-from find-substring item)))) 75 | (declare (inline test)) 76 | (loop while (< end (length string)) 77 | do (let ((char (aref string end))) 78 | (declare (type character char)) 79 | (when (char= char split) 80 | (test) 81 | (setf start (1+ end))) 82 | (incf end)) 83 | finally (test) (return NIL)))))))) 84 | 85 | (declaim (ftype (function (list plump-dom:node) 86 | (values boolean)) 87 | match-constraint)) 88 | (defun match-constraint (constraint node) 89 | "Attempts to match the CONSTRAINT form against the node. 90 | Returns NIL if it fails to do so, unspecified otherwise." 91 | (declare (optimize speed)) 92 | (when (ecase (car constraint) 93 | (:c-any 94 | (and (not (text-node-p node)) 95 | (not (comment-p node)))) 96 | (:c-tag 97 | (and (element-p node) 98 | (string-equal (tag-name node) (second constraint)))) 99 | (:c-type 100 | (typep node (second constraint))) 101 | (:c-id 102 | (and (element-p node) 103 | (string-equal (attribute node "id") (second constraint)))) 104 | (:c-class 105 | (and (element-p node) 106 | (not (null (find-substring (second constraint) (or (attribute node "class") "") #\Space))))) 107 | (:c-attr-exists 108 | (and (element-p node) 109 | (not (null (attribute node (second constraint)))))) 110 | (:c-attr-equals 111 | (and (element-p node) 112 | (destructuring-bind (comparator attribute value) (cdr constraint) 113 | (declare (type simple-string comparator attribute value)) 114 | (let ((attr (attribute node attribute)) 115 | (value value)) 116 | (declare (type (or null string) attr)) 117 | (when attr 118 | (ecase (aref comparator 0) 119 | (#\= 120 | (not (null (string-equal attr value)))) 121 | (#\~ 122 | (not (null (find-substring value attr #\Space)))) 123 | (#\^ 124 | (and (<= (length value) (length attr)) 125 | (string= value attr :end2 (length value)))) 126 | (#\$ 127 | (and (<= (length value) (length attr)) 128 | (string= value attr :start2 (- (length attr) (length value))))) 129 | (#\* 130 | (not (null (search value attr)))) 131 | (#\| 132 | (not (null (find-substring value attr #\-)))))))))) 133 | (:c-pseudo 134 | (and (element-p node) 135 | (destructuring-bind (name &rest args) (cdr constraint) 136 | (let ((pseudo (pseudo-selector name))) 137 | (declare (type function pseudo)) 138 | (assert (not (null pseudo)) () 'undefined-pseudo-selector :name name) 139 | (not (null (apply pseudo node args)))))))) 140 | (values t))) 141 | 142 | (declaim (ftype (function (list plump:node) 143 | (values boolean)) 144 | match-matcher)) 145 | (defun match-matcher (matcher node) 146 | "Attempts to match a matcher against a node. 147 | Returns T if all constraints match, NIL otherwise." 148 | (declare (optimize speed)) 149 | (assert (eq (car matcher) :matcher) () 'selector-malformed matcher) 150 | (loop for constraint in (cdr matcher) 151 | always (match-constraint constraint node))) 152 | 153 | (declaim (ftype (function (character list plump:node (function (plump:node) T)) 154 | (values &optional null)) 155 | match-pair-depth)) 156 | (defun match-pair-depth (combinator matcher parent matching-nodes-processor) 157 | "Match a combinator and matcher pair against a list of nodes. For every match 158 | the function specified in \"MATCHING-NODES-PROCESSOR\" is called with the found 159 | match as the only argument." 160 | (declare (optimize speed)) 161 | (handler-case 162 | (prog1 nil 163 | (case combinator 164 | (#\Space 165 | (labels ((match-recursive (nodes) 166 | (declare ((and (vector plump:node) (not simple-array)) nodes)) 167 | (loop for node across nodes 168 | when (match-matcher matcher node) 169 | do (funcall matching-nodes-processor node) 170 | when (plump:nesting-node-p node) 171 | do (match-recursive (children node))))) 172 | (match-recursive (children parent)))) 173 | (#\> 174 | (loop for node across (the (and (vector plump:node) (not simple-array)) 175 | (children parent)) 176 | when (match-matcher matcher node) 177 | do (funcall matching-nodes-processor node))) 178 | (#\+ 179 | (let ((position (child-position parent)) 180 | (family (family parent))) 181 | (declare (type fixnum position) 182 | (type (and (vector plump-dom:child-node) 183 | (not simple-array)) 184 | family)) 185 | (loop for i of-type fixnum from position below (1- (fill-pointer family)) 186 | for sibling = (aref family (1+ i)) 187 | ;; This is gross. In order to properly support 188 | ;; edge cases like a foo+^bar we cannot exclude 189 | ;; anything other than these two... 190 | do (when (and (not (text-node-p parent)) 191 | (not (comment-p parent))) 192 | (when (match-matcher matcher sibling) 193 | (funcall matching-nodes-processor sibling)) 194 | (return))))) 195 | (#\~ 196 | (let ((position (child-position parent)) 197 | (family (family parent))) 198 | (declare (type fixnum position) 199 | (type (and (vector plump-dom:child-node) 200 | (not simple-array)) 201 | family)) 202 | (loop for i of-type fixnum from position below (1- (fill-pointer family)) 203 | for sibling = (aref family (1+ i)) 204 | do (when (match-matcher matcher sibling) 205 | (funcall matching-nodes-processor sibling))))))) 206 | (complete-match-pair (o) 207 | (loop for node across (value o) 208 | do (funcall matching-nodes-processor node))))) 209 | 210 | (declaim (ftype (function (character list (and (vector plump:node) (not simple-array))) 211 | (values (and (vector plump:node) (not simple-array)) &optional)) 212 | match-pair-breadth)) 213 | (defun match-pair-breadth (combinator matcher nodes) 214 | "Match a combinator and matcher pair against a list of nodes. 215 | Returns a vector of matching nodes." 216 | (declare (optimize speed)) 217 | (handler-case 218 | (let ((resultset (make-array (length nodes) :adjustable T :fill-pointer 0))) 219 | (case combinator 220 | (#\Space 221 | (labels ((match-recursive (nodes) 222 | (declare ((and (vector plump:node) (not simple-array)) nodes)) 223 | (loop for node across nodes 224 | when (match-matcher matcher node) 225 | do (vector-push-extend node resultset) 226 | when (nesting-node-p node) 227 | do (match-recursive (children node))))) 228 | (loop for node across nodes 229 | do (match-recursive (children node))))) 230 | (#\> 231 | (loop for parent across nodes 232 | do (loop for node across (the (and (vector plump:node) (not simple-array)) 233 | (children parent)) 234 | when (match-matcher matcher node) 235 | do (vector-push-extend node resultset)))) 236 | (#\+ 237 | (loop for node across nodes 238 | for position of-type fixnum = (child-position node) 239 | for family = (family node) 240 | do (loop for i of-type fixnum from position below (1- (fill-pointer family)) 241 | for sibling = (aref family (1+ i)) 242 | ;; This is gross. In order to properly support 243 | ;; edge cases like a foo+^bar we cannot exclude 244 | ;; anything other than these two... 245 | do (when (and (not (text-node-p node)) 246 | (not (comment-p node))) 247 | (when (match-matcher matcher sibling) 248 | (vector-push-extend sibling resultset)) 249 | (return))))) 250 | (#\~ 251 | (loop for node across nodes 252 | for position of-type fixnum = (child-position node) 253 | for family = (family node) 254 | do (loop for i of-type fixnum from position below (fill-pointer family) 255 | for sibling = (aref family i) 256 | do (when (match-matcher matcher sibling) 257 | (vector-push-extend sibling resultset) 258 | (return)))))) 259 | resultset) 260 | (complete-match-pair (o) 261 | (return-from match-pair-breadth (value o))))) 262 | 263 | (declaim (ftype (function (list (or plump:node vector list) &optional keyword) 264 | (values (and (vector plump:node) (not simple-array)))) 265 | match-selector)) 266 | (defun match-group (group root-node &optional (search-type :depth-first)) 267 | "Match a matcher group against the root-node and possibly all its children. 268 | Returns an array of mached nodes." 269 | (declare (optimize debug)) 270 | (assert (eq (car group) :group) () 'selector-malformed) 271 | (let ((group (cdr group))) 272 | (ecase search-type 273 | (:depth-first 274 | (let* ((result (make-array 10 :adjustable T :fill-pointer 0))) 275 | (labels ((add-to-result (node) 276 | (vector-push-extend node result)) 277 | (search-node (node group) 278 | (let ((combinator (car group)) 279 | (matcher (cadr group)) 280 | (group (cddr group))) 281 | (if group 282 | (match-pair-depth combinator 283 | matcher 284 | node 285 | (lambda (node) 286 | (search-node node group))) 287 | (match-pair-depth combinator 288 | matcher 289 | node 290 | #'add-to-result))))) 291 | (etypecase root-node 292 | (plump:node (search-node root-node group)) 293 | (sequence (map nil 294 | (lambda (node) (search-node node group)) 295 | root-node))) 296 | result))) 297 | (:breadth-first 298 | (loop with nodes = (etypecase root-node 299 | (plump:node (make-array 1 :initial-element root-node :adjustable T :fill-pointer T)) 300 | (vector root-node) 301 | (list (coerce root-node 'vector))) 302 | for combinator = (pop group) 303 | for matcher = (pop group) 304 | while matcher 305 | do (setf nodes (match-pair-breadth combinator matcher nodes)) 306 | finally (return nodes)))))) 307 | 308 | (declaim (ftype (function (list (or plump:node vector list) keyword) 309 | (values (and (vector plump:node) (not simple-array)))) 310 | match-selector)) 311 | (defun match-selector (selector root-node search-type) 312 | "Match a selector against the root-node and possibly all its children. 313 | Returns an array of matched nodes." 314 | (declare (optimize speed)) 315 | (assert (eq (car selector) :selector) () 'selector-malformed) 316 | (let ((selector (cdr selector))) 317 | (loop with result = (match-group (pop selector) root-node search-type) 318 | for group in selector 319 | do (array-utils:vector-append result (match-group group root-node search-type)) 320 | finally (return result)))) 321 | 322 | (declaim (ftype (function ((or string list) (or plump:node vector list) &optional keyword) 323 | (values (and (vector plump:node) (not simple-array)) &optional)) 324 | select)) 325 | (defun select (selector root-node &optional (search-type :depth-first)) 326 | "Match the given selector against the root-node and possibly all its children. 327 | Returns an array of matched nodes. 328 | 329 | SELECTOR --- A CSS-selector string or a compiled selector list. 330 | ROOT-NODE --- A single node, list or vector of nodes to start matching from. 331 | SEARCH-TYPE --- Select the search algorithm, options are \":depth-first\" and \":breadth-first\"." 332 | (match-selector (ensure-selector selector) root-node search-type)) 333 | 334 | (define-compiler-macro select (&whole whole &environment env selector root-node &optional (search-type :depth-first)) 335 | (if (constantp selector env) 336 | `(match-selector (load-time-value (ensure-selector ,selector)) ,root-node ,search-type) 337 | whole)) 338 | 339 | (declaim (ftype (function (list plump:node) boolean) match-group-backwards)) 340 | (defun match-group-backwards (group node) 341 | (declare (optimize speed)) 342 | (assert (eql (car group) :group) () 'selector-malformed) 343 | (let ((group (reverse (cdr group)))) 344 | (when (match-matcher (pop group) node) 345 | (loop for combinator = (pop group) 346 | for matcher = (pop group) 347 | while matcher 348 | do (case combinator 349 | (#\Space 350 | (loop do (setf node (parent node)) 351 | (when (or (not node) (root-p node)) 352 | (return-from match-group-backwards NIL)) 353 | until (match-matcher matcher node))) 354 | (#\> 355 | (setf node (parent node)) 356 | (unless (and node (not (root-p node)) (match-matcher matcher node)) 357 | (return-from match-group-backwards NIL))) 358 | (#\+ 359 | (setf node (previous-element node)) 360 | (unless (and node (match-matcher matcher node)) 361 | (return-from match-group-backwards NIL))) 362 | (#\~ 363 | (loop for i of-type fixnum downfrom (child-position node) above 0 364 | for sibling = (aref (family node) i) 365 | do (when (match-matcher matcher sibling) 366 | (setf node sibling) 367 | (return)) 368 | finally (return-from match-group-backwards NIL)))) 369 | finally (return T))))) 370 | 371 | (declaim (ftype (function (T plump:node) boolean) node-matches-p)) 372 | (defun node-matches-p (selector node) 373 | "Tests whether the node matches the selector. 374 | 375 | SELECTOR --- A CSS-selector string or a compiled selector list. 376 | NODE --- The node to test." 377 | (declare (optimize speed)) 378 | (let ((selector (ensure-selector selector))) 379 | (assert (eql (car selector) :selector) () 'selector-malformed) 380 | (loop for group in (cdr selector) 381 | thereis (match-group-backwards group node)))) 382 | 383 | (define-compiler-macro node-matches-p (&whole whole &environment env selector root-node) 384 | (if (constantp selector env) 385 | `(node-matches-p (load-time-value (ensure-selector ,selector)) ,root-node) 386 | whole)) 387 | 388 | (defun ordered-select (selector root-node) 389 | "Match the given selector against the root-node and possibly all its children. 390 | Return an array of matching nodes ordered by their depth-first 391 | traversal appearance in the DOM. 392 | 393 | SELECTOR --- A CSS-selector string or a compiled selector list. 394 | ROOT-NODE --- A single node, list or vector of nodes to start matching from." 395 | (declare (optimize speed)) 396 | (let ((matched-nodes (make-array 0 :adjustable T :fill-pointer 0)) 397 | (selector (ensure-selector selector))) 398 | (assert (eql (car selector) :selector) () 'selector-malformed) 399 | (labels ((collect-if-match (element) 400 | (when (clss:node-matches-p selector element) 401 | (vector-push-extend element matched-nodes)) 402 | (map NIL #'collect-if-match (plump:child-elements element)))) 403 | (collect-if-match root-node) 404 | matched-nodes))) 405 | 406 | (define-compiler-macro ordered-select (&whole whole &environment env selector root-node) 407 | (if (constantp selector env) 408 | `(ordered-select (load-time-value (ensure-selector ,selector)) ,root-node) 409 | whole)) 410 | --------------------------------------------------------------------------------