├── .gitignore
├── CHANGES
├── PORTING-TO-DO
├── README
├── info.rkt
└── sxml
├── ddo-axes.rkt
├── ddo-txpath.rkt
├── info.rkt
├── lazy-ssax.rkt
├── lazy-xpath.rkt
├── main.rkt
├── modif.rkt
├── scribblings
├── .gitignore
├── SXML.scm.tmp
├── all-exported.scrbl
├── extract-provides.rkt
├── extracted-sperber.scrbl
├── sax-parsing.scrbl
├── serialization.scrbl
├── sxml-rep.scrbl
├── sxml.scrbl
├── sxpath.scrbl
├── sxslt.scrbl
└── util.rkt
├── serializer.rkt
├── ssax
├── SSAX-code.rkt
├── SXML-tree-trans.rkt
├── access-remote.rkt
├── doc.txt
├── errors-and-warnings.rkt
├── id.rkt
├── info.rkt
├── input-parse.rkt
├── multi-parser.rkt
├── myenv.rkt
├── parse-error.rkt
├── ssax-prim.rkt
├── ssax.rkt
├── sxpathlib.rkt
├── tests
│ └── vinput-parse.scm
├── util.rkt
└── xlink-parser.rkt
├── sxml-tools.rkt
├── sxpath-ext.rkt
├── sxpath.rkt
├── tests
├── README
├── if.xml
├── ssax-tests.rkt
├── tests.rkt
├── vSXML-to-HTML.rkt
└── vSXML-tree-trans.rkt
├── txpath.rkt
├── xpath-ast.rkt
├── xpath-context_xlink.rkt
└── xpath-parser.rkt
/.gitignore:
--------------------------------------------------------------------------------
1 | compiled/
2 | doc/
3 | *~
4 | planet-docs/
5 | coverage/
6 |
--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
1 | - (require (planet clements/sxml2)) includes both sxml and ssax
2 | - taking out the (unused) run-test define in SSAX-code.rkt
3 | - moving tests (such as they are) to a sub-directory
4 |
--------------------------------------------------------------------------------
/PORTING-TO-DO:
--------------------------------------------------------------------------------
1 | - ADD DOCUMENTATION !!!
2 | - add a test suite-- what's in the CVS repo?
3 |
4 | - decide what to do with all the version strings
5 | - remove the "bug reports to lisovsky/lizorkin" messages
6 | - change to set-mcar! to get mutation back?
7 |
8 | - (ryanc) add (or find?) sxml-top?, sxml-element?, etc predicates
9 | - (ryanc) parameters and/or kw args for continuable errors / warnings
10 | - (ryanc) parameters for defaults, eg namespace-abbreviations
11 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | This repository represents a PLT package containing SXML, a family of XML manipulation
2 | tools designed by Oleg Kiselyov and written by lots of people, including
3 |
4 | - Oleg Kiselyov,
5 | - Kirill Lisovsky,
6 | - Dmitry Lizorkin,
7 | - Michael Sperber,
8 | - Neil Van Dyke,
9 | - Ryan Culpepper,
10 | - John Clements,
11 |
12 | ... and others that I've forgotten (pull requests welcome!).
13 |
--------------------------------------------------------------------------------
/info.rkt:
--------------------------------------------------------------------------------
1 | #lang setup/infotab
2 |
3 | (define collection 'multi)
4 | (define deps '("base" "srfi-lib"))
5 | (define build-deps '("racket-doc" "scribble-lib"))
6 |
--------------------------------------------------------------------------------
/sxml/info.rkt:
--------------------------------------------------------------------------------
1 | #lang setup/infotab
2 |
3 | (define name "sxml")
4 | (define blurb
5 | (list "Collection of tools for processing markup documents "
6 | "in the form of S-expressions"))
7 | (define primary-file "main.rkt")
8 | (define homepage "http://modis.ispras.ru/Lizorkin/sxml-tutorial.html")
9 | (define categories '(xml))
10 | (define compile-omit-paths '("tests/" "ssax/tests/"))
11 | (define repositories '("4.x"))
12 | (define release-notes (list "bug fixes, serializers can now accept paths"))
13 | (define version "2012-02-16 10:11")
14 |
15 | (define scribblings '(("scribblings/sxml.scrbl" (multi-page))))
16 |
--------------------------------------------------------------------------------
/sxml/lazy-ssax.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require racket/promise
3 | srfi/13/string
4 | "ssax/parse-error.rkt"
5 | "ssax/SSAX-code.rkt"
6 | "lazy-xpath.rkt")
7 | (provide (all-defined-out))
8 |
9 | ;; A specialized lazy XML->SXML parser
10 | ; Is heavily based on continuations
11 |
12 | ;-------------------------------------------------
13 | ; Preliminary helper functions
14 |
15 | ; A helper that forces all descendants of a given node or a nodeset
16 | (define (lazy:force-descendants node)
17 | (cond
18 | ((lazy:promise? node) ; force it
19 | (lazy:force-descendants (force node)))
20 | ((pair? node) ; not null
21 | (for-each lazy:force-descendants node))
22 | (else ; null or not pair
23 | #t ; nothing to be done
24 | )))
25 |
26 | ; Returns the list containing of all members of the argument list except
27 | ; for the last member
28 | (define (lazy:except-last lst)
29 | (if
30 | (or (null? lst) ; this shouldn't happen
31 | (null? (cdr lst)))
32 | '()
33 | (cons (car lst) (lazy:except-last (cdr lst)))))
34 |
35 | ;-------------------------------------------------
36 | ;
37 |
38 | ; Returns the common part of the seed
39 | (define (lazy:seed-common seed)
40 | ((if (null? (cdr seed)) ; a short seed
41 | car caddr)
42 | seed))
43 |
44 | ; A monad-like handler
45 | ; Replaces the common part of the seed
46 | (define (lazy:replace-common seed new-common)
47 | (if (null? (cdr seed)) ; a short seed
48 | (list new-common)
49 | (list (car seed)
50 | (cadr seed)
51 | new-common
52 | (cadddr seed))))
53 |
54 | ; Produces a lazy SXML document, which corresponds to reading a source
55 | ; document in a stream-wise fashion
56 | (define (lazy:xml->sxml port namespace-prefix-assig)
57 | (let ((namespaces
58 | (map (lambda (el)
59 | (list* #f (car el) (ssax:uri-string->symbol (cdr el))))
60 | namespace-prefix-assig))
61 | (RES-NAME->SXML
62 | (lambda (res-name)
63 | (string->symbol
64 | (string-append
65 | (symbol->string (car res-name))
66 | ":"
67 | (symbol->string (cdr res-name)))))))
68 | ((lambda (result)
69 | ; We assume that nobody follows the document element
70 | (if (null? namespace-prefix-assig)
71 | (cons '*TOP* (lazy:except-last result))
72 | (cons
73 | '*TOP*
74 | (cons
75 | `(@@ (*NAMESPACES*
76 | ,@(map
77 | (lambda (ns) (list (car ns) (cdr ns)))
78 | namespace-prefix-assig)))
79 | (lazy:except-last result)))))
80 | (call-with-current-continuation ; we grab the continuation to escape from parsing
81 | (lambda (result-k)
82 | ; seed ::= (list result-k state-k common-seed level)
83 | ; result-k - continuation on what to do with the current result portion
84 | ; state-k - continuation to return to SSAX state on this level of XML
85 | ; tree hierarchy
86 | ; common-seed - general seed information
87 | ; level - level of a current node in a tree hierarchy
88 | ((ssax:make-parser
89 | NEW-LEVEL-SEED
90 | (lambda (elem-gi attributes namespaces expected-content seed)
91 | ;(pp (cons elem-gi (cadddr seed)))
92 | (if
93 | (or (null? (cdr seed)) ; short seed
94 | (> (cadddr seed) 3)) ; deep level
95 | (list '()) ; work like a conventional SSAX parser
96 | (let ((attrs
97 | (attlist-fold
98 | (lambda (attr accum)
99 | (cons (list
100 | (if (symbol? (car attr)) (car attr)
101 | (RES-NAME->SXML (car attr)))
102 | (cdr attr)) accum))
103 | '() attributes)))
104 | (call-with-current-continuation
105 | (lambda (new-level-k) ; how to parse next
106 | ((car seed) ; return the result
107 | (let ((elem-content
108 | ; A promise to continue parsing
109 | (call-with-current-continuation ; where to put the result
110 | (lambda (elem-k)
111 | (new-level-k
112 | (list ; now form a seed
113 | elem-k ; what to do with result
114 | new-level-k ; SSAX state on this level
115 | '() ; common-seed is empty
116 | (+ (cadddr seed) 1) ; increase level
117 | ))))))
118 | (append
119 | ; Previous string content
120 | (ssax:reverse-collect-str-drop-ws (caddr seed))
121 | (list
122 | (cons
123 | (if (symbol? elem-gi) elem-gi
124 | (RES-NAME->SXML elem-gi))
125 | (if (null? attrs) elem-content
126 | (cons (cons '@ attrs) elem-content)))
127 | ; The following siblings of this element
128 | (delay
129 | (call-with-current-continuation ; where to put the result
130 | (lambda (foll-k)
131 | ; First we force the parsing of the current element
132 | (lazy:force-descendants elem-content)
133 | ; Than continue parsing
134 | ((cadr seed) ; recover the parent level of nesting
135 | (list
136 | foll-k ; what to do with result
137 | (cadr seed)
138 | '() ; common-seed is empty
139 | (cadddr seed) ; the same level for siblings
140 | ))))))))))))))
141 |
142 | FINISH-ELEMENT
143 | (lambda (elem-gi attributes namespaces parent-seed seed)
144 | (if
145 | (null? (cdr seed)) ; a short seed
146 | (let ((common (ssax:reverse-collect-str-drop-ws
147 | (lazy:seed-common seed)))
148 | (attrs
149 | (attlist-fold
150 | (lambda (attr accum)
151 | (cons (list
152 | (if (symbol? (car attr)) (car attr)
153 | (RES-NAME->SXML (car attr)))
154 | (cdr attr)) accum))
155 | '() attributes)))
156 | (lazy:replace-common
157 | parent-seed
158 | (cons
159 | (cons
160 | (if (symbol? elem-gi) elem-gi
161 | (RES-NAME->SXML elem-gi))
162 | (if (null? attrs) common
163 | (cons (cons '@ attrs) common)))
164 | (lazy:seed-common parent-seed))))
165 | ; Otherwise - just return the remaining character content
166 | ((car seed) ; continuation
167 | (ssax:reverse-collect-str-drop-ws
168 | (lazy:seed-common seed)))))
169 |
170 | CHAR-DATA-HANDLER
171 | (lambda (string1 string2 seed)
172 | ;(pp (list string1 string2 seed))
173 | (lazy:replace-common
174 | seed
175 | (if (string-null? string2)
176 | (cons string1 (lazy:seed-common seed))
177 | (list* string2 string1 (lazy:seed-common seed)))))
178 |
179 | DOCTYPE
180 | (lambda (port docname systemid internal-subset? seed)
181 | (when internal-subset?
182 | (ssax:warn port
183 | "Internal DTD subset is not currently handled ")
184 | (ssax:skip-internal-dtd port))
185 | (ssax:warn port "DOCTYPE DECL " docname " "
186 | systemid " found and skipped")
187 | (values #f '() namespaces seed))
188 |
189 | UNDECL-ROOT
190 | (lambda (elem-gi seed)
191 | (values #f '() namespaces seed))
192 |
193 | PI
194 | ((*DEFAULT* .
195 | (lambda (port pi-tag seed)
196 | (lazy:replace-common
197 | seed
198 | (cons
199 | (list '*PI* pi-tag (ssax:read-pi-body-as-string port))
200 | (lazy:seed-common seed))))))
201 | )
202 | port
203 | (list ; form initial seed
204 | result-k ; put the result
205 | (lambda (seed) ; dummy top-level parser state that produces '()
206 | ((car seed) ; where to put the result nodeset
207 | '()))
208 | '()
209 | 1 ; level for the document element
210 | )))))))
211 |
--------------------------------------------------------------------------------
/sxml/main.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require "sxml-tools.rkt")
4 | (require "sxpath-ext.rkt")
5 | (require "xpath-parser.rkt")
6 | (require "txpath.rkt")
7 | (require "sxpath.rkt")
8 | (require "xpath-ast.rkt")
9 | (require "xpath-context_xlink.rkt")
10 | (require "ddo-axes.rkt")
11 | (require "ddo-txpath.rkt")
12 | (require "lazy-xpath.rkt")
13 | (require "lazy-ssax.rkt")
14 | (require "modif.rkt")
15 | (require "serializer.rkt")
16 | (provide (all-from-out "sxml-tools.rkt"))
17 | (provide (all-from-out "sxpath-ext.rkt"))
18 | (provide (all-from-out "xpath-parser.rkt"))
19 | (provide (all-from-out "txpath.rkt"))
20 | (provide (all-from-out "sxpath.rkt"))
21 | (provide (all-from-out "xpath-ast.rkt"))
22 | (provide (all-from-out "xpath-context_xlink.rkt"))
23 | (provide (all-from-out "ddo-axes.rkt"))
24 | (provide (all-from-out "ddo-txpath.rkt"))
25 | (provide (all-from-out "lazy-xpath.rkt"))
26 | (provide (all-from-out "lazy-ssax.rkt"))
27 | (provide (all-from-out "modif.rkt"))
28 | (provide (all-from-out "serializer.rkt"))
29 |
30 | (require "ssax/multi-parser.rkt"
31 | "ssax/sxpathlib.rkt"
32 | "ssax/SXML-tree-trans.rkt"
33 | "ssax/SSAX-code.rkt")
34 | (provide ssax:make-parser
35 | ssax:multi-parser
36 | (all-from-out "ssax/sxpathlib.rkt")
37 | pre-post-order
38 | ssax:xml->sxml)
39 |
--------------------------------------------------------------------------------
/sxml/scribblings/.gitignore:
--------------------------------------------------------------------------------
1 | racket.css
2 | scribble-common.js
3 | scribble-style.css
4 | scribble.css
5 | sxml.html
6 |
--------------------------------------------------------------------------------
/sxml/scribblings/extract-provides.rkt:
--------------------------------------------------------------------------------
1 | #lang racket
2 |
3 | (require racket/runtime-path)
4 |
5 | (define-runtime-path parent-dir "..")
6 |
7 | (define-namespace-anchor ns-anchor)
8 |
9 | (define (interleave requires-and-provides)
10 | (define reqs (map
11 | (lambda (x)
12 | (list 'section (second x)))
13 | (filter (lambda (x) (equal? (first x) '#%require))
14 | requires-and-provides)))
15 | (define pros (filter (lambda (x) (equal? (first x) '#%provide))
16 | requires-and-provides))
17 | (apply append
18 | (map list reqs pros)))
19 |
20 | (module+ main
21 | (call-with-input-file (build-path parent-dir "ssax" "ssax.rkt")
22 | (lambda (port)
23 | (parameterize ([current-directory (build-path parent-dir "ssax")]
24 | [read-accept-reader #t]
25 | [current-namespace (namespace-anchor->namespace ns-anchor)])
26 | (interleave
27 | (rest (fourth (syntax->datum (expand (read-syntax "main.rkt" port))))))))))
28 |
29 |
--------------------------------------------------------------------------------
/sxml/scribblings/sax-parsing.scrbl:
--------------------------------------------------------------------------------
1 | #lang scribble/doc
2 | @(require scribble/manual
3 | "util.rkt"
4 | (for-label sxml))
5 |
6 | @title[#:tag "ssax"]{SAX Parsing}
7 |
8 | @defproc[(ssax:xml->sxml [port input-port?]
9 | [namespace-prefix-assig (listof (cons/c symbol? string?))])
10 | sxml?]{
11 |
12 | Reads an XML document (which can be a single XML element) from
13 | @racket[port], and returns the corresponding SXML (@racket[_top])
14 | representation. The @racket[namespace-prefix-assig] association list
15 | provides shortened forms to be used in place of namespaces.
16 |
17 | @examples[#:eval the-eval
18 | (ssax:xml->sxml
19 | (open-input-string
20 | "abcd")
21 | '())
22 | #| should be '(*TOP* (zippy (pippy (|@| (pigtails "2")) "ab") "cd")) |#
23 |
24 | (ssax:xml->sxml
25 | (open-input-string
26 | "4")
27 | '())
28 |
29 | (ssax:xml->sxml
30 | (open-input-string
31 | "4")
32 | '((v . "vehicles")))
33 | ]
34 | }
35 |
36 |
37 | @defproc[(sxml:document [url-string string?] [namespace-prefix-assig any/c]) sxml?]{
38 | Given a local file URI, return the corresponding SXML representation.
39 |
40 | NOTE: currently, this appears to work only for local documents.
41 |
42 | NAMESPACE-PREFIX-ASSIG - is passed as-is to the SSAX parser: there it is
43 | used for assigning certain user prefixes to certain namespaces.
44 |
45 | NAMESPACE-PREFIX-ASSIG is an optional argument and has an effect for an
46 | XML resource only. For an HTML resource requested, NAMESPACE-PREFIX-ASSIG
47 | is silently ignored.
48 |
49 | So, for instance, if the file @filepath{/tmp/foo.xml} contains an XML file,
50 | you should be able to call
51 |
52 | @racketblock[
53 | (sxml:document "file:///tmp/foo")]
54 |
55 | (Note the plethora of slashes required by the URI format.)
56 |
57 |
58 | }
59 |
60 |
61 | @;{has to be a defform because of Oleg's approach to argument naming.
62 | can't see a way in defform to describe result.}
63 | @defform[#:kind "procedure"
64 | (ssax:make-parser new-level-seed-spec
65 | finish-element-spec
66 | char-data-handler-spec
67 | tag-spec ...)
68 | #:grammar
69 | [(new-level-seed-spec NEW-LEVEL-SEED new-level-seed-proc)
70 | (finish-element-spec FINISH-ELEMENT finish-element-proc)
71 | (char-data-handler-spec CHAR-DATA-HANDLER char-data-handler-proc)
72 | (tag-spec tag tag-proc)]]{
73 |
74 | Returns a procedure of two arguments, an input port @racket[xml-port], and an
75 | object @racket[init-seed]. That procedure will parse the XML document
76 | produced by @racket[xml-port], and the object @racket[init-seed], according to the
77 | specifications @racket[new-level-seed-spec], @racket[finish-element-spec],
78 | @racket[char-data-handler-spec], and @racket[tag-spec]s, and will return an object
79 | of the same type as @racket[init-seed].
80 |
81 |
82 | @racket[new-level-seed-spec] consists of the tag @racket[NEW-LEVEL-SEED] in upper
83 | case, followed by a procedure @racket[new-level-seed-proc]. This procedure
84 | must take the arguments @racket[element-name], @racket[attributes], @racket[namespaces],
85 | @racket[expected-content], and @racket[seed]. It must return an object of the same
86 | type as @racket[init-seed].
87 |
88 |
89 | @racket[finish-element-spec] consists of the tag @racket[FINISH-ELEMENT] in upper
90 | case, followed by a procedure @racket[finish-element-proc]. This procedure
91 | must take the arguments @racket[element-name], @racket[attributes], @racket[namespaces],
92 | @racket[parent-seed], and @racket[seed]. It must return an object of the same type
93 | as @racket[init-seed].
94 |
95 | @racket[char-data-handler-spec] consists of the tag @racket[CHAR-DATA-HANDLER] in
96 | upper case, followed by a procedure @racket[char-data-handler-proc]. This
97 | procedure must take the arguments @racket[string-1], @racket[string-2], and @racket[seed].
98 | It must return an object of the same type as @racket[init-seed].
99 |
100 |
101 | `tag-spec': TODO.
102 |
103 |
104 | Here's an example that returns a string containing the text, after removing markup, from the
105 | XML document produced by the input port `in'.
106 |
107 | @codeblock|{
108 | #lang racket
109 |
110 | (require racket/string sxml)
111 |
112 | (define (remove-markup xml-port)
113 | (let* ([parser
114 | (ssax:make-parser NEW-LEVEL-SEED remove-markup-nls
115 | FINISH-ELEMENT remove-markup-fe
116 | CHAR-DATA-HANDLER remove-markup-cdh)]
117 | [strings (parser xml-port null)])
118 | (string-join (reverse strings) "")))
119 |
120 | (define (remove-markup-nls gi attributes namespaces expected-content
121 | seed)
122 | seed)
123 |
124 | (define (remove-markup-fe gi attributes namespaces parent-seed seed)
125 | seed)
126 |
127 | (define (remove-markup-cdh string-1 string-2 seed)
128 | (let ([seed (cons string-1 seed)])
129 | (if (non-empty-string? string-2)
130 | (cons string-2 seed)
131 | seed)))
132 |
133 | (remove-markup
134 | (open-input-string
135 | "Hello, world!"))
136 | }|
137 | }
138 |
--------------------------------------------------------------------------------
/sxml/scribblings/serialization.scrbl:
--------------------------------------------------------------------------------
1 | #lang scribble/doc
2 | @(require scribble/manual
3 | "util.rkt"
4 | (for-label sxml))
5 |
6 | @title[#:tag "srl"]{Serialization}
7 |
8 | @defproc*[([(srl:sxml->xml [sxml-obj sxml?])
9 | string?]
10 | [(srl:sxml->xml [sxml-obj sxml?]
11 | [dest (or/c output-port? path-string?)])
12 | void?])]{
13 |
14 | Serializes the SXML node or nodeset @racket[sxml-obj] into XML, with
15 | indentation to facilitate readability by a human.
16 |
17 | If @racket[dest] is not supplied, the function returns a string that
18 | contains the serialized representation of the @racket[sxml-obj]. If
19 | @racket[dest] is supplied and is a port, the functions write the
20 | serialized representation of @racket[sxml-obj] to this port. If
21 | @racket[dest] is supplied and is a string, this string is treated as
22 | an output filename, the serialized representation of
23 | @racket[sxml-obj] is written to that filename. If a file with the
24 | given name already exists, the effect is unspecified.
25 |
26 | @examples[#:eval the-eval
27 | (srl:sxml->xml '(zippy (pippy (|@| (pigtails "2")) "ab") "bc"))
28 | (srl:sxml->xml '(zippy (pippy (|@| (pigtails "2")) "ab") "bc")
29 | (current-output-port))
30 | (srl:sxml->xml (for/fold ([body '(nothing)]) ([i (in-range 5)])
31 | `(doll (|@| (level ,(number->string i))) ,body))
32 | (current-output-port))
33 | ]
34 | }
35 |
36 | @defproc*[([(srl:sxml->xml-noindent [sxml-obj sxml?])
37 | string?]
38 | [(srl:sxml->xml-noindent [sxml-obj sxml?]
39 | [dest (or/c output-port? path-string?)])
40 | void?])]{
41 |
42 | Like @racket[srl:sxml->xml] but without indentation.
43 |
44 | @examples[#:eval the-eval
45 | (srl:sxml->xml-noindent
46 | '(zippy (pippy (|@| (pigtails "2")) "ab") "bc"))
47 | (srl:sxml->xml-noindent
48 | '(zippy (pippy (|@| (pigtails "2")) "ab") "bc")
49 | (current-output-port))
50 | (srl:sxml->xml-noindent
51 | (for/fold ([body '(nothing)]) ([i (in-range 5)])
52 | `(doll (|@| (level ,(number->string i))) ,body))
53 | (current-output-port))
54 | ]
55 | }
56 |
57 | @defproc*[([(srl:sxml->html [sxml-obj sxml?])
58 | string?]
59 | [(srl:sxml->html [sxml-obj sxml?]
60 | [dest (or/c output-port? path-string?)])
61 | void?])]{
62 |
63 | Serializes the SXML node or nodeset @racket[sxml-obj] into HTML, with
64 | indentation to facilitate readability by a human.
65 |
66 | If @racket[dest] is not supplied, the functions return a string that
67 | contains the serialized representation of the @racket[sxml-obj]. If
68 | @racket[dest] is supplied and is a port, the functions write the
69 | serialized representation of @racket[sxml-obj] to this port. If
70 | @racket[dest] is supplied and is a string, this string is treated as
71 | an output filename, the serialized representation of
72 | @racket[sxml-obj] is written to that filename. If a file with the
73 | given name already exists, the effect is unspecified.
74 |
75 | NOTE: As far as I can tell, the result of this transformation
76 | is more accurately described as XHTML than as HTML. The most noticeable
77 | difference is that certain tags may be rendered surprisingly by
78 | many browsers when they are expressed in the @tt{} form
79 | rather than in the @tt{} form. Giving these tags an
80 | empty string as a body will force them to render correctly. The
81 | list of tags for which the W3 consortium recommends using the
82 | expanded form is this:
83 | @itemlist[@item{param}
84 | @item{meta}
85 | @item{link}
86 | @item{isindex}
87 | @item{input}
88 | @item{img}
89 | @item{hr}
90 | @item{frame}
91 | @item{col}
92 | @item{br}
93 | @item{basefont}
94 | @item{base}
95 | @item{area}]
96 |
97 | Should this function automatically treat these differently? Yes, probably
98 | so.
99 | }
100 |
101 | @defproc*[([(srl:sxml->html-noindent [sxml-obj sxml?])
102 | string?]
103 | [(srl:sxml->html-noindent [sxml-obj sxml?]
104 | [dest (or/c output-port? path-string?)])
105 | void?])]{
106 |
107 | Like @racket[srl:sxml->html] but without indentation.
108 | }
109 |
--------------------------------------------------------------------------------
/sxml/scribblings/sxml-rep.scrbl:
--------------------------------------------------------------------------------
1 | #lang scribble/doc
2 | @(require scribble/manual
3 | "util.rkt"
4 | (for-label sxml))
5 |
6 | @title{SXML}
7 |
8 | @deftech{SXML} is a representation of XML elements using
9 | unique s-expressions. The following grammar describes the structure of SXML:
10 |
11 | @(define ATSIGN (racketidfont "@"))
12 |
13 | @racketgrammar*[
14 | #:literals (*TOP* *PI* *COMMENT* *ENTITY* URI *NAMESPACES* |@|)
15 | [top (*TOP* maybe-annotations
16 | PI ...
17 | comment ...
18 | element)]
19 | [element (name maybe-annot-attributes child ...)]
20 | [annot-attributes (|@| attribute ... maybe-annotations)]
21 | [attribute (name maybe-value maybe-annotations)]
22 | [child element
23 | character-data-string
24 | PI
25 | comment
26 | entity]
27 | [PI (*PI* pi-target
28 | maybe-annotations
29 | processing-instruction-content-string)]
30 | [comment (*COMMENT* comment-string)]
31 | [entity (*ENTITY* public-id-string system-id-string)]
32 | [name local-name
33 | exp-name]
34 | [local-name @#,elem{symbol conforming to XML Namespace recommendation}]
35 | [exp-name @#,elem{symbol of the form @racket[_namespace-id]@litchar{:}@racket[_local-name]}]
36 | [namespace-id URI-symbol
37 | user-ns-shortcut-symbol]
38 | [namespaces (*NAMESPACES* namespace-assoc ...)]
39 | [namespace-assoc (namespace-id uri-string maybe-original-prefix)]
40 | [annotations (|@| maybe-namespaces annotation ...)]
41 | [annotation @#,elem{not yet specified}]
42 | ]
43 |
44 | Some tools, such as SXPath, use the following coarse approximation of
45 | SXML structure for simplicity:
46 |
47 | @racketgrammar*[
48 | #:literals (*TOP* *PI* *COMMENT* *ENTITY* URI *NAMESPACES* |@|)
49 | [node (name . node-list)
50 | string]
51 | [node-list (node ...)]
52 | [name local-name exp-name |@| *TOP* *PI* *COMMENT* *ENTITY* *NAMESPACES*]
53 | ]
54 |
55 | In short, an XML element is represented as a list consisting of its
56 | tag name as a symbol followed by its children nodes. If the XML
57 | element has attributes, they come immediately after the tag symbol, in
58 | a list tagged by an @racket[|@|] symbol.
59 |
60 | For example, the XML element
61 |
62 | @tt{defjkl}
63 |
64 | is represented by the SXML datum
65 |
66 | @racket['(abc "def" (ghi) "jkl")]
67 |
68 | and the XML element
69 |
70 | @tt{Barry White}
71 |
72 | is represented by the SXML datum
73 |
74 | @racket['(customer (|@| (specialness "gazonga")) "Barry White")]
75 |
76 | NOTE! Some of the sxml libraries, particularly sxml:modify, depend
77 | on the fact that sxml elements in a legal document are all "unique";
78 | as I understand it, the requirement is that no two subtrees of a given
79 | SXML document can be 'eq?' to each other. This can easily
80 | occur when rewriting a tree, for instance a pass that inserts `(delete-me)
81 | in multiple places.
82 |
83 | That's the easy part. Things get more tricky when you start talking
84 | about documents and namespaces.
85 |
86 | Refer to @hyperlink["http://okmij.org/ftp/Scheme/SXML.html"]{the
87 | original SXML specification} for a more detailed explanation of the
88 | representation, including examples.
89 |
90 |
91 | @;{ ============================================================ }
92 |
93 | @section{SXML Functions}
94 |
95 | @defproc[(sxml:element? [v any/c]) boolean?]{
96 |
97 | Returns @racket[#t] if @racket[v] is a list starting with a symbol
98 | that is not a special symbol, @racket[#f] otherwise.
99 |
100 | @examples[#:eval the-eval
101 | (sxml:element? '(p "blah"))
102 | (sxml:element? '(*COMMENT* "ignore me"))
103 | (sxml:element? '(|@| (href "link.html")))
104 | ]
105 | }
106 |
107 | @defproc[(ntype-names?? [tags (listof symbol?)])
108 | (-> any/c boolean?)]{
109 |
110 | Given a list of allowable tag names, returns a predicate that
111 | recognizes @racket[_element]s with those tags.
112 |
113 | @examples[#:eval the-eval
114 | ((ntype-names?? '(a p)) '(p "blah"))
115 | ((ntype-names?? '(a p)) '(br))
116 | ]
117 | }
118 |
119 | @defproc[(ntype?? [crit symbol?])
120 | (-> any/c boolean?)]{
121 |
122 | If @racket[_crit] is a special symbol, a predicate is returned that
123 | accepts the following classes of @racket[_node]:
124 |
125 | @itemlist[
126 | @item{@racket['|@|]: an @racket[_annot-attributes] node}
127 | @item{@racket['*]: any @racket[_element] (@racket[sxml:element?])}
128 | @item{@racket['*any*]: any @racket[_node]}
129 | @item{@racket['*text*]: any string}
130 | @item{@racket['*data*]: anything except a pair (@racket[_element])}
131 | @item{@racket['*COMMENT*]: a @racket[_comment] node}
132 | @item{@racket['*PI*]: a @racket[_PI] (processing instruction) node}
133 | @item{@racket['*ENTITY*]: an @racket[_entity] node}
134 | ]
135 |
136 | Otherwise, it is an ordinary tag name, and a predicate is returned
137 | that recognizes @racket[_element]s with that tag.
138 |
139 | @examples[#:eval the-eval
140 | ((ntype?? '*) "blah")
141 | ((ntype?? '*) '(p "blah"))
142 | ((ntype?? '*text*) "blah")
143 | ((ntype?? '*text*) '(p "blah"))
144 | ]
145 | }
146 |
147 | @defproc[(ntype-namespace-id?? [ns-id (or/c string? #f)])
148 | (-> any/c boolean?)]{
149 |
150 | Returns a predicate that recognizes @racket[_element]s with tags
151 | belonging to the namespace @racket[ns-id]. If @racket[ns-id] is
152 | @racket[#f], the predicate recognizes elements whose tags have no
153 | namespace.
154 |
155 | @examples[#:eval the-eval
156 | ((ntype-namespace-id?? "atom") '(atom:id "blah"))
157 | ((ntype-namespace-id?? "atom") '(atomic "section"))
158 | ((ntype-namespace-id?? #f) '(atomic "section"))
159 | ]
160 | }
161 |
162 | @defproc[(sxml:node? [v any/c]) boolean?]{
163 |
164 | Returns @racket[#t] for anything except an attribute list (that is,
165 | a list whose first element is @racket['|@|]).
166 |
167 | Note that the set of values accepted by @racket[sxml:node?] is
168 | different from the non-terminal @racket[_node].
169 |
170 | @examples[#:eval the-eval
171 | (sxml:node? '(a (|@| (href "link.html")) "blah"))
172 | (sxml:node? '(|@| (href "link.html")))
173 | ]
174 | }
175 |
176 | @defproc[(sxml:attr-list [node _node])
177 | (listof _attribute)]{
178 |
179 | If @racket[node] is an @racket[_element], returns its list of
180 | attributes (or @racket['()]) if it has no attributes; for all other
181 | types of @racket[_node], returns @racket['()].
182 |
183 | @examples[#:eval the-eval
184 | (sxml:attr-list '(a (|@| (href "link.html")) "blah"))
185 | (sxml:attr-list '(p "blah"))
186 | (sxml:attr-list "blah")
187 | ]
188 | }
189 |
190 | @;{ ============================================================ }
191 | @;{ -- From sxml-tools.rkt -- }
192 |
193 | @defproc[(sxml:attr-list-node [elem sxml:element?])
194 | (or/c #f (cons/c '|@| (listof @#,racket[_attribute])))]{
195 |
196 | Returns an element's attribute list node, or @racket[#f] it is has
197 | none. Compare @racket[sxml:attr-list].
198 |
199 | @examples[#:eval the-eval
200 | (sxml:attr-list-node '(a (|@| (href "link.html")) "blah"))
201 | (sxml:attr-list-node '(p "blah"))
202 | ]
203 | }
204 |
205 | @;{
206 | sxml:attr-as-list
207 | sxml:aux-list-node
208 | sxml:aux-as-list
209 | }
210 |
211 | @defproc[(sxml:empty-element? [elem sxml:element?])
212 | boolean?]{
213 |
214 | Returns @racket[#t] if @racket[elem] has no nested elements, text
215 | nodes, etc. The element may have attributes.
216 |
217 | @examples[#:eval the-eval
218 | (sxml:empty-element? '(br))
219 | (sxml:empty-element? '(p "blah"))
220 | (sxml:empty-element? '(link (|@| (rel "self") (href "here.html"))))
221 | ]
222 | }
223 |
224 | @;{
225 | sxml:shallow-normalized?
226 | sxml:normalized
227 | sxml:shallow-minimized?
228 | sxml:minimized?
229 | sxml:name ;; what is domain???
230 | }
231 |
232 | @defproc[(sxml:element-name [elem sxml:element?])
233 | symbol?]{
234 |
235 | Returns an element's tag.
236 | }
237 |
238 | @defproc[(sxml:ncname [qualified-name symbol?])
239 | string?]{
240 |
241 | Returns the local part of a qualified name.
242 | }
243 |
244 | @defproc[(sxml:name->ns-id [qualified-name symbol?])
245 | (or/c string? #f)]{
246 |
247 | Returns the namespace part of a qualified name.
248 | }
249 |
250 | @defproc[(sxml:content [node-or-nodeset (or/c _node nodeset?)])
251 | (listof _node)]{
252 | Returns the contents (elements and text nodes) of an element or
253 | nodeset.
254 | }
255 |
256 | @defproc[(sxml:text [node-or-nodeset (or/c _node nodeset?)])
257 | string?]{
258 |
259 | Returns a string consisting of all of the character data immediately
260 | within @racket[node-or-nodeset].
261 |
262 | @examples[#:eval the-eval
263 | (sxml:text '(p (em "red") " fish; " (em "blue") " fish"))
264 | ]
265 | }
266 |
267 | @defproc[(sxml:attr [elem sxml:element?]
268 | [attr-name symbol?])
269 | (or/c string? #f)]{
270 |
271 | Gets the value of the @racket[attr-name] attribute of @racket[elem].
272 | }
273 |
274 | @;{
275 | sxml:content-raw
276 | sxml:attr-list-u
277 | sxml:aux-list
278 | sxml:aux-list-u
279 | sxml:aux-node
280 | sxml:aux-nodes
281 | sxml:attr-from-list
282 | sxml:num-attr
283 | sxml:attr-u
284 | sxml:ns-list
285 | sxml:ns-id->nodes
286 | sxml:ns-id->uri
287 | sxml:ns-uri->nodes
288 | sxml:ns-id
289 | sxml:ns-uri
290 | sxml:ns-prefix
291 | }
292 |
293 | @defproc[(sxml:change-content [elem sxml:element?]
294 | [new-content (listof _child)])
295 | sxml:element?]{
296 |
297 | Replaces the content of @racket[elem] with @racket[new-content],
298 | preserving its attributes and auxiliary information.
299 | }
300 |
301 | @defproc[(sxml:change-attrlist [elem sxml:element?]
302 | [new-attrlist (listof _attribute)])
303 | sxml:element?]{
304 |
305 | Replaces the attributes of @racket[elem] with @racket[new-attrlist],
306 | preserving its contents and auxiliary information.
307 | }
308 |
309 | @defproc[(sxml:change-name [elem sxml:element?]
310 | [tag symbol?])
311 | sxml:element?]{
312 |
313 | Changes the tag name of @racket[elem], preserving its attributes,
314 | auxiliary information, and contents.
315 | }
316 |
317 | @defproc[(sxml:set-attr [elem sxml:element?]
318 | [attr (list/c symbol? any/c)])
319 | sxml:element?]{
320 |
321 | Returns an element like @racket[elem] but with the attribute
322 | @racket[attr], which replaces any existing attribute with
323 | @racket[attr]'s key.
324 | }
325 |
326 | @defproc[(sxml:add-attr [elem sxml:element?]
327 | [attr (list/c symbol? any/c)])
328 | (or/c sxml:element? #f)]{
329 |
330 | Like @racket[sxml:set-attr], but returns @racket[#f] if
331 | @racket[elem] already contains an attribute with @racket[attr]'s
332 | key.
333 | }
334 |
335 | @defproc[(sxml:change-attr [elem sxml:element?]
336 | [attr (list/c symbol? any/c)])
337 | (or/c sxml:element? #f)]{
338 |
339 | Like @racket[sxml:set-attr], but returns @racket[#f] unless
340 | @racket[elem] already contains an attribute with @racket[attr]'s
341 | key.
342 | }
343 |
344 | @defproc[(sxml:squeeze [elem sxml:element?])
345 | sxml:element?]{
346 |
347 | Eliminates empty attribute lists and auxiliary lists.
348 | }
349 |
350 | @defproc[(sxml:clean [elem sxml:element?])
351 | sxml:element?]{
352 |
353 | Eliminates empty attribute lists and removes all auxilary lists.
354 | }
355 |
356 | @;{
357 | sxml:add-aux
358 | }
359 |
360 | @;{ -- }
361 |
362 | @;{
363 | sxml:node-parent
364 | sxml:lookup
365 | sxml:attr->xml
366 | sxml:string->xml
367 | sxml:sxml->xml
368 | sxml:attr->html
369 | sxml:string->html
370 | sxml:sxml->html
371 | }
372 |
--------------------------------------------------------------------------------
/sxml/scribblings/sxml.scrbl:
--------------------------------------------------------------------------------
1 | #lang scribble/doc
2 | @(require scribble/manual
3 | "util.rkt"
4 | (for-label racket/base
5 | sxml))
6 |
7 | @title{SXML: S-Expression Representation of XML}
8 | @;{@author[(author+email "John Clements" "clements@racket-lang.org")]}
9 |
10 | @defmodule[sxml]{
11 |
12 | This library contains Oleg Kiselyov's SXML
13 | libraries in a Racket-friendly format. It is a direct descendant of
14 | Dmitry Lizorkin's PLaneT package. It's different from that package in that
15 |
16 | @itemize[#:style 'ordered
17 | @item{It contains some documentation (here it is!),}
18 | @item{it contains some tests that run in Racket,}
19 | @item{it raises racket exceptions rather than printing to stderr and raising "-1",}
20 | @item{it has only one require point (ssax & sxml are both included), and}
21 | @item{it doesn't depend on schemeunit:3, so it compiles quickly.}]
22 |
23 | This documentation is scraped together from various sources; the bulk of it
24 | (currently) is pulled from in-source comments.
25 |
26 | I'm hoping that this will become a Racket community project, with various
27 | people contributing documentation and test cases and maybe even bug fixes.
28 |
29 | To that end, this code currently lives in a github repository which should
30 | be fairly easy to find. Patches gratefully accepted.
31 |
32 | It's perhaps also worth noting that this project currently has less than 23%
33 | test case coverage. Bleh.
34 |
35 | For Heaven's sake, report lots of bugs!
36 |
37 | --John Clements, 2011-02-17
38 | }
39 |
40 | @include-section["sxml-rep.scrbl"]
41 | @include-section["sax-parsing.scrbl"]
42 | @include-section["serialization.scrbl"]
43 | @include-section["sxpath.scrbl"]
44 | @include-section["sxslt.scrbl"]
45 |
46 | @include-section["extracted-sperber.scrbl"]
47 | @include-section["all-exported.scrbl"]
48 |
49 | @(close-eval the-eval)
50 |
--------------------------------------------------------------------------------
/sxml/scribblings/sxpath.scrbl:
--------------------------------------------------------------------------------
1 | #lang scribble/manual
2 | @(require scribble/core
3 | "util.rkt"
4 | scribble/racket
5 | (for-syntax racket/base)
6 | (for-label sxml))
7 |
8 | @title[#:tag "sxpath"]{Search (SXPath)}
9 |
10 | The @hyperlink["https://www.w3.org/TR/xpath/"]{W3C "XPath" standard} describes a standardized way to perform
11 | searches in XML documents. For instance, the XPath string
12 | @racket["/A/B/C"] (thank you, Wikipedia) describes a search for
13 | a @tt{C} element whose parent is a @tt{B} element whose parent
14 | is an @tt{A} element that is the root of the document.
15 |
16 | The @racket[sxpath] function performs a similar search over SXML data,
17 | using either the standard XPath strings or a list of Racket values.
18 |
19 | @interaction[#:eval (make-base-eval)
20 | (require sxml/sxpath)
21 | ((sxpath "/A/B/C")
22 | '(*TOP* (A (B (C)))))
23 | ((sxpath '(A B C))
24 | '(*TOP* (A (B (C)))))
25 | ((sxpath "//p[contains(@class, 'blue')]/text()")
26 | '(*TOP*
27 | (body
28 | (p (|@| (class "blue")) "P1")
29 | (p (|@| (class "blue green")) "P2")
30 | (p (|@| (class "red")) "P3"))))]
31 |
32 | (This documentation desperately needs more examples.)
33 |
34 | Let's consider the following XML document:
35 | @verbatim{
36 | "
37 |
38 |
39 | www content
40 |
41 |
42 |
43 | content in ccc
44 |
45 |
46 | "}
47 |
48 | If we use Neil Van Dyke's html parser, we might parse this into
49 | the following sxml document:
50 |
51 | @interaction[#:eval the-eval
52 | (define example-doc
53 | '(*TOP*
54 | (aaa "\n" " "
55 | (bbb "\n" " "
56 | (ccc) "\n" " "
57 | (www " www content " (xxx) (www "\n" " " (zzz) "\n" " ")))
58 | "\n" " "
59 | (xxx "\n" " " (ddd " content in ccc \n" " ") "\n" " ")
60 | "\n")))]
61 |
62 | @defproc[(sxpath [path (or/c list? string?)]
63 | [ns-bindings (listof (cons/c symbol? string?)) '()])
64 | (-> (or/c _node nodeset?) nodeset?)]{
65 |
66 | Given a representation of a @racket[path], produces a procedure that
67 | accepts an SXML document and returns a list of matches. Path
68 | representations are interpreted according to the following rewrite
69 | rules.
70 |
71 | @;{
72 | docs previously said:
73 | Note that the @racket[*TOP*] node of the document is required.
74 | But it isn't!
75 | }
76 |
77 | @(let ()
78 | (define-syntax-rule (rewrite-table line ...)
79 | (tabular #:style (let ([cs (style #f (list 'top))])
80 | (style #f (list (table-columns (list cs cs cs)))))
81 | (append line ...)))
82 | (define-syntax-rule (line lhs rhs)
83 | (list (list @racketblock[lhs] @elem{⇒} @racketblock[rhs])))
84 | (define-syntax-rule (LINE lhs rhs)
85 | (list (list @racketblock[lhs] 'cont 'cont)
86 | (list @elem{} @elem{⇒} @racketblock[rhs])))
87 | (define-syntax-rule (BREAK)
88 | (list (list @elem{} @elem{@~} @elem{})))
89 | (define-syntax-rule (defmetas id ...)
90 | (begin (define-syntax id
91 | (make-element-id-transformer
92 | (lambda _
93 | #'(racketvarfont (symbol->string 'id)))))
94 | ...))
95 | (defmetas path-component0 path-components x p path symbol reducer)
96 | (rewrite-table
97 | (line (sxpath '()) (node-join))
98 | (LINE (sxpath (cons path-component0 path-components))
99 | (node-join (sxpath1 path-component0)
100 | (sxpath path-components)))
101 | (BREAK)
102 | (line (sxpath1 '//)
103 | (sxml:descendant-or-self sxml:node?))
104 | (line (sxpath1 `(equal? ,x))
105 | (select-kids (node-equal? x)))
106 | (line (sxpath1 `(eq? ,x))
107 | (select-kids (node-eq? x)))
108 | (line (sxpath1 `(*or* ,p ...))
109 | (select-kids (ntype-names?? `(,p ...))))
110 | (line (sxpath1 `(*not* ,p ...))
111 | (select-kids
112 | (sxml:complement
113 | (ntype-names?? `(,p ...)))))
114 | (line (sxpath1 `(ns-id:* ,x))
115 | (select-kids (ntype-namespace-id?? x)))
116 | (line (sxpath1 _symbol)
117 | (select-kids (ntype?? _symbol)))
118 | (line (sxpath1 _string)
119 | (txpath _string))
120 | (line (sxpath1 _procedure)
121 | _procedure)
122 | ;; ryanc: symbol has to be followed by reducer, right?
123 | ;; original comments just have ellipses, unclear
124 | (LINE (sxpath1 `(,symbol ,reducer ...))
125 | (sxpath1 `((,symbol) ,reducer ...)))
126 | (LINE (sxpath1 `(,path ,reducer ...))
127 | (node-reduce (sxpath path)
128 | (sxpathr reducer) ...))
129 | (BREAK)
130 | (line (sxpathr _number)
131 | (node-pos _number))
132 | (line (sxpathr _path)
133 | (sxml:filter (sxpath _path)))))
134 |
135 | To extract the @tt{xxx}'s inside the @tt{aaa} from the example document:
136 |
137 | @interaction[#:eval the-eval
138 | ((sxpath '(aaa xxx)) example-doc)]
139 |
140 | To extract all cells from an HTML table:
141 |
142 | @interaction[#:eval the-eval
143 | (define table
144 | `(*TOP*
145 | (table
146 | (tr (td "a") (td "b"))
147 | (tr (td "c") (td "d")))))
148 | ((sxpath '(table tr td)) table)
149 | #| should produce '((td "a") (td "b") (td "c") (td "d")) |#
150 | ]
151 |
152 | To extract all cells anywhere in a document:
153 |
154 | @interaction[#:eval the-eval
155 | (define table
156 | `(*TOP*
157 | (div
158 | (p (table
159 | (tr (td "a") (td "b"))
160 | (tr (td "c") (td "d"))))
161 | (table
162 | (tr (td "e"))))))
163 | ((sxpath '(// td)) table)
164 | #| should produce '((td "a") (td "b") (td "c") (td "d") (td "e")) |#
165 | ]
166 |
167 | One result may be nested in another one:
168 |
169 | @interaction[#:eval the-eval
170 | (define doc
171 | `(*TOP*
172 | (div
173 | (p (div "3")
174 | (div (div "4"))))))
175 | ((sxpath '(// div)) doc)
176 | #| should produce
177 | '((div (p (div "3") (div (div "4")))) (div "3") (div (div "4")) (div "4"))
178 | |#
179 | ]
180 |
181 | There's also a string-based syntax, @racket[txpath]. As shown in the grammar above,
182 | @racket[sxpath] assumes that any strings in the path are expressed using the
183 | @racket[txpath] syntax.
184 |
185 | So, for instance, the prior example could be rewritten using a string:
186 |
187 | @interaction[#:eval the-eval
188 | (define doc
189 | `(*TOP*
190 | (div
191 | (p (div "3")
192 | (div (div "4"))))))
193 | ((sxpath "//div") doc)
194 | #| should produce
195 | '((div (p (div "3") (div (div "4")))) (div "3") (div (div "4")) (div "4"))
196 | |#
197 | ]
198 |
199 | More generally, lists in the s-expression syntax correspond to string
200 | concatenation in the txpath syntax.
201 |
202 | So, to find all italics that appear at top level within a paragraph:
203 |
204 | @interaction[#:eval the-eval
205 | (define doc
206 | `(*TOP*
207 | (div
208 | (p (i "3")
209 | (froogy (i "4"))))))
210 | ((sxpath "//p/i") doc)
211 | #| should produce '((i "3")) |#
212 | ]
213 |
214 | Handling of namespaces in @racket[sxpath] is a bit surprising. In particular,
215 | it appears to me that sxpath's model is that namespaces must appear fully expanded
216 | in the matched source. For instance:
217 |
218 | @interaction[#:eval the-eval
219 | ((sxpath "//ns:p" `((ns . "http://example.com")))
220 | '(*TOP* (html (|http://example.com:body|
221 | (|http://example.com:p| "first para")
222 | (|http://example.com:p|
223 | "second para containing"
224 | (|http://example.com:p| "third para") "inside it")))))
225 | #| should produce
226 | '((|http://example.com:p| "first para")
227 | (|http://example.com:p| "second para containing"
228 | (|http://example.com:p| "third para") "inside it")
229 | (|http://example.com:p| "third para"))
230 | |#
231 | ]
232 |
233 | But the corresponding example where the source document contains a namespace shortcut does
234 | not match in the same way. That is:
235 |
236 | @interaction[#:eval the-eval
237 | ((sxpath "//ns:p" `((ns . "http://example.com")))
238 | '(*TOP* (|@| (*NAMESPACES* (ns "http://example.com")))
239 | (html (ns:body (ns:p "first para")
240 | (ns:p "second para containing"
241 | (ns:p "third para") "inside it")))))
242 | ]
243 |
244 | It produces the empty list. Instead, you must pretend that the
245 | shortcut is actually the namespace. Thus:
246 |
247 | @interaction[#:eval the-eval
248 | ((sxpath "//ns:p" `((ns . "ns")))
249 | '(*TOP* (|@| (*NAMESPACES* (ns "http://example.com")))
250 | (html (ns:body (ns:p "first para")
251 | (ns:p "second para containing"
252 | (ns:p "third para") "inside it")))))
253 | #| should produce
254 | '((ns:p "first para")
255 | (ns:p "second para containing"
256 | (ns:p "third para") "inside it")
257 | (ns:p "third para"))
258 | |#
259 | ]
260 |
261 | Ah well.
262 |
263 | @section{Filtering}
264 |
265 | It may sometimes be the case that you're looking for @racket[abc]
266 | nodes that contain @racket[def] nodes. You could try
267 |
268 | @racketblock[(sxpath '(// abc def))]
269 |
270 | ... but that would give you the inner @racket[def] nodes, not the
271 | @racket[abc] parent nodes that contain the @racket[def] nodes.
272 | If you pore over the expansion above, you will discover that you can
273 | do this, using ... well, essentially, using a pair of nested lists:
274 |
275 | @racketblock[
276 | ((sxpath '(// (abc (def))))
277 | '(x (x (abc (x)
278 | (def "1")))
279 | (abc (x) "2")
280 | (abc (def) "3")))]
281 |
282 | Note that this is also the right way to go if for instance you're looking
283 | for a @racket[div] with a particular id:
284 |
285 | @codeblock|{
286 | ((sxpath '(// (div (@ id (equal? "wanted")))))
287 | '(body (foo (div (@ (id "a"))
288 | (div (@ (id "b")) "abc")
289 | "def")
290 | (div (@ (id "wanted"))
291 | (div (@ (id "c")) "qq")
292 | "ghi"))))
293 | }|
294 |
295 | But what if you want to check not that the string is equal to a fixed value, but
296 | rather that it contains a given value? This is common in the case of the "class"
297 | attribute, which often has a space-separated list of tokens. It turns out that
298 | SXML's combinator library can handle this just fine, but in order to use it, you'll
299 | need to unzip the sxpath to allow you to use the combinators. To see this, let's
300 | amend the earlier example to find a class containing the token @racket["wanted"].
301 |
302 | Our first step is to "unzip" the sxpath syntax. The following query produces exactly
303 | the same result as the previous one:
304 |
305 | @codeblock|{
306 | ((node-join (sxpath '(//))
307 | (node-reduce
308 | (sxpath '(div))
309 | (sxml:filter (node-join (sxpath '(@ id))
310 | (select-kids
311 | (node-equal? "wanted"))))))
312 | '(body (foo (div (@ (id "a"))
313 | (div (@ (id "b")) "abc")
314 | "def")
315 | (div (@ (id "wanted"))
316 | (div (@ (id "c")) "qq")
317 | "ghi"))))
318 | }|
319 |
320 | At this point, we can replace @racket[node-equal?] with any predicate on strings. In the following
321 | example we generalize it to look for occurrences of the token, and then we add a bunch of
322 | other junk to the specified id, to show that it still works:
323 |
324 | @codeblock|{
325 | ;; does the given string occur as a "word" in the text?
326 | (define (str-includes? str)
327 | (λ (text) (member str (string-split text))))
328 |
329 | ((node-join (sxpath '(//))
330 | (node-reduce
331 | (sxpath '(div))
332 | (sxml:filter (node-join (sxpath '(@ id))
333 | (select-kids
334 | (str-includes? "wanted"))))))
335 | '(body (foo (div (@ (id "a"))
336 | (div (@ (id "b")) "abc")
337 | "def")
338 | (div (@ (id "wanted bar baz"))
339 | (div (@ (id "c")) "qq")
340 | "ghi"))))
341 | }|
342 |
343 |
344 | }
345 | @defproc[(txpath [xpath-location-path string?]
346 | [ns-bindings (listof (cons/c symbol? string?)) '()])
347 | (-> (or/c _node nodeset?) nodeset?)]{
348 |
349 | Like @racket[sxpath], but only accepts an XPath query in string
350 | form, using the standard XPath syntax.
351 |
352 | Deprecated; use @racket[sxpath] instead.
353 | }
354 |
355 | @;{ ============================================================ }
356 |
357 | A @deftech{sxml-converter} is a function
358 | @racketblock[(-> (or/c _node nodeset?)
359 | nodeset?)]
360 | that is, it takes nodes or nodesets to nodesets. A
361 | @deftech{sxml-converter-as-predicate} is an @tech{sxml-converter} used
362 | as a predicate; a return value of @racket['()] indicates false.
363 |
364 | @defproc[(nodeset? [v any/c]) boolean?]{
365 | Returns @racket[#t] if @racket[v] is a list of nodes (that is, a
366 | list that does not start with a symbol).
367 |
368 | @examples[#:eval the-eval
369 | (nodeset? '(p "blah"))
370 | (nodeset? '((p "blah") (br) "more"))
371 | ]
372 | }
373 |
374 | @defproc[(as-nodeset [v any/c])
375 | nodeset?]{
376 |
377 | If @racket[v] is a nodeset, returns @racket[v], otherwise returns
378 | @racket[(list v)].
379 |
380 | @examples[#:eval the-eval
381 | (as-nodeset '(p "blah"))
382 | (as-nodeset '((p "blah") (br) "more"))
383 | ]
384 | }
385 |
386 | @; ----
387 |
388 | @defproc[(node-eq? [v any/c])
389 | (-> any/c boolean?)]{
390 | Curried @racket[eq?].
391 | }
392 | @defproc[(node-equal? [v any/c])
393 | (-> any/c boolean?)]{
394 | Curried @racket[equal?].
395 | }
396 |
397 | @defproc[(node-pos [n (or/c exact-positive-integer? exact-negative-integer?)])
398 | @#,tech{sxml-converter}]{
399 |
400 | Returns a converter that selects the @racket[n]th element (counting
401 | from 1, not 0) of a nodelist and returns it as a singleton nodelist. If
402 | @racket[n] is negative, it selects from the right: @racket[-1]
403 | selects the last node, and so forth.
404 |
405 | @examples[#:eval the-eval
406 | ((node-pos 2) '((a) (b) (c) (d) (e)))
407 | ((node-pos -1) '((a) (b) (c)))
408 | ]
409 | }
410 |
411 | @; ----
412 |
413 | @defproc[(sxml:filter [pred @#,tech{sxml-converter-as-predicate}])
414 | @#,tech{sxml-converter}]
415 |
416 | @defproc[(sxml:complement [pred @#,tech{sxml-converter-as-predicate}])
417 | @#,tech{sxml-converter-as-predicate}]
418 |
419 | @;{
420 | take-until
421 | take-after
422 | map-union
423 | node-reverse
424 | node-trace
425 | }
426 |
427 | @defproc[(select-kids [pred @#,tech{sxml-converter-as-predicate}])
428 | @#,tech{sxml-converter}]{
429 |
430 | Returns a converter that selects an (ordered) subset of the children
431 | of the given node (or the children of the members of the given
432 | nodelist) satisfying @racket[pred].
433 |
434 | @examples[#:eval the-eval
435 | ((select-kids (ntype?? 'p)) '(p "blah"))
436 | ((select-kids (ntype?? '*text*)) '(p "blah"))
437 | ((select-kids (ntype?? 'p)) (list '(p "blah") '(br) '(p "blahblah")))
438 | ]
439 | }
440 |
441 | @defproc[(select-first-kid [pred @#,tech{sxml-converter-as-predicate}])
442 | (-> (or/c _node nodeset?) (or/c _node #f))]{
443 |
444 | Like @racket[select-kids] but returns only the first one, or
445 | @racket[#f] if none.
446 | }
447 |
448 | @defproc[(node-self [pred @#,tech{sxml-converter-as-predicate}])
449 | @#,tech{sxml-converter}]{
450 |
451 | Returns a function that when applied to @racket[_node], returns
452 | @racket[(list _node)] if @racket[(pred _node)] is neither @racket[#f]
453 | nor @racket['()], otherwise returns @racket['()].
454 |
455 | @examples[#:eval the-eval
456 | ((node-self (ntype?? 'p)) '(p "blah"))
457 | ((node-self (ntype?? 'p)) '(br))
458 | ]
459 | }
460 |
461 | @defproc[(node-join [selector @#,tech{sxml-converter}] ...)
462 | @#,tech{sxml-converter}]{
463 | Forms a new sxml-converter that is the sequential composition of
464 | the sxml-converters in the list.}
465 |
466 | @defproc[(node-reduce [converter @#,tech{sxml-converter}] ...)
467 | @#,tech{sxml-converter}]
468 |
469 | @defproc[(node-or [converter @#,tech{sxml-converter}] ...)
470 | @#,tech{sxml-converter}]
471 |
472 | @defproc[(node-closure [converter @#,tech{sxml-converter}])
473 | @#,tech{sxml-converter}]
474 |
475 | @deftogether[[
476 | @defproc[(sxml:attribute [pred @#,tech{sxml-converter-as-predicate}])
477 | @#,tech{sxml-converter}]
478 | @defproc[(sxml:child [pred @#,tech{sxml-converter-as-predicate}])
479 | @#,tech{sxml-converter}]
480 | @defthing[sxml:child-nodes @#,tech{sxml-converter}]
481 | @defthing[sxml:child-elements @#,tech{sxml-converter}]
482 | @defproc[(sxml:descendant [pred @#,tech{sxml-converter-as-predicate}])
483 | @#,tech{sxml-converter}]
484 | @defproc[(sxml:descendant-or-self [pred @#,tech{sxml-converter-as-predicate}])
485 | @#,tech{sxml-converter}]
486 | ]]{
487 |
488 | XPath axes and accessors.
489 | }
490 |
491 | The following procedures depend explicitly on the root node.
492 |
493 | @deftogether[[
494 | @defproc[((sxml:parent [pred @#,tech{sxml-converter-as-predicate}])
495 | [root _node])
496 | @#,tech{sxml-converter}]
497 | @defproc[(node-parent [root _node])
498 | @#,tech{sxml-converter}]
499 | @defproc[((sxml:ancestor [pred @#,tech{sxml-converter-as-predicate}])
500 | [root _node])
501 | @#,tech{sxml-converter}]
502 | @defproc[((sxml:ancestor-or-self [pred @#,tech{sxml-converter-as-predicate}])
503 | [root _node])
504 | @#,tech{sxml-converter}]
505 | @defproc[((sxml:following [pred @#,tech{sxml-converter-as-predicate}])
506 | [root _node])
507 | @#,tech{sxml-converter}]
508 | @defproc[((sxml:following-sibling [pred @#,tech{sxml-converter-as-predicate}])
509 | [root _node])
510 | @#,tech{sxml-converter}]
511 | ]]{
512 | Gosh, I wish these functions were documented.
513 | }
514 |
515 | @defproc[((sxml:preceding [pred @#,tech{sxml-converter-as-predicate}])
516 | [root _node])
517 | @#,tech{sxml-converter}]{
518 | given a predicate and a root node, returns a procedure that accepts a
519 | nodeset and returns all nodes that appear before the given nodes in
520 | document order, filtered using the predicate.
521 |
522 | Here's an example:
523 |
524 | @interaction[#:eval the-eval
525 | (((sxml:preceding (ntype?? 'www)) example-doc) ((sxpath `(aaa xxx)) example-doc))]
526 |
527 | }
528 |
529 | @defproc[((sxml:preceding-sibling [pred @#,tech{sxml-converter-as-predicate}])
530 | [root _node])
531 | @#,tech{sxml-converter}]{
532 | given a predicate and a root node, returns a procedure that accepts a
533 | nodeset and returns all dones that are preceding siblings (in document
534 | order) of the given nodes.
535 |
536 | @interaction[#:eval the-eval
537 | (define doc '(*TOP* (div (p "foo") (p "bar")
538 | (img "baz") (p "quux"))))
539 | (((sxml:preceding-sibling (ntype?? 'p)) doc) ((sxpath '(// img)) doc))]
540 | }
541 |
542 |
--------------------------------------------------------------------------------
/sxml/scribblings/sxslt.scrbl:
--------------------------------------------------------------------------------
1 | #lang scribble/doc
2 | @(require scribble/manual
3 | "util.rkt"
4 | (for-label sxml))
5 |
6 | @title[#:tag "sxslt"]{SXML Transformation (SXSLT)}
7 |
8 | This library provides a means to transform XML elements. It is
9 | the descendant of the system described by Kiselyov and Krishnamurthi
10 | in their 2003 PADL paper,
11 | @link["http://cs.brown.edu/~sk/Publications/Papers/Published/kk-sxslt/"]{"SXSLT: Manipulation Language for SXML"}.
12 |
13 | @defproc[(sxml:modify [updater _update-spec] ...)
14 | (-> _node _node)]{
15 |
16 | Returns a procedure that applies the given @racket[updater]s to an
17 | SXML document, producing a new SXML document. Each @racket[updater]
18 | has the following form:
19 |
20 | @racketgrammar*[
21 | #:literals (list quote)
22 | [update-spec (list xpath-location-path action action-param ...)]
23 | [action 'delete
24 | 'delete-undeep
25 | 'insert-into
26 | 'insert-following
27 | 'insert-preceding
28 | 'replace
29 | 'move-into
30 | 'move-following
31 | 'move-preceding
32 | handler-proc]]
33 |
34 | The @racket[xpath-location-path] describes the nodes to be transformed
35 | (see also @racket[sxpath]). The @racket[xpath-location-path] is
36 | interpreted with respect to some base node. If the location path is
37 | absolute, the base node is the root of the document being
38 | transformed. If the location path is relative, the base node is the
39 | node selected by the previous @racket[updater].
40 |
41 | Note! @racket[sxml:modify] depends on the uniqueness of all subtrees;
42 | if an @racket[eq?] subtree occurs more than once, @racket['delete]
43 | may fail to delete all instances, for example.
44 |
45 | The following combinations of @racket[action]s and
46 | @racket[action-param]s are supported:
47 |
48 | @specsubform[#:literals (quote) 'delete]{
49 | Deletes the selected nodes.
50 | }
51 | @specsubform[#:literals (quote) 'delete-undeep]{
52 | Deletes the selected nodes, but keeps all of their contents, which
53 | thus move one level upwards in the document tree.
54 | }
55 | @specsubform[#:literals (quote) (code:line 'insert-into new-node ...)]{
56 | Inserts the @racket[new-node]s as the last children of the selected
57 | nodes.
58 | }
59 | @specsubform[#:literals (quote) (code:line 'insert-following new-node ...)]{
60 | Inserts the @racket[new-node]s after the selected nodes.
61 | }
62 | @specsubform[#:literals (quote) (code:line 'insert-preceding new-node ...)]{
63 | Inserts the @racket[new-node]s before the selected nodes.
64 | }
65 | @specsubform[#:literals (quote) (code:line 'replace new-node ...)]{
66 | Replaces the selected nodes with the @racket[new-node]s.
67 | }
68 | @specsubform[#:literals (quote) (code:line 'rename new-tag)]{
69 | Renames the selected nodes, replacing its element tag with
70 | @racket[new-tag].
71 | }
72 | @specsubform[#:literals (quote) (code:line 'move-into new-location-path)]{
73 | Moves the selected nodes to a new location. The selected nodes become
74 | the last children of the nodes selected by @racket[new-location-path].
75 | }
76 | @specsubform[#:literals (quote) (code:line 'move-following new-location-path)]{
77 | Moves the selected nodes to a new location. The selected nodes are
78 | placed immediately after the nodes selected by
79 | @racket[new-location-path].
80 | }
81 | @specsubform[#:literals (quote) (code:line 'move-preceding new-location-path)]{
82 | Moves the selected nodes to a new location. The selected nodes are
83 | placed immediately before the nodes selected by
84 | @racket[new-location-path].
85 | }
86 | @specsubform[handler-proc]{
87 | Applies @racket[handler-proc] to three arguments: the selected node,
88 | a context (?), and the base node. The procedure must return a node
89 | or nodeset, which replaces the selected node.
90 | }
91 |
92 | @examples[#:eval the-eval
93 | (define sample-doc
94 | `(*TOP*
95 | (html (title "the title")
96 | (body (p "paragraph 1")
97 | (p "paragraph 2")))))
98 | ((sxml:modify (list "//title" 'delete)) sample-doc)
99 | ((sxml:modify (list "//body" 'delete-undeep)) sample-doc)
100 | ((sxml:modify (list "//body" 'rename 'table)
101 | (list "p" (lambda (node ctx root)
102 | `(tr (td ,node)))))
103 | sample-doc)
104 | ]
105 | }
106 |
107 |
108 | @defproc[(pre-post-order [tree _node]
109 | [bindings (listof _binding)])
110 | _node]{
111 |
112 | Traverses @racket[tree], applying the transformation rules specified
113 | by @racket[bindings] to each node.
114 |
115 | @racketgrammar*[
116 | #:literals (list* quote *preorder* *macro* *text* *default*)
117 | [binding (list* trigger-symbol '*preorder* . handler-proc)
118 | (list* trigger-symbol '*macro* . handler-proc)
119 | (list* trigger-symbol new-bindings . handler-proc)
120 | (list* trigger-symbol . handler-proc)]
121 | [trigger-symbol XMLname
122 | '*text*
123 | '*default*]
124 | ]
125 |
126 | The @racket[pre-post-order] function visits the nodes and nodelists
127 | pre-post-order (depth-first). For each node of the form
128 | @racket[(_name _node ...)] it looks up an association with the given
129 | @racket[_name] among its @racket[bindings]. If it fails,
130 | @racket[pre-post-order] tries to locate a default binding. It's an
131 | error if the latter attempt fails as well.
132 |
133 | The following types of @racket[_binding] are supported:
134 |
135 | @specsubform[#:literals (list* quote)
136 | (list* trigger-symbol '*preorder* . handler-proc)]{
137 |
138 | The @racket[handler-proc] is applied to each node matching
139 | @racket[trigger-symbol] without first processing the node's
140 | contents. The result is not traversed by @racket[pre-post-order].
141 | }
142 |
143 | @specsubform[#:literals (list* quote)
144 | (list* trigger-symbol '*macro* . handler-proc)]{
145 |
146 | This is equivalent to @racket['*preorder*] described above. However,
147 | the result is re-processed again, with the current stylesheet.
148 | }
149 |
150 | @specsubform[#:literals (list* quote)
151 | (list* trigger-symbol new-bindings . handler-proc)]
152 | @specsubform[#:literals (list* quote)
153 | (list* trigger-symbol . handler-proc)]{
154 |
155 | The @racket[handler-proc] is applied to each node matching
156 | @racket[trigger-symbol], but only after the node's contents have been
157 | recursively processed, using the additional @racket[_new-bindings], if
158 | present.
159 |
160 | To be more precise, the handler is applied to the head of the current
161 | node and its processed children. The result of the handler, which
162 | should also be a tree, replaces the current node. If the current node
163 | is a text string or other atom, a special binding with a symbol
164 | @racket[*text*] is looked up.
165 | }
166 |
167 | @examples[#:eval the-eval
168 | (define sample-doc
169 | `(*TOP*
170 | (html (title "the title")
171 | (body (p "paragraph 1")
172 | (p "paragraph 2")))))
173 | (define italicizer
174 | `((p . ,(lambda (tag . content)
175 | (cons tag (cons "PARAGRAPH BEGINS: " content))))
176 | (*text* . ,(lambda (tag content)
177 | `(i ,content)))
178 | (*default* . ,(lambda args args))))
179 | (pre-post-order sample-doc italicizer)
180 | #| should produce
181 | '(*TOP*
182 | (html
183 | (title (i "the title"))
184 | (body
185 | (p "PARAGRAPH BEGINS: " (i "paragraph 1"))
186 | (p "PARAGRAPH BEGINS: " (i "paragraph 2")))))
187 | |#
188 | ]
189 | }
190 |
--------------------------------------------------------------------------------
/sxml/scribblings/util.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require scribble/manual
3 | scribble/eval
4 | (for-label racket/base
5 | racket/contract))
6 | (provide (all-from-out scribble/manual)
7 | (all-from-out scribble/eval)
8 | (for-label
9 | (all-from-out racket/base)
10 | (all-from-out racket/contract))
11 | the-eval)
12 |
13 | (define the-eval (make-base-eval))
14 | (the-eval `(begin (require sxml
15 | racket/pretty)
16 | (current-print pretty-print-handler)))
17 |
--------------------------------------------------------------------------------
/sxml/ssax/SXML-tree-trans.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require "myenv.ss")
3 | (provide post-order
4 | pre-post-order
5 | SRV:send-reply
6 | replace-range)
7 |
8 |
9 | ; XML/HTML processing in Scheme
10 | ; SXML expression tree transformers
11 | ;
12 | ; IMPORT
13 | ; A prelude appropriate for your Scheme system
14 | ; (myenv-bigloo.scm, myenv-mit.scm, etc.)
15 | ;
16 | ; EXPORT
17 | ; (provide SRV:send-reply
18 | ; post-order pre-post-order replace-range)
19 | ;
20 | ; See vSXML-tree-trans.scm for the validation code, which also
21 | ; serves as usage examples.
22 | ;
23 | ; $Id: SXML-tree-trans.scm,v 1.7 2004/11/09 20:22:26 sperber Exp $
24 |
25 |
26 | ; procedure: SRV:send-reply FRAGMENT ...
27 | ;
28 | ; Output the 'fragments'
29 | ; The fragments are a list of strings, characters,
30 | ; numbers, thunks, #f, #t -- and other fragments.
31 | ; The function traverses the tree depth-first, writes out
32 | ; strings and characters, executes thunks, and ignores
33 | ; #f and '().
34 | ; The function returns #t if anything was written at all;
35 | ; otherwise the result is #f
36 | ; If #t occurs among the fragments, it is not written out
37 | ; but causes the result of SRV:send-reply to be #t
38 |
39 | (define (SRV:send-reply . fragments)
40 | (let loop ((fragments fragments) (result #f))
41 | (cond
42 | ((null? fragments) result)
43 | ((not (car fragments)) (loop (cdr fragments) result))
44 | ((null? (car fragments)) (loop (cdr fragments) result))
45 | ((eq? #t (car fragments)) (loop (cdr fragments) #t))
46 | ((pair? (car fragments))
47 | (loop (cdr fragments) (loop (car fragments) result)))
48 | ((procedure? (car fragments))
49 | ((car fragments))
50 | (loop (cdr fragments) #t))
51 | (else
52 | (display (car fragments))
53 | (loop (cdr fragments) #t)))))
54 |
55 |
56 |
57 | ; procedure: pre-post-order TREE BINDINGS
58 | ;
59 | ; Traversal of an SXML tree or a grove:
60 | ; a or a
61 | ;
62 | ; A and a are mutually-recursive datatypes that
63 | ; underlie the SXML tree:
64 | ; ::= (name . ) | "text string"
65 | ; An (ordered) set of nodes is just a list of the constituent nodes:
66 | ; ::= ( ...)
67 | ; Nodelists, and Nodes other than text strings are both lists. A
68 | ; however is either an empty list, or a list whose head is
69 | ; not a symbol (an atom in general). A symbol at the head of a node is
70 | ; either an XML name (in which case it's a tag of an XML element), or
71 | ; an administrative name such as '@'.
72 | ; See SXPath.scm and SSAX.scm for more information on SXML.
73 | ;
74 | ;
75 | ; Pre-Post-order traversal of a tree and creation of a new tree:
76 | ; pre-post-order:: x ->
77 | ; where
78 | ; ::= ( ...)
79 | ; ::= ( *preorder* . ) |
80 | ; ( *macro* . ) |
81 | ; ( . ) |
82 | ; ( . )
83 | ; ::= XMLname | *text* | *default*
84 | ; :: x [] ->
85 | ;
86 | ; The pre-post-order function visits the nodes and nodelists
87 | ; pre-post-order (depth-first). For each of the form (name
88 | ; ...) it looks up an association with the given 'name' among
89 | ; its . If failed, pre-post-order tries to locate a
90 | ; *default* binding. It's an error if the latter attempt fails as
91 | ; well. Having found a binding, the pre-post-order function first
92 | ; checks to see if the binding is of the form
93 | ; ( *preorder* . )
94 | ; If it is, the handler is 'applied' to the current node. Otherwise,
95 | ; the pre-post-order function first calls itself recursively for each
96 | ; child of the current node, with prepended to the
97 | ; in effect. The result of these calls is passed to the
98 | ; (along with the head of the current ). To be more
99 | ; precise, the handler is _applied_ to the head of the current node
100 | ; and its processed children. The result of the handler, which should
101 | ; also be a , replaces the current . If the current
102 | ; is a text string or other atom, a special binding with a symbol
103 | ; *text* is looked up.
104 | ;
105 | ; A binding can also be of a form
106 | ; ( *macro* . )
107 | ; This is equivalent to *preorder* described above. However, the result
108 | ; is re-processed again, with the current stylesheet.
109 | ;
110 | (define (pre-post-order tree bindings)
111 | (let* ((default-binding (assq '*default* bindings))
112 | (text-binding (or (assq '*text* bindings) default-binding))
113 | (text-handler ; Cache default and text bindings
114 | (and text-binding
115 | (if (procedure? (cdr text-binding))
116 | (cdr text-binding) (cddr text-binding)))))
117 | (let loop ((tree tree))
118 | (cond
119 | ((null? tree) '())
120 | ((not (pair? tree))
121 | (let ((trigger '*text*))
122 | (if text-handler (text-handler trigger tree)
123 | (myenv:error "Unknown binding for " trigger " and no default"))))
124 | ((not (symbol? (car tree))) (map loop tree)) ; tree is a nodelist
125 | (else ; tree is an SXML node
126 | (let* ((trigger (car tree))
127 | (binding (or (assq trigger bindings) default-binding)))
128 | (cond
129 | ((not binding)
130 | (myenv:error "Unknown binding for " trigger " and no default"))
131 | ((not (pair? (cdr binding))) ; must be a procedure: handler
132 | (apply (cdr binding) trigger (map loop (cdr tree))))
133 | ((eq? '*preorder* (cadr binding))
134 | (apply (cddr binding) tree))
135 | ((eq? '*macro* (cadr binding))
136 | (loop (apply (cddr binding) tree)))
137 | (else ; (cadr binding) is a local binding
138 | (apply (cddr binding) trigger
139 | (pre-post-order (cdr tree) (append (cadr binding) bindings)))
140 | ))))))))
141 |
142 | ; procedure: post-order TREE BINDINGS
143 | ; post-order is a strict subset of pre-post-order without *preorder*
144 | ; (let alone *macro*) traversals.
145 | ; Now pre-post-order is actually faster than the old post-order.
146 | ; The function post-order is deprecated and is aliased below for
147 | ; backward compatibility.
148 | (define post-order pre-post-order)
149 |
150 | ;------------------------------------------------------------------------
151 | ; Extended tree fold
152 | ; tree = atom | (node-name tree ...)
153 | ;
154 | ; foldts fdown fup fhere seed (Leaf str) = fhere seed str
155 | ; foldts fdown fup fhere seed (Nd kids) =
156 | ; fup seed $ foldl (foldts fdown fup fhere) (fdown seed) kids
157 |
158 | ; procedure fhere: seed -> atom -> seed
159 | ; procedure fdown: seed -> node -> seed
160 | ; procedure fup: parent-seed -> last-kid-seed -> node -> seed
161 | ; foldts returns the final seed
162 |
163 | (define (foldts fdown fup fhere seed tree)
164 | (cond
165 | ((null? tree) seed)
166 | ((not (pair? tree)) ; An atom
167 | (fhere seed tree))
168 | (else
169 | (let loop ((kid-seed (fdown seed tree)) (kids (cdr tree)))
170 | (if (null? kids)
171 | (fup seed kid-seed tree)
172 | (loop (foldts fdown fup fhere kid-seed (car kids))
173 | (cdr kids)))))))
174 |
175 | ; procedure: replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
176 | ; Traverse a forest depth-first and cut/replace ranges of nodes.
177 | ;
178 | ; The nodes that define a range don't have to have the same immediate
179 | ; parent, don't have to be on the same level, and the end node of a
180 | ; range doesn't even have to exist. A replace-range procedure removes
181 | ; nodes from the beginning node of the range up to (but not including)
182 | ; the end node of the range. In addition, the beginning node of the
183 | ; range can be replaced by a node or a list of nodes. The range of
184 | ; nodes is cut while depth-first traversing the forest. If all
185 | ; branches of the node are cut a node is cut as well. The procedure
186 | ; can cut several non-overlapping ranges from a forest.
187 |
188 | ; replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
189 | ; where
190 | ; type FOREST = (NODE ...)
191 | ; type NODE = Atom | (Name . FOREST) | FOREST
192 | ;
193 | ; The range of nodes is specified by two predicates, beg-pred and end-pred.
194 | ; beg-pred:: NODE -> #f | FOREST
195 | ; end-pred:: NODE -> #f | FOREST
196 | ; The beg-pred predicate decides on the beginning of the range. The node
197 | ; for which the predicate yields non-#f marks the beginning of the range
198 | ; The non-#f value of the predicate replaces the node. The value can be a
199 | ; list of nodes. The replace-range procedure then traverses the tree and skips
200 | ; all the nodes, until the end-pred yields non-#f. The value of the end-pred
201 | ; replaces the end-range node. The new end node and its brothers will be
202 | ; re-scanned.
203 | ; The predicates are evaluated pre-order. We do not descend into a node that
204 | ; is marked as the beginning of the range.
205 |
206 | (define (replace-range beg-pred end-pred forest)
207 |
208 | ; loop forest keep? new-forest
209 | ; forest is the forest to traverse
210 | ; new-forest accumulates the nodes we will keep, in the reverse
211 | ; order
212 | ; If keep? is #t, keep the curr node if atomic. If the node is not atomic,
213 | ; traverse its children and keep those that are not in the skip range.
214 | ; If keep? is #f, skip the current node if atomic. Otherwise,
215 | ; traverse its children. If all children are skipped, skip the node
216 | ; as well.
217 |
218 | (define (loop forest keep? new-forest)
219 | (if (null? forest) (values (reverse new-forest) keep?)
220 | (let ((node (car forest)))
221 | (if keep?
222 | (cond ; accumulate mode
223 | ((beg-pred node) => ; see if the node starts the skip range
224 | (lambda (repl-branches) ; if so, skip/replace the node
225 | (loop (cdr forest) #f
226 | (append (reverse repl-branches) new-forest))))
227 | ((not (pair? node)) ; it's an atom, keep it
228 | (loop (cdr forest) keep? (cons node new-forest)))
229 | (else
230 | (let ((node?
231 | (symbol? (car node)))) ; or is it a nodelist?
232 | (call-with-values
233 | ; traverse its children
234 | (lambda () (loop (if node? (cdr node) node) #t '()))
235 | (lambda (new-kids keep?)
236 | (loop (cdr forest) keep?
237 | (cons
238 | (if node? (cons (car node) new-kids) new-kids)
239 | new-forest)))))))
240 | ; skip mode
241 | (cond
242 | ((end-pred node) => ; end the skip range
243 | (lambda (repl-branches) ; repl-branches will be re-scanned
244 | (loop (append repl-branches (cdr forest)) #t
245 | new-forest)))
246 | ((not (pair? node)) ; it's an atom, skip it
247 | (loop (cdr forest) keep? new-forest))
248 | (else
249 | (let ((node?
250 | (symbol? (car node)))) ; or is it a nodelist?
251 | ; traverse its children
252 | (call-with-values
253 | (lambda () (loop (if node? (cdr node) node) #f '()))
254 | (lambda (new-kids keep?)
255 | (loop
256 | (cdr forest) keep?
257 | (if (or keep? (pair? new-kids))
258 | (cons
259 | (if node? (cons (car node) new-kids) new-kids)
260 | new-forest)
261 | new-forest) ; if all kids are skipped
262 | )))))))))) ; skip the node too
263 |
264 | (call-with-values
265 | (lambda () (loop forest #t '()))
266 | (lambda (new-forest keep?)
267 | new-forest)))
268 |
--------------------------------------------------------------------------------
/sxml/ssax/access-remote.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require net/url
3 | net/head
4 | racket/path
5 | srfi/13/string
6 | "errors-and-warnings.rkt")
7 | (provide open-input-resource
8 | ar:resolve-uri-according-base
9 | ar:resource-type)
10 |
11 | ;; Uniform access to local and remote resources
12 | ;; Resolution for relative URIs in accordance with RFC 2396
13 | ;
14 | ; This software is in Public Domain.
15 | ; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
16 | ;
17 | ; Please send bug reports and comments to:
18 | ; lizorkin@hotbox.ru Dmitry Lizorkin
19 |
20 | ;=========================================================================
21 | ; Accessing (remote) resources
22 |
23 | ; Opens an input port for a resource
24 | ; REQ-URI - a string representing a URI of the resource
25 | ; An input port is returned if there were no errors. In case of an error,
26 | ; the function returns #f and displays an error message as a side effect.
27 | ; Doesn't raise any exceptions.
28 | ;; ryanc: Why not?!
29 | (define (open-input-resource req-uri)
30 | (with-handlers ([exn:fail?
31 | (lambda (e)
32 | (sxml:warn 'open-input-resource "~a: ~a"
33 | req-uri (exn-message e))
34 | #f)])
35 | (get-pure-port (string->url req-uri))))
36 |
37 | ;=========================================================================
38 | ; Determining resource type
39 |
40 | ; Determines the type of a resource
41 | ; REQ-URI - a string representing a URI of the resource
42 | ; For a local resource, its type is determined by its file extension
43 | ; One of the following is returned:
44 | ; #f - if the requested resource doesn't exist
45 | ; 'xml - for a resource that is an XML document
46 | ; 'html - for a resource that is an HTML document
47 | ; 'unknown - for any other resource type
48 | (define (ar:resource-type req-uri)
49 | (cond [(string-prefix? "http://" req-uri) ; HTTP scheme is used in REQ-URI
50 | (with-handlers ([exn:fail? (lambda (exn) #f)])
51 | (call/input-url (string->url req-uri) head-impure-port
52 | (lambda (port)
53 | (let* ([headers (purify-port port)]
54 | [content-type (extract-field "content-type" headers)])
55 | (cond [(not content-type) ;; no content-type specified
56 | 'unknown]
57 | [(string-prefix? "text/xml" content-type)
58 | 'xml]
59 | [(string-prefix? "text/html" content-type)
60 | 'html]
61 | [(string-prefix? "text/plain" content-type)
62 | 'plain]
63 | [else 'unknown])))))]
64 | [(string-prefix? "file://" req-uri)
65 | (define filename (substring req-uri 7))
66 | (cond [(not (file-exists? filename)) ; file doesn't exist
67 | #f]
68 | [(assoc (filename-extension filename)
69 | '((#"xml" . xml) (#"html" . html) (#"htm" . html)))
70 | => cdr]
71 | [else 'unknown])]
72 | [else 'unknown]))
73 |
74 | ;=========================================================================
75 | ; Working on absolute/relative URIs
76 | ; This section is based on RFC 2396
77 |
78 | ;-------------------------------------------------
79 | ; Resolves a relative URI with respect to the base URI
80 |
81 | ; base-uri - base URI for the requiested one
82 | ; Returns the resolved URI
83 | (define (ar:resolve-uri-according-base base-uri req-uri)
84 | (url->string (combine-url/relative (string->url base-uri) req-uri)))
85 |
--------------------------------------------------------------------------------
/sxml/ssax/doc.txt:
--------------------------------------------------------------------------------
1 | SSAX Package
2 | ============
3 |
4 | A SSAX functional XML parsing framework consists of a DOM/SXML parser, a SAX
5 | parser, and a supporting library of lexing and parsing procedures. The
6 | procedures in the package can be used separately to tokenize or parse various
7 | pieces of XML documents. The framework supports XML Namespaces, character,
8 | internal and external parsed entities, attribute value normalization,
9 | processing instructions and CDATA sections. The package includes a
10 | semi-validating SXML parser: a DOM-mode parser that is an instantiation of
11 | a SAX parser (called SSAX).
12 |
13 | SSAX is a full-featured, algorithmically optimal, pure-functional parser,
14 | which can act as a stream processor. SSAX is an efficient SAX parser that is
15 | easy to use. SSAX minimizes the amount of application-specific state that has
16 | to be shared among user-supplied event handlers. SSAX makes the maintenance
17 | of an application-specific element stack unnecessary, which eliminates several
18 | classes of common bugs. SSAX is written in a pure-functional subset of Scheme.
19 | Therefore, the event handlers are referentially transparent, which makes them
20 | easier for a programmer to write and to reason about. The more expressive,
21 | reliable and easier to use application interface for the event-driven XML
22 | parsing is the outcome of implementing the parsing engine as an enhanced tree
23 | fold combinator, which fully captures the control pattern of the depth-first
24 | tree traversal.
25 |
26 | -------------------------------------------------
27 |
28 | Quick start
29 |
30 | ; procedure: ssax:xml->sxml PORT NAMESPACE-PREFIX-ASSIG
31 | ;
32 | ; This is an instance of a SSAX parser that returns an SXML
33 | ; representation of the XML document to be read from PORT.
34 | ; NAMESPACE-PREFIX-ASSIG is a list of (USER-PREFIX . URI-STRING)
35 | ; that assigns USER-PREFIXes to certain namespaces identified by
36 | ; particular URI-STRINGs. It may be an empty list.
37 | ; The procedure returns an SXML tree. The port points out to the
38 | ; first character after the root element.
39 | (define (ssax:xml->sxml port namespace-prefix-assig) ...)
40 |
41 | ; procedure: pre-post-order TREE BINDINGS
42 | ;
43 | ; Traversal of an SXML tree or a grove:
44 | ; a or a
45 | ;
46 | ; A and a are mutually-recursive datatypes that
47 | ; underlie the SXML tree:
48 | ; ::= (name . ) | "text string"
49 | ; An (ordered) set of nodes is just a list of the constituent nodes:
50 | ; ::= ( ...)
51 | ; Nodelists, and Nodes other than text strings are both lists. A
52 | ; however is either an empty list, or a list whose head is
53 | ; not a symbol (an atom in general). A symbol at the head of a node is
54 | ; either an XML name (in which case it's a tag of an XML element), or
55 | ; an administrative name such as '@'.
56 | ; See SXPath.scm and SSAX.scm for more information on SXML.
57 | ;
58 | ;
59 | ; Pre-Post-order traversal of a tree and creation of a new tree:
60 | ; pre-post-order:: x ->
61 | ; where
62 | ; ::= ( ...)
63 | ; ::= ( *preorder* . ) |
64 | ; ( *macro* . ) |
65 | ; ( . ) |
66 | ; ( . )
67 | ; ::= XMLname | *text* | *default*
68 | ; :: x [] ->
69 | ;
70 | ; The pre-post-order function visits the nodes and nodelists
71 | ; pre-post-order (depth-first). For each of the form (name
72 | ; ...) it looks up an association with the given 'name' among
73 | ; its . If failed, pre-post-order tries to locate a
74 | ; *default* binding. It's an error if the latter attempt fails as
75 | ; well. Having found a binding, the pre-post-order function first
76 | ; checks to see if the binding is of the form
77 | ; ( *preorder* . )
78 | ; If it is, the handler is 'applied' to the current node. Otherwise,
79 | ; the pre-post-order function first calls itself recursively for each
80 | ; child of the current node, with prepended to the
81 | ; in effect. The result of these calls is passed to the
82 | ; (along with the head of the current ). To be more
83 | ; precise, the handler is _applied_ to the head of the current node
84 | ; and its processed children. The result of the handler, which should
85 | ; also be a , replaces the current . If the current
86 | ; is a text string or other atom, a special binding with a symbol
87 | ; *text* is looked up.
88 | ;
89 | ; A binding can also be of a form
90 | ; ( *macro* . )
91 | ; This is equivalent to *preorder* described above. However, the result
92 | ; is re-processed again, with the current stylesheet.
93 | ;
94 | (define (pre-post-order tree bindings) ...)
95 |
96 | -------------------------------------------------
97 |
98 | Additional tools included into the package
99 |
100 | 1. "access-remote.ss"
101 | Uniform access to local and remote resources
102 | Resolution for relative URIs in accordance with RFC 2396
103 |
104 | 2. "id.ss"
105 | Creation and manipulation of the ID-index for a faster access to SXML elements
106 | by their unique ID
107 | Provides the DTD parser for extracting ID attribute declarations
108 |
109 | 3. "xlink-parser.ss"
110 | Parser for XML documents that contain XLink elements
111 |
112 | 4. "multi-parser.ss"
113 | SSAX multi parser: combines several specialized parsers into one
114 | Provides creation of parent pointers to SXML document constructed
115 |
--------------------------------------------------------------------------------
/sxml/ssax/errors-and-warnings.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require racket/contract/base)
3 | (provide/contract
4 | [current-sxml-warning-handler
5 | (parameter/c
6 | (->* (symbol? string?) () #:rest list? any))]
7 | [make-warning-handler
8 | (-> (or/c output-port? (-> output-port?))
9 | (->* (symbol? string?) () #:rest list? any))]
10 | [sxml:warn
11 | (->* (symbol? string?) () #:rest list? void?)]
12 | [sxml:warn/concat
13 | (->* (symbol?) () #:rest list? void?)])
14 |
15 | ;; Goal: replace all uses of 'cerr' etc with 'sxml:warn'
16 |
17 | ;; warning-handler adds newline; fmt doesn't need to
18 | (define (make-warning-handler out)
19 | (lambda (who fmt . args)
20 | (let ([out (if (procedure? out) (out) out)])
21 | (apply fprintf out (string-append "~a: " fmt "\n") who args))))
22 |
23 | (define current-sxml-warning-handler
24 | (make-parameter (make-warning-handler current-error-port)))
25 |
26 | (define (sxml:warn who fmt . args)
27 | (apply (current-sxml-warning-handler) who fmt args)
28 | (void))
29 |
30 | (define (sxml:warn/concat who . args)
31 | ((current-sxml-warning-handler)
32 | who
33 | (apply string-append (map (lambda (x) (format "~a" x)) args))))
34 |
--------------------------------------------------------------------------------
/sxml/ssax/id.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require "errors-and-warnings.rkt"
3 | "myenv.ss"
4 | "access-remote.ss"
5 | "sxpathlib.ss")
6 | (provide id:doctype-handler
7 | id:ending-action
8 | id:finish-element-handler
9 | id:make-seed
10 | id:new-level-seed-handler)
11 |
12 | ;; Creation and manipulation of the ID-index
13 | ;; Provides the DTD parser for extracting ID attribute declarations
14 | ;
15 | ; This software is in Public Domain.
16 | ; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
17 | ;
18 | ; Please send bug reports and comments to:
19 | ; lizorkin@hotbox.ru Dmitry Lizorkin
20 | ;
21 | ; ID-index provides fast access to XML elements by their unique ID.
22 | ; ID-index has the following structure:
23 | ; id-index = ( (id . element) (id . element) ... )
24 | ; i.e.
25 | ; id-index = (list
26 | ; (cons id element)
27 | ; (cons id element)
28 | ; ...)
29 | ; where
30 | ; id - (a string) element's unique ID
31 | ; element - an SXML presentation of an element
32 | ;
33 | ; Creation of an id-index generally consists of two steps.
34 | ; On the first step, a document declaration (internal and external DTD)
35 | ; is read and information of ID attributes is extracted.
36 | ; This is presented in a following form:
37 | ; id-attrs = ( (elem-name attr-name attr-name attr-name ...)
38 | ; (elem-name attr-name attr-name attr-name ...) ... )
39 | ; i.e.
40 | ; id-attrs = (list
41 | ; (cons
42 | ; elem-name
43 | ; (list attr-name attr-name attr-name ...)
44 | ; (cons
45 | ; elem-name
46 | ; (list attr-name attr-name attr-name ...)
47 | ; ...)
48 | ; where
49 | ; elem-name - (a symbol) a name of the element
50 | ; attr-name - (a symbol) element's attribute having an ID type
51 | ;
52 | ; On the second step, if an SXML presentation of the document is available,
53 | ; 'id-attrs' are used for forming an 'id-index'.
54 | ; If there is no SXML presentation for a document yet, both steps are
55 | ; performed as a single function call - to a specialized SSAX parser.
56 | ; This parser constructs an SXML presentation and an 'id-index'
57 | ; in a single pass
58 | ;
59 | ; ATTENTION:
60 | ; 1. Only non-qualified 'elem-name' and 'attr-name' are correctly supported
61 | ; 2. Parameter entity reference (PEReference) is NOT supported
62 |
63 | ;=========================================================================
64 | ; Functions which read XML document declaration
65 |
66 | ;------------------------------------------------
67 | ; Trivial functions that ignore symbols
68 |
69 | ; Function reads a whitespace (S production)
70 | (define (id:process-s port)
71 | (let ((symb (peek-char port)))
72 | (cond((eof-object? symb) symb)
73 | ((char=? symb #\space) (read-char port)
74 | (id:process-s port))
75 | ((char=? symb #\return) (read-char port)
76 | (id:process-s port))
77 | ((char=? symb #\newline)(read-char port)
78 | (id:process-s port))
79 | ((char=? symb #\tab)(read-char port)
80 | (id:process-s port))
81 | (else symb))))
82 |
83 |
84 | ; Ignores all symbols until template-symbol
85 | (define (id:ignore-until templ-sym port)
86 | (let loop ((symb (peek-char port)))
87 | (cond((eof-object? symb) symb)
88 | ((equal? symb templ-sym) (read-char port)
89 | symb)
90 | (else (read-char port)
91 | (loop (peek-char port))))))
92 |
93 |
94 | ;------------------------------------------------
95 | ; These functions perform reading from a file
96 |
97 | ; Read N symbols from a port
98 | ;; If fewer than N are available before EOF, return eof
99 | (define (id:read-n num port)
100 | (id:process-s port)
101 | (let ([result (read-string num port)])
102 | (cond [(string? result)
103 | (if (< (string-length result) num) eof result)]
104 | [else eof])))
105 |
106 | ; This function reads a name - a sequence of characters ending with
107 | ; a whitespace or '<'. '>', '(', ')', '[', ']', '|'
108 | ;; ryanc: FIXME: use buffer from other file? (next-token??)
109 | (define (id:read-name port)
110 | (id:process-s port)
111 | (let ([out (open-output-string)])
112 | (let loop ()
113 | (let ([symb (peek-char port)])
114 | (cond [(or (eof-object? symb)
115 | (member symb '(#\space #\tab #\return #\newline
116 | #\< #\> #\( #\) #\[ #\] #\|)))
117 | (get-output-string out)]
118 | [else
119 | (write-char (read-char port) out)
120 | (loop)])))))
121 |
122 | ; This function reads a literal
123 | ; literal ::= ('"' [^"]* '"') | ("'" [^']* "'")
124 | ; A string is returned
125 | (define (id:process-literal port)
126 | (id:process-s port)
127 | (let ([quot (read-char port)]
128 | [out (open-output-string)])
129 | ;; ryanc: FIXME, check is either ' or "
130 | (let loop ()
131 | (let ([next (read-char port)])
132 | (cond [(or (eof-object? next)
133 | (eqv? next quot))
134 | (get-output-string out)]
135 | [else
136 | (write-char next out)
137 | (loop)])))))
138 |
139 | ;------------------------------------------------
140 | ; Miscellaneous
141 |
142 | ; Converts a string into small letters
143 | (define (id:to-small str) (string-downcase str))
144 |
145 | ; Takes an 'id-attrs' which can contain equal element names
146 | ; Returns a new 'id-attrs' where all element names are unique
147 | (define (id:unite-id-attrs id-attrs)
148 | (let loop ((id-attrs id-attrs)
149 | (new '()))
150 | (if (null? id-attrs)
151 | new
152 | (let rpt ((elem-name (caar id-attrs))
153 | (atts (cdar id-attrs))
154 | (rest (cdr id-attrs))
155 | (id-attrs '()))
156 | (cond
157 | ((null? rest)
158 | (loop id-attrs (cons (cons elem-name atts) new)))
159 | ((equal? (caar rest) elem-name)
160 | (rpt elem-name
161 | (append atts (cdar rest))
162 | (cdr rest)
163 | id-attrs))
164 | (else
165 | (rpt elem-name atts (cdr rest) (cons (car rest) id-attrs))))))))
166 |
167 |
168 | ;------------------------------------------------
169 | ; Parsing XML productions concerning document declaration
170 | ; These functions are not intendes for error detection, they assume that
171 | ; the document is correct
172 |
173 | ; This function ignores information related to a PI production [16]
174 | ; [16] PI ::= '' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
175 | ; It looks for an ending '?>' template
176 | (define (id:ignore-PI port)
177 | (id:ignore-until #\? port)
178 | (let ((symb (peek-char port)))
179 | (cond((eof-object? symb) symb)
180 | ((equal? symb #\>) (read-char port)
181 | symb)
182 | (else (id:ignore-PI port)))))
183 |
184 |
185 | ; This function ignores information related to a Comment production [15]
186 | ; [15] Comment ::= ''
187 | ; The starting '' template
189 | (define (id:ignore-comment port)
190 | (read-char port) ; it is '-'
191 | (read-char port) ; it is '-'
192 | (id:ignore-until #\- port)
193 | (let((sym1 (peek-char port)))
194 | (cond((eof-object? sym1) sym1)
195 | ((char=? sym1 #\-) (read-char port)
196 | (let((sym2 (read-char port))) ; must be '>'
197 | sym2))
198 | (else (id:ignore-comment port)))))
199 |
200 |
201 | ; This function processes AttType production ([54]-[59] in XML specification)
202 | ; [54] AttType ::= StringType | TokenizedType | EnumeratedType
203 | ; [55] StringType ::= 'CDATA'
204 | ; [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
205 | ; | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
206 | ; [57] EnumeratedType ::= NotationType | Enumeration
207 | ; [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
208 | ; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
209 | ; The function returnd #t if the attribute has an ID type and #f otherwise
210 | (define (id:AttType-ID? port)
211 | (let((type (id:to-small (id:read-name port))))
212 | (cond((string=? type "id") #t)
213 | ((string=? type "notation")
214 | (id:process-s port)
215 | (read-char port) ; it is #\(
216 | (id:ignore-until #\) port)
217 | #f)
218 | ((and (string=? type "") (char=? (peek-char port) #\()) ; see [59]
219 | (id:ignore-until #\) port)
220 | #f)
221 | (else #f))))
222 |
223 |
224 | ; This function processes DefaultDecl production ([60] in XML specification)
225 | ; [60] DefaultDecl ::= '#REQUIRED'
226 | ; | '#IMPLIED'
227 | ; | (('#FIXED' S)? AttValue)
228 | ; The result is always #t
229 | (define (id:process-DefaultDecl port)
230 | (let((type (id:to-small (id:read-name port))))
231 | (cond((string=? type "#fixed")
232 | (id:read-name port) ; reads a default value
233 | #t)
234 | (else #t))))
235 |
236 |
237 | ; This function processes AttDef production ([53] in XML specification)
238 | ; [53] AttDef ::= S Name S AttType S DefaultDecl
239 | ; If an attribute has an ID type, (list attribule-name) is returned
240 | ; (a list of one element). Otherwise, function returns an empty list
241 | (define (id:process-AttDef port)
242 | (let((att-name (string->symbol (id:read-name port))))
243 | (let((bool (id:AttType-ID? port)))
244 | (id:process-DefaultDecl port)
245 | (if bool (list att-name) '()))))
246 |
247 |
248 | ; The function processes AttlistDecl production ([52] in XML specification)
249 | ; [52] AttlistDecl ::= ''
250 | ; The starting 'symbol (id:read-name port))))
254 | (let loop ((atts '()))
255 | (id:process-s port)
256 | (cond((char=? (peek-char port) #\>) ; no more attributes will be declared
257 | (read-char port)
258 | (if(null? atts)
259 | '()
260 | (list (cons element-name atts))))
261 | (else
262 | (loop (append (id:process-AttDef port) atts)))))))
263 |
264 |
265 | ; This function processes a multiple markupdecl production [29]
266 | ; [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl
267 | ; | NotationDecl | PI | Comment
268 | ; 'id-attrs' are returned as a result
269 | (define (id:process-markupdecl* port)
270 | (let loop ((id-attrs '()))
271 | (let((beg (id:read-n 2 port)))
272 | (cond((eof-object? beg) id-attrs) ; the file is over
273 | ((string=? beg "]>") id-attrs) ; the end of the markupdecl
274 | ((string=? beg "") ; processing instruction
275 | (id:ignore-PI port)
276 | (loop id-attrs))
277 | ((and (string=? beg " port)
286 | (loop id-attrs)))))
287 | (else ; an error condition
288 | (sxml:warn 'id:process-markupdecl
289 | "error in markupdecl production: unexpected ~a" beg)
290 | (id:ignore-until #\> port)
291 | id-attrs)))))
292 |
293 |
294 | ; This function processes a doctypedecl production ([75] in XML specification)
295 | ; [75] ExternalID ::= 'SYSTEM' S SystemLiteral
296 | ; | 'PUBLIC' S PubidLiteral S SystemLiteral
297 | ; The function ignores a PubidLiteral
298 | ; 'id-attrs' are returned as a result
299 | (define (id:process-ExternalID port)
300 | (let((system-literal
301 | (let((name (id:to-small (id:read-name port))))
302 | (cond
303 | ((string=? name "system")
304 | (id:process-literal port))
305 | ((string=? name "public")
306 | (id:process-literal port)
307 | (id:process-literal port))
308 | (else #f)))))
309 | (if(not system-literal)
310 | '() ; an incorrect situation
311 | (let((external-port (open-input-resource system-literal)))
312 | (if(not external-port)
313 | '() ; a failure
314 | (let((id-attrs (id:process-markupdecl* external-port)))
315 | (close-input-port external-port)
316 | id-attrs))))))
317 |
318 |
319 | ; This function processes a doctypedecl production ([28] in XML specification)
320 | ; [28] doctypedecl ::= ''
322 | ; The function doesn't process a DeclSep (this is a PEReference which
323 | ; this programme doesn't support)
324 | ; The starting ''
352 | ; [27] Misc ::= Comment | PI | S
353 | ; 'id-attrs' are returned as a result
354 | (define (id:process-prolog port)
355 | (let((beg (id:read-n 2 port)))
356 | (cond((eof-object? beg) '()) ; a file is over - strange...
357 | ((string=? beg "") ; PI or XMLDecl
358 | (id:ignore-PI port)
359 | (id:process-prolog port))
360 | ((and (string=? beg "SXML+id document id-attrs)
420 | (let((aux-subtrees
421 | (let((aux ((select-kids (ntype?? '@@)) document)))
422 | (if(null? aux)
423 | '()
424 | (let rpt ((res '())
425 | (to-see (cdar aux)))
426 | (cond
427 | ((null? to-see) (reverse res))
428 | ((equal? (caar to-see) 'id-index) (rpt res (cdr to-see)))
429 | (else (rpt (cons (car to-see) res)
430 | (cdr to-see)))))))))
431 | (let loop ((nodeset (list document))
432 | (id-index '()))
433 | (if(null? nodeset)
434 | (let((kids ((select-kids
435 | (lambda (node)
436 | (not (and (pair? node) (equal? (car node) '@@)))))
437 | document)))
438 | (cons* '*TOP*
439 | (cons* '@@
440 | (cons 'id-index id-index)
441 | aux-subtrees)
442 | kids))
443 | (let((cur-node (car nodeset)))
444 | (cond
445 | ((not (pair? cur-node)) ; a text node
446 | (loop (cdr nodeset) id-index))
447 | ((assoc (car cur-node) id-attrs)
448 | =>
449 | (lambda (lst)
450 | (let((id-values
451 | ((select-kids (lambda (x) #t))
452 | ((sxml:filter (lambda (x) (member (car x) (cdr lst))))
453 | ((select-kids (lambda (x) #t))
454 | ((select-kids (ntype?? '@)) cur-node))))))
455 | (loop
456 | (append
457 | ((select-kids (ntype?? '*)) (car nodeset))
458 | (cdr nodeset))
459 | (append
460 | id-index
461 | (map
462 | (lambda (x) (cons x cur-node))
463 | id-values))))))
464 | (else
465 | (loop
466 | (append ((select-kids (ntype?? '*)) (car nodeset)) (cdr nodeset))
467 | id-index))))))))
468 |
469 |
470 |
471 | ;=========================================================================
472 | ; Some stuff for a SSAX multi parser
473 |
474 | ;------------------------------------------------
475 | ; Id-related part of the seed
476 | ; id:seed = (list id-attrs id-index)
477 | ; id-attrs, id-index - see a head comment
478 |
479 | ; Mutator
480 | (define (id:make-seed id-attrs id-index)
481 | (list id-attrs id-index))
482 |
483 |
484 | ; Accessors
485 | (define (id:seed-attrs id:seed)
486 | (car id:seed))
487 |
488 | (define (id:seed-index id:seed)
489 | (cadr id:seed))
490 |
491 |
492 | ;------------------------------------------------
493 | ; Handler units
494 |
495 | ; This function is called by the NEW-LEVEL-SEED handler
496 | ; A new 'id:seed' is returned
497 | (define (id:new-level-seed-handler id:seed)
498 | id:seed)
499 |
500 |
501 | ; This function is called by the FINISH-ELEMENT handler
502 | ; A new 'id:seed' is returned
503 | (define (id:finish-element-handler elem-gi attributes id:seed element)
504 | (cond
505 | ((assoc elem-gi (id:seed-attrs id:seed))
506 | =>
507 | (lambda (lst)
508 | (let loop ((atts attributes)
509 | (id-index (id:seed-index id:seed)))
510 | (if
511 | (null? atts)
512 | (id:make-seed (id:seed-attrs id:seed) id-index)
513 | (let((att (car atts)))
514 | (cond
515 | ((pair? (car att)) ; namespace aware
516 | (loop (cdr atts) id-index))
517 | ((member (car att) (cdr lst))
518 | (loop (cdr atts)
519 | (cons (cons (cdr att) element) id-index)))
520 | (else
521 | (loop (cdr atts) id-index))))))))
522 | (else
523 | id:seed)))
524 |
525 |
526 | ; This function is called by the DOCTYPE handler
527 | ; A new 'id:seed' is returned
528 | (define (id:doctype-handler port systemid internal-subset?)
529 | (let((id-attrs
530 | (if
531 | (not systemid)
532 | '() ; systemid not supplied
533 | (let((external-port (open-input-resource systemid)))
534 | (if
535 | (not external-port)
536 | '() ; a failure
537 | (let((id-attrs (id:process-markupdecl* external-port)))
538 | (close-input-port external-port)
539 | id-attrs))))))
540 | (let((id-attrs
541 | (if
542 | internal-subset?
543 | (id:unite-id-attrs
544 | (append id-attrs (id:process-markupdecl* port)))
545 | (id:unite-id-attrs id-attrs))))
546 | (id:make-seed id-attrs '()))))
547 |
548 |
549 | ; This function constructs the member of an axuiliary list
550 | (define (id:ending-action id:seed)
551 | (let((id-index (id:seed-index id:seed)))
552 | (cons 'id-index id-index)))
553 |
--------------------------------------------------------------------------------
/sxml/ssax/info.rkt:
--------------------------------------------------------------------------------
1 | (module info (lib "infotab.ss" "setup")
2 | (define name "ssax")
3 | (define blurb
4 | (list "SSAX functional XML parsing framework "
5 | "to inter-convert between an angular-bracket and "
6 | "an S-expression-based notations for markup documents"))
7 | (define primary-file "ssax.ss")
8 | (define doc.txt "doc.txt")
9 | (define categories '(xml))
10 | )
11 |
--------------------------------------------------------------------------------
/sxml/ssax/input-parse.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require racket/port
3 | "myenv.ss"
4 | "parse-error.ss")
5 | (provide (all-defined-out))
6 |
7 | ;****************************************************************************
8 | ; Simple Parsing of input
9 | ;
10 | ; The following simple functions surprisingly often suffice to parse
11 | ; an input stream. They either skip, or build and return tokens,
12 | ; according to inclusion or delimiting semantics. The list of
13 | ; characters to expect, include, or to break at may vary from one
14 | ; invocation of a function to another. This allows the functions to
15 | ; easily parse even context-sensitive languages.
16 | ;
17 | ; EOF is generally frowned on, and thrown up upon if encountered.
18 | ; Exceptions are mentioned specifically. The list of expected characters
19 | ; (characters to skip until, or break-characters) may include an EOF
20 | ; "character", which is to be coded as symbol *eof*
21 | ;
22 | ; The input stream to parse is specified as a PORT, which is usually
23 | ; the last (and optional) argument. It defaults to the current input
24 | ; port if omitted.
25 | ;
26 | ; IMPORT
27 | ; This package relies on a function parser-error, which must be defined
28 | ; by a user of the package. The function has the following signature:
29 | ; parser-error PORT MESSAGE SPECIALISING-MSG*
30 | ; Many procedures of this package call parser-error to report a parsing
31 | ; error. The first argument is a port, which typically points to the
32 | ; offending character or its neighborhood. Most of the Scheme systems
33 | ; let the user query a PORT for the current position. MESSAGE is the
34 | ; description of the error. Other arguments supply more details about
35 | ; the problem.
36 | ;
37 | ; $Id: input-parse.scm,v 1.1.1.1 2001/07/11 19:33:43 oleg Exp $
38 |
39 | ;------------------------------------------------------------------------
40 | ; Preparation and tuning section
41 |
42 | ; This package is heavily used. Therefore, we take time to tune it in,
43 | ; in particular for Gambit.
44 |
45 | ;------------------------------------------------------------------------
46 |
47 | ; -- procedure+: peek-next-char PORT
48 | ; advances to the next character in the PORT and peeks at it.
49 | ; This function is useful when parsing LR(1)-type languages
50 | ; (one-char-read-ahead).
51 |
52 | (define (peek-next-char port)
53 | (read-char port)
54 | (peek-char port))
55 |
56 |
57 | ;------------------------------------------------------------------------
58 |
59 | ; -- procedure+: assert-curr-char CHAR-LIST STRING PORT
60 | ; Reads a character from the PORT and looks it up
61 | ; in the CHAR-LIST of expected characters
62 | ; If the read character was found among expected, it is returned
63 | ; Otherwise, the procedure writes a nasty message using STRING
64 | ; as a comment, and quits.
65 |
66 | (define (assert-curr-char expected-chars comment port)
67 | (let ((c (read-char port)))
68 | (if (memq c expected-chars) c
69 | (parser-error port "Wrong character " c
70 | " (0x" (if (eof-object? c) "*eof*"
71 | (number->string (char->integer c) 16)) ") "
72 | comment ". " expected-chars " expected"))))
73 |
74 |
75 | ; -- procedure+: skip-until CHAR PORT
76 | ; Reads and skips characters from the PORT until the break
77 | ; character is encountered. This break character is returned.
78 |
79 | (define (skip-until-char stop-char port)
80 | (let loop ((c (read-char port)))
81 | (cond
82 | ((eqv? c stop-char) c)
83 | ((eof-object? c)
84 | (parser-error port "Unexpected EOF while skipping until " stop-char))
85 | (else (loop (read-char port))))))
86 |
87 | ; -- procedure+: skip-while CHAR-LIST PORT
88 | ; Reads characters from the PORT and disregards them,
89 | ; as long as they are mentioned in the CHAR-LIST.
90 | ; The first character (which may be EOF) peeked from the stream
91 | ; that is NOT a member of the CHAR-LIST is returned. This character
92 | ; is left on the stream.
93 |
94 | (define (skip-while skip-chars port)
95 | (do ((c (peek-char port) (peek-char port)))
96 | ((not (memv c skip-chars)) c)
97 | (read-char port)))
98 |
99 | ; whitespace const
100 |
101 | ;------------------------------------------------------------------------
102 | ; Stream tokenizers
103 |
104 |
105 | ; -- procedure+:
106 | ; next-token PREFIX-CHAR-LIST BREAK-CHAR-LIST [COMMENT-STRING] [PORT]
107 | ; skips any number of the prefix characters (members of the
108 | ; PREFIX-CHAR-LIST), if any, and reads the sequence of characters
109 | ; up to (but not including) a break character, one of the
110 | ; BREAK-CHAR-LIST.
111 | ; The string of characters thus read is returned.
112 | ; The break character is left on the input stream
113 | ; The list of break characters may include EOF, which is to be coded as
114 | ; a symbol *eof*. Otherwise, EOF is fatal, generating an error message
115 | ; including a specified COMMENT-STRING (if any)
116 | ;
117 | ; Note: since we can't tell offhand how large the token being read is
118 | ; going to be, we make a guess, pre-allocate a string, and grow it by
119 | ; quanta if necessary. The quantum is always the length of the string
120 | ; before it was extended the last time. Thus the algorithm does
121 | ; a Fibonacci-type extension, which has been proven optimal.
122 | ; Note, explicit port specification in read-char, peek-char helps.
123 |
124 | ; Procedure input-parse:init-buffer
125 | ; returns an initial buffer for next-token* procedures.
126 | ; The input-parse:init-buffer may allocate a new buffer per each invocation:
127 | ; (define (input-parse:init-buffer) (make-string 32))
128 | ; Size 32 turns out to be fairly good, on average.
129 | ; That policy is good only when a Scheme system is multi-threaded with
130 | ; preemptive scheduling, or when a Scheme system supports shared substrings.
131 | ; In all the other cases, it's better for input-parse:init-buffer to
132 | ; return the same static buffer. next-token* functions return a copy
133 | ; (a substring) of accumulated data, so the same buffer can be reused.
134 | ; We shouldn't worry about new token being too large: next-token will use
135 | ; a larger buffer automatically. Still, the best size for the static buffer
136 | ; is to allow most of the tokens to fit in.
137 | ; Using a static buffer _dramatically_ reduces the amount of produced garbage
138 | ; (e.g., during XML parsing).
139 | ;; ryanc: Unfortunately, single static buffer not safe in Racket
140 | ;; FIXME: tune size, see if thread-cell cache is worth it
141 | (define input-parse:init-buffer
142 | (let ([buffers (make-thread-cell #f)])
143 | (lambda ()
144 | (let ([buffer (thread-cell-ref buffers)])
145 | (or buffer
146 | (let ([buffer (make-string 512)])
147 | (thread-cell-set! buffers buffer)
148 | buffer))))))
149 |
150 | (define (next-token prefix-skipped-chars break-chars comment port)
151 | (let* ((buffer (input-parse:init-buffer))
152 | (curr-buf-len (string-length buffer)) (quantum 16))
153 | (let loop ((i 0) (c (skip-while prefix-skipped-chars port)))
154 | (cond
155 | ((memq c break-chars) (substring buffer 0 i))
156 | ((eof-object? c)
157 | (if (memq '*eof* break-chars)
158 | (substring buffer 0 i) ; was EOF expected?
159 | (parser-error port "EOF while reading a token " comment)))
160 | (else
161 | (when (>= i curr-buf-len) ; make space for i-th char in buffer
162 | (begin ; -> grow the buffer by the quantum
163 | (set! buffer (string-append buffer (make-string quantum)))
164 | (set! quantum curr-buf-len)
165 | (set! curr-buf-len (string-length buffer))))
166 | (string-set! buffer i c)
167 | (read-char port) ; move to the next char
168 | (loop (++ i) (peek-char port))
169 | )))))
170 |
171 |
172 | ; -- procedure+: next-token-of INC-CHARSET PORT
173 | ; Reads characters from the PORT that belong to the list of characters
174 | ; INC-CHARSET. The reading stops at the first character which is not
175 | ; a member of the set. This character is left on the stream.
176 | ; All the read characters are returned in a string.
177 | ;
178 | ; -- procedure+: next-token-of PRED PORT
179 | ; Reads characters from the PORT for which PRED (a procedure of one
180 | ; argument) returns non-#f. The reading stops at the first character
181 | ; for which PRED returns #f. That character is left on the stream.
182 | ; All the results of evaluating of PRED up to #f are returned in a
183 | ; string.
184 | ;
185 | ; PRED is a procedure that takes one argument (a character
186 | ; or the EOF object) and returns a character or #f. The returned
187 | ; character does not have to be the same as the input argument
188 | ; to the PRED. For example,
189 | ; (next-token-of (lambda (c)
190 | ; (cond ((eof-object? c) #f)
191 | ; ((char-alphabetic? c) (char-downcase c))
192 | ; (else #f))))
193 | ; will try to read an alphabetic token from the current
194 | ; input port, and return it in lower case.
195 | ;
196 | ; Note: since we can't tell offhand how large the token being read is
197 | ; going to be, we make a guess, pre-allocate a string, and grow it by
198 | ; quanta if necessary. The quantum is always the length of the string
199 | ; before it was extended the last time. Thus the algorithm does
200 | ; a Fibonacci-type extension, which has been proven optimal.
201 | ;
202 | ; This procedure is similar to next-token but only it implements
203 | ; an inclusion rather than delimiting semantics.
204 |
205 | (define (next-token-of incl-list/pred port)
206 | (let* ((buffer (input-parse:init-buffer))
207 | (curr-buf-len (string-length buffer))
208 | (quantum 16))
209 | (if (procedure? incl-list/pred)
210 | (let loop ((i 0) (c (peek-char port)))
211 | (cond
212 | ((incl-list/pred c) =>
213 | (lambda (c)
214 | (when (>= i curr-buf-len) ; make space for i-th char in buffer
215 | (begin ; -> grow the buffer by the quantum
216 | (set! buffer (string-append buffer (make-string quantum)))
217 | (set! quantum curr-buf-len)
218 | (set! curr-buf-len (string-length buffer))))
219 | (string-set! buffer i c)
220 | (read-char port) ; move to the next char
221 | (loop (++ i) (peek-char port))))
222 | (else (substring buffer 0 i))))
223 | ; incl-list/pred is a list of allowed characters
224 | (let loop ((i 0) (c (peek-char port)))
225 | (cond
226 | ((not (memq c incl-list/pred)) (substring buffer 0 i))
227 | (else
228 | (when (>= i curr-buf-len) ; make space for i-th char in buffer
229 | (begin ; -> grow the buffer by the quantum
230 | (set! buffer (string-append buffer (make-string quantum)))
231 | (set! quantum curr-buf-len)
232 | (set! curr-buf-len (string-length buffer))))
233 | (string-set! buffer i c)
234 | (read-char port) ; move to the next char
235 | (loop (++ i) (peek-char port))
236 | ))))))
237 |
238 | ;; ============================================================
239 |
240 | ; -- Function: find-string-from-port? STR IN-PORT
241 | ; Looks for a string STR within the input port IN-PORT
242 | ; When the STR is found, the function returns the number of
243 | ; characters it has read from the port, and the port is set
244 | ; to read the first char after that (that is, after the STR)
245 | ; The function returns #f when the string wasn't found
246 | ; Note the function reads the port *STRICTLY* sequentially, and does not
247 | ; perform any buffering. So the function can be used even if the port is open
248 | ; on a pipe or other communication channel.
249 |
250 | (define (find-string-from-port? str input-port)
251 | (let ([output-counting-port (open-output-nowhere)])
252 | ;; Could just use file-position, but that returns byte count, not char count
253 | ;; Could use output string port, but don't need contents (memory use)
254 | (port-count-lines-enabled output-counting-port)
255 | (let ([result
256 | (regexp-match? (regexp-quote str)
257 | input-port
258 | 0 #f
259 | output-counting-port)])
260 | (and result
261 | (let-values ([(_line _col pos) (port-next-location output-counting-port)])
262 | (+ (string-length str)
263 | (sub1 pos)))))))
264 |
265 | ;; ============================================================
266 |
267 | ; Character-encoding
268 | ;
269 | ; This module deals with particular character-encoding issues such as
270 | ; conversions between characters and their ASCII or UCS2 codes, Scheme
271 | ; representations of "Carriage Return" (CR), "tabulation" (TAB) and
272 | ; other control characters.
273 | ;
274 | ; This module by necessity is platform-specific as character encoding
275 | ; issues are hardly addressed in R5RS. For example, the result of
276 | ; char->integer is generally not an ASCII code of an integer (although
277 | ; it is, on many Scheme systems, with the important exception of
278 | ; Scheme48 and SCSH). The level of support for character sets other
279 | ; than ASCII varies widely among Scheme systems.
280 | ;
281 | ; This file collects various character-encoding functions that are
282 | ; necessary for the SSAX XML parser. The functions are of general use
283 | ; and scope.
284 | ;
285 | ; $Id: char-encoding.scm,v 1.1 2003/04/09 20:34:28 oleg Exp $
286 |
287 |
288 | ; ascii->char INT -> CHAR
289 | ; return a character whose ASCII code is INT
290 | ; Note, because ascii->char is injective (there are more characters than
291 | ; ASCII characters), the inverse transformation is not defined.
292 | (define ascii->char integer->char)
293 |
294 |
295 | ; ucscode->char INT -> CHAR
296 | ; Return a character whose UCS (ISO/IEC 10646) code is INT
297 | ; Note
298 | ; This function is required for processing of XML character entities:
299 | ; According to Section "4.1 Character and Entity References"
300 | ; of the XML Recommendation:
301 | ; "[Definition: A character reference refers to a specific character
302 | ; in the ISO/IEC 10646 character set, for example one not directly
303 | ; accessible from available input devices.]"
304 |
305 | (define (ucscode->char code)
306 | (integer->char code))
307 |
308 | ; Commonly used control characters
309 |
310 | (define char-return (ascii->char 13))
311 | (define char-tab (ascii->char 9))
312 | (define char-newline (ascii->char 10)) ; a.k.a. #\newline, per R5RS
313 | (define char-space (ascii->char 32))
314 |
--------------------------------------------------------------------------------
/sxml/ssax/multi-parser.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require "myenv.ss"
3 | srfi/13/string
4 | "parse-error.ss"
5 | "SSAX-code.ss"
6 | "ssax-prim.ss"
7 | "id.ss"
8 | "xlink-parser.ss")
9 | (provide parent:new-level-seed-handler
10 | parent:construct-element
11 | ssax:multi-parser)
12 |
13 | ;; SSAX multi parser
14 | ;; Provides ID-index creation, SXML parent pointers and XLink grammar parsing
15 | ;
16 | ; This software is in Public Domain.
17 | ; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
18 | ;
19 | ; Please send bug reports and comments to:
20 | ; lisovsky@acm.org Kirill Lisovsky
21 | ; lizorkin@hotbox.ru Dmitry Lizorkin
22 | ;
23 | ; Primary features:
24 | ; '()
25 | ; '(parent)
26 | ; '(id)
27 | ; '(parent id)
28 | ; '(id xlink)
29 | ; '(parent id xlink)
30 |
31 | ;=========================================================================
32 | ; Parent seed
33 |
34 | ;------------------------------------------------
35 | ; Parent-related part of the seed
36 | ; It is a list of one element:
37 | ; a function of no arguments which returns a pointer to element's parent
38 | ; or '*TOP-PTR* symbol for a root SXML element
39 | ; Duuring an element construction it may be just a pointer to parents head,
40 | ; because a parent itself may be under construction at the moment.
41 |
42 | ; This function is called by the NEW-LEVEL-SEED handler
43 | ; elem-name = (if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)
44 | ; A new 'parent:seed' is returned
45 | (define (parent:new-level-seed-handler elem-name)
46 | (let
47 | ((head (list elem-name)))
48 | (list (lambda () head))))
49 |
50 | ; A function which constructs an element from its attributes, children
51 | ; and delayed parent information
52 | ; parent:seed - contains a delayed pointer to element's parent
53 | ; attrs - element's attributes
54 | ; children - a list of child elements
55 | (define (parent:construct-element parent:parent-seed parent:seed
56 | attrs children)
57 | (let ((head ((car parent:seed))))
58 | (append head
59 | (list (cons '@ attrs))
60 | children)))
61 |
62 | ;=========================================================================
63 | ; A seed
64 | ; seed = (list original-seed parent:seed id:seed xlink:seed)
65 | ; original-seed - the seed of the original 'SSAX:XML->SXML' function. It
66 | ; contains an SXML tree being constructed.
67 | ; parent:seed - parent-related part
68 | ; id:seed - id-related part
69 | ; xlink:seed - xlink-related part
70 |
71 | ;------------------------------------------------------------------------------
72 | ; Accessors
73 |
74 | ; (mul:seed-original seed)
75 | (define get-sxml-seed car)
76 |
77 | ; Renamed:
78 | ; mul:seed-parent get-pptr-seed
79 | ; mul:seed-id get-id-seed
80 | ; mul:seed-xlink get-xlink-seed
81 | ; Handler for attempts to access an absent seed.
82 | (define (bad-accessor type)
83 | (lambda args
84 | (error 'ssax:multi-parser "bad accessor; called with ~a"
85 | (string-join (map (lambda (x) (format "~e" x)) args) " "))))
86 |
87 | ; Seed constructor. #f seeds will be omitted.
88 | (define (make-seed . seeds)
89 | (let rpt
90 | ((s (cdr seeds)) (rzt (list (car seeds))))
91 | (cond
92 | ((null? s) (reverse rzt))
93 | ((car s) (rpt (cdr s)
94 | (cons (car s) rzt)))
95 | (else (rpt (cdr s) rzt)))))
96 |
97 | ;=========================================================================
98 | ; This is a multi parser constructor function
99 |
100 | ; parent, id, xlink - boolean parameters. #t means that we construct the
101 | ; corresponding feature, #f - otherwise
102 | ; ns - for future development. Is not used anywhere in the function
103 | (define (ssax:multi-parser . req-features)
104 | (let ((ns-assig '())
105 | (with-parent? (memq 'parent req-features))
106 | (with-id? (memq 'id req-features))
107 | (with-xlink? (memq 'xlink req-features)))
108 | (call-with-values
109 | (lambda () (values
110 | (if with-parent?
111 | cadr (bad-accessor 'par))
112 | (if with-id?
113 | (if with-parent? caddr cadr)
114 | (bad-accessor 'id))
115 | (if with-xlink?
116 | (cond
117 | ((and with-parent? with-id?)
118 | cadddr)
119 | ((or with-parent? with-id?)
120 | caddr)
121 | (else cadr))
122 | (bad-accessor 'xlink))))
123 | (lambda (get-pptr-seed get-id-seed get-xlink-seed)
124 | (let ((initial-seed ; Initial values for specialized seeds
125 | (make-seed
126 | '()
127 | (and with-parent? (list '*TOP-PTR*))
128 | (and with-id? (id:make-seed '() '()))
129 | (and with-xlink?
130 | (xlink:make-small-seed 'general '() '(1) '())))))
131 | (letrec
132 | (
133 | ; Making a special function, which, if applyed to the final seed,
134 | ; will construct a document
135 | (ending-actions
136 | (cond
137 | ((not (or with-id? with-xlink?))
138 | (lambda (seed)
139 | (let ((result (reverse (get-sxml-seed seed))))
140 | (cons '*TOP* result))))
141 | ((and with-id? (not with-xlink?)) ; with-id?
142 | (lambda (seed)
143 | (let((result (reverse (get-sxml-seed seed)))
144 | (aux (list (id:ending-action (get-id-seed seed)))))
145 | (cons* '*TOP*
146 | (cons '@@ aux)
147 | result))))
148 | ((and with-id? with-xlink?) ; with-id, with-xlink
149 | (lambda (seed)
150 | (let((result (reverse (get-sxml-seed seed)))
151 | (aux (list (xlink:ending-action (get-xlink-seed seed))
152 | (id:ending-action (get-id-seed seed)))))
153 | (cons* '*TOP*
154 | (cons '@@ aux)
155 | result))))
156 | (else
157 | (error 'ssax:multi-parser
158 | "ending-actions NIY: ~a ~a ~a"
159 | with-parent? with-id? with-xlink?))))
160 |
161 |
162 | ;------------------------------------
163 | ; Some handlers
164 |
165 | ; A special function
166 | ; When given an input port, it becomes a handler for a NEW-LEVEL-SEED
167 | (new-level-seed-handler
168 | (cond
169 | ((not (or with-parent? with-id? with-xlink?))
170 | (lambda(port)
171 | (lambda (elem-gi attributes namespaces expected-content seed)
172 | (list '()))))
173 | ((and with-parent? (not (or with-id? with-xlink?))) ; with-parent
174 | (lambda(port)
175 | (lambda (elem-gi attributes namespaces expected-content seed)
176 | (make-seed
177 | '()
178 | (and with-parent?
179 | (parent:new-level-seed-handler
180 | (if (symbol? elem-gi)
181 | elem-gi
182 | (RES-NAME->SXML elem-gi))))
183 | ))))
184 | ((and with-id? (not (or with-parent? with-xlink?))) ; with-id
185 | (lambda(port)
186 | (lambda (elem-gi attributes namespaces expected-content seed)
187 | (list ; make-seed
188 | '()
189 | (id:new-level-seed-handler (get-id-seed seed))))))
190 | ((and with-parent? with-id? (not with-xlink?)) ; parent, id
191 | (lambda(port)
192 | (lambda (elem-gi attributes namespaces expected-content seed)
193 | (list ; make-seed
194 | '()
195 | (parent:new-level-seed-handler
196 | (if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)))
197 | (id:new-level-seed-handler (get-id-seed seed))))))
198 | ((and with-id? with-xlink? (not with-parent?)) ; id, xlink
199 | (lambda(port)
200 | (lambda (elem-gi attributes namespaces expected-content seed)
201 | (list ; make-seed
202 | '()
203 | (id:new-level-seed-handler (get-id-seed seed))
204 | (xlink:new-level-seed-handler
205 | port attributes namespaces (get-xlink-seed seed))))))
206 | ((and with-parent? with-id? with-xlink?) ; parent, id, xlink
207 | (lambda(port)
208 | (lambda (elem-gi attributes namespaces expected-content seed)
209 | (list ; make-seed
210 | '()
211 | (parent:new-level-seed-handler
212 | (if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)))
213 | (id:new-level-seed-handler (get-id-seed seed))
214 | (xlink:new-level-seed-handler
215 | port attributes namespaces (get-xlink-seed seed))))))
216 | (else
217 | (error 'ssax:multi-parser
218 | "new-level NIY: ~s ~s ~s"
219 | with-parent? with-id? with-xlink?))))
220 |
221 |
222 | ; A special handler function for a FINISH-ELEMENT
223 | (finish-element-handler
224 | (cond
225 | ((not (or with-parent? with-id? with-xlink?))
226 | (lambda (elem-gi attributes namespaces parent-seed seed)
227 | (let ((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
228 | (attrs
229 | (attlist-fold
230 | (lambda (attr accum)
231 | (cons (list
232 | (if (symbol? (car attr)) (car attr)
233 | (RES-NAME->SXML (car attr)))
234 | (cdr attr)) accum))
235 | '() attributes)))
236 | (list ; make-seed
237 | (cons
238 | (cons
239 | (if (symbol? elem-gi) elem-gi
240 | (RES-NAME->SXML elem-gi))
241 | (if (null? attrs) children
242 | (cons (cons '@ attrs) children)))
243 | (get-sxml-seed parent-seed))))))
244 | ((and with-parent? (not (or with-id? with-xlink?))) ; parent
245 | (lambda (elem-gi attributes namespaces parent-seed seed)
246 | (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
247 | (attrs
248 | (attlist-fold
249 | (lambda (attr accum)
250 | (cons (list
251 | (if (symbol? (car attr)) (car attr)
252 | (RES-NAME->SXML (car attr)))
253 | (cdr attr)) accum))
254 | '() attributes)))
255 | (list ; make-seed
256 | (cons
257 | (parent:construct-element
258 | (get-pptr-seed parent-seed)
259 | (get-pptr-seed seed)
260 | attrs children)
261 | (get-sxml-seed parent-seed))
262 | ; pptr- seed from parent seed is not modified:
263 | (get-pptr-seed parent-seed)
264 | ))))
265 | ((and with-id? (not (or with-parent? with-xlink?))) ; id
266 | (lambda (elem-gi attributes namespaces parent-seed seed)
267 | (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
268 | (attrs
269 | (attlist-fold
270 | (lambda (attr accum)
271 | (cons (list
272 | (if (symbol? (car attr)) (car attr)
273 | (RES-NAME->SXML (car attr)))
274 | (cdr attr)) accum))
275 | '() attributes)))
276 | (let((element
277 | (cons
278 | (if(symbol? elem-gi)
279 | elem-gi
280 | (RES-NAME->SXML elem-gi))
281 | (if(null? attrs)
282 | children
283 | (cons (cons '@ attrs) children)))))
284 | (list ; make-seed
285 | (cons element (get-sxml-seed parent-seed))
286 | (id:finish-element-handler
287 | elem-gi attributes (get-id-seed seed) element))))))
288 | ((and with-parent? with-id? (not with-xlink?)) ; parent, id
289 | (lambda (elem-gi attributes namespaces parent-seed seed)
290 | (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
291 | (attrs
292 | (attlist-fold
293 | (lambda (attr accum)
294 | (cons (list
295 | (if (symbol? (car attr)) (car attr)
296 | (RES-NAME->SXML (car attr)))
297 | (cdr attr)) accum))
298 | '() attributes)))
299 | (let((element
300 | (parent:construct-element
301 | (get-pptr-seed parent-seed) (get-pptr-seed seed)
302 | attrs children)))
303 | (list ; make-seed
304 | (cons element (get-sxml-seed parent-seed))
305 | ; pptr- seed from parent seed is not modified:
306 | (get-pptr-seed parent-seed)
307 | (id:finish-element-handler
308 | elem-gi attributes (get-id-seed seed) element))))))
309 | ((and with-id? with-xlink? (not with-parent?)) ; id, xlink
310 | (lambda (elem-gi attributes namespaces parent-seed seed)
311 | (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
312 | (attrs
313 | (attlist-fold
314 | (lambda (attr accum)
315 | (cons (list
316 | (if (symbol? (car attr)) (car attr)
317 | (RES-NAME->SXML (car attr)))
318 | (cdr attr)) accum))
319 | '() attributes)))
320 | (let((element
321 | (cons
322 | (if(symbol? elem-gi)
323 | elem-gi
324 | (RES-NAME->SXML elem-gi))
325 | (if(null? attrs)
326 | children
327 | (cons (cons '@ attrs) children)))))
328 | (list ; make-seed
329 | (cons element (get-sxml-seed parent-seed))
330 | (id:finish-element-handler
331 | elem-gi attributes (get-id-seed seed) element)
332 | (xlink:finish-element-handler
333 | (get-xlink-seed parent-seed)
334 | (get-xlink-seed seed) element))))))
335 | ((and with-parent? with-id? with-xlink?) ; parent, id, xlink
336 | (lambda (elem-gi attributes namespaces parent-seed seed)
337 | (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
338 | (attrs
339 | (attlist-fold
340 | (lambda (attr accum)
341 | (cons (list
342 | (if (symbol? (car attr)) (car attr)
343 | (RES-NAME->SXML (car attr)))
344 | (cdr attr)) accum))
345 | '() attributes)))
346 | (let((element
347 | (parent:construct-element
348 | (get-pptr-seed parent-seed) (get-pptr-seed seed)
349 | attrs children)))
350 | (list ; make-seed
351 | (cons element (get-sxml-seed parent-seed))
352 | ; pptr- seed from parent seed is not modified:
353 | (get-pptr-seed parent-seed)
354 | (id:finish-element-handler
355 | elem-gi attributes (get-id-seed seed) element)
356 | (xlink:finish-element-handler
357 | (get-xlink-seed parent-seed)
358 | (get-xlink-seed seed) element))))))
359 | (else
360 | (error 'ssax:multi-parser
361 | "finish-element: NIY"))))
362 |
363 |
364 | ; A special function
365 | ; Given 'namespaces', it becomes a handler for a DOCTYPE
366 | (doctype-handler
367 | (if
368 | (not with-id?)
369 | (lambda (namespaces)
370 | (lambda (port docname systemid internal-subset? seed)
371 | (when internal-subset?
372 | (ssax:warn port
373 | "Internal DTD subset is not currently handled ")
374 | (ssax:skip-internal-dtd port))
375 | (ssax:warn port "DOCTYPE DECL " docname " "
376 | systemid " found and skipped")
377 | (values #f '() namespaces seed)))
378 | (cond
379 | ((not (or with-parent? with-xlink?)) ; with-id
380 | (lambda (namespaces)
381 | (lambda (port docname systemid internal-subset? seed)
382 | (values
383 | #f '() namespaces
384 | (list ; make-seed
385 | (get-sxml-seed seed)
386 | (id:doctype-handler port systemid internal-subset?))))))
387 | ((and with-parent? (not with-xlink?)) ; with-parent, with-id
388 | (lambda (namespaces)
389 | (lambda (port docname systemid internal-subset? seed)
390 | (values
391 | #f '() namespaces
392 | (list ; make-seed
393 | (get-sxml-seed seed)
394 | (get-pptr-seed seed)
395 | (id:doctype-handler port systemid internal-subset?))))))
396 | ((and (not with-parent?) with-xlink?) ; with-id, with-xlink
397 | (lambda (namespaces)
398 | (lambda (port docname systemid internal-subset? seed)
399 | (values
400 | #f '() namespaces
401 | (list ; make-seed
402 | (get-sxml-seed seed)
403 | (id:doctype-handler port systemid internal-subset?)
404 | (get-xlink-seed seed))))))
405 | (else ; with-parent, with-id, with-xlink
406 | (lambda (namespaces)
407 | (lambda (port docname systemid internal-subset? seed)
408 | (values
409 | #f '() namespaces
410 | (list ; make-seed
411 | (get-sxml-seed seed)
412 | (get-pptr-seed seed)
413 | (id:doctype-handler port systemid internal-subset?)
414 | (get-xlink-seed seed)))))))))
415 |
416 | ) ; end of letrec
417 |
418 | ; Constructing a special parser function
419 | (lambda (port)
420 | (let
421 | ((namespaces
422 | (map (lambda (el)
423 | (cons* #f (car el) (ssax:uri-string->symbol (cdr el))))
424 | ns-assig)))
425 | (ending-actions
426 | ((ssax:make-parser
427 |
428 | NEW-LEVEL-SEED
429 | (new-level-seed-handler port)
430 |
431 | FINISH-ELEMENT
432 | finish-element-handler
433 |
434 | CHAR-DATA-HANDLER
435 | (lambda (string1 string2 seed)
436 | (cons
437 | (if(string-null? string2)
438 | (cons string1 (car seed))
439 | (cons* string2 string1 (car seed)))
440 | (cdr seed)))
441 |
442 | DOCTYPE
443 | (doctype-handler namespaces)
444 |
445 | UNDECL-ROOT
446 | (lambda (elem-gi seed)
447 | (values #f '() namespaces seed))
448 |
449 | PI
450 | ((*DEFAULT* . (lambda (port pi-tag seed)
451 | (cons
452 | (cons
453 | (list '*PI* pi-tag
454 | (ssax:read-pi-body-as-string port))
455 | (car seed))
456 | (cdr seed)))))
457 | )
458 | port
459 | initial-seed))))))
460 | ))))
461 |
--------------------------------------------------------------------------------
/sxml/ssax/myenv.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require (for-syntax racket/base syntax/stx))
3 | (provide myenv:error
4 | assert
5 | cout
6 | cerr
7 | nl
8 | ++
9 | ++!
10 | --
11 | --!
12 | whennot
13 | push!
14 | cond-expand
15 | inc
16 | dec
17 | cons*)
18 |
19 | ;; $Id: myenv.ss,v 1.14 2002/03/28 22:23:06 nwv Exp $
20 | ;; $Source: /home/nwv/cvsroot/projects/ssax-plt/myenv.ss,v $
21 | ;; [ssax-plt] This is a modified version of "official/lib/myenv.scm".
22 | ;(module myenv mzscheme
23 | ; (require (lib "defmacro.ss"))
24 | ; (require (rename (lib "pretty.ss") pp pretty-print))
25 |
26 | ; My Standard Scheme "Prelude"
27 | ;
28 | ; This version of the prelude contains several forms and procedures
29 | ; that are specific to a Gambit-C 3.0 system.
30 | ; See myenv-scm.scm, myenv-bigloo.scm, etc. for versions
31 | ; of this prelude that are tuned to other Scheme systems.
32 | ;
33 | ; Id: myenv.scm,v 1.2 2001/09/21 19:53:30 oleg Exp
34 |
35 | (define myenv:error error)
36 |
37 | ; assert the truth of an expression (or of a sequence of expressions)
38 | ;
39 | ; syntax: assert ?expr ?expr ...
40 | ;
41 | ; If (and ?expr ?expr ...) evaluates to anything but #f, the result
42 | ; is the value of that expression.
43 | ; If (and ?expr ?expr ...) evaluates to #f, an error is reported.
44 | ; The error message will show the failed expressions, as well
45 | ; as the values of selected variables (or expressions, in general).
46 |
47 | ;; ryanc: The original assert macro here was broken. It sometimes
48 | ;; calls cerr on procedures expecting to print them out, but (cerr
49 | ;; proc) applies proc to stderr port! Also, report: was only used by
50 | ;; assure macro, which was unused, so both assure and report: deleted.
51 |
52 | (define-syntax-rule (assert e ...)
53 | (let ([v (and e ...)])
54 | (unless v
55 | (fprintf (current-error-port)
56 | "assertion failure: ~s\n" '
57 | '(assert e ...)))
58 | v))
59 |
60 | ; like cout << arguments << args
61 | ; where argument can be any Scheme object. If it's a procedure
62 | ; (without args) it's executed rather than printed (like newline)
63 |
64 | (define (cout . args)
65 | (for-each (lambda (x)
66 | (if (procedure? x) (x) (display x)))
67 | args))
68 |
69 | ;; [ssax-plt] In `cerr', `##stderr' replaced with `(current-error-port)'.
70 |
71 | (define (cerr . args)
72 | (for-each (lambda (x)
73 | (if (procedure? x)
74 | (x (current-error-port))
75 | (display x (current-error-port))))
76 | args))
77 |
78 | ;(##define-macro (nl) '(newline))
79 | (define nl (string #\newline))
80 |
81 | ;; [ssax-plt] `##fixnum.' prefix removed.
82 |
83 | ; Some useful increment/decrement operators
84 | ; Note, ##fixnum prefix is Gambit-specific, it means that the
85 | ; operands assumed FIXNUM (as they ought to be anyway).
86 | ; This perfix could be safely removed: it'll leave the code just as
87 | ; correct, but more portable (and less efficient)
88 |
89 | (define-syntax-rule (++! x) (set! x (add1 x)))
90 | (define-syntax-rule (++ x) (add1 x))
91 | (define-syntax-rule (--! x) (set! x (sub1 x)))
92 | (define-syntax-rule (-- x) (sub1 x))
93 |
94 | ; Some useful control operators
95 |
96 | ; if condition is false execute stmts in turn
97 | ; and return the result of the last statement
98 | ; otherwise, return #t
99 | ; This primitive is often called 'unless'
100 | (define-syntax-rule (whennot condition . stmts)
101 | (or condition (begin . stmts)))
102 |
103 | ; Prepend an ITEM to a LIST, like a Lisp macro PUSH
104 | ; an ITEM can be an expression, but ls must be a VAR
105 | (define-syntax-rule (push! item ls)
106 | (set! ls (cons item ls)))
107 |
108 | ; Implementation of SRFI-0
109 | ;; ryanc: rewrote, not very robust
110 | (define-syntax (cond-expand stx)
111 | (define (feature-req-satisfied? freq)
112 | (cond [(memq freq '(plt srfi-0 else)) #t]
113 | [(not (pair? freq)) #f]
114 | [(eq? 'and (car freq))
115 | (andmap feature-req-satisfied? (cdr freq))]
116 | [(eq? 'or (car freq))
117 | (ormap feature-req-satisfied? (cdr freq))]
118 | [(eq? 'not (car freq))
119 | (not (feature-req-satisfied? (cadr freq)))]
120 | [else #f]))
121 | (syntax-case stx ()
122 | [(ce [freq . body] ...)
123 | (or (for/or ([clause (in-list (syntax->list #'([freq . body] ...)))]
124 | #:when (feature-req-satisfied? (syntax->datum (stx-car clause))))
125 | #`(begin . #,(stx-cdr clause)))
126 | (raise-syntax-error #f "unsatisfied" stx))]))
127 |
128 | ;==============================================================================
129 | ; DL: this piece of code is taken from the previous version of "myenv.scm"
130 | ; Stubs
131 |
132 | (define-syntax-rule (inc x) (add1 x))
133 | (define-syntax-rule (dec x) (sub1 x))
134 |
135 | (define cons* list*)
136 |
--------------------------------------------------------------------------------
/sxml/ssax/parse-error.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require racket/contract/base
3 | syntax/readerr
4 | "errors-and-warnings.rkt")
5 | (provide/contract
6 | [parser-error
7 | (->* (port?) () #:rest list? any)]
8 | [ssax:warn
9 | (->* (port?) () #:rest list? any)])
10 |
11 | ; This code provides informative error messages
12 | ; for SSAX (S)XML parser.
13 |
14 | ;==============================================================================
15 | ; Error handler
16 |
17 | ; According to the SSAX convention this function
18 | ; accepts the port as its first argument which is used for
19 | ; location of the error in input file.
20 | ; Other parameters are considered as error messages,
21 | ; they are printed to stderr as is.
22 |
23 | ;; NB : updated to signal a racket error rather than printing to stdout.
24 | (define (parser-error p . args)
25 | (let-values ([(line col pos) (port-next-location p)])
26 | (raise-read-error (format "SXML parser error: ~a" (args->display-string args))
27 | (object-name p)
28 | line col pos #f)))
29 |
30 | ;; map args to their display representations, glue them together:
31 | (define (args->display-string args)
32 | (apply string-append (map (lambda (x) (format "~a" x)) args)))
33 |
34 | (define (ssax:warn p . args)
35 | (sxml:warn 'ssax:warn
36 | "warning at position ~a: ~a"
37 | (file-position p)
38 | (args->display-string args)))
39 |
--------------------------------------------------------------------------------
/sxml/ssax/ssax-prim.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require "SSAX-code.ss")
3 | (provide RES-NAME->SXML
4 | reverse-collect-str
5 | reverse-collect-str-drop-ws)
6 |
7 | ;=========================================================================
8 | ; This is a multi parser constructor function
9 |
10 | ;------------------------------------------------
11 | ; Some Oleg Kiselyov's features from SSAX:XML->SXML
12 |
13 | ; Returns
14 | (define (RES-NAME->SXML res-name)
15 | (string->symbol
16 | (string-append
17 | (symbol->string (car res-name))
18 | ":"
19 | (symbol->string (cdr res-name)))))
20 |
21 |
22 | ; given the list of fragments (some of which are text strings)
23 | ; reverse the list and concatenate adjacent text strings
24 | (define (reverse-collect-str fragments)
25 | (if (null? fragments) '() ; a shortcut
26 | (let loop ((fragments fragments) (result '()) (strs '()))
27 | (cond
28 | ((null? fragments)
29 | (if (null? strs) result
30 | (cons (apply string-append strs) result)))
31 | ((string? (car fragments))
32 | (loop (cdr fragments) result (cons (car fragments) strs)))
33 | (else
34 | (loop (cdr fragments)
35 | (cons
36 | (car fragments)
37 | (if (null? strs) result
38 | (cons (apply string-append strs) result)))
39 | '()))))))
40 |
41 |
42 | ; given the list of fragments (some of which are text strings)
43 | ; reverse the list and concatenate adjacent text strings
44 | ; We also drop "unsignificant" whitespace, that is, whitespace
45 | ; in front, behind and between elements. The whitespace that
46 | ; is included in character data is not affected.
47 | (define (reverse-collect-str-drop-ws fragments)
48 | (cond
49 | ((null? fragments) '()) ; a shortcut
50 | ((and (string? (car fragments)) ; another shortcut
51 | (null? (cdr fragments)) ; remove trailing ws
52 | (string-whitespace? (car fragments))) '())
53 | (else
54 | (let loop ((fragments fragments) (result '()) (strs '())
55 | (all-whitespace? #t))
56 | (cond
57 | ((null? fragments)
58 | (if all-whitespace? result ; remove leading ws
59 | (cons (apply string-append strs) result)))
60 | ((string? (car fragments))
61 | (loop (cdr fragments) result (cons (car fragments) strs)
62 | (and all-whitespace?
63 | (string-whitespace? (car fragments)))))
64 | (else
65 | (loop (cdr fragments)
66 | (cons
67 | (car fragments)
68 | (if all-whitespace? result
69 | (cons (apply string-append strs) result)))
70 | '() #t)))))))
71 |
--------------------------------------------------------------------------------
/sxml/ssax/ssax.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 |
3 | (require "myenv.rkt")
4 | (require "util.rkt")
5 | (require "parse-error.rkt")
6 | (require "input-parse.rkt")
7 | (require "SSAX-code.rkt")
8 | (require "SXML-tree-trans.rkt")
9 | (require "sxpathlib.rkt")
10 | (require "access-remote.rkt")
11 | (require "id.rkt")
12 | (require "xlink-parser.rkt")
13 | (require "ssax-prim.rkt")
14 | (require "multi-parser.rkt")
15 | (provide (all-from-out "myenv.rkt"))
16 | (provide (all-from-out "util.rkt"))
17 | (provide (all-from-out "parse-error.rkt"))
18 | (provide (all-from-out "input-parse.rkt"))
19 | (provide (all-from-out "SSAX-code.rkt"))
20 | (provide (all-from-out "SXML-tree-trans.rkt"))
21 | (provide (all-from-out "sxpathlib.rkt"))
22 | (provide (all-from-out "access-remote.rkt"))
23 | (provide (all-from-out "id.rkt"))
24 | (provide (all-from-out "xlink-parser.rkt"))
25 | (provide (all-from-out "ssax-prim.rkt"))
26 | (provide (all-from-out "multi-parser.rkt"))
27 |
--------------------------------------------------------------------------------
/sxml/ssax/tests/vinput-parse.scm:
--------------------------------------------------------------------------------
1 | #lang racket
2 |
3 | (require srfi/13/string
4 | "../myenv.rkt"
5 | "../util.rkt"
6 | "../input-parse.rkt"
7 | #;"../look-for-str.rkt"
8 | #;"../catch-error.rkt"
9 | rackunit)
10 |
11 | ;; copied from ssax-tests: ryanc, does this belong here?
12 | ;; ryanc: catch exceptions (simplified from original lib/catch-error.scm)
13 | (define-syntax failed?
14 | (syntax-rules ()
15 | ((failed? . stmts)
16 | (with-handlers ([exn? (lambda (e) #t)])
17 | (let () . stmts)
18 | #f))))
19 |
20 | ;-----------------------------------------------------------------------------
21 | ; This is a test driver for input parsing functions, to make sure they
22 | ; really work as intended
23 | ;
24 | ; IMPORT
25 | ; appropriate prelude: myenv.scm, myenv-bigloo.scm, myenv-scm.scm
26 | ; depending on your system
27 | ; catch-error.scm -- for procedure, for-syntax
28 | ; util.scm
29 | ; srfi-13-local.scm if no native SRFI-13 support is available.
30 | ; look-for-str.scm
31 | ; input-parse.scm
32 | ;
33 | ; See the Makefile in this directory, which shows how to run
34 | ; this code on a variery of Scheme systems.
35 | ;
36 | ; $Id: vinput-parse.scm,v 1.8 2004/07/08 21:53:33 oleg Exp $
37 |
38 |
39 | ; This function is imported into the input-parse.scm module
40 | (define (parser-error port msg . specializing-msgs)
41 | (apply error (cons msg specializing-msgs)))
42 |
43 | ; make sure that the 'FORM' gave upon evaluation the
44 | ; EXPECTED-RESULT
45 |
46 | (define-syntax expect
47 | (syntax-rules ()
48 | ((expect form expected-result)
49 | (check-equal? form expected-result))))
50 |
51 |
52 | ; apply FORM to parse the input from the STR
53 | ; and compare the result with the EXPECTED-RESULT
54 | ; EXPECTED-RESULT is a pair: expected result from the
55 | ; form and the expected next character from the stream
56 | (define-syntax expect-parse-result
57 | (syntax-rules ()
58 | ((expect-parse-result str form expected-result)
59 | (with-input-from-string
60 | str
61 | (lambda ()
62 | (let* ((real-result form) (real-next-char
63 | (read-char (current-input-port))))
64 | (check-equal? (cons real-result real-next-char)
65 | expected-result)))))))
66 |
67 | ; Build a string out of components
68 | ; A component can be a string, a character, a number
69 | ; (converted into a character), symbols cr and lf
70 | ; We could've used a notation like "abc\n\t"
71 | ; Unfortunately, not all Scheme systems support C-like notation
72 | ; of Scheme strings
73 | (define (s . components)
74 | (apply string-append
75 | (map (lambda (component)
76 | (cond
77 | ((string? component) component)
78 | ((char? component) (string component))
79 | ((number? component) (string (integer->char component)))
80 | ((eq? 'lf component) (string #\newline))
81 | ((eq? 'cr component) (string (integer->char 13)))
82 | (else (error "bad component: " component))))
83 | components)))
84 |
85 |
86 | (test-case
87 | "string->integer"
88 | (let ()
89 | (expect (string->integer "" 0 0) #f)
90 | (expect (string->integer "" 0 1) #f)
91 | (expect (string->integer "" 1 0) #f)
92 | (expect (string->integer "1" 0 0) #f)
93 | (expect (string->integer "1" 0 1) 1)
94 | (expect (string->integer "1" 0 2) #f)
95 | (expect (string->integer "1" 1 1) #f)
96 | (expect (string->integer "1" 1 0) #f)
97 | (expect (string->integer "81234" 0 5) 81234)
98 | (expect (string->integer "81234" 1 5) 1234)
99 | (expect (string->integer "81234" -1 5) #f)
100 | (expect (string->integer "81234" 1 6) #f)
101 | (expect (string->integer "81234" 1 4) 123)
102 | (expect (string->integer "81234" 5 4) #f)
103 | (expect (string->integer "81234" 4 4) #f)
104 | (expect (string->integer "81234" 4 5) 4)
105 | (expect (string->integer "-1234" 4 5) 4)
106 | (expect (string->integer "-1234" 1 5) 1234)
107 | (expect (string->integer "-1234" 0 5) #f)
108 | (expect (string->integer "x12+4" 0 5) #f)
109 | (expect (string->integer "x12+4" 0 3) #f)
110 | (expect (string->integer "x12+4" 1 3) 12)
111 | (expect (string->integer "x12+4" 1 4) #f)
112 | ))
113 |
114 | (test-case
115 | "string-split"
116 | (let ((tab " ")) ; This is a string of one tab character
117 | (expect (string-split "") '())
118 | (expect (string-split "" '()) '())
119 | (expect (string-split "" '() 0) '())
120 | (expect (string-split "" '() 10) '())
121 | (expect (string-split " " '() 0) '())
122 | (expect (string-split " ") '())
123 | (expect (string-split (string-append tab " " tab) '() 10) '())
124 | (expect (string-split "abcd" '() 10) '("abcd"))
125 | (expect (string-split "abcd") '("abcd"))
126 | (expect (string-split " abcd ") '("abcd"))
127 | (expect (string-split " abcd " '() -5) '())
128 | (expect (string-split " abcd " '() 1) '("abcd "))
129 | (expect (string-split (string-append " ab" tab "cd ")) '("ab" "cd"))
130 | (expect (string-split (string-append " ab" tab " cd ")) '("ab" "cd"))
131 | (expect (string-split (string-append " ab" tab " cd ") '() 1)
132 | (list (string-append "ab" tab " cd ")))
133 | (expect (string-split (string-append " ab" tab " cd ") '() 2)
134 | '("ab" "cd "))
135 | (expect (string-split (string-append " ab" tab " cd ") '() 3)
136 | '("ab" "cd"))
137 | (expect (string-split " abc d e f ") '("abc" "d" "e" "f"))
138 | (expect (string-split " abc d e f " '() 1) '("abc d e f "))
139 | (expect (string-split " abc d e f " '() 3) '("abc" "d" "e f "))
140 |
141 | (expect (string-split "" '(#\: #\+)) '())
142 | (expect (string-split "" '(#\: #\+) 0) '())
143 | (expect (string-split "" '(#\: #\+) 10) '())
144 | (expect (string-split " " '(#\: #\+)) '(" "))
145 | (expect (string-split " " '(#\: #\+) 1) '(" "))
146 | (expect (string-split " " '(#\: #\+) 0) '())
147 | (expect (string-split ":" '(#\: #\+)) '("" ""))
148 | (expect (string-split "a:" '(#\: #\+)) '("a" ""))
149 | (expect (string-split "a:" '(#\: #\+) 1) '("a:"))
150 | (expect (string-split ":a" '(#\: #\+)) '("" "a"))
151 | (expect (string-split ":a" '(#\: #\+) 1) '(":a"))
152 | (expect (string-split ":" '(#\: #\+) 1) '(":"))
153 | (expect (string-split ":+" '(#\: #\+)) '("" "" ""))
154 | (expect (string-split ":+" '(#\: #\+) 1) '(":+"))
155 | (expect (string-split ":+" '(#\: #\+) 2) '("" "+"))
156 | (expect (string-split ":+" '(#\: #\+) 3) '("" "" ""))
157 | (expect (string-split ":+" '(#\: #\+) 4) '("" "" ""))
158 | (expect (string-split ":abc:d:e:f:" '(#\:)) '("" "abc" "d" "e" "f" ""))
159 | (expect (string-split ":abc:d:e::f:" '(#\:)) '("" "abc" "d" "e" "" "f" ""))
160 | (expect (string-split "root:x:0:0:Lord" '(#\:) 2) '("root" "x:0:0:Lord"))
161 | (expect (string-split "/usr/local/bin:/usr/bin:/usr/ucb/bin" '(#\:))
162 | '("/usr/local/bin" "/usr/bin" "/usr/ucb/bin"))
163 | (expect (string-split "/usr/local/bin" '(#\/)) '("" "usr" "local" "bin"))
164 | ))
165 |
166 |
167 | (test-case
168 | "make-char-quotator"
169 | (let ((string->goodHTML
170 | (make-char-quotator
171 | '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """)))))
172 | (expect (string->goodHTML "abc!def ") "abc!def ")
173 | (expect (string->goodHTML "") "")
174 | (expect (string->goodHTML "<") '("<"))
175 | (expect (string->goodHTML "goodHTML "a&b") '("a" "&" "b"))
177 | (expect (string->goodHTML "a b>") '("a b" ">"))
178 | (expect (string->goodHTML "<>&\"") '("<" ">" "&" """))
179 | (expect (string->goodHTML " <>&\\\"")
180 | '(" " "<" ">" "&" "\\" """))
181 | (expect (string->goodHTML "&") '("&" "amp;"))
182 | ))
183 |
184 |
185 | (test-case
186 | "assert-curr-char"
187 | (let ()
188 | (define (test-assert-curr-char str char-list)
189 | (with-input-from-string str
190 | (lambda ()
191 | (assert-curr-char char-list "assert curr char" (current-input-port))
192 | )))
193 |
194 | (expect (test-assert-curr-char " abcd" '(#\a #\space)) #\space)
195 | (expect (test-assert-curr-char "a bcd" '(#\a #\space)) #\a)
196 | (assert (failed? (expect (test-assert-curr-char "bacd" '(#\a #\space)) #\a)))
197 | ))
198 |
199 | ;; wow, can't see how to fix these either
200 | #;((test-case
201 | "skipping of characters"
202 | (let (
203 | (eof (with-input-from-string "" read)))
204 |
205 | ;; calls skip-until-char with a number?
206 | #;(expect-parse-result " abcd" (skip-until-char 1) '(#f . #\a))
207 | #;(assert (failed? (expect-parse-result " abcd" (skip-until-char 10) '(#f . #f))))
208 | #;(expect-parse-result " abcd" (skip-until-char 5) `(#f . ,eof))
209 | (expect-parse-result " abcd" (skip-until-char '(#\a #\space)) '(#\space . #\a))
210 | (expect-parse-result "xxxc bcd" (skip-until-char '(#\a #\space #\c))
211 | '(#\c . #\space))
212 | (expect-parse-result "xxxc" (skip-until-char '(#\a #\space #\c) (current-input-port))
213 | `(#\c . ,eof))
214 | (assert (failed? (expect-parse-result "xxxd"
215 | (skip-until-char '(#\a #\space #\c)) '(#f . #f))))
216 | (expect-parse-result "xxxd" (skip-until-char '(#\a #\space #\c *eof*))
217 | `(,eof . ,eof))
218 | (expect-parse-result "xxxc" (skip-until-char '(#\a #\space #\c *eof*))
219 | `(#\c . ,eof))
220 |
221 | (expect-parse-result "xxxd" (skip-while '(#\a #\space #\x))
222 | '(#\d . #\d))
223 | (expect-parse-result "yxxxd" (skip-while '(#\a #\space #\x))
224 | '(#\y . #\y))
225 | (expect-parse-result "xxx" (skip-while '(#\a #\space #\x) (current-input-port))
226 | `(,eof . ,eof))
227 | (expect-parse-result "xxa x" (skip-while '(#\a #\space #\x))
228 | `(,eof . ,eof))
229 | )))
230 |
231 | (test-case
232 | "tokenizing of the input stream"
233 | (let ((eof (with-input-from-string "" read)))
234 | ;; apparently there's supposed to be a 2-arg version of next-token?
235 | (define (next-token/2 a b)
236 | (next-token a b "no comment..." (current-input-port)))
237 |
238 | (expect-parse-result "xxxd"
239 | (next-token '(#\a #\space #\x) '(#\d) "next token" (current-input-port))
240 | '("" . #\d))
241 | (expect-parse-result "xxx xa cccxd"
242 | (next-token/2 '(#\a #\space #\x) '(#\d))
243 | '("cccx" . #\d))
244 | (expect-parse-result "xxx xa cccxdaa"
245 | (next-token/2 '() '(#\d))
246 | '("xxx xa cccx" . #\d))
247 | (expect-parse-result "xxx xa cccxdaa"
248 | (next-token/2 '() '(#\d #\a))
249 | '("xxx x" . #\a))
250 | (expect-parse-result "cccxd"
251 | (next-token/2 '(#\a #\space #\x) '(#\d))
252 | '("cccx" . #\d))
253 | (assert (failed? (expect-parse-result
254 | "cccx"
255 | (next-token '(#\a #\space #\x) '(#\d) "next token"
256 | (current-input-port))
257 | '(#f . #f))))
258 | (assert (failed? (expect-parse-result
259 | "cccx"
260 | (next-token/2 '(#\a #\space #\x) '(#\d))
261 | '(#f . #f))))
262 | (expect-parse-result "cccx"
263 | (next-token '(#\a #\space #\x) '(#\d *eof*) "" (current-input-port))
264 | `("cccx" . ,eof))
265 | (assert (failed? (expect-parse-result
266 | "cccx"
267 | (next-token/2 '(#\c #\space #\x) '(#\d))
268 | '(#f . #f))))
269 | ))
270 |
271 | (test-case
272 | "tokenizing, big tokens"
273 | (let* ((eof (with-input-from-string "" read))
274 | (big-token
275 | (call-with-output-string
276 | (lambda (port)
277 | (do ((i 0 (inc i))) ((>= i 512))
278 | (display (modulo i 10) port)))))
279 | (big-token1 (string-append
280 | big-token big-token big-token
281 | (substring big-token 0 511)))
282 | (term-list '(#\space #\newline *eof*)))
283 |
284 | (call-with-input-string big-token
285 | (lambda (port)
286 | (let ((token (next-token '(#\space) term-list "" port)))
287 | (assert (equal? token big-token) (eof-object? (peek-char port))))))
288 |
289 | (call-with-input-string big-token1
290 | (lambda (port)
291 | (let ((token (next-token '() term-list "" port)))
292 | (assert (equal? token big-token1) (eof-object? (read-char port))))))
293 |
294 | (call-with-input-string (string-append " " big-token " ")
295 | (lambda (port)
296 | (let ((token (next-token '(#\space) term-list "comment" port)))
297 | (assert (equal? token big-token)
298 | (memv (peek-char port) term-list)))))
299 |
300 | (call-with-input-string (string-append big-token1 (string #\newline))
301 | (lambda (port)
302 | (let ((token (next-token '(#\space) term-list "" port)))
303 | (assert (equal? token big-token1)
304 | (memv (peek-char port) term-list)))))
305 |
306 | (call-with-input-string (string-append big-token)
307 | (lambda (port)
308 | (let ((token (next-token-of
309 | (lambda (c) (and (not (eof-object? c)) c)) port)))
310 | (assert (equal? token big-token)
311 | (eof-object? (peek-char port))))))
312 |
313 | (call-with-input-string (string-append big-token1 (string #\newline))
314 | (lambda (port)
315 | (let ((token (next-token-of (string->list "a0123456789") port)))
316 | (assert (equal? token big-token1)
317 | (memv (peek-char port) term-list)))))
318 | ))
319 |
320 | ;; wrong # of args to next-token of, no idea how to fix
321 | #;(
322 | (test-case
323 | "next-token-of"
324 | (let ((eof (with-input-from-string "" read)))
325 |
326 | (expect-parse-result "" (next-token-of '(#\a #\space #\x))
327 | `("" . ,eof))
328 | (expect-parse-result "d" (next-token-of '(#\a #\space #\x))
329 | '("" . #\d))
330 | (expect-parse-result "a xx " (next-token-of '(#\a #\space #\x))
331 | `("a xx " . ,eof))
332 | (expect-parse-result (s "a xx " 'lf)
333 | (next-token-of '(#\a #\space #\x) (current-input-port))
334 | '("a xx " . #\newline))
335 | (expect-parse-result (s "a " 'cr " xx ") (next-token-of '(#\a #\space #\x))
336 | (cons "a " (integer->char 13)))
337 | (expect-parse-result (s 'lf "a " 'cr " xx ")
338 | (next-token-of '(#\a #\space #\x))
339 | '("" . #\newline))
340 |
341 | (expect-parse-result ""
342 | (next-token-of (lambda (c) (and (not (eof-object? c)) c)))
343 | `("" . ,eof))
344 | (expect-parse-result (s "123" 'lf 'cr 0 "!")
345 | (next-token-of (lambda (c) (and (not (eof-object? c)) c)))
346 | `(,(s "123" 'lf 'cr 0 "!") . ,eof))
347 |
348 | (let ((down-pred
349 | (lambda (c)
350 | (cond ((eof-object? c) #f)
351 | ((char-alphabetic? c) (char-downcase c))
352 | (else #f)))))
353 |
354 | (expect-parse-result "" (next-token-of down-pred)
355 | `("" . ,eof))
356 | (expect-parse-result "12abc" (next-token-of down-pred)
357 | '("" . #\1))
358 | (expect-parse-result "abc12"
359 | (next-token-of down-pred (current-input-port))
360 | '("abc" . #\1))
361 | (expect-parse-result "aB c12" (next-token-of down-pred)
362 | '("ab" . #\space))
363 | (expect-parse-result "XYZ" (next-token-of down-pred)
364 | `("xyz" . ,eof))
365 | )
366 | )))
367 |
368 | ;; really can't find any definition or reference to read-text-line...
369 | #;((test-case
370 | "read-text-line"
371 | (let ((eof (with-input-from-string "" read)))
372 |
373 | (expect-parse-result "" (read-text-line)
374 | `(,eof . ,eof))
375 | (expect-parse-result "a 1 % xx" (read-text-line)
376 | `("a 1 % xx" . ,eof))
377 | (expect-parse-result (s 'lf) (read-text-line)
378 | `("" . ,eof))
379 | (expect-parse-result (s 'cr) (read-text-line)
380 | `("" . ,eof))
381 | (expect-parse-result (s 'cr 'lf) (read-text-line)
382 | `("" . ,eof))
383 | (expect-parse-result (s 'cr 'cr 'lf) (read-text-line (current-input-port))
384 | (cons "" (integer->char 13)))
385 | (expect-parse-result (s 'lf 'lf) (read-text-line)
386 | '("" . #\newline))
387 | (expect-parse-result (s #\space 'lf 'cr 'lf) (read-text-line)
388 | (cons " " (integer->char 13)))
389 | (expect-parse-result (s " 12" 'lf "3" 'cr 'lf) (read-text-line)
390 | '(" 12" . #\3))
391 | (expect-parse-result (s " 12 " 'cr "3" 'cr 'lf) (read-text-line)
392 | '(" 12 " . #\3))
393 | (expect-parse-result (s " 12 " 'cr 'lf " 4" 'cr 'lf)
394 | (read-text-line (current-input-port))
395 | '(" 12 " . #\space))
396 | (expect-parse-result (s " 12 " 'cr 'lf 'cr 'lf) (read-text-line)
397 | (cons " 12 " (integer->char 13)))
398 | )))
399 |
400 | ;; tests fail. looks like now parse returns # on parse of empty
401 | ;; string?
402 | #;((test-case
403 | "read-string"
404 | (let ((eof (with-input-from-string "" read)))
405 |
406 | (expect-parse-result "" (read-string 1)
407 | `("" . ,eof))
408 | (expect-parse-result "" (read-string 0)
409 | `("" . ,eof))
410 | (expect-parse-result "1234" (read-string 0)
411 | '("" . #\1))
412 | (expect-parse-result "1234" (read-string -10)
413 | '("" . #\1))
414 | (expect-parse-result (s 'lf "1234 " 'cr)
415 | (read-string 1 (current-input-port))
416 | (cons (s 'lf) #\1))
417 | (expect-parse-result (s 'lf "1234 " 'cr) (read-string 3)
418 | (cons (s 'lf "12") #\3))
419 | (expect-parse-result (s 'lf "1234 " 'cr) (read-string 7)
420 | (cons (s 'lf "1234 " 'cr) eof))
421 | (expect-parse-result (s 'lf "1234 " 'cr)
422 | (read-string 8 (current-input-port))
423 | (cons (s 'lf "1234 " 'cr) eof))
424 | (expect-parse-result (s 'lf "1234 " 'cr) (read-string 100)
425 | (cons (s 'lf "1234 " 'cr) eof))
426 | )))
427 |
428 |
429 | (test-case
430 | "find-string-from-port"
431 | (let ((eof (with-input-from-string "" read)))
432 |
433 | ;; these tests expect a version of "find-string-from-port?"
434 | ;; that accepts an extra argument. Perhaps a limit on chars
435 | ;; read? function doesn't actually seem to be used other than
436 | ;; as a predicate in the main code.
437 | #;(expect-parse-result "bacacabd"
438 | (find-string-from-port? "acab" (current-input-port) 100)
439 | '(7 . #\d))
440 | #;(expect-parse-result "bacacabd"
441 | (find-string-from-port? "acad" (current-input-port) 100)
442 | `(#f . ,eof))
443 | #;(expect-parse-result "bacacabd"
444 | (find-string-from-port? "bd" (current-input-port) 8)
445 | `(8 . ,eof))
446 | #;(expect-parse-result "bacacabd"
447 | (find-string-from-port? "be" (current-input-port) 20)
448 | `(#f . ,eof))
449 |
450 | #;(expect-parse-result "bacacabd"
451 | (find-string-from-port? "bd" (current-input-port) 5)
452 | '(#f . #\a))
453 | #;(expect-parse-result "bacacabd"
454 | (find-string-from-port? "bd" (current-input-port) 9)
455 | `(8 . ,eof))
456 |
457 | (expect-parse-result "bacacabd"
458 | (find-string-from-port? "acab" (current-input-port))
459 | '(7 . #\d))
460 |
461 | (expect-parse-result "bacacabd"
462 | (find-string-from-port? "acad" (current-input-port))
463 | `(#f . ,eof))
464 |
465 | (expect-parse-result "bacacabd"
466 | (find-string-from-port? "bd" (current-input-port))
467 | `(8 . ,eof))
468 |
469 | ))
470 |
--------------------------------------------------------------------------------
/sxml/ssax/util.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require racket/list
3 | srfi/13/string
4 | "myenv.ss")
5 | (provide (all-defined-out))
6 |
7 | ;****************************************************************************
8 | ; My Scheme misc utility functions
9 | ; (mainly dealing with string and list manipulations)
10 | ;
11 | ; myenv.scm, myenv-bigloo.scm or similar prelude is assumed.
12 | ; From SRFI-13, import many functions
13 | ; If a particular implementation lacks SRFI-13 support, please
14 | ; include the file srfi-13-local.scm
15 | ;
16 | ; $Id: util.scm,v 1.5 2004/07/07 16:02:31 sperber Exp $
17 |
18 | ;------------------------------------------------------------------------
19 | ; Iterator ANY?
20 | ;
21 | ; -- procedure+: any? PRED COLLECTION
22 | ; Searches for the first element in the collection satisfying a
23 | ; given predicate
24 | ; That is, the procedure applies PRED to every element of the
25 | ; COLLECTION in turn.
26 | ; The first element for which PRED returns non-#f stops the iteration;
27 | ; the value of the predicate is returned.
28 | ; If none of the elements of the COLLECTION satisfy the predicate,
29 | ; the return value from the procedure is #f
30 | ; COLLECTION can be a list, a vector, a string, or an input port.
31 | ; See vmyenv.scm for validation tests.
32 | (define (any? pred? coll)
33 | (cond [(list? coll)
34 | (ormap pred? coll)]
35 | [(vector? coll)
36 | (for/or ([x (in-vector coll)]) (pred? x))]
37 | [(string? coll)
38 | (for/or ([x (in-string coll)]) (pred? x))]
39 | [(input-port? coll)
40 | (for/or ([x (in-port read-char coll)]) (pred? x))]
41 | [else (error 'any? "invalid collection")]))
42 |
43 |
44 | ;------------------------------------------------------------------------
45 | ; Some list manipulation functions
46 |
47 | ; -- procedure+: list-intersperse SRC-L ELEM
48 | ; inserts ELEM between elements of the SRC-L, returning a freshly allocated
49 | ; list (cells, that is)
50 |
51 | (define (list-intersperse src-l elem)
52 | (add-between src-l elem))
53 |
54 |
55 | ; List-tail-difference: given two lists, list1 and list2 where
56 | ; list2 is presumably a tail of list1, return
57 | ; a (freshly allocated) list which is a difference between list1
58 | ; and list2. If list2 is *not* a tail of list1, the entire list1
59 | ; is returned.
60 | (define (list-tail-diff list1 list2)
61 | (let loop ((l1-curr list1) (difference '()))
62 | (cond
63 | ((eq? l1-curr list2) (reverse difference))
64 | ((null? l1-curr) (reverse difference))
65 | (else (loop (cdr l1-curr) (cons (car l1-curr) difference))))))
66 |
67 |
68 | ;------------------------------------------------------------------------
69 | ; String utilities
70 | ; See SRFI-13 or srfi-13-local.scm
71 |
72 |
73 | ; Return the index of the last occurence of a-char in str, or #f
74 | ; See SRFI-13
75 | (define string-rindex string-index-right)
76 |
77 | ; -- procedure+: substring? PATTERN STRING
78 | ; Searches STRING to see if it contains the substring PATTERN.
79 | ; Returns the index of the first substring of STRING that is equal
80 | ; to PATTERN; or `#f' if STRING does not contain PATTERN.
81 | ;
82 | ; (substring? "rat" "pirate") => 2
83 | ; (substring? "rat" "outrage") => #f
84 | ; (substring? "" any-string) => 0
85 | (define (substring? pattern str) (string-contains str pattern))
86 |
87 |
88 | ; -- procedure+: string->integer STR START END
89 | ;
90 | ; Makes sure a substring of the STR from START (inclusive) till END
91 | ; (exclusive) is a representation of a non-negative integer in decimal
92 | ; notation. If so, this integer is returned. Otherwise -- when the
93 | ; substring contains non-decimal characters, or when the range from
94 | ; START till END is not within STR, the result is #f.
95 | ;
96 | ; This procedure is a simplification of the standard string->number.
97 | ; The latter is far more generic: for example, it will try to read
98 | ; strings like "1/2" "1S2" "1.34" and even "1/0" (the latter causing
99 | ; a zero-divide error). Note that to string->number, "1S2" is a valid
100 | ; representation of an _inexact_ integer (100 to be precise).
101 | ; Oftentimes we want to be more restrictive about what we consider a
102 | ; number; we want merely to read an integral label.
103 | (define (string->integer str start end)
104 | (and (< -1 start end (inc (string-length str)))
105 | (let loop ((pos start) (accum 0))
106 | (cond
107 | ((>= pos end) accum)
108 | ((char-numeric? (string-ref str pos))
109 | (loop (inc pos) (+ (char->integer (string-ref str pos))
110 | (- (char->integer #\0)) (* 10 accum))))
111 | (else #f)))))
112 |
113 |
114 | ; -- procedure+: string-split STRING
115 | ; -- procedure+: string-split STRING '()
116 | ; -- procedure+: string-split STRING '() MAXSPLIT
117 | ;
118 | ; Returns a list of whitespace delimited words in STRING.
119 | ; If STRING is empty or contains only whitespace, then the empty list
120 | ; is returned. Leading and trailing whitespaces are trimmed.
121 | ; If MAXSPLIT is specified and positive, the resulting list will
122 | ; contain at most MAXSPLIT elements, the last of which is the string
123 | ; remaining after (MAXSPLIT - 1) splits. If MAXSPLIT is specified and
124 | ; non-positive, the empty list is returned. "In time critical
125 | ; applications it behooves you not to split into more fields than you
126 | ; really need."
127 | ;
128 | ; -- procedure+: string-split STRING CHARSET
129 | ; -- procedure+: string-split STRING CHARSET MAXSPLIT
130 | ;
131 | ; Returns a list of words delimited by the characters in CHARSET in
132 | ; STRING. CHARSET is a list of characters that are treated as delimiters.
133 | ; Leading or trailing delimeters are NOT trimmed. That is, the resulting
134 | ; list will have as many initial empty string elements as there are
135 | ; leading delimiters in STRING.
136 | ;
137 | ; If MAXSPLIT is specified and positive, the resulting list will
138 | ; contain at most MAXSPLIT elements, the last of which is the string
139 | ; remaining after (MAXSPLIT - 1) splits. If MAXSPLIT is specified and
140 | ; non-positive, the empty list is returned. "In time critical
141 | ; applications it behooves you not to split into more fields than you
142 | ; really need."
143 | ;
144 | ; This is based on the split function in Python/Perl
145 | ;
146 | ; (string-split " abc d e f ") ==> ("abc" "d" "e" "f")
147 | ; (string-split " abc d e f " '() 1) ==> ("abc d e f ")
148 | ; (string-split " abc d e f " '() 0) ==> ()
149 | ; (string-split ":abc:d:e::f:" '(#\:)) ==> ("" "abc" "d" "e" "" "f" "")
150 | ; (string-split ":" '(#\:)) ==> ("" "")
151 | ; (string-split "root:x:0:0:Lord" '(#\:) 2) ==> ("root" "x:0:0:Lord")
152 | ; (string-split "/usr/local/bin:/usr/bin:/usr/ucb/bin" '(#\:))
153 | ; ==> ("/usr/local/bin" "/usr/bin" "/usr/ucb/bin")
154 | ; (string-split "/usr/local/bin" '(#\/)) ==> ("" "usr" "local" "bin")
155 |
156 | (define (string-split str . rest)
157 | ; maxsplit is a positive number
158 | (define (split-by-whitespace str maxsplit)
159 | (define (skip-ws i yet-to-split-count)
160 | (cond
161 | ((>= i (string-length str)) '())
162 | ((char-whitespace? (string-ref str i))
163 | (skip-ws (inc i) yet-to-split-count))
164 | (else (scan-beg-word (inc i) i yet-to-split-count))))
165 | (define (scan-beg-word i from yet-to-split-count)
166 | (cond
167 | ((zero? yet-to-split-count)
168 | (cons (substring str from (string-length str)) '()))
169 | (else (scan-word i from yet-to-split-count))))
170 | (define (scan-word i from yet-to-split-count)
171 | (cond
172 | ((>= i (string-length str))
173 | (cons (substring str from i) '()))
174 | ((char-whitespace? (string-ref str i))
175 | (cons (substring str from i)
176 | (skip-ws (inc i) (- yet-to-split-count 1))))
177 | (else (scan-word (inc i) from yet-to-split-count))))
178 | (skip-ws 0 (- maxsplit 1)))
179 |
180 | ; maxsplit is a positive number
181 | ; str is not empty
182 | (define (split-by-charset str delimeters maxsplit)
183 | (define (scan-beg-word from yet-to-split-count)
184 | (cond
185 | ((>= from (string-length str)) '(""))
186 | ((zero? yet-to-split-count)
187 | (cons (substring str from (string-length str)) '()))
188 | (else (scan-word from from yet-to-split-count))))
189 | (define (scan-word i from yet-to-split-count)
190 | (cond
191 | ((>= i (string-length str))
192 | (cons (substring str from i) '()))
193 | ((memq (string-ref str i) delimeters)
194 | (cons (substring str from i)
195 | (scan-beg-word (inc i) (- yet-to-split-count 1))))
196 | (else (scan-word (inc i) from yet-to-split-count))))
197 | (scan-beg-word 0 (- maxsplit 1)))
198 |
199 | ; resolver of overloading...
200 | ; if omitted, maxsplit defaults to
201 | ; (inc (string-length str))
202 | (if (string-null? str) '()
203 | (if (null? rest)
204 | (split-by-whitespace str (inc (string-length str)))
205 | (let ((charset (car rest))
206 | (maxsplit
207 | (if (pair? (cdr rest)) (cadr rest) (inc (string-length str)))))
208 | (cond
209 | ((not (positive? maxsplit)) '())
210 | ((null? charset) (split-by-whitespace str maxsplit))
211 | (else (split-by-charset str charset maxsplit)))))))
212 |
213 |
214 | ; make-char-quotator QUOT-RULES
215 | ;
216 | ; Given QUOT-RULES, an assoc list of (char . string) pairs, return
217 | ; a quotation procedure. The returned quotation procedure takes a string
218 | ; and returns either a string or a list of strings. The quotation procedure
219 | ; check to see if its argument string contains any instance of a character
220 | ; that needs to be encoded (quoted). If the argument string is "clean",
221 | ; it is returned unchanged. Otherwise, the quotation procedure will
222 | ; return a list of string fragments. The input straing will be broken
223 | ; at the places where the special characters occur. The special character
224 | ; will be replaced by the corresponding encoding strings.
225 | ;
226 | ; For example, to make a procedure that quotes special HTML characters,
227 | ; do
228 | ; (make-char-quotator
229 | ; '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """)))
230 |
231 | (define (make-char-quotator char-encoding)
232 | (let ((bad-chars (map car char-encoding)))
233 |
234 | ; Check to see if str contains one of the characters in charset,
235 | ; from the position i onward. If so, return that character's index.
236 | ; otherwise, return #f
237 | (define (index-cset str i charset)
238 | (let loop ((i i))
239 | (and (< i (string-length str))
240 | (if (memv (string-ref str i) charset) i
241 | (loop (inc i))))))
242 |
243 | ; The body of the function
244 | (lambda (str)
245 | (let ((bad-pos (index-cset str 0 bad-chars)))
246 | (if (not bad-pos) str ; str had all good chars
247 | (let loop ((from 0) (to bad-pos))
248 | (cond
249 | ((>= from (string-length str)) '())
250 | ((not to)
251 | (cons (substring str from (string-length str)) '()))
252 | (else
253 | (let ((quoted-char
254 | (cdr (assv (string-ref str to) char-encoding)))
255 | (new-to
256 | (index-cset str (inc to) bad-chars)))
257 | (if (< from to)
258 | (cons
259 | (substring str from to)
260 | (cons quoted-char (loop (inc to) new-to)))
261 | (cons quoted-char (loop (inc to) new-to))))))))))))
262 |
--------------------------------------------------------------------------------
/sxml/sxpath.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require "sxml-tools.rkt"
3 | "ssax/sxpathlib.rkt"
4 | srfi/2
5 | "ssax/errors-and-warnings.rkt"
6 | "sxpath-ext.rkt"
7 | "txpath.rkt")
8 | (provide (all-defined-out))
9 |
10 | ;; $Id: sxpath.scm,v 1.5 2005/09/07 09:27:34 lizorkin Exp $
11 | ;; Highghest level SXPath
12 | ;; Refactored from sxml-tools.scm and sxpathlib.scm
13 |
14 | ;==============================================================================
15 | ; Abbreviated SXPath
16 |
17 | ; Evaluate an abbreviated SXPath
18 | ; sxpath:: AbbrPath -> Converter, or
19 | ; sxpath:: AbbrPath -> Node|Nodeset -> Nodeset
20 | ; AbbrPath is a list. It is translated to the full SXPath according
21 | ; to the following rewriting rules
22 | ; (sxpath '()) -> (node-join)
23 | ; (sxpath '(path-component ...)) ->
24 | ; (node-join (sxpath1 path-component) (sxpath '(...)))
25 | ; (sxpath1 '//) -> (sxml:descendant-or-self sxml:node?)
26 | ; (sxpath1 '(equal? x)) -> (select-kids (node-equal? x))
27 | ; (sxpath1 '(eq? x)) -> (select-kids (node-eq? x))
28 | ; (sxpath1 '(*or* ...)) -> (select-kids (ntype-names??
29 | ; (cdr '(*or* ...))))
30 | ; (sxpath1 '(*not* ...)) -> (select-kids (sxml:complement
31 | ; (ntype-names??
32 | ; (cdr '(*not* ...)))))
33 | ; (sxpath1 '(ns-id:* x)) -> (select-kids
34 | ; (ntype-namespace-id?? x))
35 | ; (sxpath1 ?symbol) -> (select-kids (ntype?? ?symbol))
36 | ; (sxpath1 ?string) -> (txpath ?string)
37 | ; (sxpath1 procedure) -> procedure
38 | ; (sxpath1 '(?symbol ...)) -> (sxpath1 '((?symbol) ...))
39 | ; (sxpath1 '(path reducer ...)) ->
40 | ; (node-reduce (sxpath path) (sxpathr reducer) ...)
41 | ; (sxpathr number) -> (node-pos number)
42 | ; (sxpathr path-filter) -> (filter (sxpath path-filter))
43 | (define (sxpath path [ns-binding null])
44 | (let loop ((converters '())
45 | (root-vars '()) ; a list of booleans, one per location step:
46 | ; #t - location step function is binary
47 | ; #f - location step function is unary
48 | (path (if (string? path) (list path) path)))
49 | (cond
50 | ((null? path) ; parsing is finished
51 | (lambda (node [var-binding null])
52 | (let ()
53 | (let rpt ((nodeset (as-nodeset node))
54 | (conv (reverse converters))
55 | (r-v (reverse root-vars)))
56 | (if
57 | (null? conv) ; the path is over
58 | nodeset
59 | (rpt
60 | (if (car r-v) ; the current converter consumes 2 arguments
61 | ((car conv) nodeset var-binding)
62 | ((car conv) nodeset))
63 | (cdr conv)
64 | (cdr r-v)))))))
65 | ; *or* handler
66 | ((and (pair? (car path))
67 | (not (null? (car path)))
68 | (eq? '*or* (caar path)))
69 | (loop (cons (select-kids (ntype-names?? (cdar path))) converters)
70 | (cons #f root-vars)
71 | (cdr path)))
72 | ; *not* handler
73 | ((and (pair? (car path))
74 | (not (null? (car path)))
75 | (eq? '*not* (caar path)))
76 | (loop (cons
77 | (select-kids (sxml:complement (ntype-names?? (cdar path))))
78 | converters)
79 | (cons #f root-vars)
80 | (cdr path)))
81 | ((procedure? (car path))
82 | (loop (cons (car path) converters)
83 | (cons #t root-vars)
84 | (cdr path)))
85 | ((eq? '// (car path))
86 | (if (or (null? (cdr path))
87 | (not (symbol? (cadr path)))
88 | (eq? (cadr path) '@))
89 | (loop (cons (sxml:descendant-or-self sxml:node?)
90 | converters)
91 | (cons #f root-vars)
92 | (cdr path))
93 | (loop (cons (sxml:descendant (ntype?? (cadr path)))
94 | converters)
95 | (cons #f root-vars)
96 | (cddr path))))
97 | ((symbol? (car path))
98 | (loop (cons (select-kids (ntype?? (car path))) converters)
99 | (cons #f root-vars)
100 | (cdr path)))
101 | ((string? (car path))
102 | (and-let*
103 | ((f (sxml:xpath-expr (car path) ns-binding))) ; DL: was: txpath
104 | (loop (cons f converters)
105 | (cons #t root-vars)
106 | (cdr path))))
107 | ((and (pair? (car path)) (eq? 'equal? (caar path)))
108 | (loop (cons (select-kids (apply node-equal? (cdar path))) converters)
109 | (cons #f root-vars)
110 | (cdr path)))
111 | ; ns-id:* handler
112 | ((and (pair? (car path)) (eq? 'ns-id:* (caar path)))
113 | (loop
114 | (cons (select-kids (ntype-namespace-id?? (cadar path))) converters)
115 | (cons #f root-vars)
116 | (cdr path)))
117 | ((and (pair? (car path)) (eq? 'eq? (caar path)))
118 | (loop (cons (select-kids (apply node-eq? (cdar path))) converters)
119 | (cons #f root-vars)
120 | (cdr path)))
121 | ((pair? (car path))
122 | (and-let*
123 | ((select
124 | (if
125 | (symbol? (caar path))
126 | (lambda (node [var-binding null]) ;; ryanc: unused!!
127 | ((select-kids (ntype?? (caar path))) node))
128 | (sxpath (caar path) ns-binding))))
129 | (let reducer ((reducing-path (cdar path))
130 | (filters '()))
131 | (cond
132 | ((null? reducing-path)
133 | (loop
134 | (cons
135 | (lambda (node var-binding)
136 | (map-union
137 | (lambda (node)
138 | (let label ((nodeset (select node var-binding))
139 | (fs (reverse filters)))
140 | (if
141 | (null? fs)
142 | nodeset
143 | (label
144 | ((car fs) nodeset var-binding)
145 | (cdr fs)))))
146 | (if (nodeset? node) node (list node))))
147 | converters)
148 | (cons #t root-vars)
149 | (cdr path)))
150 | ((number? (car reducing-path))
151 | (reducer
152 | (cdr reducing-path)
153 | (cons
154 | (lambda (node var-binding)
155 | ((node-pos (car reducing-path)) node))
156 | filters)))
157 | (else
158 | (and-let*
159 | ((func (sxpath (car reducing-path) ns-binding)))
160 | (reducer
161 | (cdr reducing-path)
162 | (cons
163 | (lambda (node var-binding)
164 | ((sxml:filter
165 | (lambda (n) (func n var-binding)))
166 | node))
167 | filters))))))))
168 | (else
169 | (sxml:warn 'sxpath "invalid path step: ~e" (car path))
170 | #f))))
171 |
172 |
173 | ;==============================================================================
174 | ; Wrappers
175 |
176 | ; sxpath always returns a list, which is #t in Scheme
177 | ; if-sxpath returns #f instead of empty list
178 | (define (if-sxpath path)
179 | (lambda (obj)
180 | (let ((x ((sxpath path) obj)))
181 | (if (null? x) #f x))))
182 |
183 | ; Returns first node found, if any.
184 | ; Otherwise returns #f.
185 | (define (if-car-sxpath path)
186 | (lambda (obj)
187 | (let ((x ((sxpath path) obj)))
188 | (if (null? x) #f (car x)))))
189 |
190 | ; Returns first node found, if any.
191 | ; Otherwise returns empty list.
192 | (define (car-sxpath path)
193 | (lambda (obj)
194 | (let ((x ((sxpath path) obj)))
195 | (if (null? x) '() (car x)))))
196 |
197 | ;==============================================================================
198 | ; lookup by a value of ID type attribute
199 | ; See also sxml:lookup in sxml-tools
200 |
201 | ; Built an index as a list of (ID_value . element) pairs for given
202 | ; node. lpaths are location paths for attributes of type ID.
203 | (define (sxml:id-alist node . lpaths)
204 | (apply
205 | append
206 | (map
207 | (lambda(lp)
208 | (let ((lpr (reverse lp)))
209 | (map
210 | (lambda (nd)
211 | (cons (sxml:attr nd (car lpr))
212 | nd))
213 | ; Selects elements with ID attributes
214 | ; using (lpath ,(node-self (sxpath '(@ attrname))))
215 | ((sxpath (reverse (cons
216 | (lambda(n r+v)
217 | ((node-self (sxpath `(@ ,(car lpr)))) n))
218 | (cddr lpr)))) node))
219 | ))
220 | lpaths)))
221 |
--------------------------------------------------------------------------------
/sxml/tests/README:
--------------------------------------------------------------------------------
1 | This directory is for tests.
2 |
3 | tests.rkt -- a few tests, all pass.
4 | ssax-test.rkt -- a very noisy test suite. All pass.
5 |
6 | other files -- random test-related stuff pulled in from the main tree.
--------------------------------------------------------------------------------
/sxml/tests/tests.rkt:
--------------------------------------------------------------------------------
1 | #lang racket
2 |
3 | (module+ test
4 | (require "../main.rkt"
5 | rackunit
6 | rackunit/text-ui)
7 | ;; this set of tests is wildly inadequate.
8 |
9 | ;; call xml->sxml with a port created from a string
10 | (define (read-from-string str [ns '()])
11 | (ssax:xml->sxml (open-input-string str) ns))
12 |
13 | (define basic-tests
14 | (test-suite "basic ssax tests"
15 | (test-equal? "a plain tag"
16 | (read-from-string "")
17 | `(*TOP* (a)))
18 | (test-equal? "a default namespace"
19 | (read-from-string "")
20 | `(*TOP* (gooble:a)))
21 | (test-equal? "a default namespace applies to sub-elements"
22 | (read-from-string "")
23 | `(*TOP* (gooble:a (gooble:b))))
24 | (test-equal? "a non-default namespace has to be triggered explicitly"
25 | (read-from-string "")
26 | `(*TOP* (a (b))))
27 | (test-equal? "here's how you use a non-default namespace"
28 | (read-from-string "")
29 | `(*TOP* (gooble:a (gooble:b))))
30 | (test-equal? "non-default namespace is non-default"
31 | (read-from-string "")
32 | `(*TOP* (gooble:a (b))))
33 | (test-equal? "you can use the namespace argument to shorten the prefix tag"
34 | (read-from-string ""
35 | `((g . "gooble")))
36 | `(*TOP* (@ (*NAMESPACES* (g "gooble")))
37 | (g:a (b))))
38 | (test-equal? "empty tags are indistinguishable from tags with empty strings"
39 | (read-from-string "")
40 | `(*TOP* (a)))
41 | (test-equal? "empty tag 2"
42 | (read-from-string "")
43 | `(*TOP* (a)))
44 | (test-equal? "by default, the SSAX reader discards ... whitespace-only strings?"
45 | (read-from-string "\n")
46 | `(*TOP* (a)))
47 | (test-equal? "discard whitespace 2"
48 | (read-from-string "\nt\n")
49 | `(*TOP* (a "\nt\n")))
50 | (test-exn "parsing empty string fails"
51 | #rx"unexpected EOF"
52 | (lambda () (ssax:xml->sxml (open-input-string "") '())))
53 | (test-equal? "serialization 1"
54 | (srl:sxml->xml `(*TOP* (p)))
55 | "")
56 | (test-exn "serialization bad"
57 | (lambda (e) #t)
58 | (lambda () (srl:sxml->xml '(foo (@ (bar (13)))))))
59 | (let ([temp (make-temporary-file)])
60 | (test-not-exn "serialization accepts paths"
61 | (lambda () (and (path? temp)
62 | (not (string? temp))
63 | (delete-file temp)
64 | (srl:sxml->xml `(*TOP* (p)) temp)))))))
65 |
66 | (run-tests basic-tests))
67 |
--------------------------------------------------------------------------------
/sxml/tests/vSXML-to-HTML.rkt:
--------------------------------------------------------------------------------
1 | ; Validation code for SXML-to-HTML.scm
2 | ;
3 | ; IMPORT
4 | ; SXML-to-HTML.scm and all of its imports
5 | ; ,open sxml-to-html sxml-tree-trans coutputs assertions with-output-to-string srfi-23
6 | ;
7 | ; $Id: vSXML-to-HTML.scm,v 1.3 2004/07/07 16:02:31 sperber Exp $
8 |
9 | ; equal-strs? LIST-OF-PRINTABLES STRING
10 | ; Check to make sure that the result of writing out LIST-OF-PRINTABLES
11 | ; is the same as STRING
12 | ; LIST-OF-PRINTABLES can include strings, characters and numbers
13 |
14 | #lang racket/base
15 |
16 | (module+ test
17 | (require racket/port
18 | rackunit
19 | rackunit/text-ui
20 | "../ssax/SXML-tree-trans.rkt"
21 | (only-in "../sxml-tools.rkt"
22 | [sxml:string->html string->goodHTML])
23 | (only-in "../serializer.rkt"
24 | [srl:sxml->html sxml->html]))
25 |
26 | ;; STATUS: tests mostly fail
27 | ;; 2016: tests still mostly fail... but could be fixed, by
28 | ;; someone more confident about what sxml->html should produce
29 |
30 | (define (display-to-str strs)
31 | (with-output-to-string
32 | (lambda ()
33 | (for-each display strs))))
34 |
35 | (run-tests
36 | (test-suite "SXML-to-HTML"
37 | (test-case "test 1"
38 | (letrec ((gen (lambda (test-val)
39 | (sxml->html
40 | `(p "par1" "par2"
41 | ,@(if test-val (list "par3" "par4") '()))))))
42 | (check-equal? (gen #t) "par1par2par3par4
")
43 | (check-equal? (gen #t) "par1par2par3par4
")
44 | (check-equal? (gen #f) "par1par2
")))
45 | (test-case
46 | "test 2"
47 | (check-equal? (sxml->html '(p "&")) "&
")
48 | (check-equal? (sxml->html '(p (@ (align "center")) "bad chars:" "<>&\""))
49 | ;; changed to match provided... double-quote char looks
50 | ;; okay to me.
51 | "bad chars:<>&\"
")
52 | ;; wait... you don't need quoting there, do you?
53 | #;(check-equal? (sxml->html '(p (@ (align "center") (atr ""))
54 | "bad chars:" (em "<>&\"")))
55 | "bad chars:
56 | <>&"
")
57 | (check-equal? (sxml->html '(p (@ (align "center") (atr "\"text\"")) (br)
58 | (ul (@ (compact)) (li "item " 1))))
59 | "
60 |
61 |
64 |
")
65 | (check-equal? (sxml->html '(p (@) (br) (ul (@ (compact)) (li "item " 1))))
66 | "
67 |
68 |
71 | ")
72 | ;; yikes, looks like this has changed dramatically...
73 | #;(check-equal? (sxml->html
74 | '(html:begin "my title"
75 | (body (@ (bgcolor "#ffffff")) (p "par1"))))
76 | "Content-type: text/html
77 |
78 | my title
79 | ""
80 | par1
"))
81 | ;; this test is doing goofy stuff with numbers inline. Bleh.
82 | #;(test-case "test 3"
83 | (let ()
84 | (define (print-slide n max-count)
85 | (sxml->html
86 | `((h2 "Slide number:" ,n) ; Note n is used in its native form
87 | ,@(cond [(positive? n)
88 | `((a (@ (href "base-url&slide=" ,(- n 1))) "prev"))]
89 | [else null])
90 | ,@(cond [(< (+ n 1) max-count)
91 | `((a (@ (href "base-url&slide=" ,(+ n 1))) "next"))]
92 | [else null])
93 | (p "the text of the slide"))))
94 | (check-equal? (print-slide 0 1)
95 | (display-to-str
96 | '("Slide number:0
"
97 | #\newline "the text of the slide
")))
98 | (check-equal? (print-slide 0 3)
99 | (display-to-str
100 | '("Slide number:0
"
101 | #\newline "next"
102 | #\newline "the text of the slide
")))
103 | (check-equal? (print-slide 1 3)
104 | (display-to-str
105 | '("Slide number:1
"
106 | #\newline "prev"
107 | #\newline "next"
108 | #\newline "the text of the slide
")))
109 | (check-equal? (print-slide 2 3)
110 | (display-to-str
111 | '(#\newline "Slide number:2
"
112 | #\newline "prev"
113 | #\newline "the text of the slide
")))))
114 | (test-case "test 4"
115 | (void
116 | (sxml->html
117 | `(ul
118 | ,@(map (lambda (filename-title)
119 | `(li (a (@ (href ,(car filename-title))))
120 | ,(cdr filename-title)))
121 | '(("slides/slide0001.gif" . "Introduction")
122 | ("slides/slide0010.gif" . "Summary")))))))
123 |
124 | ;; nope, this test cases doesn't look right to me either.
125 | #;(test-case "preorder and macro rules"
126 | (let ()
127 | (define enattr list) ;; ??
128 | (define entag cons) ;; ??
129 | (define (custom-sxml->html tree)
130 | (with-output-to-string
131 | (lambda ()
132 | (SRV:send-reply
133 | (pre-post-order tree
134 | ;; Universal transformation rules. Work for every HTML,
135 | ;; present and future
136 | `((@
137 | ((*default* ; local override for attributes
138 | . ,(lambda (attr-key . value) (enattr attr-key value))))
139 | . ,(lambda (trigger . value) (cons '@ value)))
140 | (*default* . ,(lambda (tag . elems) (entag tag elems)))
141 | (*text* . ,(lambda (trigger str)
142 | (if (string? str) (string->goodHTML str) str)))
143 | (link
144 | *macro*
145 | . ,(lambda (tag url body)
146 | `(a (@ (href ,url)) ,body)))
147 | (vspace ; (vspace flag)
148 | *preorder* ; where flag is a symbol: small, large
149 | . ,(lambda (tag flag)
150 | (case flag
151 | ((large) (list "
"))
152 | ((small) (list "
"))
153 | (else (error "wrong flag:" flag))))))
154 | )))))
155 | 6(check-equal? (custom-sxml->html '(p "text" (link "url" "") "text1"))
156 | (display-to-str
157 | '("text"
158 | #\newline "<body>text1
")))
159 | (check-equal? (custom-sxml->html '(p "text" (vspace small) "text1"))
160 | (display-to-str
161 | '( "text
text1
")))
162 | (check-equal? (custom-sxml->html '(p "text" (vspace large) "text1"))
163 | (display-to-str
164 | '("text
text1")))))
165 | )))
166 |
--------------------------------------------------------------------------------
/sxml/tests/vSXML-tree-trans.rkt:
--------------------------------------------------------------------------------
1 | ; Validation code for SXML-tree-trans.scm
2 | ;
3 | ; IMPORT
4 | ; SXML-tree-trans.scm and all of its imports
5 | ; Pretty-printer of trees (named pp)
6 | ; ,open sxml-tree-trans ppretty-prints coutputs assertions
7 | ;
8 | ; $Id: vSXML-tree-trans.scm,v 1.2 2004/07/07 16:02:31 sperber Exp $
9 |
10 | #lang racket/base
11 |
12 | (module+ test
13 | (require racket/port
14 | rackunit
15 | rackunit/text-ui
16 | "../ssax/SXML-tree-trans.rkt")
17 |
18 | ;; STATUS: all tests pass
19 |
20 | (run-tests
21 | (test-suite "SXML-tree-trans"
22 | (let* ((tree
23 | '(root (n1 (n11) "s12" (n13))
24 | "s2"
25 | (n2 (n21) "s22")
26 | (n3
27 | (n31 (n311))
28 | "s32"
29 | (n33 (n331) "s332" (n333))
30 | "s34")))
31 | (test
32 | (lambda (pred-begin pred-end expected)
33 | (let ((computed
34 | (car (replace-range pred-begin pred-end (list tree)))))
35 | (check-equal? computed expected)))))
36 | (test-case "Remove one node, s2"
37 | (test
38 | (lambda (node)
39 | (and (equal? node "s2") '()))
40 | (lambda (node) (list node))
41 | '(root (n1 (n11) "s12" (n13))
42 | (n2 (n21) "s22")
43 | (n3 (n31 (n311)) "s32" (n33 (n331) "s332" (n333)) "s34"))))
44 |
45 | (test-case "Replace one node, s2 with s2-new"
46 | (test
47 | (lambda (node)
48 | (and (equal? node "s2") '("s2-new")))
49 | (lambda (node) (list node))
50 | '(root (n1 (n11) "s12" (n13))
51 | "s2-new"
52 | (n2 (n21) "s22")
53 | (n3 (n31 (n311)) "s32" (n33 (n331) "s332" (n333)) "s34"))))
54 |
55 | (test-case "Replace one node, s2 with s2-new and its brother (n-new s)"
56 | (test
57 | (lambda (node)
58 | (and (equal? node "s2") '("s2-new" (n-new "s"))))
59 | (lambda (node) (list node))
60 | '(root (n1 (n11) "s12" (n13))
61 | "s2-new" (n-new "s")
62 | (n2 (n21) "s22")
63 | (n3 (n31 (n311)) "s32" (n33 (n331) "s332" (n333)) "s34"))))
64 |
65 | (test-case "Remove everything from s2 onward"
66 | (test
67 | (lambda (node)
68 | (and (equal? node "s2") '()))
69 | (lambda (node) #f)
70 | '(root (n1 (n11) "s12" (n13)))))
71 |
72 | (test-case "Remove everything from n1 onward"
73 | (test
74 | (lambda (node)
75 | (and (pair? node) (eq? 'n1 (car node)) '()))
76 | (lambda (node) #f)
77 | '(root)))
78 |
79 | (test-case "Replace from n1 through n33"
80 | (test
81 | (lambda (node)
82 | (and (pair? node)
83 | (eq? 'n1 (car node))
84 | (list node '(n1* "s12*"))))
85 | (lambda (node)
86 | (and (pair? node)
87 | (eq? 'n33 (car node))
88 | (list node)))
89 | '(root
90 | (n1 (n11) "s12" (n13))
91 | (n1* "s12*")
92 | (n3
93 | (n33 (n331) "s332" (n333))
94 | "s34"))))
95 | ))))
96 |
--------------------------------------------------------------------------------
/sxml/xpath-ast.rkt:
--------------------------------------------------------------------------------
1 | #lang racket/base
2 | (require srfi/2
3 | "ssax/errors-and-warnings.rkt"
4 | "xpath-parser.rkt")
5 | (provide (all-defined-out))
6 |
7 | ;; XPath/XPointer -> Abstract Syntax Tree parser
8 | ;
9 | ; This software is in Public Domain.
10 | ; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
11 | ;
12 | ; Please send bug reports and comments to:
13 | ; lisovsky@acm.org Kirill Lisovsky
14 | ; lizorkin@hotbox.ru Dmitry Lizorkin
15 |
16 |
17 | ;==========================================================================
18 | ; W3C textual XPath/XPointer -> AST
19 |
20 | ; Writing operations as an S-expression in an infix notation
21 | (define (txp:ast-operation-helper expr-lst op-lst add-on)
22 | (let ((rev-expr-lst (reverse expr-lst)))
23 | (let loop ((exprs (cdr rev-expr-lst))
24 | (ops (reverse op-lst))
25 | (res (car rev-expr-lst)))
26 | (if (null? ops)
27 | res
28 | (loop (cdr exprs) (cdr ops)
29 | (list (car ops) (car exprs) res))))))
30 |
31 | ;-------------------------------------------------
32 | ; Parameters for TXPath -> AST implementation
33 |
34 | (define txp:ast-params
35 | `(
36 | ; Axes
37 | (axis
38 | ((ancestor
39 | ,(lambda (add-on) 'ancestor))
40 | (ancestor-or-self
41 | ,(lambda (add-on) 'ancestor-or-self))
42 | (attribute
43 | ,(lambda (add-on) 'attribute))
44 | (child
45 | ,(lambda (add-on) 'child))
46 | (descendant
47 | ,(lambda (add-on) 'descendant))
48 | (descendant-or-self
49 | ,(lambda (add-on) 'descendant-or-self))
50 | (following
51 | ,(lambda (add-on) 'following))
52 | (following-sibling
53 | ,(lambda (add-on) 'following-sibling))
54 | (namespace
55 | ,(lambda (add-on) 'namespace))
56 | (parent
57 | ,(lambda (add-on) 'parent))
58 | (preceding
59 | ,(lambda (add-on) 'preceding))
60 | (preceding-sibling
61 | ,(lambda (add-on) 'preceding-sibling))
62 | (self
63 | ,(lambda (add-on) 'self))
64 | ; Addition by XLink
65 | (arc
66 | ,(lambda (add-on) 'arc))
67 | (traverse
68 | ,(lambda (add-on) 'traverse))
69 | (traverse-arc
70 | ,(lambda (add-on) 'traverse-arc))))
71 |
72 | ; Node test
73 | (node-test
74 | ((star
75 | ,(lambda (add-on) '((*))))
76 | (uri+star
77 | ,(lambda (uri add-on)
78 | `((namespace-uri ,uri))))
79 | (qname
80 | ,(lambda (uri local-name add-on)
81 | (if (not uri)
82 | `((local-name ,local-name))
83 | `((namespace-uri ,uri) (local-name ,local-name)))))
84 | (comment
85 | ,(lambda (add-on) '((comment))))
86 | (text
87 | ,(lambda (add-on) '((text))))
88 | (processing-instruction
89 | ,(lambda (literal-string add-on)
90 | (if (not literal-string) ; no literal provided
91 | '((pi))
92 | `((pi ,literal-string)))))
93 | (node
94 | ,(lambda (add-on) '((node))))
95 | (point
96 | ,(lambda (add-on) '((point))))
97 | (range
98 | ,(lambda (add-on) '((range))))))
99 |
100 | ; Location step
101 | (step
102 | ((common
103 | ,(lambda (axis-res node-test-res predicate-res-lst add-on)
104 | `(step
105 | (axis-specifier (,axis-res))
106 | (node-test ,@node-test-res)
107 | ,@predicate-res-lst)))
108 | (range-to
109 | ,(lambda (expr-res predicate-res-lst add-on)
110 | `(range-to
111 | (expr ,expr-res)
112 | ,@predicate-res-lst)))))
113 |
114 | ; Relative location path
115 | (relative-lpath
116 | ,(lambda (step-res-lst add-on)
117 | (cons 'relative-location-path step-res-lst)))
118 |
119 | ; Location path
120 | (location-path
121 | ((bare-slash
122 | ,(lambda (add-on) '(absolute-location-path)))
123 | (slash
124 | ,(lambda (relative-lpath-res add-on)
125 | (cons 'absolute-location-path (cdr relative-lpath-res))))
126 | (double-slash
127 | ,(lambda (relative-lpath-res add-on)
128 | `(absolute-location-path
129 | (step
130 | (axis-specifier (descendant-or-self))
131 | (node-test (node)))
132 | ,@(cdr relative-lpath-res))))))
133 |
134 | ; Predicate
135 | (predicate
136 | ,(lambda (expr-res add-on)
137 | (list 'predicate expr-res)))
138 |
139 | ; Variable reference
140 | (variable-ref
141 | ,(lambda (var-name-string add-on)
142 | `(variable-reference ,var-name-string)))
143 |
144 | ; Function call
145 | (function-call
146 | ,(lambda (fun-name-string arg-res-lst add-on)
147 | `(function-call
148 | (function-name ,fun-name-string)
149 | ,@(map
150 | (lambda (arg-res) `(argument ,arg-res))
151 | arg-res-lst))))
152 |
153 | ; Primary expression
154 | (primary-expr
155 | ((literal
156 | ,(lambda (literal add-on)
157 | `(literal ,literal)))
158 | (number
159 | ,(lambda (number add-on)
160 | `(number ,number)))))
161 |
162 | ; Filter expression
163 | (filter-expr
164 | ,(lambda (primary-expr-res predicate-res-lst add-on)
165 | `(filter-expr
166 | (primary-expr ,primary-expr-res)
167 | ,@predicate-res-lst)))
168 |
169 | ; Path expression
170 | (path-expr
171 | ((slash
172 | ,(lambda (filter-expr-res relative-lpath-res add-on)
173 | `(path-expr
174 | ,(if (eq? (car filter-expr-res) 'filter-expr)
175 | filter-expr-res
176 | `(filter-expr (primary-expr ,filter-expr-res)))
177 | ,@(cdr relative-lpath-res))))
178 | (double-slash
179 | ,(lambda (filter-expr-res relative-lpath-res add-on)
180 | `(path-expr
181 | ,(if (eq? (car filter-expr-res) 'filter-expr)
182 | filter-expr-res
183 | `(filter-expr (primary-expr ,filter-expr-res)))
184 | (step
185 | (axis-specifier (descendant-or-self))
186 | (node-test (node)))
187 | ,@(cdr relative-lpath-res))))))
188 |
189 | ; Union expression
190 | (union-expr
191 | ,(lambda (path-expr-res-lst add-on)
192 | (cons 'union-expr path-expr-res-lst)))
193 |
194 | ; Unary expression
195 | (unary-expr
196 | ,(lambda (union-expr-res num-minuses add-on)
197 | (let loop ((n num-minuses)
198 | (res union-expr-res))
199 | (if (= n 0) res
200 | (loop (- n 1) (list '- res))))))
201 |
202 | ; Different operations
203 | (operations
204 | ((* ,(lambda (add-on) '*))
205 | (div ,(lambda (add-on) 'div))
206 | (mod ,(lambda (add-on) 'mod))
207 |
208 | (+ ,(lambda (add-on) '+))
209 | (- ,(lambda (add-on) '-))
210 | (< ,(lambda (add-on) '<))
211 | (> ,(lambda (add-on) '>))
212 | (<= ,(lambda (add-on) '<=))
213 | (>= ,(lambda (add-on) '>=))
214 | (= ,(lambda (add-on) '=))
215 | (!= ,(lambda (add-on) '!=))))
216 |
217 | ; Additive and multiplicative expressions
218 | (mul-expr ,txp:ast-operation-helper)
219 | (add-expr ,txp:ast-operation-helper)
220 |
221 | ; Relational expression
222 | (relational-expr ,txp:ast-operation-helper)
223 |
224 | ; Equality expression
225 | (equality-expr ,txp:ast-operation-helper)
226 |
227 | ; And-expression
228 | (and-expr
229 | ,(lambda (equality-expr-res-lst add-on)
230 | (cons 'and equality-expr-res-lst)))
231 |
232 | ; Or-expression
233 | (or-expr
234 | ,(lambda (and-expr-res-lst add-on)
235 | (cons 'or and-expr-res-lst)))
236 |
237 | ; Full XPointer
238 | (full-xptr
239 | ,(lambda (expr-res-lst add-on)
240 | (cons 'full-xptr expr-res-lst)))
241 |
242 | ; XPointer child sequence
243 | (child-seq
244 | ((with-name
245 | ,(lambda (name-string number-lst add-on)
246 | `(child-seq
247 | (name ,name-string)
248 | ,@(map
249 | (lambda (num) (list 'number num))
250 | number-lst))))
251 | (without-name
252 | ,(lambda (number-lst add-on)
253 | (cons 'child-seq
254 | (map
255 | (lambda (num) (list 'number num))
256 | number-lst))))))
257 | ))
258 |
259 | (define txp:ast-res (txp:parameterize-parser txp:ast-params))
260 |
261 | ;-------------------------------------------------
262 | ; Highest level API functions
263 | ;
264 | ; xpath-string - an XPath location path (a string)
265 | ; ns-binding - declared namespace prefixes (an optional argument)
266 | ; ns-binding = (list (prefix . uri)
267 | ; (prefix . uri)
268 | ; ...)
269 | ; prefix - a symbol
270 | ; uri - a string
271 | ;
272 | ; The returned result: abstract-syntax-tree or #f
273 | ; abstract-syntax-tree - an S-expression
274 | ; #f - signals of a parse error (an error message is printed as a side effect
275 | ; during parsing)
276 |
277 | (define (txp:ast-api-helper parse-proc)
278 | (lambda (xpath-string . ns-binding)
279 | (let ((res (parse-proc
280 | xpath-string
281 | (if (null? ns-binding) ns-binding (car ns-binding))
282 | '())))
283 | (if (txp:error? res) ; error detected
284 | #f res))))
285 |
286 | (define txp:xpath->ast
287 | (txp:ast-api-helper (cadr (assq 'xpath txp:ast-res))))
288 | (define txp:xpointer->ast
289 | (txp:ast-api-helper (cadr (assq 'xpointer txp:ast-res))))
290 | (define txp:expr->ast
291 | (txp:ast-api-helper (cadr (assq 'expr txp:ast-res))))
292 |
293 |
294 | ;==========================================================================
295 | ; SXPath native syntax -> AST
296 | ; Additional features added to AST by native SXPath
297 | ; Operator += below denotes additional alternatives to AST grammar rules
298 | ; {7} += (node-test (equal? ))
299 | ; | (node-test (eq? ))
300 | ; | (node-test (names + ))
301 | ; | (node-test (not-names + ))
302 | ; {4} += (lambda-step )
303 | ; |
304 |
305 | (define (txp:sxpath->ast path . ns-binding)
306 | (let ((ns-binding (if (null? ns-binding) ns-binding (car ns-binding))))
307 | (if
308 | (string? path) ; Just a textual XPath
309 | (txp:expr->ast path ns-binding)
310 | (let loop ((ast-steps '())
311 | (path path))
312 | (cond
313 | ((null? path) ; parsing is finished
314 | (if (null? ast-steps) ; empty path
315 | '(absolute-location-path)
316 | (let ((forward-steps (reverse ast-steps)))
317 | (cons
318 | (if (eq? (caar forward-steps) 'filter-expr)
319 | 'path-expr 'relative-location-path)
320 | forward-steps))))
321 | ((procedure? (car path))
322 | (loop (cons (list 'lambda-step (car path))
323 | ast-steps)
324 | (cdr path)))
325 | ((assq (car path) '((// . descendant-or-self) (.. . parent)))
326 | => (lambda (pair)
327 | (loop (cons
328 | `(step (axis-specifier (,(cdr pair)))
329 | (node-test (node)))
330 | ast-steps)
331 | (cdr path))))
332 | ((symbol? (car path))
333 | (loop (cons
334 | `(step (axis-specifier (child))
335 | (node-test
336 | ,(cond
337 | ((assq (car path) '((* . (*)) (*text* . (text))))
338 | => cdr)
339 | (else
340 | `(local-name ,(symbol->string (car path)))))))
341 | ast-steps)
342 | (cdr path)))
343 | ((string? (car path))
344 | (and-let* ; only for the location path for the moment
345 | ((txt-ast (txp:expr->ast (car path) ns-binding)))
346 | (loop (if (eq? (car txt-ast) 'relative-location-path)
347 | (append (reverse (cdr txt-ast)) ast-steps)
348 | (cons
349 | `(filter-expr (primary-expr ,txt-ast))
350 | ast-steps))
351 | (cdr path))))
352 | ((and (pair? (car path)) (not (null? (car path))))
353 | (cond
354 | ((assq (caar path) '((*or* . names) (*not* . not-names)))
355 | => (lambda (pair)
356 | (loop
357 | (cons
358 | `(step (axis-specifier (child))
359 | (node-test
360 | ,(cons (cdr pair)
361 | (map symbol->string (cdar path)))))
362 | ast-steps)
363 | (cdr path))))
364 | ((assq (caar path) '((equal? . equal?) (eq? . eq?)
365 | (ns-id:* . namespace-uri)))
366 | => (lambda (pair)
367 | (loop
368 | (cons `(step (axis-specifier (child))
369 | (node-test ,(list (cdr pair) (cadar path))))
370 | ast-steps)
371 | (cdr path))))
372 | (else
373 | (let reducer ((reducing-path (cdar path))
374 | (filters '()))
375 | (cond
376 | ((null? reducing-path)
377 | (if
378 | (symbol? (caar path)) ; just a child axis
379 | (loop
380 | (cons
381 | `(step
382 | (axis-specifier (child))
383 | (node-test (local-name ,(symbol->string (caar path))))
384 | ,@(reverse filters))
385 | ast-steps)
386 | (cdr path))
387 | (and-let*
388 | ((select (txp:sxpath->ast (caar path) ns-binding)))
389 | (loop
390 | (cons `(filter-expr
391 | (primary-expr ,select)
392 | ,@(reverse filters))
393 | ast-steps)
394 | (cdr path)))))
395 | ((number? (car reducing-path))
396 | (reducer
397 | (cdr reducing-path)
398 | (cons
399 | `(predicate
400 | ,(if
401 | (negative? (car reducing-path)) ; from end of nodeset
402 | `(- (function-call (function-name "last"))
403 | (number ,(- -1 (car reducing-path))))
404 | `(number ,(car reducing-path))))
405 | filters)))
406 | (else
407 | (and-let*
408 | ((pred-ast
409 | (txp:sxpath->ast (car reducing-path) ns-binding)))
410 | (reducer
411 | (cdr reducing-path)
412 | (cons `(predicate ,pred-ast) filters)))))))))
413 | (else
414 | (sxml:warn 'tsp:sxpath->ast "invalid path step: ~e" (car path))
415 | #f))))))
416 |
417 |
418 | ;==========================================================================
419 | ; Several popular accessors and constructors for AST steps
420 |
421 | ; Whether a representation for location step
422 | (define (txp:step? op)
423 | (and (pair? op) (eq? (car op) 'step)))
424 |
425 | ; Returns the axis specifier of the location step
426 | ; Argument: the AST representation of a location step
427 | ; Result: either '(child) and the like, or #f if the AST contains syntactic
428 | ; error
429 | (define (txp:step-axis op)
430 | (and (txp:step? op)
431 | (not (null? (cdr op)))
432 | (pair? (cadr op)) (eq? (caadr op) 'axis-specifier)
433 | (cadadr op)))
434 |
435 | ; Returns the node test of the location step
436 | ; Argument: the AST representation of a location step
437 | ; Result: either '(*) and the like, or #f if the AST contains syntactic
438 | ; error
439 | (define (txp:step-node-test op)
440 | (and (txp:step? op)
441 | (not (null? (cdr op))) (not (null? (cddr op)))
442 | (pair? (caddr op)) (eq? (caaddr op) 'node-test)
443 | (cadr (caddr op))))
444 |
445 | ; Returns predicate expressions of the location step
446 | ; Argument: the AST representation of a location step
447 | ; Result: either (listof ast-expr)
448 | ; or #f if syntactic error detected in a location step AST
449 | (define (txp:step-preds op)
450 | (and (txp:step? op)
451 | (not (null? (cdr op))) (not (null? (cddr op)))
452 | (null? (filter
453 | (lambda (sub) ; not a predicate representation
454 | (not (and (pair? sub) (eq? (car sub) 'predicate))))
455 | (cdddr op)))
456 | (map cadr (cdddr op))))
457 |
458 | ; Constructs the AST representation for a given axis, node-test and
459 | ; a list of predicate expressions
460 | ; axis ::= '(child) and the like
461 | ; node-test ::= '(*) and the like
462 | ; pred-expr-list ::= (listof ast-expr)
463 | (define (txp:construct-step axis node-test . pred-expr-list)
464 | `(step (axis-specifier ,axis)
465 | (node-test ,node-test)
466 | ,@(map
467 | (lambda (pred-expr) `(predicate ,pred-expr))
468 | pred-expr-list)))
469 |
--------------------------------------------------------------------------------