>(&self, prefix: &P) -> bool {
111 | self.0.contains_key(prefix.as_ref())
112 | }
113 |
114 | /// Puts a mapping into this namespace.
115 | ///
116 | /// This method does not override any already existing mappings.
117 | ///
118 | /// Returns a boolean flag indicating whether the map already contained
119 | /// the given prefix.
120 | ///
121 | /// # Parameters
122 | /// * `prefix` --- namespace prefix;
123 | /// * `uri` --- namespace URI.
124 | ///
125 | /// # Return value
126 | /// `true` if `prefix` has been inserted successfully; `false` if the `prefix`
127 | /// was already present in the namespace.
128 | pub fn put(&mut self, prefix: P, uri: U) -> bool
129 | where
130 | P: Into,
131 | U: Into,
132 | {
133 | match self.0.entry(prefix.into()) {
134 | Entry::Occupied(_) => false,
135 | Entry::Vacant(ve) => {
136 | ve.insert(uri.into());
137 | true
138 | }
139 | }
140 | }
141 |
142 | /// Puts a mapping into this namespace forcefully.
143 | ///
144 | /// This method, unlike `put()`, does replace an already existing mapping.
145 | ///
146 | /// Returns previous URI which was assigned to the given prefix, if it is present.
147 | ///
148 | /// # Parameters
149 | /// * `prefix` --- namespace prefix;
150 | /// * `uri` --- namespace URI.
151 | ///
152 | /// # Return value
153 | /// `Some(uri)` with `uri` being a previous URI assigned to the `prefix`, or
154 | /// `None` if such prefix was not present in the namespace before.
155 | pub fn force_put(&mut self, prefix: P, uri: U) -> Option
156 | where
157 | P: Into,
158 | U: Into,
159 | {
160 | self.0.insert(prefix.into(), uri.into())
161 | }
162 |
163 | /// Queries the namespace for the given prefix.
164 | ///
165 | /// # Parameters
166 | /// * `prefix` --- namespace prefix.
167 | ///
168 | /// # Return value
169 | /// Namespace URI corresponding to the given prefix, if it is present.
170 | pub fn get<'a, P: ?Sized + AsRef>(&'a self, prefix: &P) -> Option<&'a str> {
171 | self.0.get(prefix.as_ref()).map(|s| &**s)
172 | }
173 | }
174 |
175 | /// An alias for iterator type for namespace mappings contained in a namespace.
176 | pub type NamespaceMappings<'a> =
177 | Map, for<'b> fn((&'b String, &'b String)) -> UriMapping<'b>>;
178 |
179 | impl<'a> IntoIterator for &'a Namespace {
180 | type Item = UriMapping<'a>;
181 | type IntoIter = NamespaceMappings<'a>;
182 |
183 | fn into_iter(self) -> Self::IntoIter {
184 | self.0.iter().map(|(prefix, uri)| (prefix, uri))
185 | }
186 | }
187 |
188 | /// Namespace stack is a sequence of namespaces.
189 | ///
190 | /// Namespace stack is used to represent cumulative namespace consisting of
191 | /// combined namespaces from nested elements.
192 | #[derive(Clone, Eq, PartialEq, Debug)]
193 | pub struct NamespaceStack(pub Vec);
194 |
195 | impl NamespaceStack {
196 | /// Returns an empty namespace stack.
197 | #[inline]
198 | pub fn empty() -> NamespaceStack {
199 | NamespaceStack(Vec::with_capacity(2))
200 | }
201 |
202 | /// Returns a namespace stack with default items in it.
203 | ///
204 | /// Default items are the following:
205 | ///
206 | /// * `xml` → `http://www.w3.org/XML/1998/namespace`;
207 | /// * `xmlns` → `http://www.w3.org/2000/xmlns/`.
208 | #[inline]
209 | pub fn default() -> NamespaceStack {
210 | let mut nst = NamespaceStack::empty();
211 | nst.push_empty();
212 | // xml namespace
213 | nst.put(NS_XML_PREFIX, NS_XML_URI);
214 | // xmlns namespace
215 | nst.put(NS_XMLNS_PREFIX, NS_XMLNS_URI);
216 | // empty namespace
217 | nst.put(NS_NO_PREFIX, NS_EMPTY_URI);
218 | nst
219 | }
220 |
221 | /// Adds an empty namespace to the top of this stack.
222 | #[inline]
223 | pub fn push_empty(&mut self) -> &mut NamespaceStack {
224 | self.0.push(Namespace::empty());
225 | self
226 | }
227 |
228 | /// Removes the topmost namespace in this stack.
229 | ///
230 | /// Panics if the stack is empty.
231 | #[inline]
232 | pub fn pop(&mut self) -> Namespace {
233 | self.0.pop().unwrap()
234 | }
235 |
236 | /// Removes the topmost namespace in this stack.
237 | ///
238 | /// Returns `Some(namespace)` if this stack is not empty and `None` otherwise.
239 | #[inline]
240 | pub fn try_pop(&mut self) -> Option {
241 | self.0.pop()
242 | }
243 |
244 | /// Borrows the topmost namespace mutably, leaving the stack intact.
245 | ///
246 | /// Panics if the stack is empty.
247 | #[inline]
248 | pub fn peek_mut(&mut self) -> &mut Namespace {
249 | self.0.last_mut().unwrap()
250 | }
251 |
252 | /// Borrows the topmost namespace immutably, leaving the stack intact.
253 | ///
254 | /// Panics if the stack is empty.
255 | #[inline]
256 | pub fn peek(&self) -> &Namespace {
257 | self.0.last().unwrap()
258 | }
259 |
260 | /// Puts a mapping into the topmost namespace if this stack does not already contain one.
261 | ///
262 | /// Returns a boolean flag indicating whether the insertion has completed successfully.
263 | /// Note that both key and value are matched and the mapping is inserted if either
264 | /// namespace prefix is not already mapped, or if it is mapped, but to a different URI.
265 | ///
266 | /// # Parameters
267 | /// * `prefix` --- namespace prefix;
268 | /// * `uri` --- namespace URI.
269 | ///
270 | /// # Return value
271 | /// `true` if `prefix` has been inserted successfully; `false` if the `prefix`
272 | /// was already present in the namespace stack.
273 | pub fn put_checked(&mut self, prefix: P, uri: U) -> bool
274 | where
275 | P: Into + AsRef,
276 | U: Into + AsRef,
277 | {
278 | if self
279 | .0
280 | .iter()
281 | .any(|ns| ns.get(&prefix) == Some(uri.as_ref()))
282 | {
283 | false
284 | } else {
285 | self.put(prefix, uri);
286 | true
287 | }
288 | }
289 |
290 | /// Puts a mapping into the topmost namespace in this stack.
291 | ///
292 | /// This method does not override a mapping in the topmost namespace if it is
293 | /// already present, however, it does not depend on other namespaces in the stack,
294 | /// so it is possible to put a mapping which is present in lower namespaces.
295 | ///
296 | /// Returns a boolean flag indicating whether the insertion has completed successfully.
297 | ///
298 | /// # Parameters
299 | /// * `prefix` --- namespace prefix;
300 | /// * `uri` --- namespace URI.
301 | ///
302 | /// # Return value
303 | /// `true` if `prefix` has been inserted successfully; `false` if the `prefix`
304 | /// was already present in the namespace.
305 | #[inline]
306 | pub fn put(&mut self, prefix: P, uri: U) -> bool
307 | where
308 | P: Into,
309 | U: Into,
310 | {
311 | self.0.last_mut().unwrap().put(prefix, uri)
312 | }
313 |
314 | /// Performs a search for the given prefix in the whole stack.
315 | ///
316 | /// This method walks the stack from top to bottom, querying each namespace
317 | /// in order for the given prefix. If none of the namespaces contains the prefix,
318 | /// `None` is returned.
319 | ///
320 | /// # Parameters
321 | /// * `prefix` --- namespace prefix.
322 | #[inline]
323 | pub fn get<'a, P: ?Sized + AsRef>(&'a self, prefix: &P) -> Option<&'a str> {
324 | let prefix = prefix.as_ref();
325 | for ns in self.0.iter().rev() {
326 | match ns.get(prefix) {
327 | None => {}
328 | r => return r,
329 | }
330 | }
331 | None
332 | }
333 |
334 | /// Combines this stack of namespaces into a single namespace.
335 | ///
336 | /// Namespaces are combined in left-to-right order, that is, rightmost namespace
337 | /// elements take priority over leftmost ones.
338 | pub fn squash(&self) -> Namespace {
339 | let mut result = BTreeMap::new();
340 | for ns in self.0.iter() {
341 | result.extend(ns.0.iter().map(|(k, v)| (k.clone(), v.clone())));
342 | }
343 | Namespace(result)
344 | }
345 |
346 | /// Returns an object which implements `Extend` using `put_checked()` instead of `put()`.
347 | ///
348 | /// See `CheckedTarget` for more information.
349 | #[inline]
350 | pub fn checked_target(&mut self) -> CheckedTarget<'_> {
351 | CheckedTarget(self)
352 | }
353 |
354 | /// Returns an iterator over all mappings in this namespace stack.
355 | #[inline]
356 | pub fn iter(&self) -> NamespaceStackMappings<'_> {
357 | self.into_iter()
358 | }
359 | }
360 |
361 | /// An iterator over mappings from prefixes to URIs in a namespace stack.
362 | pub struct NamespaceStackMappings<'a> {
363 | namespaces: Rev>,
364 | current_namespace: Option>,
365 | used_keys: HashSet<&'a str>,
366 | }
367 |
368 | impl<'a> NamespaceStackMappings<'a> {
369 | fn go_to_next_namespace(&mut self) -> bool {
370 | self.current_namespace = self.namespaces.next().map(|ns| ns.into_iter());
371 | self.current_namespace.is_some()
372 | }
373 | }
374 |
375 | impl<'a> Iterator for NamespaceStackMappings<'a> {
376 | type Item = UriMapping<'a>;
377 |
378 | fn next(&mut self) -> Option> {
379 | // If there is no current namespace and no next namespace, we're finished
380 | if self.current_namespace.is_none() && !self.go_to_next_namespace() {
381 | return None;
382 | }
383 | let next_item = self.current_namespace.as_mut().unwrap().next();
384 |
385 | match next_item {
386 | // There is an element in the current namespace
387 | Some((k, v)) => {
388 | if self.used_keys.contains(&k) {
389 | // If the current key is used, go to the next one
390 | self.next()
391 | } else {
392 | // Otherwise insert the current key to the set of used keys and
393 | // return the mapping
394 | self.used_keys.insert(k);
395 | Some((k, v))
396 | }
397 | }
398 | // Current namespace is exhausted
399 | None => {
400 | if self.go_to_next_namespace() {
401 | // If there is next namespace, continue from it
402 | self.next()
403 | } else {
404 | // No next namespace, exiting
405 | None
406 | }
407 | }
408 | }
409 | }
410 | }
411 |
412 | impl<'a> IntoIterator for &'a NamespaceStack {
413 | type Item = UriMapping<'a>;
414 | type IntoIter = NamespaceStackMappings<'a>;
415 |
416 | fn into_iter(self) -> Self::IntoIter {
417 | NamespaceStackMappings {
418 | namespaces: self.0.iter().rev(),
419 | current_namespace: None,
420 | used_keys: HashSet::new(),
421 | }
422 | }
423 | }
424 |
/// A type alias for a pair of `(prefix, uri)` values returned by namespace iterators.
///
/// Both elements are borrowed string slices.
pub type UriMapping<'a> = (&'a str, &'a str);
427 |
428 | impl<'a> Extend> for Namespace {
429 | fn extend(&mut self, iterable: T)
430 | where
431 | T: IntoIterator- >,
432 | {
433 | for (prefix, uri) in iterable {
434 | self.put(prefix, uri);
435 | }
436 | }
437 | }
438 |
439 | impl<'a> Extend> for NamespaceStack {
440 | fn extend(&mut self, iterable: T)
441 | where
442 | T: IntoIterator
- >,
443 | {
444 | for (prefix, uri) in iterable {
445 | self.put(prefix, uri);
446 | }
447 | }
448 | }
449 |
/// A wrapper around `NamespaceStack` which implements `Extend` using `put_checked()`.
///
/// It holds a mutable borrow of the stack; mappings are added to the stack itself.
pub struct CheckedTarget<'a>(&'a mut NamespaceStack);
452 |
453 | impl<'a, 'b> Extend> for CheckedTarget<'a> {
454 | fn extend(&mut self, iterable: T)
455 | where
456 | T: IntoIterator
- >,
457 | {
458 | for (prefix, uri) in iterable {
459 | self.0.put_checked(prefix, uri);
460 | }
461 | }
462 | }
463 |
// The topmost namespace is iterated first, then the lower one.
#[test]
fn test_namespace_iter() {
    let mut nst = NamespaceStack::empty();
    nst.push_empty();
    nst.put("a", "urn:A");
    nst.put("b", "urn:B");
    nst.push_empty();
    nst.put("c", "urn:C");

    assert_eq!(
        vec![("c", "urn:C"), ("a", "urn:A"), ("b", "urn:B")],
        nst.iter().collect::<Vec<_>>()
    );
}
478 |
// Compares `Extend` through `checked_target()` with plain `Extend` on the stack.
#[test]
fn test_checked_target() {
    let mut nst = NamespaceStack::empty();
    nst.push_empty();
    nst.put("a", "urn:A");
    nst.put("b", "urn:B");
    nst.push_empty();
    nst.put("c", "urn:C");

    // Checked: `b` → `urn:B` is already in the lower namespace and is skipped;
    // `c` is already bound in the topmost namespace and keeps its URI.
    nst.checked_target().extend(vec![
        ("a", "urn:Z"),
        ("b", "urn:B"),
        ("c", "urn:Y"),
        ("d", "urn:D"),
    ]);
    assert_eq!(
        vec![
            ("a", "urn:Z"),
            ("c", "urn:C"),
            ("d", "urn:D"),
            ("b", "urn:B")
        ],
        nst.iter().collect::<Vec<_>>()
    );

    let mut nst = NamespaceStack::empty();
    nst.push_empty();
    nst.put("a", "urn:A");
    nst.put("b", "urn:B");
    nst.push_empty();
    nst.put("c", "urn:C");

    // Unchecked: everything not yet bound in the topmost namespace is added to it.
    nst.extend(vec![
        ("a", "urn:Z"),
        ("b", "urn:B"),
        ("c", "urn:Y"),
        ("d", "urn:D"),
    ]);
    assert_eq!(
        vec![
            ("a", "urn:Z"),
            ("b", "urn:B"),
            ("c", "urn:C"),
            ("d", "urn:D")
        ],
        nst.iter().collect::<Vec<_>>()
    );
}
527 |
--------------------------------------------------------------------------------
/src/xml/reader/parser/mod.rs:
--------------------------------------------------------------------------------
1 | //! Contains an implementation of pull-based XML parser.
2 |
3 | use std::borrow::Cow;
4 | use std::io::prelude::*;
5 | use std::mem;
6 |
7 | use crate::xml::attribute::OwnedAttribute;
8 | use crate::xml::common::{
9 | self, is_name_char, is_name_start_char, Position, TextPosition, XmlVersion,
10 | };
11 | use crate::xml::name::OwnedName;
12 | use crate::xml::namespace::NamespaceStack;
13 | use crate::xml::reader::config::ParserConfig;
14 | use crate::xml::reader::events::XmlEvent;
15 | use crate::xml::reader::lexer::{Lexer, Token};
16 |
// Generates `MarkupData` methods of the form `fn $method(&mut self) -> $t`,
// each of which moves `self.$field` out and leaves the type's default value
// in its place.
macro_rules! gen_takes(
    ($($field:ident -> $method:ident, $t:ty);+) => (
        impl MarkupData {
            $(
                #[inline]
                fn $method(&mut self) -> $t {
                    mem::take(&mut self.$field)
                }
            )+
        }
    )
);
29 |
30 | gen_takes!(
31 | name -> take_name, String;
32 | ref_data -> take_ref_data, String;
33 |
34 | version -> take_version, Option;
35 | encoding -> take_encoding, Option;
36 | standalone -> take_standalone, Option;
37 |
38 | element_name -> take_element_name, Option;
39 |
40 | attr_name -> take_attr_name, Option;
41 | attributes -> take_attributes, Vec
42 | );
43 |
// Builds an error outcome through `$this.error(..)`.
//
// The first form passes the message expression as is; the second form
// formats the message with `format!` first.
macro_rules! self_error(
    ($this:ident; $msg:expr) => ($this.error($msg));
    ($this:ident; $fmt:expr, $($arg:expr),+) => ($this.error(format!($fmt, $($arg),+)))
);
48 |
49 | mod inside_cdata;
50 | mod inside_closing_tag_name;
51 | mod inside_comment;
52 | mod inside_declaration;
53 | mod inside_doctype;
54 | mod inside_opening_tag;
55 | mod inside_processing_instruction;
56 | mod inside_reference;
57 | mod outside_tag;
58 |
59 | static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10;
60 | static DEFAULT_ENCODING: &str = "UTF-8";
61 | static DEFAULT_STANDALONE: Option = None;
62 |
63 | type ElementStack = Vec;
64 | pub type ParserOutcome = super::Outcome;
65 |
66 | /// Pull-based XML parser.
67 | pub struct PullParser {
68 | config: ParserConfig,
69 | lexer: Lexer,
70 | st: State,
71 | buf: String,
72 | nst: NamespaceStack,
73 |
74 | data: MarkupData,
75 | final_result: Option,
76 | next_event: Option,
77 | est: ElementStack,
78 | pos: Vec,
79 |
80 | encountered_element: bool,
81 | parsed_declaration: bool,
82 | inside_whitespace: bool,
83 | read_prefix_separator: bool,
84 | pop_namespace: bool,
85 | }
86 |
87 | impl PullParser {
88 | /// Returns a new parser using the given config.
89 | pub fn new(config: ParserConfig) -> PullParser {
90 | PullParser {
91 | config,
92 | lexer: Lexer::new(),
93 | st: State::OutsideTag,
94 | buf: String::new(),
95 | nst: NamespaceStack::default(),
96 |
97 | data: MarkupData {
98 | name: String::new(),
99 | version: None,
100 | encoding: None,
101 | standalone: None,
102 | ref_data: String::new(),
103 | element_name: None,
104 | quote: None,
105 | attr_name: None,
106 | attributes: Vec::new(),
107 | },
108 | final_result: None,
109 | next_event: None,
110 | est: Vec::new(),
111 | pos: vec![TextPosition::new()],
112 |
113 | encountered_element: false,
114 | parsed_declaration: false,
115 | inside_whitespace: true,
116 | read_prefix_separator: false,
117 | pop_namespace: false,
118 | }
119 | }
120 | }
121 |
impl Position for PullParser {
    /// Returns the position of the last event produced by the parser
    ///
    /// This is the first entry of the internal position list, which is
    /// created with one element in `new()`.
    #[inline]
    fn position(&self) -> TextPosition {
        self.pos[0]
    }
}
129 |
130 | #[derive(Clone, PartialEq)]
131 | pub enum State {
132 | OutsideTag,
133 | InsideOpeningTag(OpeningTagSubstate),
134 | InsideClosingTag(ClosingTagSubstate),
135 | InsideProcessingInstruction(ProcessingInstructionSubstate),
136 | InsideComment,
137 | InsideCData,
138 | InsideDeclaration(DeclarationSubstate),
139 | InsideDoctype,
140 | InsideReference(Box),
141 | }
142 |
/// Sub-states carried by `State::InsideOpeningTag`.
#[derive(Clone, Eq, PartialEq)]
pub enum OpeningTagSubstate {
    InsideName,

    InsideTag,

    InsideAttributeName,
    AfterAttributeName,

    InsideAttributeValue,
}
154 |
/// Sub-states carried by `State::InsideClosingTag`.
#[derive(Clone, Eq, PartialEq)]
pub enum ClosingTagSubstate {
    CTInsideName,
    CTAfterName,
}
160 |
/// Sub-states carried by `State::InsideProcessingInstruction`.
#[derive(Clone, Eq, PartialEq)]
pub enum ProcessingInstructionSubstate {
    PIInsideName,
    PIInsideData,
}
166 |
/// Sub-states carried by `State::InsideDeclaration`.
///
/// The variants are grouped by the part of the declaration they refer to:
/// version, encoding and standalone declaration.
#[derive(Clone, Eq, PartialEq)]
pub enum DeclarationSubstate {
    BeforeVersion,
    InsideVersion,
    AfterVersion,

    InsideVersionValue,
    AfterVersionValue,

    InsideEncoding,
    AfterEncoding,

    InsideEncodingValue,

    BeforeStandaloneDecl,
    InsideStandaloneDecl,
    AfterStandaloneDecl,

    InsideStandaloneDeclValue,
    AfterStandaloneDeclValue,
}
188 |
/// Kind of name being read by `read_qualified_name()`; it decides which
/// tokens are accepted as the end of the name.
#[derive(PartialEq)]
enum QualifiedNameTarget {
    Attribute,
    OpeningTag,
    ClosingTag,
}
195 |
/// Quote character which opened the attribute value being read.
#[derive(Copy, Clone, PartialEq, Eq)]
enum QuoteToken {
    SingleQuoteToken,
    DoubleQuoteToken,
}
201 |
202 | impl QuoteToken {
203 | fn from_token(t: &Token) -> QuoteToken {
204 | match *t {
205 | Token::SingleQuote => QuoteToken::SingleQuoteToken,
206 | Token::DoubleQuote => QuoteToken::DoubleQuoteToken,
207 | _ => panic!("Unexpected token: {}", t),
208 | }
209 | }
210 |
211 | fn as_token(self) -> Token {
212 | match self {
213 | QuoteToken::SingleQuoteToken => Token::SingleQuote,
214 | QuoteToken::DoubleQuoteToken => Token::DoubleQuote,
215 | }
216 | }
217 | }
218 |
219 | struct MarkupData {
220 | name: String, // used for processing instruction name
221 | ref_data: String, // used for reference content
222 |
223 | version: Option, // used for XML declaration version
224 | encoding: Option, // used for XML declaration encoding
225 | standalone: Option, // used for XML declaration standalone parameter
226 |
227 | element_name: Option, // used for element name
228 |
229 | quote: Option, // used to hold opening quote for attribute value
230 | attr_name: Option, // used to hold attribute name
231 | attributes: Vec, // used to hold all accumulated attributes
232 | }
233 |
234 | impl PullParser {
235 | /// Returns next event read from the given buffer.
236 | ///
237 | /// This method should be always called with the same buffer. If you call it
238 | /// providing different buffers each time, the result will be undefined.
239 | pub fn next(&mut self, r: &mut R) -> ParserOutcome {
240 | if let Some(ref ev) = self.final_result {
241 | return ev.clone();
242 | }
243 |
244 | if let Some(ev) = self.next_event.take() {
245 | return ev;
246 | }
247 |
248 | if self.pop_namespace {
249 | self.pop_namespace = false;
250 | self.nst.pop();
251 | }
252 |
253 | loop {
254 | // While lexer gives us Ok(maybe_token) -- we loop.
255 | // Upon having a complete XML-event -- we return from the whole function.
256 | match self.lexer.next_token(r) {
257 | Ok(maybe_token) => match maybe_token {
258 | None => break,
259 | Some(token) => match self.dispatch_token(token) {
260 | None => {} // continue
261 | Some(Ok(XmlEvent::EndDocument)) => {
262 | return {
263 | self.next_pos();
264 | self.set_final_result(Ok(XmlEvent::EndDocument))
265 | }
266 | }
267 | Some(Ok(xml_event)) => {
268 | return {
269 | self.next_pos();
270 | Ok(xml_event)
271 | }
272 | }
273 | Some(Err(xml_error)) => {
274 | return {
275 | self.next_pos();
276 | self.set_final_result(Err(xml_error))
277 | }
278 | }
279 | },
280 | },
281 | Err(lexer_error) => return self.set_final_result(Err(lexer_error)),
282 | }
283 | }
284 |
285 | // Handle end of stream
286 | // Forward pos to the lexer head
287 | self.next_pos();
288 | let ev = if self.depth() == 0 {
289 | if self.encountered_element && self.st == State::OutsideTag {
290 | // all is ok
291 | Ok(XmlEvent::EndDocument)
292 | } else if !self.encountered_element {
293 | self_error!(self; "Unexpected end of stream: no root element found")
294 | } else {
295 | // self.st != State::OutsideTag
296 | self_error!(self; "Unexpected end of stream") // TODO: add expected hint?
297 | }
298 | } else {
299 | self_error!(self; "Unexpected end of stream: still inside the root element")
300 | };
301 | self.set_final_result(ev)
302 | }
303 |
304 | // This function is to be called when a terminal event is reached.
305 | // The function sets up the `self.final_result` into `Some(result)` and return `result`.
306 | fn set_final_result(&mut self, result: ParserOutcome) -> ParserOutcome {
307 | self.final_result = Some(result.clone());
308 | result
309 | }
310 |
311 | #[inline]
312 | fn error>>(&self, msg: M) -> ParserOutcome {
313 | Err((&self.lexer, msg).into())
314 | }
315 |
316 | #[inline]
317 | fn next_pos(&mut self) {
318 | if self.pos.len() > 1 {
319 | self.pos.remove(0);
320 | } else {
321 | self.pos[0] = self.lexer.position();
322 | }
323 | }
324 |
325 | #[inline]
326 | fn push_pos(&mut self) {
327 | self.pos.push(self.lexer.position());
328 | }
329 |
330 | fn dispatch_token(&mut self, t: Token) -> Option {
331 | match self.st.clone() {
332 | State::OutsideTag => self.outside_tag(t),
333 | State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s),
334 | State::InsideDeclaration(s) => self.inside_declaration(t, s),
335 | State::InsideDoctype => self.inside_doctype(t),
336 | State::InsideOpeningTag(s) => self.inside_opening_tag(t, s),
337 | State::InsideClosingTag(s) => self.inside_closing_tag_name(t, s),
338 | State::InsideComment => self.inside_comment(t),
339 | State::InsideCData => self.inside_cdata(t),
340 | State::InsideReference(s) => self.inside_reference(t, *s),
341 | }
342 | }
343 |
344 | #[inline]
345 | fn depth(&self) -> usize {
346 | self.est.len()
347 | }
348 |
349 | #[inline]
350 | fn buf_has_data(&self) -> bool {
351 | !self.buf.is_empty()
352 | }
353 |
354 | #[inline]
355 | fn take_buf(&mut self) -> String {
356 | std::mem::take(&mut self.buf)
357 | }
358 |
359 | #[inline]
360 | fn append_char_continue(&mut self, c: char) -> Option {
361 | self.buf.push(c);
362 | None
363 | }
364 |
365 | #[inline]
366 | fn as_state(&mut self, st: State, ev: Option) -> Option {
367 | self.st = st;
368 | ev
369 | }
370 |
371 | #[inline]
372 | fn as_state_continue(&mut self, st: State) -> Option {
373 | self.as_state(st, None)
374 | }
375 |
376 | #[inline]
377 | fn as_state_emit(&mut self, st: State, ev: ParserOutcome) -> Option {
378 | self.as_state(st, Some(ev))
379 | }
380 |
381 | /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed,
382 | /// an error is returned.
383 | ///
384 | /// # Parameters
385 | /// * `t` --- next token;
386 | /// * `on_name` --- a callback which is executed when whitespace is encountered.
387 | fn read_qualified_name(
388 | &mut self,
389 | t: Token,
390 | target: QualifiedNameTarget,
391 | on_name: F,
392 | ) -> Option
393 | where
394 | F: Fn(&mut PullParser, Token, OwnedName) -> Option,
395 | {
396 | // We can get here for the first time only when self.data.name contains zero or one character,
397 | // but first character cannot be a colon anyway
398 | if self.buf.len() <= 1 {
399 | self.read_prefix_separator = false;
400 | }
401 |
402 | let invoke_callback = |this: &mut PullParser, t| {
403 | let name = this.take_buf();
404 | match name.parse() {
405 | Ok(name) => on_name(this, t, name),
406 | Err(_) => Some(self_error!(this; "Qualified name is invalid: {}", name)),
407 | }
408 | };
409 |
410 | match t {
411 | // There can be only one colon, and not as the first character
412 | Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => {
413 | self.buf.push(':');
414 | self.read_prefix_separator = true;
415 | None
416 | }
417 |
418 | Token::Character(c)
419 | if c != ':'
420 | && (!self.buf_has_data() && is_name_start_char(c)
421 | || self.buf_has_data() && is_name_char(c)) =>
422 | {
423 | self.append_char_continue(c)
424 | }
425 |
426 | Token::EqualsSign if target == QualifiedNameTarget::Attribute => {
427 | invoke_callback(self, t)
428 | }
429 |
430 | Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTag => {
431 | invoke_callback(self, t)
432 | }
433 |
434 | Token::TagEnd
435 | if target == QualifiedNameTarget::OpeningTag
436 | || target == QualifiedNameTarget::ClosingTag =>
437 | {
438 | invoke_callback(self, t)
439 | }
440 |
441 | Token::Whitespace(_) => invoke_callback(self, t),
442 |
443 | _ => Some(self_error!(self; "Unexpected token inside qualified name: {}", t)),
444 | }
445 | }
446 |
447 | /// Dispatches tokens in order to process attribute value.
448 | ///
449 | /// # Parameters
450 | /// * `t` --- next token;
451 | /// * `on_value` --- a callback which is called when terminating quote is encountered.
452 | fn read_attribute_value(&mut self, t: Token, on_value: F) -> Option
453 | where
454 | F: Fn(&mut PullParser, String) -> Option,
455 | {
456 | match t {
457 | Token::Whitespace(_) if self.data.quote.is_none() => None, // skip leading whitespace
458 |
459 | Token::DoubleQuote | Token::SingleQuote => match self.data.quote {
460 | None => {
461 | // Entered attribute value
462 | self.data.quote = Some(QuoteToken::from_token(&t));
463 | None
464 | }
465 | Some(q) if q.as_token() == t => {
466 | self.data.quote = None;
467 | let value = self.take_buf();
468 | on_value(self, value)
469 | }
470 | _ => {
471 | t.push_to_string(&mut self.buf);
472 | None
473 | }
474 | },
475 |
476 | Token::ReferenceStart => {
477 | let st = Box::new(self.st.clone());
478 | self.as_state_continue(State::InsideReference(st))
479 | }
480 |
481 | Token::OpeningTagStart => {
482 | Some(self_error!(self; "Unexpected token inside attribute value: <"))
483 | }
484 |
485 | // Every character except " and ' and < is okay
486 | _ => {
487 | t.push_to_string(&mut self.buf);
488 | None
489 | }
490 | }
491 | }
492 |
493 | fn emit_start_element(&mut self, emit_end_element: bool) -> Option {
494 | let mut name = self.data.take_element_name().unwrap();
495 | let mut attributes = self.data.take_attributes();
496 |
497 | // check whether the name prefix is bound and fix its namespace
498 | match self.nst.get(name.borrow().prefix_repr()) {
499 | Some("") => name.namespace = None, // default namespace
500 | Some(ns) => name.namespace = Some(ns.into()),
501 | None => return Some(self_error!(self; "Element {} prefix is unbound", name)),
502 | }
503 |
504 | // check and fix accumulated attributes prefixes
505 | for attr in attributes.iter_mut() {
506 | if let Some(ref pfx) = attr.name.prefix {
507 | let new_ns = match self.nst.get(pfx) {
508 | Some("") => None, // default namespace
509 | Some(ns) => Some(ns.into()),
510 | None => {
511 | return Some(self_error!(self; "Attribute {} prefix is unbound", attr.name))
512 | }
513 | };
514 | attr.name.namespace = new_ns;
515 | }
516 | }
517 |
518 | if emit_end_element {
519 | self.pop_namespace = true;
520 | self.next_event = Some(Ok(XmlEvent::EndElement { name: name.clone() }));
521 | } else {
522 | self.est.push(name.clone());
523 | }
524 | let namespace = self.nst.squash();
525 | self.as_state_emit(
526 | State::OutsideTag,
527 | Ok(XmlEvent::StartElement {
528 | name,
529 | attributes,
530 | namespace,
531 | }),
532 | )
533 | }
534 |
535 | fn emit_end_element(&mut self) -> Option {
536 | let mut name = self.data.take_element_name().unwrap();
537 |
538 | // check whether the name prefix is bound and fix its namespace
539 | match self.nst.get(name.borrow().prefix_repr()) {
540 | Some("") => name.namespace = None, // default namespace
541 | Some(ns) => name.namespace = Some(ns.into()),
542 | None => return Some(self_error!(self; "Element {} prefix is unbound", name)),
543 | }
544 |
545 | let op_name = self.est.pop().unwrap();
546 |
547 | if name == op_name {
548 | self.pop_namespace = true;
549 | self.as_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name }))
550 | } else {
551 | Some(self_error!(self; "Unexpected closing tag: {}, expected {}", name, op_name))
552 | }
553 | }
554 | }
555 |
#[cfg(test)]
mod tests {
    use std::io::BufReader;

    use crate::xml::attribute::OwnedAttribute;
    use crate::xml::common::{Position, TextPosition};
    use crate::xml::name::OwnedName;
    use crate::xml::reader::events::XmlEvent;
    use crate::xml::reader::parser::PullParser;
    use crate::xml::reader::ParserConfig;

    fn new_parser() -> PullParser {
        PullParser::new(ParserConfig::new())
    }

    // Pulls the next event and panics unless it matches the pattern
    // (and the optional condition).
    macro_rules! expect_event(
        ($r:expr, $p:expr, $t:pat) => (
            match $p.next(&mut $r) {
                $t => {}
                e => panic!("Unexpected event: {:?}", e)
            }
        );
        ($r:expr, $p:expr, $t:pat => $c:expr ) => (
            match $p.next(&mut $r) {
                $t if $c => {}
                e => panic!("Unexpected event: {:?}", e)
            }
        )
    );

    // Creates a `(reader, parser)` pair over the given document text.
    macro_rules! test_data(
        ($d:expr) => ({
            static DATA: &'static str = $d;
            let r = BufReader::new(DATA.as_bytes());
            let p = new_parser();
            (r, p)
        })
    );

    // NOTE(review): the XML inside the raw strings below was missing from the
    // reviewed text and has been reconstructed from the assertions of each
    // test — confirm against the original fixtures.

    #[test]
    fn issue_3_semicolon_in_attribute_value() {
        let (mut r, mut p) = test_data!(
            r#"
            <a attr="zzz;zzz" />
        "#
        );

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) =>
            *name == OwnedName::local("a") &&
             attributes.len() == 1 &&
             attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") &&
             namespace.is_essentially_empty()
        );
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    #[test]
    fn issue_140_entity_reference_inside_tag() {
        let (mut r, mut p) = test_data!(
            r#"
            <bla>&#9835;</bla>
        "#
        );

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}");
        expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla"));
        expect_event!(r, p, Ok(XmlEvent::EndDocument));
    }

    #[test]
    fn opening_tag_in_attribute_value() {
        // The asserted column depends on the exact indentation of the fixture
        // line: twelve spaces put the offending `<` at column 24.
        let (mut r, mut p) = test_data!(
            r#"
            <a attr="zzz<zzz" />
        "#
        );

        expect_event!(r, p, Ok(XmlEvent::StartDocument { .. }));
        expect_event!(r, p, Err(ref e) =>
            e.msg() == "Unexpected token inside attribute value: <" &&
            e.position() == TextPosition { row: 1, column: 24 }
        );
    }
}
644 |
--------------------------------------------------------------------------------