├── .gitignore
├── architecture.png
├── XPathParser
├── XPathParser.snk
├── XPathOperator.cs
├── XPathAxis.cs
├── XPathParser.csproj
├── IXpathBuilder.cs
├── XPathParserException.cs
├── XPathScanner.cs
└── XPathParser.cs
├── appveyor.yml
├── version.json
├── XPathParserTest
├── XPathParserTest.csproj
├── XPathTreeBuilder.cs
├── Test.cs
└── XPathStringBuilder.cs
├── XPathParser.sln
├── LICENSE.txt
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | *.lock.json
2 | .vs/
3 | bin/
4 | obj/
--------------------------------------------------------------------------------
/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quamotion/XPathParser/HEAD/architecture.png
--------------------------------------------------------------------------------
/XPathParser/XPathParser.snk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quamotion/XPathParser/HEAD/XPathParser/XPathParser.snk
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | build_script:
2 | - cmd: dotnet restore
3 | - cmd: cd XPathParserTest
4 | - cmd: dotnet test
5 | - cmd: cd ..\XPathParser
6 | - cmd: dotnet build -c Release
7 | - cmd: dotnet pack -c Release
8 |
9 | on_success:
10 | - ps: Push-AppveyorArtifact "bin\Release\XPathParser.*.nupkg"
--------------------------------------------------------------------------------
/version.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://raw.githubusercontent.com/AArnott/Nerdbank.GitVersioning/master/src/NerdBank.GitVersioning/version.schema.json",
3 | "version": "1.2",
4 | "publicReleaseRefSpec": [
5 | "^refs/heads/master$", // we release out of master
6 | "^refs/tags/v\\d+\\.\\d+" // we also release tags starting with vN.N
7 | ],
8 | }
--------------------------------------------------------------------------------
/XPathParser/XPathOperator.cs:
--------------------------------------------------------------------------------
1 | namespace CodePlex.XPathParser {
2 | public enum XPathOperator { // Operator kinds handed to IXPathBuilder.Operator. Keep the order: LexKind repeats it and XPathStringBuilder indexes a string table by the numeric value.
3 | Unknown = 0, // placeholder value
4 | Or, // 'or'
5 | And, // 'and'
6 | Eq, // '='
7 | Ne, // '!='
8 | Lt, // '<'
9 | Le, // '<='
10 | Gt, // '>'
11 | Ge, // '>='
12 | Plus, // '+'
13 | Minus, // binary '-'
14 | Multiply, // '*'
15 | Divide, // 'div'
16 | Modulo, // 'mod'
17 | UnaryMinus, // negation; the sample builders use only the left operand for it
18 | Union // '|'
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/XPathParser/XPathAxis.cs:
--------------------------------------------------------------------------------
1 | namespace CodePlex.XPathParser {
2 | public enum XPathAxis { // Axis of a location step, as passed to IXPathBuilder.Axis. Keep the order: XPathStringBuilder indexes its axis-name table by the numeric value.
3 | Unknown = 0, // placeholder; the scanner resets its axis field to this value
4 | Ancestor , // ancestor::
5 | AncestorOrSelf , // ancestor-or-self::
6 | Attribute , // attribute::
7 | Child , // child::
8 | Descendant , // descendant::
9 | DescendantOrSelf, // descendant-or-self::
10 | Following , // following::
11 | FollowingSibling, // following-sibling::
12 | Namespace , // namespace::
13 | Parent , // parent::
14 | Preceding , // preceding::
15 | PrecedingSibling, // preceding-sibling::
16 | Self , // self::
17 | Root , // NOTE(review): not an XPath axis name; presumably stands for the document root step - confirm against the parser
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/XPathParserTest/XPathParserTest.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | netcoreapp2.2
5 |
6 | false
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/XPathParser/XPathParser.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net40;net45;netstandard2.0
5 | Sergey Dubinets
6 | .NET XPath Parser
7 | XPath parser in C# source code.
8 | https://github.com/quamotion/XPathParser/blob/master/LICENSE.txt
9 | true
10 | XPathParser.snk
11 | true
12 | $(AllowedOutputExtensionsInPackageBuildOutputFolder);.pdb
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/XPathParser/IXpathBuilder.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Xml.XPath;
4 |
5 | namespace CodePlex.XPathParser {
6 | public interface IXPathBuilder { // NOTE(review): 'Node' is not declared in this file; presumably it is a generic type parameter of the interface (the sample builders use XElement and string in its place) - confirm
7 | // Should be called once per build
8 | void StartBuild();
9 |
10 | // Should be called after build for result tree post-processing
11 | Node EndBuild(Node result);
12 | // Builds the node for a string literal; XPathStringBuilder re-adds the quotes, so 'value' is presumably passed without them.
13 | Node String(string value);
14 | // Builds the node for a numeric literal, passed as text.
15 | Node Number(string value);
16 | // Builds the node for an operator expression. For XPathOperator.UnaryMinus the sample builders use only 'left'.
17 | Node Operator(XPathOperator op, Node left, Node right);
18 | // Builds the node for one step: an axis plus its node test. The sample builders treat a null prefix/name as a wildcard.
19 | Node Axis(XPathAxis xpathAxis, XPathNodeType nodeType, string prefix, string name);
20 | // Combines two steps into a path (rendered as "left/right" by XPathStringBuilder).
21 | Node JoinStep(Node left, Node right);
22 |
23 | // http://www.w3.org/TR/xquery-semantics/#id-axis-steps
24 | // reverseStep is how the parser communicates to the builder the difference between "ancestor[1]" and "(ancestor)[1]"
25 | Node Predicate(Node node, Node condition, bool reverseStep);
26 | // Builds the node for a variable reference (rendered as "$prefix:name" by XPathStringBuilder).
27 | Node Variable(string prefix, string name);
28 | // Builds the node for a function call from its name and its already-built argument nodes.
29 | Node Function(string prefix, string name, IList args);
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/XPathParser.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 14
4 | VisualStudioVersion = 14.0.25123.0
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "XPathParser", "XPathParser\XPathParser.csproj", "{A8072758-E8D2-4551-89C2-564BDE162403}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "XPathParserTest", "XPathParserTest\XPathParserTest.csproj", "{6706ED44-C0D9-45A0-A911-80B13866E4E2}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|Any CPU = Debug|Any CPU
13 | Release|Any CPU = Release|Any CPU
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {A8072758-E8D2-4551-89C2-564BDE162403}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17 | {A8072758-E8D2-4551-89C2-564BDE162403}.Debug|Any CPU.Build.0 = Debug|Any CPU
18 | {A8072758-E8D2-4551-89C2-564BDE162403}.Release|Any CPU.ActiveCfg = Release|Any CPU
19 | {A8072758-E8D2-4551-89C2-564BDE162403}.Release|Any CPU.Build.0 = Release|Any CPU
20 | {6706ED44-C0D9-45A0-A911-80B13866E4E2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | {6706ED44-C0D9-45A0-A911-80B13866E4E2}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | {6706ED44-C0D9-45A0-A911-80B13866E4E2}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | {6706ED44-C0D9-45A0-A911-80B13866E4E2}.Release|Any CPU.Build.0 = Release|Any CPU
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | EndGlobal
29 |
--------------------------------------------------------------------------------
/XPathParserTest/XPathTreeBuilder.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 | using System.Xml.Linq;
5 | using CodePlex.XPathParser;
6 |
7 | namespace XPathParserTest {
8 | class XPathTreeBuilder : IXPathBuilder { // Sample builder that records the parser's calls as an XLinq tree; every node is an XElement
9 | // Nothing to set up or post-process: EndBuild returns the tree unchanged.
10 | public void StartBuild() {}
11 |
12 | public XElement EndBuild(XElement result) {
13 | return result;
14 | }
15 | // <string value="..."/>
16 | public XElement String(string value) {
17 | return new XElement("string", new XAttribute("value", value));
18 | }
19 | // <number value="..."/>
20 | public XElement Number(string value) {
21 | return new XElement("number", new XAttribute("value", value));
22 | }
23 | // Element named after the operator with the operands as children; unary minus becomes <negate> with a single child.
24 | public XElement Operator(XPathOperator op, XElement left, XElement right) {
25 | if (op == XPathOperator.UnaryMinus) {
26 | return new XElement("negate", left);
27 | }
28 | return new XElement(op.ToString(), left, right);
29 | }
30 | // Element named after the axis; a null prefix or name is recorded as the text "(null)".
31 | public XElement Axis(XPathAxis xpathAxis, System.Xml.XPath.XPathNodeType nodeType, string prefix, string name) {
32 | return new XElement(xpathAxis.ToString(),
33 | new XAttribute("nodeType", nodeType.ToString()),
34 | new XAttribute("prefix" , prefix??"(null)"),
35 | new XAttribute("name" , name??"(null)")
36 | );
37 | }
38 | // <step> with the two joined steps as children.
39 | public XElement JoinStep(XElement left, XElement right) {
40 | return new XElement("step", left, right);
41 | }
42 | // <predicate reverse="..."> holding the filtered node followed by the condition.
43 | public XElement Predicate(XElement node, XElement condition, bool reverseStep) {
44 | return new XElement("predicate", new XAttribute("reverse", reverseStep),
45 | node, condition
46 | );
47 | }
48 | // <variable prefix="..." name="..."/>
49 | public XElement Variable(string prefix, string name) {
50 | return new XElement("variable",
51 | new XAttribute("prefix", prefix ?? "(null)"),
52 | new XAttribute("name", name ?? "(null)")
53 | );
54 | }
55 | // <function prefix="..." name="..."> with one child per argument; named "function" so calls stay distinguishable from variable references.
56 | public XElement Function(string prefix, string name, IList args) {
57 | XElement xe = new XElement("function",
58 | new XAttribute("prefix", prefix ?? "(null)"),
59 | new XAttribute("name", name ?? "(null)")
60 | );
61 | foreach (XElement e in args) {
62 | xe.Add(e);
63 | }
64 | return xe;
65 | }
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Microsoft Public License (Ms-PL)
2 |
3 | This license governs use of the accompanying software. If you use the software, you accept this license. If you do not accept the license, do not use the software.
4 |
5 | 1. Definitions
6 |
7 | The terms "reproduce," "reproduction," "derivative works," and "distribution" have the same meaning here as under U.S. copyright law.
8 |
9 | A "contribution" is the original software, or any additions or changes to the software.
10 |
11 | A "contributor" is any person that distributes its contribution under this license.
12 |
13 | "Licensed patents" are a contributor's patent claims that read directly on its contribution.
14 |
15 | 2. Grant of Rights
16 |
17 | (A) Copyright Grant- Subject to the terms of this license, including the license conditions and limitations in section 3, each contributor grants you a non-exclusive, worldwide, royalty-free copyright license to reproduce its contribution, prepare derivative works of its contribution, and distribute its contribution or any derivative works that you create.
18 |
19 | (B) Patent Grant- Subject to the terms of this license, including the license conditions and limitations in section 3, each contributor grants you a non-exclusive, worldwide, royalty-free license under its licensed patents to make, have made, use, sell, offer for sale, import, and/or otherwise dispose of its contribution in the software or derivative works of the contribution in the software.
20 |
21 | 3. Conditions and Limitations
22 |
23 | (A) No Trademark License- This license does not grant you rights to use any contributors' name, logo, or trademarks.
24 |
25 | (B) If you bring a patent claim against any contributor over patents that you claim are infringed by the software, your patent license from such contributor to the software ends automatically.
26 |
27 | (C) If you distribute any portion of the software, you must retain all copyright, patent, trademark, and attribution notices that are present in the software.
28 |
29 | (D) If you distribute any portion of the software in source code form, you may do so only under this license by including a complete copy of this license with your distribution. If you distribute any portion of the software in compiled or object code form, you may only do so under a license that complies with this license.
30 |
31 | (E) The software is licensed "as-is." You bear the risk of using it. The contributors give no express warranties, guarantees or conditions. You may have additional consumer rights under your local laws which this license cannot change. To the extent permitted under your local laws, the contributors exclude the implied warranties of merchantability, fitness for a particular purpose and non-infringement.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # XPathParser: XPath parser in C# source code.
2 | [Build status on AppVeyor](https://ci.appveyor.com/project/qmfrederik/xpathparser)
3 |
4 | Close to the one System.Xml uses in `XslCompiledTransform`.
5 | Currently supports XPath 1.0 grammar.
6 |
7 | This implementation uses the _Builder_ pattern to separate the parsing code from the result the parser should produce.
8 |
9 | While parsing the source XPath, the parser calls the methods of the `IXPathBuilder` interface provided by the caller.
10 |
11 | Users of this code are expected to write their own `IXPathBuilder` implementation.
12 | Source code of this project contains two `IXPathBuilder` implementations for demo/testing purpose:
13 | * `XPathTreeBuilder` - constructs XLinq tree that represents XPath syntax tree.
14 | * `XPathStringBuilder` - compiles the syntax tree back to an XPath string.
15 |
16 | ## Installation
17 |
18 | Install using the command line:
19 |
20 | ```
21 | Install-Package XPathParser
22 | ```
23 |
24 | ## Architecture
25 | XPath is a language to query data from XML documents. It is built in to XSLT and can be used standalone in several .NET APIs. (http://www.w3.org/TR/xpath)
26 | In some cases customers need to parse XPath expressions themselves to analyze, modify or validate them.
27 | `XPathParser` is the class that can help you doing this.
28 |
29 | To make the parser extensible, `XPathParser` uses the "Builder" pattern. It takes a string with an XPath expression and an instance of the `IXPathBuilder` interface (the builder) as input and generates a set of calls to the builder.
30 |
31 | This way implementation of the `XPathParser` doesn't dictate how parsed expression would be represented.
32 |
33 | With the `XPathParser` we provide `XPathParserTest`, which contains two sample implementations of `IXPathBuilder`: `XPathTreeBuilder`, which builds an XLinq tree as a result of parsing, and `XPathStringBuilder`, which generates a string representation of the compiled XPath.
34 |
35 | The work of `XPathParser` can be demonstrated in the following diagram:
36 |
37 | ![XPathParser architecture](architecture.png)
38 |
39 | ## Examples
40 |
41 | ### Expression "1 + 2":
42 | ```
43 | ctx = StartBuild();
44 | return EndBuild(Operator(XPathOperator.Plus, Number(1), Number(2)))
45 | ```
46 |
47 | ### Expression "a/@*":
48 |
49 | ```
50 | ctx = StartBuild();
51 | stp1 = Axis(ctx, XPathAxis.Child, QilXmlNodeKind.Element, "", "a");
52 | stp2 = Axis(stp1, XPathAxis.Attribute, QilXmlNodeKind.Attribute, "", "");
53 | return EndBuild(JoinStep(stp1, stp2));
54 | ```
55 |
56 | ### Expression "parent::a[@b]":
57 |
58 | ctx = StartBuild();
59 | stp1 = Axis(ctx, XPathAxis.Parent, QilXmlNodeKind.Element, "", "a");
60 | stp2 = Axis(stp1, XPathAxis.Attribute, QilXmlNodeKind.Attribute, "", "b");
61 | return EndBuild(Predicate(stp1, stp2));
62 |
63 | ## Credits
64 | This repository was forked from http://xpathparser.codeplex.com/
--------------------------------------------------------------------------------
/XPathParser/XPathParserException.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Text;
3 |
4 | namespace CodePlex.XPathParser
5 | {
6 | public class XPathParserException : System.Exception { // Parse error that remembers the query text and the offending character range
7 | public string queryString; // expression that was being parsed; may be null
8 | public int startChar; // offset where the offending fragment starts
9 | public int endChar; // offset just past the offending fragment
10 | // Stores the arguments as given; the offsets are not validated here.
11 | public XPathParserException(string queryString, int startChar, int endChar, string message) : base(message) {
12 | this.queryString = queryString;
13 | this.startChar = startChar;
14 | this.endChar = endChar;
15 | }
16 |
17 | private enum TrimType { // which part of an over-long fragment AppendTrimmed keeps
18 | Left, // keep the tail; the marker goes first
19 | Right, // keep the head; the marker goes last
20 | Middle, // keep head and tail; the marker goes between them
21 | }
22 |
23 | // This function is used to prevent long quotations in error messages: it appends 'count' characters of 'value' starting at 'startIndex', shortened to TrimSize characters plus "..." when longer.
24 | private static void AppendTrimmed(StringBuilder sb, string value, int startIndex, int count, TrimType trimType) {
25 | const int TrimSize = 32;
26 | const string TrimMarker = "...";
27 |
28 | if (count <= TrimSize) {
29 | sb.Append(value, startIndex, count);
30 | } else {
31 | switch (trimType) {
32 | case TrimType.Left:
33 | sb.Append(TrimMarker);
34 | sb.Append(value, startIndex + count - TrimSize, TrimSize);
35 | break;
36 | case TrimType.Right:
37 | sb.Append(value, startIndex, TrimSize);
38 | sb.Append(TrimMarker);
39 | break;
40 | case TrimType.Middle:
41 | sb.Append(value, startIndex, TrimSize / 2);
42 | sb.Append(TrimMarker);
43 | sb.Append(value, startIndex + count - TrimSize / 2, TrimSize / 2);
44 | break;
45 | }
46 | }
47 | }
48 | // Returns the query with the error range wrapped in " -->" and "<-- ", or null when the query is null or blank.
49 | internal string MarkOutError() {
50 | if (queryString == null || queryString.Trim(' ').Length == 0) {
51 | return null;
52 | }
53 | // Clamp both offsets into [0, Length] so that out-of-range positions cannot make Append throw while the error is being formatted.
54 | int start = Math.Max(0, Math.Min(startChar, queryString.Length));
55 | int end = Math.Max(0, Math.Min(endChar, queryString.Length));
56 | int len = end - start;
57 | StringBuilder sb = new StringBuilder();
58 | AppendTrimmed(sb, queryString, 0, start, TrimType.Left);
59 | if (len > 0) {
60 | sb.Append(" -->");
61 | AppendTrimmed(sb, queryString, start, len, TrimType.Middle);
62 | }
63 | sb.Append("<-- ");
64 | AppendTrimmed(sb, queryString, end, queryString.Length - end, TrimType.Right);
65 |
66 | return sb.ToString();
67 | }
68 |
69 | // Message followed, on a new line, by the marked-out query when one is available.
70 | private string FormatDetailedMessage() {
71 | string message = Message;
72 | string error = MarkOutError();
73 |
74 | if (error != null && error.Length > 0) {
75 | if (message.Length > 0) {
76 | message += Environment.NewLine;
77 | }
78 | message += error;
79 | }
80 | return message;
81 | }
82 | // Builds "<type name>: <detailed message>" and appends the stack trace when there is one.
83 | public override string ToString() {
84 | string result = this.GetType().FullName;
85 | string info = FormatDetailedMessage();
86 | if (info != null && info.Length > 0) {
87 | result += ": " + info;
88 | }
89 | if (StackTrace != null) {
90 | result += Environment.NewLine + StackTrace;
91 | }
92 | return result;
93 | }
94 |
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/XPathParserTest/Test.cs:
--------------------------------------------------------------------------------
1 | using CodePlex.XPathParser;
2 | using System;
3 | using System.Diagnostics;
4 | using System.Xml;
5 | using System.Xml.Linq;
6 | using Xunit;
7 |
8 | namespace XPathParserTest
9 | {
10 | public class Test
11 | {
12 | // Expressions from http://www.w3.org/TR/xpath#location-paths; each one must parse with both sample builders without throwing
13 | [InlineData(@"child::para")]
14 | [InlineData(@"child::*")]
15 | [InlineData(@"child::text()")]
16 | [InlineData(@"child::node()")]
17 | [InlineData(@"attribute::name")]
18 | [InlineData(@"attribute::*")]
19 | [InlineData(@"descendant::para")]
20 | [InlineData(@"ancestor::div")]
21 | [InlineData(@"ancestor-or-self::div")]
22 | [InlineData(@"descendant-or-self::para")]
23 | [InlineData(@"self::para")]
24 | [InlineData(@"child::chapter/descendant::para")]
25 | [InlineData(@"child::*/child::para")]
26 | [InlineData(@"/")]
27 | [InlineData(@"/descendant::para")]
28 | [InlineData(@"/descendant::olist/child::item")]
29 | [InlineData(@"child::para[position()=1]")]
30 | [InlineData(@"child::para[position()=last()]")]
31 | [InlineData(@"child::para[position()=last()-1]")]
32 | [InlineData(@"child::para[position()>1]")]
33 | [InlineData(@"following-sibling::chapter[position()=1]")]
34 | [InlineData(@"preceding-sibling::chapter[position()=1]")]
35 | [InlineData(@"/descendant::figure[position()=42]")]
36 | [InlineData(@"/child::doc/child::chapter[position()=5]/child::section[position()=2]")]
37 | [InlineData(@"child::para[attribute::type=""warning""]")]
38 | [InlineData(@"child::para[attribute::type='warning'][position()=5]")]
39 | [InlineData(@"child::para[position()=5][attribute::type=""warning""]")]
40 | [InlineData(@"child::chapter[child::title='Introduction']")]
41 | [InlineData(@"child::chapter[child::title]")]
42 | [InlineData(@"child::*[self::chapter or self::appendix]")]
43 | [InlineData(@"child::*[self::chapter or self::appendix][position()=last()]")]
44 | [Theory]
45 | public void CorrectTest(string expression)
46 | {
47 | RunTestString(expression); // no assertion on the output: the test passes when nothing throws
48 | RunTestTree(expression);
49 | }
50 | // Malformed expressions; each one must make RunTestTree throw
51 | [InlineData(@"")]
52 | [InlineData(@"a b")]
53 | [InlineData(@"a[")]
54 | [InlineData(@"]")]
55 | [InlineData(@"///")]
56 | [InlineData(@"fo(")]
57 | [InlineData(@")")]
58 | [InlineData(@"a[']")]
59 | [InlineData(@"b[""]")]
60 | [InlineData(@"3e8")]
61 | [InlineData(@"child::*[self::chapter or self::appendix][position()=last()] child::*[self::chapter or self::appendix][position()=last()]")]
62 | [Theory]
63 | public void ErrorTest(string expression)
64 | {
65 | Assert.Throws(() => RunTestTree(expression)); // NOTE(review): no exception type argument is visible here; presumably Assert.Throws<XPathParserException> - confirm
66 | }
67 | // Parses with XPathStringBuilder and writes the rebuilt expression to the debug output
68 | static void RunTestString(string xpathExpr)
69 | {
70 | Debug.WriteLine("Translated one: {0}", new XPathParser().Parse(xpathExpr, new XPathStringBuilder()));
71 | }
72 | // Parses with XPathTreeBuilder and writes the resulting tree as indented XML to Console.Out
73 | static void RunTestTree(string xpathExpr)
74 | {
75 | XElement xe = new XPathParser().Parse(xpathExpr, new XPathTreeBuilder());
76 | XmlWriterSettings ws = new XmlWriterSettings();
77 | { // plain block; it only groups the settings assignments
78 | ws.Indent = true;
79 | ws.OmitXmlDeclaration = true;
80 | }
81 | using (XmlWriter w = XmlWriter.Create(Console.Out, ws))
82 | {
83 | xe.WriteTo(w);
84 | }
85 | }
86 | }
87 | }
88 |
89 |
--------------------------------------------------------------------------------
/XPathParserTest/XPathStringBuilder.cs:
--------------------------------------------------------------------------------
1 | using CodePlex.XPathParser;
2 | using System;
3 | using System.Collections.Generic;
4 | using System.Diagnostics;
5 | using System.Xml.XPath;
6 |
7 | namespace XPathParserTest
8 | {
9 | class XPathStringBuilder : IXPathBuilder { // Sample builder that turns the parser's calls back into XPath text; every node is a string
10 | #region IXPathBuilder Members
11 | // Nothing to set up or post-process: the built string is already the result.
12 | public void StartBuild() { }
13 |
14 | public string EndBuild(string result) {
15 | return result;
16 | }
17 | // Re-quotes a string literal. XPath 1.0 literals have no escape syntax, so a value containing an apostrophe is wrapped in double quotes instead; a value containing both quote kinds cannot be written as one literal and is not handled here.
18 | public string String(string value) {
19 | return value.IndexOf('\'') < 0 ? "'" + value + "'" : "\"" + value + "\"";
20 | }
21 | // Numbers are passed through unchanged.
22 | public string Number(string value) {
23 | return value;
24 | }
25 | // Joins the operands with the operator's text from opStrings; unary minus uses only 'left'.
26 | public string Operator(XPathOperator op, string left, string right) {
27 | Debug.Assert(op != XPathOperator.Union);
28 | if (op == XPathOperator.UnaryMinus) {
29 | return "-" + left;
30 | }
31 | return left + opStrings[(int)op] + right;
32 | }
33 | // Renders one step as "<axis>::<node test>", choosing the node test from nodeType; throws ArgumentException for any other node type.
34 | public string Axis(XPathAxis xpathAxis, XPathNodeType nodeType, string prefix, string name) {
35 | string nodeTest;
36 | switch (nodeType) {
37 | case XPathNodeType.ProcessingInstruction:
38 | Debug.Assert(prefix == "");
39 | nodeTest = "processing-instruction(" + name + ")";
40 | break;
41 | case XPathNodeType.Text:
42 | Debug.Assert(prefix == null && name == null);
43 | nodeTest = "text()";
44 | break;
45 | case XPathNodeType.Comment:
46 | Debug.Assert(prefix == null && name == null);
47 | nodeTest = "comment()";
48 | break;
49 | case XPathNodeType.All:
50 | nodeTest = "node()";
51 | break;
52 | case XPathNodeType.Attribute:
53 | case XPathNodeType.Element:
54 | case XPathNodeType.Namespace:
55 | nodeTest = QNameOrWildcard(prefix, name);
56 | break;
57 | default:
58 | throw new ArgumentException("unexpected XPathNodeType", "nodeType");
59 | }
60 | return axisStrings[(int)xpathAxis] + nodeTest;
61 | }
62 | // Path composition: "left/right".
63 | public string JoinStep(string left, string right) {
64 | return left + '/' + right;
65 | }
66 | // Appends "[condition]" to the node, parenthesising the node first when reverseStep is false.
67 | public string Predicate(string node, string condition, bool reverseStep) {
68 | if (!reverseStep) {
69 | // In this method we don't know how axis was represented in original XPath and the only
70 | // difference between ancestor::*[2] and (ancestor::*)[2] is the reverseStep parameter.
71 | // to not store the axis from previous builder events we simply wrap node in the () here.
72 | node = '(' + node + ')';
73 | }
74 | return node + '[' + condition + ']';
75 | }
76 | // "$name" or "$prefix:name".
77 | public string Variable(string prefix, string name) {
78 | return '$' + QName(prefix, name);
79 | }
80 | // "name(arg1,arg2,...)" with an optional prefix.
81 | public string Function(string prefix, string name, IList args) {
82 | string result = QName(prefix, name) + '(';
83 | for (int i = 0; i < args.Count; i++) {
84 | if (i != 0) {
85 | result += ',';
86 | }
87 | result += args[i];
88 | }
89 | result += ')';
90 | return result;
91 | }
92 | // Formats prefix and local name as a QName; neither part may be null, an empty prefix means "no prefix".
93 | private static string QName(string prefix, string localName) {
94 | if (prefix == null) {
95 | throw new ArgumentNullException("prefix");
96 | }
97 | if (localName == null) {
98 | throw new ArgumentNullException("localName");
99 | }
100 | return prefix == "" ? localName : prefix + ':' + localName;
101 | }
102 | // Like QName, but a null prefix yields "*" and a null local name yields "prefix:*".
103 | private static string QNameOrWildcard(string prefix, string localName) {
104 | if (prefix == null) {
105 | Debug.Assert(localName == null);
106 | return "*";
107 | }
108 | if (localName == null) {
109 | Debug.Assert(prefix != "");
110 | return prefix + ":*";
111 | }
112 | return prefix == "" ? localName : prefix + ':' + localName;
113 | }
114 |
115 | #endregion
116 | // Operator text, indexed by (int)XPathOperator - the order must match the enum.
117 | string[] opStrings = {
118 | /* Unknown */ " Unknown ",
119 | /* Or */ " or " ,
120 | /* And */ " and ",
121 | /* Eq */ "=" ,
122 | /* Ne */ "!=" ,
123 | /* Lt */ "<" ,
124 | /* Le */ "<=" ,
125 | /* Gt */ ">" ,
126 | /* Ge */ ">=" ,
127 | /* Plus */ "+" ,
128 | /* Minus */ "-" ,
129 | /* Multiply */ "*" ,
130 | /* Divide */ " div ",
131 | /* Modulo */ " mod ",
132 | /* UnaryMinus */ "-" ,
133 | /* Union */ "|"
134 | };
135 | // Axis text, indexed by (int)XPathAxis - the order must match the enum.
136 | string[] axisStrings = {
137 | /*Unknown */ "Unknown::" ,
138 | /*Ancestor */ "ancestor::" ,
139 | /*AncestorOrSelf */ "ancestor-or-self::" ,
140 | /*Attribute */ "attribute::" ,
141 | /*Child */ "child::" ,
142 | /*Descendant */ "descendant::" ,
143 | /*DescendantOrSelf */ "descendant-or-self::",
144 | /*Following */ "following::" ,
145 | /*FollowingSibling */ "following-sibling::" ,
146 | /*Namespace */ "namespace::" ,
147 | /*Parent */ "parent::" ,
148 | /*Preceding */ "preceding::" ,
149 | /*PrecedingSibling */ "preceding-sibling::" ,
150 | /*Self */ "self::" ,
151 | /*Root */ "root::" ,
152 | };
153 | }
154 | }
155 |
156 |
--------------------------------------------------------------------------------
/XPathParser/XPathScanner.cs:
--------------------------------------------------------------------------------
1 | using System.Diagnostics;
2 | using System.Text.RegularExpressions;
3 |
4 | namespace CodePlex.XPathParser {
5 | // Extends XPathOperator enumeration: Unknown through Union repeat the XPathOperator members in the same order, so their numeric values coincide
6 | internal enum LexKind {
7 | Unknown, // Unknown lexeme
8 | Or, // Operator 'or'
9 | And, // Operator 'and'
10 | Eq, // Operator '='
11 | Ne, // Operator '!='
12 | Lt, // Operator '<'
13 | Le, // Operator '<='
14 | Gt, // Operator '>'
15 | Ge, // Operator '>='
16 | Plus, // Operator '+'
17 | Minus, // Operator '-'
18 | Multiply, // Operator '*'
19 | Divide, // Operator 'div'
20 | Modulo, // Operator 'mod'
21 | UnaryMinus, // Not used
22 | Union, // Operator '|'
23 | LastOperator = Union, // alias marking the end of the operator range
24 | // Lexemes that are not single characters
25 | DotDot, // '..'
26 | ColonColon, // '::'
27 | SlashSlash, // Operator '//'
28 | Number, // Number (numeric literal)
29 | Axis, // AxisName
30 |
31 | Name, // NameTest, NodeType, FunctionName, AxisName, second part of VariableReference
32 | String, // Literal (string literal)
33 | Eof, // End of the expression
34 |
35 | FirstStringable = Name, // alias for Name
36 | LastNonChar = Eof, // alias for Eof: the last member that is not a character code
37 | // Single-character lexemes: the value is the character itself, so the scanner can cast the current char straight to LexKind
38 | LParens = '(',
39 | RParens = ')',
40 | LBracket = '[',
41 | RBracket = ']',
42 | Dot = '.',
43 | At = '@',
44 | Comma = ',',
45 |
46 | Star = '*', // NameTest
47 | Slash = '/', // Operator '/'
48 | Dollar = '$', // First part of VariableReference
49 | RBrace = '}', // Used for AVTs
50 | };
51 |
52 | internal sealed class XPathScanner {
53 | private string xpathExpr;
54 | private int curIndex;
55 | private char curChar;
56 | private LexKind kind;
57 | private string name;
58 | private string prefix;
59 | private string stringValue;
60 | private bool canBeFunction;
61 | private int lexStart;
62 | private int prevLexEnd;
63 | private LexKind prevKind;
64 | private XPathAxis axis;
65 |
66 | public XPathScanner(string xpathExpr) : this(xpathExpr, 0) {} // scans from the start of the expression
67 | // Creates a scanner over xpathExpr that starts at offset startFrom and immediately reads the first lexeme.
68 | public XPathScanner(string xpathExpr, int startFrom) {
69 | Debug.Assert(xpathExpr != null);
70 | this.xpathExpr = xpathExpr;
71 | this.kind = LexKind.Unknown; // NextLex copies this into prevKind on its first call
72 | SetSourceIndex(startFrom); // loads curChar with the character at startFrom
73 | NextLex(); // so that Kind is valid as soon as construction finishes
74 | }
75 |
76 | public string Source { get { return xpathExpr; } } // the expression text being scanned
77 | public LexKind Kind { get { return kind; } } // kind of the current lexeme
78 | public int LexStart { get { return lexStart; } } // index in Source where the current lexeme starts
79 | public int LexSize { get { return curIndex - lexStart; } } // characters consumed since LexStart
80 | public int PrevLexEnd { get { return prevLexEnd; } } // scan position recorded at the start of the latest NextLex call
81 | // Repositions the scanner so that curChar is the character at 'index' ('\0' when index equals the length of the expression).
82 | private void SetSourceIndex(int index) {
83 | Debug.Assert(0 <= index && index <= xpathExpr.Length);
84 | curIndex = index - 1; // one step back, because NextChar advances before it reads
85 | NextChar();
86 | }
87 | // Moves to the next character of the expression; past the last character curChar becomes '\0'.
88 | private void NextChar() {
89 | Debug.Assert(curIndex >= -1 && curIndex < xpathExpr.Length);
90 | curIndex += 1;
91 | if (curIndex >= xpathExpr.Length) {
92 | Debug.Assert(curIndex == xpathExpr.Length);
93 | curChar = '\0';
94 | } else {
95 | curChar = xpathExpr[curIndex];
96 | }
97 | }
98 | // Local name of the current Name lexeme (for "p:n" this is "n"; for "p:*" it is "*").
99 | public string Name {
100 | get {
101 | Debug.Assert(kind == LexKind.Name);
102 | Debug.Assert(name != null);
103 | return name;
104 | }
105 | }
106 | // Prefix of the current Name lexeme; the empty string when the name has no prefix.
107 | public string Prefix {
108 | get {
109 | Debug.Assert(kind == LexKind.Name);
110 | Debug.Assert(prefix != null);
111 | return prefix;
112 | }
113 | }
114 | // Source text of the current lexeme; at end of input, whatever LexKindToString returns for Eof instead.
115 | public string RawValue {
116 | get {
117 | if (kind == LexKind.Eof) {
118 | return LexKindToString(kind);
119 | } else {
120 | return xpathExpr.Substring(lexStart, curIndex - lexStart);
121 | }
122 | }
123 | }
124 | // Value of the current String lexeme. NOTE(review): presumably the literal without its quotes - the code that sets stringValue is outside this view, confirm.
125 | public string StringValue {
126 | get {
127 | Debug.Assert(kind == LexKind.String);
128 | Debug.Assert(stringValue != null);
129 | return stringValue;
130 | }
131 | }
132 |
133 | // Returns true if the character following a QName (possibly after intervening
134 | // ExprWhitespace) is '('. In this case the token must be recognized as a NodeType
135 | // or a FunctionName unless it is an OperatorName. This distinction cannot be made
136 | // without knowing the previous lexeme. For example, "or" in "... or (1 != 0)" may
137 | // be an OperatorName or a FunctionName.
138 | public bool CanBeFunction {
139 | get {
140 | Debug.Assert(kind == LexKind.Name);
141 | return canBeFunction;
142 | }
143 | }
144 | // Axis of the current Axis lexeme; asserts that it has been resolved to something other than Unknown.
145 | public XPathAxis Axis {
146 | get {
147 | Debug.Assert(kind == LexKind.Axis);
148 | Debug.Assert(axis != XPathAxis.Unknown);
149 | return axis;
150 | }
151 | }
152 | // Advances past any run of space, tab, line feed or carriage return at the current position.
153 | private void SkipSpace() {
154 | while (curChar == ' ' || curChar == '\t' || curChar == '\n' || curChar == '\r') {
155 | NextChar();
156 | }
157 | }
158 | // True for the ten ASCII digits '0' through '9' only.
159 | private static bool IsAsciiDigit(char ch) {
160 | return '0' <= ch && ch <= '9';
161 | }
162 | // True for the four whitespace characters the scanner skips: space, tab, line feed and carriage return.
163 | public static bool IsWhiteSpace(char ch) {
164 | return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
165 | }
166 |
167 | public void NextLex() {
168 | prevLexEnd = curIndex;
169 | prevKind = kind;
170 | SkipSpace();
171 | lexStart = curIndex;
172 |
173 | switch (curChar) {
174 | case '\0':
175 | kind = LexKind.Eof;
176 | return;
177 | case '(': case ')': case '[': case ']':
178 | case '@': case ',': case '$': case '}':
179 | kind = (LexKind)curChar;
180 | NextChar();
181 | break;
182 | case '.':
183 | NextChar();
184 | if (curChar == '.') {
185 | kind = LexKind.DotDot;
186 | NextChar();
187 | } else if (IsAsciiDigit(curChar)) {
188 | SetSourceIndex(lexStart);
189 | goto case '0';
190 | } else {
191 | kind = LexKind.Dot;
192 | }
193 | break;
194 | case ':':
195 | NextChar();
196 | if (curChar == ':') {
197 | kind = LexKind.ColonColon;
198 | NextChar();
199 | } else {
200 | kind = LexKind.Unknown;
201 | }
202 | break;
203 | case '*':
204 | kind = LexKind.Star;
205 | NextChar();
206 | CheckOperator(true);
207 | break;
208 | case '/':
209 | NextChar();
210 | if (curChar == '/') {
211 | kind = LexKind.SlashSlash;
212 | NextChar();
213 | } else {
214 | kind = LexKind.Slash;
215 | }
216 | break;
217 | case '|':
218 | kind = LexKind.Union;
219 | NextChar();
220 | break;
221 | case '+':
222 | kind = LexKind.Plus;
223 | NextChar();
224 | break;
225 | case '-':
226 | kind = LexKind.Minus;
227 | NextChar();
228 | break;
229 | case '=':
230 | kind = LexKind.Eq;
231 | NextChar();
232 | break;
233 | case '!':
234 | NextChar();
235 | if (curChar == '=') {
236 | kind = LexKind.Ne;
237 | NextChar();
238 | } else {
239 | kind = LexKind.Unknown;
240 | }
241 | break;
242 | case '<':
243 | NextChar();
244 | if (curChar == '=') {
245 | kind = LexKind.Le;
246 | NextChar();
247 | } else {
248 | kind = LexKind.Lt;
249 | }
250 | break;
251 | case '>':
252 | NextChar();
253 | if (curChar == '=') {
254 | kind = LexKind.Ge;
255 | NextChar();
256 | } else {
257 | kind = LexKind.Gt;
258 | }
259 | break;
260 | case '"':
261 | case '\'':
262 | kind = LexKind.String;
263 | ScanString();
264 | break;
265 | case '0': case '1': case '2': case '3':
266 | case '4': case '5': case '6': case '7':
267 | case '8': case '9':
268 | kind = LexKind.Number;
269 | ScanNumber();
270 | break;
271 | default:
272 | this.name = ScanNCName();
273 | if (this.name != null) {
274 | kind = LexKind.Name;
275 | this.prefix = string.Empty;
276 | this.canBeFunction = false;
277 | this.axis = XPathAxis.Unknown;
278 | bool colonColon = false;
279 | int saveSourceIndex = curIndex;
280 |
281 | // "foo:bar" or "foo:*" -- one lexeme (no spaces allowed)
282 | // "foo::" or "foo ::" -- two lexemes, reported as one (AxisName)
283 | // "foo:?" or "foo :?" -- lexeme "foo" reported
284 | if (curChar == ':') {
285 | NextChar();
286 | if (curChar == ':') { // "foo::" -> OperatorName, AxisName
287 | NextChar();
288 | colonColon = true;
289 | SetSourceIndex(saveSourceIndex);
290 | } else { // "foo:bar", "foo:*" or "foo:?"
291 | string ncName = ScanNCName();
292 | if (ncName != null) {
293 | this.prefix = this.name;
294 | this.name = ncName;
295 | // Look ahead for '(' to determine whether QName can be a FunctionName
296 | saveSourceIndex = curIndex;
297 | SkipSpace();
298 | this.canBeFunction = (curChar == '(');
299 | SetSourceIndex(saveSourceIndex);
300 | } else if (curChar == '*') {
301 | NextChar();
302 | this.prefix = this.name;
303 | this.name = "*";
304 | } else { // "foo:?" -> OperatorName, NameTest
305 | // Return "foo" and leave ":" to be reported later as an unknown lexeme
306 | SetSourceIndex(saveSourceIndex);
307 | }
308 | }
309 | } else {
310 | SkipSpace();
311 | if (curChar == ':') { // "foo ::" or "foo :?"
312 | NextChar();
313 | if (curChar == ':') {
314 | NextChar();
315 | colonColon = true;
316 | }
317 | SetSourceIndex(saveSourceIndex);
318 | } else {
319 | this.canBeFunction = (curChar == '(');
320 | }
321 | }
322 | if (!CheckOperator(false) && colonColon) {
323 | this.axis = CheckAxis();
324 | }
325 | } else {
326 | kind = LexKind.Unknown;
327 | NextChar();
328 | }
329 | break;
330 | }
331 | }
332 |
333 | private bool CheckOperator(bool star) {
334 | LexKind opKind;
335 |
336 | if (star) {
337 | opKind = LexKind.Multiply;
338 | } else {
339 | if (prefix.Length != 0 || name.Length > 3)
340 | return false;
341 |
342 | switch (name) {
343 | case "or" : opKind = LexKind.Or; break;
344 | case "and": opKind = LexKind.And; break;
345 | case "div": opKind = LexKind.Divide; break;
346 | case "mod": opKind = LexKind.Modulo; break;
347 | default : return false;
348 | }
349 | }
350 |
351 | // If there is a preceding token and the preceding token is not one of '@', '::', '(', '[', ',' or an Operator,
352 | // then a '*' must be recognized as a MultiplyOperator and an NCName must be recognized as an OperatorName.
353 | if (prevKind <= LexKind.LastOperator)
354 | return false;
355 |
356 | switch (prevKind) {
357 | case LexKind.Slash:
358 | case LexKind.SlashSlash:
359 | case LexKind.At:
360 | case LexKind.ColonColon:
361 | case LexKind.LParens:
362 | case LexKind.LBracket:
363 | case LexKind.Comma:
364 | case LexKind.Dollar:
365 | return false;
366 | }
367 |
368 | this.kind = opKind;
369 | return true;
370 | }
371 |
372 | private XPathAxis CheckAxis() {
373 | this.kind = LexKind.Axis;
374 | switch (name) {
375 | case "ancestor" : return XPathAxis.Ancestor;
376 | case "ancestor-or-self" : return XPathAxis.AncestorOrSelf;
377 | case "attribute" : return XPathAxis.Attribute;
378 | case "child" : return XPathAxis.Child;
379 | case "descendant" : return XPathAxis.Descendant;
380 | case "descendant-or-self" : return XPathAxis.DescendantOrSelf;
381 | case "following" : return XPathAxis.Following;
382 | case "following-sibling" : return XPathAxis.FollowingSibling;
383 | case "namespace" : return XPathAxis.Namespace;
384 | case "parent" : return XPathAxis.Parent;
385 | case "preceding" : return XPathAxis.Preceding;
386 | case "preceding-sibling" : return XPathAxis.PrecedingSibling;
387 | case "self" : return XPathAxis.Self;
388 | default :
389 | this.kind = LexKind.Name;
390 | return XPathAxis.Unknown;
391 | }
392 | }
393 |
394 | private void ScanNumber() {
395 | Debug.Assert(IsAsciiDigit(curChar) || curChar == '.');
396 | while (IsAsciiDigit(curChar)) {
397 | NextChar();
398 | }
399 | if (curChar == '.') {
400 | NextChar();
401 | while (IsAsciiDigit(curChar)) {
402 | NextChar();
403 | }
404 | }
405 | if ((curChar & (~0x20)) == 'E') {
406 | NextChar();
407 | if (curChar == '+' || curChar == '-') {
408 | NextChar();
409 | }
410 | while (IsAsciiDigit(curChar)) {
411 | NextChar();
412 | }
413 | throw ScientificNotationException();
414 | }
415 | }
416 |
417 | private void ScanString() {
418 | int startIdx = curIndex + 1;
419 | int endIdx = xpathExpr.IndexOf(curChar, startIdx);
420 |
421 | if (endIdx < 0) {
422 | SetSourceIndex(xpathExpr.Length);
423 | throw UnclosedStringException();
424 | }
425 |
426 | this.stringValue = xpathExpr.Substring(startIdx, endIdx - startIdx);
427 | SetSourceIndex(endIdx + 1);
428 | }
429 |
430 | static Regex re = new Regex(@"\p{_xmlI}[\p{_xmlC}-[:]]*", RegexOptions.Compiled);
431 |
432 | private string ScanNCName() {
433 | Match m = re.Match(xpathExpr, curIndex);
434 | if (m.Success) {
435 | curIndex += m.Length - 1;
436 | NextChar();
437 | return m.Value;
438 | }
439 | return null;
440 | }
441 |
442 | public void PassToken(LexKind t) {
443 | CheckToken(t);
444 | NextLex();
445 | }
446 |
447 | public void CheckToken(LexKind t) {
448 | Debug.Assert(LexKind.FirstStringable <= t);
449 | if (kind != t) {
450 | if (t == LexKind.Eof) {
451 | throw EofExpectedException(RawValue);
452 | } else {
453 | throw TokenExpectedException(LexKindToString(t), RawValue);
454 | }
455 | }
456 | }
457 |
458 | // May be called for the following tokens: Name, String, Eof, Comma, LParens, RParens, LBracket, RBracket, RBrace
459 | private string LexKindToString(LexKind t) {
460 | Debug.Assert(LexKind.FirstStringable <= t);
461 |
462 | if (LexKind.LastNonChar < t) {
463 | Debug.Assert("()[].@,*/$}".IndexOf((char)t) >= 0);
464 | return new string((char)t, 1);
465 | }
466 |
467 | switch (t) {
468 | case LexKind.Name : return "";
469 | case LexKind.String : return "";
470 | case LexKind.Eof : return "";
471 | default:
472 | Debug.Fail("Unexpected LexKind: " + t.ToString());
473 | return string.Empty;
474 | }
475 | }
476 |
477 | // XPath error messages
478 | // --------------------
479 |
480 | public XPathParserException UnexpectedTokenException(string token) {
481 | return new XPathParserException(xpathExpr, lexStart, curIndex,
482 | string.Format("Unexpected token '{0}' in the expression.", token)
483 | );
484 | }
485 | public XPathParserException NodeTestExpectedException(string token) {
486 | return new XPathParserException(xpathExpr, lexStart, curIndex,
487 | string.Format("Expected a node test, found '{0}'.", token)
488 | );
489 | }
490 | public XPathParserException PredicateAfterDotException() {
491 | return new XPathParserException(xpathExpr, lexStart, curIndex,
492 | "Abbreviated step '.' cannot be followed by a predicate. Use the full form 'self::node()[predicate]' instead."
493 | );
494 | }
495 | public XPathParserException PredicateAfterDotDotException() {
496 | return new XPathParserException(xpathExpr, lexStart, curIndex,
497 | "Abbreviated step '..' cannot be followed by a predicate. Use the full form 'parent::node()[predicate]' instead."
498 | );
499 | }
500 | public XPathParserException ScientificNotationException() {
501 | return new XPathParserException(xpathExpr, lexStart, curIndex,
502 | "Scientific notation is not allowed."
503 | );
504 | }
505 | public XPathParserException UnclosedStringException() {
506 | return new XPathParserException(xpathExpr, lexStart, curIndex,
507 | "String literal was not closed."
508 | );
509 | }
510 | public XPathParserException EofExpectedException(string token) {
511 | return new XPathParserException(xpathExpr, lexStart, curIndex,
512 | string.Format("Expected end of the expression, found '{0}'.", token)
513 | );
514 | }
515 | public XPathParserException TokenExpectedException(string expectedToken, string actualToken) {
516 | return new XPathParserException(xpathExpr, lexStart, curIndex,
517 | string.Format("Expected token '{0}', found '{1}'.", expectedToken, actualToken)
518 | );
519 | }
520 | }
521 | }
522 |
--------------------------------------------------------------------------------
/XPathParser/XPathParser.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 | using System.Diagnostics;
3 |
4 | namespace CodePlex.XPathParser {
5 | using XPathNodeType = System.Xml.XPath.XPathNodeType;
6 | using System.Globalization;
7 |
8 | public class XPathParser {
9 | private XPathScanner scanner;
10 | private IXPathBuilder builder;
11 | private Stack posInfo = new Stack();
12 |
13 | // Six possible causes of exceptions in the builder:
14 | // 1. Undefined prefix in a node test.
15 | // 2. Undefined prefix in a variable reference, or unknown variable.
16 | // 3. Undefined prefix in a function call, or unknown function, or wrong number/types of arguments.
17 | // 4. Argument of Union operator is not a node-set.
18 | // 5. First argument of Predicate is not a node-set.
19 | // 6. Argument of Axis is not a node-set.
20 |
21 | public Node Parse(string xpathExpr, IXPathBuilder builder) {
22 | Debug.Assert(this.scanner == null && this.builder == null);
23 | Debug.Assert(builder != null);
24 |
25 | Node result = default(Node);
26 | this.scanner = new XPathScanner(xpathExpr);
27 | this.builder = builder;
28 | this.posInfo.Clear();
29 |
30 | try {
31 | builder.StartBuild();
32 | result = ParseExpr();
33 | scanner.CheckToken(LexKind.Eof);
34 | }
35 | catch (XPathParserException e) {
36 | if (e.queryString == null) {
37 | e.queryString = scanner.Source;
38 | PopPosInfo(out e.startChar, out e.endChar);
39 | }
40 | throw;
41 | }
42 | finally {
43 | result = builder.EndBuild(result);
44 | #if DEBUG
45 | this.builder = null;
46 | this.scanner = null;
47 | #endif
48 | }
49 | Debug.Assert(posInfo.Count == 0, "PushPosInfo() and PopPosInfo() calls have been unbalanced");
50 | return result;
51 | }
52 |
53 | #region Location paths and node tests
54 | /**************************************************************************************************/
55 | /* Location paths and node tests */
56 | /**************************************************************************************************/
57 |
58 | private static bool IsStep(LexKind lexKind) {
59 | return (
60 | lexKind == LexKind.Dot ||
61 | lexKind == LexKind.DotDot ||
62 | lexKind == LexKind.At ||
63 | lexKind == LexKind.Axis ||
64 | lexKind == LexKind.Star ||
65 | lexKind == LexKind.Name // NodeTest is also Name
66 | );
67 | }
68 |
69 | /*
70 | * LocationPath ::= RelativeLocationPath | '/' RelativeLocationPath? | '//' RelativeLocationPath
71 | */
72 | private Node ParseLocationPath() {
73 | if (scanner.Kind == LexKind.Slash) {
74 | scanner.NextLex();
75 | Node opnd = builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null);
76 |
77 | if (IsStep(scanner.Kind)) {
78 | opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
79 | }
80 | return opnd;
81 | } else if (scanner.Kind == LexKind.SlashSlash) {
82 | scanner.NextLex();
83 | return builder.JoinStep(
84 | builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null),
85 | builder.JoinStep(
86 | builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
87 | ParseRelativeLocationPath()
88 | )
89 | );
90 | } else {
91 | return ParseRelativeLocationPath();
92 | }
93 | }
94 |
95 | /*
96 | * RelativeLocationPath ::= Step (('/' | '//') Step)*
97 | */
98 | private Node ParseRelativeLocationPath() {
99 | Node opnd = ParseStep();
100 | if (scanner.Kind == LexKind.Slash) {
101 | scanner.NextLex();
102 | opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
103 | } else if (scanner.Kind == LexKind.SlashSlash) {
104 | scanner.NextLex();
105 | opnd = builder.JoinStep(opnd,
106 | builder.JoinStep(
107 | builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
108 | ParseRelativeLocationPath()
109 | )
110 | );
111 | }
112 | return opnd;
113 | }
114 |
115 | /*
116 | * Step ::= '.' | '..' | (AxisName '::' | '@')? NodeTest Predicate*
117 | */
118 | private Node ParseStep() {
119 | Node opnd;
120 | if (LexKind.Dot == scanner.Kind) { // '.'
121 | scanner.NextLex();
122 | opnd = builder.Axis(XPathAxis.Self, XPathNodeType.All, null, null);
123 | if (LexKind.LBracket == scanner.Kind) {
124 | throw scanner.PredicateAfterDotException();
125 | }
126 | } else if (LexKind.DotDot == scanner.Kind) { // '..'
127 | scanner.NextLex();
128 | opnd = builder.Axis(XPathAxis.Parent, XPathNodeType.All, null, null);
129 | if (LexKind.LBracket == scanner.Kind) {
130 | throw scanner.PredicateAfterDotDotException();
131 | }
132 | } else { // (AxisName '::' | '@')? NodeTest Predicate*
133 | XPathAxis axis;
134 | switch (scanner.Kind) {
135 | case LexKind.Axis: // AxisName '::'
136 | axis = scanner.Axis;
137 | scanner.NextLex();
138 | scanner.NextLex();
139 | break;
140 | case LexKind.At: // '@'
141 | axis = XPathAxis.Attribute;
142 | scanner.NextLex();
143 | break;
144 | case LexKind.Name:
145 | case LexKind.Star:
146 | // NodeTest must start with Name or '*'
147 | axis = XPathAxis.Child;
148 | break;
149 | default:
150 | throw scanner.UnexpectedTokenException(scanner.RawValue);
151 | }
152 |
153 | opnd = ParseNodeTest(axis);
154 |
155 | while (LexKind.LBracket == scanner.Kind) {
156 | opnd = builder.Predicate(opnd, ParsePredicate(), IsReverseAxis(axis));
157 | }
158 | }
159 | return opnd;
160 | }
161 |
162 | private static bool IsReverseAxis(XPathAxis axis) {
163 | return (
164 | axis == XPathAxis.Ancestor || axis == XPathAxis.Preceding ||
165 | axis == XPathAxis.AncestorOrSelf || axis == XPathAxis.PrecedingSibling
166 | );
167 | }
168 |
169 | /*
170 | * NodeTest ::= NameTest | ('comment' | 'text' | 'node') '(' ')' | 'processing-instruction' '(' Literal? ')'
171 | * NameTest ::= '*' | NCName ':' '*' | QName
172 | */
173 | private Node ParseNodeTest(XPathAxis axis) {
174 | XPathNodeType nodeType;
175 | string nodePrefix, nodeName;
176 |
177 | int startChar = scanner.LexStart;
178 | InternalParseNodeTest(scanner, axis, out nodeType, out nodePrefix, out nodeName);
179 | PushPosInfo(startChar, scanner.PrevLexEnd);
180 | Node result = builder.Axis(axis, nodeType, nodePrefix, nodeName);
181 | PopPosInfo();
182 | return result;
183 | }
184 |
185 | private static bool IsNodeType(XPathScanner scanner) {
186 | return scanner.Prefix.Length == 0 && (
187 | scanner.Name == "node" ||
188 | scanner.Name == "text" ||
189 | scanner.Name == "processing-instruction" ||
190 | scanner.Name == "comment"
191 | );
192 | }
193 |
194 | private static XPathNodeType PrincipalNodeType(XPathAxis axis) {
195 | return (
196 | axis == XPathAxis.Attribute ? XPathNodeType.Attribute :
197 | axis == XPathAxis.Namespace ? XPathNodeType.Namespace :
198 | /*else*/ XPathNodeType.Element
199 | );
200 | }
201 |
202 | private static void InternalParseNodeTest(XPathScanner scanner, XPathAxis axis, out XPathNodeType nodeType, out string nodePrefix, out string nodeName) {
203 | switch (scanner.Kind) {
204 | case LexKind.Name :
205 | if (scanner.CanBeFunction && IsNodeType(scanner)) {
206 | nodePrefix = null;
207 | nodeName = null;
208 | switch (scanner.Name) {
209 | case "comment": nodeType = XPathNodeType.Comment; break;
210 | case "text" : nodeType = XPathNodeType.Text; break;
211 | case "node" : nodeType = XPathNodeType.All; break;
212 | default:
213 | Debug.Assert(scanner.Name == "processing-instruction");
214 | nodeType = XPathNodeType.ProcessingInstruction;
215 | break;
216 | }
217 |
218 | scanner.NextLex();
219 | scanner.PassToken(LexKind.LParens);
220 |
221 | if (nodeType == XPathNodeType.ProcessingInstruction) {
222 | if (scanner.Kind != LexKind.RParens) { // 'processing-instruction' '(' Literal ')'
223 | scanner.CheckToken(LexKind.String);
224 | // It is not needed to set nodePrefix here, but for our current implementation
225 | // comparing whole QNames is faster than comparing just local names
226 | nodePrefix = string.Empty;
227 | nodeName = scanner.StringValue;
228 | scanner.NextLex();
229 | }
230 | }
231 |
232 | scanner.PassToken(LexKind.RParens);
233 | } else {
234 | nodePrefix = scanner.Prefix;
235 | nodeName = scanner.Name;
236 | nodeType = PrincipalNodeType(axis);
237 | scanner.NextLex();
238 | if (nodeName == "*") {
239 | nodeName = null;
240 | }
241 | }
242 | break;
243 | case LexKind.Star :
244 | nodePrefix = null;
245 | nodeName = null;
246 | nodeType = PrincipalNodeType(axis);
247 | scanner.NextLex();
248 | break;
249 | default :
250 | throw scanner.NodeTestExpectedException(scanner.RawValue);
251 | }
252 | }
253 |
254 | /*
255 | * Predicate ::= '[' Expr ']'
256 | */
257 | private Node ParsePredicate() {
258 | scanner.PassToken(LexKind.LBracket);
259 | Node opnd = ParseExpr();
260 | scanner.PassToken(LexKind.RBracket);
261 | return opnd;
262 | }
263 | #endregion
264 |
265 | #region Expressions
266 | /**************************************************************************************************/
267 | /* Expressions */
268 | /**************************************************************************************************/
269 |
270 | /*
271 | * Expr ::= OrExpr
272 | * OrExpr ::= AndExpr ('or' AndExpr)*
273 | * AndExpr ::= EqualityExpr ('and' EqualityExpr)*
274 | * EqualityExpr ::= RelationalExpr (('=' | '!=') RelationalExpr)*
275 | * RelationalExpr ::= AdditiveExpr (('<' | '>' | '<=' | '>=') AdditiveExpr)*
276 | * AdditiveExpr ::= MultiplicativeExpr (('+' | '-') MultiplicativeExpr)*
277 | * MultiplicativeExpr ::= UnaryExpr (('*' | 'div' | 'mod') UnaryExpr)*
278 | * UnaryExpr ::= ('-')* UnionExpr
279 | */
280 | private Node ParseExpr() {
281 | return ParseSubExpr(/*callerPrec:*/0);
282 | }
283 |
284 | private Node ParseSubExpr(int callerPrec) {
285 | XPathOperator op;
286 | Node opnd;
287 |
288 | // Check for unary operators
289 | if (scanner.Kind == LexKind.Minus) {
290 | op = XPathOperator.UnaryMinus;
291 | int opPrec = XPathOperatorPrecedence[(int)op];
292 | scanner.NextLex();
293 | opnd = builder.Operator(op, ParseSubExpr(opPrec), default(Node));
294 | } else {
295 | opnd = ParseUnionExpr();
296 | }
297 |
298 | // Process binary operators
299 | while (true) {
300 | op = (scanner.Kind <= LexKind.LastOperator) ? (XPathOperator)scanner.Kind : XPathOperator.Unknown;
301 | int opPrec = XPathOperatorPrecedence[(int)op];
302 | if (opPrec <= callerPrec)
303 | return opnd;
304 |
305 | // Operator's precedence is greater than the one of our caller, so process it here
306 | scanner.NextLex();
307 | opnd = builder.Operator(op, opnd, ParseSubExpr(/*callerPrec:*/opPrec));
308 | }
309 | }
310 |
311 | private static int[] XPathOperatorPrecedence = {
312 | /*Unknown */ 0,
313 | /*Or */ 1,
314 | /*And */ 2,
315 | /*Eq */ 3,
316 | /*Ne */ 3,
317 | /*Lt */ 4,
318 | /*Le */ 4,
319 | /*Gt */ 4,
320 | /*Ge */ 4,
321 | /*Plus */ 5,
322 | /*Minus */ 5,
323 | /*Multiply */ 6,
324 | /*Divide */ 6,
325 | /*Modulo */ 6,
326 | /*UnaryMinus */ 7,
327 | /*Union */ 8, // Not used
328 | };
329 |
330 | /*
331 | * UnionExpr ::= PathExpr ('|' PathExpr)*
332 | */
333 | private Node ParseUnionExpr() {
334 | int startChar = scanner.LexStart;
335 | Node opnd1 = ParsePathExpr();
336 |
337 | if (scanner.Kind == LexKind.Union) {
338 | PushPosInfo(startChar, scanner.PrevLexEnd);
339 | opnd1 = builder.Operator(XPathOperator.Union, default(Node), opnd1);
340 | PopPosInfo();
341 |
342 | while (scanner.Kind == LexKind.Union) {
343 | scanner.NextLex();
344 | startChar = scanner.LexStart;
345 | Node opnd2 = ParsePathExpr();
346 | PushPosInfo(startChar, scanner.PrevLexEnd);
347 | opnd1 = builder.Operator(XPathOperator.Union, opnd1, opnd2);
348 | PopPosInfo();
349 | }
350 | }
351 | return opnd1;
352 | }
353 |
354 | /*
355 | * PathExpr ::= LocationPath | FilterExpr (('/' | '//') RelativeLocationPath )?
356 | */
357 | private Node ParsePathExpr() {
358 | // Here we distinguish FilterExpr from LocationPath - the former starts with PrimaryExpr
359 | if (IsPrimaryExpr()) {
360 | int startChar = scanner.LexStart;
361 | Node opnd = ParseFilterExpr();
362 | int endChar = scanner.PrevLexEnd;
363 |
364 | if (scanner.Kind == LexKind.Slash) {
365 | scanner.NextLex();
366 | PushPosInfo(startChar, endChar);
367 | opnd = builder.JoinStep(opnd, ParseRelativeLocationPath());
368 | PopPosInfo();
369 | } else if (scanner.Kind == LexKind.SlashSlash) {
370 | scanner.NextLex();
371 | PushPosInfo(startChar, endChar);
372 | opnd = builder.JoinStep(opnd,
373 | builder.JoinStep(
374 | builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null),
375 | ParseRelativeLocationPath()
376 | )
377 | );
378 | PopPosInfo();
379 | }
380 | return opnd;
381 | } else {
382 | return ParseLocationPath();
383 | }
384 | }
385 |
386 | /*
387 | * FilterExpr ::= PrimaryExpr Predicate*
388 | */
389 | private Node ParseFilterExpr() {
390 | int startChar = scanner.LexStart;
391 | Node opnd = ParsePrimaryExpr();
392 | int endChar = scanner.PrevLexEnd;
393 |
394 | while (scanner.Kind == LexKind.LBracket) {
395 | PushPosInfo(startChar, endChar);
396 | opnd = builder.Predicate(opnd, ParsePredicate(), /*reverseStep:*/false);
397 | PopPosInfo();
398 | }
399 | return opnd;
400 | }
401 |
402 | private bool IsPrimaryExpr() {
403 | return (
404 | scanner.Kind == LexKind.String ||
405 | scanner.Kind == LexKind.Number ||
406 | scanner.Kind == LexKind.Dollar ||
407 | scanner.Kind == LexKind.LParens ||
408 | scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner)
409 | );
410 | }
411 |
412 | /*
413 | * PrimaryExpr ::= Literal | Number | VariableReference | '(' Expr ')' | FunctionCall
414 | */
415 | private Node ParsePrimaryExpr() {
416 | Debug.Assert(IsPrimaryExpr());
417 | Node opnd;
418 | switch (scanner.Kind) {
419 | case LexKind.String:
420 | opnd = builder.String(scanner.StringValue);
421 | scanner.NextLex();
422 | break;
423 | case LexKind.Number:
424 | opnd = builder.Number(scanner.RawValue);
425 | scanner.NextLex();
426 | break;
427 | case LexKind.Dollar:
428 | int startChar = scanner.LexStart;
429 | scanner.NextLex();
430 | scanner.CheckToken(LexKind.Name);
431 | PushPosInfo(startChar, scanner.LexStart + scanner.LexSize);
432 | opnd = builder.Variable(scanner.Prefix, scanner.Name);
433 | PopPosInfo();
434 | scanner.NextLex();
435 | break;
436 | case LexKind.LParens:
437 | scanner.NextLex();
438 | opnd = ParseExpr();
439 | scanner.PassToken(LexKind.RParens);
440 | break;
441 | default:
442 | Debug.Assert(
443 | scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner),
444 | "IsPrimaryExpr() returned true, but the lexeme is not recognized"
445 | );
446 | opnd = ParseFunctionCall();
447 | break;
448 | }
449 | return opnd;
450 | }
451 |
452 | /*
453 | * FunctionCall ::= FunctionName '(' (Expr (',' Expr)* )? ')'
454 | */
455 | private Node ParseFunctionCall() {
456 | List argList = new List();
457 | string name = scanner.Name;
458 | string prefix = scanner.Prefix;
459 | int startChar = scanner.LexStart;
460 |
461 | scanner.PassToken(LexKind.Name);
462 | scanner.PassToken(LexKind.LParens);
463 |
464 | if (scanner.Kind != LexKind.RParens) {
465 | while (true) {
466 | argList.Add(ParseExpr());
467 | if (scanner.Kind != LexKind.Comma) {
468 | scanner.CheckToken(LexKind.RParens);
469 | break;
470 | }
471 | scanner.NextLex(); // move off the ','
472 | }
473 | }
474 |
475 | scanner.NextLex(); // move off the ')'
476 | PushPosInfo(startChar, scanner.PrevLexEnd);
477 | Node result = builder.Function(prefix, name, argList);
478 | PopPosInfo();
479 | return result;
480 | }
481 | #endregion
482 |
483 | /**************************************************************************************************/
484 | /* Helper methods */
485 | /**************************************************************************************************/
486 |
487 | private void PushPosInfo(int startChar, int endChar) {
488 | posInfo.Push(startChar);
489 | posInfo.Push(endChar);
490 | }
491 |
492 | private void PopPosInfo() {
493 | posInfo.Pop();
494 | posInfo.Pop();
495 | }
496 |
497 | private void PopPosInfo(out int startChar, out int endChar) {
498 | endChar = posInfo.Pop();
499 | startChar = posInfo.Pop();
500 | }
501 |
502 | private static double ToDouble(string str) {
503 | double d;
504 | if (double.TryParse(str, NumberStyles.AllowLeadingSign|NumberStyles.AllowDecimalPoint|NumberStyles.AllowTrailingWhite, NumberFormatInfo.InvariantInfo, out d)) {
505 | return d;
506 | }
507 | return double.NaN;
508 | }
509 | }
510 | }
511 |
--------------------------------------------------------------------------------