├── .gitignore ├── architecture.png ├── XPathParser ├── XPathParser.snk ├── XPathOperator.cs ├── XPathAxis.cs ├── XPathParser.csproj ├── IXpathBuilder.cs ├── XPathParserException.cs ├── XPathScanner.cs └── XPathParser.cs ├── appveyor.yml ├── version.json ├── XPathParserTest ├── XPathParserTest.csproj ├── XPathTreeBuilder.cs ├── Test.cs └── XPathStringBuilder.cs ├── XPathParser.sln ├── LICENSE.txt └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.lock.json 2 | .vs/ 3 | bin/ 4 | obj/ -------------------------------------------------------------------------------- /architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quamotion/XPathParser/HEAD/architecture.png -------------------------------------------------------------------------------- /XPathParser/XPathParser.snk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quamotion/XPathParser/HEAD/XPathParser/XPathParser.snk -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | build_script: 2 | - cmd: dotnet restore 3 | - cmd: cd XPathParserTest 4 | - cmd: dotnet test 5 | - cmd: cd ..\XPathParser 6 | - cmd: dotnet build -c Release 7 | - cmd: dotnet pack -c Release 8 | 9 | on_success: 10 | - ps: Push-AppveyorArtifact "bin\Release\XPathParser.*.nupkg" -------------------------------------------------------------------------------- /version.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://raw.githubusercontent.com/AArnott/Nerdbank.GitVersioning/master/src/NerdBank.GitVersioning/version.schema.json", 3 | "version": "1.2", 4 | "publicReleaseRefSpec": [ 5 | "^refs/heads/master$", // we release out of master 6 | 
"^refs/tags/v\\d+\\.\\d+" // we also release tags starting with vN.N 7 | ], 8 | } -------------------------------------------------------------------------------- /XPathParser/XPathOperator.cs: -------------------------------------------------------------------------------- 1 | namespace CodePlex.XPathParser { 2 | public enum XPathOperator { 3 | Unknown = 0, 4 | Or, 5 | And, 6 | Eq, 7 | Ne, 8 | Lt, 9 | Le, 10 | Gt, 11 | Ge, 12 | Plus, 13 | Minus, 14 | Multiply, 15 | Divide, 16 | Modulo, 17 | UnaryMinus, 18 | Union 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /XPathParser/XPathAxis.cs: -------------------------------------------------------------------------------- 1 | namespace CodePlex.XPathParser { 2 | public enum XPathAxis { 3 | Unknown = 0, 4 | Ancestor , 5 | AncestorOrSelf , 6 | Attribute , 7 | Child , 8 | Descendant , 9 | DescendantOrSelf, 10 | Following , 11 | FollowingSibling, 12 | Namespace , 13 | Parent , 14 | Preceding , 15 | PrecedingSibling, 16 | Self , 17 | Root , 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /XPathParserTest/XPathParserTest.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netcoreapp2.2 5 | 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /XPathParser/XPathParser.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net40;net45;netstandard2.0 5 | Sergey Dubinets 6 | .NET XPath Parser 7 | XPath parser in C# source code. 
namespace CodePlex.XPathParser {
    // Callback interface the XPath parser drives while it reads an expression.
    // The parser never decides how the parsed expression is represented: every syntactic
    // construct is turned into one call below, and the value returned from that call is
    // handed back to the builder as an argument of the enclosing construct.
    //
    // Node is the builder's own representation of a (sub)expression; the sample builders
    // in this repository use XElement and string.
    public interface IXPathBuilder<Node> {
        // Should be called once per build
        void StartBuild();

        // Should be called after build for result tree post-processing
        Node EndBuild(Node result);

        // String literal; value is the text of the literal.
        Node String(string value);

        // Numeric literal; the number is passed as text.
        Node Number(string value);

        // Operator applied to already built operands.
        // NOTE(review): for XPathOperator.UnaryMinus the sample builders only use 'left' --
        // confirm what the parser passes as 'right'.
        Node Operator(XPathOperator op, Node left, Node right);

        // Single location step: an axis plus a node test (node type and optional prefix/name).
        Node Axis(XPathAxis xpathAxis, XPathNodeType nodeType, string prefix, string name);

        // Two steps joined into a path (left/right).
        Node JoinStep(Node left, Node right);

        // http://www.w3.org/TR/xquery-semantics/#id-axis-steps
        // reverseStep is how the parser communicates to the builder the difference between
        // "ancestor[1]" and "(ancestor)[1]"
        Node Predicate(Node node, Node condition, bool reverseStep);

        // Variable reference ($prefix:name).
        Node Variable(string prefix, string name);

        // Function call with its already built arguments.
        Node Function(string prefix, string name, IList<Node> args);
    }
}
"XPathParserTest", "XPathParserTest\XPathParserTest.csproj", "{6706ED44-C0D9-45A0-A911-80B13866E4E2}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {A8072758-E8D2-4551-89C2-564BDE162403}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 17 | {A8072758-E8D2-4551-89C2-564BDE162403}.Debug|Any CPU.Build.0 = Debug|Any CPU 18 | {A8072758-E8D2-4551-89C2-564BDE162403}.Release|Any CPU.ActiveCfg = Release|Any CPU 19 | {A8072758-E8D2-4551-89C2-564BDE162403}.Release|Any CPU.Build.0 = Release|Any CPU 20 | {6706ED44-C0D9-45A0-A911-80B13866E4E2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | {6706ED44-C0D9-45A0-A911-80B13866E4E2}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {6706ED44-C0D9-45A0-A911-80B13866E4E2}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {6706ED44-C0D9-45A0-A911-80B13866E4E2}.Release|Any CPU.Build.0 = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /XPathParserTest/XPathTreeBuilder.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Text; 4 | using System.Xml.Linq; 5 | using CodePlex.XPathParser; 6 | 7 | namespace XPathParserTest { 8 | class XPathTreeBuilder : IXPathBuilder { 9 | 10 | public void StartBuild() {} 11 | 12 | public XElement EndBuild(XElement result) { 13 | return result; 14 | } 15 | 16 | public XElement String(string value) { 17 | return new XElement("string", new XAttribute("value", value)); 18 | } 19 | 20 | public XElement Number(string value) { 21 | return new XElement("number", new XAttribute("value", value)); 22 | } 23 | 24 | public XElement 
Operator(XPathOperator op, XElement left, XElement right) { 25 | if (op == XPathOperator.UnaryMinus) { 26 | return new XElement("negate", left); 27 | } 28 | return new XElement(op.ToString(), left, right); 29 | } 30 | 31 | public XElement Axis(XPathAxis xpathAxis, System.Xml.XPath.XPathNodeType nodeType, string prefix, string name) { 32 | return new XElement(xpathAxis.ToString(), 33 | new XAttribute("nodeTyepe", nodeType.ToString()), 34 | new XAttribute("prefix" , prefix??"(null)"), 35 | new XAttribute("name" , name??"(null)") 36 | ); 37 | } 38 | 39 | public XElement JoinStep(XElement left, XElement right) { 40 | return new XElement("step", left, right); 41 | } 42 | 43 | public XElement Predicate(XElement node, XElement condition, bool reverseStep) { 44 | return new XElement("predicate", new XAttribute("reverse", reverseStep), 45 | node, condition 46 | ); 47 | } 48 | 49 | public XElement Variable(string prefix, string name) { 50 | return new XElement("variable", 51 | new XAttribute("prefix", prefix ?? "(null)"), 52 | new XAttribute("name", name ?? "(null)") 53 | ); 54 | } 55 | 56 | public XElement Function(string prefix, string name, IList args) { 57 | XElement xe = new XElement("variable", 58 | new XAttribute("prefix", prefix ?? "(null)"), 59 | new XAttribute("name", name ?? "(null)") 60 | ); 61 | foreach (XElement e in args) { 62 | xe.Add(e); 63 | } 64 | return xe; 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Microsoft Public License (Ms-PL) 2 | 3 | This license governs use of the accompanying software. If you use the software, you accept this license. If you do not accept the license, do not use the software. 4 | 5 | 1. Definitions 6 | 7 | The terms "reproduce," "reproduction," "derivative works," and "distribution" have the same meaning here as under U.S. copyright law. 
8 | 9 | A "contribution" is the original software, or any additions or changes to the software. 10 | 11 | A "contributor" is any person that distributes its contribution under this license. 12 | 13 | "Licensed patents" are a contributor's patent claims that read directly on its contribution. 14 | 15 | 2. Grant of Rights 16 | 17 | (A) Copyright Grant- Subject to the terms of this license, including the license conditions and limitations in section 3, each contributor grants you a non-exclusive, worldwide, royalty-free copyright license to reproduce its contribution, prepare derivative works of its contribution, and distribute its contribution or any derivative works that you create. 18 | 19 | (B) Patent Grant- Subject to the terms of this license, including the license conditions and limitations in section 3, each contributor grants you a non-exclusive, worldwide, royalty-free license under its licensed patents to make, have made, use, sell, offer for sale, import, and/or otherwise dispose of its contribution in the software or derivative works of the contribution in the software. 20 | 21 | 3. Conditions and Limitations 22 | 23 | (A) No Trademark License- This license does not grant you rights to use any contributors' name, logo, or trademarks. 24 | 25 | (B) If you bring a patent claim against any contributor over patents that you claim are infringed by the software, your patent license from such contributor to the software ends automatically. 26 | 27 | (C) If you distribute any portion of the software, you must retain all copyright, patent, trademark, and attribution notices that are present in the software. 28 | 29 | (D) If you distribute any portion of the software in source code form, you may do so only under this license by including a complete copy of this license with your distribution. If you distribute any portion of the software in compiled or object code form, you may only do so under a license that complies with this license. 
30 | 31 | (E) The software is licensed "as-is." You bear the risk of using it. The contributors give no express warranties, guarantees or conditions. You may have additional consumer rights under your local laws which this license cannot change. To the extent permitted under your local laws, the contributors exclude the implied warranties of merchantability, fitness for a particular purpose and non-infringement. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # XPathParser: XPath parser in C# source code. 2 | [![Build status](https://ci.appveyor.com/api/projects/status/7lupdjmn2tftttk1?svg=true)](https://ci.appveyor.com/project/qmfrederik/xpathparser) 3 | 4 | Close to the one System.Xml uses in XslCompiledTransform. 5 | Currently supports XPath 1.0 grammar. 6 | 7 | This implementation uses the _Builder_ pattern to separate the parsing code from the result the parser should produce. 8 | 9 | While parsing the source XPath, the parser calls the methods of the `IXPathBuilder` interface provided by the caller. 10 | 11 | Users of this code are expected to write their own `IXPathBuilder` implementation. 12 | Source code of this project contains two `IXPathBuilder` implementations for demo/testing purposes: 13 | * `XPathTreeBuilder` - constructs XLinq tree that represents XPath syntax tree. 14 | * `XPathStringBuilder` - compiles syntax tree back to XPath string. 15 | 16 | ## Installation 17 | 18 | Install using the command line: 19 | 20 | ``` 21 | Install-Package XPathParser 22 | ``` 23 | 24 | ## Architecture 25 | XPath is a language to query data from XML documents. It is built into XSLT and can be used standalone in several .NET APIs. (http://www.w3.org/TR/xpath) 26 | In some cases customers need to parse XPath expressions themselves to analyze, modify or validate them. 27 | `XPathParser` is the class that can help you do this.
namespace CodePlex.XPathParser
{
    // Exception that, in addition to a message, carries the XPath expression text and the
    // character range [startChar, endChar) the error refers to. ToString() appends the
    // expression with the offending range marked as " -->range<-- ", trimming long
    // stretches of text so the report stays short.
    public class XPathParserException : System.Exception {
        // Expression text the error refers to; may be null.
        public string queryString;
        // Index in queryString where the offending range starts.
        public int startChar;
        // Index in queryString just past the end of the offending range.
        public int endChar;

        // Stores the expression and range as given; 'message' becomes Exception.Message.
        public XPathParserException(string queryString, int startChar, int endChar, string message) : base(message) {
            this.queryString = queryString;
            this.startChar = startChar;
            this.endChar = endChar;
        }

        // Which side of an over-long fragment is replaced by "...".
        private enum TrimType {
            Left,
            Right,
            Middle,
        }

        // This function is used to prevent long quotations in error messages.
        // Appends value[startIndex, startIndex + count) to sb; fragments longer than
        // TrimSize characters keep only TrimSize characters and get a "..." marker on the
        // side selected by trimType.
        private static void AppendTrimmed(StringBuilder sb, string value, int startIndex, int count, TrimType trimType) {
            const int TrimSize = 32;
            const string TrimMarker = "...";

            if (count <= TrimSize) {
                sb.Append(value, startIndex, count);
                return;
            }

            switch (trimType) {
            case TrimType.Left:
                // keep the tail of the fragment
                sb.Append(TrimMarker);
                sb.Append(value, startIndex + count - TrimSize, TrimSize);
                break;
            case TrimType.Right:
                // keep the head of the fragment
                sb.Append(value, startIndex, TrimSize);
                sb.Append(TrimMarker);
                break;
            case TrimType.Middle:
                // keep both ends, half of the budget each
                sb.Append(value, startIndex, TrimSize / 2);
                sb.Append(TrimMarker);
                sb.Append(value, startIndex + count - TrimSize / 2, TrimSize / 2);
                break;
            }
        }

        // Returns the expression with the error range marked, e.g. "a  -->b<-- ", or null
        // when there is no expression (null or only spaces).
        internal string MarkOutError() {
            if (queryString == null || queryString.Trim(' ').Length == 0) {
                return null;
            }

            // The fields are public and unchecked, so clamp them into the string before
            // slicing; otherwise a negative, inverted or too large range would make
            // ToString() itself throw ArgumentOutOfRangeException. Valid ranges are unaffected.
            int start = Math.Max(0, Math.Min(startChar, queryString.Length));
            int end = Math.Max(start, Math.Min(endChar, queryString.Length));
            int len = end - start;
            StringBuilder sb = new StringBuilder();

            AppendTrimmed(sb, queryString, 0, start, TrimType.Left);
            if (len > 0) {
                sb.Append(" -->");
                AppendTrimmed(sb, queryString, start, len, TrimType.Middle);
            }

            sb.Append("<-- ");
            AppendTrimmed(sb, queryString, end, queryString.Length - end, TrimType.Right);

            return sb.ToString();
        }

        // Message followed, on a new line, by the marked-out expression (when there is one).
        private string FormatDetailedMessage() {
            string message = Message;
            string error = MarkOutError();

            if (!string.IsNullOrEmpty(error)) {
                if (message.Length > 0) {
                    message += Environment.NewLine;
                }
                message += error;
            }
            return message;
        }

        // "<full type name>: <detailed message>" followed by the stack trace, if any.
        public override string ToString() {
            string result = this.GetType().FullName;
            string info = FormatDetailedMessage();
            if (!string.IsNullOrEmpty(info)) {
                result += ": " + info;
            }
            if (StackTrace != null) {
                result += Environment.NewLine + StackTrace;
            }
            return result;
        }

    }
}
[InlineData(@"child::para[attribute::type='warning'][position()=5]")] 39 | [InlineData(@"child::para[position()=5][attribute::type=""warning""]")] 40 | [InlineData(@"child::chapter[child::title='Introduction']")] 41 | [InlineData(@"child::chapter[child::title]")] 42 | [InlineData(@"child::*[self::chapter or self::appendix]")] 43 | [InlineData(@"child::*[self::chapter or self::appendix][position()=last()]")] 44 | [Theory] 45 | public void CorrectTest(string expression) 46 | { 47 | RunTestString(expression); 48 | RunTestTree(expression); 49 | } 50 | 51 | [InlineData(@"")] 52 | [InlineData(@"a b")] 53 | [InlineData(@"a[")] 54 | [InlineData(@"]")] 55 | [InlineData(@"///")] 56 | [InlineData(@"fo(")] 57 | [InlineData(@")")] 58 | [InlineData(@"a[']")] 59 | [InlineData(@"b[""]")] 60 | [InlineData(@"3e8")] 61 | [InlineData(@"child::*[self::chapter or self::appendix][position()=last()] child::*[self::chapter or self::appendix][position()=last()]")] 62 | [Theory] 63 | public void ErrorTest(string expression) 64 | { 65 | Assert.Throws(() => RunTestTree(expression)); 66 | } 67 | 68 | static void RunTestString(string xpathExpr) 69 | { 70 | Debug.WriteLine("Translated one: {0}", new XPathParser().Parse(xpathExpr, new XPathStringBuilder())); 71 | } 72 | 73 | static void RunTestTree(string xpathExpr) 74 | { 75 | XElement xe = new XPathParser().Parse(xpathExpr, new XPathTreeBuilder()); 76 | XmlWriterSettings ws = new XmlWriterSettings(); 77 | { 78 | ws.Indent = true; 79 | ws.OmitXmlDeclaration = true; 80 | } 81 | using (XmlWriter w = XmlWriter.Create(Console.Out, ws)) 82 | { 83 | xe.WriteTo(w); 84 | } 85 | } 86 | } 87 | } 88 | 89 | -------------------------------------------------------------------------------- /XPathParserTest/XPathStringBuilder.cs: -------------------------------------------------------------------------------- 1 | using CodePlex.XPathParser; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Diagnostics; 5 | using System.Xml.XPath; 6 | 7 | 
namespace XPathParserTest
{
    // Sample IXPathBuilder implementation whose node type is string: every callback returns
    // the XPath text of the construct it was called for, so the value returned from
    // EndBuild is the expression written back out as a string.
    class XPathStringBuilder : IXPathBuilder<string> {
        #region IXPathBuilder Members

        public void StartBuild() { }

        // No post-processing: the text built so far is the result.
        public string EndBuild(string result) {
            return result;
        }

        // String literal. XPath 1.0 literals have no escape syntax, so a value containing
        // an apostrophe has to be delimited with double quotes instead of apostrophes.
        public string String(string value) {
            char quote = (value != null && value.IndexOf('\'') >= 0) ? '"' : '\'';
            return quote + value + quote;
        }

        // Numeric literal: the text is emitted unchanged.
        public string Number(string value) {
            return value;
        }

        // Unary minus is emitted as a prefix of 'left'; every other operator is written
        // between its operands using the opStrings table.
        // NOTE(review): operands are not parenthesized, so grouping of the original
        // expression is not preserved in the output -- confirm this is acceptable for a demo.
        public string Operator(XPathOperator op, string left, string right) {
            Debug.Assert(op != XPathOperator.Union);
            if (op == XPathOperator.UnaryMinus) {
                return "-" + left;
            }
            return left + opStrings[(int)op] + right;
        }

        // Location step: "<axis>::" followed by the node test derived from nodeType,
        // prefix and name. Throws ArgumentException for node types that have no node test here.
        public string Axis(XPathAxis xpathAxis, XPathNodeType nodeType, string prefix, string name) {
            string nodeTest;
            switch (nodeType) {
            case XPathNodeType.ProcessingInstruction:
                Debug.Assert(prefix == "");
                nodeTest = "processing-instruction(" + name + ")";
                break;
            case XPathNodeType.Text:
                Debug.Assert(prefix == null && name == null);
                nodeTest = "text()";
                break;
            case XPathNodeType.Comment:
                Debug.Assert(prefix == null && name == null);
                nodeTest = "comment()";
                break;
            case XPathNodeType.All:
                nodeTest = "node()";
                break;
            case XPathNodeType.Attribute:
            case XPathNodeType.Element:
            case XPathNodeType.Namespace:
                nodeTest = QNameOrWildcard(prefix, name);
                break;
            default:
                // second argument is the name of the offending parameter
                throw new ArgumentException("unexpected XPathNodeType", "nodeType");
            }
            return axisStrings[(int)xpathAxis] + nodeTest;
        }

        public string JoinStep(string left, string right) {
            return left + '/' + right;
        }

        public string Predicate(string node, string condition, bool reverseStep) {
            if (!reverseStep) {
                // In this method we don't know how axis was represented in original XPath and the only
                // difference between ancestor::*[2] and (ancestor::*)[2] is the reverseStep parameter.
                // to not store the axis from previous builder events we simply wrap node in the () here.
                node = '(' + node + ')';
            }
            return node + '[' + condition + ']';
        }

        public string Variable(string prefix, string name) {
            return '$' + QName(prefix, name);
        }

        // Function call: qualified name followed by the comma-separated argument texts.
        public string Function(string prefix, string name, IList<string> args) {
            return QName(prefix, name) + '(' + string.Join(",", args) + ')';
        }

        // "local" when the prefix is empty, otherwise "prefix:local"; neither part may be null.
        private static string QName(string prefix, string localName) {
            if (prefix == null) {
                throw new ArgumentNullException("prefix");
            }
            if (localName == null) {
                throw new ArgumentNullException("localName");
            }
            return prefix == "" ? localName : prefix + ':' + localName;
        }

        // Like QName, but a null prefix yields "*" and a null local name yields "prefix:*".
        private static string QNameOrWildcard(string prefix, string localName) {
            if (prefix == null) {
                Debug.Assert(localName == null);
                return "*";
            }
            if (localName == null) {
                Debug.Assert(prefix != "");
                return prefix + ":*";
            }
            return prefix == "" ? localName : prefix + ':' + localName;
        }

        #endregion

        // Operator text indexed by (int)XPathOperator; the order must match the enum.
        private static readonly string[] opStrings = {
            /* Unknown    */ " Unknown ",
            /* Or         */ " or " ,
            /* And        */ " and ",
            /* Eq         */ "="    ,
            /* Ne         */ "!="   ,
            /* Lt         */ "<"    ,
            /* Le         */ "<="   ,
            /* Gt         */ ">"    ,
            /* Ge         */ ">="   ,
            /* Plus       */ "+"    ,
            /* Minus      */ "-"    ,
            /* Multiply   */ "*"    ,
            /* Divide     */ " div ",
            /* Modulo     */ " mod ",
            /* UnaryMinus */ "-"    ,
            /* Union      */ "|"
        };

        // Axis prefix indexed by (int)XPathAxis; the order must match the enum.
        private static readonly string[] axisStrings = {
            /*Unknown          */ "Unknown::"           ,
            /*Ancestor         */ "ancestor::"          ,
            /*AncestorOrSelf   */ "ancestor-or-self::"  ,
            /*Attribute        */ "attribute::"         ,
            /*Child            */ "child::"             ,
            /*Descendant       */ "descendant::"        ,
            /*DescendantOrSelf */ "descendant-or-self::",
            /*Following        */ "following::"         ,
            /*FollowingSibling */ "following-sibling::" ,
            /*Namespace        */ "namespace::"         ,
            /*Parent           */ "parent::"            ,
            /*Preceding        */ "preceding::"         ,
            /*PrecedingSibling */ "preceding-sibling::" ,
            /*Self             */ "self::"              ,
            /*Root             */ "root::"              ,
        };
    }
}
25 | DotDot, // '..' 26 | ColonColon, // '::' 27 | SlashSlash, // Operator '//' 28 | Number, // Number (numeric literal) 29 | Axis, // AxisName 30 | 31 | Name, // NameTest, NodeType, FunctionName, AxisName, second part of VariableReference 32 | String, // Literal (string literal) 33 | Eof, // End of the expression 34 | 35 | FirstStringable = Name, 36 | LastNonChar = Eof, 37 | 38 | LParens = '(', 39 | RParens = ')', 40 | LBracket = '[', 41 | RBracket = ']', 42 | Dot = '.', 43 | At = '@', 44 | Comma = ',', 45 | 46 | Star = '*', // NameTest 47 | Slash = '/', // Operator '/' 48 | Dollar = '$', // First part of VariableReference 49 | RBrace = '}', // Used for AVTs 50 | }; 51 | 52 | internal sealed class XPathScanner { 53 | private string xpathExpr; 54 | private int curIndex; 55 | private char curChar; 56 | private LexKind kind; 57 | private string name; 58 | private string prefix; 59 | private string stringValue; 60 | private bool canBeFunction; 61 | private int lexStart; 62 | private int prevLexEnd; 63 | private LexKind prevKind; 64 | private XPathAxis axis; 65 | 66 | public XPathScanner(string xpathExpr) : this(xpathExpr, 0) {} 67 | 68 | public XPathScanner(string xpathExpr, int startFrom) { 69 | Debug.Assert(xpathExpr != null); 70 | this.xpathExpr = xpathExpr; 71 | this.kind = LexKind.Unknown; 72 | SetSourceIndex(startFrom); 73 | NextLex(); 74 | } 75 | 76 | public string Source { get { return xpathExpr; } } 77 | public LexKind Kind { get { return kind; } } 78 | public int LexStart { get { return lexStart; } } 79 | public int LexSize { get { return curIndex - lexStart; } } 80 | public int PrevLexEnd { get { return prevLexEnd; } } 81 | 82 | private void SetSourceIndex(int index) { 83 | Debug.Assert(0 <= index && index <= xpathExpr.Length); 84 | curIndex = index - 1; 85 | NextChar(); 86 | } 87 | 88 | private void NextChar() { 89 | Debug.Assert(-1 <= curIndex && curIndex < xpathExpr.Length); 90 | curIndex++; 91 | if (curIndex < xpathExpr.Length) { 92 | curChar = 
xpathExpr[curIndex]; 93 | } else { 94 | Debug.Assert(curIndex == xpathExpr.Length); 95 | curChar = '\0'; 96 | } 97 | } 98 | 99 | public string Name { 100 | get { 101 | Debug.Assert(kind == LexKind.Name); 102 | Debug.Assert(name != null); 103 | return name; 104 | } 105 | } 106 | 107 | public string Prefix { 108 | get { 109 | Debug.Assert(kind == LexKind.Name); 110 | Debug.Assert(prefix != null); 111 | return prefix; 112 | } 113 | } 114 | 115 | public string RawValue { 116 | get { 117 | if (kind == LexKind.Eof) { 118 | return LexKindToString(kind); 119 | } else { 120 | return xpathExpr.Substring(lexStart, curIndex - lexStart); 121 | } 122 | } 123 | } 124 | 125 | public string StringValue { 126 | get { 127 | Debug.Assert(kind == LexKind.String); 128 | Debug.Assert(stringValue != null); 129 | return stringValue; 130 | } 131 | } 132 | 133 | // Returns true if the character following an QName (possibly after intervening 134 | // ExprWhitespace) is '('. In this case the token must be recognized as a NodeType 135 | // or a FunctionName unless it is an OperatorName. This distinction cannot be done 136 | // without knowing the previous lexeme. For example, "or" in "... or (1 != 0)" may 137 | // be an OperatorName or a FunctionName. 
        // Scans the next lexeme of the expression.
        // Remembers where the previous lexeme ended and what kind it was (prevLexEnd,
        // prevKind), skips whitespace, records lexStart and then classifies the lexeme by
        // its first character, setting 'kind' and, for names, also name, prefix,
        // canBeFunction and axis.
        public void NextLex() {
            prevLexEnd = curIndex;
            prevKind = kind;
            SkipSpace();
            lexStart = curIndex;

            switch (curChar) {
            case '\0':
                // NextChar stores '\0' once the index reaches the end of the expression;
                // nothing is consumed, so repeated calls keep reporting Eof.
                kind = LexKind.Eof;
                return;
            case '(': case ')': case '[': case ']':
            case '@': case ',': case '$': case '}':
                // Single-character lexemes: these LexKind members are declared with the
                // character itself as their value, so a cast is enough.
                kind = (LexKind)curChar;
                NextChar();
                break;
            case '.':
                NextChar();
                if (curChar == '.') {
                    kind = LexKind.DotDot;
                    NextChar();
                } else if (IsAsciiDigit(curChar)) {
                    // ".5" style number: rewind to the '.' and let the number case scan it
                    SetSourceIndex(lexStart);
                    goto case '0';
                } else {
                    kind = LexKind.Dot;
                }
                break;
            case ':':
                NextChar();
                if (curChar == ':') {
                    kind = LexKind.ColonColon;
                    NextChar();
                } else {
                    // a lone ':' is not a lexeme on its own
                    kind = LexKind.Unknown;
                }
                break;
            case '*':
                kind = LexKind.Star;
                NextChar();
                // may reclassify the lexeme as Multiply depending on the previous lexeme
                CheckOperator(true);
                break;
            case '/':
                NextChar();
                if (curChar == '/') {
                    kind = LexKind.SlashSlash;
                    NextChar();
                } else {
                    kind = LexKind.Slash;
                }
                break;
            case '|':
                kind = LexKind.Union;
                NextChar();
                break;
            case '+':
                kind = LexKind.Plus;
                NextChar();
                break;
            case '-':
                kind = LexKind.Minus;
                NextChar();
                break;
            case '=':
                kind = LexKind.Eq;
                NextChar();
                break;
            case '!':
                NextChar();
                if (curChar == '=') {
                    kind = LexKind.Ne;
                    NextChar();
                } else {
                    // '!' is only valid as the first half of "!="
                    kind = LexKind.Unknown;
                }
                break;
            case '<':
                NextChar();
                if (curChar == '=') {
                    kind = LexKind.Le;
                    NextChar();
                } else {
                    kind = LexKind.Lt;
                }
                break;
            case '>':
                NextChar();
                if (curChar == '=') {
                    kind = LexKind.Ge;
                    NextChar();
                } else {
                    kind = LexKind.Gt;
                }
                break;
            case '"':
            case '\'':
                kind = LexKind.String;
                ScanString();
                break;
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
            case '8': case '9':
                kind = LexKind.Number;
                ScanNumber();
                break;
            default:
                this.name = ScanNCName();
                if (this.name != null) {
                    kind = LexKind.Name;
                    this.prefix = string.Empty;
                    this.canBeFunction = false;
                    this.axis = XPathAxis.Unknown;
                    bool colonColon = false;
                    // position right after the name; look-ahead below rewinds to it
                    int saveSourceIndex = curIndex;

                    // "foo:bar" or "foo:*" -- one lexeme (no spaces allowed)
                    // "foo::"   or "foo ::" -- two lexemes, reported as one (AxisName)
                    // "foo:?"   or "foo :?" -- lexeme "foo" reported
                    if (curChar == ':') {
                        NextChar();
                        if (curChar == ':') {   // "foo::" -> OperatorName, AxisName
                            NextChar();
                            colonColon = true;
                            // only a look-ahead: the "::" itself is left unconsumed
                            SetSourceIndex(saveSourceIndex);
                        } else {                // "foo:bar", "foo:*" or "foo:?"
                            string ncName = ScanNCName();
                            if (ncName != null) {
                                this.prefix = this.name;
                                this.name = ncName;
                                // Look ahead for '(' to determine whether QName can be a FunctionName
                                saveSourceIndex = curIndex;
                                SkipSpace();
                                this.canBeFunction = (curChar == '(');
                                SetSourceIndex(saveSourceIndex);
                            } else if (curChar == '*') {
                                NextChar();
                                this.prefix = this.name;
                                this.name = "*";
                            } else {            // "foo:?" -> OperatorName, NameTest
                                // Return "foo" and leave ":" to be reported later as an unknown lexeme
                                SetSourceIndex(saveSourceIndex);
                            }
                        }
                    } else {
                        SkipSpace();
                        if (curChar == ':') {   // "foo ::" or "foo :?"
                            NextChar();
                            if (curChar == ':') {
                                NextChar();
                                colonColon = true;
                            }
                            SetSourceIndex(saveSourceIndex);
                        } else {
                            // note: the whitespace skipped above is not rewound in this branch
                            this.canBeFunction = (curChar == '(');
                        }
                    }
                    // An operator name ("or", "and", "div", "mod") wins over an axis name;
                    // otherwise a name followed by "::" is checked against the axis names.
                    if (!CheckOperator(false) && colonColon) {
                        this.axis = CheckAxis();
                    }
                } else {
                    // not the start of any known lexeme: consume one character and report Unknown
                    kind = LexKind.Unknown;
                    NextChar();
                }
                break;
            }
        }
353 | if (prevKind <= LexKind.LastOperator) 354 | return false; 355 | 356 | switch (prevKind) { 357 | case LexKind.Slash: 358 | case LexKind.SlashSlash: 359 | case LexKind.At: 360 | case LexKind.ColonColon: 361 | case LexKind.LParens: 362 | case LexKind.LBracket: 363 | case LexKind.Comma: 364 | case LexKind.Dollar: 365 | return false; 366 | } 367 | 368 | this.kind = opKind; 369 | return true; 370 | } 371 | 372 | private XPathAxis CheckAxis() { 373 | this.kind = LexKind.Axis; 374 | switch (name) { 375 | case "ancestor" : return XPathAxis.Ancestor; 376 | case "ancestor-or-self" : return XPathAxis.AncestorOrSelf; 377 | case "attribute" : return XPathAxis.Attribute; 378 | case "child" : return XPathAxis.Child; 379 | case "descendant" : return XPathAxis.Descendant; 380 | case "descendant-or-self" : return XPathAxis.DescendantOrSelf; 381 | case "following" : return XPathAxis.Following; 382 | case "following-sibling" : return XPathAxis.FollowingSibling; 383 | case "namespace" : return XPathAxis.Namespace; 384 | case "parent" : return XPathAxis.Parent; 385 | case "preceding" : return XPathAxis.Preceding; 386 | case "preceding-sibling" : return XPathAxis.PrecedingSibling; 387 | case "self" : return XPathAxis.Self; 388 | default : 389 | this.kind = LexKind.Name; 390 | return XPathAxis.Unknown; 391 | } 392 | } 393 | 394 | private void ScanNumber() { 395 | Debug.Assert(IsAsciiDigit(curChar) || curChar == '.'); 396 | while (IsAsciiDigit(curChar)) { 397 | NextChar(); 398 | } 399 | if (curChar == '.') { 400 | NextChar(); 401 | while (IsAsciiDigit(curChar)) { 402 | NextChar(); 403 | } 404 | } 405 | if ((curChar & (~0x20)) == 'E') { 406 | NextChar(); 407 | if (curChar == '+' || curChar == '-') { 408 | NextChar(); 409 | } 410 | while (IsAsciiDigit(curChar)) { 411 | NextChar(); 412 | } 413 | throw ScientificNotationException(); 414 | } 415 | } 416 | 417 | private void ScanString() { 418 | int startIdx = curIndex + 1; 419 | int endIdx = xpathExpr.IndexOf(curChar, startIdx); 420 | 421 
| if (endIdx < 0) { 422 | SetSourceIndex(xpathExpr.Length); 423 | throw UnclosedStringException(); 424 | } 425 | 426 | this.stringValue = xpathExpr.Substring(startIdx, endIdx - startIdx); 427 | SetSourceIndex(endIdx + 1); 428 | } 429 | 430 | static Regex re = new Regex(@"\p{_xmlI}[\p{_xmlC}-[:]]*", RegexOptions.Compiled); 431 | 432 | private string ScanNCName() { 433 | Match m = re.Match(xpathExpr, curIndex); 434 | if (m.Success) { 435 | curIndex += m.Length - 1; 436 | NextChar(); 437 | return m.Value; 438 | } 439 | return null; 440 | } 441 | 442 | public void PassToken(LexKind t) { 443 | CheckToken(t); 444 | NextLex(); 445 | } 446 | 447 | public void CheckToken(LexKind t) { 448 | Debug.Assert(LexKind.FirstStringable <= t); 449 | if (kind != t) { 450 | if (t == LexKind.Eof) { 451 | throw EofExpectedException(RawValue); 452 | } else { 453 | throw TokenExpectedException(LexKindToString(t), RawValue); 454 | } 455 | } 456 | } 457 | 458 | // May be called for the following tokens: Name, String, Eof, Comma, LParens, RParens, LBracket, RBracket, RBrace 459 | private string LexKindToString(LexKind t) { 460 | Debug.Assert(LexKind.FirstStringable <= t); 461 | 462 | if (LexKind.LastNonChar < t) { 463 | Debug.Assert("()[].@,*/$}".IndexOf((char)t) >= 0); 464 | return new string((char)t, 1); 465 | } 466 | 467 | switch (t) { 468 | case LexKind.Name : return ""; 469 | case LexKind.String : return ""; 470 | case LexKind.Eof : return ""; 471 | default: 472 | Debug.Fail("Unexpected LexKind: " + t.ToString()); 473 | return string.Empty; 474 | } 475 | } 476 | 477 | // XPath error messages 478 | // -------------------- 479 | 480 | public XPathParserException UnexpectedTokenException(string token) { 481 | return new XPathParserException(xpathExpr, lexStart, curIndex, 482 | string.Format("Unexpected token '{0}' in the expression.", token) 483 | ); 484 | } 485 | public XPathParserException NodeTestExpectedException(string token) { 486 | return new XPathParserException(xpathExpr, lexStart, 
curIndex, 487 | string.Format("Expected a node test, found '{0}'.", token) 488 | ); 489 | } 490 | public XPathParserException PredicateAfterDotException() { 491 | return new XPathParserException(xpathExpr, lexStart, curIndex, 492 | "Abbreviated step '.' cannot be followed by a predicate. Use the full form 'self::node()[predicate]' instead." 493 | ); 494 | } 495 | public XPathParserException PredicateAfterDotDotException() { 496 | return new XPathParserException(xpathExpr, lexStart, curIndex, 497 | "Abbreviated step '..' cannot be followed by a predicate. Use the full form 'parent::node()[predicate]' instead." 498 | ); 499 | } 500 | public XPathParserException ScientificNotationException() { 501 | return new XPathParserException(xpathExpr, lexStart, curIndex, 502 | "Scientific notation is not allowed." 503 | ); 504 | } 505 | public XPathParserException UnclosedStringException() { 506 | return new XPathParserException(xpathExpr, lexStart, curIndex, 507 | "String literal was not closed." 508 | ); 509 | } 510 | public XPathParserException EofExpectedException(string token) { 511 | return new XPathParserException(xpathExpr, lexStart, curIndex, 512 | string.Format("Expected end of the expression, found '{0}'.", token) 513 | ); 514 | } 515 | public XPathParserException TokenExpectedException(string expectedToken, string actualToken) { 516 | return new XPathParserException(xpathExpr, lexStart, curIndex, 517 | string.Format("Expected token '{0}', found '{1}'.", expectedToken, actualToken) 518 | ); 519 | } 520 | } 521 | } 522 | -------------------------------------------------------------------------------- /XPathParser/XPathParser.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Diagnostics; 3 | 4 | namespace CodePlex.XPathParser { 5 | using XPathNodeType = System.Xml.XPath.XPathNodeType; 6 | using System.Globalization; 7 | 8 | public class XPathParser { 9 | private XPathScanner 
scanner; 10 | private IXPathBuilder builder; 11 | private Stack posInfo = new Stack(); 12 | 13 | // Six possible causes of exceptions in the builder: 14 | // 1. Undefined prefix in a node test. 15 | // 2. Undefined prefix in a variable reference, or unknown variable. 16 | // 3. Undefined prefix in a function call, or unknown function, or wrong number/types of arguments. 17 | // 4. Argument of Union operator is not a node-set. 18 | // 5. First argument of Predicate is not a node-set. 19 | // 6. Argument of Axis is not a node-set. 20 | 21 | public Node Parse(string xpathExpr, IXPathBuilder builder) { 22 | Debug.Assert(this.scanner == null && this.builder == null); 23 | Debug.Assert(builder != null); 24 | 25 | Node result = default(Node); 26 | this.scanner = new XPathScanner(xpathExpr); 27 | this.builder = builder; 28 | this.posInfo.Clear(); 29 | 30 | try { 31 | builder.StartBuild(); 32 | result = ParseExpr(); 33 | scanner.CheckToken(LexKind.Eof); 34 | } 35 | catch (XPathParserException e) { 36 | if (e.queryString == null) { 37 | e.queryString = scanner.Source; 38 | PopPosInfo(out e.startChar, out e.endChar); 39 | } 40 | throw; 41 | } 42 | finally { 43 | result = builder.EndBuild(result); 44 | #if DEBUG 45 | this.builder = null; 46 | this.scanner = null; 47 | #endif 48 | } 49 | Debug.Assert(posInfo.Count == 0, "PushPosInfo() and PopPosInfo() calls have been unbalanced"); 50 | return result; 51 | } 52 | 53 | #region Location paths and node tests 54 | /**************************************************************************************************/ 55 | /* Location paths and node tests */ 56 | /**************************************************************************************************/ 57 | 58 | private static bool IsStep(LexKind lexKind) { 59 | return ( 60 | lexKind == LexKind.Dot || 61 | lexKind == LexKind.DotDot || 62 | lexKind == LexKind.At || 63 | lexKind == LexKind.Axis || 64 | lexKind == LexKind.Star || 65 | lexKind == LexKind.Name // NodeTest is also 
Name 66 | ); 67 | } 68 | 69 | /* 70 | * LocationPath ::= RelativeLocationPath | '/' RelativeLocationPath? | '//' RelativeLocationPath 71 | */ 72 | private Node ParseLocationPath() { 73 | if (scanner.Kind == LexKind.Slash) { 74 | scanner.NextLex(); 75 | Node opnd = builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null); 76 | 77 | if (IsStep(scanner.Kind)) { 78 | opnd = builder.JoinStep(opnd, ParseRelativeLocationPath()); 79 | } 80 | return opnd; 81 | } else if (scanner.Kind == LexKind.SlashSlash) { 82 | scanner.NextLex(); 83 | return builder.JoinStep( 84 | builder.Axis(XPathAxis.Root, XPathNodeType.All, null, null), 85 | builder.JoinStep( 86 | builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null), 87 | ParseRelativeLocationPath() 88 | ) 89 | ); 90 | } else { 91 | return ParseRelativeLocationPath(); 92 | } 93 | } 94 | 95 | /* 96 | * RelativeLocationPath ::= Step (('/' | '//') Step)* 97 | */ 98 | private Node ParseRelativeLocationPath() { 99 | Node opnd = ParseStep(); 100 | if (scanner.Kind == LexKind.Slash) { 101 | scanner.NextLex(); 102 | opnd = builder.JoinStep(opnd, ParseRelativeLocationPath()); 103 | } else if (scanner.Kind == LexKind.SlashSlash) { 104 | scanner.NextLex(); 105 | opnd = builder.JoinStep(opnd, 106 | builder.JoinStep( 107 | builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null), 108 | ParseRelativeLocationPath() 109 | ) 110 | ); 111 | } 112 | return opnd; 113 | } 114 | 115 | /* 116 | * Step ::= '.' | '..' | (AxisName '::' | '@')? NodeTest Predicate* 117 | */ 118 | private Node ParseStep() { 119 | Node opnd; 120 | if (LexKind.Dot == scanner.Kind) { // '.' 121 | scanner.NextLex(); 122 | opnd = builder.Axis(XPathAxis.Self, XPathNodeType.All, null, null); 123 | if (LexKind.LBracket == scanner.Kind) { 124 | throw scanner.PredicateAfterDotException(); 125 | } 126 | } else if (LexKind.DotDot == scanner.Kind) { // '..' 
127 | scanner.NextLex(); 128 | opnd = builder.Axis(XPathAxis.Parent, XPathNodeType.All, null, null); 129 | if (LexKind.LBracket == scanner.Kind) { 130 | throw scanner.PredicateAfterDotDotException(); 131 | } 132 | } else { // (AxisName '::' | '@')? NodeTest Predicate* 133 | XPathAxis axis; 134 | switch (scanner.Kind) { 135 | case LexKind.Axis: // AxisName '::' 136 | axis = scanner.Axis; 137 | scanner.NextLex(); 138 | scanner.NextLex(); 139 | break; 140 | case LexKind.At: // '@' 141 | axis = XPathAxis.Attribute; 142 | scanner.NextLex(); 143 | break; 144 | case LexKind.Name: 145 | case LexKind.Star: 146 | // NodeTest must start with Name or '*' 147 | axis = XPathAxis.Child; 148 | break; 149 | default: 150 | throw scanner.UnexpectedTokenException(scanner.RawValue); 151 | } 152 | 153 | opnd = ParseNodeTest(axis); 154 | 155 | while (LexKind.LBracket == scanner.Kind) { 156 | opnd = builder.Predicate(opnd, ParsePredicate(), IsReverseAxis(axis)); 157 | } 158 | } 159 | return opnd; 160 | } 161 | 162 | private static bool IsReverseAxis(XPathAxis axis) { 163 | return ( 164 | axis == XPathAxis.Ancestor || axis == XPathAxis.Preceding || 165 | axis == XPathAxis.AncestorOrSelf || axis == XPathAxis.PrecedingSibling 166 | ); 167 | } 168 | 169 | /* 170 | * NodeTest ::= NameTest | ('comment' | 'text' | 'node') '(' ')' | 'processing-instruction' '(' Literal? 
')' 171 | * NameTest ::= '*' | NCName ':' '*' | QName 172 | */ 173 | private Node ParseNodeTest(XPathAxis axis) { 174 | XPathNodeType nodeType; 175 | string nodePrefix, nodeName; 176 | 177 | int startChar = scanner.LexStart; 178 | InternalParseNodeTest(scanner, axis, out nodeType, out nodePrefix, out nodeName); 179 | PushPosInfo(startChar, scanner.PrevLexEnd); 180 | Node result = builder.Axis(axis, nodeType, nodePrefix, nodeName); 181 | PopPosInfo(); 182 | return result; 183 | } 184 | 185 | private static bool IsNodeType(XPathScanner scanner) { 186 | return scanner.Prefix.Length == 0 && ( 187 | scanner.Name == "node" || 188 | scanner.Name == "text" || 189 | scanner.Name == "processing-instruction" || 190 | scanner.Name == "comment" 191 | ); 192 | } 193 | 194 | private static XPathNodeType PrincipalNodeType(XPathAxis axis) { 195 | return ( 196 | axis == XPathAxis.Attribute ? XPathNodeType.Attribute : 197 | axis == XPathAxis.Namespace ? XPathNodeType.Namespace : 198 | /*else*/ XPathNodeType.Element 199 | ); 200 | } 201 | 202 | private static void InternalParseNodeTest(XPathScanner scanner, XPathAxis axis, out XPathNodeType nodeType, out string nodePrefix, out string nodeName) { 203 | switch (scanner.Kind) { 204 | case LexKind.Name : 205 | if (scanner.CanBeFunction && IsNodeType(scanner)) { 206 | nodePrefix = null; 207 | nodeName = null; 208 | switch (scanner.Name) { 209 | case "comment": nodeType = XPathNodeType.Comment; break; 210 | case "text" : nodeType = XPathNodeType.Text; break; 211 | case "node" : nodeType = XPathNodeType.All; break; 212 | default: 213 | Debug.Assert(scanner.Name == "processing-instruction"); 214 | nodeType = XPathNodeType.ProcessingInstruction; 215 | break; 216 | } 217 | 218 | scanner.NextLex(); 219 | scanner.PassToken(LexKind.LParens); 220 | 221 | if (nodeType == XPathNodeType.ProcessingInstruction) { 222 | if (scanner.Kind != LexKind.RParens) { // 'processing-instruction' '(' Literal ')' 223 | scanner.CheckToken(LexKind.String); 224 | // It 
is not needed to set nodePrefix here, but for our current implementation 225 | // comparing whole QNames is faster than comparing just local names 226 | nodePrefix = string.Empty; 227 | nodeName = scanner.StringValue; 228 | scanner.NextLex(); 229 | } 230 | } 231 | 232 | scanner.PassToken(LexKind.RParens); 233 | } else { 234 | nodePrefix = scanner.Prefix; 235 | nodeName = scanner.Name; 236 | nodeType = PrincipalNodeType(axis); 237 | scanner.NextLex(); 238 | if (nodeName == "*") { 239 | nodeName = null; 240 | } 241 | } 242 | break; 243 | case LexKind.Star : 244 | nodePrefix = null; 245 | nodeName = null; 246 | nodeType = PrincipalNodeType(axis); 247 | scanner.NextLex(); 248 | break; 249 | default : 250 | throw scanner.NodeTestExpectedException(scanner.RawValue); 251 | } 252 | } 253 | 254 | /* 255 | * Predicate ::= '[' Expr ']' 256 | */ 257 | private Node ParsePredicate() { 258 | scanner.PassToken(LexKind.LBracket); 259 | Node opnd = ParseExpr(); 260 | scanner.PassToken(LexKind.RBracket); 261 | return opnd; 262 | } 263 | #endregion 264 | 265 | #region Expressions 266 | /**************************************************************************************************/ 267 | /* Expressions */ 268 | /**************************************************************************************************/ 269 | 270 | /* 271 | * Expr ::= OrExpr 272 | * OrExpr ::= AndExpr ('or' AndExpr)* 273 | * AndExpr ::= EqualityExpr ('and' EqualityExpr)* 274 | * EqualityExpr ::= RelationalExpr (('=' | '!=') RelationalExpr)* 275 | * RelationalExpr ::= AdditiveExpr (('<' | '>' | '<=' | '>=') AdditiveExpr)* 276 | * AdditiveExpr ::= MultiplicativeExpr (('+' | '-') MultiplicativeExpr)* 277 | * MultiplicativeExpr ::= UnaryExpr (('*' | 'div' | 'mod') UnaryExpr)* 278 | * UnaryExpr ::= ('-')* UnionExpr 279 | */ 280 | private Node ParseExpr() { 281 | return ParseSubExpr(/*callerPrec:*/0); 282 | } 283 | 284 | private Node ParseSubExpr(int callerPrec) { 285 | XPathOperator op; 286 | Node opnd; 287 | 288 
| // Check for unary operators 289 | if (scanner.Kind == LexKind.Minus) { 290 | op = XPathOperator.UnaryMinus; 291 | int opPrec = XPathOperatorPrecedence[(int)op]; 292 | scanner.NextLex(); 293 | opnd = builder.Operator(op, ParseSubExpr(opPrec), default(Node)); 294 | } else { 295 | opnd = ParseUnionExpr(); 296 | } 297 | 298 | // Process binary operators 299 | while (true) { 300 | op = (scanner.Kind <= LexKind.LastOperator) ? (XPathOperator)scanner.Kind : XPathOperator.Unknown; 301 | int opPrec = XPathOperatorPrecedence[(int)op]; 302 | if (opPrec <= callerPrec) 303 | return opnd; 304 | 305 | // Operator's precedence is greater than the one of our caller, so process it here 306 | scanner.NextLex(); 307 | opnd = builder.Operator(op, opnd, ParseSubExpr(/*callerPrec:*/opPrec)); 308 | } 309 | } 310 | 311 | private static int[] XPathOperatorPrecedence = { 312 | /*Unknown */ 0, 313 | /*Or */ 1, 314 | /*And */ 2, 315 | /*Eq */ 3, 316 | /*Ne */ 3, 317 | /*Lt */ 4, 318 | /*Le */ 4, 319 | /*Gt */ 4, 320 | /*Ge */ 4, 321 | /*Plus */ 5, 322 | /*Minus */ 5, 323 | /*Multiply */ 6, 324 | /*Divide */ 6, 325 | /*Modulo */ 6, 326 | /*UnaryMinus */ 7, 327 | /*Union */ 8, // Not used 328 | }; 329 | 330 | /* 331 | * UnionExpr ::= PathExpr ('|' PathExpr)* 332 | */ 333 | private Node ParseUnionExpr() { 334 | int startChar = scanner.LexStart; 335 | Node opnd1 = ParsePathExpr(); 336 | 337 | if (scanner.Kind == LexKind.Union) { 338 | PushPosInfo(startChar, scanner.PrevLexEnd); 339 | opnd1 = builder.Operator(XPathOperator.Union, default(Node), opnd1); 340 | PopPosInfo(); 341 | 342 | while (scanner.Kind == LexKind.Union) { 343 | scanner.NextLex(); 344 | startChar = scanner.LexStart; 345 | Node opnd2 = ParsePathExpr(); 346 | PushPosInfo(startChar, scanner.PrevLexEnd); 347 | opnd1 = builder.Operator(XPathOperator.Union, opnd1, opnd2); 348 | PopPosInfo(); 349 | } 350 | } 351 | return opnd1; 352 | } 353 | 354 | /* 355 | * PathExpr ::= LocationPath | FilterExpr (('/' | '//') RelativeLocationPath )? 
356 | */ 357 | private Node ParsePathExpr() { 358 | // Here we distinguish FilterExpr from LocationPath - the former starts with PrimaryExpr 359 | if (IsPrimaryExpr()) { 360 | int startChar = scanner.LexStart; 361 | Node opnd = ParseFilterExpr(); 362 | int endChar = scanner.PrevLexEnd; 363 | 364 | if (scanner.Kind == LexKind.Slash) { 365 | scanner.NextLex(); 366 | PushPosInfo(startChar, endChar); 367 | opnd = builder.JoinStep(opnd, ParseRelativeLocationPath()); 368 | PopPosInfo(); 369 | } else if (scanner.Kind == LexKind.SlashSlash) { 370 | scanner.NextLex(); 371 | PushPosInfo(startChar, endChar); 372 | opnd = builder.JoinStep(opnd, 373 | builder.JoinStep( 374 | builder.Axis(XPathAxis.DescendantOrSelf, XPathNodeType.All, null, null), 375 | ParseRelativeLocationPath() 376 | ) 377 | ); 378 | PopPosInfo(); 379 | } 380 | return opnd; 381 | } else { 382 | return ParseLocationPath(); 383 | } 384 | } 385 | 386 | /* 387 | * FilterExpr ::= PrimaryExpr Predicate* 388 | */ 389 | private Node ParseFilterExpr() { 390 | int startChar = scanner.LexStart; 391 | Node opnd = ParsePrimaryExpr(); 392 | int endChar = scanner.PrevLexEnd; 393 | 394 | while (scanner.Kind == LexKind.LBracket) { 395 | PushPosInfo(startChar, endChar); 396 | opnd = builder.Predicate(opnd, ParsePredicate(), /*reverseStep:*/false); 397 | PopPosInfo(); 398 | } 399 | return opnd; 400 | } 401 | 402 | private bool IsPrimaryExpr() { 403 | return ( 404 | scanner.Kind == LexKind.String || 405 | scanner.Kind == LexKind.Number || 406 | scanner.Kind == LexKind.Dollar || 407 | scanner.Kind == LexKind.LParens || 408 | scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner) 409 | ); 410 | } 411 | 412 | /* 413 | * PrimaryExpr ::= Literal | Number | VariableReference | '(' Expr ')' | FunctionCall 414 | */ 415 | private Node ParsePrimaryExpr() { 416 | Debug.Assert(IsPrimaryExpr()); 417 | Node opnd; 418 | switch (scanner.Kind) { 419 | case LexKind.String: 420 | opnd = builder.String(scanner.StringValue); 
421 | scanner.NextLex(); 422 | break; 423 | case LexKind.Number: 424 | opnd = builder.Number(scanner.RawValue); 425 | scanner.NextLex(); 426 | break; 427 | case LexKind.Dollar: 428 | int startChar = scanner.LexStart; 429 | scanner.NextLex(); 430 | scanner.CheckToken(LexKind.Name); 431 | PushPosInfo(startChar, scanner.LexStart + scanner.LexSize); 432 | opnd = builder.Variable(scanner.Prefix, scanner.Name); 433 | PopPosInfo(); 434 | scanner.NextLex(); 435 | break; 436 | case LexKind.LParens: 437 | scanner.NextLex(); 438 | opnd = ParseExpr(); 439 | scanner.PassToken(LexKind.RParens); 440 | break; 441 | default: 442 | Debug.Assert( 443 | scanner.Kind == LexKind.Name && scanner.CanBeFunction && !IsNodeType(scanner), 444 | "IsPrimaryExpr() returned true, but the lexeme is not recognized" 445 | ); 446 | opnd = ParseFunctionCall(); 447 | break; 448 | } 449 | return opnd; 450 | } 451 | 452 | /* 453 | * FunctionCall ::= FunctionName '(' (Expr (',' Expr)* )? ')' 454 | */ 455 | private Node ParseFunctionCall() { 456 | List argList = new List(); 457 | string name = scanner.Name; 458 | string prefix = scanner.Prefix; 459 | int startChar = scanner.LexStart; 460 | 461 | scanner.PassToken(LexKind.Name); 462 | scanner.PassToken(LexKind.LParens); 463 | 464 | if (scanner.Kind != LexKind.RParens) { 465 | while (true) { 466 | argList.Add(ParseExpr()); 467 | if (scanner.Kind != LexKind.Comma) { 468 | scanner.CheckToken(LexKind.RParens); 469 | break; 470 | } 471 | scanner.NextLex(); // move off the ',' 472 | } 473 | } 474 | 475 | scanner.NextLex(); // move off the ')' 476 | PushPosInfo(startChar, scanner.PrevLexEnd); 477 | Node result = builder.Function(prefix, name, argList); 478 | PopPosInfo(); 479 | return result; 480 | } 481 | #endregion 482 | 483 | /**************************************************************************************************/ 484 | /* Helper methods */ 485 | /**************************************************************************************************/ 486 
| 487 | private void PushPosInfo(int startChar, int endChar) { 488 | posInfo.Push(startChar); 489 | posInfo.Push(endChar); 490 | } 491 | 492 | private void PopPosInfo() { 493 | posInfo.Pop(); 494 | posInfo.Pop(); 495 | } 496 | 497 | private void PopPosInfo(out int startChar, out int endChar) { 498 | endChar = posInfo.Pop(); 499 | startChar = posInfo.Pop(); 500 | } 501 | 502 | private static double ToDouble(string str) { 503 | double d; 504 | if (double.TryParse(str, NumberStyles.AllowLeadingSign|NumberStyles.AllowDecimalPoint|NumberStyles.AllowTrailingWhite, NumberFormatInfo.InvariantInfo, out d)) { 505 | return d; 506 | } 507 | return double.NaN; 508 | } 509 | } 510 | } 511 | --------------------------------------------------------------------------------