├── .travis.yml ├── LICENSE ├── README.md ├── doc.go ├── go.mod ├── lexer.go ├── name.go ├── parser.go ├── xpath.go └── xpath_test.go /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.8.1 5 | 6 | script: 7 | - go test -v -race -coverprofile=coverage.txt -covermode=atomic 8 | 9 | after_success: 10 | - bash <(curl -s https://codecov.io/bash) 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Santhosh Kumar Tekuri. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xpathparser 2 | 3 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 4 | [![GoDoc](https://godoc.org/github.com/santhosh-tekuri/xpathparser?status.svg)](https://godoc.org/github.com/santhosh-tekuri/xpathparser) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/santhosh-tekuri/xpathparser)](https://goreportcard.com/report/github.com/santhosh-tekuri/xpathparser) 6 | [![Build Status](https://travis-ci.org/santhosh-tekuri/xpathparser.svg?branch=master)](https://travis-ci.org/santhosh-tekuri/xpathparser) 7 | [![codecov.io](https://codecov.io/github/santhosh-tekuri/xpathparser/coverage.svg?branch=master)](https://codecov.io/github/santhosh-tekuri/xpathparser?branch=master) 8 | 9 | Package xpathparser provides a lexer and parser for XPath 1.0. 10 | 11 | This package parses a given XPath expression into an expression model. 12 | 13 | ## Example 14 | 15 | An example of using this package: 16 | 17 | ```go 18 | expr := xpathparser.MustParse("(/a/b)[5]") 19 | fmt.Println(expr) 20 | ``` 21 | 22 | This package does not evaluate XPath expressions. 
To evaluate XPath expressions, use https://github.com/santhosh-tekuri/xpath 23 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Santhosh Kumar Tekuri. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | /* 6 | Package xpathparser provides a lexer and parser for XPath 1.0. 7 | 8 | This package parses a given XPath expression into an expression model. 9 | 10 | An example of using this package: 11 | 12 | expr := xpathparser.MustParse("(/a/b)[5]") 13 | fmt.Println(expr) 14 | 15 | */ 16 | package xpathparser 17 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/santhosh-tekuri/xpathparser 2 | -------------------------------------------------------------------------------- /lexer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Santhosh Kumar Tekuri. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package xpathparser 6 | 7 | // see lexer specification at https://www.w3.org/TR/xpath/#exprlex 8 | 9 | import ( 10 | "bytes" 11 | "strings" 12 | "unicode/utf8" 13 | ) 14 | 15 | type kind int 16 | 17 | const ( 18 | eof kind = iota - 1 19 | 20 | // operators, note the order must same as Op enum values 21 | eq 22 | neq 23 | lt 24 | lte 25 | gt 26 | gte 27 | plus 28 | minus 29 | multiply 30 | mod 31 | div 32 | and 33 | or 34 | pipe 35 | 36 | slash 37 | slashSlash 38 | dot 39 | dotDot 40 | colon 41 | colonColon 42 | 43 | at 44 | dollar 45 | comma 46 | star 47 | 48 | lbracket 49 | rbracket 50 | lparen 51 | rparen 52 | 53 | identifier 54 | literal 55 | number 56 | ) 57 | 58 | var kindNames = []string{ 59 | ``, 60 | `'='`, `"!="`, `'<'`, `"<="`, `'>'`, `">="`, 61 | `'+'`, `'-'`, `'*'`, `"mod"`, `"div"`, 62 | `"and"`, `"or"`, `'|'`, 63 | `'/'`, `"//"`, `'.'`, `".."`, `':'`, `"::"`, 64 | `'@'`, `'$'`, `','`, `'*'`, 65 | `'['`, `']'`, `'('`, `')'`, 66 | ``, ``, ``, 67 | } 68 | 69 | func (k kind) String() string { 70 | return kindNames[k+1] 71 | } 72 | 73 | type token struct { 74 | xpath string 75 | kind kind 76 | begin int 77 | end int 78 | } 79 | 80 | func (t token) text() string { 81 | return t.xpath[t.begin:t.end] 82 | } 83 | 84 | type lexer struct { 85 | xpath string 86 | pos int 87 | expectOp bool 88 | } 89 | 90 | func (l *lexer) err(msg string) (token, error) { 91 | return token{}, &Error{msg, l.xpath, l.pos} 92 | } 93 | 94 | func (l *lexer) char(i int) int { 95 | if l.pos+i < len(l.xpath) { 96 | return int(l.xpath[l.pos+i]) 97 | } 98 | return -1 99 | } 100 | 101 | func (l *lexer) consume(n int) { 102 | l.pos += n 103 | } 104 | 105 | func (l *lexer) hasMore() bool { 106 | return l.pos < len(l.xpath) 107 | } 108 | 109 | func (l *lexer) token(kind kind, n int) (token, error) { 110 | var t token 111 | if n > 0 { 112 | t = token{l.xpath, kind, l.pos, l.pos + n} 113 | l.consume(n) 114 | } else { 115 | t = token{l.xpath, kind, l.pos + n, l.pos} 116 | } 117 | switch 
kind { 118 | case at, colonColon, lparen, lbracket, and, or, mod, div, colon, slash, slashSlash, 119 | pipe, dollar, plus, minus, multiply, comma, lt, gt, lte, gte, eq, neq: 120 | l.expectOp = false 121 | default: 122 | l.expectOp = true 123 | } 124 | return t, nil 125 | } 126 | 127 | func (l *lexer) next() (token, error) { 128 | SkipWS: 129 | for l.hasMore() { 130 | switch l.char(0) { 131 | case ' ', '\t', '\n', '\r': 132 | l.consume(1) 133 | default: 134 | break SkipWS 135 | } 136 | } 137 | 138 | switch l.char(0) { 139 | case -1: 140 | return l.token(eof, 0) 141 | case '$': 142 | return l.token(dollar, 1) 143 | case '"', '\'': 144 | return l.literal() 145 | case '/': 146 | if l.char(1) == '/' { 147 | return l.token(slashSlash, 2) 148 | } 149 | return l.token(slash, 1) 150 | case ',': 151 | return l.token(comma, 1) 152 | case '(': 153 | return l.token(lparen, 1) 154 | case ')': 155 | return l.token(rparen, 1) 156 | case '[': 157 | return l.token(lbracket, 1) 158 | case ']': 159 | return l.token(rbracket, 1) 160 | case '+': 161 | return l.token(plus, 1) 162 | case '-': 163 | return l.token(minus, 1) 164 | case '<': 165 | if l.char(1) == '=' { 166 | return l.token(lte, 2) 167 | } 168 | return l.token(lt, 1) 169 | case '>': 170 | if l.char(1) == '=' { 171 | return l.token(gte, 2) 172 | } 173 | return l.token(gt, 1) 174 | case '=': 175 | return l.token(eq, 1) 176 | case '!': 177 | if l.char(1) == '=' { 178 | return l.token(neq, 2) 179 | } 180 | return l.err("expected '!='") 181 | case '|': 182 | return l.token(pipe, 1) 183 | case '@': 184 | return l.token(at, 1) 185 | case ':': 186 | if l.char(1) == ':' { 187 | return l.token(colonColon, 2) 188 | } 189 | return l.token(colon, 1) 190 | case '*': 191 | if l.expectOp { 192 | return l.token(multiply, 1) 193 | } 194 | return l.token(star, 1) 195 | case '.': 196 | switch l.char(1) { 197 | case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 198 | return l.number() 199 | case '.': 200 | return l.token(dotDot, 2) 201 | 
default: 202 | return l.token(dot, 1) 203 | } 204 | case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 205 | return l.number() 206 | default: 207 | if l.expectOp { 208 | return l.operator() 209 | } 210 | return l.identifier() 211 | } 212 | } 213 | 214 | func (l *lexer) literal() (token, error) { 215 | quote := l.char(0) 216 | l.consume(1) 217 | begin := l.pos 218 | for { 219 | switch l.char(0) { 220 | case quote: 221 | t, _ := l.token(literal, begin-l.pos) 222 | l.consume(1) 223 | return t, nil 224 | case -1: 225 | return l.err("unclosed literal") 226 | } 227 | l.consume(1) 228 | } 229 | } 230 | 231 | func (l *lexer) number() (token, error) { 232 | begin := l.pos 233 | dotAllowed := true 234 | Loop: 235 | for { 236 | switch l.char(0) { 237 | case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 238 | l.consume(1) 239 | case '.': 240 | if dotAllowed { 241 | dotAllowed = false 242 | l.consume(1) 243 | } else { 244 | break Loop 245 | } 246 | default: 247 | break Loop 248 | } 249 | } 250 | return l.token(number, begin-l.pos) 251 | } 252 | 253 | func (l *lexer) operator() (token, error) { 254 | remaining := l.xpath[l.pos:] 255 | switch { 256 | case strings.HasPrefix(remaining, "and"): 257 | return l.token(and, 3) 258 | case strings.HasPrefix(remaining, "or"): 259 | return l.token(or, 2) 260 | case strings.HasPrefix(remaining, "mod"): 261 | return l.token(mod, 3) 262 | case strings.HasPrefix(remaining, "div"): 263 | return l.token(div, 3) 264 | } 265 | return l.err("operatorName expected") 266 | } 267 | 268 | func (l *lexer) identifier() (token, error) { 269 | begin := l.pos 270 | b, ok := l.readName() 271 | if !ok { 272 | return l.err("identifier expected") 273 | } 274 | if !isName(b) { 275 | l.pos = begin 276 | return l.err("invalid identifier") 277 | } 278 | return l.token(identifier, begin-l.pos) 279 | } 280 | 281 | func (l *lexer) readName() ([]byte, bool) { 282 | if !l.hasMore() { 283 | return nil, false 284 | } 285 | buf := new(bytes.Buffer) 286 | b := 
byte(l.char(0)) 287 | if b < utf8.RuneSelf && !isNameByte(b) { 288 | return nil, false 289 | } 290 | buf.WriteByte(b) 291 | l.consume(1) 292 | for l.hasMore() { 293 | b = byte(l.char(0)) 294 | if b < utf8.RuneSelf && !isNameByte(b) { 295 | break 296 | } 297 | buf.WriteByte(b) 298 | l.consume(1) 299 | } 300 | return buf.Bytes(), true 301 | } 302 | -------------------------------------------------------------------------------- /name.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Santhosh Kumar Tekuri. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package xpathparser 6 | 7 | import ( 8 | "unicode" 9 | "unicode/utf8" 10 | ) 11 | 12 | var first = &unicode.RangeTable{ 13 | R16: []unicode.Range16{ 14 | {0x0041, 0x005A, 1}, 15 | {0x005F, 0x005F, 1}, 16 | {0x0061, 0x007A, 1}, 17 | {0x00C0, 0x00D6, 1}, 18 | {0x00D8, 0x00F6, 1}, 19 | {0x00F8, 0x00FF, 1}, 20 | {0x0100, 0x0131, 1}, 21 | {0x0134, 0x013E, 1}, 22 | {0x0141, 0x0148, 1}, 23 | {0x014A, 0x017E, 1}, 24 | {0x0180, 0x01C3, 1}, 25 | {0x01CD, 0x01F0, 1}, 26 | {0x01F4, 0x01F5, 1}, 27 | {0x01FA, 0x0217, 1}, 28 | {0x0250, 0x02A8, 1}, 29 | {0x02BB, 0x02C1, 1}, 30 | {0x0386, 0x0386, 1}, 31 | {0x0388, 0x038A, 1}, 32 | {0x038C, 0x038C, 1}, 33 | {0x038E, 0x03A1, 1}, 34 | {0x03A3, 0x03CE, 1}, 35 | {0x03D0, 0x03D6, 1}, 36 | {0x03DA, 0x03E0, 2}, 37 | {0x03E2, 0x03F3, 1}, 38 | {0x0401, 0x040C, 1}, 39 | {0x040E, 0x044F, 1}, 40 | {0x0451, 0x045C, 1}, 41 | {0x045E, 0x0481, 1}, 42 | {0x0490, 0x04C4, 1}, 43 | {0x04C7, 0x04C8, 1}, 44 | {0x04CB, 0x04CC, 1}, 45 | {0x04D0, 0x04EB, 1}, 46 | {0x04EE, 0x04F5, 1}, 47 | {0x04F8, 0x04F9, 1}, 48 | {0x0531, 0x0556, 1}, 49 | {0x0559, 0x0559, 1}, 50 | {0x0561, 0x0586, 1}, 51 | {0x05D0, 0x05EA, 1}, 52 | {0x05F0, 0x05F2, 1}, 53 | {0x0621, 0x063A, 1}, 54 | {0x0641, 0x064A, 1}, 55 | {0x0671, 0x06B7, 1}, 56 | {0x06BA, 0x06BE, 1}, 57 | {0x06C0, 0x06CE, 1}, 
58 | {0x06D0, 0x06D3, 1}, 59 | {0x06D5, 0x06D5, 1}, 60 | {0x06E5, 0x06E6, 1}, 61 | {0x0905, 0x0939, 1}, 62 | {0x093D, 0x093D, 1}, 63 | {0x0958, 0x0961, 1}, 64 | {0x0985, 0x098C, 1}, 65 | {0x098F, 0x0990, 1}, 66 | {0x0993, 0x09A8, 1}, 67 | {0x09AA, 0x09B0, 1}, 68 | {0x09B2, 0x09B2, 1}, 69 | {0x09B6, 0x09B9, 1}, 70 | {0x09DC, 0x09DD, 1}, 71 | {0x09DF, 0x09E1, 1}, 72 | {0x09F0, 0x09F1, 1}, 73 | {0x0A05, 0x0A0A, 1}, 74 | {0x0A0F, 0x0A10, 1}, 75 | {0x0A13, 0x0A28, 1}, 76 | {0x0A2A, 0x0A30, 1}, 77 | {0x0A32, 0x0A33, 1}, 78 | {0x0A35, 0x0A36, 1}, 79 | {0x0A38, 0x0A39, 1}, 80 | {0x0A59, 0x0A5C, 1}, 81 | {0x0A5E, 0x0A5E, 1}, 82 | {0x0A72, 0x0A74, 1}, 83 | {0x0A85, 0x0A8B, 1}, 84 | {0x0A8D, 0x0A8D, 1}, 85 | {0x0A8F, 0x0A91, 1}, 86 | {0x0A93, 0x0AA8, 1}, 87 | {0x0AAA, 0x0AB0, 1}, 88 | {0x0AB2, 0x0AB3, 1}, 89 | {0x0AB5, 0x0AB9, 1}, 90 | {0x0ABD, 0x0AE0, 0x23}, 91 | {0x0B05, 0x0B0C, 1}, 92 | {0x0B0F, 0x0B10, 1}, 93 | {0x0B13, 0x0B28, 1}, 94 | {0x0B2A, 0x0B30, 1}, 95 | {0x0B32, 0x0B33, 1}, 96 | {0x0B36, 0x0B39, 1}, 97 | {0x0B3D, 0x0B3D, 1}, 98 | {0x0B5C, 0x0B5D, 1}, 99 | {0x0B5F, 0x0B61, 1}, 100 | {0x0B85, 0x0B8A, 1}, 101 | {0x0B8E, 0x0B90, 1}, 102 | {0x0B92, 0x0B95, 1}, 103 | {0x0B99, 0x0B9A, 1}, 104 | {0x0B9C, 0x0B9C, 1}, 105 | {0x0B9E, 0x0B9F, 1}, 106 | {0x0BA3, 0x0BA4, 1}, 107 | {0x0BA8, 0x0BAA, 1}, 108 | {0x0BAE, 0x0BB5, 1}, 109 | {0x0BB7, 0x0BB9, 1}, 110 | {0x0C05, 0x0C0C, 1}, 111 | {0x0C0E, 0x0C10, 1}, 112 | {0x0C12, 0x0C28, 1}, 113 | {0x0C2A, 0x0C33, 1}, 114 | {0x0C35, 0x0C39, 1}, 115 | {0x0C60, 0x0C61, 1}, 116 | {0x0C85, 0x0C8C, 1}, 117 | {0x0C8E, 0x0C90, 1}, 118 | {0x0C92, 0x0CA8, 1}, 119 | {0x0CAA, 0x0CB3, 1}, 120 | {0x0CB5, 0x0CB9, 1}, 121 | {0x0CDE, 0x0CDE, 1}, 122 | {0x0CE0, 0x0CE1, 1}, 123 | {0x0D05, 0x0D0C, 1}, 124 | {0x0D0E, 0x0D10, 1}, 125 | {0x0D12, 0x0D28, 1}, 126 | {0x0D2A, 0x0D39, 1}, 127 | {0x0D60, 0x0D61, 1}, 128 | {0x0E01, 0x0E2E, 1}, 129 | {0x0E30, 0x0E30, 1}, 130 | {0x0E32, 0x0E33, 1}, 131 | {0x0E40, 0x0E45, 1}, 132 | {0x0E81, 0x0E82, 1}, 133 | 
{0x0E84, 0x0E84, 1}, 134 | {0x0E87, 0x0E88, 1}, 135 | {0x0E8A, 0x0E8D, 3}, 136 | {0x0E94, 0x0E97, 1}, 137 | {0x0E99, 0x0E9F, 1}, 138 | {0x0EA1, 0x0EA3, 1}, 139 | {0x0EA5, 0x0EA7, 2}, 140 | {0x0EAA, 0x0EAB, 1}, 141 | {0x0EAD, 0x0EAE, 1}, 142 | {0x0EB0, 0x0EB0, 1}, 143 | {0x0EB2, 0x0EB3, 1}, 144 | {0x0EBD, 0x0EBD, 1}, 145 | {0x0EC0, 0x0EC4, 1}, 146 | {0x0F40, 0x0F47, 1}, 147 | {0x0F49, 0x0F69, 1}, 148 | {0x10A0, 0x10C5, 1}, 149 | {0x10D0, 0x10F6, 1}, 150 | {0x1100, 0x1100, 1}, 151 | {0x1102, 0x1103, 1}, 152 | {0x1105, 0x1107, 1}, 153 | {0x1109, 0x1109, 1}, 154 | {0x110B, 0x110C, 1}, 155 | {0x110E, 0x1112, 1}, 156 | {0x113C, 0x1140, 2}, 157 | {0x114C, 0x1150, 2}, 158 | {0x1154, 0x1155, 1}, 159 | {0x1159, 0x1159, 1}, 160 | {0x115F, 0x1161, 1}, 161 | {0x1163, 0x1169, 2}, 162 | {0x116D, 0x116E, 1}, 163 | {0x1172, 0x1173, 1}, 164 | {0x1175, 0x119E, 0x119E - 0x1175}, 165 | {0x11A8, 0x11AB, 0x11AB - 0x11A8}, 166 | {0x11AE, 0x11AF, 1}, 167 | {0x11B7, 0x11B8, 1}, 168 | {0x11BA, 0x11BA, 1}, 169 | {0x11BC, 0x11C2, 1}, 170 | {0x11EB, 0x11F0, 0x11F0 - 0x11EB}, 171 | {0x11F9, 0x11F9, 1}, 172 | {0x1E00, 0x1E9B, 1}, 173 | {0x1EA0, 0x1EF9, 1}, 174 | {0x1F00, 0x1F15, 1}, 175 | {0x1F18, 0x1F1D, 1}, 176 | {0x1F20, 0x1F45, 1}, 177 | {0x1F48, 0x1F4D, 1}, 178 | {0x1F50, 0x1F57, 1}, 179 | {0x1F59, 0x1F5B, 0x1F5B - 0x1F59}, 180 | {0x1F5D, 0x1F5D, 1}, 181 | {0x1F5F, 0x1F7D, 1}, 182 | {0x1F80, 0x1FB4, 1}, 183 | {0x1FB6, 0x1FBC, 1}, 184 | {0x1FBE, 0x1FBE, 1}, 185 | {0x1FC2, 0x1FC4, 1}, 186 | {0x1FC6, 0x1FCC, 1}, 187 | {0x1FD0, 0x1FD3, 1}, 188 | {0x1FD6, 0x1FDB, 1}, 189 | {0x1FE0, 0x1FEC, 1}, 190 | {0x1FF2, 0x1FF4, 1}, 191 | {0x1FF6, 0x1FFC, 1}, 192 | {0x2126, 0x2126, 1}, 193 | {0x212A, 0x212B, 1}, 194 | {0x212E, 0x212E, 1}, 195 | {0x2180, 0x2182, 1}, 196 | {0x3007, 0x3007, 1}, 197 | {0x3021, 0x3029, 1}, 198 | {0x3041, 0x3094, 1}, 199 | {0x30A1, 0x30FA, 1}, 200 | {0x3105, 0x312C, 1}, 201 | {0x4E00, 0x9FA5, 1}, 202 | {0xAC00, 0xD7A3, 1}, 203 | }, 204 | } 205 | 206 | var second = 
&unicode.RangeTable{ 207 | R16: []unicode.Range16{ 208 | {0x002D, 0x002E, 1}, 209 | {0x0030, 0x0039, 1}, 210 | {0x00B7, 0x00B7, 1}, 211 | {0x02D0, 0x02D1, 1}, 212 | {0x0300, 0x0345, 1}, 213 | {0x0360, 0x0361, 1}, 214 | {0x0387, 0x0387, 1}, 215 | {0x0483, 0x0486, 1}, 216 | {0x0591, 0x05A1, 1}, 217 | {0x05A3, 0x05B9, 1}, 218 | {0x05BB, 0x05BD, 1}, 219 | {0x05BF, 0x05BF, 1}, 220 | {0x05C1, 0x05C2, 1}, 221 | {0x05C4, 0x0640, 0x0640 - 0x05C4}, 222 | {0x064B, 0x0652, 1}, 223 | {0x0660, 0x0669, 1}, 224 | {0x0670, 0x0670, 1}, 225 | {0x06D6, 0x06DC, 1}, 226 | {0x06DD, 0x06DF, 1}, 227 | {0x06E0, 0x06E4, 1}, 228 | {0x06E7, 0x06E8, 1}, 229 | {0x06EA, 0x06ED, 1}, 230 | {0x06F0, 0x06F9, 1}, 231 | {0x0901, 0x0903, 1}, 232 | {0x093C, 0x093C, 1}, 233 | {0x093E, 0x094C, 1}, 234 | {0x094D, 0x094D, 1}, 235 | {0x0951, 0x0954, 1}, 236 | {0x0962, 0x0963, 1}, 237 | {0x0966, 0x096F, 1}, 238 | {0x0981, 0x0983, 1}, 239 | {0x09BC, 0x09BC, 1}, 240 | {0x09BE, 0x09BF, 1}, 241 | {0x09C0, 0x09C4, 1}, 242 | {0x09C7, 0x09C8, 1}, 243 | {0x09CB, 0x09CD, 1}, 244 | {0x09D7, 0x09D7, 1}, 245 | {0x09E2, 0x09E3, 1}, 246 | {0x09E6, 0x09EF, 1}, 247 | {0x0A02, 0x0A3C, 0x3A}, 248 | {0x0A3E, 0x0A3F, 1}, 249 | {0x0A40, 0x0A42, 1}, 250 | {0x0A47, 0x0A48, 1}, 251 | {0x0A4B, 0x0A4D, 1}, 252 | {0x0A66, 0x0A6F, 1}, 253 | {0x0A70, 0x0A71, 1}, 254 | {0x0A81, 0x0A83, 1}, 255 | {0x0ABC, 0x0ABC, 1}, 256 | {0x0ABE, 0x0AC5, 1}, 257 | {0x0AC7, 0x0AC9, 1}, 258 | {0x0ACB, 0x0ACD, 1}, 259 | {0x0AE6, 0x0AEF, 1}, 260 | {0x0B01, 0x0B03, 1}, 261 | {0x0B3C, 0x0B3C, 1}, 262 | {0x0B3E, 0x0B43, 1}, 263 | {0x0B47, 0x0B48, 1}, 264 | {0x0B4B, 0x0B4D, 1}, 265 | {0x0B56, 0x0B57, 1}, 266 | {0x0B66, 0x0B6F, 1}, 267 | {0x0B82, 0x0B83, 1}, 268 | {0x0BBE, 0x0BC2, 1}, 269 | {0x0BC6, 0x0BC8, 1}, 270 | {0x0BCA, 0x0BCD, 1}, 271 | {0x0BD7, 0x0BD7, 1}, 272 | {0x0BE7, 0x0BEF, 1}, 273 | {0x0C01, 0x0C03, 1}, 274 | {0x0C3E, 0x0C44, 1}, 275 | {0x0C46, 0x0C48, 1}, 276 | {0x0C4A, 0x0C4D, 1}, 277 | {0x0C55, 0x0C56, 1}, 278 | {0x0C66, 0x0C6F, 1}, 279 | {0x0C82, 
0x0C83, 1}, 280 | {0x0CBE, 0x0CC4, 1}, 281 | {0x0CC6, 0x0CC8, 1}, 282 | {0x0CCA, 0x0CCD, 1}, 283 | {0x0CD5, 0x0CD6, 1}, 284 | {0x0CE6, 0x0CEF, 1}, 285 | {0x0D02, 0x0D03, 1}, 286 | {0x0D3E, 0x0D43, 1}, 287 | {0x0D46, 0x0D48, 1}, 288 | {0x0D4A, 0x0D4D, 1}, 289 | {0x0D57, 0x0D57, 1}, 290 | {0x0D66, 0x0D6F, 1}, 291 | {0x0E31, 0x0E31, 1}, 292 | {0x0E34, 0x0E3A, 1}, 293 | {0x0E46, 0x0E46, 1}, 294 | {0x0E47, 0x0E4E, 1}, 295 | {0x0E50, 0x0E59, 1}, 296 | {0x0EB1, 0x0EB1, 1}, 297 | {0x0EB4, 0x0EB9, 1}, 298 | {0x0EBB, 0x0EBC, 1}, 299 | {0x0EC6, 0x0EC6, 1}, 300 | {0x0EC8, 0x0ECD, 1}, 301 | {0x0ED0, 0x0ED9, 1}, 302 | {0x0F18, 0x0F19, 1}, 303 | {0x0F20, 0x0F29, 1}, 304 | {0x0F35, 0x0F39, 2}, 305 | {0x0F3E, 0x0F3F, 1}, 306 | {0x0F71, 0x0F84, 1}, 307 | {0x0F86, 0x0F8B, 1}, 308 | {0x0F90, 0x0F95, 1}, 309 | {0x0F97, 0x0F97, 1}, 310 | {0x0F99, 0x0FAD, 1}, 311 | {0x0FB1, 0x0FB7, 1}, 312 | {0x0FB9, 0x0FB9, 1}, 313 | {0x20D0, 0x20DC, 1}, 314 | {0x20E1, 0x3005, 0x3005 - 0x20E1}, 315 | {0x302A, 0x302F, 1}, 316 | {0x3031, 0x3035, 1}, 317 | {0x3099, 0x309A, 1}, 318 | {0x309D, 0x309E, 1}, 319 | {0x30FC, 0x30FE, 1}, 320 | }, 321 | } 322 | 323 | func isNameByte(c byte) bool { 324 | return 'A' <= c && c <= 'Z' || 325 | 'a' <= c && c <= 'z' || 326 | '0' <= c && c <= '9' || 327 | c == '_' || c == '.' 
|| c == '-' 328 | } 329 | 330 | func isName(s []byte) bool { 331 | if len(s) == 0 { 332 | return false 333 | } 334 | c, n := utf8.DecodeRune(s) 335 | if c == utf8.RuneError && n == 1 { 336 | return false 337 | } 338 | if !unicode.Is(first, c) { 339 | return false 340 | } 341 | for n < len(s) { 342 | s = s[n:] 343 | c, n = utf8.DecodeRune(s) 344 | if c == utf8.RuneError && n == 1 { 345 | return false 346 | } 347 | if !unicode.Is(first, c) && !unicode.Is(second, c) { 348 | return false 349 | } 350 | } 351 | return true 352 | } 353 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Santhosh Kumar Tekuri. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package xpathparser 6 | 7 | import ( 8 | "fmt" 9 | "strconv" 10 | "strings" 11 | ) 12 | 13 | type parser struct { 14 | lexer lexer 15 | tokens []token 16 | } 17 | 18 | func (p *parser) error(format string, args ...interface{}) error { 19 | return &Error{fmt.Sprintf(format, args...), p.lexer.xpath, p.token(0).begin} 20 | } 21 | 22 | func (p *parser) unexpectedToken() error { 23 | return p.error("unexpected token %s", p.token(0).kind) 24 | } 25 | 26 | func (p *parser) expectedTokens(expected ...kind) error { 27 | tokens := make([]string, len(expected)) 28 | for i, k := range expected { 29 | tokens[i] = k.String() 30 | } 31 | return p.error("expected %s, but got %v", strings.Join(tokens, " or "), p.token(0).kind) 32 | } 33 | 34 | func (p *parser) token(i int) token { 35 | for i > len(p.tokens)-1 { 36 | t, err := p.lexer.next() 37 | if err != nil { 38 | panic(err) 39 | } 40 | p.tokens = append(p.tokens, t) 41 | } 42 | return p.tokens[i] 43 | } 44 | 45 | func (p *parser) match(k kind) token { 46 | t := p.token(0) 47 | if t.kind != k { 48 | panic(p.error("expected %v, but got %v", k, 
t.kind)) 49 | } 50 | p.tokens = p.tokens[1:] 51 | return t 52 | } 53 | 54 | func (p *parser) parse() Expr { 55 | expr := p.orExpr() 56 | p.match(eof) 57 | return expr 58 | } 59 | 60 | func (p *parser) orExpr() Expr { 61 | expr := p.andExpr() 62 | if p.token(0).kind == or { 63 | p.match(or) 64 | return &BinaryExpr{expr, Or, p.orExpr()} 65 | } 66 | return expr 67 | } 68 | 69 | func (p *parser) andExpr() Expr { 70 | expr := p.equalityExpr() 71 | if p.token(0).kind == and { 72 | p.match(and) 73 | return &BinaryExpr{expr, And, p.andExpr()} 74 | } 75 | return expr 76 | } 77 | 78 | func (p *parser) equalityExpr() Expr { 79 | expr := p.relationalExpr() 80 | for { 81 | switch kind := p.token(0).kind; kind { 82 | case eq, neq: 83 | p.match(kind) 84 | expr = &BinaryExpr{expr, Op(kind), p.relationalExpr()} 85 | default: 86 | return expr 87 | } 88 | } 89 | } 90 | 91 | func (p *parser) relationalExpr() Expr { 92 | expr := p.additiveExpr() 93 | for { 94 | switch kind := p.token(0).kind; kind { 95 | case lt, lte, gt, gte: 96 | p.match(kind) 97 | expr = &BinaryExpr{expr, Op(kind), p.additiveExpr()} 98 | default: 99 | return expr 100 | } 101 | } 102 | } 103 | 104 | func (p *parser) additiveExpr() Expr { 105 | expr := p.multiplicativeExpr() 106 | for { 107 | switch kind := p.token(0).kind; kind { 108 | case plus, minus: 109 | p.match(kind) 110 | expr = &BinaryExpr{expr, Op(kind), p.multiplicativeExpr()} 111 | default: 112 | return expr 113 | } 114 | } 115 | } 116 | 117 | func (p *parser) multiplicativeExpr() Expr { 118 | expr := p.unaryExpr() 119 | for { 120 | switch kind := p.token(0).kind; kind { 121 | case multiply, div, mod: 122 | p.match(kind) 123 | expr = &BinaryExpr{expr, Op(kind), p.unaryExpr()} 124 | default: 125 | return expr 126 | } 127 | } 128 | } 129 | 130 | func (p *parser) unaryExpr() Expr { 131 | if p.token(0).kind == minus { 132 | p.match(minus) 133 | return &NegateExpr{p.unionExpr()} 134 | } 135 | return p.unionExpr() 136 | } 137 | 138 | func (p *parser) unionExpr() 
Expr { 139 | expr := p.pathExpr() 140 | if p.token(0).kind == pipe { 141 | p.match(pipe) 142 | return &BinaryExpr{expr, Union, p.orExpr()} 143 | } 144 | return expr 145 | } 146 | 147 | func (p *parser) pathExpr() Expr { 148 | switch p.token(0).kind { 149 | case number, literal: 150 | filter := p.filterExpr() 151 | switch p.token(0).kind { 152 | case slash, slashSlash: 153 | panic(p.error("nodeset expected")) 154 | } 155 | return filter 156 | case lparen, dollar: 157 | filter := p.filterExpr() 158 | switch p.token(0).kind { 159 | case slash, slashSlash: 160 | return &PathExpr{filter, p.locationPath(false)} 161 | } 162 | return filter 163 | case identifier: 164 | if (p.token(1).kind == lparen && !isNodeTypeName(p.token(0))) || (p.token(1).kind == colon && p.token(3).kind == lparen) { 165 | filter := p.filterExpr() 166 | switch p.token(0).kind { 167 | case slash, slashSlash: 168 | return &PathExpr{filter, p.locationPath(false)} 169 | } 170 | return filter 171 | } 172 | return p.locationPath(false) 173 | case dot, dotDot, star, at: 174 | return p.locationPath(false) 175 | case slash, slashSlash: 176 | return p.locationPath(true) 177 | default: 178 | panic(p.unexpectedToken()) 179 | } 180 | } 181 | 182 | func (p *parser) filterExpr() Expr { 183 | var expr Expr 184 | switch p.token(0).kind { 185 | case number: 186 | f, err := strconv.ParseFloat(p.match(number).text(), 64) 187 | if err != nil { 188 | panic(err) 189 | } 190 | expr = Number(f) 191 | case literal: 192 | expr = String(p.match(literal).text()) 193 | case lparen: 194 | p.match(lparen) 195 | expr = p.orExpr() 196 | p.match(rparen) 197 | case identifier: 198 | expr = p.functionCall() 199 | case dollar: 200 | expr = p.variableReference() 201 | } 202 | predicates := p.predicates() 203 | if len(predicates) == 0 { 204 | return expr 205 | } 206 | return &FilterExpr{expr, predicates} 207 | } 208 | 209 | func (p *parser) functionCall() *FuncCall { 210 | prefix := "" 211 | if p.token(1).kind == colon { 212 | prefix = 
p.match(identifier).text() 213 | p.match(colon) 214 | } 215 | local := p.match(identifier).text() 216 | p.match(lparen) 217 | args := p.arguments() 218 | p.match(rparen) 219 | return &FuncCall{prefix, local, args} 220 | } 221 | 222 | func (p *parser) arguments() []Expr { 223 | var args []Expr 224 | for p.token(0).kind != rparen { 225 | args = append(args, p.orExpr()) 226 | if p.token(0).kind == comma { 227 | p.match(comma) 228 | continue 229 | } 230 | break 231 | } 232 | return args 233 | } 234 | 235 | func (p *parser) predicates() []Expr { 236 | var predicates []Expr 237 | for p.token(0).kind == lbracket { 238 | p.match(lbracket) 239 | predicates = append(predicates, p.orExpr()) 240 | p.match(rbracket) 241 | } 242 | return predicates 243 | } 244 | 245 | func (p *parser) variableReference() *VarRef { 246 | p.match(dollar) 247 | prefix := "" 248 | if p.token(1).kind == colon { 249 | prefix = p.match(identifier).text() 250 | p.match(colon) 251 | } 252 | return &VarRef{prefix, p.match(identifier).text()} 253 | } 254 | 255 | func (p *parser) locationPath(abs bool) *LocationPath { 256 | switch p.token(0).kind { 257 | case slash, slashSlash: 258 | if abs { 259 | return p.absoluteLocationPath() 260 | } 261 | return p.relativeLocationPath() 262 | case at, identifier, dot, dotDot, star: 263 | return p.relativeLocationPath() 264 | } 265 | panic(p.unexpectedToken()) 266 | } 267 | 268 | func (p *parser) absoluteLocationPath() *LocationPath { 269 | var steps []*Step 270 | switch p.token(0).kind { 271 | case slash: 272 | p.match(slash) 273 | switch p.token(0).kind { 274 | case dot, dotDot, at, identifier, star: 275 | steps = p.steps() 276 | } 277 | case slashSlash: 278 | p.match(slashSlash) 279 | steps = append(steps, &Step{DescendantOrSelf, Node, nil}) 280 | switch p.token(0).kind { 281 | case dot, dotDot, at, identifier, star: 282 | steps = append(steps, p.steps()...) 
283 | default: 284 | panic(p.error(`locationPath cannot end with "//"`)) 285 | } 286 | } 287 | return &LocationPath{true, steps} 288 | } 289 | 290 | func (p *parser) relativeLocationPath() *LocationPath { 291 | var steps []*Step 292 | switch p.token(0).kind { 293 | case slash: 294 | p.match(slash) 295 | case slashSlash: 296 | p.match(slashSlash) 297 | steps = append(steps, &Step{DescendantOrSelf, Node, nil}) 298 | } 299 | steps = append(steps, p.steps()...) 300 | return &LocationPath{false, steps} 301 | } 302 | 303 | func (p *parser) steps() []*Step { 304 | var steps []*Step 305 | switch p.token(0).kind { 306 | case dot, dotDot, at, identifier, star: 307 | steps = append(steps, p.step()) 308 | case eof: 309 | return steps 310 | default: 311 | panic(p.expectedTokens(dot, dotDot, at, identifier, star)) 312 | } 313 | for { 314 | switch p.token(0).kind { 315 | case slash: 316 | p.match(slash) 317 | case slashSlash: 318 | p.match(slashSlash) 319 | steps = append(steps, &Step{DescendantOrSelf, Node, nil}) 320 | default: 321 | return steps 322 | } 323 | switch p.token(0).kind { 324 | case dot, dotDot, at, identifier, star: 325 | steps = append(steps, p.step()) 326 | default: 327 | panic(p.expectedTokens(dot, dotDot, at, identifier, star)) 328 | } 329 | } 330 | } 331 | 332 | func (p *parser) step() *Step { 333 | var axis Axis 334 | var nodeTest NodeTest 335 | switch p.token(0).kind { 336 | case dot: 337 | p.match(dot) 338 | axis, nodeTest = Self, Node 339 | case dotDot: 340 | p.match(dotDot) 341 | axis, nodeTest = Parent, Node 342 | default: 343 | switch p.token(0).kind { 344 | case at: 345 | p.match(at) 346 | axis = Attribute 347 | case identifier: 348 | if p.token(1).kind == colonColon { 349 | axis = p.axisSpecifier() 350 | } else { 351 | axis = Child 352 | } 353 | case star: 354 | axis = Child 355 | } 356 | nodeTest = p.nodeTest(axis) 357 | } 358 | return &Step{axis, nodeTest, p.predicates()} 359 | } 360 | 361 | func (p *parser) nodeTest(axis Axis) NodeTest { 362 | 
switch p.token(0).kind { 363 | case identifier: 364 | if p.token(1).kind == lparen { 365 | return p.nodeTypeTest(axis) 366 | } 367 | return p.nameTest(axis) 368 | case star: 369 | return p.nameTest(axis) 370 | } 371 | panic(p.expectedTokens(identifier, star)) 372 | } 373 | 374 | func (p *parser) nodeTypeTest(axis Axis) NodeTest { 375 | ntype := p.match(identifier).text() 376 | p.match(lparen) 377 | var nodeTest NodeTest 378 | switch ntype { 379 | case "processing-instruction": 380 | piName := "" 381 | if p.token(0).kind == literal { 382 | piName = p.match(literal).text() 383 | } 384 | nodeTest = PITest(piName) 385 | case "node": 386 | nodeTest = Node 387 | case "text": 388 | nodeTest = Text 389 | case "comment": 390 | nodeTest = Comment 391 | default: 392 | panic(p.error("invalid nodeType %q", ntype)) 393 | } 394 | p.match(rparen) 395 | return nodeTest 396 | } 397 | 398 | func (p *parser) nameTest(axis Axis) NodeTest { 399 | var prefix string 400 | if p.token(0).kind == identifier && p.token(1).kind == colon { 401 | prefix = p.match(identifier).text() 402 | p.match(colon) 403 | } 404 | var local string 405 | switch p.token(0).kind { 406 | case identifier: 407 | local = p.match(identifier).text() 408 | case star: 409 | p.match(star) 410 | local = "*" 411 | default: 412 | // let us assume localName as empty-string and continue 413 | } 414 | return &NameTest{prefix, local} 415 | } 416 | 417 | func (p *parser) axisSpecifier() Axis { 418 | name := p.token(0).text() 419 | axis, ok := name2Axis[name] 420 | if !ok { 421 | panic(p.error("invalid axis %s", name)) 422 | } 423 | p.match(identifier) 424 | p.match(colonColon) 425 | return axis 426 | } 427 | 428 | func isNodeTypeName(t token) bool { 429 | switch t.text() { 430 | case "node", "comment", "text", "processing-instruction": 431 | return true 432 | default: 433 | return false 434 | } 435 | } 436 | -------------------------------------------------------------------------------- /xpath.go: 
-------------------------------------------------------------------------------- 1 | // Copyright 2017 Santhosh Kumar Tekuri. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package xpathparser 6 | 7 | import ( 8 | "fmt" 9 | "runtime" 10 | "strconv" 11 | "strings" 12 | ) 13 | 14 | // Error is the error type returned by Parse function. 15 | // 16 | // It represents a syntax error in the XPath expression. 17 | type Error struct { 18 | Msg string 19 | XPath string 20 | Offset int 21 | } 22 | 23 | func (e *Error) Error() string { 24 | return fmt.Sprintf("%s in xpath %s at offset %d", e.Msg, e.XPath, e.Offset) 25 | } 26 | 27 | // Axis specifies the tree relationship between the nodes selected by the location step and the context node. 28 | type Axis int 29 | 30 | // Possible values for Axis. 31 | const ( 32 | Child Axis = iota 33 | Descendant 34 | Parent 35 | Ancestor 36 | FollowingSibling 37 | PrecedingSibling 38 | Following 39 | Preceding 40 | Attribute 41 | Namespace 42 | Self 43 | DescendantOrSelf 44 | AncestorOrSelf 45 | ) 46 | 47 | var axisNames = []string{ 48 | "child", 49 | "descendant", 50 | "parent", 51 | "ancestor", 52 | "following-sibling", 53 | "preceding-sibling", 54 | "following", 55 | "preceding", 56 | "attribute", 57 | "namespace", 58 | "self", 59 | "descendant-or-self", 60 | "ancestor-or-self", 61 | } 62 | 63 | func (a Axis) String() string { 64 | return axisNames[a] 65 | } 66 | 67 | var name2Axis = make(map[string]Axis) 68 | 69 | func init() { 70 | for i, name := range axisNames { 71 | name2Axis[name] = Axis(i) 72 | } 73 | } 74 | 75 | // NodeType represents test on node type. 76 | type NodeType int 77 | 78 | // Possible values for NodeType. 
79 | const ( 80 | Comment NodeType = iota 81 | Text 82 | Node 83 | ) 84 | 85 | var nodeTypeNames = []string{"comment()", "text()", "node()"} 86 | 87 | func (nt NodeType) String() string { 88 | return nodeTypeNames[nt] 89 | } 90 | 91 | // Op represents XPath binrary operator. 92 | type Op int 93 | 94 | // Possible values for Op. 95 | const ( 96 | EQ Op = iota 97 | NEQ 98 | LT 99 | LTE 100 | GT 101 | GTE 102 | Add 103 | Subtract 104 | Multiply 105 | Mod 106 | Div 107 | And 108 | Or 109 | Union 110 | ) 111 | 112 | func (op Op) String() string { 113 | str := kind(op).String() 114 | return str[1 : len(str)-1] 115 | } 116 | 117 | // An Expr is an interface holding one of the types: 118 | // *LocationPath, *FilterExpr, *PathExpr, *BinaryExpr, *NegateExpr, *VarRef, *FuncCall, Number or String 119 | type Expr interface{} 120 | 121 | // BinaryExpr represents a binary operation. 122 | type BinaryExpr struct { 123 | LHS Expr 124 | Op Op 125 | RHS Expr 126 | } 127 | 128 | func (b *BinaryExpr) String() string { 129 | return fmt.Sprintf("(%s %s %s)", b.LHS, b.Op, b.RHS) 130 | } 131 | 132 | // NegateExpr represents unary operator `-`. 133 | type NegateExpr struct { 134 | Expr Expr 135 | } 136 | 137 | func (n *NegateExpr) String() string { 138 | return fmt.Sprintf("-%s", n.Expr) 139 | } 140 | 141 | // LocationPath represents XPath location path. 142 | type LocationPath struct { 143 | Abs bool 144 | Steps []*Step 145 | } 146 | 147 | func (lp *LocationPath) String() string { 148 | s := make([]string, len(lp.Steps)) 149 | for i, step := range lp.Steps { 150 | s[i] = step.String() 151 | } 152 | if lp.Abs { 153 | return fmt.Sprintf("/%s", strings.Join(s, "/")) 154 | } 155 | return fmt.Sprintf("%s", strings.Join(s, "/")) 156 | } 157 | 158 | // FilterExpr represents https://www.w3.org/TR/xpath/#NT-FilterExpr. 
159 | type FilterExpr struct { 160 | Expr Expr 161 | Predicates []Expr 162 | } 163 | 164 | func (f *FilterExpr) String() string { 165 | return fmt.Sprintf("(%s)%s", f.Expr, predicatesString(f.Predicates)) 166 | } 167 | 168 | // PathExpr represents https://www.w3.org/TR/xpath/#NT-PathExpr. 169 | type PathExpr struct { 170 | Filter Expr 171 | LocationPath *LocationPath 172 | } 173 | 174 | func (p *PathExpr) String() string { 175 | return fmt.Sprintf("(%s)/%s", p.Filter, p.LocationPath) 176 | } 177 | 178 | // Step represents XPath location step. 179 | type Step struct { 180 | Axis Axis 181 | NodeTest NodeTest 182 | Predicates []Expr 183 | } 184 | 185 | func (s *Step) String() string { 186 | return fmt.Sprintf("%v::%s%s", s.Axis, s.NodeTest, predicatesString(s.Predicates)) 187 | } 188 | 189 | // A NodeTest is an interface holding one of the types: 190 | // NodeType, *NameTest, or PITest. 191 | type NodeTest interface{} 192 | 193 | // NameTest represents https://www.w3.org/TR/xpath/#NT-NameTest. 194 | type NameTest struct { 195 | Prefix string 196 | Local string 197 | } 198 | 199 | func (nt *NameTest) String() string { 200 | if nt.Prefix == "" { 201 | return nt.Local 202 | } 203 | return fmt.Sprintf("%s:%s", nt.Prefix, nt.Local) 204 | } 205 | 206 | // PITest represents processing-instruction test. 207 | type PITest string 208 | 209 | func (pt PITest) String() string { 210 | return fmt.Sprintf("processing-instruction(%q)", string(pt)) 211 | } 212 | 213 | // VarRef represents https://www.w3.org/TR/xpath/#NT-VariableReference. 214 | type VarRef struct { 215 | Prefix string 216 | Local string 217 | } 218 | 219 | func (vr *VarRef) String() string { 220 | if vr.Prefix == "" { 221 | return fmt.Sprintf("$%s", vr.Local) 222 | } 223 | return fmt.Sprintf("$%s:%s", vr.Prefix, vr.Local) 224 | } 225 | 226 | // FuncCall represents https://www.w3.org/TR/xpath/#section-Function-Calls. 
227 | type FuncCall struct { 228 | Prefix string 229 | Local string 230 | Args []Expr 231 | } 232 | 233 | func (fc *FuncCall) String() string { 234 | p := make([]string, len(fc.Args)) 235 | for i, param := range fc.Args { 236 | p[i] = fmt.Sprint(param) 237 | } 238 | if fc.Prefix == "" { 239 | return fmt.Sprintf("%s(%s)", fc.Local, strings.Join(p, ", ")) 240 | } 241 | return fmt.Sprintf("%s:%s(%s)", fc.Prefix, fc.Local, strings.Join(p, ", ")) 242 | } 243 | 244 | // Number represents number literal. 245 | type Number float64 246 | 247 | func (n Number) String() string { 248 | return strconv.FormatFloat(float64(n), 'f', -1, 64) 249 | } 250 | 251 | // String represents string literal. 252 | type String string 253 | 254 | func (s String) String() string { 255 | return strconv.Quote(string(s)) 256 | } 257 | 258 | // MustParse is like Parse but panics if the xpath expression has error. 259 | // It simplifies safe initialization of global variables holding parsed expressions. 260 | func MustParse(xpath string) Expr { 261 | p := &parser{lexer: lexer{xpath: xpath}} 262 | return p.parse() 263 | } 264 | 265 | // Parse parses given xpath 1.0 expression. 266 | func Parse(xpath string) (expr Expr, err error) { 267 | defer func() { 268 | if r := recover(); r != nil { 269 | if _, ok := r.(runtime.Error); ok { 270 | panic(r) 271 | } 272 | if _, ok := r.(error); ok { 273 | err = r.(error) 274 | } else { 275 | err = fmt.Errorf("%v", r) 276 | } 277 | } 278 | }() 279 | return MustParse(xpath), nil 280 | } 281 | 282 | func predicatesString(predicates []Expr) string { 283 | p := make([]string, len(predicates)) 284 | for i, predicate := range predicates { 285 | p[i] = fmt.Sprintf("[%s]", predicate) 286 | } 287 | return strings.Join(p, "") 288 | } 289 | -------------------------------------------------------------------------------- /xpath_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Santhosh Kumar Tekuri. 
All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package xpathparser_test 6 | 7 | import ( 8 | "encoding/xml" 9 | "fmt" 10 | "testing" 11 | 12 | . "github.com/santhosh-tekuri/xpathparser" 13 | ) 14 | 15 | func TestInvalidXPaths(t *testing.T) { 16 | tests := []string{ 17 | ``, 18 | `1.2.3`, 19 | `"one`, 20 | `'one`, 21 | `hero::*`, 22 | `$`, 23 | `$$`, 24 | `+`, 25 | `!`, 26 | `!=`, 27 | `abc def`, 28 | `abc and`, 29 | `child::`, 30 | `/abc/`, 31 | `abc/`, 32 | `@`, 33 | `/@`, 34 | `child::abcd()`, 35 | `;abc`, 36 | `abc;def`, 37 | `/;`, 38 | `[`, 39 | `a[`, 40 | `a[1`, 41 | `a[]`, 42 | `@-name`, 43 | `@1one`, 44 | `@.one`, 45 | `abc^def`, 46 | `abc#def`, 47 | `foo(`, 48 | `foo(1`, 49 | `foo(1,`, 50 | `a|`, 51 | `//`, 52 | `//+1`, 53 | `(.)/123`, 54 | `123/`, 55 | `abc[]`, 56 | } 57 | for _, test := range tests { 58 | if _, err := Parse(test); err == nil { 59 | t.Errorf("FAIL: error expected for %s", test) 60 | } else { 61 | t.Log(err) 62 | } 63 | } 64 | } 65 | 66 | func TestCompiledXPaths(t *testing.T) { 67 | tests := map[string]Expr{ 68 | `1`: Number(1), 69 | `-1`: &NegateExpr{Number(1)}, 70 | `1.5`: Number(1.5), 71 | `.5`: Number(.5), 72 | `01.5`: Number(1.5), 73 | `1+2`: &BinaryExpr{Number(1), Add, Number(2)}, 74 | `1-2`: &BinaryExpr{Number(1), Subtract, Number(2)}, 75 | `1*2`: &BinaryExpr{Number(1), Multiply, Number(2)}, 76 | `1+2*3`: &BinaryExpr{ 77 | Number(1), 78 | Add, 79 | &BinaryExpr{Number(2), Multiply, Number(3)}, 80 | }, 81 | `(1+2)*3`: &BinaryExpr{ 82 | &BinaryExpr{Number(1), Add, Number(2)}, 83 | Multiply, 84 | Number(3), 85 | }, 86 | `$var`: &VarRef{"", "var"}, 87 | `$ns:var`: &VarRef{"ns", "var"}, 88 | `1=2`: &BinaryExpr{Number(1), EQ, Number(2)}, 89 | `1!=2`: &BinaryExpr{Number(1), NEQ, Number(2)}, 90 | `1 and 2`: &BinaryExpr{Number(1), And, Number(2)}, 91 | `1 or2`: &BinaryExpr{Number(1), Or, Number(2)}, 92 | `1 mod2`: &BinaryExpr{Number(1), Mod, 
Number(2)}, 93 | `1 div2`: &BinaryExpr{Number(1), Div, Number(2)}, 94 | `1 <2`: &BinaryExpr{Number(1), LT, Number(2)}, 95 | `1 <=2`: &BinaryExpr{Number(1), LTE, Number(2)}, 96 | `1 >2`: &BinaryExpr{Number(1), GT, Number(2)}, 97 | `1 >=2`: &BinaryExpr{Number(1), GTE, Number(2)}, 98 | `"str"`: String("str"), 99 | `'str'`: String("str"), 100 | `/a`: &LocationPath{true, []*Step{ 101 | {Child, &NameTest{"", "a"}, nil}, 102 | }}, 103 | `abc ander`: &BinaryExpr{ 104 | &LocationPath{false, []*Step{ 105 | {Child, &NameTest{"", "abc"}, nil}, 106 | }}, 107 | And, 108 | &LocationPath{false, []*Step{ 109 | {Child, &NameTest{"", "er"}, nil}, 110 | }}, 111 | }, 112 | `abc|er`: &BinaryExpr{ 113 | &LocationPath{false, []*Step{ 114 | {Child, &NameTest{"", "abc"}, nil}, 115 | }}, 116 | Union, 117 | &LocationPath{false, []*Step{ 118 | {Child, &NameTest{"", "er"}, nil}, 119 | }}, 120 | }, 121 | `a[1]`: &LocationPath{false, []*Step{ 122 | {Child, &NameTest{"", "a"}, []Expr{Number(1)}}, 123 | }}, 124 | `a[1][2]`: &LocationPath{false, []*Step{ 125 | {Child, &NameTest{"", "a"}, []Expr{Number(1), Number(2)}}, 126 | }}, 127 | `foo(1)`: &FuncCall{"", "foo", []Expr{ 128 | Number(1), 129 | }}, 130 | `foo(1,2)`: &FuncCall{"", "foo", []Expr{ 131 | Number(1), 132 | Number(2), 133 | }}, 134 | `foo(1, ns:bar(2), /a)`: &FuncCall{"", "foo", []Expr{ 135 | Number(1), 136 | &FuncCall{"ns", "bar", []Expr{ 137 | Number(2), 138 | }}, 139 | &LocationPath{true, []*Step{ 140 | {Child, &NameTest{"", "a"}, nil}, 141 | }}, 142 | }}, 143 | `.`: &LocationPath{false, []*Step{ 144 | {Self, Node, nil}, 145 | }}, 146 | `..`: &LocationPath{false, []*Step{ 147 | {Parent, Node, nil}, 148 | }}, 149 | `(/a/b)[5]`: &FilterExpr{ 150 | &LocationPath{true, []*Step{ 151 | {Child, &NameTest{"", "a"}, nil}, 152 | {Child, &NameTest{"", "b"}, nil}, 153 | }}, 154 | []Expr{Number(5)}, 155 | }, 156 | `(/a/b)/c`: &PathExpr{ 157 | &LocationPath{true, []*Step{ 158 | {Child, &NameTest{"", "a"}, nil}, 159 | {Child, &NameTest{"", "b"}, nil}, 
160 | }}, 161 | &LocationPath{false, []*Step{ 162 | {Child, &NameTest{"", "c"}, nil}, 163 | }}, 164 | }, 165 | `a//b`: &LocationPath{false, []*Step{ 166 | {Child, &NameTest{"", "a"}, nil}, 167 | {DescendantOrSelf, Node, nil}, 168 | {Child, &NameTest{"", "b"}, nil}, 169 | }}, 170 | `//emp`: &LocationPath{true, []*Step{ 171 | {DescendantOrSelf, Node, nil}, 172 | {Child, &NameTest{"", "emp"}, nil}, 173 | }}, 174 | `*//emp`: &LocationPath{false, []*Step{ 175 | {Child, &NameTest{"", "*"}, nil}, 176 | {DescendantOrSelf, Node, nil}, 177 | {Child, &NameTest{"", "emp"}, nil}, 178 | }}, 179 | `processing-instruction('xsl')`: &LocationPath{false, []*Step{ 180 | {Child, PITest("xsl"), nil}, 181 | }}, 182 | `node()`: &LocationPath{false, []*Step{ 183 | {Child, Node, nil}, 184 | }}, 185 | `text()`: &LocationPath{false, []*Step{ 186 | {Child, Text, nil}, 187 | }}, 188 | `comment()`: &LocationPath{false, []*Step{ 189 | {Child, Comment, nil}, 190 | }}, 191 | `ns1:emp`: &LocationPath{false, []*Step{ 192 | {Child, &NameTest{"ns1", "emp"}, nil}, 193 | }}, 194 | `a:`: &LocationPath{false, []*Step{ 195 | {Child, &NameTest{"a", ""}, nil}, 196 | }}, 197 | `document('test.xml')/*`: &PathExpr{ 198 | &FuncCall{"", "document", []Expr{ 199 | String("test.xml"), 200 | }}, 201 | &LocationPath{false, []*Step{ 202 | {Child, &NameTest{"", "*"}, nil}, 203 | }}, 204 | }, 205 | `//book[author = editor]/price`: &LocationPath{true, []*Step{ 206 | {DescendantOrSelf, Node, nil}, 207 | {Child, &NameTest{"", "book"}, []Expr{ 208 | &BinaryExpr{ 209 | &LocationPath{false, []*Step{ 210 | {Child, &NameTest{"", "author"}, nil}, 211 | }}, 212 | EQ, 213 | &LocationPath{false, []*Step{ 214 | {Child, &NameTest{"", "editor"}, nil}, 215 | }}, 216 | }, 217 | }}, 218 | {Child, &NameTest{"", "price"}, nil}, 219 | }}, 220 | `(a)//b`: &PathExpr{ 221 | &LocationPath{false, []*Step{ 222 | {Child, &NameTest{"", "a"}, nil}, 223 | }}, 224 | &LocationPath{false, []*Step{ 225 | {DescendantOrSelf, Node, nil}, 226 | {Child, 
&NameTest{"", "b"}, nil}, 227 | }}, 228 | }, 229 | `(.)/`: &PathExpr{ 230 | &LocationPath{false, []*Step{ 231 | {Self, Node, nil}, 232 | }}, 233 | &LocationPath{false, nil}, 234 | }, 235 | } 236 | for k, v := range tests { 237 | t.Logf("compiling %s", k) 238 | expr, err := Parse(k) 239 | if err != nil { 240 | t.Errorf("FAIL: %v", err) 241 | continue 242 | } 243 | t.Logf("expr: %v", expr) 244 | if v != nil && !equals(v, expr) { 245 | t.Error("FAIL: expr mismatch") 246 | b, _ := xml.MarshalIndent(expr, " ", " ") 247 | t.Log(string(b)) 248 | } 249 | } 250 | } 251 | 252 | func equals(v1, v2 interface{}) bool { 253 | switch v1 := v1.(type) { 254 | case nil: 255 | return v2 == nil 256 | case Number, String, NodeType, PITest: 257 | return v1 == v2 258 | case *VarRef: 259 | v2, ok := v2.(*VarRef) 260 | return ok && *v1 == *v2 261 | case *NegateExpr: 262 | v2, ok := v2.(*NegateExpr) 263 | return ok && equals(v1.Expr, v2.Expr) 264 | case *BinaryExpr: 265 | v2, ok := v2.(*BinaryExpr) 266 | return ok && equals(v1.LHS, v2.LHS) && equals(v1.RHS, v2.RHS) && v1.Op == v2.Op 267 | case *LocationPath: 268 | v2, ok := v2.(*LocationPath) 269 | if !ok || v1.Abs != v2.Abs || len(v1.Steps) != len(v2.Steps) { 270 | return false 271 | } 272 | for i := range v1.Steps { 273 | if !equals(v1.Steps[i], v2.Steps[i]) { 274 | return false 275 | } 276 | } 277 | return true 278 | case *Step: 279 | v2, ok := v2.(*Step) 280 | return ok && v1.Axis == v2.Axis && equals(v1.NodeTest, v2.NodeTest) && equals(v1.Predicates, v2.Predicates) 281 | case *NameTest: 282 | v2, ok := v2.(*NameTest) 283 | return ok && *v1 == *v2 284 | case []Expr: 285 | v2, ok := v2.([]Expr) 286 | if !ok || len(v1) != len(v2) { 287 | return false 288 | } 289 | for i := range v1 { 290 | if !equals(v1[i], v2[i]) { 291 | return false 292 | } 293 | } 294 | return true 295 | case *FuncCall: 296 | v2, ok := v2.(*FuncCall) 297 | return ok && v1.Prefix == v2.Prefix && v1.Local == v2.Local && equals(v1.Args, v2.Args) 298 | case *FilterExpr: 
299 | v2, ok := v2.(*FilterExpr) 300 | return ok && equals(v1.Expr, v2.Expr) && equals(v1.Predicates, v2.Predicates) 301 | case *PathExpr: 302 | v2, ok := v2.(*PathExpr) 303 | return ok && equals(v1.Filter, v2.Filter) && equals(v1.LocationPath, v2.LocationPath) 304 | default: 305 | panic(fmt.Sprintf("equals for %T not implemented yet", v1)) 306 | } 307 | } 308 | --------------------------------------------------------------------------------