├── .gitignore ├── .metadata ├── CHANGELOG.md ├── LICENSE ├── README.md ├── lib ├── token_kind.dart ├── xpath_parser.dart └── xpath_selector.dart ├── pubspec.lock ├── pubspec.yaml └── test └── xpath_test.dart /.gitignore: -------------------------------------------------------------------------------- 1 | # Miscellaneous 2 | *.class 3 | *.log 4 | *.pyc 5 | *.swp 6 | .DS_Store 7 | .atom/ 8 | .buildlog/ 9 | .history 10 | .svn/ 11 | 12 | # IntelliJ related 13 | *.iml 14 | *.ipr 15 | *.iws 16 | .idea/ 17 | 18 | # The .vscode folder contains launch configuration and tasks you configure in 19 | # VS Code which you may wish to be included in version control, so this line 20 | # is commented out by default. 21 | #.vscode/ 22 | 23 | # Flutter/Dart/Pub related 24 | **/doc/api/ 25 | .dart_tool/ 26 | .flutter-plugins 27 | .flutter-plugins-dependencies 28 | .packages 29 | .pub-cache/ 30 | .pub/ 31 | build/ 32 | 33 | # Android related 34 | **/android/**/gradle-wrapper.jar 35 | **/android/.gradle 36 | **/android/captures/ 37 | **/android/gradlew 38 | **/android/gradlew.bat 39 | **/android/local.properties 40 | **/android/**/GeneratedPluginRegistrant.java 41 | 42 | # iOS/XCode related 43 | **/ios/**/*.mode1v3 44 | **/ios/**/*.mode2v3 45 | **/ios/**/*.moved-aside 46 | **/ios/**/*.pbxuser 47 | **/ios/**/*.perspectivev3 48 | **/ios/**/*sync/ 49 | **/ios/**/.sconsign.dblite 50 | **/ios/**/.tags* 51 | **/ios/**/.vagrant/ 52 | **/ios/**/DerivedData/ 53 | **/ios/**/Icon? 
54 | **/ios/**/Pods/ 55 | **/ios/**/.symlinks/ 56 | **/ios/**/profile 57 | **/ios/**/xcuserdata 58 | **/ios/.generated/ 59 | **/ios/Flutter/App.framework 60 | **/ios/Flutter/Flutter.framework 61 | **/ios/Flutter/Flutter.podspec 62 | **/ios/Flutter/Generated.xcconfig 63 | **/ios/Flutter/app.flx 64 | **/ios/Flutter/app.zip 65 | **/ios/Flutter/flutter_assets/ 66 | **/ios/Flutter/flutter_export_environment.sh 67 | **/ios/ServiceDefinitions.json 68 | **/ios/Runner/GeneratedPluginRegistrant.* 69 | 70 | # Exceptions to above rules. 71 | !**/ios/**/default.mode1v3 72 | !**/ios/**/default.mode2v3 73 | !**/ios/**/default.pbxuser 74 | !**/ios/**/default.perspectivev3 75 | !/packages/flutter_tools/test/data/dart_dependencies_test/**/.packages 76 | -------------------------------------------------------------------------------- /.metadata: -------------------------------------------------------------------------------- 1 | # This file tracks properties of this Flutter project. 2 | # Used by Flutter tool to assess capabilities and perform upgrades etc. 3 | # 4 | # This file should be version controlled and should not be manually edited. 5 | 6 | version: 7 | revision: 0b8abb4724aa590dd0f429683339b1e045a1594d 8 | channel: stable 9 | 10 | project_type: package 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [1.0.0] - xpath package create 2 | ## [1.0.1] - add method comment 3 | ## [1.0.2] - fix Health suggestions 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xpath 2 | [![Pub](https://img.shields.io/pub/v/xpath_parse.svg?style=flat-square)](https://pub.dartlang.org/packages/xpath_parse) 3 | [![support](https://img.shields.io/badge/platform-flutter%7Cdart%20vm-ff69b4.svg?style=flat-square)](https://github.com/codingfd/xpath)
4 | XPath selector based on html. 5 | ## Get started 6 | ### Add dependency 7 | ```yaml 8 | dependencies: 9 | xpath_parse: lastVersion 10 | ``` 11 | ### Super simple to use 12 | 13 | ```dart 14 | final String html = ''' 15 | 16 |
github.com
17 |
head
18 |
1234
19 |
end
20 | 21 | '''; 22 | 23 | XPath.source(html).query("//div/a/text()").list() 24 | 25 | ``` 26 | 27 | For more examples, refer to [this](https://github.com/codingfd/xpath/blob/master/test/xpath_test.dart) 28 | 29 | ## Syntax supported: 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
NameExpression
immediate parent/
parent//
attribute[@key=value]
nth childtag[n]
attribute/@key
wildcard in tagname/*
functionfunction()
64 | 65 | ### Extended syntax supported: 66 | 67 | The following syntax is not standard XPath; it is an extension found only in Xsoup (for convenience in extracting HTML; refer to the Jsoup CSS selector syntax): 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 |
NameExpressionSupport
attribute value not equals[@key!=value]yes
attribute value starts with[@key^=value]yes
attribute value end with[@key$=value]yes
attribute value contains[@key*=value]yes
attribute value contains whole word (space-separated)[@key~=value]yes
-------------------------------------------------------------------------------- /lib/token_kind.dart: --------------------------------------------------------------------------------
/// Integer tags for the tokens recognised in an XPath expression:
/// path separators, position operators and attribute-match operators.
class TokenKind {
  // Path types.
  static const int CHILD = 1; // "/"  : direct children of the context node
  static const int ROOT = 2; // "//" : children and all deeper descendants
  static const int CURRENT = 3; // "."
  static const int PARENT = 4; // ".."

  // List position operators.
  static const int PLUS = 11; // +
  static const int MINUS = 12; // -
  static const int GREATER = 13; // >
  static const int GREATER_OR_EQUALS = 14; // >=
  static const int LESS = 15; // <
  static const int LESS_OR_EQUALS = 16; // <=

  static const Map<String, int> _POSITION_OPERATOR = {
    "+": PLUS,
    "-": MINUS,
    ">": GREATER,
    ">=": GREATER_OR_EQUALS,
    "<": LESS,
    "<=": LESS_OR_EQUALS
  };

  // Attribute match types.
  static const int EQUALS = 28; // =
  static const int NOT_EQUALS = 29; // !=
  static const int INCLUDES = 530; // ~= (whitespace-separated word match)
  static const int PREFIX_MATCH = 531; // ^=
  static const int SUFFIX_MATCH = 532; // $=
  static const int SUBSTRING_MATCH = 533; // *=
  static const int NO_MATCH = 534; // No operator.

  static const Map<String, int> _ATTR_OPERATOR = {
    "=": EQUALS,
    "!=": NOT_EQUALS,
    "~=": INCLUDES,
    "^=": PREFIX_MATCH,
    "\$=": SUFFIX_MATCH,
    "*=": SUBSTRING_MATCH
  };

  // Position predicate kinds.
  static const int NUM = 600; // [n]
  static const int LAST = 601; // last()
  static const int POSITION = 602; // position()

  /// Maps the textual position operator [text] to its token kind.
  ///
  /// Returns [NO_MATCH] when [text] is null or not a known operator.
  static int matchPositionOperator(String text) {
    return _POSITION_OPERATOR[text] ?? NO_MATCH;
  }

  /// Maps the textual attribute operator [text] to its token kind.
  ///
  /// Returns [NO_MATCH] when [text] is null or not a known operator.
  static int matchAttrOperator(String text) {
    return _ATTR_OPERATOR[text] ?? NO_MATCH;
  }
}
-------------------------------------------------------------------------------- /lib/xpath_parser.dart: --------------------------------------------------------------------------------
import 'package:xpath_parse/token_kind.dart';
import 'package:xpath_parse/xpath_selector.dart';

/// Parses the [xpath] string into a [SelectorGroup].
///
/// The expression is split at every `/` or `//`; each step becomes one
/// [Selector]. A trailing `text()` or `@attr` step is not a selector: it is
/// stored as the group's output specifier instead.
///
/// Throws a [FormatException] when [xpath] contains no `/` at all.
SelectorGroup parseSelectorGroup(String xpath) {
  var selectors = <Selector>[];
  String output;

  var matches = RegExp("//|/").allMatches(xpath).toList();
  if (matches.isEmpty) {
    // Previously this surfaced as a StateError from `.last` on an empty list.
    throw FormatException("'$xpath' is not a valid xpath query string");
  }

  // Each step runs from its own separator up to the next separator
  // (or to the end of the expression for the last step).
  var selectorSources = <String>[];
  for (var index = 0; index < matches.length; index++) {
    var end =
        index + 1 < matches.length ? matches[index + 1].start : xpath.length;
    selectorSources.add(xpath.substring(matches[index].start, end));
  }

  // A trailing text() / @attr step selects what to output, not which nodes.
  var lastSource = selectorSources.last.replaceAll("/", "");
  if (lastSource == "text()" || lastSource.startsWith("@")) {
    output = selectorSources.removeLast();
  }

  for (var source in selectorSources) {
    selectors.add(_parseSelector(source));
  }

  // The evaluation root is the <html> element, whose children are <head> and
  // <body>. An absolute path that does not start at one of them is assumed
  // to be relative to <body>.
  if (selectors.isNotEmpty) {
    var firstSelector = selectors.first;
    if (firstSelector.operatorKind == TokenKind.CHILD) {
      var simpleSelector = firstSelector.simpleSelectors.first;
      // Was `!= "body" || != "head"`, which is always true and turned
      // "/body/div" into "/body/body/div".
      if (simpleSelector != null &&
          simpleSelector.name != "body" &&
          simpleSelector.name != "head") {
        selectors.insert(
            0,
            Selector(TokenKind.CHILD,
                <SimpleSelector>[ElementSelector("body", "/body")]));
      }
    }
  }

  return SelectorGroup(selectors, output, xpath);
}

/// Parses one path step [input] (including its leading `/` or `//`) into a
/// [Selector].
///
/// Throws a [FormatException] when [input] does not start with `/`.
Selector _parseSelector(String input) {
  int type;
  String source;
  var simpleSelectors = <SimpleSelector>[];
  if (input.startsWith("//")) {
    type = TokenKind.ROOT;
    source = input.substring(2);
  } else if (input.startsWith("/")) {
    type = TokenKind.CHILD;
    source = input.substring(1);
  } else {
    throw FormatException("'$input' is not a valid xpath query string");
  }

  // ".." selects the parent of every current node.
  if (source == "..") {
    return Selector(TokenKind.PARENT, <SimpleSelector>[ElementSelector("*", "")]);
  }

  var selector = Selector(type, simpleSelectors);

  // Split "name[predicate]" into element name and predicate.
  var match = RegExp(r"(.+)\[(.+)\]").firstMatch(source);
  if (match == null) {
    simpleSelectors.add(ElementSelector(source, input));
    return selector;
  }

  simpleSelectors.add(ElementSelector(match.group(1), input));
  var group = match.group(2);

  // Attribute predicate: [@key], [@key=value], [@key^=value], ...
  if (group.startsWith("@")) {
    var m = RegExp(r"^@(.+?)(=|!=|\^=|~=|\*=|\$=)(.+)$").firstMatch(group);
    if (m != null) {
      var name = m.group(1);
      var op = TokenKind.matchAttrOperator(m.group(2));
      // Quotes around the value are optional and are stripped.
      var value = m.group(3).replaceAll(RegExp("['\"]"), "");
      simpleSelectors.add(AttributeSelector(name, op, value, group));
    } else {
      // Bare [@key]: only the presence of the attribute is required.
      simpleSelectors.add(
          AttributeSelector(group.substring(1), TokenKind.NO_MATCH, null, group));
    }
  }

  // Numeric predicate: [n]
  var m = RegExp(r"^\d+$").firstMatch(group);
  if (m != null) {
    var position = int.tryParse(m.group(0));
    selector.positionSelector =
        PositionSelector(TokenKind.NUM, TokenKind.NO_MATCH, position, input);
  }

  // position() predicate: [position()<n], [position()>=n], ...
  m = RegExp(r"^position\(\)(<|<=|>|>=)(\d+)$").firstMatch(group);
  if (m != null) {
    var op = TokenKind.matchPositionOperator(m.group(1));
    var value = int.tryParse(m.group(2));
    selector.positionSelector =
        PositionSelector(TokenKind.POSITION, op, value, input);
  }

  // last() predicate: [last()] or [last()-n]
  m = RegExp(r"^last\(\)(-)?(\d+)?$").firstMatch(group);
  if (m != null) {
    var op = TokenKind.matchPositionOperator(m.group(1));
    var value = int.tryParse(m.group(2) ?? "");
    selector.positionSelector =
        PositionSelector(TokenKind.LAST, op, value, input);
  }

  return selector;
}
-------------------------------------------------------------------------------- /lib/xpath_selector.dart: --------------------------------------------------------------------------------
import 'package:html/dom.dart';
import 'package:html/parser.dart';
import 'package:xpath_parse/token_kind.dart';
import 'package:xpath_parse/xpath_parser.dart';

/// Entry point: wraps a root node and runs XPath queries against it.
class XPath {
  final rootElement;

  XPath(this.rootElement);

  /// Parses [html] and returns an [XPath] rooted at the document element.
  static XPath source(String html) {
    var node = parse(html).documentElement;
    var evaluator = XPath(node);
    return evaluator;
  }

  /// Evaluates [xpath] against [rootElement] and returns the evaluator
  /// holding the results.
  SelectorEvaluator query(String xpath) {
    var evaluator = SelectorEvaluator();
    evaluator.matchSelectorGroup(rootElement, parseSelectorGroup(xpath));
    return evaluator;
  }
}

/// Walks the DOM and collects the elements matching a [SelectorGroup].
class SelectorEvaluator extends VisitorBase {
  // Element currently being tested by the visit* methods.
  Element _element;

  // Elements matched so far.
  var _results = <Element>[];

  // Candidates of the step in progress, before position filtering.
  var _temps = <Element>[];

  // Output specifier of the group ("/text()", "/@href", ...) or null.
  String _output;

  /// Appends to the results the elements reachable from [node] that match
  /// [selector]. Non-element nodes are ignored.
  void matchSelector(Node node, Selector selector) {
    _temps.clear();
    if (node is! Element) return;
    switch (selector.operatorKind) {
      case TokenKind.CHILD:
        _matchChildren(node, selector);
        break;
      case TokenKind.ROOT:
        _matchChildren(node, selector);
        // "//" also applies to every deeper level.
        for (var item in node.nodes) {
          matchSelector(item, selector);
        }
        break;
      case TokenKind.CURRENT:
        _element = node as Element;
        if (selector.visit(this)) {
          _results.add(_element);
        }
        break;
      case TokenKind.PARENT:
        {
          var parent = node.parent;
          // A node without a parent element has nothing to contribute;
          // previously a null was added to the results.
          if (parent == null) break;
          _element = parent;
          if (selector.visit(this)) {
            _results.add(parent);
          }
        }
        break;
    }
  }

  /// Collects the element children of [node] that match [selector], applies
  /// the selector's position predicate, and appends the survivors to the
  /// results.
  void _matchChildren(Node node, Selector selector) {
    for (var item in node.nodes) {
      if (item is Element) {
        _element = item;
        if (selector.visit(this)) {
          _temps.add(item);
        }
      }
    }
    _removeIfNotMatchPosition(selector);
    _results.addAll(_temps);
  }

  /// Evaluates every selector of [group] in order, starting from [node].
  /// Each step is applied to the results of the previous one.
  void matchSelectorGroup(Node node, SelectorGroup group) {
    _output = group.output;
    _results = node is Element ? <Element>[node] : <Element>[];
    for (var selector in group.selectors) {
      var list = List<Element>.of(_results);
      _results.clear();
      for (var item in list) {
        matchSelector(item, selector);
      }
    }
  }

  /// Returns the first entry of [list], or an empty string when there is none.
  String get() {
    var data = list();
    if (data.isNotEmpty) {
      return data.first;
    } else {
      return "";
    }
  }

  /// Renders the matched elements according to the output specifier:
  /// trimmed text for `/text()` and `//text()`, attribute values for `/@name`
  /// and `//@name`, outer HTML otherwise. The `//` forms also descend into
  /// the matched elements' descendants.
  List<String> list() {
    var list = <String>[];

    if (_output == "/text()") {
      for (var element in elements()) {
        list.add(element.text.trim());
      }
    } else if (_output == "//text()") {
      void getTextByElement(List<Element> elements) {
        for (var item in elements) {
          list.add(item.text.trim());
          getTextByElement(item.children);
        }
      }

      getTextByElement(elements());
    } else if (_output?.startsWith("/@") == true) {
      var attr = _output.substring(2);
      for (var element in elements()) {
        // `?.` because the attribute may be absent on this element.
        var attrValue = element.attributes[attr]?.trim();
        if (attrValue != null) {
          list.add(attrValue);
        }
      }
    } else if (_output?.startsWith("//@") == true) {
      var attr = _output.substring(3);
      void getAttrByElements(List<Element> elements) {
        for (var element in elements) {
          var attrValue = element.attributes[attr]?.trim();
          if (attrValue != null) {
            list.add(attrValue);
          }
        }
        for (var element in elements) {
          getAttrByElements(element.children);
        }
      }

      getAttrByElements(elements());
    } else {
      for (var element in elements()) {
        list.add(element.outerHtml);
      }
    }
    if (list.isEmpty) {
      print("xpath query result is empty");
    }
    return list;
  }

  /// The elements matched by the last query.
  List<Element> elements() => _results;

  FormatException _unsupported(selector) =>
      FormatException("'$selector' is not a valid selector");

  /// Tests the current element's attribute against [selector].
  /// The attribute must exist; with no operator, existence is enough.
  @override
  bool visitAttributeSelector(AttributeSelector selector) {
    // Match name first
    var value = _element.attributes[selector.name.toLowerCase()];
    if (value == null) return false;

    if (selector.operatorKind == TokenKind.NO_MATCH) return true;

    var select = '${selector.value}';
    switch (selector.operatorKind) {
      case TokenKind.EQUALS:
        return value == select;
      case TokenKind.NOT_EQUALS:
        return value != select;
      case TokenKind.INCLUDES:
        return value.split(' ').any((v) => v.isNotEmpty && v == select);
      case TokenKind.PREFIX_MATCH:
        return value.startsWith(select);
      case TokenKind.SUFFIX_MATCH:
        return value.endsWith(select);
      case TokenKind.SUBSTRING_MATCH:
        return value.contains(select);
      default:
        throw _unsupported(selector);
    }
  }

  /// True when [selector] is the wildcard or names the current element's tag.
  @override
  bool visitElementSelector(ElementSelector selector) =>
      selector.isWildcard || _element.localName == selector.name.toLowerCase();

  /// Tests the 1-based position of the current element among the candidates
  /// of the step in progress against [selector].
  @override
  bool visitPositionSelector(PositionSelector selector) {
    // indexOf yields -1 when absent, so a missing element has index 0 here.
    var index = _temps.indexOf(_element) + 1;
    if (index == 0) return false;
    int value = selector.value;
    if (selector._position == TokenKind.NUM) {
      return index == value;
    } else if (selector._position == TokenKind.POSITION) {
      switch (selector.operatorKind) {
        case TokenKind.GREATER:
          return index > value;
        case TokenKind.GREATER_OR_EQUALS:
          return index >= value;
        case TokenKind.LESS:
          return index < value;
        case TokenKind.LESS_OR_EQUALS:
          return index <= value;
        default:
          throw _unsupported(selector);
      }
    } else if (selector._position == TokenKind.LAST) {
      // With a 1-based index the last candidate sits at `_temps.length`.
      switch (selector.operatorKind) {
        case TokenKind.MINUS:
          return index == _temps.length - (value ?? 0);
        case TokenKind.NO_MATCH:
          return index == _temps.length;
        default:
          throw _unsupported(selector);
      }
    } else {
      throw _unsupported(selector);
    }
  }

  /// True when every simple selector of [selector] accepts the current element.
  @override
  bool visitSelector(Selector selector) {
    var result = true;
    for (var s in selector.simpleSelectors) {
      result = s.visit(this);
      if (!result) break;
    }
    return result;
  }

  /// Drops from the candidates every element rejected by the position
  /// predicate of [node]; does nothing when there is no such predicate.
  void _removeIfNotMatchPosition(Selector node) {
    var position = node.positionSelector;
    if (position == null) return;
    _temps.removeWhere((item) {
      _element = item;
      return position.visit(this) == false;
    });
  }

  @override
  visitSimpleSelector(SimpleSelector node) => false;
}

/// A parsed XPath expression: the ordered steps, the output specifier
/// (e.g. `/text()`, may be null) and the original source string.
class SelectorGroup {
  final List<Selector> selectors;
  final String source;
  final String output;

  SelectorGroup(this.selectors, this.output, this.source);
}

/// One path step: a path type plus the simple selectors an element must
/// satisfy, and an optional position predicate.
class Selector {
  /// One of [TokenKind.CHILD], [TokenKind.ROOT], [TokenKind.CURRENT],
  /// [TokenKind.PARENT].
  final int _nodeType;

  final List<SimpleSelector> simpleSelectors;

  PositionSelector positionSelector;

  int get operatorKind => _nodeType;

  Selector(this._nodeType, this.simpleSelectors);

  bool visit(VisitorBase visitor) => visitor.visitSelector(this);
}

/// Base class of the conditions that make up a [Selector].
class SimpleSelector {
  final String _name;
  final String _source;

  SimpleSelector(this._name, this._source);

  String get name => _name;

  bool get isWildcard => _name == "*";

  /// Dispatches to [VisitorBase.visitSimpleSelector].
  visit(VisitorBase visitor) => visitor.visitSimpleSelector(this);

  @override
  String toString() => _source;
}

/// Selects elements by tag name.
class ElementSelector extends SimpleSelector {
  ElementSelector(String name, String source) : super(name, source);

  /// Dispatches to [VisitorBase.visitElementSelector].
  visit(VisitorBase visitor) => visitor.visitElementSelector(this);

  @override
  String toString() => name;
}

/// Selects elements by attribute presence or value.
class AttributeSelector extends SimpleSelector {
  final int _op;
  final _value;

  AttributeSelector(String name, this._op, this._value, String source)
      : super(name, source);

  int get operatorKind => _op;

  get value => _value;

  /// Dispatches to [VisitorBase.visitAttributeSelector].
  visit(VisitorBase visitor) => visitor.visitAttributeSelector(this);
}

/// Selects elements by their position among the candidates of a step.
class PositionSelector extends SimpleSelector {
  // TokenKind.NUM, TokenKind.LAST or TokenKind.POSITION
  final int _position;

  // A position operator token kind, or TokenKind.NO_MATCH when there is none.
  final int _op;
  final int _value;

  PositionSelector(this._position, this._op, this._value, String source)
      : super("*", source);

  int get operatorKind => _op;

  get value => _value;

  /// Dispatches to [VisitorBase.visitPositionSelector].
  visit(VisitorBase visitor) => visitor.visitPositionSelector(this);
}

/// Visitor over the selector classes; each method reports whether the
/// element under test satisfies the given selector.
abstract class VisitorBase {
  visitSimpleSelector(SimpleSelector node);

  /// Returns true when the element under test satisfies [node].
  bool visitElementSelector(ElementSelector node);

  /// Returns true when the element under test satisfies [node].
  bool visitAttributeSelector(AttributeSelector node);

  /// Returns true when the element under test satisfies [node].
  bool visitPositionSelector(PositionSelector node);

  /// Returns true when the element under test satisfies [node].
  bool visitSelector(Selector node);
}
-------------------------------------------------------------------------------- /pubspec.lock: -------------------------------------------------------------------------------- 1 | # Generated by pub 2 | # See https://dart.dev/tools/pub/glossary#lockfile 3 | packages: 4 | archive: 5 | dependency: transitive 6 | description: 7 | name: archive 8 | url: "https://pub.dartlang.org" 9 | source: hosted 10 | version: "2.0.11" 11 | args: 12 | dependency: transitive 13 | description: 14 | name: args 15 | url: "https://pub.dartlang.org" 16 | source: hosted 17 | version: "1.5.2" 18 | async: 19 | dependency: transitive 20 | description: 21 | name: async 22 | url: "https://pub.dartlang.org" 23 | source: hosted 24 | version: "2.4.0" 25 | boolean_selector: 26 | dependency: transitive 27 | description: 28 | name: boolean_selector 29 | url: "https://pub.dartlang.org" 30 | source: hosted 31 | version: "1.0.5" 32 | charcode: 33 | dependency: transitive 34 | description: 35 | name: charcode 36 | url: "https://pub.dartlang.org" 37 | source: hosted 38 | version: "1.1.2" 39 | collection: 40 | dependency: transitive 41 | description: 42 | name: collection 43 | url: "https://pub.dartlang.org" 44 | source: hosted 45 | version: "1.14.11" 46 | convert: 47 | dependency: transitive 48 | 
description: 49 | name: convert 50 | url: "https://pub.dartlang.org" 51 | source: hosted 52 | version: "2.1.1" 53 | crypto: 54 | dependency: transitive 55 | description: 56 | name: crypto 57 | url: "https://pub.dartlang.org" 58 | source: hosted 59 | version: "2.1.3" 60 | csslib: 61 | dependency: transitive 62 | description: 63 | name: csslib 64 | url: "https://pub.dartlang.org" 65 | source: hosted 66 | version: "0.16.1" 67 | flutter: 68 | dependency: "direct main" 69 | description: flutter 70 | source: sdk 71 | version: "0.0.0" 72 | flutter_test: 73 | dependency: "direct dev" 74 | description: flutter 75 | source: sdk 76 | version: "0.0.0" 77 | html: 78 | dependency: "direct main" 79 | description: 80 | name: html 81 | url: "https://pub.dartlang.org" 82 | source: hosted 83 | version: "0.14.0+3" 84 | image: 85 | dependency: transitive 86 | description: 87 | name: image 88 | url: "https://pub.dartlang.org" 89 | source: hosted 90 | version: "2.1.4" 91 | matcher: 92 | dependency: transitive 93 | description: 94 | name: matcher 95 | url: "https://pub.dartlang.org" 96 | source: hosted 97 | version: "0.12.6" 98 | meta: 99 | dependency: transitive 100 | description: 101 | name: meta 102 | url: "https://pub.dartlang.org" 103 | source: hosted 104 | version: "1.1.8" 105 | path: 106 | dependency: transitive 107 | description: 108 | name: path 109 | url: "https://pub.dartlang.org" 110 | source: hosted 111 | version: "1.6.4" 112 | pedantic: 113 | dependency: transitive 114 | description: 115 | name: pedantic 116 | url: "https://pub.dartlang.org" 117 | source: hosted 118 | version: "1.8.0+1" 119 | petitparser: 120 | dependency: transitive 121 | description: 122 | name: petitparser 123 | url: "https://pub.dartlang.org" 124 | source: hosted 125 | version: "2.4.0" 126 | quiver: 127 | dependency: transitive 128 | description: 129 | name: quiver 130 | url: "https://pub.dartlang.org" 131 | source: hosted 132 | version: "2.0.5" 133 | sky_engine: 134 | dependency: transitive 135 | 
description: flutter 136 | source: sdk 137 | version: "0.0.99" 138 | source_span: 139 | dependency: transitive 140 | description: 141 | name: source_span 142 | url: "https://pub.dartlang.org" 143 | source: hosted 144 | version: "1.5.5" 145 | stack_trace: 146 | dependency: transitive 147 | description: 148 | name: stack_trace 149 | url: "https://pub.dartlang.org" 150 | source: hosted 151 | version: "1.9.3" 152 | stream_channel: 153 | dependency: transitive 154 | description: 155 | name: stream_channel 156 | url: "https://pub.dartlang.org" 157 | source: hosted 158 | version: "2.0.0" 159 | string_scanner: 160 | dependency: transitive 161 | description: 162 | name: string_scanner 163 | url: "https://pub.dartlang.org" 164 | source: hosted 165 | version: "1.0.5" 166 | term_glyph: 167 | dependency: transitive 168 | description: 169 | name: term_glyph 170 | url: "https://pub.dartlang.org" 171 | source: hosted 172 | version: "1.1.0" 173 | test_api: 174 | dependency: transitive 175 | description: 176 | name: test_api 177 | url: "https://pub.dartlang.org" 178 | source: hosted 179 | version: "0.2.11" 180 | typed_data: 181 | dependency: transitive 182 | description: 183 | name: typed_data 184 | url: "https://pub.dartlang.org" 185 | source: hosted 186 | version: "1.1.6" 187 | vector_math: 188 | dependency: transitive 189 | description: 190 | name: vector_math 191 | url: "https://pub.dartlang.org" 192 | source: hosted 193 | version: "2.0.8" 194 | xml: 195 | dependency: transitive 196 | description: 197 | name: xml 198 | url: "https://pub.dartlang.org" 199 | source: hosted 200 | version: "3.5.0" 201 | sdks: 202 | dart: ">=2.4.0 <3.0.0" 203 | -------------------------------------------------------------------------------- /pubspec.yaml: -------------------------------------------------------------------------------- 1 | name: xpath_parse 2 | description: xpath package for dart, uses a path expression to select node or a list of nodes from an XML document. 
3 | version: 1.0.2 4 | author: codingfd 5 | homepage: "https://github.com/codingfd/xpath" 6 | 7 | environment: 8 | sdk: ">=2.2.0 <3.0.0" 9 | 10 | dependencies: 11 | flutter: 12 | sdk: flutter 13 | html: ^0.14.0+3 14 | 15 | dev_dependencies: 16 | flutter_test: 17 | sdk: flutter 18 | 19 | # For information on the generic Dart part of this file, see the 20 | # following page: https://dart.dev/tools/pub/pubspec 21 | 22 | # The following section is specific to Flutter. 23 | flutter: 24 | 25 | # To add assets to your package, add an assets section, like this: 26 | # assets: 27 | # - images/a_dot_burr.jpeg 28 | # - images/a_dot_ham.jpeg 29 | # 30 | # For details regarding assets in packages, see 31 | # https://flutter.dev/assets-and-images/#from-packages 32 | # 33 | # An image asset can refer to one or more resolution-specific "variants", see 34 | # https://flutter.dev/assets-and-images/#resolution-aware. 35 | 36 | # To add custom fonts to your package, add a fonts section here, 37 | # in this "flutter" section. Each entry in this list should have a 38 | # "family" key with the font family name, and a "fonts" key with a 39 | # list giving the asset and other descriptors for the font. For 40 | # example: 41 | # fonts: 42 | # - family: Schyler 43 | # fonts: 44 | # - asset: fonts/Schyler-Regular.ttf 45 | # - asset: fonts/Schyler-Italic.ttf 46 | # style: italic 47 | # - family: Trajan Pro 48 | # fonts: 49 | # - asset: fonts/TrajanPro.ttf 50 | # - asset: fonts/TrajanPro_Bold.ttf 51 | # weight: 700 52 | # 53 | # For details regarding fonts in packages, see 54 | # https://flutter.dev/custom-fonts/#from-packages 55 | -------------------------------------------------------------------------------- /test/xpath_test.dart: -------------------------------------------------------------------------------- 1 | import 'package:flutter_test/flutter_test.dart'; 2 | import 'package:xpath_parse/xpath_selector.dart'; 3 | 4 | final String html = ''' 5 | 6 | 7 |
head
8 |
1234
9 |
end
10 | 11 | '''; 12 | // Smoke test: runs the supported query forms against [html] and prints each result; it makes no assertions. 13 | Future main() async { 14 | test('runs xpath queries against the sample html', () async { 15 | var xpath = XPath.source(html); 16 | print(xpath.query("//div/a/text()").list()); 17 | print(xpath.query("//div/a/@href").get()); 18 | print(xpath.query("//div[@class]/text()").list()); 19 | print(xpath.query("//div[@class='head']/text()").get()); 20 | print(xpath.query("//div[@class^='he']/text()").get()); 21 | print(xpath.query("//div[@class\$='nd']/text()").get()); 22 | print(xpath.query("//div[@class*='ea']/text()").get()); 23 | print(xpath.query("//table//td[1]/text()").get()); 24 | print(xpath.query("//table//td[last()]/text()").get()); 25 | print(xpath.query("//table//td[position()<3]/text()").list()); 26 | print(xpath.query("//table//td[position()>2]/text()").list()); 27 | }); 28 | } 29 | --------------------------------------------------------------------------------