├── .gitignore
├── .metadata
├── CHANGELOG.md
├── LICENSE
├── README.md
├── lib
├── token_kind.dart
├── xpath_parser.dart
└── xpath_selector.dart
├── pubspec.lock
├── pubspec.yaml
└── test
└── xpath_test.dart
/.gitignore:
--------------------------------------------------------------------------------
1 | # Miscellaneous
2 | *.class
3 | *.log
4 | *.pyc
5 | *.swp
6 | .DS_Store
7 | .atom/
8 | .buildlog/
9 | .history
10 | .svn/
11 |
12 | # IntelliJ related
13 | *.iml
14 | *.ipr
15 | *.iws
16 | .idea/
17 |
18 | # The .vscode folder contains launch configuration and tasks you configure in
19 | # VS Code which you may wish to be included in version control, so this line
20 | # is commented out by default.
21 | #.vscode/
22 |
23 | # Flutter/Dart/Pub related
24 | **/doc/api/
25 | .dart_tool/
26 | .flutter-plugins
27 | .flutter-plugins-dependencies
28 | .packages
29 | .pub-cache/
30 | .pub/
31 | build/
32 |
33 | # Android related
34 | **/android/**/gradle-wrapper.jar
35 | **/android/.gradle
36 | **/android/captures/
37 | **/android/gradlew
38 | **/android/gradlew.bat
39 | **/android/local.properties
40 | **/android/**/GeneratedPluginRegistrant.java
41 |
42 | # iOS/XCode related
43 | **/ios/**/*.mode1v3
44 | **/ios/**/*.mode2v3
45 | **/ios/**/*.moved-aside
46 | **/ios/**/*.pbxuser
47 | **/ios/**/*.perspectivev3
48 | **/ios/**/*sync/
49 | **/ios/**/.sconsign.dblite
50 | **/ios/**/.tags*
51 | **/ios/**/.vagrant/
52 | **/ios/**/DerivedData/
53 | **/ios/**/Icon?
54 | **/ios/**/Pods/
55 | **/ios/**/.symlinks/
56 | **/ios/**/profile
57 | **/ios/**/xcuserdata
58 | **/ios/.generated/
59 | **/ios/Flutter/App.framework
60 | **/ios/Flutter/Flutter.framework
61 | **/ios/Flutter/Flutter.podspec
62 | **/ios/Flutter/Generated.xcconfig
63 | **/ios/Flutter/app.flx
64 | **/ios/Flutter/app.zip
65 | **/ios/Flutter/flutter_assets/
66 | **/ios/Flutter/flutter_export_environment.sh
67 | **/ios/ServiceDefinitions.json
68 | **/ios/Runner/GeneratedPluginRegistrant.*
69 |
70 | # Exceptions to above rules.
71 | !**/ios/**/default.mode1v3
72 | !**/ios/**/default.mode2v3
73 | !**/ios/**/default.pbxuser
74 | !**/ios/**/default.perspectivev3
75 | !/packages/flutter_tools/test/data/dart_dependencies_test/**/.packages
76 |
--------------------------------------------------------------------------------
/.metadata:
--------------------------------------------------------------------------------
1 | # This file tracks properties of this Flutter project.
2 | # Used by Flutter tool to assess capabilities and perform upgrades etc.
3 | #
4 | # This file should be version controlled and should not be manually edited.
5 |
6 | version:
7 | revision: 0b8abb4724aa590dd0f429683339b1e045a1594d
8 | channel: stable
9 |
10 | project_type: package
11 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## [1.0.0] - xpath package create
2 | ## [1.0.1] - add method comment
3 | ## [1.0.2] - fix Health suggestions
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # xpath
2 | [pub package](https://pub.dartlang.org/packages/xpath_parse)
3 | [GitHub repository](https://github.com/codingfd/xpath)
4 | XPath selector based on html.
5 | ## Get started
6 | ### Add dependency
7 | ```yaml
8 | dependencies:
9 | xpath_parse: lastVersion
10 | ```
11 | ### Super simple to use
12 |
13 | ```dart
14 | final String html = '''
15 |
16 |
17 | head
18 |
19 | end
20 |
21 | ''';
22 |
23 | XPath.source(html).query("//div/a/text()").list()
24 |
25 | ```
26 |
27 | For more examples, refer to [the tests](https://github.com/codingfd/xpath/blob/master/test/xpath_test.dart).
28 |
29 | ## Syntax supported:
30 |
31 |
32 | Name |
33 | Expression |
34 |
35 |
36 | immediate parent |
37 | / |
38 |
39 |
40 | parent |
41 | // |
42 |
43 |
44 | attribute |
45 | [@key=value] |
46 |
47 |
48 | nth child |
49 | tag[n] |
50 |
51 |
52 | attribute |
53 | /@key |
54 |
55 |
56 | wildcard in tagname |
57 | /* |
58 |
59 |
60 | function |
61 | function() |
62 |
63 |
64 |
65 | ### Extended syntax supported:
66 |
67 | The following syntax is not standard XPath; it is an extension borrowed from Xsoup (for convenience when extracting HTML; compare the Jsoup CSS selector syntax):
68 |
69 |
70 |
71 | Name |
72 | Expression |
73 | Support |
74 |
75 |
76 | attribute value not equals |
77 | [@key!=value] |
78 | yes |
79 |
80 |
81 | attribute value start with |
82 | [@key^=value] |
83 | yes |
84 |
85 |
86 | attribute value end with |
87 | [@key$=value] |
88 | yes |
89 |
90 |
91 | attribute value contains |
92 | [@key*=value] |
93 | yes |
94 |
95 |
96 | attribute value contains the whitespace-separated word |
97 | [@key~=value] |
98 | yes |
99 |
100 |
--------------------------------------------------------------------------------
/lib/token_kind.dart:
--------------------------------------------------------------------------------
/// Integer codes shared by the XPath parser and the selector evaluator.
///
/// The codes fall into four groups: path types, position operators,
/// attribute match operators and position predicate kinds.
class TokenKind {
  // Path types.
  static const int CHILD = 1; // /
  static const int ROOT = 2; // //
  static const int CURRENT = 3; // .
  static const int PARENT = 4; // ..

  // Position operators.
  static const int PLUS = 11; // +
  static const int MINUS = 12; // -
  static const int GREATER = 13; // >
  static const int GREATER_OR_EQUALS = 14; // >=
  static const int LESS = 15; // <
  static const int LESS_OR_EQUALS = 16; // <=

  // Attribute match operators.
  static const int EQUALS = 28; // =
  static const int NOT_EQUALS = 29; // !=
  static const int INCLUDES = 530; // ~=
  static const int PREFIX_MATCH = 531; // ^=
  static const int SUFFIX_MATCH = 532; // $=
  static const int SUBSTRING_MATCH = 533; // *=
  static const int NO_MATCH = 534; // No operator.

  // Position predicate kinds.
  static const int NUM = 600; // [0]
  static const int LAST = 601; // last()
  static const int POSITION = 602; // position()

  // Textual position operator -> code.
  static const Map<String, int> _positionOperators = {
    "+": PLUS,
    "-": MINUS,
    ">": GREATER,
    ">=": GREATER_OR_EQUALS,
    "<": LESS,
    "<=": LESS_OR_EQUALS
  };

  // Textual attribute operator -> code.
  static const Map<String, int> _attrOperators = {
    "=": EQUALS,
    "!=": NOT_EQUALS,
    "~=": INCLUDES,
    "^=": PREFIX_MATCH,
    "\$=": SUFFIX_MATCH,
    "*=": SUBSTRING_MATCH
  };

  /// Returns the position-operator code for [text], or [NO_MATCH] when
  /// [text] is not a known position operator.
  static int matchPositionOperator(String text) {
    final kind = _positionOperators[text];
    return kind == null ? NO_MATCH : kind;
  }

  /// Returns the attribute-operator code for [text], or [NO_MATCH] when
  /// [text] is not a known attribute operator.
  static int matchAttrOperator(String text) {
    final kind = _attrOperators[text];
    return kind == null ? NO_MATCH : kind;
  }
}
58 |
--------------------------------------------------------------------------------
/lib/xpath_parser.dart:
--------------------------------------------------------------------------------
1 | import 'package:xpath_parse/token_kind.dart';
2 | import 'package:xpath_parse/xpath_selector.dart';
3 |
/// Parses the [xpath] string into a [SelectorGroup].
///
/// The query is split into location steps at every "/" or "//" that is not
/// inside a "[...]" predicate; text before the first separator is ignored.
/// A trailing "text()" or "@attr" step is not turned into a selector: it is
/// stored, together with its leading separator, as the group's output.
///
/// When the first step is a direct-child step ("/") naming an element other
/// than "body" or "head", an implicit "/body" step is inserted in front of it.
///
/// Throws a [FormatException] when [xpath] contains no "/" separator at all.
SelectorGroup parseSelectorGroup(String xpath) {
  ArgumentError.checkNotNull(xpath, "xpath");

  final selectorSources = _splitSteps(xpath);
  if (selectorSources.isEmpty) {
    throw FormatException("'$xpath' is not a valid xpath query string");
  }

  // Peel off the output step ("/text()", "//@href", ...), if any.
  String output;
  final lastSource = selectorSources.last.replaceAll("/", "");
  if (lastSource == "text()" || lastSource.startsWith("@")) {
    output = selectorSources.removeLast();
  }

  final selectors = selectorSources.map(_parseSelector).toList();

  // An output-only query such as "//text()" leaves no selector to inspect.
  if (selectors.isNotEmpty &&
      selectors.first.operatorKind == TokenKind.CHILD) {
    final simpleSelectors = selectors.first.simpleSelectors;
    final leading = simpleSelectors.isEmpty ? null : simpleSelectors.first;
    // Both names must differ: with "||" the test is always true and
    // "/body/..." would be rewritten to "/body/body/...".
    if (leading != null && leading.name != "body" && leading.name != "head") {
      selectors.insert(
          0, Selector(TokenKind.CHILD, [ElementSelector("body", "/body")]));
    }
  }

  return SelectorGroup(selectors, output, xpath);
}

/// Splits [xpath] into step sources, each beginning with its own "/" or "//".
///
/// Slashes nested inside square brackets belong to a predicate (for example
/// an attribute value such as '/a/b') and do not start a new step.
List<String> _splitSteps(String xpath) {
  final starts = <int>[];
  var depth = 0;
  for (var i = 0; i < xpath.length; i++) {
    final ch = xpath[i];
    if (ch == "[") {
      depth++;
    } else if (ch == "]") {
      if (depth > 0) depth--;
    } else if (ch == "/" && depth == 0) {
      starts.add(i);
      // "//" is a single separator, so skip its second slash.
      if (i + 1 < xpath.length && xpath[i + 1] == "/") i++;
    }
  }

  final steps = <String>[];
  for (var k = 0; k < starts.length; k++) {
    final end = k + 1 < starts.length ? starts[k + 1] : xpath.length;
    steps.add(xpath.substring(starts[k], end));
  }
  return steps;
}
44 |
/// Parses one location step [input] (including its leading "/" or "//")
/// into a [Selector].
///
/// Supported step forms:
///  * `name` or `*` – element name test;
///  * `..` – parent of the current element;
///  * `name[@attr]`, `name[@attr OP value]` – attribute predicate, where OP
///    is one of `=`, `!=`, `^=`, `~=`, `*=`, `$=`;
///  * `name[n]`, `name[position() OP n]`, `name[last()]`, `name[last()-n]` –
///    position predicates.
///
/// A predicate that matches none of these forms is ignored.
///
/// Throws a [FormatException] when [input] does not start with "/" or when a
/// `last()-` predicate has no number after the minus sign.
Selector _parseSelector(String input) {
  int type;
  String source;
  if (input.startsWith("//")) {
    type = TokenKind.ROOT;
    source = input.substring(2);
  } else if (input.startsWith("/")) {
    type = TokenKind.CHILD;
    source = input.substring(1);
  } else {
    throw FormatException("'$input' is not a valid xpath query string");
  }

  // ".." selects the parent, whatever its tag name.
  if (source == "..") {
    return Selector(TokenKind.PARENT, [ElementSelector("*", "")]);
  }

  final simpleSelectors = <SimpleSelector>[];
  final selector = Selector(type, simpleSelectors);

  // Split "name[predicate]" into the element name and the predicate text.
  final match = RegExp(r"(.+)\[(.+)\]").firstMatch(source);
  if (match == null) {
    simpleSelectors.add(ElementSelector(source, input));
    return selector;
  }

  simpleSelectors.add(ElementSelector(match.group(1), input));
  final group = match.group(2);

  // Attribute predicate.
  if (group.startsWith("@")) {
    final attr = RegExp(r"^@(.+?)(=|!=|\^=|~=|\*=|\$=)(.+)$").firstMatch(group);
    if (attr != null) {
      final op = TokenKind.matchAttrOperator(attr.group(2));
      simpleSelectors.add(AttributeSelector(
          attr.group(1), op, _unquoteAttrValue(attr.group(3)), group));
    } else {
      // "[@attr]" with no operator: only the attribute's presence is tested.
      simpleSelectors.add(AttributeSelector(
          group.substring(1), TokenKind.NO_MATCH, null, group));
    }
  }

  // Plain number: "[n]".
  final number = RegExp(r"^\d+$").firstMatch(group);
  if (number != null) {
    selector.positionSelector = PositionSelector(
        TokenKind.NUM, TokenKind.NO_MATCH, int.tryParse(number.group(0)), input);
  }

  // "position() OP n".
  final position =
      RegExp(r"^position\(\)(<|<=|>|>=)(\d+)$").firstMatch(group);
  if (position != null) {
    final op = TokenKind.matchPositionOperator(position.group(1));
    final value = int.tryParse(position.group(2));
    selector.positionSelector =
        PositionSelector(TokenKind.POSITION, op, value, input);
  }

  // "last()" or "last()-n".
  final last = RegExp(r"^last\(\)(-)?(\d+)?$").firstMatch(group);
  if (last != null) {
    final sign = last.group(1);
    final op = sign == null
        ? TokenKind.NO_MATCH
        : TokenKind.matchPositionOperator(sign);
    final value = int.tryParse(last.group(2) ?? "");
    // "last()-" without a number would leave a null offset that fails later,
    // during evaluation; reject it here instead.
    if (op == TokenKind.MINUS && value == null) {
      throw FormatException("'$input' is not a valid xpath query string");
    }
    selector.positionSelector =
        PositionSelector(TokenKind.LAST, op, value, input);
  }

  return selector;
}

/// Removes the quoting from an attribute predicate value.
///
/// A value wrapped in one matching pair of single or double quotes keeps its
/// inner text verbatim, so `"it's"` becomes `it's`. Any other value has every
/// quote character stripped, as before.
String _unquoteAttrValue(String raw) {
  if (raw.length >= 2) {
    final first = raw[0];
    if ((first == "'" || first == '"') && raw[raw.length - 1] == first) {
      return raw.substring(1, raw.length - 1);
    }
  }
  return raw.replaceAll(RegExp("['\"]"), "");
}
119 |
--------------------------------------------------------------------------------
/lib/xpath_selector.dart:
--------------------------------------------------------------------------------
1 | import 'package:html/dom.dart';
2 | import 'package:html/parser.dart';
3 | import 'package:xpath_parse/token_kind.dart';
4 | import 'package:xpath_parse/xpath_parser.dart';
5 |
/// Entry point for running XPath queries against an HTML document.
class XPath {
  /// Node every query starts from.
  final rootElement;

  XPath(this.rootElement);

  /// Parses [html] and returns an [XPath] rooted at the document element.
  static XPath source(String html) => XPath(parse(html).documentElement);

  /// Runs [xpath] against [rootElement] and returns the evaluator that holds
  /// the matches.
  SelectorEvaluator query(String xpath) => SelectorEvaluator()
    ..matchSelectorGroup(rootElement, parseSelectorGroup(xpath));
}
27 |
/// Evaluates selectors against a DOM tree and exposes the matched elements,
/// either as elements ([elements]) or as strings ([list], [get]).
class SelectorEvaluator extends VisitorBase {
  // Element currently being tested by the visit* callbacks.
  Element _element;

  // Elements matched by the selectors applied so far.
  var _results = [];

  // Sibling candidates of the step being evaluated; position predicates are
  // resolved against this list.
  var _temps = [];

  // Output step of the query ("/text()", "//@attr", ...), or null.
  String _output;

  /// Appends to the results the elements reachable from [node] that match
  /// [selector].
  ///
  /// Depending on the selector's operator kind this tests the element
  /// children of [node] (CHILD), all element descendants (ROOT), [node]
  /// itself (CURRENT) or its parent (PARENT). Non-element nodes are ignored.
  void matchSelector(Node node, Selector selector) {
    _temps.clear();
    if (node is! Element) return;
    switch (selector.operatorKind) {
      case TokenKind.CHILD:
        _matchChildren(node, selector);
        break;
      case TokenKind.ROOT:
        _matchChildren(node, selector);
        // Descend so that every level of the subtree is tested.
        for (var item in node.nodes) {
          matchSelector(item, selector);
        }
        break;
      case TokenKind.CURRENT:
        _element = node;
        if (selector.visit(this)) {
          _results.add(node);
        }
        break;
      case TokenKind.PARENT:
        // The parent may be missing or may not be an element (for example
        // the document itself); neither can be matched.
        final parent = node.parent;
        if (parent is Element) {
          _element = parent;
          if (selector.visit(this)) {
            _results.add(parent);
          }
        }
        break;
    }
  }

  // Collects the element children of [node] matching [selector], applies the
  // position predicate to them and appends the survivors to the results.
  void _matchChildren(Node node, Selector selector) {
    for (var item in node.nodes) {
      if (item is! Element) continue;
      _element = item;
      if (selector.visit(this)) {
        _temps.add(item);
      }
    }
    _removeIfNotMatchPosition(selector);
    _results.addAll(_temps);
  }

  /// Applies the selectors of [group] in order, starting from [node]; each
  /// selector is evaluated against the results of the previous one.
  void matchSelectorGroup(Node node, SelectorGroup group) {
    _output = group.output;
    _results = [node];
    for (var selector in group.selectors) {
      final previous = List.of(_results);
      _results.clear();
      for (var item in previous) {
        matchSelector(item, selector);
      }
    }
  }

  /// Returns the first entry of [list], or an empty string when there is none.
  String get() {
    final data = list();
    return data.isNotEmpty ? data.first : "";
  }

  /// Returns the matched elements rendered according to the query's output
  /// step:
  ///  * "/text()" – trimmed text of each match;
  ///  * "//text()" – trimmed text of each match and of all its descendants;
  ///  * "/@attr" – trimmed value of the attribute on each match having it;
  ///  * "//@attr" – the same, also for all descendants;
  ///  * otherwise – outer HTML of each match.
  ///
  /// Prints a notice when the resulting list is empty.
  List list() {
    final list = <String>[];
    final output = _output;

    if (output == "/text()") {
      for (var element in elements()) {
        list.add(element.text.trim());
      }
    } else if (output == "//text()") {
      void collectText(List elements) {
        for (var item in elements) {
          list.add(item.text.trim());
          collectText(item.children);
        }
      }

      collectText(elements());
    } else if (output != null && output.startsWith("/@")) {
      final attr = output.substring(2);
      for (var element in elements()) {
        // Check for null before trimming: the attribute may be absent.
        final attrValue = element.attributes[attr];
        if (attrValue != null) {
          list.add(attrValue.trim());
        }
      }
    } else if (output != null && output.startsWith("//@")) {
      final attr = output.substring(3);
      void collectAttr(List elements) {
        for (var element in elements) {
          final attrValue = element.attributes[attr];
          if (attrValue != null) {
            list.add(attrValue.trim());
          }
        }
        for (var element in elements) {
          collectAttr(element.children);
        }
      }

      collectAttr(elements());
    } else {
      for (var element in elements()) {
        list.add(element.outerHtml);
      }
    }
    if (list.isEmpty) {
      print("xpath query result is empty");
    }
    return list;
  }

  /// Returns the elements matched by the last evaluated query.
  List elements() => _results;

  _unsupported(selector) =>
      FormatException("'$selector' is not a valid selector");

  /// Whether the current element has the selector's attribute and, when an
  /// operator is given, whether the attribute value satisfies it.
  @override
  bool visitAttributeSelector(AttributeSelector selector) {
    // The attribute must exist before its value can be compared.
    final value = _element.attributes[selector.name.toLowerCase()];
    if (value == null) return false;

    if (selector.operatorKind == TokenKind.NO_MATCH) return true;

    final select = '${selector.value}';
    switch (selector.operatorKind) {
      case TokenKind.EQUALS:
        return value == select;
      case TokenKind.NOT_EQUALS:
        return value != select;
      case TokenKind.INCLUDES:
        return value.split(' ').any((v) => v.isNotEmpty && v == select);
      case TokenKind.PREFIX_MATCH:
        return value.startsWith(select);
      case TokenKind.SUFFIX_MATCH:
        return value.endsWith(select);
      case TokenKind.SUBSTRING_MATCH:
        return value.contains(select);
      default:
        throw _unsupported(selector);
    }
  }

  /// Whether the current element's tag name matches the selector (a wildcard
  /// selector matches every element).
  @override
  bool visitElementSelector(ElementSelector selector) =>
      selector.isWildcard || _element.localName == selector.name.toLowerCase();

  /// Whether the current element's 1-based position among the candidates of
  /// the current step satisfies the position predicate.
  @override
  bool visitPositionSelector(PositionSelector selector) {
    // indexOf returns -1 when absent, so the 1-based position is then 0.
    final index = _temps.indexOf(_element) + 1;
    if (index == 0) return false;
    final value = selector.value;
    if (selector._position == TokenKind.NUM) {
      return index == value;
    } else if (selector._position == TokenKind.POSITION) {
      switch (selector.operatorKind) {
        case TokenKind.GREATER:
          return index > value;
        case TokenKind.GREATER_OR_EQUALS:
          return index >= value;
        case TokenKind.LESS:
          return index < value;
        case TokenKind.LESS_OR_EQUALS:
          return index <= value;
        default:
          throw _unsupported(selector);
      }
    } else if (selector._position == TokenKind.LAST) {
      switch (selector.operatorKind) {
        case TokenKind.MINUS:
          // last()-n: positions are 1-based, so last() equals the length.
          return index == _temps.length - (value ?? 0);
        case TokenKind.NO_MATCH:
          // last(): only the final candidate.
          return index == _temps.length;
        default:
          throw _unsupported(selector);
      }
    } else {
      throw _unsupported(selector);
    }
  }

  /// Whether the current element satisfies every simple selector of
  /// [selector]; evaluation stops at the first one that fails.
  @override
  bool visitSelector(Selector selector) {
    var result = true;
    for (var s in selector.simpleSelectors) {
      result = s.visit(this);
      if (!result) break;
    }
    return result;
  }

  // Drops from the candidates every element rejected by the position
  // predicate of [node]; without a predicate nothing is dropped.
  void _removeIfNotMatchPosition(Selector node) {
    _temps.removeWhere((item) {
      _element = item;
      return node.positionSelector?.visit(this) == false;
    });
  }

  @override
  visitSimpleSelector(SimpleSelector node) => false;
}
252 |
/// A parsed XPath query: the ordered [selectors] to apply, the optional
/// [output] step and the original [source] string.
class SelectorGroup {
  SelectorGroup(this.selectors, this.output, this.source);

  /// Selectors to apply, in order.
  final List selectors;

  /// Output step of the query, if one was given.
  final String output;

  /// Query string this group was built from.
  final String source;
}
263 |
/// One step of a query: a path type, the simple selectors an element must
/// satisfy and an optional position predicate.
class Selector {
  Selector(this._nodeType, this.simpleSelectors);

  // Path type of this step, one of [TokenKind.CHILD], [TokenKind.ROOT],
  // [TokenKind.CURRENT] or [TokenKind.PARENT].
  final int _nodeType;

  /// Simple selectors that make up this step.
  final List simpleSelectors;

  /// Position predicate of this step, if any.
  PositionSelector positionSelector;

  /// Path type of this step.
  int get operatorKind => _nodeType;

  /// Dispatches to [VisitorBase.visitSelector].
  bool visit(VisitorBase visitor) => visitor.visitSelector(this);
}
285 |
/// Base class of the individual tests a [Selector] is made of.
class SimpleSelector {
  SimpleSelector(this._name, this._source);

  final String _name;
  final String _source;

  /// Name this selector tests for.
  String get name => _name;

  /// Whether the name is the "*" wildcard.
  bool get isWildcard => _name == "*";

  /// Dispatches to [VisitorBase.visitSimpleSelector].
  visit(VisitorBase visitor) => visitor.visitSimpleSelector(this);

  /// Returns the source text this selector was created with.
  @override
  String toString() => _source;
}
302 |
/// Selects elements by tag name.
class ElementSelector extends SimpleSelector {
  ElementSelector(String name, String source) : super(name, source);

  /// Dispatches to [VisitorBase.visitElementSelector].
  @override
  visit(VisitorBase visitor) => visitor.visitElementSelector(this);

  /// Returns the element name rather than the source text.
  @override
  String toString() => name;
}
312 |
/// Selects elements by attribute: [name] is the attribute name,
/// [operatorKind] the comparison and [value] the operand.
class AttributeSelector extends SimpleSelector {
  AttributeSelector(String name, this._op, this._value, String source)
      : super(name, source);

  final int _op;
  final _value;

  /// Code of the comparison operator.
  int get operatorKind => _op;

  /// Operand the attribute value is compared with.
  get value => _value;

  /// Dispatches to [VisitorBase.visitAttributeSelector].
  @override
  visit(VisitorBase visitor) => visitor.visitAttributeSelector(this);
}
328 |
/// Selects elements by their position among the candidates of a step.
class PositionSelector extends SimpleSelector {
  PositionSelector(this._position, this._op, this._value, String source)
      : super("*", source);

  // Kind of position predicate (number, last() or position()).
  final int _position;

  // Operator code applied to the position.
  final int _op;

  // Numeric operand of the predicate.
  final int _value;

  /// Code of the operator applied to the position.
  int get operatorKind => _op;

  /// Numeric operand of the predicate.
  get value => _value;

  /// Dispatches to [VisitorBase.visitPositionSelector].
  @override
  visit(VisitorBase visitor) => visitor.visitPositionSelector(this);
}
348 |
/// Visitor interface over the selector classes.
///
/// Each selector's `visit` method calls the matching method declared here.
abstract class VisitorBase {
  /// Whether the element under test satisfies the whole selector [node].
  bool visitSelector(Selector node);

  /// Whether the element under test satisfies the element selector [node].
  bool visitElementSelector(ElementSelector node);

  /// Whether the element under test satisfies the attribute selector [node].
  bool visitAttributeSelector(AttributeSelector node);

  /// Whether the element under test satisfies the position selector [node].
  bool visitPositionSelector(PositionSelector node);

  /// Called for a plain [SimpleSelector] that has no more specific type.
  visitSimpleSelector(SimpleSelector node);
}
372 | //
373 |
--------------------------------------------------------------------------------
/pubspec.lock:
--------------------------------------------------------------------------------
1 | # Generated by pub
2 | # See https://dart.dev/tools/pub/glossary#lockfile
3 | packages:
4 | archive:
5 | dependency: transitive
6 | description:
7 | name: archive
8 | url: "https://pub.dartlang.org"
9 | source: hosted
10 | version: "2.0.11"
11 | args:
12 | dependency: transitive
13 | description:
14 | name: args
15 | url: "https://pub.dartlang.org"
16 | source: hosted
17 | version: "1.5.2"
18 | async:
19 | dependency: transitive
20 | description:
21 | name: async
22 | url: "https://pub.dartlang.org"
23 | source: hosted
24 | version: "2.4.0"
25 | boolean_selector:
26 | dependency: transitive
27 | description:
28 | name: boolean_selector
29 | url: "https://pub.dartlang.org"
30 | source: hosted
31 | version: "1.0.5"
32 | charcode:
33 | dependency: transitive
34 | description:
35 | name: charcode
36 | url: "https://pub.dartlang.org"
37 | source: hosted
38 | version: "1.1.2"
39 | collection:
40 | dependency: transitive
41 | description:
42 | name: collection
43 | url: "https://pub.dartlang.org"
44 | source: hosted
45 | version: "1.14.11"
46 | convert:
47 | dependency: transitive
48 | description:
49 | name: convert
50 | url: "https://pub.dartlang.org"
51 | source: hosted
52 | version: "2.1.1"
53 | crypto:
54 | dependency: transitive
55 | description:
56 | name: crypto
57 | url: "https://pub.dartlang.org"
58 | source: hosted
59 | version: "2.1.3"
60 | csslib:
61 | dependency: transitive
62 | description:
63 | name: csslib
64 | url: "https://pub.dartlang.org"
65 | source: hosted
66 | version: "0.16.1"
67 | flutter:
68 | dependency: "direct main"
69 | description: flutter
70 | source: sdk
71 | version: "0.0.0"
72 | flutter_test:
73 | dependency: "direct dev"
74 | description: flutter
75 | source: sdk
76 | version: "0.0.0"
77 | html:
78 | dependency: "direct main"
79 | description:
80 | name: html
81 | url: "https://pub.dartlang.org"
82 | source: hosted
83 | version: "0.14.0+3"
84 | image:
85 | dependency: transitive
86 | description:
87 | name: image
88 | url: "https://pub.dartlang.org"
89 | source: hosted
90 | version: "2.1.4"
91 | matcher:
92 | dependency: transitive
93 | description:
94 | name: matcher
95 | url: "https://pub.dartlang.org"
96 | source: hosted
97 | version: "0.12.6"
98 | meta:
99 | dependency: transitive
100 | description:
101 | name: meta
102 | url: "https://pub.dartlang.org"
103 | source: hosted
104 | version: "1.1.8"
105 | path:
106 | dependency: transitive
107 | description:
108 | name: path
109 | url: "https://pub.dartlang.org"
110 | source: hosted
111 | version: "1.6.4"
112 | pedantic:
113 | dependency: transitive
114 | description:
115 | name: pedantic
116 | url: "https://pub.dartlang.org"
117 | source: hosted
118 | version: "1.8.0+1"
119 | petitparser:
120 | dependency: transitive
121 | description:
122 | name: petitparser
123 | url: "https://pub.dartlang.org"
124 | source: hosted
125 | version: "2.4.0"
126 | quiver:
127 | dependency: transitive
128 | description:
129 | name: quiver
130 | url: "https://pub.dartlang.org"
131 | source: hosted
132 | version: "2.0.5"
133 | sky_engine:
134 | dependency: transitive
135 | description: flutter
136 | source: sdk
137 | version: "0.0.99"
138 | source_span:
139 | dependency: transitive
140 | description:
141 | name: source_span
142 | url: "https://pub.dartlang.org"
143 | source: hosted
144 | version: "1.5.5"
145 | stack_trace:
146 | dependency: transitive
147 | description:
148 | name: stack_trace
149 | url: "https://pub.dartlang.org"
150 | source: hosted
151 | version: "1.9.3"
152 | stream_channel:
153 | dependency: transitive
154 | description:
155 | name: stream_channel
156 | url: "https://pub.dartlang.org"
157 | source: hosted
158 | version: "2.0.0"
159 | string_scanner:
160 | dependency: transitive
161 | description:
162 | name: string_scanner
163 | url: "https://pub.dartlang.org"
164 | source: hosted
165 | version: "1.0.5"
166 | term_glyph:
167 | dependency: transitive
168 | description:
169 | name: term_glyph
170 | url: "https://pub.dartlang.org"
171 | source: hosted
172 | version: "1.1.0"
173 | test_api:
174 | dependency: transitive
175 | description:
176 | name: test_api
177 | url: "https://pub.dartlang.org"
178 | source: hosted
179 | version: "0.2.11"
180 | typed_data:
181 | dependency: transitive
182 | description:
183 | name: typed_data
184 | url: "https://pub.dartlang.org"
185 | source: hosted
186 | version: "1.1.6"
187 | vector_math:
188 | dependency: transitive
189 | description:
190 | name: vector_math
191 | url: "https://pub.dartlang.org"
192 | source: hosted
193 | version: "2.0.8"
194 | xml:
195 | dependency: transitive
196 | description:
197 | name: xml
198 | url: "https://pub.dartlang.org"
199 | source: hosted
200 | version: "3.5.0"
201 | sdks:
202 | dart: ">=2.4.0 <3.0.0"
203 |
--------------------------------------------------------------------------------
/pubspec.yaml:
--------------------------------------------------------------------------------
1 | name: xpath_parse
2 | description: xpath package for dart, uses a path expression to select node or a list of nodes from an XML document.
3 | version: 1.0.2
4 | author: codingfd
5 | homepage: "https://github.com/codingfd/xpath"
6 |
7 | environment:
8 | sdk: ">=2.2.0 <3.0.0"
9 |
10 | dependencies:
11 | flutter:
12 | sdk: flutter
13 | html: ^0.14.0+3
14 |
15 | dev_dependencies:
16 | flutter_test:
17 | sdk: flutter
18 |
19 | # For information on the generic Dart part of this file, see the
20 | # following page: https://dart.dev/tools/pub/pubspec
21 |
22 | # The following section is specific to Flutter.
23 | flutter:
24 |
25 | # To add assets to your package, add an assets section, like this:
26 | # assets:
27 | # - images/a_dot_burr.jpeg
28 | # - images/a_dot_ham.jpeg
29 | #
30 | # For details regarding assets in packages, see
31 | # https://flutter.dev/assets-and-images/#from-packages
32 | #
33 | # An image asset can refer to one or more resolution-specific "variants", see
34 | # https://flutter.dev/assets-and-images/#resolution-aware.
35 |
36 | # To add custom fonts to your package, add a fonts section here,
37 | # in this "flutter" section. Each entry in this list should have a
38 | # "family" key with the font family name, and a "fonts" key with a
39 | # list giving the asset and other descriptors for the font. For
40 | # example:
41 | # fonts:
42 | # - family: Schyler
43 | # fonts:
44 | # - asset: fonts/Schyler-Regular.ttf
45 | # - asset: fonts/Schyler-Italic.ttf
46 | # style: italic
47 | # - family: Trajan Pro
48 | # fonts:
49 | # - asset: fonts/TrajanPro.ttf
50 | # - asset: fonts/TrajanPro_Bold.ttf
51 | # weight: 700
52 | #
53 | # For details regarding fonts in packages, see
54 | # https://flutter.dev/custom-fonts/#from-packages
55 |
--------------------------------------------------------------------------------
/test/xpath_test.dart:
--------------------------------------------------------------------------------
1 | import 'package:flutter_test/flutter_test.dart';
2 | import 'package:xpath_parse/xpath_selector.dart';
3 |
4 | final String html = '''
5 |
6 |
7 | head
8 |
9 | end
10 |
11 | ''';
12 |
/// Runs a series of XPath queries against the [html] fixture and prints the
/// results.
///
/// Nothing is asserted: the test fails only if building the document or
/// evaluating one of the queries throws.
Future main() async {
  test('adds one to input values', () async {
    final document = XPath.source(html);

    // Prints `query(expression).list()`.
    void printList(String expression) =>
        print(document.query(expression).list());

    // Prints `query(expression).get()`.
    void printGet(String expression) =>
        print(document.query(expression).get());

    // Text and attribute extraction.
    printList("//div/a/text()");
    printGet("//div/a/@href");

    // Attribute predicates: presence, =, ^=, $= and *=.
    printList("//div[@class]/text()");
    printGet("//div[@class='head']/text()");
    printGet("//div[@class^='he']/text()");
    printGet("//div[@class\$='nd']/text()");
    printGet("//div[@class*='ea']/text()");

    // Position predicates: index, last() and position() comparisons.
    printGet("//table//td[1]/text()");
    printGet("//table//td[last()]/text()");
    printList("//table//td[position()<3]/text()");
    printList("//table//td[position()>2]/text()");
  });
}
29 |
--------------------------------------------------------------------------------