├── .github
│   └── workflows
│       └── ci.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── LICENSE.rapidxml
├── README.md
├── c_src
│   ├── exml.cpp
│   ├── rapidxml.hpp
│   ├── rapidxml_iterators.hpp
│   ├── rapidxml_print.hpp
│   └── rapidxml_utils.hpp
├── include
│   ├── exml.hrl
│   └── exml_stream.hrl
├── rebar.config
├── rebar.lock
├── src
│   ├── exml.app.src
│   ├── exml.erl
│   ├── exml_nif.erl
│   ├── exml_query.erl
│   └── exml_stream.erl
├── test
│   ├── exml_properties_tests.erl
│   ├── exml_query_tests.erl
│   ├── exml_stream_tests.erl
│   └── exml_tests.erl
└── tools
    ├── benchmarks.erl
    └── get_included_files_h.erl
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
# Continuous integration: build, test, dialyze and upload coverage for every
# supported OTP release.
name: ci

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]
  workflow_dispatch:

jobs:
  test:
    name: OTP ${{matrix.otp_vsn}}
    strategy:
      matrix:
        otp_vsn: ['28', '27', '26']
        rebar_vsn: ['3.25.0']
    runs-on: 'ubuntu-24.04'
    env:
      # The matrix key is `otp_vsn`; `matrix.otp` does not exist and expanded
      # to an empty string.
      OTPVER: ${{ matrix.otp_vsn }}
    steps:
      - uses: actions/checkout@v4
      - uses: erlef/setup-beam@v1
        with:
          otp-version: ${{ matrix.otp_vsn }}
          rebar3-version: ${{ matrix.rebar_vsn }}
      - uses: actions/cache@v4
        name: Cache
        with:
          path: _build
          # The cache version ("1") precedes the lock-file hash so that the
          # restore-keys prefix below can actually match an older cache of the
          # same version when rebar.lock changes.
          key: ${{ runner.os }}-build-${{ matrix.otp_vsn }}-1-${{ hashFiles(format('rebar.lock')) }}
          restore-keys: ${{ runner.os }}-build-${{ matrix.otp_vsn }}-1-
      - run: rebar3 as test compile -d
      - run: rebar3 eunit
      - run: rebar3 dialyzer
      - run: rebar3 as test codecov analyze
      - run: gcov -o c_src exml
      - uses: codecov/codecov-action@v5
        with:
          name: Upload coverage reports to Codecov
          token: ${{ secrets.CODECOV_TOKEN }}
          fail_ci_if_error: true
          verbose: true
43 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 | /.eunit/
3 | /.rebar/
4 | /c_src/*o
5 | /c_src/*d
6 | /c_src/*gcda
7 | /c_src/*gcno
8 | /deps/
9 | /ebin/
10 | /logs/
11 | /priv/*so
12 | *.beam
13 | /rebar3
14 | doc/
15 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # [3.0.2] 2018-08-27
2 |
3 | ## Fixed
4 |
5 | * "Infinite stream" parser crashed with segfault for certain payloads. #39 #40
6 |
7 | # [3.0.1] 2018-05-22
8 |
9 | ## Fixed
10 |
11 | * Workaround to `enif_inspect_binary` returning corrupted data #36
12 | * Allow encoding other stream elements such as `streamstart` and `streamend` #34
13 |
14 | ## Added
15 |
16 | * New API to query elements with specific attribute - #31
17 | This includes path queries as well.
18 |
19 | # [3.0.0] 2018-05-04
20 |
21 | ## Changed
22 |
23 | * Replaced expat with RapidXML
24 |
25 | # [2.5.0] 2018-05-04
26 |
27 | ## Added
28 |
29 | * A new API to query elements with specific namespaces. Path querying is extended with new selectors as well.
30 | * Child element size (in bytes) limit may be configured.
31 | * Stream opening tag is now configurable.
32 |
33 | ## Changed
34 |
35 | * Testable with `rebar3`; cover is now enabled.
36 |
37 | ## Misc
38 |
39 | * C sources reformatting.
40 |
41 | # [2.4.1] 2016-12-17
42 |
43 | ## Changed
44 |
45 | - C code building on OS X and FreeBSD
46 |
47 | # [2.4.0] 2016-10-07
48 |
49 | ## Added
50 |
51 | - support for Erlang/OTP 17 to 19
52 |
53 | ## Changed
54 |
55 | - improved integration with `dialyzer`
56 | - improved integration with `rebar3`
57 |
58 | # [2.3.0] 2016-05-19
59 |
60 | ## Removed
61 |
62 | - support for single `#xmlel` as a child
63 |
64 | # [2.2.0] 2015-10-09
65 |
66 | ## Added
67 |
68 | - escaping attr value: #10
69 | - escaping data tags: #14
70 |
71 | ## Changed
72 |
73 | - improved type specs: #14
74 | - improved binary allocation: #14
75 | - improved stanza size calculation: #14
76 |
77 | # [2.1.5] 2014-09-29
78 |
79 | ## Fixed
80 |
81 | - memory leak in `exml_event`: #8 by @RGafiyatullin
82 |
83 | # [2.1.4] 2014-05-12
84 |
85 | ## Added
86 |
87 | - support for XMPP over WebSockets as in [RFC 7395](https://tools.ietf.org/html/rfc7395)
88 |
89 | # 2.1.0
90 |
91 | - Pretty printing XML elements
92 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 |
--------------------------------------------------------------------------------
/LICENSE.rapidxml:
--------------------------------------------------------------------------------
1 | Use of this software is granted under one of the following two licenses,
2 | to be chosen freely by the user.
3 |
4 | 1. Boost Software License - Version 1.0 - August 17th, 2003
5 | ===============================================================================
6 |
7 | Copyright (c) 2006, 2007 Marcin Kalicinski
8 |
9 | Permission is hereby granted, free of charge, to any person or organization
10 | obtaining a copy of the software and accompanying documentation covered by
11 | this license (the "Software") to use, reproduce, display, distribute,
12 | execute, and transmit the Software, and to prepare derivative works of the
13 | Software, and to permit third-parties to whom the Software is furnished to
14 | do so, all subject to the following:
15 |
16 | The copyright notices in the Software and this entire statement, including
17 | the above license grant, this restriction and the following disclaimer,
18 | must be included in all copies of the Software, in whole or in part, and
19 | all derivative works of the Software, unless such copies or derivative
20 | works are solely in the form of machine-executable object code generated by
21 | a source language processor.
22 |
23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
26 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
27 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
28 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 | DEALINGS IN THE SOFTWARE.
30 |
31 | 2. The MIT License
32 | ===============================================================================
33 |
34 | Copyright (c) 2006, 2007 Marcin Kalicinski
35 |
36 | Permission is hereby granted, free of charge, to any person obtaining a copy
37 | of this software and associated documentation files (the "Software"), to deal
38 | in the Software without restriction, including without limitation the rights
39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
40 | of the Software, and to permit persons to whom the Software is furnished to do so,
41 | subject to the following conditions:
42 |
43 | The above copyright notice and this permission notice shall be included in all
44 | copies or substantial portions of the Software.
45 |
46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
49 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
52 | IN THE SOFTWARE.
53 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | exml
2 | ====
3 |
4 | [CI status](https://github.com/esl/exml/actions/workflows/ci.yml)
5 | [Code coverage](https://codecov.io/gh/esl/exml)
6 | [Hex package](https://hex.pm/packages/hexml)
7 | [Hex docs](https://hexdocs.pm/hexml/)
8 | [Hex downloads](https://hex.pm/packages/hexml)
9 | [License](https://github.com/esl/hexml/blob/master/LICENSE)
10 |
11 | **exml** is an Erlang library for parsing XML streams and doing complex XML structures manipulation.
12 |
13 | Building
14 | ========
15 |
16 | **exml** is a rebar3-compatible OTP application; run `rebar3 compile` to build it. A C++11 compiler is required.
17 |
18 |
19 | Using
20 | =====
21 |
22 | **exml** can parse both XML streams as well as single XML documents at once.
23 |
24 | To parse a whole XML document:
25 |
26 | ```erlang
27 | {ok, Parser} = exml:parse(<<"<my_xml_doc/>">>).
28 | ```
29 |
30 | To generate an XML document from Erlang terms:
31 |
32 | ```erlang
33 | El = #xmlel{name = <<"foo">>,
34 | attrs = #{<<"attr1">> => <<"bar">>},
35 | children = [{xmlcdata, <<"Some Value">>, escaped}]},
36 | exml:to_list(El).
37 | ```
38 | or (pastable into `erl` shell):
39 | ```erlang
40 | El = {xmlel, <<"foo">>,
41 | #{<<"attr1">> => <<"bar">>},
42 | [{xmlcdata, <<"Some Value">>, escaped}]}.
43 | exml:to_list(El).
44 | ```
45 |
46 | Which results in:
47 | ```xml
48 | <foo attr1='bar'>Some Value</foo>
49 | ```
50 |
51 | `exml:to_binary/1` works similarly.
52 |
53 | There's also `exml:to_pretty_iolist/1,3` for a quick'n'dirty document preview (pastable into `erl`):
54 |
55 | ```erlang
56 | rr("include/exml.hrl").
57 | El = #xmlel{name = <<"outer">>,
58 | attrs = #{<<"attr1">> => <<"val1">>,
59 | <<"attr2">> => <<"val-two">>},
60 | children = [#xmlel{name = <<"inner-childless">>},
61 | #xmlel{name = <<"inner-w-children">>,
62 | children = [#xmlel{name = <<"a">>}]}]}.
63 | io:format("~s", [exml:to_pretty_iolist(El)]).
64 | ```
65 | which prints:
66 | ```xml
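67 | <outer attr1='val1' attr2='val-two'>
68 |   <inner-childless/>
69 |   <inner-w-children>
70 |     <a/>
71 |   </inner-w-children>
72 | </outer>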
73 | ```
74 |
75 | For an example of using the streaming API see `test/exml_stream_tests.erl`.
76 |
77 | XML Tree navigation
78 | =====
79 |
80 | The `exml_query` module exposes powerful helper functions to navigate the tree, please refer to the documentation available.
81 |
82 |
83 | Notes
84 | =====
85 |
86 | The implementation uses C++ thread-local memory pools to improve cache locality and memory allocation patterns. By default `RAPIDXML_STATIC_POOL_SIZE` is 10 MB and `RAPIDXML_DYNAMIC_POOL_SIZE` is 2 MB; override either at compilation time if different sizes are desired. To further improve performance, the NIF calls do not check input size, do not timeslice themselves, and do not run on dirty schedulers: if called with inputs that are too big, the NIFs can starve the VM. It is up to the developer to throttle input sizes and fine-tune the memory pool sizes.
87 |
--------------------------------------------------------------------------------
/c_src/exml.cpp:
--------------------------------------------------------------------------------
1 | #ifndef RAPIDXML_STATIC_POOL_SIZE
2 | #define RAPIDXML_STATIC_POOL_SIZE (10 * 1024 * 1024)
3 | #endif
4 | #ifndef RAPIDXML_DYNAMIC_POOL_SIZE
5 | #define RAPIDXML_DYNAMIC_POOL_SIZE (2 * 1024 * 1024)
6 | #endif
7 |
8 | #include "rapidxml.hpp"
9 | #include "rapidxml_print.hpp"
10 | #include
11 |
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 |
// Byte string used for XML names; matches the unsigned char data rapidxml is
// instantiated with in this file.
using ustring = std::vector<unsigned char>;
21 |
22 | class xml_document {
23 | public:
24 | struct ParseResult {
25 | bool eof = false;
26 | bool has_error = false;
27 | std::string error_message;
28 | const unsigned char *rest = nullptr;
29 | };
30 |
31 | template
32 | ParseResult parse(unsigned char *text, xml_document &parent) {
33 | return with_error_handling(
34 | [&] { return impl.parse(text, parent.impl); });
35 | }
36 |
37 | template ParseResult parse(unsigned char *text) {
38 | return with_error_handling([&] { return impl.parse(text); });
39 | }
40 |
41 | void clear() { impl.clear(); }
42 |
43 | rapidxml::xml_document impl;
44 |
45 | private:
46 | template ParseResult with_error_handling(F &&f) {
47 | ParseResult result;
48 | try {
49 | result.rest = std::forward(f)();
50 | } catch (const rapidxml::eof_error &e) {
51 | result.eof = true;
52 | result.has_error = true;
53 | result.error_message = e.what();
54 | } catch (const rapidxml::parse_error &e) {
55 | result.has_error = true;
56 | result.error_message = e.what();
57 | }
58 | return result;
59 | }
60 | };
61 |
62 | namespace {
63 | ERL_NIF_TERM atom_ok;
64 | ERL_NIF_TERM atom_error;
65 | ERL_NIF_TERM atom_undefined;
66 | ERL_NIF_TERM atom_xmlel;
67 | ERL_NIF_TERM atom_xmlcdata;
68 | ERL_NIF_TERM atom_xmlstreamstart;
69 | ERL_NIF_TERM atom_xmlstreamend;
70 | ERL_NIF_TERM atom_pretty;
71 | ERL_NIF_TERM atom_escaped;
72 | ERL_NIF_TERM atom_cdata;
73 | ERL_NIF_TERM atom_true;
74 | constexpr const unsigned char EMPTY[1] = {0};
75 |
76 | xml_document &get_static_doc() {
77 | static thread_local xml_document doc;
78 | doc.clear();
79 | return doc;
80 | }
81 |
82 | } // namespace
83 |
84 | struct Parser {
85 | ustring stream_tag;
86 | std::uint64_t max_element_size = 0;
87 | bool infinite_stream = false;
88 |
89 | static thread_local std::vector buffer;
90 | static thread_local std::vector term_buffer;
91 |
92 | bool copy_buffer(ErlNifEnv *env, ERL_NIF_TERM buf) {
93 | buffer.clear();
94 |
95 | ErlNifBinary bin;
96 | if (enif_inspect_binary(env, buf, &bin)) {
97 | buffer.insert(buffer.end(), bin.data, bin.data + bin.size);
98 | } else if (enif_is_list(env, buf)) {
99 | for (ERL_NIF_TERM head; enif_get_list_cell(env, buf, &head, &buf);) {
100 | if (!enif_inspect_binary(env, head, &bin))
101 | return false;
102 |
103 | buffer.insert(buffer.end(), bin.data, bin.data + bin.size);
104 | }
105 | } else {
106 | return false;
107 | }
108 |
109 | buffer.push_back('\0');
110 | return true;
111 | }
112 |
113 | void reset() {
114 | stream_tag.clear();
115 | buffer.clear();
116 | }
117 | };
118 |
119 | thread_local std::vector Parser::buffer;
120 | thread_local std::vector Parser::term_buffer;
121 |
122 | struct ParseCtx {
123 | ErlNifEnv *env;
124 | Parser *parser;
125 | };
126 |
127 | namespace {
128 | ErlNifResourceType *parser_type;
129 |
130 | constexpr int default_parse_flags() {
131 | return rapidxml::parse_no_string_terminators | rapidxml::parse_validate_control_chars;
132 | }
133 |
134 | constexpr int parse_one() {
135 | return rapidxml::parse_parse_one | default_parse_flags();
136 | }
137 |
138 | constexpr int parse_open_only() {
139 | return rapidxml::parse_open_only | default_parse_flags();
140 | }
141 |
142 | ERL_NIF_TERM to_subbinary(ParseCtx &ctx, const unsigned char *text,
143 | std::size_t len) {
144 | ERL_NIF_TERM binary;
145 | unsigned char *bin_data = enif_make_new_binary(ctx.env, len, &binary);
146 | std::copy(text, text + len, bin_data);
147 | return binary;
148 | }
149 |
150 | ERL_NIF_TERM get_xmlcdata(ParseCtx &ctx,
151 | rapidxml::xml_node *node) {
152 | return enif_make_tuple3(ctx.env, atom_xmlcdata,
153 | to_subbinary(ctx, node->value(), node->value_size()),
154 | atom_escaped);
155 | }
156 |
157 | ERL_NIF_TERM merge_data_nodes(ParseCtx &ctx,
158 | rapidxml::xml_node *node,
159 | std::size_t total_size) {
160 | ERL_NIF_TERM bin;
161 | unsigned char *it = enif_make_new_binary(ctx.env, total_size, &bin);
162 |
163 | while (total_size > 0) {
164 | it = std::copy(node->value(), node->value() + node->value_size(), it);
165 | total_size -= node->value_size();
166 | node = node->next_sibling();
167 | }
168 |
169 | return enif_make_tuple3(ctx.env, atom_xmlcdata, bin, atom_escaped);
170 | }
171 |
172 | void append_pending_data_nodes(ParseCtx &ctx,
173 | std::vector &children,
174 | rapidxml::xml_node *node,
175 | const std::size_t pending) {
176 | if (pending == 0)
177 | return;
178 |
179 | if (pending == node->value_size())
180 | children.push_back(get_xmlcdata(ctx, node));
181 | else
182 | children.push_back(merge_data_nodes(ctx, node, pending));
183 | }
184 |
185 | ERL_NIF_TERM make_xmlel(ParseCtx &ctx, rapidxml::xml_node *node);
186 |
187 | ERL_NIF_TERM get_children_tuple(ParseCtx &ctx,
188 | rapidxml::xml_node *node) {
189 | std::vector &children = Parser::term_buffer;
190 | std::size_t begin = children.size();
191 |
192 | rapidxml::xml_node *first_data_node = nullptr;
193 | std::size_t pending_data_size = 0;
194 |
195 | for (rapidxml::xml_node *child = node->first_node(); child;
196 | child = child->next_sibling()) {
197 | const bool is_data_node = child->type() == rapidxml::node_data ||
198 | child->type() == rapidxml::node_cdata;
199 |
200 | if (is_data_node) {
201 | if (pending_data_size == 0)
202 | first_data_node = child;
203 | pending_data_size += child->value_size();
204 | } else {
205 | append_pending_data_nodes(ctx, children, first_data_node,
206 | pending_data_size);
207 | pending_data_size = 0;
208 | if (child->type() == rapidxml::node_element)
209 | children.push_back(make_xmlel(ctx, child));
210 | }
211 | }
212 |
213 | append_pending_data_nodes(ctx, children, first_data_node, pending_data_size);
214 |
215 | std::size_t size = children.size() - begin;
216 | if (size == 0)
217 | return enif_make_list(ctx.env, 0);
218 |
219 | ERL_NIF_TERM arr =
220 | enif_make_list_from_array(ctx.env, children.data() + begin, size);
221 | children.erase(children.end() - size, children.end());
222 | return arr;
223 | }
224 |
225 | std::pair
226 | node_name(rapidxml::xml_node *node) {
227 | const unsigned char *start = node->name();
228 | std::size_t len = node->name_size();
229 | if (node->prefix()) {
230 | start = node->prefix();
231 | len += node->prefix_size() + 1;
232 | }
233 | return {start, len};
234 | }
235 |
236 | ERL_NIF_TERM make_node_name_binary(ParseCtx &ctx,
237 | rapidxml::xml_node *node) {
238 | const unsigned char *start;
239 | std::size_t len;
240 | std::tie(start, len) = node_name(node);
241 | return to_subbinary(ctx, start, len);
242 | }
243 |
244 | ERL_NIF_TERM make_attr_tuple(ParseCtx &ctx,
245 | rapidxml::xml_attribute *attr) {
246 | ERL_NIF_TERM name = to_subbinary(ctx, attr->name(), attr->name_size());
247 | ERL_NIF_TERM value = to_subbinary(ctx, attr->value(), attr->value_size());
248 | return enif_make_tuple2(ctx.env, name, value);
249 | }
250 |
251 | ERL_NIF_TERM get_attributes(ParseCtx &ctx, rapidxml::xml_node *node) {
252 | ERL_NIF_TERM attrs_term = enif_make_new_map(ctx.env);
253 |
254 | for (rapidxml::xml_attribute *attr = node->first_attribute();
255 | attr; attr = attr->next_attribute()) {
256 | ERL_NIF_TERM key = to_subbinary(ctx, attr->name(), attr->name_size());
257 | ERL_NIF_TERM value = to_subbinary(ctx, attr->value(), attr->value_size());
258 | enif_make_map_put(ctx.env, attrs_term, key, value, &attrs_term);
259 | }
260 |
261 | return attrs_term;
262 | }
263 |
264 | ERL_NIF_TERM make_stream_start_tuple(ParseCtx &ctx,
265 | rapidxml::xml_node *node) {
266 |
267 | ERL_NIF_TERM name_term = make_node_name_binary(ctx, node);
268 | ERL_NIF_TERM attrs_term = get_attributes(ctx, node);
269 | return enif_make_tuple3(ctx.env, atom_xmlstreamstart, name_term, attrs_term);
270 | }
271 |
272 | ERL_NIF_TERM make_stream_end_tuple(ParseCtx &ctx) {
273 | ERL_NIF_TERM name;
274 | unsigned char *data =
275 | enif_make_new_binary(ctx.env, ctx.parser->stream_tag.size(), &name);
276 |
277 | std::copy(ctx.parser->stream_tag.begin(), ctx.parser->stream_tag.end(), data);
278 |
279 | return enif_make_tuple2(ctx.env, atom_xmlstreamend, name);
280 | }
281 |
282 | ERL_NIF_TERM make_xmlel(ParseCtx &ctx,
283 | rapidxml::xml_node *node) {
284 | ERL_NIF_TERM name_term = make_node_name_binary(ctx, node);
285 | ERL_NIF_TERM attrs_term = get_attributes(ctx, node);
286 | ERL_NIF_TERM children_term = get_children_tuple(ctx, node);
287 | return enif_make_tuple4(ctx.env, atom_xmlel, name_term, attrs_term, children_term);
288 | }
289 |
290 | bool build_children(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM children,
291 | rapidxml::xml_node &node);
292 |
293 | bool build_cdata(ErlNifEnv *env, xml_document &doc, const ERL_NIF_TERM elem[],
294 | rapidxml::xml_node &node) {
295 | ErlNifBinary bin;
296 | if (!enif_inspect_iolist_as_binary(env, elem[1], &bin))
297 | return false;
298 |
299 | rapidxml::node_type cdata_type;
300 | if (enif_compare(atom_escaped, elem[2]) == 0)
301 | cdata_type = rapidxml::node_data;
302 | else if (enif_compare(atom_cdata, elem[2]) == 0)
303 | cdata_type = rapidxml::node_cdata;
304 | else
305 | return false;
306 |
307 | auto child = doc.impl.allocate_node(cdata_type);
308 | child->value(bin.size > 0 ? bin.data : EMPTY, bin.size);
309 | node.append_node(child);
310 | return true;
311 | }
312 |
313 | bool build_attrs(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM attrs,
314 | rapidxml::xml_node &node) {
315 |
316 | if (!enif_is_map(env, attrs))
317 | return false;
318 |
319 | ErlNifMapIterator iter;
320 | enif_map_iterator_create(env, attrs, &iter, ERL_NIF_MAP_ITERATOR_FIRST);
321 |
322 | ERL_NIF_TERM map_key, map_value;
323 | while (enif_map_iterator_get_pair(env, &iter, &map_key, &map_value)) {
324 | ErlNifBinary key, value;
325 | if (!enif_inspect_iolist_as_binary(env, map_key, &key))
326 | return false;
327 |
328 | if (!enif_inspect_iolist_as_binary(env, map_value, &value))
329 | return false;
330 |
331 | auto attr = doc.impl.allocate_attribute(key.size > 0 ? key.data : EMPTY,
332 | value.size > 0 ? value.data : EMPTY,
333 | key.size, value.size);
334 | node.append_attribute(attr);
335 | enif_map_iterator_next(env, &iter);
336 | }
337 | enif_map_iterator_destroy(env, &iter);
338 |
339 | return true;
340 | }
341 |
342 | bool build_el(ErlNifEnv *env, xml_document &doc, const ERL_NIF_TERM elem[],
343 | rapidxml::xml_node &node) {
344 | ErlNifBinary name;
345 | if (!enif_inspect_binary(env, elem[1], &name))
346 | return false;
347 |
348 | auto child = doc.impl.allocate_node(rapidxml::node_element);
349 | child->name(name.size > 0 ? name.data : EMPTY, name.size);
350 | node.append_node(child);
351 |
352 | if (!build_attrs(env, doc, elem[2], *child))
353 | return false;
354 | if (!build_children(env, doc, elem[3], *child))
355 | return false;
356 |
357 | return true;
358 | }
359 | // Dispatches one child term to build_cdata or build_el depending on its record tag.
360 | bool build_child(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM child,
361 |                  rapidxml::xml_node &node) {
362 |   // Returns false for anything that is not an {xmlcdata, _, _} or
363 |   // {xmlel, _, _, _} tuple, or when building that record fails.
364 |   const ERL_NIF_TERM *fields;
365 |   int size;
366 |   if (!enif_get_tuple(env, child, &size, &fields))
367 |     return false;
368 |   const bool is_cdata =
369 |       size == 3 && enif_compare(atom_xmlcdata, fields[0]) == 0;
370 |   if (is_cdata)
371 |     return build_cdata(env, doc, fields, node);
372 |
373 |   const bool is_element =
374 |       size == 4 && enif_compare(atom_xmlel, fields[0]) == 0;
375 |   if (is_element)
376 |     return build_el(env, doc, fields, node);
377 |   return false;
378 | }
379 | // Builds every term of the Erlang list `children` as a child of `node`, in list order.
380 | bool build_children(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM children,
381 |                     rapidxml::xml_node &node) {
382 |   // Returns false when `children` is not a list or any item fails to build.
383 |   if (!enif_is_list(env, children))
384 |     return false;
385 |
386 |   // `children` is advanced to the tail of the list on every iteration.
387 |   ERL_NIF_TERM item;
388 |   while (enif_get_list_cell(env, children, &item, &children)) {
389 |     if (!build_child(env, doc, item, node))
390 |       return false;
391 |   }
392 |   return true;
393 | }
394 | // Prints `node` with rapidxml::print using `flags` and returns the output as a newly created Erlang binary.
395 | ERL_NIF_TERM node_to_binary(ErlNifEnv *env,
396 | rapidxml::xml_node &node,
397 | int flags) {
398 | static thread_local std::vector print_buffer; // per-thread scratch buffer that is kept between calls
399 | print_buffer.clear();
400 | // Serialize into the scratch buffer first; the binary is created once the final size is known.
401 | rapidxml::print(std::back_inserter(print_buffer), node, flags);
402 |
403 | ERL_NIF_TERM ret_binary;
404 | unsigned char *data =
405 | enif_make_new_binary(env, print_buffer.size(), &ret_binary);
406 | std::copy(print_buffer.begin(), print_buffer.end(), data);
407 | return ret_binary;
408 | }
409 | // Length in bytes of the tightest closing tag of the stream element: "</" + name + ">".
410 | std::size_t stream_closing_tag_size(Parser *parser) {
411 |   return parser->stream_tag.size() + 3; // 3 covers '<', '/' and '>'
412 | }
413 | // Tells whether Parser::buffer holds "</" + stream tag name + optional whitespace + ">" at `offset`.
414 | bool has_stream_closing_tag(Parser *parser, std::size_t offset) {
415 |   const auto &buf = Parser::buffer;
416 |   const auto &tag = parser->stream_tag;
417 |
418 |   // Too short to contain even the tightest form of the closing tag.
419 |   if (offset + stream_closing_tag_size(parser) > buf.size())
420 |     return false;
421 |   if (!(buf[offset] == '<' && buf[offset + 1] == '/'))
422 |     return false;
423 |   if (!std::equal(tag.begin(), tag.end(), buf.begin() + offset + 2))
424 |     return false;
425 |
426 |   // Step over the name, then over any whitespace before the closing '>'.
427 |   std::size_t pos = offset + 2 + tag.size();
428 |   for (; pos < buf.size() - 1 && std::isspace(buf[pos]); ++pos) {
429 |   }
430 |
431 |   return buf[pos] == '>';
432 | }
433 |
434 | } // namespace
435 |
436 | extern "C" {
437 | static void delete_parser(ErlNifEnv *, void *parser) { // resource destructor passed to enif_open_resource_type in load()
438 | static_cast(parser)->~Parser(); // runs the Parser destructor only; this function does not free the memory itself
439 | }
440 | // NIF load callback: opens the parser resource type and creates the atoms used throughout this module.
441 | static int load(ErlNifEnv *env, void **, ERL_NIF_TERM) {
442 | parser_type = enif_open_resource_type(
443 | env, "exml_nif", "parser", &delete_parser, ERL_NIF_RT_CREATE, nullptr);
444 | atom_ok = enif_make_atom(env, "ok");
445 | atom_error = enif_make_atom(env, "error");
446 | atom_undefined = enif_make_atom(env, "undefined");
447 | atom_xmlel = enif_make_atom(env, "xmlel");
448 | atom_xmlcdata = enif_make_atom(env, "xmlcdata");
449 | atom_xmlstreamstart = enif_make_atom(env, "xmlstreamstart");
450 | atom_xmlstreamend = enif_make_atom(env, "xmlstreamend");
451 | atom_pretty = enif_make_atom(env, "pretty");
452 | atom_escaped = enif_make_atom(env, "escaped");
453 | atom_cdata = enif_make_atom(env, "cdata");
454 | atom_true = enif_make_atom(env, "true");
455 |
456 | get_static_doc().impl.set_allocator(enif_alloc, enif_free); // route the document's allocations through the NIF allocator
457 |
458 | return 0; // always reports success; the result of enif_open_resource_type is not checked
459 | }
460 | // NIF unload callback; this module has nothing to tear down.
461 | static void unload(ErlNifEnv *, void *) {
462 |   // intentionally empty
463 | }
464 | // NIF create/2: builds a Parser resource from (MaxElementSize, InfiniteStream) and returns {ok, Parser}.
465 | static ERL_NIF_TERM create(ErlNifEnv *env, int,
466 |                            const ERL_NIF_TERM argv[]) {
467 |   // Validate argv[0] before allocating anything: a resource that has been
468 |   // allocated but is never released would leak on the badarg path.
469 |   ErlNifUInt64 max_element_size;
470 |   if (!enif_get_uint64(env, argv[0], &max_element_size))
471 |     return enif_make_badarg(env);
472 |   void *mem = enif_alloc_resource(parser_type, sizeof(Parser));
473 |   Parser *parser = new (mem) Parser;
474 |   parser->max_element_size = static_cast(max_element_size);
475 |   if (enif_compare(atom_true, argv[1]) == 0) // only the atom `true` enables it
476 |     parser->infinite_stream = true;
477 |   ERL_NIF_TERM term = enif_make_resource(env, parser);
478 |   enif_release_resource(parser); // drop our reference; the term keeps the resource alive
479 |   return enif_make_tuple2(env, atom_ok, term);
480 | }
481 | // NIF parse_next/2: copies argv[1] into the parse buffer and tries to read one item (stream start, element or stream end) from it.
482 | static ERL_NIF_TERM parse_next(ErlNifEnv *env, int,
483 | const ERL_NIF_TERM argv[]) {
484 | Parser *parser;
485 | if (!enif_get_resource(env, argv[0], parser_type,
486 | reinterpret_cast(&parser)))
487 | return enif_make_badarg(env);
488 |
489 | if (!parser->copy_buffer(env, argv[1]))
490 | return enif_make_badarg(env);
491 |
492 | // Skip initial whitespace even if we don't manage to parse anything.
493 | // Also needed for has_stream_closing_tag to recognize the tag.
494 | std::size_t offset = 0;
495 | while (offset < Parser::buffer.size() - 1 && // NOTE(review): assumes the buffer is never empty (size() - 1 would wrap) - confirm in copy_buffer
496 | std::isspace(Parser::buffer[offset]))
497 | ++offset;
498 |
499 | ParseCtx ctx{env, parser};
500 | xml_document::ParseResult result;
501 | ERL_NIF_TERM element;
502 | const char *error_msg = nullptr; // stays null until one of the steps below reports a failure
503 |
504 | xml_document &doc = get_static_doc();
505 | Parser::term_buffer.clear();
506 | // Parses at `offset`, remembers the first node's name in parser->stream_tag and builds the stream start term.
507 | auto parseStreamOpen = [&] {
508 | result = doc.parse(Parser::buffer.data() + offset);
509 | if (!result.has_error) {
510 | if (parser->max_element_size && // a limit of 0 disables the size check
511 | result.rest - Parser::buffer.data() - offset > parser->max_element_size) {
512 | error_msg = "element too big";
513 | } else {
514 | auto name_tag = node_name(doc.impl.first_node());
515 | parser->stream_tag = ustring(std::get<0>(name_tag), std::get<0>(name_tag) + std::get<1>(name_tag));
516 | element = make_stream_start_tuple(ctx, doc.impl.first_node());
517 | }
518 | }
519 | };
520 | // Parses at `offset` again and reports whether the first node carries the stored stream tag name.
521 | auto hasStreamReopen = [&] {
522 | auto parseOpenRes =
523 | doc.parse(Parser::buffer.data() + offset);
524 | if (parseOpenRes.has_error)
525 | return false;
526 | auto tag_name = node_name(doc.impl.first_node());
527 | return ustring(std::get<0>(tag_name), std::get<0>(tag_name) + std::get<1>(tag_name)) ==
528 | parser->stream_tag;
529 | };
530 | // Parses at `offset` and converts the first node into an element term, subject to the size limit.
531 | auto parseElement = [&] {
532 | result = doc.parse(Parser::buffer.data() + offset);
533 | if (!result.has_error) {
534 | if (parser->max_element_size &&
535 | result.rest - Parser::buffer.data() - offset > parser->max_element_size) {
536 | error_msg = "element too big";
537 | } else {
538 | element = make_xmlel(ctx, doc.impl.first_node());
539 | }
540 | }
541 | };
542 | // Decide what to read next from the parser's stream state.
543 | if (parser->infinite_stream) {
544 | parseElement();
545 | } else if (parser->stream_tag.empty()) { // no stream tag recorded yet
546 | parseStreamOpen();
547 | } else if (has_stream_closing_tag(parser, offset)) {
548 | doc.clear();
549 | // no data after closing tag
550 | result.rest = &*Parser::buffer.rbegin(); // points at the last byte of the buffer
551 | element = make_stream_end_tuple(ctx);
552 | } else {
553 | parseElement();
554 | }
555 | // When the parse above ended at eof, the input may still be a repeated stream opening tag; handle it as a stream start.
556 | if (result.eof && hasStreamReopen()) {
557 | doc.clear();
558 | parseStreamOpen();
559 | }
560 |
561 | if (result.eof) {
562 | // Return an error if an incomplete element has at least max_element_size characters.
563 | if (parser->max_element_size &&
564 | Parser::buffer.size() - offset > parser->max_element_size) {
565 | error_msg = "element too big";
566 | } else {
567 | result.rest = Parser::buffer.data() + offset; // nothing is consumed beyond the leading whitespace
568 | element = atom_undefined; // no complete item available yet
569 | }
570 | } else if (result.has_error) {
571 | error_msg = result.error_message.c_str();
572 | }
573 |
574 | if (!error_msg) {
575 | // Return an error when null character is found.
576 | std::size_t rest_size = &Parser::buffer.back() - result.rest;
577 | if (std::strlen(reinterpret_cast(result.rest)) != rest_size) // strlen stops at the first 0 byte, so a mismatch means a 0 before the buffer's last byte
578 | error_msg = "null character found in buffer";
579 | }
580 |
581 | if (error_msg) {
582 | ERL_NIF_TERM error_message =
583 | to_subbinary(ctx, (const unsigned char *)error_msg, strlen(error_msg));
584 |
585 | return enif_make_tuple2(env, atom_error, error_message);
586 | }
587 | // Success: {ok, Element, Offset}, where Offset is the position of result.rest within the buffer.
588 | return enif_make_tuple3(
589 | env, atom_ok, element,
590 | enif_make_uint64(env, result.rest - Parser::buffer.data()));
591 | }
592 | // NIF parse/1: parses one complete XML document from the iodata in argv[0].
593 | static ERL_NIF_TERM parse(ErlNifEnv *env, int, const ERL_NIF_TERM argv[]) {
594 |   // Returns {ok, Element} or {error, Message}; raises badarg when argv[0]
595 |   // cannot be copied into the parse buffer.
596 |   Parser parser;
597 |   // Without this check a rejected argument would be ignored and the parse
598 |   // below would run on a buffer that was not filled from argv[0]
599 |   // (parse_next performs the same check).
600 |   if (!parser.copy_buffer(env, argv[0]))
601 |     return enif_make_badarg(env);
602 |   Parser::term_buffer.clear();
603 |   auto &doc = get_static_doc();
604 |   ParseCtx ctx{env, &parser};
605 |   auto result = doc.parse(Parser::buffer.data());
606 |   if (!result.has_error) {
607 |     ERL_NIF_TERM element = make_xmlel(ctx, doc.impl.first_node());
608 |     return enif_make_tuple2(env, atom_ok, element);
609 |   }
610 |   ERL_NIF_TERM error_message = to_subbinary(
611 |       ctx, (const unsigned char *)result.error_message.c_str(),
612 |       result.error_message.size());
613 |   return enif_make_tuple2(env, atom_error, error_message);
614 | }
615 | // NIF escape_cdata/2: serializes the iodata in argv[0] as a standalone text node, either escaped or wrapped in a CDATA section.
616 | static ERL_NIF_TERM escape_cdata(ErlNifEnv *env, int,
617 | const ERL_NIF_TERM argv[]) {
618 | ErlNifBinary bin;
619 | if (!enif_inspect_iolist_as_binary(env, argv[0], &bin))
620 | return enif_make_badarg(env);
621 | // argv[1] selects the node type: `escaped` -> node_data, `cdata` -> node_cdata; anything else is badarg.
622 | rapidxml::node_type cdata_type;
623 | if (enif_compare(atom_escaped, argv[1]) == 0)
624 | cdata_type = rapidxml::node_data;
625 | else if (enif_compare(atom_cdata, argv[1]) == 0)
626 | cdata_type = rapidxml::node_cdata;
627 | else
628 | return enif_make_badarg(env);
629 |
630 | rapidxml::xml_node node(cdata_type); // temporary node on the stack, not attached to any document
631 | node.value(bin.data, bin.size);
632 | return node_to_binary(env, node, rapidxml::print_no_indenting);
633 | }
634 | // NIF to_binary/2: serializes the xmlel tuple in argv[0] to a binary; argv[1] == `pretty` turns indentation on.
635 | static ERL_NIF_TERM to_binary(ErlNifEnv *env, int,
636 | const ERL_NIF_TERM argv[]) {
637 | int arity;
638 | const ERL_NIF_TERM *xmlel;
639 | if (!enif_get_tuple(env, argv[0], &arity, &xmlel))
640 | return enif_make_badarg(env);
641 | // Only 4-tuples tagged `xmlel` are accepted at the top level.
642 | if (arity != 4 || enif_compare(atom_xmlel, xmlel[0]) != 0)
643 | return enif_make_badarg(env);
644 |
645 | int flags = rapidxml::print_no_indenting;
646 | if (enif_compare(atom_pretty, argv[1]) == 0)
647 | flags = 0; // without print_no_indenting the printer indents and adds line breaks
648 |
649 | xml_document &doc = get_static_doc();
650 | if (!build_el(env, doc, xmlel, doc.impl)) // build the tree directly under the document node
651 | return enif_make_badarg(env);
652 |
653 | return node_to_binary(env, doc.impl, flags);
654 | }
655 | // NIF reset_parser/1: calls Parser::reset() on the parser resource in argv[0] and returns `ok`.
656 | static ERL_NIF_TERM reset_parser(ErlNifEnv *env, int,
657 | const ERL_NIF_TERM argv[]) {
658 | Parser *parser;
659 | if (!enif_get_resource(env, argv[0], parser_type,
660 | reinterpret_cast(&parser)))
661 | return enif_make_badarg(env); // argv[0] is not a resource of parser_type
662 |
663 | parser->reset();
664 | return atom_ok;
665 | }
666 | // NIF table: each entry is {Erlang function name, arity, C function, flags}; every entry here uses flags 0.
667 | static ErlNifFunc nif_funcs[] = {
668 | {"create", 2, create, 0}, {"parse", 1, parse, 0},
669 | {"parse_next", 2, parse_next, 0}, {"escape_cdata", 2, escape_cdata, 0},
670 | {"to_binary", 2, to_binary, 0}, {"reset_parser", 1, reset_parser, 0}};
671 | }
672 | // Registers nif_funcs for the Erlang module exml_nif; only the load and unload callbacks are supplied, the two middle slots are null.
673 | ERL_NIF_INIT(exml_nif, nif_funcs, &load, nullptr, nullptr, &unload)
674 |
--------------------------------------------------------------------------------
/c_src/rapidxml_iterators.hpp:
--------------------------------------------------------------------------------
1 | #ifndef RAPIDXML_ITERATORS_HPP_INCLUDED
2 | #define RAPIDXML_ITERATORS_HPP_INCLUDED
3 |
4 | // Copyright (C) 2006, 2009 Marcin Kalicinski
5 | // Version 1.13
6 | // Revision $DateTime: 2009/05/13 01:46:17 $
7 | //! \file rapidxml_iterators.hpp This file contains rapidxml iterators
8 |
9 | #include "rapidxml.hpp"
10 |
11 | namespace rapidxml
12 | {
13 |
14 |     //! Bidirectional iterator over the child nodes of an xml_node.
15 |     template
16 |     class node_iterator
17 |     {
18 |
19 |     public:
20 |
21 |         typedef xml_node value_type; // no `typename`: it is only valid in front of a qualified name
22 |         typedef xml_node &reference;
23 |         typedef xml_node *pointer;
24 |         typedef std::ptrdiff_t difference_type;
25 |         typedef std::bidirectional_iterator_tag iterator_category;
26 |         //! Constructs the end iterator (no current node).
27 |         node_iterator()
28 |             : m_node(0)
29 |         {
30 |         }
31 |         //! Constructs an iterator positioned at the first child of \p node.
32 |         node_iterator(xml_node *node)
33 |             : m_node(node->first_node())
34 |         {
35 |         }
36 |         //! Returns the current node; must not be called on the end iterator.
37 |         reference operator *() const
38 |         {
39 |             assert(m_node);
40 |             return *m_node;
41 |         }
42 |         //! Member access to the current node; must not be called on the end iterator.
43 |         pointer operator->() const
44 |         {
45 |             assert(m_node);
46 |             return m_node;
47 |         }
48 |         //! Pre-increment: advances to the next sibling.
49 |         node_iterator& operator++()
50 |         {
51 |             assert(m_node);
52 |             m_node = m_node->next_sibling();
53 |             return *this;
54 |         }
55 |         //! Post-increment: advances and returns the position held before the call.
56 |         node_iterator operator++(int)
57 |         {
58 |             node_iterator tmp = *this;
59 |             ++(*this); // increment the iterator object, not the `this` pointer
60 |             return tmp;
61 |         }
62 |         //! Pre-decrement: steps back to the previous sibling, which must exist.
63 |         node_iterator& operator--()
64 |         {
65 |             assert(m_node && m_node->previous_sibling());
66 |             m_node = m_node->previous_sibling();
67 |             return *this;
68 |         }
69 |         //! Post-decrement: steps back and returns the position held before the call.
70 |         node_iterator operator--(int)
71 |         {
72 |             node_iterator tmp = *this;
73 |             --(*this); // must move backwards; delegates to the pre-decrement above
74 |             return tmp;
75 |         }
76 |         //! Two iterators are equal when they refer to the same node.
77 |         bool operator ==(const node_iterator &rhs) const
78 |         {
79 |             return m_node == rhs.m_node;
80 |         }
81 |         //! Negation of operator ==.
82 |         bool operator !=(const node_iterator &rhs) const
83 |         {
84 |             return m_node != rhs.m_node;
85 |         }
86 |
87 |     private:
88 |
89 |         xml_node *m_node; // current node, 0 for the end iterator
90 |
91 |     };
92 |
93 |     //! Bidirectional iterator over the attributes of an xml_node.
94 |     template
95 |     class attribute_iterator
96 |     {
97 |
98 |     public:
99 |
100 |         typedef xml_attribute value_type; // no `typename`: it is only valid in front of a qualified name
101 |         typedef xml_attribute &reference;
102 |         typedef xml_attribute *pointer;
103 |         typedef std::ptrdiff_t difference_type;
104 |         typedef std::bidirectional_iterator_tag iterator_category;
105 |         //! Constructs the end iterator (no current attribute).
106 |         attribute_iterator()
107 |             : m_attribute(0)
108 |         {
109 |         }
110 |         //! Constructs an iterator positioned at the first attribute of \p node.
111 |         attribute_iterator(xml_node *node)
112 |             : m_attribute(node->first_attribute())
113 |         {
114 |         }
115 |         //! Returns the current attribute; must not be called on the end iterator.
116 |         reference operator *() const
117 |         {
118 |             assert(m_attribute);
119 |             return *m_attribute;
120 |         }
121 |         //! Member access to the current attribute; must not be called on the end iterator.
122 |         pointer operator->() const
123 |         {
124 |             assert(m_attribute);
125 |             return m_attribute;
126 |         }
127 |         //! Pre-increment: advances to the next attribute.
128 |         attribute_iterator& operator++()
129 |         {
130 |             assert(m_attribute);
131 |             m_attribute = m_attribute->next_attribute();
132 |             return *this;
133 |         }
134 |         //! Post-increment: advances and returns the position held before the call.
135 |         attribute_iterator operator++(int)
136 |         {
137 |             attribute_iterator tmp = *this;
138 |             ++(*this); // increment the iterator object, not the `this` pointer
139 |             return tmp;
140 |         }
141 |         //! Pre-decrement: steps back to the previous attribute, which must exist.
142 |         attribute_iterator& operator--()
143 |         {
144 |             assert(m_attribute && m_attribute->previous_attribute());
145 |             m_attribute = m_attribute->previous_attribute();
146 |             return *this;
147 |         }
148 |         //! Post-decrement: steps back and returns the position held before the call.
149 |         attribute_iterator operator--(int)
150 |         {
151 |             attribute_iterator tmp = *this;
152 |             --(*this); // must move backwards; delegates to the pre-decrement above
153 |             return tmp;
154 |         }
155 |         //! Two iterators are equal when they refer to the same attribute.
156 |         bool operator ==(const attribute_iterator &rhs) const
157 |         {
158 |             return m_attribute == rhs.m_attribute;
159 |         }
160 |         //! Negation of operator ==.
161 |         bool operator !=(const attribute_iterator &rhs) const
162 |         {
163 |             return m_attribute != rhs.m_attribute;
164 |         }
165 |
166 |     private:
167 |
168 |         xml_attribute *m_attribute; // current attribute, 0 for the end iterator
169 |
170 |     };
171 |
172 | }
173 |
174 | #endif
175 |
--------------------------------------------------------------------------------
/c_src/rapidxml_print.hpp:
--------------------------------------------------------------------------------
1 | #ifndef RAPIDXML_PRINT_HPP_INCLUDED
2 | #define RAPIDXML_PRINT_HPP_INCLUDED
3 |
4 | // Copyright (C) 2006, 2009 Marcin Kalicinski
5 | // Version 1.13
6 | // Revision $DateTime: 2009/05/13 01:46:17 $
7 | //! \file rapidxml_print.hpp This file contains rapidxml printer implementation
8 |
9 | #include "rapidxml.hpp"
10 |
11 | // Only include streams if not disabled
12 | #ifndef RAPIDXML_NO_STREAMS
13 | #include
14 | #include
15 | #endif
16 |
17 | namespace rapidxml
18 | {
19 |
20 | ///////////////////////////////////////////////////////////////////////
21 | // Printing flags
22 | // Bit flag for the `flags` argument of print(); the printing functions test it with `flags & print_no_indenting`.
23 | const int print_no_indenting = 0x1; //!< Printer flag instructing the printer to suppress indenting of XML. See print() function.
24 |
25 | ///////////////////////////////////////////////////////////////////////
26 | // Internal
27 |
28 | //! \cond internal
29 | namespace internal
30 | {
31 |
32 | ///////////////////////////////////////////////////////////////////////////
33 | // Internal character operations
34 |
35 |         // Writes the characters of [begin, end) to `out` unchanged and returns the advanced output iterator.
36 |         template
37 |         inline OutIt copy_chars(const Ch *begin, const Ch *end, OutIt out)
38 |         {
39 |             for (const Ch *src = begin; src != end; ++src)
40 |                 *out++ = *src;
41 |             return out;
42 |         }
43 |
44 | // Copy characters from given range to given output iterator and expand
45 | // characters into references (&lt; &gt; &apos; &quot; &amp;); a character equal to `noexpand` is always copied unchanged
46 | template
47 | inline OutIt copy_and_expand_chars(const Ch *begin, const Ch *end, Ch noexpand, OutIt out)
48 | {
49 | while (begin != end)
50 | {
51 | if (*begin == noexpand) // callers in this file pass a quote character or Ch(0) here
52 | {
53 | *out++ = *begin; // No expansion, copy character
54 | }
55 | else
56 | {
57 | switch (*begin)
58 | {
59 | case Ch('<'):
60 | *out++ = Ch('&'); *out++ = Ch('l'); *out++ = Ch('t'); *out++ = Ch(';');
61 | break;
62 | case Ch('>'):
63 | *out++ = Ch('&'); *out++ = Ch('g'); *out++ = Ch('t'); *out++ = Ch(';');
64 | break;
65 | case Ch('\''):
66 | *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('p'); *out++ = Ch('o'); *out++ = Ch('s'); *out++ = Ch(';');
67 | break;
68 | case Ch('"'):
69 | *out++ = Ch('&'); *out++ = Ch('q'); *out++ = Ch('u'); *out++ = Ch('o'); *out++ = Ch('t'); *out++ = Ch(';');
70 | break;
71 | case Ch('&'):
72 | *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('m'); *out++ = Ch('p'); *out++ = Ch(';');
73 | break;
74 | default:
75 | *out++ = *begin; // No expansion, copy character
76 | }
77 | }
78 | ++begin; // Step to next character
79 | }
80 | return out; // iterator positioned after the last character written
81 | }
82 |
83 |         // Writes `ch` to `out` n times (nothing when n <= 0) and returns the advanced output iterator.
84 |         template
85 |         inline OutIt fill_chars(OutIt out, int n, Ch ch)
86 |         {
87 |             for (int left = n; left > 0; --left)
88 |                 *out++ = ch;
89 |             return out;
90 |         }
91 |
92 | // Find character: returns true when any character in [begin, end) equals `ch`, false otherwise (also for an empty range)
93 | template
94 | inline bool find_char(const Ch *begin, const Ch *end)
95 | {
96 | while (begin != end)
97 | if (*begin++ == ch) // NOTE(review): `ch` is not declared in the text shown here - presumably a template parameter; confirm
98 | return true;
99 | return false;
100 | }
101 |
102 | ///////////////////////////////////////////////////////////////////////////
103 | // Internal printing operations
104 |
105 | // Print node (forward declaration: print_children below calls it before the definition appears)
106 | template
107 | inline OutIt print_node(OutIt out, const xml_node *node, int flags, int indent);
108 |
109 | // Print children of the node: calls print_node on each direct child in sibling order, all with the same `indent`
110 | template
111 | inline OutIt print_children(OutIt out, const xml_node *node, int flags, int indent)
112 | {
113 | for (xml_node *child = node->first_node(); child; child = child->next_sibling())
114 | out = print_node(out, child, flags, indent);
115 | return out; // unchanged `out` when the node has no children
116 | }
117 |
118 | // Print attributes of the node as ` name=value` items, each preceded by a space; the unnamed int parameter is unused
119 | template
120 | inline OutIt print_attributes(OutIt out, const xml_node *node, int)
121 | {
122 | for (xml_attribute *attribute = node->first_attribute(); attribute; attribute = attribute->next_attribute())
123 | {
124 | if (attribute->name() && attribute->value()) // attributes with a null name or value pointer are skipped
125 | {
126 | // Print attribute name
127 | *out = Ch(' '), ++out;
128 | out = copy_chars(attribute->name(), attribute->name() + attribute->name_size(), out);
129 | *out = Ch('='), ++out;
130 | // Print attribute value using appropriate quote type
131 | if (find_char(attribute->value(), attribute->value() + attribute->value_size())) // NOTE(review): the searched character is not visible in this text - confirm
132 | {
133 | *out = Ch('"'), ++out;
134 | out = copy_and_expand_chars(attribute->value(), attribute->value() + attribute->value_size(), Ch('\''), out); // inside double quotes an apostrophe is left as is
135 | *out = Ch('"'), ++out;
136 | }
137 | else
138 | {
139 | *out = Ch('\''), ++out;
140 | out = copy_and_expand_chars(attribute->value(), attribute->value() + attribute->value_size(), Ch('"'), out); // inside single quotes a double quote is left as is
141 | *out = Ch('\''), ++out;
142 | }
143 | }
144 | }
145 | return out;
146 | }
147 |
148 | // Print data node: optional indentation followed by the node value with special characters expanded
149 | template
150 | inline OutIt print_data_node(OutIt out, const xml_node *node, int flags, int indent)
151 | {
152 | assert(node->type() == node_data);
153 | if (!(flags & print_no_indenting))
154 | out = fill_chars(out, indent, Ch(' '));
155 | out = copy_and_expand_chars(node->value(), node->value() + node->value_size(), Ch(0), out); // Ch(0) as noexpand: none of the five special characters is exempt
156 | return out;
157 | }
158 |
159 | // Print CDATA node: optional indentation, then the value wrapped in "<![CDATA[" and "]]>"
160 | template
161 | inline OutIt print_cdata_node(OutIt out, const xml_node *node, int flags, int indent)
162 | {
163 | assert(node->type() == node_cdata);
164 | if (!(flags & print_no_indenting))
165 | out = fill_chars(out, indent, Ch(' '));
166 | *out = Ch('<'); ++out;
167 | *out = Ch('!'); ++out;
168 | *out = Ch('['); ++out;
169 | *out = Ch('C'); ++out;
170 | *out = Ch('D'); ++out;
171 | *out = Ch('A'); ++out;
172 | *out = Ch('T'); ++out;
173 | *out = Ch('A'); ++out;
174 | *out = Ch('['); ++out;
175 | out = copy_chars(node->value(), node->value() + node->value_size(), out); // copied verbatim; NOTE(review): a value containing "]]>" is not guarded against here
176 | *out = Ch(']'); ++out;
177 | *out = Ch(']'); ++out;
178 | *out = Ch('>'); ++out;
179 | return out;
180 | }
181 |
182 | // Print element node: start tag with attributes, then either "/>" or the content followed by the end tag
183 | template
184 | inline OutIt print_element_node(OutIt out, const xml_node *node, int flags, int indent)
185 | {
186 | assert(node->type() == node_element);
187 |
188 | // Print element name and attributes, if any
189 | if (!(flags & print_no_indenting))
190 | out = fill_chars(out, indent, Ch(' '));
191 | *out = Ch('<'), ++out;
192 | out = copy_chars(node->name(), node->name() + node->name_size(), out);
193 | out = print_attributes(out, node, flags);
194 |
195 | // If node is childless
196 | if (node->value_size() == 0 && !node->first_node()) // no own value and no child nodes: print the self-closing form
197 | {
198 | // Print childless node tag ending
199 | *out = Ch('/'), ++out;
200 | *out = Ch('>'), ++out;
201 | }
202 | else
203 | {
204 | // Print normal node tag ending
205 | *out = Ch('>'), ++out;
206 |
207 | // Test if node contains a single data node only (and no other nodes)
208 | xml_node *child = node->first_node();
209 | if (!child)
210 | {
211 | // If node has no children, only print its value without indenting
212 | out = copy_and_expand_chars(node->value(), node->value() + node->value_size(), Ch(0), out);
213 | }
214 | else if (child->next_sibling() == 0 && child->type() == node_data)
215 | {
216 | // If node has a sole data child, only print its value without indenting
217 | out = copy_and_expand_chars(child->value(), child->value() + child->value_size(), Ch(0), out);
218 | }
219 | else
220 | {
221 | // Print all children with full indenting
222 | if (!(flags & print_no_indenting))
223 | *out = Ch('\n'), ++out;
224 | out = print_children(out, node, flags, indent + 2); // children are indented two columns deeper than this element
225 | if (!(flags & print_no_indenting))
226 | out = fill_chars(out, indent, Ch(' ')); // line up the end tag with the start tag
227 | }
228 |
229 | // Print node end
230 | *out = Ch('<'), ++out;
231 | *out = Ch('/'), ++out;
232 | out = copy_chars(node->name(), node->name() + node->name_size(), out);
233 | *out = Ch('>'), ++out;
234 | }
235 | return out;
236 | }
237 |
238 | // Print declaration node: "<?xml", the node's attributes, then "?>" (no node type assertion in this function)
239 | template
240 | inline OutIt print_declaration_node(OutIt out, const xml_node *node, int flags, int indent)
241 | {
242 | // Print declaration start
243 | if (!(flags & print_no_indenting))
244 | out = fill_chars(out, indent, Ch(' '));
245 | *out = Ch('<'), ++out;
246 | *out = Ch('?'), ++out;
247 | *out = Ch('x'), ++out;
248 | *out = Ch('m'), ++out;
249 | *out = Ch('l'), ++out;
250 |
251 | // Print attributes
252 | out = print_attributes(out, node, flags); // each attribute brings its own leading space
253 |
254 | // Print declaration end
255 | *out = Ch('?'), ++out;
256 | *out = Ch('>'), ++out;
257 |
258 | return out;
259 | }
260 |
261 | // Print comment node: optional indentation, then the value between "<!--" and "-->"
262 | template
263 | inline OutIt print_comment_node(OutIt out, const xml_node *node, int flags, int indent)
264 | {
265 | assert(node->type() == node_comment);
266 | if (!(flags & print_no_indenting))
267 | out = fill_chars(out, indent, Ch(' '));
268 | *out = Ch('<'), ++out;
269 | *out = Ch('!'), ++out;
270 | *out = Ch('-'), ++out;
271 | *out = Ch('-'), ++out;
272 | out = copy_chars(node->value(), node->value() + node->value_size(), out); // comment text is copied verbatim, nothing is escaped
273 | *out = Ch('-'), ++out;
274 | *out = Ch('-'), ++out;
275 | *out = Ch('>'), ++out;
276 | return out;
277 | }
278 |
279 | // Print doctype node: optional indentation, then "<!DOCTYPE ", the value and ">"
280 | template
281 | inline OutIt print_doctype_node(OutIt out, const xml_node *node, int flags, int indent)
282 | {
283 | assert(node->type() == node_doctype);
284 | if (!(flags & print_no_indenting))
285 | out = fill_chars(out, indent, Ch(' '));
286 | *out = Ch('<'), ++out;
287 | *out = Ch('!'), ++out;
288 | *out = Ch('D'), ++out;
289 | *out = Ch('O'), ++out;
290 | *out = Ch('C'), ++out;
291 | *out = Ch('T'), ++out;
292 | *out = Ch('Y'), ++out;
293 | *out = Ch('P'), ++out;
294 | *out = Ch('E'), ++out;
295 | *out = Ch(' '), ++out;
296 | out = copy_chars(node->value(), node->value() + node->value_size(), out); // doctype text is copied verbatim, nothing is escaped
297 | *out = Ch('>'), ++out;
298 | return out;
299 | }
300 |
301 | // Print pi node (processing instruction): "<?" + name + " " + value + "?>", name and value copied verbatim
302 | template
303 | inline OutIt print_pi_node(OutIt out, const xml_node *node, int flags, int indent)
304 | {
305 | assert(node->type() == node_pi);
306 | if (!(flags & print_no_indenting))
307 | out = fill_chars(out, indent, Ch(' '));
308 | *out = Ch('<'), ++out;
309 | *out = Ch('?'), ++out;
310 | out = copy_chars(node->name(), node->name() + node->name_size(), out);
311 | *out = Ch(' '), ++out; // the separating space is written even when the value is empty
312 | out = copy_chars(node->value(), node->value() + node->value_size(), out);
313 | *out = Ch('?'), ++out;
314 | *out = Ch('>'), ++out;
315 | return out;
316 | }
317 |
318 | // Print literal node: optional indentation followed by the node value copied as is
319 | template
320 | inline OutIt print_literal_node(OutIt out, const xml_node *node, int flags, int indent)
321 | {
322 | assert(node->type() == node_literal);
323 | if (!(flags & print_no_indenting))
324 | out = fill_chars(out, indent, Ch(' '));
325 | out = copy_chars(node->value(), node->value() + node->value_size(), out); // no escaping and no surrounding markup
326 | return out;
327 | }
328 |
329 | // Print node: dispatches on node->type() to the matching print_*_node function
330 | // and, unless print_no_indenting is set, appends a line break after the node
331 | template
332 | inline OutIt print_node(OutIt out, const xml_node *node, int flags, int indent)
333 | {
334 | // Print proper node type
335 | switch (node->type())
336 | {
337 |
338 | // Document
339 | case node_document:
340 | out = print_children(out, node, flags, indent); // a document prints nothing of its own, only its children
341 | break;
342 |
343 | // Element
344 | case node_element:
345 | out = print_element_node(out, node, flags, indent);
346 | break;
347 |
348 | // Data
349 | case node_data:
350 | out = print_data_node(out, node, flags, indent);
351 | break;
352 |
353 | // CDATA
354 | case node_cdata:
355 | out = print_cdata_node(out, node, flags, indent);
356 | break;
357 |
358 | // Declaration
359 | case node_declaration:
360 | out = print_declaration_node(out, node, flags, indent);
361 | break;
362 |
363 | // Comment
364 | case node_comment:
365 | out = print_comment_node(out, node, flags, indent);
366 | break;
367 |
368 | // Doctype
369 | case node_doctype:
370 | out = print_doctype_node(out, node, flags, indent);
371 | break;
372 |
373 | // Pi
374 | case node_pi:
375 | out = print_pi_node(out, node, flags, indent);
376 | break;
377 | // Literal
378 | case node_literal:
379 | out = print_literal_node(out, node, flags, indent);
380 | break;
381 |
382 | // Unknown
383 | default:
384 | assert(0); // an unknown type only trips the assertion; nothing is printed for it
385 | break;
386 | }
387 |
388 | // If indenting not disabled, add line break after node
389 | if (!(flags & print_no_indenting))
390 | *out = Ch('\n'), ++out;
391 |
392 | // Return modified iterator
393 | return out;
394 | }
395 |
396 | }
397 | //! \endcond
398 |
399 | ///////////////////////////////////////////////////////////////////////////
400 | // Printing
401 |
402 | //! Prints XML to given output iterator.
403 | //! \param out Output iterator to print to.
404 | //! \param node Node to be printed. Pass xml_document to print entire document.
405 | //! \param flags Flags controlling how XML is printed; 0 (the default) prints with indentation, print_no_indenting suppresses it.
406 | //! \return Output iterator pointing to position immediately after last character of printed text.
407 | template
408 | inline OutIt print(OutIt out, const xml_node &node, int flags = 0)
409 | {
410 | return internal::print_node(out, &node, flags, 0); // printing starts at indentation level 0
411 | }
412 |
413 | #ifndef RAPIDXML_NO_STREAMS
414 |
415 | //! Prints XML to given output stream.
416 | //! \param out Output stream to print to.
417 | //! \param node Node to be printed. Pass xml_document to print entire document.
418 | //! \param flags Flags controlling how XML is printed; 0 (the default) prints with indentation.
419 | //! \return Output stream.
420 | template
421 | inline std::basic_ostream &print(std::basic_ostream &out, const xml_node &node, int flags = 0)
422 | {
423 | print(std::ostream_iterator(out), node, flags); // delegates to the output-iterator overload
424 | return out;
425 | }
426 |
427 | //! Prints formatted XML to given output stream. Uses default printing flags. Use print() function to customize printing process.
428 | //! \param out Output stream to print to.
429 | //! \param node Node to be printed.
430 | //! \return Output stream.
431 | template
432 | inline std::basic_ostream &operator <<(std::basic_ostream &out, const xml_node &node)
433 | {
434 | return print(out, node); // flags are left at the default of 0, so the output is indented
435 | }
436 |
437 | #endif
438 |
439 | }
440 |
441 | #endif
442 |
--------------------------------------------------------------------------------
/c_src/rapidxml_utils.hpp:
--------------------------------------------------------------------------------
1 | #ifndef RAPIDXML_UTILS_HPP_INCLUDED
2 | #define RAPIDXML_UTILS_HPP_INCLUDED
3 |
4 | // Copyright (C) 2006, 2009 Marcin Kalicinski
5 | // Version 1.13
6 | // Revision $DateTime: 2009/05/13 01:46:17 $
7 | //! \file rapidxml_utils.hpp This file contains high-level rapidxml utilities that can be useful
8 | //! in certain simple scenarios. They should probably not be used if maximizing performance is the main objective.
9 |
10 | #include "rapidxml.hpp"
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 | namespace rapidxml
17 | {
18 |
19 | //! Represents data loaded from a file, stored in memory with a terminating 0 appended
20 | template
21 | class file
22 | {
23 |
24 | public:
25 | //! Throws std::runtime_error when the file cannot be opened.
26 | //! Loads file into the memory. Data will be automatically destroyed by the destructor.
27 | //! \param filename Filename to load.
28 | file(const char *filename)
29 | {
30 | using namespace std;
31 |
32 | // Open stream
33 | basic_ifstream stream(filename, ios::binary);
34 | if (!stream)
35 | throw runtime_error(string("cannot open file ") + filename);
36 | stream.unsetf(ios::skipws);
37 |
38 | // Determine stream size
39 | stream.seekg(0, ios::end);
40 | size_t size = stream.tellg(); // NOTE(review): the result of tellg() is not checked for failure before use
41 | stream.seekg(0);
42 |
43 | // Load data and add terminating 0
44 | m_data.resize(size + 1);
45 | stream.read(&m_data.front(), static_cast(size));
46 | m_data[size] = 0;
47 | }
48 | //! Throws std::runtime_error when reading from the stream fails.
49 | //! Loads file into the memory. Data will be automatically destroyed by the destructor
50 | //! \param stream Stream to load from
51 | file(std::basic_istream &stream)
52 | {
53 | using namespace std;
54 |
55 | // Load data and add terminating 0
56 | stream.unsetf(ios::skipws);
57 | m_data.assign(istreambuf_iterator(stream), istreambuf_iterator());
58 | if (stream.fail() || stream.bad())
59 | throw runtime_error("error reading stream");
60 | m_data.push_back(0);
61 | }
62 |
63 | //! Gets file data.
64 | //! \return Pointer to data of file.
65 | Ch *data()
66 | {
67 | return &m_data.front();
68 | }
69 |
70 | //! Gets file data.
71 | //! \return Pointer to data of file.
72 | const Ch *data() const
73 | {
74 | return &m_data.front();
75 | }
76 |
77 | //! Gets file data size.
78 | //! \return Size of file data, in characters, including the terminating 0 added on load.
79 | std::size_t size() const
80 | {
81 | return m_data.size();
82 | }
83 |
84 | private:
85 |
86 | std::vector m_data; // File data
87 |
88 | };
89 |
90 |     //! Counts the direct children of a node by walking the sibling chain once, O(n).
91 |     //! \param node Node whose children are counted; it is dereferenced, so it must not be null.
92 |     //! \return Number of children of node
93 |     template
94 |     inline std::size_t count_children(xml_node *node)
95 |     {
96 |         std::size_t total = 0;
97 |         // The loop stops when first_node()/next_sibling() hands back a null pointer.
98 |         for (xml_node *cur = node->first_node(); cur; cur = cur->next_sibling())
99 |         {
100 |             ++total;
101 |         }
102 |         return total;
103 |     }
104 |
105 |     //! Counts the attributes of a node by walking the attribute chain once, O(n).
106 |     //! \param node Node whose attributes are counted; it is dereferenced, so it must not be null.
107 |     //! \return Number of attributes of node
108 |     template
109 |     inline std::size_t count_attributes(xml_node *node)
110 |     {
111 |         std::size_t total = 0;
112 |         // The loop stops when first_attribute()/next_attribute() hands back a null pointer.
113 |         for (xml_attribute *cur = node->first_attribute(); cur; cur = cur->next_attribute())
114 |         {
115 |             ++total;
116 |         }
117 |         return total;
118 |     }
119 |
120 | }
121 |
122 | #endif
123 |
--------------------------------------------------------------------------------
/include/exml.hrl:
--------------------------------------------------------------------------------
1 | %%%-------------------------------------------------------------------
2 | %%% Parts of this file, explicitly marked in the code, were taken from
3 | %%% https://github.com/erszcz/rxml
4 | %%%-------------------------------------------------------------------
5 |
6 | -ifndef(EXML_HEADER).
7 | -define(EXML_HEADER, true).
8 | %% Character data node: `content` is iodata, `style` selects escaped text (the default) or a CDATA section.
9 | -record(xmlcdata, {content = [] :: iodata(),
10 | style = escaped :: escaped | cdata}).
11 |
12 | -record(xmlel, {name :: binary(),
13 | attrs = #{} :: exml:attrs(),
14 | children = [] :: [exml:child()]}).
15 |
16 | %% Implementation of the exmlAssertEqual/2 macro is a modification of
17 | %% https://github.com/erszcz/rxml/commit/e8483408663f0bc2af7896e786c1cdea2e86e43d#diff-2cb5d18741df32f4ead70c21fdd221d1
18 | %% See assertEqual in $ERLANG/lib/stdlib-2.6/include/assert.hrl for the original.
%% Asserts that Expect and Expr are equal once both went through exml:xml_sort/1.
%% On mismatch raises `{exmlAssertEqual, Info}', where `expected' and `value'
%% are the two normalised terms that were actually compared. Expect is
%% evaluated exactly once.
-define(exmlAssertEqual(Expect, Expr),
        begin
            ((fun () ->
                      X__X = (exml:xml_sort(Expect)),
                      case (exml:xml_sort(Expr)) of
                          X__X -> ok;
                          X__V -> erlang:error({exmlAssertEqual,
                                                [{module, ?MODULE},
                                                 {line, ?LINE},
                                                 {expression, (??Expr)},
                                                 {expected, X__X},
                                                 {value, X__V}]})
                      end
              end)())
        end).
34 |
35 | -endif.
36 |
--------------------------------------------------------------------------------
/include/exml_stream.hrl:
--------------------------------------------------------------------------------
1 | -include("exml.hrl").
2 |
%% Opening of a stream: the tag name and its attributes.
-record(xmlstreamstart, {name :: binary(),
                         attrs = #{} :: exml:attrs()}).

%% Closing of a stream: only the tag name is kept.
-record(xmlstreamend, {name :: binary()}).
7 |
--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
1 | {deps, []}.
2 |
3 | {dialyzer,
4 | [{warnings,
5 | [unknown,
6 | unmatched_returns,
7 | error_handling,
8 | underspecs
9 | ]}]}.
10 |
11 | {profiles, [
12 | {test, [
13 | {deps, [
14 | {proper, "1.5.0"}
15 | ]},
16 | {plugins, [
17 | {rebar3_codecov, "0.7.0"}
18 | ]},
19 | {port_env, [
20 | {"CXXFLAGS", "$CXXFLAGS -O3 -std=c++11 -g -Wall -Wextra -fPIC --coverage"},
21 | {"LDFLAGS", "$LDFLAGS --coverage"}
22 | ]},
23 | {eunit_opts, [verbose]},
24 | {cover_opts, [verbose]},
25 | {cover_enabled, true},
26 | {cover_export_enabled, true}
27 | ]}
28 | ]}.
29 |
30 | {project_plugins, [rebar3_hex, rebar3_ex_doc]}.
31 | {plugins, [pc]}.
32 |
33 | % Interrupt compilation, if the artifact is not found
34 | {artifacts, ["priv/exml_nif.so"]}.
35 |
36 | {port_specs, [
37 | {
38 | % Any arch
39 | ".*",
40 | % Create library
41 | "priv/exml_nif.so",
42 | % From files
43 | ["c_src/*.cpp"],
44 | % Using options
45 | [{env, [{"CXXFLAGS", "$CXXFLAGS -O3 -std=c++11 -Wall -Wextra"}]}]
46 | }
47 | ]}.
48 |
49 | {provider_hooks, [
50 | {post, [
51 | {compile, {pc, compile}},
52 | {clean, {pc, clean}}
53 | ]}
54 | ]}.
55 |
56 | {hex, [
57 | {doc, #{provider => ex_doc}}
58 | ]}.
59 | {ex_doc, [
60 | {source_url, <<"https://github.com/esl/exml">>},
61 | {main, <<"readme">>},
62 | {extras, [{'README.md', #{title => <<"README">>}},
63 | {'LICENSE', #{title => <<"License">>}}
64 | ]}
65 | ]}.
66 |
--------------------------------------------------------------------------------
/rebar.lock:
--------------------------------------------------------------------------------
1 | [].
2 |
--------------------------------------------------------------------------------
/src/exml.app.src:
--------------------------------------------------------------------------------
1 | {application, exml,
2 | [{description, "Erlang fast XML parsing library"},
3 | {vsn, git},
4 | {registered, []},
5 | {applications,
6 | [kernel,
7 | stdlib
8 | ]},
9 | {env, []},
10 | {modules, []},
11 | {maintainers, ["ESL"]},
12 | {pkg_name, "hexml"},
13 | {licenses, ["Apache-2.0", "BSL-1.0", "GPL (tests)"]},
14 | {links, [{"GitHub", "https://github.com/esl/exml/"}]},
15 | {exclude_files, ["c_src/exml.d"]}
16 | ]}.
17 |
--------------------------------------------------------------------------------
/src/exml.erl:
--------------------------------------------------------------------------------
1 | %%%-------------------------------------------------------------------
2 | %%% @copyright (C) 2011-2024, Erlang Solutions Ltd.
3 | %%% @doc
4 | %%% @end
5 | %%% Created : 12 Jul 2011 by Michal Ptaszek
6 | %%%
7 | %%% Parts of this file, explicitly marked in the code, were taken from
8 | %%% https://github.com/erszcz/rxml
9 | %%%-------------------------------------------------------------------
10 | -module(exml).
11 |
12 | -include("exml_stream.hrl").
13 |
14 | -export([parse/1]).
15 |
16 | -export([to_list/1,
17 | to_binary/1,
18 | to_iolist/1,
19 | xml_size/1,
20 | to_pretty_iolist/1]).
21 |
22 | -export([filter_children/2,
23 | append_children/2,
24 | upsert_attr_value/3,
25 | upsert_child/2,
26 | insert_new_child/2,
27 | remove_cdata/1,
28 | remove_attr/2,
29 | xml_sort/1]).
30 |
31 | -export_type([attrs/0,
32 | cdata/0,
33 | element/0,
34 | child/0,
35 | item/0]).
36 |
37 | -type attrs() :: #{binary() => binary()}.
38 | -type cdata() :: #xmlcdata{}.
39 | %% CDATA record. Printing escaping rules defaults to escaping character-wise.
40 | %%
41 | %% Escaping rules:
42 | %%
43 | %% - `escaped': escapes all characters by regular `&' control escaping.
44 | %% - `cdata': wraps the entire string into a `<![CDATA[]]>' section.
45 | %%
46 | -type element() :: #xmlel{}.
47 | -type item() :: element() | cdata() | exml_stream:start() | exml_stream:stop().
48 | -type child() :: element() | cdata().
49 | -type prettify() :: pretty | not_pretty.
50 | %% Printing indentation rule, see `to_iolist/2'.
51 |
%% @doc Calculate the length of the original XML payload
-spec xml_size(item() | [item()]) -> non_neg_integer().
xml_size([]) ->
    0;
xml_size([Item | Items]) ->
    xml_size(Item) + xml_size(Items);
xml_size(#xmlcdata{content = Content, style = Style}) ->
    Escaped = exml_nif:escape_cdata(Content, Style),
    iolist_size(Escaped);
xml_size(#xmlel{name = Name, attrs = Attrs, children = []}) ->
    %% Self-closing element `<Name/>': three markup bytes plus the name.
    NameSize = byte_size(Name),
    3 + NameSize + xml_size(maps:to_list(Attrs));
xml_size(#xmlel{name = Name, attrs = Attrs, children = Children}) ->
    %% Opening and closing tags `<Name></Name>': five markup bytes plus the name twice.
    TagsSize = 5 + byte_size(Name) * 2,
    AttrsSize = xml_size(maps:to_list(Attrs)),
    TagsSize + AttrsSize + xml_size(Children);
xml_size(#xmlstreamstart{name = Name, attrs = Attrs}) ->
    NameSize = byte_size(Name),
    NameSize + 2 + xml_size(maps:to_list(Attrs));
xml_size(#xmlstreamend{name = Name}) ->
    byte_size(Name) + 3;
xml_size({Key, Value}) when is_binary(Key) ->
    %% One attribute: the preceding whitespace, `=' and the two quotes add four bytes.
    KeySize = byte_size(Key),
    KeySize + 4 + byte_size(Value).
76 |
%% @doc Sort in ascending order a list of xml `t:item/0'.
%%
%% Normalisation rules:
%%
%% - a list of items is sorted with `lists:sort/1', after each item was normalised;
%% - an `xmlel' has each of its children normalised recursively, while the
%%   order of the children is kept as it is;
%% - cdata, stream start and stream end items are returned unchanged.
%%
%% Attributes are not touched.
%%
%% @end
%% The implementation of this function is a subtle modification of
%% https://github.com/erszcz/rxml/commit/e8483408663f0bc2af7896e786c1cdea2e86e43d
-spec xml_sort([item()]) -> [item()];
              (element()) -> element();
              (cdata()) -> cdata();
              (exml_stream:start()) -> exml_stream:start();
              (exml_stream:stop()) -> exml_stream:stop().
xml_sort(Elements) when is_list(Elements) ->
    lists:sort([ xml_sort(E) || E <- Elements ]);
xml_sort(#xmlel{children = Children} = El) ->
    %% Children keep their order: only lists found further down are sorted.
    El#xmlel{
      children = [ xml_sort(C) || C <- Children ]
     };
xml_sort(#xmlcdata{} = Cdata) ->
    Cdata;
xml_sort(#xmlstreamstart{} = StreamStart) ->
    StreamStart;
xml_sort(#xmlstreamend{} = StreamEnd) ->
    StreamEnd.
105 |
%% @doc Return the given `t:element/0' keeping only the children for which `Pred' returns `true'.
-spec filter_children(element(), fun((element() | cdata()) -> boolean())) -> element().
filter_children(#xmlel{children = Children} = El, Pred) ->
    El#xmlel{children = lists:filter(Pred, Children)}.
111 |
%% @doc Return the given `t:element/0' with every `t:cdata/0' child dropped.
-spec remove_cdata(element()) -> element().
remove_cdata(#xmlel{children = Children} = El) ->
    El#xmlel{children = [Child || Child <- Children, not is_record(Child, xmlcdata)]}.
118 |
%% @doc Return the `t:element/0' without the attribute named `Key'.
-spec remove_attr(element(), binary()) -> element().
remove_attr(#xmlel{attrs = Attrs} = El, Key) ->
    Remaining = maps:remove(Key, Attrs),
    El#xmlel{attrs = Remaining}.
123 |
%% @doc Add the given children after the existing children of a `t:element/0'.
-spec append_children(element(), [element() | cdata()]) -> element().
append_children(#xmlel{children = Children} = El, ExtraChildren) ->
    AllChildren = lists:append(Children, ExtraChildren),
    El#xmlel{children = AllChildren}.
128 |
%% @doc Set the attribute `Key' to `Value', whether or not it was present before.
-spec upsert_attr_value(element(), binary(), binary()) -> element().
upsert_attr_value(#xmlel{attrs = Attrs} = El, Key, Value) ->
    Updated = maps:put(Key, Value, Attrs),
    El#xmlel{attrs = Updated}.
133 |
%% @doc Replace or insert a child by the given one.
%%
%% The first `#xmlel{}' child with the same name as `NewChild' is replaced;
%% when there is none, `NewChild' is appended after the existing children.
-spec upsert_child(element(), element()) -> element().
upsert_child(#xmlel{children = Children} = El, #xmlel{name = Name} = NewChild) ->
    El#xmlel{children = upsert_child_in(Children, Name, NewChild)}.

%% Walks the children matching on the record itself. A positional key search
%% would also hit an `#xmlcdata{}' whose content equals the name, because both
%% records keep that value in the same tuple position.
upsert_child_in([#xmlel{name = Name} | Rest], Name, NewChild) ->
    [NewChild | Rest];
upsert_child_in([Other | Rest], Name, NewChild) ->
    [Other | upsert_child_in(Rest, Name, NewChild)];
upsert_child_in([], _Name, NewChild) ->
    [NewChild].
139 |
%% @doc Insert a child by the given one, if none existed.
%%
%% `NewChild' is prepended unless an `#xmlel{}' child with the same name is
%% already present, in which case the element is returned unchanged.
-spec insert_new_child(element(), element()) -> element().
insert_new_child(#xmlel{children = Children} = El, #xmlel{name = Name} = NewChild) ->
    %% Match on the record: a positional key search would also treat an
    %% `#xmlcdata{}' whose content equals the name as an existing child.
    HasSameName = fun(#xmlel{name = ChildName}) -> ChildName =:= Name;
                     (_NotAnElement) -> false
                  end,
    case lists:any(HasSameName, Children) of
        true ->
            El;
        false ->
            El#xmlel{children = [NewChild | Children]}
    end.
149 |
%% @equiv erlang:binary_to_list(to_binary(Element))
-spec to_list(exml_stream:element() | [exml_stream:element()]) -> string().
to_list(Element) ->
    Binary = to_binary(Element),
    binary_to_list(Binary).
154 |
%% @equiv erlang:iolist_to_binary(to_iolist(Element, not_pretty))
-spec to_binary(exml_stream:element() | [exml_stream:element()]) -> binary().
to_binary(Element) ->
    IoData = to_iolist(Element, not_pretty),
    iolist_to_binary(IoData).
159 |
%% Serialises without added new lines or indentation.
%% @equiv to_iolist(Element, not_pretty)
-spec to_iolist(exml_stream:element() | [exml_stream:element()]) -> iodata().
to_iolist(Element) ->
    to_iolist(Element, not_pretty).
164 |
%% Serialises with new lines and indentation.
%% @equiv to_iolist(Element, pretty)
-spec to_pretty_iolist(exml_stream:element() | [exml_stream:element()]) -> iodata().
to_pretty_iolist(Element) ->
    to_iolist(Element, pretty).
169 |
%% @doc Parses a binary or a list of binaries into an XML `t:element/0'.
%%
%% The work is delegated to `exml_nif:parse/1' and its result is returned as is.
-spec parse(binary() | [binary()]) -> {ok, element()} | {error, binary()}.
parse(XML) ->
    exml_nif:parse(XML).
174 |
%% @doc Turn a –list of– exml elements into iodata for IO interactions.
%%
%% The `Pretty' argument indicates if the generated XML should have new lines and indentation,
%% which is useful for the debugging eye, or should rather be a minified version,
%% which is better for IO performance.
-spec to_iolist(cdata() | exml_stream:element() | [exml_stream:element()], prettify()) -> iodata().
to_iolist(#xmlel{} = Element, Pretty) ->
    to_binary_nif(Element, Pretty);
to_iolist(#xmlstreamstart{name = Name, attrs = Attrs}, _Pretty) ->
    %% The stream start is rendered as a childless element and the trailing
    %% "/>" of that rendering is swapped for ">", which leaves the tag open.
    %% The match asserts that the rendering does end in "/>".
    Result = to_binary_nif(#xmlel{name = Name, attrs = Attrs}, not_pretty),
    FrontSize = byte_size(Result) - 2,
    <<Front:FrontSize/binary, "/>">> = Result,
    [Front, $>];
to_iolist(#xmlstreamend{name = Name}, _Pretty) ->
    [<<"</">>, Name, <<">">>];
to_iolist(#xmlcdata{content = Content, style = Style}, _Pretty) ->
    exml_nif:escape_cdata(Content, Style);
to_iolist([Element], Pretty) ->
    to_iolist(Element, Pretty);
to_iolist([#xmlstreamstart{name = Name, attrs = Attrs} | Tail] = Elements, Pretty) ->
    %% Tail is not empty here: single-element lists are taken by the clause above.
    [Last | RevChildren] = lists:reverse(Tail),
    case Last of
        #xmlstreamend{name = Name} ->
            %% Add extra nesting for streams so pretty-printing would be indented properly
            Element = #xmlel{name = Name, attrs = Attrs, children = lists:reverse(RevChildren)},
            to_binary_nif(Element, Pretty);
        _ ->
            [to_iolist(El, Pretty) || El <- Elements]
    end;
to_iolist(Elements, Pretty) when is_list(Elements) ->
    [to_iolist(El, Pretty) || El <- Elements].
206 |
%% Serialises an element through the NIF.
%% A failure inside the NIF call is re-raised as `{badxml, Element, Reason}',
%% so that the offending element is part of the error. For errors, Reason is
%% `{OriginalReason, Stacktrace}'; for exits it is the exit reason.
-spec to_binary_nif(element(), prettify()) -> binary().
to_binary_nif(#xmlel{} = Element, Pretty) ->
    %% Only the NIF call itself is protected; the `of' section is not.
    try exml_nif:to_binary(Element, Pretty) of
        Result when is_binary(Result) -> Result
    catch
        error:Reason:Stacktrace ->
            erlang:error({badxml, Element, {Reason, Stacktrace}});
        exit:Reason ->
            erlang:error({badxml, Element, Reason})
    end.
213 |
--------------------------------------------------------------------------------
/src/exml_nif.erl:
--------------------------------------------------------------------------------
1 | %%%-------------------------------------------------------------------
2 | %%% @copyright (C) 2018-2024, Erlang Solutions Ltd.
3 | %%% @private
4 | %%%-------------------------------------------------------------------
5 |
6 | -module(exml_nif).
7 |
8 | -nifs([create/2, escape_cdata/2, to_binary/2, parse/1, parse_next/2, reset_parser/1]).
9 |
10 | -type parser() :: term().
11 |
12 | -export([create/2, parse/1, parse_next/2, escape_cdata/2,
13 | to_binary/2, reset_parser/1]).
14 |
15 | -on_load(load/0).
16 |
17 | %%%===================================================================
18 | %%% Public API
19 | %%%===================================================================
20 |
21 | -dialyzer({nowarn_function, [load/0]}).
%% Loads the NIF library when the module is loaded (see `-on_load').
%% The library is looked up under the name of this module in the `priv'
%% directory of the `exml' application. When the application directory cannot
%% be resolved, `priv' is derived from the location of this module's beam file.
-spec load() -> any().
load() ->
    %% code:priv_dir/1 takes an application name, not a module name.
    PrivDir = case code:priv_dir(exml) of
                  {error, _} ->
                      case code:which(?MODULE) of
                          Path when is_list(Path) ->
                              EbinDir = filename:dirname(Path),
                              AppPath = filename:dirname(EbinDir),
                              filename:join(AppPath, "priv");
                          _ ->
                              %% cover_compiled | preloaded | non_existing
                              erlang:error({cannot_get_load_path, ?MODULE})
                      end;
                  Path ->
                      Path
              end,
    erlang:load_nif(filename:join(PrivDir, ?MODULE_STRING), none).
39 |
%% The functions below are the Erlang-side placeholders of the functions named
%% in the `-nifs' attribute. Each placeholder body only raises `not_loaded'
%% through `erlang:nif_error/1'.

%% Creates a parser from a maximum child size and an infinite-stream flag.
-spec create(MaxChildSize :: non_neg_integer(), InfiniteStream :: boolean()) ->
    {ok, parser()} | {error, Reason :: any()}.
create(_, _) ->
    erlang:nif_error(not_loaded).

%% Turns character data into a binary, according to the given style.
-spec escape_cdata(Bin :: iodata(), atom()) -> binary().
escape_cdata(_Bin, _Style) ->
    erlang:nif_error(not_loaded).

%% Serialises an element into a binary, pretty-printed or not.
-spec to_binary(Elem :: exml:element(), pretty | not_pretty) -> binary().
to_binary(_Elem, _Pretty) ->
    erlang:nif_error(not_loaded).

%% Parses a binary, or a list of binaries, into an element.
-spec parse(Bin :: binary() | [binary()]) -> {ok, exml:element()} | {error, binary()}.
parse(_) ->
    erlang:nif_error(not_loaded).

%% Parses the next item out of the given data. The result carries the item, or
%% `undefined', together with a non-negative integer.
%% NOTE(review): the integer looks like the number of bytes consumed - confirm against the caller.
-spec parse_next(parser(), Data :: binary() | [binary()]) ->
    {ok, exml_stream:element() | undefined, non_neg_integer()} |
    {error, Reason :: any()}.
parse_next(_, _) ->
    erlang:nif_error(not_loaded).

%% Resets the given parser.
-spec reset_parser(parser()) -> any().
reset_parser(_) ->
    erlang:nif_error(not_loaded).
66 |
--------------------------------------------------------------------------------
/src/exml_query.erl:
--------------------------------------------------------------------------------
1 | %%%-------------------------------------------------------------------
2 | %%% @copyright (C) 2011-2024, Erlang Solutions Ltd.
3 | %%% @doc Easy navigation in XML trees
4 | %%% @end
5 | %%%-------------------------------------------------------------------
6 | -module(exml_query).
7 |
8 | -include("exml.hrl").
9 |
10 | -export([path/2, path/3]).
11 | -export([paths/2]).
12 | -export([subelement/2, subelement/3]).
13 | -export([subelement_with_ns/2, subelement_with_ns/3]).
14 | -export([subelement_with_attr/3, subelement_with_attr/4]).
15 | -export([subelement_with_name_and_ns/3, subelement_with_name_and_ns/4]).
16 | -export([subelements/2]).
17 | -export([subelements_with_ns/2]).
18 | -export([subelements_with_name_and_ns/3]).
19 | -export([subelements_with_attr/3]).
20 | -export([attr/2, attr/3]).
21 | -export([cdata/1]).
22 |
23 |
24 | -type path() :: [cdata |
25 | {attr, binary()} |
26 | {element, binary()} |
27 | {element_with_ns, binary()} |
28 | {element_with_ns, binary(), binary()} |
29 | {element_with_attr, binary(), binary()}].
30 | %% Path definition in an XML query, each step is defined by one of these types.
31 | %%
32 | %%
33 | %% - `cdata': selects cdata from the element
34 | %% - `{attr, Name}': selects the value of the attribute with the given name
35 | %% - `{element, Name}': selects a subelement with the given name
36 | %% - `{element_with_ns, NS}': selects a subelement with given namespace
37 | %% - `{element_with_ns, Name, NS}': selects a subelement with given name and namespace
38 | %% - `{element_with_attr, AttrName, AttrValue}': selects a subelement with the given attribute and value
39 | %%
40 |
41 | -export_type([path/0]).
42 |
%%% @doc Like `path/3' but with default `undefined'.
%%% `undefined' is therefore what is returned when the path matches nothing.
%%% @see path/3
-spec path(exml:element(), path()) -> exml:element() | binary() | undefined.
path(Element, Path) ->
    path(Element, Path, undefined).
48 |
49 | %% @doc Gets the element/attr/cdata in the leftmost possible described path,
50 | %% or `Default' if there is no match.
51 | %%
52 | %% Find an element in the xml tree by a path that is pattern-matched against such xml tree structure.
53 | %%
54 | %% For example, given an xml document like
55 | %% ```
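56 | %% <message to='alice@localhost'>
57 | %%   <result xmlns='urn:xmpp:mam:2'>
58 | %%     <forwarded xmlns='urn:xmpp:forward:0'>
59 | %%       <delay xmlns='urn:xmpp:delay'/>
60 | %%       <message xmlns='jabber:client' from='bob@localhost' to='alice@localhost'>
61 | %%         <body>Message from bob to alice</body>
62 | %%       </message>
63 | %%     </forwarded>
64 | %%   </result>
65 | %% </message>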
66 | %% '''
67 | %% The path
68 | %% ```
69 | %% [{element_with_ns, <<"result">>, <<"urn:xmpp:mam:2">>},
70 | %% {element_with_ns, <<"forwarded">>, <<"urn:xmpp:forward:0">>},
71 | %% {element_with_ns, <<"message">>, <<"jabber:client">>},
72 | %% {element, <<"body">>},
73 | %% cdata]
74 | %% '''
75 | %% will return `<<"Message from bob to alice">>'
76 | %% @end
-spec path(exml:element() | undefined, path(), Default) -> exml:element() | binary() | Default.
path(#xmlel{} = Element, [], _Default) ->
    Element;
path(#xmlel{} = Element, [cdata], _Default) ->
    cdata(Element);
path(#xmlel{} = Element, [{attr, Name}], Default) ->
    attr(Element, Name, Default);
path(#xmlel{} = Element, [Step | Rest], Default) ->
    path(path_step(Element, Step), Rest, Default);
path(_NotAnElement, _Path, Default) ->
    Default.

%% Resolves one navigation step to the first matching child. Gives `undefined'
%% when no child matches or when the step does not select a child, which makes
%% the caller fall through to its default.
path_step(Element, {element, Name}) ->
    subelement(Element, Name);
path_step(Element, {element_with_ns, NS}) ->
    subelement_with_ns(Element, NS);
path_step(Element, {element_with_ns, Name, NS}) ->
    subelement_with_name_and_ns(Element, Name, NS);
path_step(Element, {element_with_attr, Name, Value}) ->
    subelement_with_attr(Element, Name, Value);
path_step(_Element, _Other) ->
    undefined.
98 |
%% @doc Gets the elements/attrs/cdatas reachable by the described path
%%
%% Every child matching a step is followed, and the results are returned in
%% one flat list. `cdata' and `{attr, Name}' are accepted only as the last
%% step; any other path that cannot be interpreted raises `invalid_path'.
%% @see path/3
-spec paths(exml:element(), path()) -> [exml:element() | binary()].
paths(#xmlel{} = Element, []) ->
    [Element];
paths(#xmlel{} = Element, [{element, Name} | Rest]) ->
    Children = subelements(Element, Name),
    lists:append([paths(Child, Rest) || Child <- Children]);
paths(#xmlel{} = Element, [{element_with_ns, NS} | Rest]) ->
    Children = subelements_with_ns(Element, NS),
    lists:append([paths(Child, Rest) || Child <- Children]);
paths(#xmlel{} = Element, [{element_with_ns, Name, NS} | Rest]) ->
    Children = subelements_with_name_and_ns(Element, Name, NS),
    lists:append([paths(Child, Rest) || Child <- Children]);
paths(#xmlel{} = Element, [{element_with_attr, AttrName, Value} | Rest]) ->
    Children = subelements_with_attr(Element, AttrName, Value),
    lists:append([paths(Child, Rest) || Child <- Children]);
paths(#xmlel{} = Element, [cdata]) ->
    [cdata(Element)];
paths(#xmlel{attrs = Attrs}, [{attr, Name}]) ->
    %% A map holds at most one value per key: a direct lookup gives the same
    %% zero-or-one element list as scanning all the attributes.
    case maps:find(Name, Attrs) of
        {ok, Value} -> [Value];
        error -> []
    end;
paths(#xmlel{} = El, Path) when is_list(Path) ->
    erlang:error(invalid_path, [El, Path]).
122 |
%% @equiv path(Element, [{element, Name}])
-spec subelement(exml:element(), binary()) -> exml:element() | undefined.
subelement(Element, Name) ->
    subelement(Element, Name, undefined).

%% @equiv path(Element, [{element, Name}], Default)
-spec subelement(exml:element(), binary(), Default) -> exml:element() | Default.
subelement(#xmlel{children = Children}, Name, Default) ->
    %% Only `#xmlel{}' children are candidates. A positional key search would
    %% also return an `#xmlcdata{}' whose content equals Name, because both
    %% records keep that value in the same tuple position.
    first_child_named(Children, Name, Default).

%% Returns the first `#xmlel{}' in the list with the given name, or Default.
first_child_named([#xmlel{name = Name} = Child | _], Name, _Default) ->
    Child;
first_child_named([_ | Rest], Name, Default) ->
    first_child_named(Rest, Name, Default);
first_child_named([], _Name, Default) ->
    Default.
137 |
%% @equiv path(Element, [{element_with_ns, NS}])
-spec subelement_with_ns(exml:element(), binary()) -> exml:element() | undefined.
subelement_with_ns(Element, NS) ->
    subelement_with_ns(Element, NS, undefined).

%% @equiv path(Element, [{element_with_ns, NS}], Default)
-spec subelement_with_ns(exml:element(), binary(), Default) -> exml:element() | Default.
subelement_with_ns(#xmlel{children = Children}, NS, Default) ->
    child_with_ns(Children, NS, Default).

%% First `#xmlel{}' in the list whose `xmlns' attribute is NS, or Default.
child_with_ns([#xmlel{} = Child | Siblings], NS, Default) ->
    case attr(Child, <<"xmlns">>) =:= NS of
        true ->
            Child;
        false ->
            child_with_ns(Siblings, NS, Default)
    end;
child_with_ns([_NotAnElement | Siblings], NS, Default) ->
    child_with_ns(Siblings, NS, Default);
child_with_ns([], _NS, Default) ->
    Default.
159 |
%% @equiv path(Element, [{element_with_attr, AttrName, AttrValue}])
-spec subelement_with_attr(exml:element(), AttrName :: binary(), AttrValue :: binary()) ->
    exml:element() | undefined.
subelement_with_attr(Element, AttrName, AttrValue) ->
    subelement_with_attr(Element, AttrName, AttrValue, undefined).

%% @equiv path(Element, [{element_with_attr, AttrName, AttrValue}], Default)
-spec subelement_with_attr(Element, AttrName, AttrValue, Default) -> SubElement | Default when
      Element :: exml:element(),
      AttrName :: binary(),
      AttrValue :: binary(),
      SubElement :: exml:element(),
      Default :: term().
subelement_with_attr(#xmlel{children = Children}, AttrName, AttrValue, Default) ->
    child_with_attr(Children, AttrName, AttrValue, Default).

%% First `#xmlel{}' in the list whose attribute AttrName is AttrVal, or Default.
child_with_attr([#xmlel{} = Child | Siblings], AttrName, AttrVal, Default) ->
    case attr(Child, AttrName) =:= AttrVal of
        true ->
            Child;
        false ->
            child_with_attr(Siblings, AttrName, AttrVal, Default)
    end;
child_with_attr([_NotAnElement | Siblings], AttrName, AttrVal, Default) ->
    child_with_attr(Siblings, AttrName, AttrVal, Default);
child_with_attr([], _AttrName, _AttrVal, Default) ->
    Default.
187 |
%% @equiv path(Element, [{element_with_ns, Name, NS}])
-spec subelement_with_name_and_ns(exml:element(), binary(), binary()) ->
    exml:element() | undefined.
subelement_with_name_and_ns(Element, Name, NS) ->
    subelement_with_name_and_ns(Element, Name, NS, undefined).

%% @equiv path(Element, [{element_with_ns, Name, NS}], Default)
-spec subelement_with_name_and_ns(exml:element(), binary(), binary(), Default) ->
    exml:element() | Default.
subelement_with_name_and_ns(Element, Name, NS, Default) ->
    %% All the matching children are collected, the first of them is the answer.
    Matches = subelements_with_name_and_ns(Element, Name, NS),
    case Matches of
        [First | _] ->
            First;
        [] ->
            Default
    end.
204 |
%% @equiv paths(Element, [{element, Name}])
-spec subelements(exml:element(), binary()) -> [exml:element()].
subelements(#xmlel{children = Children}, Name) ->
    [Child || #xmlel{name = ChildName} = Child <- Children, ChildName =:= Name].
213 |
%% @equiv paths(Element, [{element_with_ns, NS}])
-spec subelements_with_ns(exml:element(), binary()) -> [exml:element()].
subelements_with_ns(#xmlel{children = Children}, NS) ->
    [Child || #xmlel{} = Child <- Children, NS =:= attr(Child, <<"xmlns">>)].
222 |
%% @equiv paths(Element, [{element_with_ns, Name, NS}])
-spec subelements_with_name_and_ns(exml:element(), binary(), binary()) -> [exml:element()].
subelements_with_name_and_ns(#xmlel{children = Children}, Name, NS) ->
    [Child || #xmlel{name = SubName} = Child <- Children,
              SubName =:= Name,
              NS =:= attr(Child, <<"xmlns">>)].
232 |
%% @equiv paths(Element, [{element_with_attr, AttrName, AttrValue}])
-spec subelements_with_attr(exml:element(), binary(), binary()) -> [exml:element()].
subelements_with_attr(#xmlel{children = Children}, AttrName, Value) ->
    [Child || #xmlel{} = Child <- Children, Value =:= attr(Child, AttrName)].
241 |
%% @equiv path(Element, [cdata])
-spec cdata(exml:element()) -> binary().
cdata(#xmlel{children = Children}) ->
    %% The contents of the direct cdata children, joined in document order.
    Contents = [Content || #xmlcdata{content = Content} <- Children],
    iolist_to_binary(Contents).
246 |
%% @see attr/3
%% @equiv path(Element, [{attr, Name}])
-spec attr(exml:element(), binary()) -> binary() | undefined.
attr(Element, Name) ->
    attr(Element, Name, undefined).

%% @equiv path(Element, [{attr, Name}], Default)
-spec attr(exml:element(), binary(), Default) -> binary() | Default.
attr(#xmlel{attrs = Attrs}, Name, Default) ->
    maps:get(Name, Attrs, Default).
262 |
--------------------------------------------------------------------------------
/src/exml_stream.erl:
--------------------------------------------------------------------------------
1 | %%%-------------------------------------------------------------------
2 | %%% @copyright (C) 2011-2021, Erlang Solutions Ltd.
3 | %%% @doc XML stream parser
4 | %%% @end
5 | %%% Created : 21 Jul 2011 by Michal Ptaszek
6 | %%%-------------------------------------------------------------------
7 | -module(exml_stream).
8 |
9 | -include("exml_stream.hrl").
10 |
11 | -export([new_parser/0,
12 | new_parser/1,
13 | parse/2,
14 | reset_parser/1,
15 | free_parser/1]).
16 |
17 | -export_type([element/0,
18 | start/0,
19 | stop/0,
20 | parser/0,
21 | parser_opt/0]).
22 |
23 | -record(parser, {
24 | event_parser :: term(),
25 | buffer :: [binary()]
26 | }).
27 |
28 | -type start() :: #xmlstreamstart{}.
29 | %% `#xmlstreamstart{}' record.
30 | -type stop() :: #xmlstreamend{}.
31 | %% `#xmlstreamend{}' record.
32 | -type parser() :: #parser{}.
33 | %% `#parser{}' record. Keeps track of unparsed buffers.
34 | -type element() :: exml:element() | start() | stop().
35 | %% One of `t:exml:element/0', `t:start/0', or `t:stop/0'.
36 |
37 | -type parser_opt() :: {infinite_stream, boolean()} | {max_element_size, non_neg_integer()}.
38 | %% Parser options
39 | %%
40 | %%
41 | %% - `infinite_stream': No distinct `t:start/0' or `t:stop/0', only `#xmlel{}' will be returned.
42 | %% - `max_element_size': Specifies maximum byte size of any parsed XML element.
43 | %% The only exception is the "stream start" element,
44 | %% for which only the size of the opening tag is limited.
45 | %%
46 |
47 | %%%===================================================================
48 | %%% Public API
49 | %%%===================================================================
50 |
%% @see new_parser/1
-spec new_parser() -> {ok, parser()} | {error, any()}.
new_parser() ->
    new_parser([]).

%% @doc Creates a new parser. See `t:parser_opt/0' for configuration.
%%
%% `max_element_size' defaults to `0' and `infinite_stream' to `false'.
-spec new_parser([parser_opt()]) -> {ok, parser()} | {error, any()}.
new_parser(Opts) ->
    MaxSize = proplists:get_value(max_element_size, Opts, 0),
    Infinite = proplists:get_value(infinite_stream, Opts, false),
    wrap_event_parser(exml_nif:create(MaxSize, Infinite)).

%% Wraps a freshly created event parser, with an empty buffer, into the parser
%% record; anything else is handed back untouched.
wrap_event_parser({ok, EventParser}) ->
    {ok, #parser{event_parser = EventParser, buffer = []}};
wrap_event_parser(Error) ->
    Error.
67 |
%% @doc Feeds more input to a parser.
%%
%% On success returns the parsed elements together with a parser that keeps
%% the updated buffers.
-spec parse(parser(), binary()) ->
    {ok, parser(), [element()]} | {error, Reason :: binary()}.
parse(Parser, Input) when is_binary(Input) ->
    #parser{event_parser = EventParser, buffer = Pending} = Parser,
    %% The new chunk goes after whatever was left over by the previous calls.
    case parse_all(EventParser, lists:append(Pending, [Input]), []) of
        {ok, Elems, Leftover} ->
            {ok, Parser#parser{buffer = Leftover}, Elems};
        Failure ->
            Failure
    end.
82 |
%% @doc Resets the underlying parser and empties the buffer kept on the Erlang side.
-spec reset_parser(parser()) -> {ok, parser()}.
reset_parser(#parser{event_parser = NifParser} = Parser) ->
    exml_nif:reset_parser(NifParser),
    Cleared = Parser#parser{buffer = []},
    {ok, Cleared}.
88 |
%% @doc Free a parser
%%
%% Kept for backwards-compatibility, it is a no-op: the parser is only
%% pattern-matched and `ok' is returned.
-spec free_parser(parser()) -> ok.
free_parser(#parser{}) ->
    ok.
95 |
96 | %%%===================================================================
97 | %%% Helpers
98 | %%%===================================================================
99 |
%% Pulls elements out of the buffer until it is empty, until the NIF reports
%% no further element (`undefined'), or until it reports an error. Elements are
%% accumulated in reverse and put back in order when returned, together with
%% the part of the buffer that is still unconsumed.
parse_all(_Parser, [], Acc) ->
    {ok, lists:reverse(Acc), []};
parse_all(Parser, Buffer, Acc) ->
    case exml_nif:parse_next(Parser, Buffer) of
        {error, _} = Error ->
            Error;
        {ok, undefined, Offset} ->
            {ok, lists:reverse(Acc), drop_offset(Buffer, Offset)};
        {ok, Element, Offset} ->
            Remaining = drop_offset(Buffer, Offset),
            parse_all(Parser, Remaining, [Element | Acc])
    end.
112 |
%% Drops the first Offset bytes from a buffer made of a list of binaries.
%% Chunks that fit entirely within the offset are removed; the chunk that is
%% only partly covered keeps its tail.
drop_offset(Chunks, 0) ->
    Chunks;
drop_offset([Chunk | Chunks], Offset) when Offset >= byte_size(Chunk) ->
    Remaining = Offset - byte_size(Chunk),
    drop_offset(Chunks, Remaining);
drop_offset([Chunk | Chunks], Offset) ->
    <<_Dropped:Offset/binary, Kept/binary>> = Chunk,
    [Kept | Chunks].
120 |
--------------------------------------------------------------------------------
/test/exml_properties_tests.erl:
--------------------------------------------------------------------------------
1 | -module(exml_properties_tests).
2 |
3 | -include_lib("proper/include/proper.hrl").
4 | -include_lib("eunit/include/eunit.hrl").
5 |
6 | -compile([export_all, nowarn_export_all]).
7 |
%% Runs Property through proper:quickcheck/2, wrapped in a one-entry
%% conjunction labelled Name, and asserts the result.
%% NOTE(review): the leading 100 in the options is presumably the number of test runs - confirm.
p(Name, Property) ->
    ?assert(proper:quickcheck
              (proper:conjunction([{Name, Property}]),
               [100, long_result, {to_file, user}])).
12 |
13 | vector_1_forbidden_control_char_test() ->
14 | ?assertMatch({error, _}, exml:parse(<<"", 16#1B,"">>)).
15 |
16 | vector_2_forbidden_control_char_test() ->
17 | ?assertMatch({error, _}, exml:parse(<<"">>)).
18 |
19 | vector_3_forbidden_control_char_test() ->
20 | ?assertMatch({error, _}, exml:parse(<<"">>)).
21 |
22 | vector_4_forbidden_control_char_test() ->
23 | ?assertMatch({error, _},
24 | exml:parse(<<"<body>", 16#1B,"</body>