├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── LICENSE.rapidxml ├── README.md ├── c_src ├── exml.cpp ├── rapidxml.hpp ├── rapidxml_iterators.hpp ├── rapidxml_print.hpp └── rapidxml_utils.hpp ├── include ├── exml.hrl └── exml_stream.hrl ├── rebar.config ├── rebar.lock ├── src ├── exml.app.src ├── exml.erl ├── exml_nif.erl ├── exml_query.erl └── exml_stream.erl ├── test ├── exml_properties_tests.erl ├── exml_query_tests.erl ├── exml_stream_tests.erl └── exml_tests.erl └── tools ├── benchmarks.erl └── get_included_files_h.erl /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | workflow_dispatch: 9 | 10 | jobs: 11 | test: 12 | name: OTP ${{matrix.otp_vsn}} 13 | strategy: 14 | matrix: 15 | otp_vsn: ['28', '27', '26'] 16 | rebar_vsn: ['3.25.0'] 17 | runs-on: 'ubuntu-24.04' 18 | env: 19 | OTPVER: ${{ matrix.otp_vsn }} 20 | steps: 21 | - uses: actions/checkout@v4 22 | - uses: erlef/setup-beam@v1 23 | with: 24 | otp-version: ${{ matrix.otp_vsn }} 25 | rebar3-version: ${{ matrix.rebar_vsn }} 26 | - uses: actions/cache@v4 27 | name: Cache 28 | with: 29 | path: _build 30 | key: ${{ runner.os }}-build-${{ matrix.otp_vsn }}-${{ hashFiles(format('rebar.lock')) }}-1 31 | restore-keys: ${{ runner.os }}-build-${{ matrix.otp_vsn }}-1- 32 | - run: rebar3 as test compile -d 33 | - run: rebar3 eunit 34 | - run: rebar3 dialyzer 35 | - run: rebar3 as test codecov analyze 36 | - run: gcov -o c_src exml 37 | - uses: codecov/codecov-action@v5 38 | with: 39 | name: Upload coverage reports to Codecov 40 | token: ${{ secrets.CODECOV_TOKEN }} 41 | fail_ci_if_error: true 42 | verbose: true 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | /.eunit/ 3 | 
/.rebar/ 4 | /c_src/*o 5 | /c_src/*d 6 | /c_src/*gcda 7 | /c_src/*gcno 8 | /deps/ 9 | /ebin/ 10 | /logs/ 11 | /priv/*so 12 | *.beam 13 | /rebar3 14 | doc/ 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # [3.0.2] 2018-08-27 2 | 3 | ## Fixed 4 | 5 | * "Infinite stream" parser crashed with segfault for certain payloads. #39 #40 6 | 7 | # [3.0.1] 2018-05-22 8 | 9 | ## Fixed 10 | 11 | * Workaround to `enif_inspect_binary` returning corrupted data #36 12 | * Allow to encode other stream elements like `streamstart` and `streamend` #34 13 | 14 | ## Added 15 | 16 | * New API to query elements with specific attribute - #31 17 | This includes path queries as well. 18 | 19 | # [3.0.0] 2018-05-04 20 | 21 | ## Changed 22 | 23 | * Replaced expat with RapidXML 24 | 25 | # [2.5.0] 2018-05-04 26 | 27 | ## Added 28 | 29 | * A new API to query elements with specific namespaces. Path querying is extended with new selectors as well. 30 | * Child element size (in bytes) limit may be configured. 31 | * Stream opening tag is now configurable. 32 | 33 | ## Changed 34 | 35 | * Testable with `rebar3`; cover is now enabled. 36 | 37 | ## Misc 38 | 39 | * C sources reformatting. 
40 | 41 | # [2.4.1] 2016-12-17 42 | 43 | ## Changed 44 | 45 | - C code building on OS X and FreeBSD 46 | 47 | # [2.4.0] 2016-10-07 48 | 49 | ## Added 50 | 51 | - support for Erlang/OTP 17 to 19 52 | 53 | ## Changed 54 | 55 | - improved integration with `dialyzer` 56 | - improved integration with `rebar3` 57 | 58 | # [2.3.0] 2016-05-19 59 | 60 | ## Removed 61 | 62 | - support for single `#xmlel` as a child 63 | 64 | # [2.2.0] 2015-10-09 65 | 66 | ## Added 67 | 68 | - escaping attr value: #10 69 | - escaping data tags: #14 70 | 71 | ## Changed 72 | 73 | - improved type specs: #14 74 | - improved binary allocation: #14 75 | - improved stanza size calculation: #14 76 | 77 | # [2.1.5] 2014-09-29 78 | 79 | ## Fixed 80 | 81 | - memory leak in `exml_event`: #8 by @RGafiyatullin 82 | 83 | # [2.1.4] 2014-05-12 84 | 85 | ## Added 86 | 87 | - support for XMPP over WebSockets as in [RFC 7395](https://tools.ietf.org/html/rfc7395) 88 | 89 | # 2.1.0 90 | 91 | - Pretty printing XML elements 92 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | -------------------------------------------------------------------------------- /LICENSE.rapidxml: -------------------------------------------------------------------------------- 1 | Use of this software is granted under one of the following two licenses, 2 | to be chosen freely by the user. 3 | 4 | 1. 
Boost Software License - Version 1.0 - August 17th, 2003 5 | =============================================================================== 6 | 7 | Copyright (c) 2006, 2007 Marcin Kalicinski 8 | 9 | Permission is hereby granted, free of charge, to any person or organization 10 | obtaining a copy of the software and accompanying documentation covered by 11 | this license (the "Software") to use, reproduce, display, distribute, 12 | execute, and transmit the Software, and to prepare derivative works of the 13 | Software, and to permit third-parties to whom the Software is furnished to 14 | do so, all subject to the following: 15 | 16 | The copyright notices in the Software and this entire statement, including 17 | the above license grant, this restriction and the following disclaimer, 18 | must be included in all copies of the Software, in whole or in part, and 19 | all derivative works of the Software, unless such copies or derivative 20 | works are solely in the form of machine-executable object code generated by 21 | a source language processor. 22 | 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 26 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 27 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 28 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 29 | DEALINGS IN THE SOFTWARE. 30 | 31 | 2. 
The MIT License 32 | =============================================================================== 33 | 34 | Copyright (c) 2006, 2007 Marcin Kalicinski 35 | 36 | Permission is hereby granted, free of charge, to any person obtaining a copy 37 | of this software and associated documentation files (the "Software"), to deal 38 | in the Software without restriction, including without limitation the rights 39 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 40 | of the Software, and to permit persons to whom the Software is furnished to do so, 41 | subject to the following conditions: 42 | 43 | The above copyright notice and this permission notice shall be included in all 44 | copies or substantial portions of the Software. 45 | 46 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 47 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 48 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 49 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 50 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 51 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 52 | IN THE SOFTWARE. 
53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | exml 2 | ==== 3 | 4 | [![Actions Status](https://github.com/esl/exml/actions/workflows/ci.yml/badge.svg)](https://github.com/esl/exml/actions/workflows/ci.yml) 5 | [![Codecov](https://codecov.io/gh/esl/exml/branch/master/graph/badge.svg)](https://codecov.io/gh/esl/exml) 6 | [![Hex pm](https://img.shields.io/hexpm/v/hexml.svg)](https://hex.pm/packages/hexml) 7 | [![Hex Docs](https://img.shields.io/badge/hex-docs-lightgreen.svg)](https://hexdocs.pm/hexml/) 8 | [![Downloads](https://img.shields.io/hexpm/dt/hexml.svg)](https://hex.pm/packages/hexml) 9 | [![License](https://img.shields.io/hexpm/l/hexml.svg)](https://github.com/esl/hexml/blob/master/LICENSE) 10 | 11 | **exml** is an Erlang library for parsing XML streams and manipulating complex XML structures. 12 | 13 | Building 14 | ======== 15 | 16 | **exml** is a rebar3-compatible OTP application; run `make` or `./rebar3 compile` in order to build it. A C++11 compiler is required. 17 | 18 | 19 | Using 20 | ===== 21 | 22 | **exml** can parse both XML streams and whole XML documents at once. 23 | 24 | To parse a whole XML document: 25 | 26 | ```erlang 27 | {ok, Parser} = exml:parse(<<"<my_xml_doc/>">>). 28 | ``` 29 | 30 | To generate an XML document from Erlang terms: 31 | 32 | ```erlang 33 | El = #xmlel{name = <<"foo">>, 34 | attrs = #{<<"attr1">> => <<"bar">>}, 35 | children = [{xmlcdata, <<"Some Value">>, escaped}]}, 36 | exml:to_list(El). 37 | ``` 38 | or (pastable into `erl` shell): 39 | ```erlang 40 | El = {xmlel, <<"foo">>, 41 | #{<<"attr1">> => <<"bar">>}, 42 | [{xmlcdata, <<"Some Value">>, escaped}]}. 43 | exml:to_list(El). 44 | ``` 45 | 46 | Which results in: 47 | ```xml 48 | <foo attr1='bar'>Some Value</foo> 49 | ``` 50 | 51 | `exml:to_binary/1` works similarly. 
52 | 53 | There's also `exml:to_pretty_iolist/1,3` for a quick'n'dirty document preview (pastable into `erl`): 54 | 55 | ```erlang 56 | rr("include/exml.hrl"). 57 | El = #xmlel{name = <<"outer">>, 58 | attrs = #{<<"attr1">> => <<"val1">>, 59 | <<"attr2">> => <<"val-two">>}, 60 | children = [#xmlel{name = <<"inner-childless">>}, 61 | #xmlel{name = <<"inner-w-children">>, 62 | children = [#xmlel{name = <<"a">>}]}]}. 63 | io:format("~s", [exml:to_pretty_iolist(El)]). 64 | ``` 65 | which prints: 66 | ```xml 67 | <outer attr1='val1' attr2='val-two'> 68 | <inner-childless/> 69 | <inner-w-children> 70 | <a/> 71 | </inner-w-children> 72 | </outer> 73 | ``` 74 | 75 | For an example of using the streaming API see `test/exml_stream_tests.erl`. 76 | 77 | XML Tree navigation 78 | ===== 79 | 80 | The `exml_query` module exposes powerful helper functions to navigate the tree; please refer to the available documentation. 81 | 82 | 83 | Notes 84 | ===== 85 | 86 | The implementation uses C++ thread-local memory pools (10MB static and 2MB dynamic by default; override `RAPIDXML_STATIC_POOL_SIZE` and/or `RAPIDXML_DYNAMIC_POOL_SIZE` at compilation time to change these sizes) to improve cache locality and memory allocation patterns. To further improve performance, the NIF calls do not check input size, do not timeslice themselves, and do not run on dirty schedulers: this means that if they are called with inputs that are too big, the NIFs can starve the VM. It's up to the developer to throttle the input sizes and fine-tune the memory pool sizes. 
87 | -------------------------------------------------------------------------------- /c_src/exml.cpp: -------------------------------------------------------------------------------- 1 | #ifndef RAPIDXML_STATIC_POOL_SIZE 2 | #define RAPIDXML_STATIC_POOL_SIZE (10 * 1024 * 1024) 3 | #endif 4 | #ifndef RAPIDXML_DYNAMIC_POOL_SIZE 5 | #define RAPIDXML_DYNAMIC_POOL_SIZE (2 * 1024 * 1024) 6 | #endif 7 | 8 | #include "rapidxml.hpp" 9 | #include "rapidxml_print.hpp" 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | using ustring = std::vector; 21 | 22 | class xml_document { 23 | public: 24 | struct ParseResult { 25 | bool eof = false; 26 | bool has_error = false; 27 | std::string error_message; 28 | const unsigned char *rest = nullptr; 29 | }; 30 | 31 | template 32 | ParseResult parse(unsigned char *text, xml_document &parent) { 33 | return with_error_handling( 34 | [&] { return impl.parse(text, parent.impl); }); 35 | } 36 | 37 | template ParseResult parse(unsigned char *text) { 38 | return with_error_handling([&] { return impl.parse(text); }); 39 | } 40 | 41 | void clear() { impl.clear(); } 42 | 43 | rapidxml::xml_document impl; 44 | 45 | private: 46 | template ParseResult with_error_handling(F &&f) { 47 | ParseResult result; 48 | try { 49 | result.rest = std::forward(f)(); 50 | } catch (const rapidxml::eof_error &e) { 51 | result.eof = true; 52 | result.has_error = true; 53 | result.error_message = e.what(); 54 | } catch (const rapidxml::parse_error &e) { 55 | result.has_error = true; 56 | result.error_message = e.what(); 57 | } 58 | return result; 59 | } 60 | }; 61 | 62 | namespace { 63 | ERL_NIF_TERM atom_ok; 64 | ERL_NIF_TERM atom_error; 65 | ERL_NIF_TERM atom_undefined; 66 | ERL_NIF_TERM atom_xmlel; 67 | ERL_NIF_TERM atom_xmlcdata; 68 | ERL_NIF_TERM atom_xmlstreamstart; 69 | ERL_NIF_TERM atom_xmlstreamend; 70 | ERL_NIF_TERM atom_pretty; 71 | ERL_NIF_TERM atom_escaped; 72 | ERL_NIF_TERM 
atom_cdata; 73 | ERL_NIF_TERM atom_true; 74 | constexpr const unsigned char EMPTY[1] = {0}; 75 | 76 | xml_document &get_static_doc() { 77 | static thread_local xml_document doc; 78 | doc.clear(); 79 | return doc; 80 | } 81 | 82 | } // namespace 83 | 84 | struct Parser { 85 | ustring stream_tag; 86 | std::uint64_t max_element_size = 0; 87 | bool infinite_stream = false; 88 | 89 | static thread_local std::vector buffer; 90 | static thread_local std::vector term_buffer; 91 | 92 | bool copy_buffer(ErlNifEnv *env, ERL_NIF_TERM buf) { 93 | buffer.clear(); 94 | 95 | ErlNifBinary bin; 96 | if (enif_inspect_binary(env, buf, &bin)) { 97 | buffer.insert(buffer.end(), bin.data, bin.data + bin.size); 98 | } else if (enif_is_list(env, buf)) { 99 | for (ERL_NIF_TERM head; enif_get_list_cell(env, buf, &head, &buf);) { 100 | if (!enif_inspect_binary(env, head, &bin)) 101 | return false; 102 | 103 | buffer.insert(buffer.end(), bin.data, bin.data + bin.size); 104 | } 105 | } else { 106 | return false; 107 | } 108 | 109 | buffer.push_back('\0'); 110 | return true; 111 | } 112 | 113 | void reset() { 114 | stream_tag.clear(); 115 | buffer.clear(); 116 | } 117 | }; 118 | 119 | thread_local std::vector Parser::buffer; 120 | thread_local std::vector Parser::term_buffer; 121 | 122 | struct ParseCtx { 123 | ErlNifEnv *env; 124 | Parser *parser; 125 | }; 126 | 127 | namespace { 128 | ErlNifResourceType *parser_type; 129 | 130 | constexpr int default_parse_flags() { 131 | return rapidxml::parse_no_string_terminators | rapidxml::parse_validate_control_chars; 132 | } 133 | 134 | constexpr int parse_one() { 135 | return rapidxml::parse_parse_one | default_parse_flags(); 136 | } 137 | 138 | constexpr int parse_open_only() { 139 | return rapidxml::parse_open_only | default_parse_flags(); 140 | } 141 | 142 | ERL_NIF_TERM to_subbinary(ParseCtx &ctx, const unsigned char *text, 143 | std::size_t len) { 144 | ERL_NIF_TERM binary; 145 | unsigned char *bin_data = enif_make_new_binary(ctx.env, len, 
&binary); 146 | std::copy(text, text + len, bin_data); 147 | return binary; 148 | } 149 | 150 | ERL_NIF_TERM get_xmlcdata(ParseCtx &ctx, 151 | rapidxml::xml_node *node) { 152 | return enif_make_tuple3(ctx.env, atom_xmlcdata, 153 | to_subbinary(ctx, node->value(), node->value_size()), 154 | atom_escaped); 155 | } 156 | 157 | ERL_NIF_TERM merge_data_nodes(ParseCtx &ctx, 158 | rapidxml::xml_node *node, 159 | std::size_t total_size) { 160 | ERL_NIF_TERM bin; 161 | unsigned char *it = enif_make_new_binary(ctx.env, total_size, &bin); 162 | 163 | while (total_size > 0) { 164 | it = std::copy(node->value(), node->value() + node->value_size(), it); 165 | total_size -= node->value_size(); 166 | node = node->next_sibling(); 167 | } 168 | 169 | return enif_make_tuple3(ctx.env, atom_xmlcdata, bin, atom_escaped); 170 | } 171 | 172 | void append_pending_data_nodes(ParseCtx &ctx, 173 | std::vector &children, 174 | rapidxml::xml_node *node, 175 | const std::size_t pending) { 176 | if (pending == 0) 177 | return; 178 | 179 | if (pending == node->value_size()) 180 | children.push_back(get_xmlcdata(ctx, node)); 181 | else 182 | children.push_back(merge_data_nodes(ctx, node, pending)); 183 | } 184 | 185 | ERL_NIF_TERM make_xmlel(ParseCtx &ctx, rapidxml::xml_node *node); 186 | 187 | ERL_NIF_TERM get_children_tuple(ParseCtx &ctx, 188 | rapidxml::xml_node *node) { 189 | std::vector &children = Parser::term_buffer; 190 | std::size_t begin = children.size(); 191 | 192 | rapidxml::xml_node *first_data_node = nullptr; 193 | std::size_t pending_data_size = 0; 194 | 195 | for (rapidxml::xml_node *child = node->first_node(); child; 196 | child = child->next_sibling()) { 197 | const bool is_data_node = child->type() == rapidxml::node_data || 198 | child->type() == rapidxml::node_cdata; 199 | 200 | if (is_data_node) { 201 | if (pending_data_size == 0) 202 | first_data_node = child; 203 | pending_data_size += child->value_size(); 204 | } else { 205 | append_pending_data_nodes(ctx, children, 
first_data_node, 206 | pending_data_size); 207 | pending_data_size = 0; 208 | if (child->type() == rapidxml::node_element) 209 | children.push_back(make_xmlel(ctx, child)); 210 | } 211 | } 212 | 213 | append_pending_data_nodes(ctx, children, first_data_node, pending_data_size); 214 | 215 | std::size_t size = children.size() - begin; 216 | if (size == 0) 217 | return enif_make_list(ctx.env, 0); 218 | 219 | ERL_NIF_TERM arr = 220 | enif_make_list_from_array(ctx.env, children.data() + begin, size); 221 | children.erase(children.end() - size, children.end()); 222 | return arr; 223 | } 224 | 225 | std::pair 226 | node_name(rapidxml::xml_node *node) { 227 | const unsigned char *start = node->name(); 228 | std::size_t len = node->name_size(); 229 | if (node->prefix()) { 230 | start = node->prefix(); 231 | len += node->prefix_size() + 1; 232 | } 233 | return {start, len}; 234 | } 235 | 236 | ERL_NIF_TERM make_node_name_binary(ParseCtx &ctx, 237 | rapidxml::xml_node *node) { 238 | const unsigned char *start; 239 | std::size_t len; 240 | std::tie(start, len) = node_name(node); 241 | return to_subbinary(ctx, start, len); 242 | } 243 | 244 | ERL_NIF_TERM make_attr_tuple(ParseCtx &ctx, 245 | rapidxml::xml_attribute *attr) { 246 | ERL_NIF_TERM name = to_subbinary(ctx, attr->name(), attr->name_size()); 247 | ERL_NIF_TERM value = to_subbinary(ctx, attr->value(), attr->value_size()); 248 | return enif_make_tuple2(ctx.env, name, value); 249 | } 250 | 251 | ERL_NIF_TERM get_attributes(ParseCtx &ctx, rapidxml::xml_node *node) { 252 | ERL_NIF_TERM attrs_term = enif_make_new_map(ctx.env); 253 | 254 | for (rapidxml::xml_attribute *attr = node->first_attribute(); 255 | attr; attr = attr->next_attribute()) { 256 | ERL_NIF_TERM key = to_subbinary(ctx, attr->name(), attr->name_size()); 257 | ERL_NIF_TERM value = to_subbinary(ctx, attr->value(), attr->value_size()); 258 | enif_make_map_put(ctx.env, attrs_term, key, value, &attrs_term); 259 | } 260 | 261 | return attrs_term; 262 | } 263 | 264 
| ERL_NIF_TERM make_stream_start_tuple(ParseCtx &ctx, 265 | rapidxml::xml_node *node) { 266 | 267 | ERL_NIF_TERM name_term = make_node_name_binary(ctx, node); 268 | ERL_NIF_TERM attrs_term = get_attributes(ctx, node); 269 | return enif_make_tuple3(ctx.env, atom_xmlstreamstart, name_term, attrs_term); 270 | } 271 | 272 | ERL_NIF_TERM make_stream_end_tuple(ParseCtx &ctx) { 273 | ERL_NIF_TERM name; 274 | unsigned char *data = 275 | enif_make_new_binary(ctx.env, ctx.parser->stream_tag.size(), &name); 276 | 277 | std::copy(ctx.parser->stream_tag.begin(), ctx.parser->stream_tag.end(), data); 278 | 279 | return enif_make_tuple2(ctx.env, atom_xmlstreamend, name); 280 | } 281 | 282 | ERL_NIF_TERM make_xmlel(ParseCtx &ctx, 283 | rapidxml::xml_node *node) { 284 | ERL_NIF_TERM name_term = make_node_name_binary(ctx, node); 285 | ERL_NIF_TERM attrs_term = get_attributes(ctx, node); 286 | ERL_NIF_TERM children_term = get_children_tuple(ctx, node); 287 | return enif_make_tuple4(ctx.env, atom_xmlel, name_term, attrs_term, children_term); 288 | } 289 | 290 | bool build_children(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM children, 291 | rapidxml::xml_node &node); 292 | 293 | bool build_cdata(ErlNifEnv *env, xml_document &doc, const ERL_NIF_TERM elem[], 294 | rapidxml::xml_node &node) { 295 | ErlNifBinary bin; 296 | if (!enif_inspect_iolist_as_binary(env, elem[1], &bin)) 297 | return false; 298 | 299 | rapidxml::node_type cdata_type; 300 | if (enif_compare(atom_escaped, elem[2]) == 0) 301 | cdata_type = rapidxml::node_data; 302 | else if (enif_compare(atom_cdata, elem[2]) == 0) 303 | cdata_type = rapidxml::node_cdata; 304 | else 305 | return false; 306 | 307 | auto child = doc.impl.allocate_node(cdata_type); 308 | child->value(bin.size > 0 ? 
bin.data : EMPTY, bin.size); 309 | node.append_node(child); 310 | return true; 311 | } 312 | 313 | bool build_attrs(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM attrs, 314 | rapidxml::xml_node &node) { 315 | 316 | if (!enif_is_map(env, attrs)) 317 | return false; 318 | 319 | ErlNifMapIterator iter; 320 | enif_map_iterator_create(env, attrs, &iter, ERL_NIF_MAP_ITERATOR_FIRST); 321 | 322 | ERL_NIF_TERM map_key, map_value; 323 | while (enif_map_iterator_get_pair(env, &iter, &map_key, &map_value)) { 324 | ErlNifBinary key, value; 325 | if (!enif_inspect_iolist_as_binary(env, map_key, &key)) 326 | return false; 327 | 328 | if (!enif_inspect_iolist_as_binary(env, map_value, &value)) 329 | return false; 330 | 331 | auto attr = doc.impl.allocate_attribute(key.size > 0 ? key.data : EMPTY, 332 | value.size > 0 ? value.data : EMPTY, 333 | key.size, value.size); 334 | node.append_attribute(attr); 335 | enif_map_iterator_next(env, &iter); 336 | } 337 | enif_map_iterator_destroy(env, &iter); 338 | 339 | return true; 340 | } 341 | 342 | bool build_el(ErlNifEnv *env, xml_document &doc, const ERL_NIF_TERM elem[], 343 | rapidxml::xml_node &node) { 344 | ErlNifBinary name; 345 | if (!enif_inspect_binary(env, elem[1], &name)) 346 | return false; 347 | 348 | auto child = doc.impl.allocate_node(rapidxml::node_element); 349 | child->name(name.size > 0 ? 
name.data : EMPTY, name.size); 350 | node.append_node(child); 351 | 352 | if (!build_attrs(env, doc, elem[2], *child)) 353 | return false; 354 | if (!build_children(env, doc, elem[3], *child)) 355 | return false; 356 | 357 | return true; 358 | } 359 | 360 | bool build_child(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM child, 361 | rapidxml::xml_node &node) { 362 | int arity; 363 | const ERL_NIF_TERM *tuple; 364 | if (!enif_get_tuple(env, child, &arity, &tuple)) 365 | return false; 366 | 367 | if (arity == 3 && enif_compare(atom_xmlcdata, tuple[0]) == 0) { 368 | if (!build_cdata(env, doc, tuple, node)) 369 | return false; 370 | } else if (arity == 4 && enif_compare(atom_xmlel, tuple[0]) == 0) { 371 | if (!build_el(env, doc, tuple, node)) 372 | return false; 373 | } else { 374 | return false; 375 | } 376 | 377 | return true; 378 | } 379 | 380 | bool build_children(ErlNifEnv *env, xml_document &doc, ERL_NIF_TERM children, 381 | rapidxml::xml_node &node) { 382 | 383 | if (!enif_is_list(env, children)) 384 | return false; 385 | 386 | for (ERL_NIF_TERM head; 387 | enif_get_list_cell(env, children, &head, &children);) { 388 | if (!build_child(env, doc, head, node)) 389 | return false; 390 | } 391 | 392 | return true; 393 | } 394 | 395 | ERL_NIF_TERM node_to_binary(ErlNifEnv *env, 396 | rapidxml::xml_node &node, 397 | int flags) { 398 | static thread_local std::vector print_buffer; 399 | print_buffer.clear(); 400 | 401 | rapidxml::print(std::back_inserter(print_buffer), node, flags); 402 | 403 | ERL_NIF_TERM ret_binary; 404 | unsigned char *data = 405 | enif_make_new_binary(env, print_buffer.size(), &ret_binary); 406 | std::copy(print_buffer.begin(), print_buffer.end(), data); 407 | return ret_binary; 408 | } 409 | 410 | std::size_t stream_closing_tag_size(Parser *parser) { 411 | return 3 + parser->stream_tag.size(); // name + 412 | } 413 | 414 | bool has_stream_closing_tag(Parser *parser, std::size_t offset) { 415 | if (Parser::buffer.size() < offset + 
stream_closing_tag_size(parser)) 416 | return false; 417 | 418 | if (Parser::buffer[offset] != '<' || Parser::buffer[offset + 1] != '/') 419 | return false; 420 | 421 | if (!std::equal(parser->stream_tag.begin(), parser->stream_tag.end(), 422 | Parser::buffer.begin() + offset + 2)) 423 | return false; 424 | 425 | // skip whitespace between tag name and closing '>' 426 | offset = offset + 2 + parser->stream_tag.size(); 427 | while (offset < Parser::buffer.size() - 1 && 428 | std::isspace(Parser::buffer[offset])) 429 | ++offset; 430 | 431 | return Parser::buffer[offset] == '>'; 432 | } 433 | 434 | } // namespace 435 | 436 | extern "C" { 437 | static void delete_parser(ErlNifEnv *, void *parser) { 438 | static_cast(parser)->~Parser(); 439 | } 440 | 441 | static int load(ErlNifEnv *env, void **, ERL_NIF_TERM) { 442 | parser_type = enif_open_resource_type( 443 | env, "exml_nif", "parser", &delete_parser, ERL_NIF_RT_CREATE, nullptr); 444 | atom_ok = enif_make_atom(env, "ok"); 445 | atom_error = enif_make_atom(env, "error"); 446 | atom_undefined = enif_make_atom(env, "undefined"); 447 | atom_xmlel = enif_make_atom(env, "xmlel"); 448 | atom_xmlcdata = enif_make_atom(env, "xmlcdata"); 449 | atom_xmlstreamstart = enif_make_atom(env, "xmlstreamstart"); 450 | atom_xmlstreamend = enif_make_atom(env, "xmlstreamend"); 451 | atom_pretty = enif_make_atom(env, "pretty"); 452 | atom_escaped = enif_make_atom(env, "escaped"); 453 | atom_cdata = enif_make_atom(env, "cdata"); 454 | atom_true = enif_make_atom(env, "true"); 455 | 456 | get_static_doc().impl.set_allocator(enif_alloc, enif_free); 457 | 458 | return 0; 459 | } 460 | 461 | static void unload(ErlNifEnv *, void *) { 462 | return; 463 | } 464 | 465 | static ERL_NIF_TERM create(ErlNifEnv *env, int, 466 | const ERL_NIF_TERM argv[]) { 467 | void *mem = enif_alloc_resource(parser_type, sizeof(Parser)); 468 | Parser *parser = new (mem) Parser; 469 | 470 | ErlNifUInt64 max_element_size; 471 | if (!enif_get_uint64(env, argv[0], 
&max_element_size)) { /* argument rejected: drop the only reference to the freshly allocated resource so it is not leaked */ 472 | enif_release_resource(parser); return enif_make_badarg(env); } 473 | parser->max_element_size = static_cast(max_element_size); 474 | if (enif_compare(atom_true, argv[1]) == 0) 475 | parser->infinite_stream = true; 476 | 477 | ERL_NIF_TERM term = enif_make_resource(env, parser); 478 | enif_release_resource(parser); 479 | return enif_make_tuple2(env, atom_ok, term); 480 | } 481 | 482 | static ERL_NIF_TERM parse_next(ErlNifEnv *env, int, 483 | const ERL_NIF_TERM argv[]) { 484 | Parser *parser; 485 | if (!enif_get_resource(env, argv[0], parser_type, 486 | reinterpret_cast(&parser))) 487 | return enif_make_badarg(env); 488 | 489 | if (!parser->copy_buffer(env, argv[1])) 490 | return enif_make_badarg(env); 491 | 492 | // Skip initial whitespace even if we don't manage to parse anything. 493 | // Also needed for has_stream_closing_tag to recognize the tag. 494 | std::size_t offset = 0; 495 | while (offset < Parser::buffer.size() - 1 && 496 | std::isspace(Parser::buffer[offset])) 497 | ++offset; 498 | 499 | ParseCtx ctx{env, parser}; 500 | xml_document::ParseResult result; 501 | ERL_NIF_TERM element; 502 | const char *error_msg = nullptr; 503 | 504 | xml_document &doc = get_static_doc(); 505 | Parser::term_buffer.clear(); 506 | 507 | auto parseStreamOpen = [&] { 508 | result = doc.parse(Parser::buffer.data() + offset); 509 | if (!result.has_error) { 510 | if (parser->max_element_size && 511 | result.rest - Parser::buffer.data() - offset > parser->max_element_size) { 512 | error_msg = "element too big"; 513 | } else { 514 | auto name_tag = node_name(doc.impl.first_node()); 515 | parser->stream_tag = ustring(std::get<0>(name_tag), std::get<0>(name_tag) + std::get<1>(name_tag)); 516 | element = make_stream_start_tuple(ctx, doc.impl.first_node()); 517 | } 518 | } 519 | }; 520 | 521 | auto hasStreamReopen = [&] { 522 | auto parseOpenRes = 523 | doc.parse(Parser::buffer.data() + offset); 524 | if (parseOpenRes.has_error) 525 | return false; 526 | auto tag_name = 
node_name(doc.impl.first_node()); 527 | return ustring(std::get<0>(tag_name), std::get<0>(tag_name) + std::get<1>(tag_name)) == 528 | parser->stream_tag; 529 | }; 530 | 531 | auto parseElement = [&] { 532 | result = doc.parse(Parser::buffer.data() + offset); 533 | if (!result.has_error) { 534 | if (parser->max_element_size && 535 | result.rest - Parser::buffer.data() - offset > parser->max_element_size) { 536 | error_msg = "element too big"; 537 | } else { 538 | element = make_xmlel(ctx, doc.impl.first_node()); 539 | } 540 | } 541 | }; 542 | 543 | if (parser->infinite_stream) { 544 | parseElement(); 545 | } else if (parser->stream_tag.empty()) { 546 | parseStreamOpen(); 547 | } else if (has_stream_closing_tag(parser, offset)) { 548 | doc.clear(); 549 | // no data after closing tag 550 | result.rest = &*Parser::buffer.rbegin(); 551 | element = make_stream_end_tuple(ctx); 552 | } else { 553 | parseElement(); 554 | } 555 | 556 | if (result.eof && hasStreamReopen()) { 557 | doc.clear(); 558 | parseStreamOpen(); 559 | } 560 | 561 | if (result.eof) { 562 | // Return an error if an incomplete element has at least max_element_size characters. 563 | if (parser->max_element_size && 564 | Parser::buffer.size() - offset > parser->max_element_size) { 565 | error_msg = "element too big"; 566 | } else { 567 | result.rest = Parser::buffer.data() + offset; 568 | element = atom_undefined; 569 | } 570 | } else if (result.has_error) { 571 | error_msg = result.error_message.c_str(); 572 | } 573 | 574 | if (!error_msg) { 575 | // Return an error when null character is found. 
576 | std::size_t rest_size = &Parser::buffer.back() - result.rest; 577 | if (std::strlen(reinterpret_cast(result.rest)) != rest_size) 578 | error_msg = "null character found in buffer"; 579 | } 580 | 581 | if (error_msg) { 582 | ERL_NIF_TERM error_message = 583 | to_subbinary(ctx, (const unsigned char *)error_msg, strlen(error_msg)); 584 | 585 | return enif_make_tuple2(env, atom_error, error_message); 586 | } 587 | 588 | return enif_make_tuple3( 589 | env, atom_ok, element, 590 | enif_make_uint64(env, result.rest - Parser::buffer.data())); 591 | } 592 | 593 | static ERL_NIF_TERM parse(ErlNifEnv *env, int, const ERL_NIF_TERM argv[]) { 594 | Parser parser; 595 | parser.copy_buffer(env, argv[0]); 596 | Parser::term_buffer.clear(); 597 | 598 | auto &doc = get_static_doc(); 599 | 600 | ParseCtx ctx{env, &parser}; 601 | auto result = doc.parse(Parser::buffer.data()); 602 | 603 | if (!result.has_error) { 604 | ERL_NIF_TERM element = make_xmlel(ctx, doc.impl.first_node()); 605 | return enif_make_tuple2(env, atom_ok, element); 606 | } 607 | 608 | ERL_NIF_TERM error_message = 609 | to_subbinary(ctx, 610 | (const unsigned char *)result.error_message.c_str(), 611 | result.error_message.size()); 612 | 613 | return enif_make_tuple2(env, atom_error, error_message); 614 | } 615 | 616 | static ERL_NIF_TERM escape_cdata(ErlNifEnv *env, int, 617 | const ERL_NIF_TERM argv[]) { 618 | ErlNifBinary bin; 619 | if (!enif_inspect_iolist_as_binary(env, argv[0], &bin)) 620 | return enif_make_badarg(env); 621 | 622 | rapidxml::node_type cdata_type; 623 | if (enif_compare(atom_escaped, argv[1]) == 0) 624 | cdata_type = rapidxml::node_data; 625 | else if (enif_compare(atom_cdata, argv[1]) == 0) 626 | cdata_type = rapidxml::node_cdata; 627 | else 628 | return enif_make_badarg(env); 629 | 630 | rapidxml::xml_node node(cdata_type); 631 | node.value(bin.data, bin.size); 632 | return node_to_binary(env, node, rapidxml::print_no_indenting); 633 | } 634 | 635 | static ERL_NIF_TERM to_binary(ErlNifEnv 
*env, int, 636 | const ERL_NIF_TERM argv[]) { 637 | int arity; 638 | const ERL_NIF_TERM *xmlel; 639 | if (!enif_get_tuple(env, argv[0], &arity, &xmlel)) 640 | return enif_make_badarg(env); 641 | 642 | if (arity != 4 || enif_compare(atom_xmlel, xmlel[0]) != 0) 643 | return enif_make_badarg(env); 644 | 645 | int flags = rapidxml::print_no_indenting; 646 | if (enif_compare(atom_pretty, argv[1]) == 0) 647 | flags = 0; 648 | 649 | xml_document &doc = get_static_doc(); 650 | if (!build_el(env, doc, xmlel, doc.impl)) 651 | return enif_make_badarg(env); 652 | 653 | return node_to_binary(env, doc.impl, flags); 654 | } 655 | 656 | static ERL_NIF_TERM reset_parser(ErlNifEnv *env, int, 657 | const ERL_NIF_TERM argv[]) { 658 | Parser *parser; 659 | if (!enif_get_resource(env, argv[0], parser_type, 660 | reinterpret_cast(&parser))) 661 | return enif_make_badarg(env); 662 | 663 | parser->reset(); 664 | return atom_ok; 665 | } 666 | 667 | static ErlNifFunc nif_funcs[] = { 668 | {"create", 2, create, 0}, {"parse", 1, parse, 0}, 669 | {"parse_next", 2, parse_next, 0}, {"escape_cdata", 2, escape_cdata, 0}, 670 | {"to_binary", 2, to_binary, 0}, {"reset_parser", 1, reset_parser, 0}}; 671 | } 672 | 673 | ERL_NIF_INIT(exml_nif, nif_funcs, &load, nullptr, nullptr, &unload) 674 | -------------------------------------------------------------------------------- /c_src/rapidxml_iterators.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RAPIDXML_ITERATORS_HPP_INCLUDED 2 | #define RAPIDXML_ITERATORS_HPP_INCLUDED 3 | 4 | // Copyright (C) 2006, 2009 Marcin Kalicinski 5 | // Version 1.13 6 | // Revision $DateTime: 2009/05/13 01:46:17 $ 7 | //! \file rapidxml_iterators.hpp This file contains rapidxml iterators 8 | 9 | #include "rapidxml.hpp" 10 | 11 | namespace rapidxml 12 | { 13 | 14 | //! 
Iterator of child nodes of xml_node 15 | template 16 | class node_iterator 17 | { 18 | 19 | public: 20 | 21 | typedef typename xml_node value_type; 22 | typedef typename xml_node &reference; 23 | typedef typename xml_node *pointer; 24 | typedef std::ptrdiff_t difference_type; 25 | typedef std::bidirectional_iterator_tag iterator_category; 26 | 27 | node_iterator() 28 | : m_node(0) 29 | { 30 | } 31 | 32 | node_iterator(xml_node *node) 33 | : m_node(node->first_node()) 34 | { 35 | } 36 | 37 | reference operator *() const 38 | { 39 | assert(m_node); 40 | return *m_node; 41 | } 42 | 43 | pointer operator->() const 44 | { 45 | assert(m_node); 46 | return m_node; 47 | } 48 | 49 | node_iterator& operator++() 50 | { 51 | assert(m_node); 52 | m_node = m_node->next_sibling(); 53 | return *this; 54 | } 55 | /* Post-increment: steps to the next sibling and returns the iterator's previous position. */ 56 | node_iterator operator++(int) 57 | { 58 | node_iterator tmp = *this; 59 | ++(*this); /* advance the iterator object; the old `++this` targeted the `this` pointer */ 60 | return tmp; 61 | } 62 | 63 | node_iterator& operator--() 64 | { 65 | assert(m_node && m_node->previous_sibling()); 66 | m_node = m_node->previous_sibling(); 67 | return *this; 68 | } 69 | /* Post-decrement: steps to the previous sibling and returns the iterator's previous position. */ 70 | node_iterator operator--(int) 71 | { 72 | node_iterator tmp = *this; 73 | --(*this); /* was `++this`: wrong operand and wrong direction */ 74 | return tmp; 75 | } 76 | 77 | bool operator ==(const node_iterator &rhs) const 78 | { 79 | return m_node == rhs.m_node; 80 | } 81 | 82 | bool operator !=(const node_iterator &rhs) const 83 | { 84 | return m_node != rhs.m_node; 85 | } 86 | 87 | private: 88 | 89 | xml_node *m_node; 90 | 91 | }; 92 | 93 | //! 
Iterator of child attributes of xml_node 94 | template 95 | class attribute_iterator 96 | { 97 | 98 | public: 99 | 100 | typedef typename xml_attribute value_type; 101 | typedef typename xml_attribute &reference; 102 | typedef typename xml_attribute *pointer; 103 | typedef std::ptrdiff_t difference_type; 104 | typedef std::bidirectional_iterator_tag iterator_category; 105 | 106 | attribute_iterator() 107 | : m_attribute(0) 108 | { 109 | } 110 | 111 | attribute_iterator(xml_node *node) 112 | : m_attribute(node->first_attribute()) 113 | { 114 | } 115 | 116 | reference operator *() const 117 | { 118 | assert(m_attribute); 119 | return *m_attribute; 120 | } 121 | 122 | pointer operator->() const 123 | { 124 | assert(m_attribute); 125 | return m_attribute; 126 | } 127 | 128 | attribute_iterator& operator++() 129 | { 130 | assert(m_attribute); 131 | m_attribute = m_attribute->next_attribute(); 132 | return *this; 133 | } 134 | /* Post-increment: steps to the next attribute and returns the iterator's previous position. */ 135 | attribute_iterator operator++(int) 136 | { 137 | attribute_iterator tmp = *this; 138 | ++(*this); /* advance the iterator object; the old `++this` targeted the `this` pointer */ 139 | return tmp; 140 | } 141 | 142 | attribute_iterator& operator--() 143 | { 144 | assert(m_attribute && m_attribute->previous_attribute()); 145 | m_attribute = m_attribute->previous_attribute(); 146 | return *this; 147 | } 148 | /* Post-decrement: steps to the previous attribute and returns the iterator's previous position. */ 149 | attribute_iterator operator--(int) 150 | { 151 | attribute_iterator tmp = *this; 152 | --(*this); /* was `++this`: wrong operand and wrong direction */ 153 | return tmp; 154 | } 155 | 156 | bool operator ==(const attribute_iterator &rhs) const 157 | { 158 | return m_attribute == rhs.m_attribute; 159 | } 160 | 161 | bool operator !=(const attribute_iterator &rhs) const 162 | { 163 | return m_attribute != rhs.m_attribute; 164 | } 165 | 166 | private: 167 | 168 | xml_attribute *m_attribute; 169 | 170 | }; 171 | 172 | } 173 | 174 | #endif 175 | -------------------------------------------------------------------------------- /c_src/rapidxml_print.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RAPIDXML_PRINT_HPP_INCLUDED 2 | #define 
RAPIDXML_PRINT_HPP_INCLUDED 3 | 4 | // Copyright (C) 2006, 2009 Marcin Kalicinski 5 | // Version 1.13 6 | // Revision $DateTime: 2009/05/13 01:46:17 $ 7 | //! \file rapidxml_print.hpp This file contains rapidxml printer implementation 8 | 9 | #include "rapidxml.hpp" 10 | 11 | // Only include streams if not disabled 12 | #ifndef RAPIDXML_NO_STREAMS 13 | #include 14 | #include 15 | #endif 16 | 17 | namespace rapidxml 18 | { 19 | 20 | /////////////////////////////////////////////////////////////////////// 21 | // Printing flags 22 | 23 | const int print_no_indenting = 0x1; //!< Printer flag instructing the printer to suppress indenting of XML. See print() function. 24 | 25 | /////////////////////////////////////////////////////////////////////// 26 | // Internal 27 | 28 | //! \cond internal 29 | namespace internal 30 | { 31 | 32 | /////////////////////////////////////////////////////////////////////////// 33 | // Internal character operations 34 | 35 | // Copy characters from given range to given output iterator 36 | template 37 | inline OutIt copy_chars(const Ch *begin, const Ch *end, OutIt out) 38 | { 39 | while (begin != end) 40 | *out++ = *begin++; 41 | return out; 42 | } 43 | 44 | // Copy characters from given range to given output iterator and expand 45 | // characters into references (< > ' " &) 46 | template 47 | inline OutIt copy_and_expand_chars(const Ch *begin, const Ch *end, Ch noexpand, OutIt out) 48 | { 49 | while (begin != end) 50 | { 51 | if (*begin == noexpand) 52 | { 53 | *out++ = *begin; // No expansion, copy character 54 | } 55 | else 56 | { 57 | switch (*begin) 58 | { 59 | case Ch('<'): 60 | *out++ = Ch('&'); *out++ = Ch('l'); *out++ = Ch('t'); *out++ = Ch(';'); 61 | break; 62 | case Ch('>'): 63 | *out++ = Ch('&'); *out++ = Ch('g'); *out++ = Ch('t'); *out++ = Ch(';'); 64 | break; 65 | case Ch('\''): 66 | *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('p'); *out++ = Ch('o'); *out++ = Ch('s'); *out++ = Ch(';'); 67 | break; 68 | case Ch('"'): 69 | 
*out++ = Ch('&'); *out++ = Ch('q'); *out++ = Ch('u'); *out++ = Ch('o'); *out++ = Ch('t'); *out++ = Ch(';'); 70 | break; 71 | case Ch('&'): 72 | *out++ = Ch('&'); *out++ = Ch('a'); *out++ = Ch('m'); *out++ = Ch('p'); *out++ = Ch(';'); 73 | break; 74 | default: 75 | *out++ = *begin; // No expansion, copy character 76 | } 77 | } 78 | ++begin; // Step to next character 79 | } 80 | return out; 81 | } 82 | 83 | // Fill given output iterator with repetitions of the same character 84 | template 85 | inline OutIt fill_chars(OutIt out, int n, Ch ch) 86 | { 87 | for (int i = 0; i < n; ++i) 88 | *out++ = ch; 89 | return out; 90 | } 91 | 92 | // Find character 93 | template 94 | inline bool find_char(const Ch *begin, const Ch *end) 95 | { 96 | while (begin != end) 97 | if (*begin++ == ch) 98 | return true; 99 | return false; 100 | } 101 | 102 | /////////////////////////////////////////////////////////////////////////// 103 | // Internal printing operations 104 | 105 | // Print node 106 | template 107 | inline OutIt print_node(OutIt out, const xml_node *node, int flags, int indent); 108 | 109 | // Print children of the node 110 | template 111 | inline OutIt print_children(OutIt out, const xml_node *node, int flags, int indent) 112 | { 113 | for (xml_node *child = node->first_node(); child; child = child->next_sibling()) 114 | out = print_node(out, child, flags, indent); 115 | return out; 116 | } 117 | 118 | // Print attributes of the node 119 | template 120 | inline OutIt print_attributes(OutIt out, const xml_node *node, int) 121 | { 122 | for (xml_attribute *attribute = node->first_attribute(); attribute; attribute = attribute->next_attribute()) 123 | { 124 | if (attribute->name() && attribute->value()) 125 | { 126 | // Print attribute name 127 | *out = Ch(' '), ++out; 128 | out = copy_chars(attribute->name(), attribute->name() + attribute->name_size(), out); 129 | *out = Ch('='), ++out; 130 | // Print attribute value using appropriate quote type 131 | if 
(find_char(attribute->value(), attribute->value() + attribute->value_size())) 132 | { 133 | *out = Ch('"'), ++out; 134 | out = copy_and_expand_chars(attribute->value(), attribute->value() + attribute->value_size(), Ch('\''), out); 135 | *out = Ch('"'), ++out; 136 | } 137 | else 138 | { 139 | *out = Ch('\''), ++out; 140 | out = copy_and_expand_chars(attribute->value(), attribute->value() + attribute->value_size(), Ch('"'), out); 141 | *out = Ch('\''), ++out; 142 | } 143 | } 144 | } 145 | return out; 146 | } 147 | 148 | // Print data node 149 | template 150 | inline OutIt print_data_node(OutIt out, const xml_node *node, int flags, int indent) 151 | { 152 | assert(node->type() == node_data); 153 | if (!(flags & print_no_indenting)) 154 | out = fill_chars(out, indent, Ch(' ')); 155 | out = copy_and_expand_chars(node->value(), node->value() + node->value_size(), Ch(0), out); 156 | return out; 157 | } 158 | 159 | // Print data node 160 | template 161 | inline OutIt print_cdata_node(OutIt out, const xml_node *node, int flags, int indent) 162 | { 163 | assert(node->type() == node_cdata); 164 | if (!(flags & print_no_indenting)) 165 | out = fill_chars(out, indent, Ch(' ')); 166 | *out = Ch('<'); ++out; 167 | *out = Ch('!'); ++out; 168 | *out = Ch('['); ++out; 169 | *out = Ch('C'); ++out; 170 | *out = Ch('D'); ++out; 171 | *out = Ch('A'); ++out; 172 | *out = Ch('T'); ++out; 173 | *out = Ch('A'); ++out; 174 | *out = Ch('['); ++out; 175 | out = copy_chars(node->value(), node->value() + node->value_size(), out); 176 | *out = Ch(']'); ++out; 177 | *out = Ch(']'); ++out; 178 | *out = Ch('>'); ++out; 179 | return out; 180 | } 181 | 182 | // Print element node 183 | template 184 | inline OutIt print_element_node(OutIt out, const xml_node *node, int flags, int indent) 185 | { 186 | assert(node->type() == node_element); 187 | 188 | // Print element name and attributes, if any 189 | if (!(flags & print_no_indenting)) 190 | out = fill_chars(out, indent, Ch(' ')); 191 | *out = Ch('<'), 
++out; 192 | out = copy_chars(node->name(), node->name() + node->name_size(), out); 193 | out = print_attributes(out, node, flags); 194 | 195 | // If node is childless 196 | if (node->value_size() == 0 && !node->first_node()) 197 | { 198 | // Print childless node tag ending 199 | *out = Ch('/'), ++out; 200 | *out = Ch('>'), ++out; 201 | } 202 | else 203 | { 204 | // Print normal node tag ending 205 | *out = Ch('>'), ++out; 206 | 207 | // Test if node contains a single data node only (and no other nodes) 208 | xml_node *child = node->first_node(); 209 | if (!child) 210 | { 211 | // If node has no children, only print its value without indenting 212 | out = copy_and_expand_chars(node->value(), node->value() + node->value_size(), Ch(0), out); 213 | } 214 | else if (child->next_sibling() == 0 && child->type() == node_data) 215 | { 216 | // If node has a sole data child, only print its value without indenting 217 | out = copy_and_expand_chars(child->value(), child->value() + child->value_size(), Ch(0), out); 218 | } 219 | else 220 | { 221 | // Print all children with full indenting 222 | if (!(flags & print_no_indenting)) 223 | *out = Ch('\n'), ++out; 224 | out = print_children(out, node, flags, indent + 2); 225 | if (!(flags & print_no_indenting)) 226 | out = fill_chars(out, indent, Ch(' ')); 227 | } 228 | 229 | // Print node end 230 | *out = Ch('<'), ++out; 231 | *out = Ch('/'), ++out; 232 | out = copy_chars(node->name(), node->name() + node->name_size(), out); 233 | *out = Ch('>'), ++out; 234 | } 235 | return out; 236 | } 237 | 238 | // Print declaration node 239 | template 240 | inline OutIt print_declaration_node(OutIt out, const xml_node *node, int flags, int indent) 241 | { 242 | // Print declaration start 243 | if (!(flags & print_no_indenting)) 244 | out = fill_chars(out, indent, Ch(' ')); 245 | *out = Ch('<'), ++out; 246 | *out = Ch('?'), ++out; 247 | *out = Ch('x'), ++out; 248 | *out = Ch('m'), ++out; 249 | *out = Ch('l'), ++out; 250 | 251 | // Print 
attributes 252 | out = print_attributes(out, node, flags); 253 | 254 | // Print declaration end 255 | *out = Ch('?'), ++out; 256 | *out = Ch('>'), ++out; 257 | 258 | return out; 259 | } 260 | 261 | // Print comment node 262 | template 263 | inline OutIt print_comment_node(OutIt out, const xml_node *node, int flags, int indent) 264 | { 265 | assert(node->type() == node_comment); 266 | if (!(flags & print_no_indenting)) 267 | out = fill_chars(out, indent, Ch(' ')); 268 | *out = Ch('<'), ++out; 269 | *out = Ch('!'), ++out; 270 | *out = Ch('-'), ++out; 271 | *out = Ch('-'), ++out; 272 | out = copy_chars(node->value(), node->value() + node->value_size(), out); 273 | *out = Ch('-'), ++out; 274 | *out = Ch('-'), ++out; 275 | *out = Ch('>'), ++out; 276 | return out; 277 | } 278 | 279 | // Print doctype node 280 | template 281 | inline OutIt print_doctype_node(OutIt out, const xml_node *node, int flags, int indent) 282 | { 283 | assert(node->type() == node_doctype); 284 | if (!(flags & print_no_indenting)) 285 | out = fill_chars(out, indent, Ch(' ')); 286 | *out = Ch('<'), ++out; 287 | *out = Ch('!'), ++out; 288 | *out = Ch('D'), ++out; 289 | *out = Ch('O'), ++out; 290 | *out = Ch('C'), ++out; 291 | *out = Ch('T'), ++out; 292 | *out = Ch('Y'), ++out; 293 | *out = Ch('P'), ++out; 294 | *out = Ch('E'), ++out; 295 | *out = Ch(' '), ++out; 296 | out = copy_chars(node->value(), node->value() + node->value_size(), out); 297 | *out = Ch('>'), ++out; 298 | return out; 299 | } 300 | 301 | // Print pi node 302 | template 303 | inline OutIt print_pi_node(OutIt out, const xml_node *node, int flags, int indent) 304 | { 305 | assert(node->type() == node_pi); 306 | if (!(flags & print_no_indenting)) 307 | out = fill_chars(out, indent, Ch(' ')); 308 | *out = Ch('<'), ++out; 309 | *out = Ch('?'), ++out; 310 | out = copy_chars(node->name(), node->name() + node->name_size(), out); 311 | *out = Ch(' '), ++out; 312 | out = copy_chars(node->value(), node->value() + node->value_size(), out); 313 
| *out = Ch('?'), ++out; 314 | *out = Ch('>'), ++out; 315 | return out; 316 | } 317 | 318 | // Print literal node 319 | template 320 | inline OutIt print_literal_node(OutIt out, const xml_node *node, int flags, int indent) 321 | { 322 | assert(node->type() == node_literal); 323 | if (!(flags & print_no_indenting)) 324 | out = fill_chars(out, indent, Ch(' ')); 325 | out = copy_chars(node->value(), node->value() + node->value_size(), out); 326 | return out; 327 | } 328 | 329 | // Print node 330 | // Print node 331 | template 332 | inline OutIt print_node(OutIt out, const xml_node *node, int flags, int indent) 333 | { 334 | // Print proper node type 335 | switch (node->type()) 336 | { 337 | 338 | // Document 339 | case node_document: 340 | out = print_children(out, node, flags, indent); 341 | break; 342 | 343 | // Element 344 | case node_element: 345 | out = print_element_node(out, node, flags, indent); 346 | break; 347 | 348 | // Data 349 | case node_data: 350 | out = print_data_node(out, node, flags, indent); 351 | break; 352 | 353 | // CDATA 354 | case node_cdata: 355 | out = print_cdata_node(out, node, flags, indent); 356 | break; 357 | 358 | // Declaration 359 | case node_declaration: 360 | out = print_declaration_node(out, node, flags, indent); 361 | break; 362 | 363 | // Comment 364 | case node_comment: 365 | out = print_comment_node(out, node, flags, indent); 366 | break; 367 | 368 | // Doctype 369 | case node_doctype: 370 | out = print_doctype_node(out, node, flags, indent); 371 | break; 372 | 373 | // Pi 374 | case node_pi: 375 | out = print_pi_node(out, node, flags, indent); 376 | break; 377 | 378 | case node_literal: 379 | out = print_literal_node(out, node, flags, indent); 380 | break; 381 | 382 | // Unknown 383 | default: 384 | assert(0); 385 | break; 386 | } 387 | 388 | // If indenting not disabled, add line break after node 389 | if (!(flags & print_no_indenting)) 390 | *out = Ch('\n'), ++out; 391 | 392 | // Return modified iterator 393 | return out; 
394 | } 395 | 396 | } 397 | //! \endcond 398 | 399 | /////////////////////////////////////////////////////////////////////////// 400 | // Printing 401 | 402 | //! Prints XML to given output iterator. 403 | //! \param out Output iterator to print to. 404 | //! \param node Node to be printed. Pass xml_document to print entire document. 405 | //! \param flags Flags controlling how XML is printed. 406 | //! \return Output iterator pointing to position immediately after last character of printed text. 407 | template 408 | inline OutIt print(OutIt out, const xml_node &node, int flags = 0) 409 | { 410 | return internal::print_node(out, &node, flags, 0); 411 | } 412 | 413 | #ifndef RAPIDXML_NO_STREAMS 414 | 415 | //! Prints XML to given output stream. 416 | //! \param out Output stream to print to. 417 | //! \param node Node to be printed. Pass xml_document to print entire document. 418 | //! \param flags Flags controlling how XML is printed. 419 | //! \return Output stream. 420 | template 421 | inline std::basic_ostream &print(std::basic_ostream &out, const xml_node &node, int flags = 0) 422 | { 423 | print(std::ostream_iterator(out), node, flags); 424 | return out; 425 | } 426 | 427 | //! Prints formatted XML to given output stream. Uses default printing flags. Use print() function to customize printing process. 428 | //! \param out Output stream to print to. 429 | //! \param node Node to be printed. 430 | //! \return Output stream. 
431 | template 432 | inline std::basic_ostream &operator <<(std::basic_ostream &out, const xml_node &node) 433 | { 434 | return print(out, node); 435 | } 436 | 437 | #endif 438 | 439 | } 440 | 441 | #endif 442 | -------------------------------------------------------------------------------- /c_src/rapidxml_utils.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RAPIDXML_UTILS_HPP_INCLUDED 2 | #define RAPIDXML_UTILS_HPP_INCLUDED 3 | 4 | // Copyright (C) 2006, 2009 Marcin Kalicinski 5 | // Version 1.13 6 | // Revision $DateTime: 2009/05/13 01:46:17 $ 7 | //! \file rapidxml_utils.hpp This file contains high-level rapidxml utilities that can be useful 8 | //! in certain simple scenarios. They should probably not be used if maximizing performance is the main objective. 9 | 10 | #include "rapidxml.hpp" 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace rapidxml 17 | { 18 | 19 | //! Represents data loaded from a file 20 | template 21 | class file 22 | { 23 | 24 | public: 25 | 26 | //! Loads file into the memory. Data will be automatically destroyed by the destructor. 27 | //! \param filename Filename to load. 28 | file(const char *filename) 29 | { 30 | using namespace std; 31 | 32 | // Open stream 33 | basic_ifstream stream(filename, ios::binary); 34 | if (!stream) 35 | throw runtime_error(string("cannot open file ") + filename); 36 | stream.unsetf(ios::skipws); 37 | 38 | // Determine stream size 39 | stream.seekg(0, ios::end); 40 | size_t size = stream.tellg(); 41 | stream.seekg(0); 42 | 43 | // Load data and add terminating 0 44 | m_data.resize(size + 1); 45 | stream.read(&m_data.front(), static_cast(size)); 46 | m_data[size] = 0; 47 | } 48 | 49 | //! Loads file into the memory. Data will be automatically destroyed by the destructor 50 | //! 
\param stream Stream to load from 51 | file(std::basic_istream &stream) 52 | { 53 | using namespace std; 54 | 55 | // Load data and add terminating 0 56 | stream.unsetf(ios::skipws); 57 | m_data.assign(istreambuf_iterator(stream), istreambuf_iterator()); 58 | if (stream.fail() || stream.bad()) 59 | throw runtime_error("error reading stream"); 60 | m_data.push_back(0); 61 | } 62 | 63 | //! Gets file data. 64 | //! \return Pointer to data of file. 65 | Ch *data() 66 | { 67 | return &m_data.front(); 68 | } 69 | 70 | //! Gets file data. 71 | //! \return Pointer to data of file. 72 | const Ch *data() const 73 | { 74 | return &m_data.front(); 75 | } 76 | 77 | //! Gets file data size. 78 | //! \return Size of file data, in characters. 79 | std::size_t size() const 80 | { 81 | return m_data.size(); 82 | } 83 | 84 | private: 85 | 86 | std::vector m_data; // File data 87 | 88 | }; 89 | 90 | //! Counts children of node. Time complexity is O(n). 91 | //! \return Number of children of node 92 | template 93 | inline std::size_t count_children(xml_node *node) 94 | { 95 | xml_node *child = node->first_node(); 96 | std::size_t count = 0; 97 | while (child) 98 | { 99 | ++count; 100 | child = child->next_sibling(); 101 | } 102 | return count; 103 | } 104 | 105 | //! Counts attributes of node. Time complexity is O(n). 106 | //! 
\return Number of attributes of node 107 | template 108 | inline std::size_t count_attributes(xml_node *node) 109 | { 110 | xml_attribute *attr = node->first_attribute(); 111 | std::size_t count = 0; 112 | while (attr) 113 | { 114 | ++count; 115 | attr = attr->next_attribute(); 116 | } 117 | return count; 118 | } 119 | 120 | } 121 | 122 | #endif 123 | -------------------------------------------------------------------------------- /include/exml.hrl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% Parts of this file, explicitly marked in the code, were taken from 3 | %%% https://github.com/erszcz/rxml 4 | %%%------------------------------------------------------------------- 5 | 6 | -ifndef(EXML_HEADER). 7 | -define(EXML_HEADER, true). 8 | 9 | -record(xmlcdata, {content = [] :: iodata(), 10 | style = escaped :: escaped | cdata}). 11 | 12 | -record(xmlel, {name :: binary(), 13 | attrs = #{} :: exml:attrs(), 14 | children = [] :: [exml:child()]}). 15 | 16 | %% Implementation of the exmlAssertEqual/2 macro is a modification of 17 | %% https://github.com/erszcz/rxml/commit/e8483408663f0bc2af7896e786c1cdea2e86e43d#diff-2cb5d18741df32f4ead70c21fdd221d1 18 | %% See assertEqual in $ERLANG/lib/stdlib-2.6/include/assert.hrl for the original. 19 | -define(exmlAssertEqual(Expect, Expr), 20 | begin 21 | ((fun () -> 22 | X__X = (exml:xml_sort(Expect)), 23 | case (exml:xml_sort(Expr)) of 24 | X__X -> ok; 25 | X__V -> erlang:error({exmlAssertEqual, 26 | [{module, ?MODULE}, 27 | {line, ?LINE}, 28 | {expression, (??Expr)}, 29 | {expected, Expect}, 30 | {value, X__V}]}) 31 | end 32 | end)()) 33 | end). 34 | 35 | -endif. 36 | -------------------------------------------------------------------------------- /include/exml_stream.hrl: -------------------------------------------------------------------------------- 1 | -include("exml.hrl"). 
2 | 3 | -record(xmlstreamstart, {name :: binary(), 4 | attrs = #{} :: exml:attrs()}). 5 | 6 | -record(xmlstreamend, {name :: binary()}). 7 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {deps, []}. 2 | 3 | {dialyzer, 4 | [{warnings, 5 | [unknown, 6 | unmatched_returns, 7 | error_handling, 8 | underspecs 9 | ]}]}. 10 | 11 | {profiles, [ 12 | {test, [ 13 | {deps, [ 14 | {proper, "1.5.0"} 15 | ]}, 16 | {plugins, [ 17 | {rebar3_codecov, "0.7.0"} 18 | ]}, 19 | {port_env, [ 20 | {"CXXFLAGS", "$CXXFLAGS -O3 -std=c++11 -g -Wall -Wextra -fPIC --coverage"}, 21 | {"LDFLAGS", "$LDFLAGS --coverage"} 22 | ]}, 23 | {eunit_opts, [verbose]}, 24 | {cover_opts, [verbose]}, 25 | {cover_enabled, true}, 26 | {cover_export_enabled, true} 27 | ]} 28 | ]}. 29 | 30 | {project_plugins, [rebar3_hex, rebar3_ex_doc]}. 31 | {plugins, [pc]}. 32 | 33 | % Interrupt compilation, if the artifact is not found 34 | {artifacts, ["priv/exml_nif.so"]}. 35 | 36 | {port_specs, [ 37 | { 38 | % Any arch 39 | ".*", 40 | % Create library 41 | "priv/exml_nif.so", 42 | % From files 43 | ["c_src/*.cpp"], 44 | % Using options 45 | [{env, [{"CXXFLAGS", "$CXXFLAGS -O3 -std=c++11 -Wall -Wextra"}]}] 46 | } 47 | ]}. 48 | 49 | {provider_hooks, [ 50 | {post, [ 51 | {compile, {pc, compile}}, 52 | {clean, {pc, clean}} 53 | ]} 54 | ]}. 55 | 56 | {hex, [ 57 | {doc, #{provider => ex_doc}} 58 | ]}. 59 | {ex_doc, [ 60 | {source_url, <<"https://github.com/esl/exml">>}, 61 | {main, <<"readme">>}, 62 | {extras, [{'README.md', #{title => <<"README">>}}, 63 | {'LICENSE', #{title => <<"License">>}} 64 | ]} 65 | ]}. 66 | -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | []. 
2 | -------------------------------------------------------------------------------- /src/exml.app.src: -------------------------------------------------------------------------------- 1 | {application, exml, 2 | [{description, "Erlang fast XML parsing library"}, 3 | {vsn, git}, 4 | {registered, []}, 5 | {applications, 6 | [kernel, 7 | stdlib 8 | ]}, 9 | {env, []}, 10 | {modules, []}, 11 | {maintainers, ["ESL"]}, 12 | {pkg_name, "hexml"}, 13 | {licenses, ["Apache-2.0", "BSL-1.0", "GPL (tests)"]}, 14 | {links, [{"GitHub", "https://github.com/esl/exml/"}]}, 15 | {exclude_files, ["c_src/exml.d"]} 16 | ]}. 17 | -------------------------------------------------------------------------------- /src/exml.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @copyright (C) 2011-2024, Erlang Solutions Ltd. 3 | %%% @doc 4 | %%% @end 5 | %%% Created : 12 Jul 2011 by Michal Ptaszek 6 | %%% 7 | %%% Parts of this file, explicitly marked in the code, were taken from 8 | %%% https://github.com/erszcz/rxml 9 | %%%------------------------------------------------------------------- 10 | -module(exml). 11 | 12 | -include("exml_stream.hrl"). 13 | 14 | -export([parse/1]). 15 | 16 | -export([to_list/1, 17 | to_binary/1, 18 | to_iolist/1, 19 | xml_size/1, 20 | to_pretty_iolist/1]). 21 | 22 | -export([filter_children/2, 23 | append_children/2, 24 | upsert_attr_value/3, 25 | upsert_child/2, 26 | insert_new_child/2, 27 | remove_cdata/1, 28 | remove_attr/2, 29 | xml_sort/1]). 30 | 31 | -export_type([attrs/0, 32 | cdata/0, 33 | element/0, 34 | child/0, 35 | item/0]). 36 | 37 | -type attrs() :: #{binary() => binary()}. 38 | -type cdata() :: #xmlcdata{}. 39 | %% CDATA record. Printing escaping rules defaults to escaping character-wise. 40 | %% 41 | %% Escaping rules: 42 | %%
    43 | %%
  • `escaped': escapes all characters by regular `&' control escaping.
  • 44 | %%
  • `cdata': wraps the entire string into a `<![CDATA[]]>' section.
  • 45 | %%
46 | -type element() :: #xmlel{}. 47 | -type item() :: element() | cdata() | exml_stream:start() | exml_stream:stop(). 48 | -type child() :: element() | cdata(). 49 | -type prettify() :: pretty | not_pretty. 50 | %% Printing indentation rule, see `to_iolist/2'. 51 | 52 | %% @doc Calculate the length of the original XML payload 53 | -spec xml_size(item() | [item()]) -> non_neg_integer(). 54 | xml_size([]) -> 55 | 0; 56 | xml_size([Elem | Rest]) -> 57 | xml_size(Elem) + xml_size(Rest); 58 | xml_size(#xmlcdata{content = Content, style = Style}) -> 59 | iolist_size(exml_nif:escape_cdata(Content, Style)); 60 | xml_size(#xmlel{ name = Name, attrs = Attrs, children = [] }) -> 61 | 3 % Self-closing: 62 | + byte_size(Name) + xml_size(maps:to_list(Attrs)); 63 | xml_size(#xmlel{ name = Name, attrs = Attrs, children = Children }) -> 64 | % Opening and closing: <> 65 | 5 + byte_size(Name)*2 66 | + xml_size(maps:to_list(Attrs)) + xml_size(Children); 67 | xml_size(#xmlstreamstart{ name = Name, attrs = Attrs }) -> 68 | byte_size(Name) + 2 + xml_size(maps:to_list(Attrs)); 69 | xml_size(#xmlstreamend{ name = Name }) -> 70 | byte_size(Name) + 3; 71 | xml_size({Key, Value}) when is_binary(Key) -> 72 | % Attributes 73 | byte_size(Key) 74 | + 4 % ="" and whitespace before 75 | + byte_size(Value). 76 | 77 | %% @doc Sort in ascending order a list of xml `t:item/0'. 78 | %% 79 | %% Sorting is defined as calling `lists:sort/1' at: 80 | %%
    81 | %%
  • all the `xmlel's provided (if there is a list of them) AND
  • 82 | %%
  • all the `xmlel' elements' attributes recursively (the root and descendants) – attributes are kept in a map, so this step needs no explicit reordering – AND
  • 83 | %%
  • all the `xmlel' children recursively (the root and descendants).
  • 84 | %%
85 | %% @end 86 | %% The implementation of this function is a subtle modification of 87 | %% https://github.com/erszcz/rxml/commit/e8483408663f0bc2af7896e786c1cdea2e86e43d 88 | -spec xml_sort([item()]) -> [item()]; 89 | (element()) -> element(); 90 | (cdata()) -> cdata(); 91 | (exml_stream:start()) -> exml_stream:start(); 92 | (exml_stream:stop()) -> exml_stream:stop(). 93 | xml_sort(#xmlcdata{} = Cdata) -> 94 | Cdata; 95 | xml_sort(#xmlel{children = Children} = El) -> 96 | El#xmlel{ 97 | children = [ xml_sort(C) || C <- Children ] 98 | }; 99 | xml_sort(#xmlstreamstart{} = StreamStart) -> 100 | StreamStart; 101 | xml_sort(#xmlstreamend{} = StreamEnd) -> 102 | StreamEnd; 103 | xml_sort(Elements) when is_list(Elements) -> 104 | lists:sort([ xml_sort(E) || E <- Elements ]). 105 | 106 | %% @doc Return the given `t:element/0' with the specified filter passed over its children. 107 | -spec filter_children(element(), fun((element() | cdata()) -> boolean())) -> element(). 108 | filter_children(#xmlel{children = Children} = El, Pred) -> 109 | NoCdata = lists:filter(Pred, Children), 110 | El#xmlel{children = NoCdata}. 111 | 112 | %% @doc Return the given `t:element/0' without any `t:cdata/0' on its children. 113 | -spec remove_cdata(element()) -> element(). 114 | remove_cdata(#xmlel{children = Children} = El) -> 115 | Pred = fun(Child) -> not is_record(Child, xmlcdata) end, 116 | NoCdata = lists:filter(Pred, Children), 117 | El#xmlel{children = NoCdata}. 118 | 119 | %% @doc Remove a given attribute from a `t:element/0'. 120 | -spec remove_attr(exml:element(), binary()) -> element(). 121 | remove_attr(#xmlel{attrs = Attrs} = El, Key) -> 122 | El#xmlel{attrs = maps:remove(Key, Attrs)}. 123 | 124 | %% @doc Append new children elements to a `t:element/0'. 125 | -spec append_children(element(), [element() | cdata()]) -> element(). 126 | append_children(#xmlel{children = Children} = El, ExtraChildren) -> 127 | El#xmlel{children = Children ++ ExtraChildren}. 
128 | 129 | %% @doc Replace or insert the value of a given attribute. 130 | -spec upsert_attr_value(element(), binary(), binary()) -> element(). 131 | upsert_attr_value(#xmlel{attrs = Attrs} = El, Key, Value) -> 132 | El#xmlel{attrs = Attrs#{Key => Value}}. 133 | 134 | %% @doc Replace or insert a child by the given one. 135 | -spec upsert_child(element(), element()) -> element(). 136 | upsert_child(#xmlel{children = Children} = El, #xmlel{name = Name} = NewChild) -> 137 | Children2 = lists:keystore(Name, #xmlel.name, Children, NewChild), 138 | El#xmlel{children = Children2}. 139 | 140 | %% @doc Insert a child by the given one, if none existed. 141 | -spec insert_new_child(element(), element()) -> element(). 142 | insert_new_child(#xmlel{children = Children} = El, #xmlel{name = Name} = NewChild) -> 143 | case lists:keymember(Name, #xmlel.name, Children) of 144 | false -> 145 | El#xmlel{children = [NewChild | Children]}; 146 | true -> 147 | El 148 | end. 149 | 150 | %% @equiv erlang:binary_to_list(to_binary(Element)) 151 | -spec to_list(exml_stream:element() | [exml_stream:element()]) -> string(). 152 | to_list(Element) -> 153 | binary_to_list(to_binary(Element)). 154 | 155 | %% @equiv erlang:iolist_to_binary(to_iolist(Element, not_pretty)) 156 | -spec to_binary(exml_stream:element() | [exml_stream:element()]) -> binary(). 157 | to_binary(Element) -> 158 | iolist_to_binary(to_iolist(Element, not_pretty)). 159 | 160 | %% @equiv to_iolist(Element, not_pretty) 161 | -spec to_iolist(exml_stream:element() | [exml_stream:element()]) -> iodata(). 162 | to_iolist(Element) -> 163 | to_iolist(Element, not_pretty). 164 | 165 | %% @equiv to_iolist(Element, pretty) 166 | -spec to_pretty_iolist(exml_stream:element() | [exml_stream:element()]) -> iodata(). 167 | to_pretty_iolist(Element) -> 168 | to_iolist(Element, pretty). 169 | 170 | %% @doc Parses a binary or a list of binaries into an XML `t:element/0'. 
171 | -spec parse(binary() | [binary()]) -> {ok, element()} | {error, binary()}. 172 | parse(XML) -> 173 | exml_nif:parse(XML). 174 | 175 | %% @doc Turn a –list of– exml elements into iodata for IO interactions. 176 | %% 177 | %% The `Pretty' argument indicates if the generated XML should have new lines and indentation, 178 | %% which is useful for the debugging eye, or should rather be a minified version, 179 | %% which is better for IO performance. 180 | -spec to_iolist(cdata() | exml_stream:element() | [exml_stream:element()], prettify()) -> iodata(). 181 | to_iolist(#xmlel{} = Element, Pretty) -> 182 | to_binary_nif(Element, Pretty); 183 | to_iolist(#xmlstreamstart{name = Name, attrs = Attrs}, _Pretty) -> 184 | Result = to_binary_nif(#xmlel{name = Name, attrs = Attrs}, not_pretty), 185 | FrontSize = byte_size(Result) - 2, 186 | <">> = Result, 187 | [Front, $>]; 188 | to_iolist(#xmlstreamend{name = Name}, _Pretty) -> 189 | [<<">, Name, <<">">>]; 190 | to_iolist(#xmlcdata{content = Content, style = Style}, _Pretty) -> 191 | exml_nif:escape_cdata(Content, Style); 192 | to_iolist([Element], Pretty) -> 193 | to_iolist(Element, Pretty); 194 | to_iolist([#xmlstreamstart{name = Name, attrs = Attrs} | Tail] = Elements, Pretty) -> 195 | [Last | RevChildren] = lists:reverse(Tail), 196 | case Last of 197 | #xmlstreamend{name = Name} -> 198 | %% Add extra nesting for streams so pretty-printing would be indented properly 199 | Element = #xmlel{name = Name, attrs = Attrs, children = lists:reverse(RevChildren)}, 200 | to_binary_nif(Element, Pretty); 201 | _ -> 202 | [to_iolist(El, Pretty) || El <- Elements] 203 | end; 204 | to_iolist(Elements, Pretty) when is_list(Elements) -> 205 | [to_iolist(El, Pretty) || El <- Elements]. 206 | 207 | -spec to_binary_nif(element(), prettify()) -> binary(). 
208 | to_binary_nif(#xmlel{} = Element, Pretty) -> 209 | case catch exml_nif:to_binary(Element, Pretty) of 210 | {'EXIT', Reason} -> erlang:error({badxml, Element, Reason}); 211 | Result when is_binary(Result) -> Result 212 | end. 213 | -------------------------------------------------------------------------------- /src/exml_nif.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @copyright (C) 2018-2024, Erlang Solutions Ltd. 3 | %%% @private 4 | %%%------------------------------------------------------------------- 5 | 6 | -module(exml_nif). 7 | 8 | -nifs([create/2, escape_cdata/2, to_binary/2, parse/1, parse_next/2, reset_parser/1]). 9 | 10 | -type parser() :: term(). 11 | 12 | -export([create/2, parse/1, parse_next/2, escape_cdata/2, 13 | to_binary/2, reset_parser/1]). 14 | 15 | -on_load(load/0). 16 | 17 | %%%=================================================================== 18 | %%% Public API 19 | %%%=================================================================== 20 | 21 | -dialyzer({nowarn_function, [load/0]}). 22 | -spec load() -> any(). 23 | load() -> 24 | PrivDir = case code:priv_dir(?MODULE) of 25 | {error, _} -> 26 | case code:which(?MODULE) of 27 | Path when is_list(Path) -> 28 | EbinDir = filename:dirname(Path), 29 | AppPath = filename:dirname(EbinDir), 30 | filename:join(AppPath, "priv"); 31 | _ -> 32 | %% cover_compiled | preloaded | non_existing 33 | erlang:error({cannot_get_load_path, ?MODULE}) 34 | end; 35 | Path -> 36 | Path 37 | end, 38 | erlang:load_nif(filename:join(PrivDir, ?MODULE_STRING), none). 39 | 40 | -spec create(MaxChildSize :: non_neg_integer(), InfiniteStream :: boolean()) -> 41 | {ok, parser()} | {error, Reason :: any()}. 42 | create(_, _) -> 43 | erlang:nif_error(not_loaded). 44 | 45 | -spec escape_cdata(Bin :: iodata(), atom()) -> binary(). 
46 | escape_cdata(_Bin, _Style) -> 47 | erlang:nif_error(not_loaded). 48 | 49 | -spec to_binary(Elem :: exml:element(), pretty | not_pretty) -> binary(). 50 | to_binary(_Elem, _Pretty) -> 51 | erlang:nif_error(not_loaded). 52 | 53 | -spec parse(Bin :: binary() | [binary()]) -> {ok, exml:element()} | {error, binary()}. 54 | parse(_) -> 55 | erlang:nif_error(not_loaded). 56 | 57 | -spec parse_next(parser(), Data :: binary() | [binary()]) -> 58 | {ok, exml_stream:element() | undefined, non_neg_integer()} | 59 | {error, Reason :: any()}. 60 | parse_next(_, _) -> 61 | erlang:nif_error(not_loaded). 62 | 63 | -spec reset_parser(parser()) -> any(). 64 | reset_parser(_) -> 65 | erlang:nif_error(not_loaded). 66 | -------------------------------------------------------------------------------- /src/exml_query.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @copyright (C) 2011-2024, Erlang Solutions Ltd. 3 | %%% @doc Easy navigation in XML trees 4 | %%% @end 5 | %%%------------------------------------------------------------------- 6 | -module(exml_query). 7 | 8 | -include("exml.hrl"). 9 | 10 | -export([path/2, path/3]). 11 | -export([paths/2]). 12 | -export([subelement/2, subelement/3]). 13 | -export([subelement_with_ns/2, subelement_with_ns/3]). 14 | -export([subelement_with_attr/3, subelement_with_attr/4]). 15 | -export([subelement_with_name_and_ns/3, subelement_with_name_and_ns/4]). 16 | -export([subelements/2]). 17 | -export([subelements_with_ns/2]). 18 | -export([subelements_with_name_and_ns/3]). 19 | -export([subelements_with_attr/3]). 20 | -export([attr/2, attr/3]). 21 | -export([cdata/1]). 22 | 23 | 24 | -type path() :: [cdata | 25 | {attr, binary()} | 26 | {element, binary()} | 27 | {element_with_ns, binary()} | 28 | {element_with_ns, binary(), binary()} | 29 | {element_with_attr, binary(), binary()}]. 
30 | %% Path definition in an XML query, each step is defined by one of these types. 31 | %% 32 | %%
    33 | %%
  • `cdata': selects cdata from the element
  • 34 | %%
  • `{attr, Name}': selects the value of the attribute with the given name (only valid as the last step of a path)
  • 35 | %%
  • `{element, Name}': selects a subelement with the given name
  • 36 | %%
  • `{element_with_ns, NS}': selects a subelement with given namespace
  • 37 | %%
  • `{element_with_ns, Name, NS}': selects a subelement with given name and namespace
  • 38 | %%
  • `{element_with_attr, AttrName, AttrValue}': selects a subelement with the given attribute and value
  • 39 | %%
40 | 41 | -export_type([path/0]). 42 | 43 | %%% @doc Like `path/3' but with default `undefined'. 44 | %%% @see path/3 45 | -spec path(exml:element(), path()) -> exml:element() | binary() | undefined. 46 | path(Element, Path) -> 47 | path(Element, Path, undefined). 48 | 49 | %% @doc Gets the element/attr/cdata in the leftmost possible described path, 50 | %% or `Default' if there is no match. 51 | %% 52 | %% Find an element in the xml tree by a path that is pattern-matched against such xml tree structure. 53 | %% 54 | %% For example, given an xml document like 55 | %% ``` 56 | %% 57 | %% 58 | %% 59 | %% 60 | %% 61 | %% Message from bob to alice 62 | %% 63 | %% 64 | %% 65 | %% 66 | %% ''' 67 | %% The path 68 | %% ``` 69 | %% [{element_with_ns, <<"result">>, <<"urn:xmpp:mam:2">>}, 70 | %% {element_with_ns, <<"forwarded">>, <<"urn:xmpp:forward:0">>}, 71 | %% {element_with_ns, <<"message">>, <<"jabber:client">>}, 72 | %% {element, <<"body">>}, 73 | %% cdata}], 74 | %% ''' 75 | %% will return `<<"Message from bob to alice">>' 76 | %% @end 77 | -spec path(exml:element() | undefined, path(), Default) -> exml:element() | binary() | Default. 
78 | path(#xmlel{} = Element, [], _) -> 79 | Element; 80 | path(#xmlel{} = Element, [{element, Name} | Rest], Default) -> 81 | Child = subelement(Element, Name), % may return undefined 82 | path(Child, Rest, Default); 83 | path(#xmlel{} = Element, [{element_with_ns, NS} | Rest], Default) -> 84 | Child = subelement_with_ns(Element, NS), 85 | path(Child, Rest, Default); 86 | path(#xmlel{} = Element, [{element_with_ns, Name, NS} | Rest], Default) -> 87 | Child = subelement_with_name_and_ns(Element, Name, NS), 88 | path(Child, Rest, Default); 89 | path(#xmlel{} = Element, [{element_with_attr, Name, Value} | Rest], Default) -> 90 | Child = subelement_with_attr(Element, Name, Value), 91 | path(Child, Rest, Default); 92 | path(#xmlel{} = Element, [cdata], _) -> 93 | cdata(Element); 94 | path(#xmlel{} = Element, [{attr, Name}], Default) -> 95 | attr(Element, Name, Default); 96 | path(_, _, Default) -> 97 | Default. 98 | 99 | %% @doc Gets the elements/attrs/cdatas reachable by the described path 100 | %% @see path/3 101 | -spec paths(exml:element(), path()) -> [exml:element() | binary()]. 
102 | paths(#xmlel{} = Element, []) -> 103 | [Element]; 104 | paths(#xmlel{} = Element, [{element, Name} | Rest]) -> 105 | Children = subelements(Element, Name), 106 | lists:append([paths(Child, Rest) || Child <- Children]); 107 | paths(#xmlel{} = Element, [{element_with_ns, NS} | Rest]) -> 108 | Children = subelements_with_ns(Element, NS), 109 | lists:append([paths(Child, Rest) || Child <- Children]); 110 | paths(#xmlel{} = Element, [{element_with_ns, Name, NS} | Rest]) -> 111 | Children = subelements_with_name_and_ns(Element, Name, NS), 112 | lists:append([paths(Child, Rest) || Child <- Children]); 113 | paths(#xmlel{} = Element, [{element_with_attr, AttrName, Value} | Rest]) -> 114 | Children = subelements_with_attr(Element, AttrName, Value), 115 | lists:append([paths(Child, Rest) || Child <- Children]); 116 | paths(#xmlel{} = Element, [cdata]) -> 117 | [cdata(Element)]; 118 | paths(#xmlel{attrs = Attrs}, [{attr, Name}]) -> 119 | lists:sublist([V || {N, V} <- maps:to_list(Attrs), N =:= Name], 1); 120 | paths(#xmlel{} = El, Path) when is_list(Path) -> 121 | erlang:error(invalid_path, [El, Path]). 122 | 123 | %% @equiv path(Element, [{element, Name}]) 124 | -spec subelement(exml:element(), binary()) -> exml:element() | undefined. 125 | subelement(Element, Name) -> 126 | subelement(Element, Name, undefined). 127 | 128 | %% @equiv path(Element, [{element, Name}], Default) 129 | -spec subelement(exml:element(), binary(), Default) -> exml:element() | Default. 130 | subelement(#xmlel{children = Children}, Name, Default) -> 131 | case lists:keyfind(Name, #xmlel.name, Children) of 132 | false -> 133 | Default; 134 | Result -> 135 | Result 136 | end. 137 | 138 | %% @equiv path(Element, [{element_with_ns, NS}]) 139 | -spec subelement_with_ns(exml:element(), binary()) -> exml:element() | undefined. 140 | subelement_with_ns(Element, NS) -> 141 | subelement_with_ns(Element, NS, undefined). 
142 | 143 | %% @equiv path(Element, [{element_with_ns, NS}], Default) 144 | -spec subelement_with_ns(exml:element(), binary(), Default) -> exml:element() | Default. 145 | subelement_with_ns(#xmlel{children = Children}, NS, Default) -> 146 | child_with_ns(Children, NS, Default). 147 | 148 | child_with_ns([], _, Default) -> 149 | Default; 150 | child_with_ns([#xmlel{} = Element | Rest], NS, Default) -> 151 | case attr(Element, <<"xmlns">>) of 152 | NS -> 153 | Element; 154 | _ -> 155 | child_with_ns(Rest, NS, Default) 156 | end; 157 | child_with_ns([_ | Rest], NS, Default) -> 158 | child_with_ns(Rest, NS, Default). 159 | 160 | %% @equiv path(Element, [{element_with_attr, AttrName, AttrValue}]) 161 | -spec subelement_with_attr(exml:element(), AttrName :: binary(), AttrValue :: binary()) -> 162 | exml:element() | undefined. 163 | subelement_with_attr(Element, AttrName, AttrValue) -> 164 | subelement_with_attr(Element, AttrName, AttrValue, undefined). 165 | 166 | %% @equiv path(Element, [{element_with_attr, AttrName, AttrValue}], Default) 167 | -spec subelement_with_attr(Element, AttrName, AttrValue, Default) -> SubElement | Default when 168 | Element :: exml:element(), 169 | AttrName :: binary(), 170 | AttrValue :: binary(), 171 | SubElement :: exml:element(), 172 | Default :: term(). 173 | subelement_with_attr(#xmlel{children = Children}, AttrName, AttrValue, Default) -> 174 | child_with_attr(Children, AttrName, AttrValue, Default). 175 | 176 | child_with_attr([], _, _, Default) -> 177 | Default; 178 | child_with_attr([#xmlel{} = Element | Rest], AttrName, AttrVal, Default) -> 179 | case attr(Element, AttrName) of 180 | AttrVal -> 181 | Element; 182 | _ -> 183 | child_with_attr(Rest, AttrName, AttrVal, Default) 184 | end; 185 | child_with_attr([_ | Rest], AttrName, AttrVal, Default) -> 186 | child_with_attr(Rest, AttrName, AttrVal, Default). 
187 | 188 | %% @equiv path(Element, [{element_with_ns, Name, NS}]) 189 | -spec subelement_with_name_and_ns(exml:element(), binary(), binary()) -> 190 | exml:element() | undefined. 191 | subelement_with_name_and_ns(Element, Name, NS) -> 192 | subelement_with_name_and_ns(Element, Name, NS, undefined). 193 | 194 | %% @equiv path(Element, [{element_with_ns, Name, NS}], Default) 195 | -spec subelement_with_name_and_ns(exml:element(), binary(), binary(), Default) -> 196 | exml:element() | Default. 197 | subelement_with_name_and_ns(Element, Name, NS, Default) -> 198 | case subelements_with_name_and_ns(Element, Name, NS) of 199 | [] -> 200 | Default; 201 | [FirstElem | _] -> 202 | FirstElem 203 | end. 204 | 205 | %% @equiv paths(Element, [{element, Name}]) 206 | -spec subelements(exml:element(), binary()) -> [exml:element()]. 207 | subelements(#xmlel{children = Children}, Name) -> 208 | lists:filter(fun(#xmlel{name = N}) when N =:= Name -> 209 | true; 210 | (_) -> 211 | false 212 | end, Children). 213 | 214 | %% @equiv paths(Element, [{element_with_ns, NS}]) 215 | -spec subelements_with_ns(exml:element(), binary()) -> [exml:element()]. 216 | subelements_with_ns(#xmlel{children = Children}, NS) -> 217 | lists:filter(fun(#xmlel{} = Child) -> 218 | NS =:= attr(Child, <<"xmlns">>); 219 | (_) -> 220 | false 221 | end, Children). 222 | 223 | %% @equiv paths(Element, [{element_with_ns, Name, NS}]) 224 | -spec subelements_with_name_and_ns(exml:element(), binary(), binary()) -> [exml:element()]. 225 | subelements_with_name_and_ns(#xmlel{children = Children}, Name, NS) -> 226 | lists:filter(fun(#xmlel{name = SubName} = Child) -> 227 | SubName =:= Name andalso 228 | NS =:= attr(Child, <<"xmlns">>); 229 | (_) -> 230 | false 231 | end, Children). 232 | 233 | %% @equiv paths(Element, [{element_with_attr, AttrName, AttrValue}]) 234 | -spec subelements_with_attr(exml:element(), binary(), binary()) -> [exml:element()]. 
235 | subelements_with_attr(#xmlel{children = Children}, AttrName, Value) -> 236 | lists:filter(fun(#xmlel{} = Child) -> 237 | Value =:= attr(Child, AttrName); 238 | (_) -> 239 | false 240 | end, Children). 241 | 242 | %% @equiv path(Element, [cdata]) 243 | -spec cdata(exml:element()) -> binary(). 244 | cdata(#xmlel{children = Children}) -> 245 | list_to_binary([C || #xmlcdata{content = C} <- Children]). 246 | 247 | %% @see attr/3 248 | %% @equiv path(Element, [{attr, Name}]) 249 | -spec attr(exml:element(), binary()) -> binary() | undefined. 250 | attr(Element, Name) -> 251 | attr(Element, Name, undefined). 252 | 253 | %% @equiv path(Element, [{attr, Name}], Default) 254 | -spec attr(exml:element(), binary(), Default) -> binary() | Default. 255 | attr(#xmlel{attrs = Attrs}, Name, Default) -> 256 | case maps:find(Name, Attrs) of 257 | {ok, Value} -> 258 | Value; 259 | error -> 260 | Default 261 | end. 262 | -------------------------------------------------------------------------------- /src/exml_stream.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @copyright (C) 2011-2021, Erlang Solutions Ltd. 3 | %%% @doc XML stream parser 4 | %%% @end 5 | %%% Created : 21 Jul 2011 by Michal Ptaszek 6 | %%%------------------------------------------------------------------- 7 | -module(exml_stream). 8 | 9 | -include("exml_stream.hrl"). 10 | 11 | -export([new_parser/0, 12 | new_parser/1, 13 | parse/2, 14 | reset_parser/1, 15 | free_parser/1]). 16 | 17 | -export_type([element/0, 18 | start/0, 19 | stop/0, 20 | parser/0, 21 | parser_opt/0]). 22 | 23 | -record(parser, { 24 | event_parser :: term(), 25 | buffer :: [binary()] 26 | }). 27 | 28 | -type start() :: #xmlstreamstart{}. 29 | %% `#xmlstreamstart{}' record. 30 | -type stop() :: #xmlstreamend{}. 31 | %% `#xmlstreamend{}' record. 32 | -type parser() :: #parser{}. 33 | %% `#parser{}' record. 
Keeps track of unparsed buffers. 34 | -type element() :: exml:element() | start() | stop(). 35 | %% One of `t:exml:element/0', `t:start/0', or `t:stop/0'. 36 | 37 | -type parser_opt() :: {infinite_stream, boolean()} | {max_element_size, non_neg_integer()}. 38 | %% Parser options 39 | %% 40 | %%
    41 | %%
  • `infinite_stream': When enabled, no distinct `t:start/0' or `t:stop/0' is emitted; only `#xmlel{}' elements are returned.
  • 42 | %%
  • `max_element_size': Specifies maximum byte size of any parsed XML element. 43 | %% The only exception is the "stream start" element, 44 | %% for which only the size of the opening tag is limited.
  • 45 | %%
46 | 47 | %%%=================================================================== 48 | %%% Public API 49 | %%%=================================================================== 50 | 51 | %% @see new_parser/1 52 | -spec new_parser() -> {ok, parser()} | {error, any()}. 53 | new_parser() -> 54 | new_parser([]). 55 | 56 | %% @doc Creates a new parser. See `t:parser_opt/0' for configuration. 57 | -spec new_parser([parser_opt()]) -> {ok, parser()} | {error, any()}. 58 | new_parser(Opts)-> 59 | MaxElementSize = proplists:get_value(max_element_size, Opts, 0), 60 | InfiniteStream = proplists:get_value(infinite_stream, Opts, false), 61 | case exml_nif:create(MaxElementSize, InfiniteStream) of 62 | {ok, EventParser} -> 63 | {ok, #parser{event_parser = EventParser, buffer = []}}; 64 | Error -> 65 | Error 66 | end. 67 | 68 | %% @doc Makes a parser parse input. 69 | %% 70 | %% If successful, returns parsed elements and a new parser with updated buffers. 71 | -spec parse(parser(), binary()) -> 72 | {ok, parser(), [element()]} | {error, Reason :: binary()}. 73 | parse(Parser, Input) when is_binary(Input) -> 74 | #parser{event_parser = EventParser, buffer = OldBuf} = Parser, 75 | Buffer = OldBuf ++ [Input], 76 | case parse_all(EventParser, Buffer, []) of 77 | {ok, Elems, NewBuffer} -> 78 | {ok, Parser#parser{buffer = NewBuffer}, Elems}; 79 | Other -> 80 | Other 81 | end. 82 | 83 | %% @doc Resets the parser's buffers 84 | -spec reset_parser(parser()) -> {ok, parser()}. 85 | reset_parser(#parser{event_parser = NifParser} = Parser) -> 86 | exml_nif:reset_parser(NifParser), 87 | {ok, Parser#parser{buffer = []}}. 88 | 89 | %% @doc Free a parser 90 | %% 91 | %% Kept for backwards-compatibility, it is a no-op. 92 | -spec free_parser(parser()) -> ok. 93 | free_parser(#parser{}) -> 94 | ok. 
95 | 96 | %%%=================================================================== 97 | %%% Helpers 98 | %%%=================================================================== 99 | 100 | parse_all(_Parser, [], Acc) -> 101 | {ok, lists:reverse(Acc), []}; 102 | parse_all(Parser, Buffer, Acc) -> 103 | Val = exml_nif:parse_next(Parser, Buffer), 104 | case Val of 105 | {ok, undefined, Offset} -> 106 | {ok, lists:reverse(Acc), drop_offset(Buffer, Offset)}; 107 | {ok, Element, Offset} -> 108 | parse_all(Parser, drop_offset(Buffer, Offset), [Element | Acc]); 109 | {error, _} = Error -> 110 | Error 111 | end. 112 | 113 | drop_offset(Buffer, 0) -> 114 | Buffer; 115 | drop_offset([Front | Rest], Offset) when byte_size(Front) =< Offset -> 116 | drop_offset(Rest, Offset - byte_size(Front)); 117 | drop_offset([Front | Rest], Offset) -> 118 | <<_:Offset/binary, Part/binary>> = Front, 119 | [Part | Rest]. 120 | -------------------------------------------------------------------------------- /test/exml_properties_tests.erl: -------------------------------------------------------------------------------- 1 | -module(exml_properties_tests). 2 | 3 | -include_lib("proper/include/proper.hrl"). 4 | -include_lib("eunit/include/eunit.hrl"). 5 | 6 | -compile([export_all, nowarn_export_all]). 7 | 8 | p(Name, Property) -> 9 | ?assert(proper:quickcheck 10 | (proper:conjunction([{Name, Property}]), 11 | [100, long_result, {to_file, user}])). 12 | 13 | vector_1_forbidden_control_char_test() -> 14 | ?assertMatch({error, _}, exml:parse(<<"", 16#1B,"">>)). 15 | 16 | vector_2_forbidden_control_char_test() -> 17 | ?assertMatch({error, _}, exml:parse(<<"">>)). 18 | 19 | vector_3_forbidden_control_char_test() -> 20 | ?assertMatch({error, _}, exml:parse(<<"">>)). 21 | 22 | vector_4_forbidden_control_char_test() -> 23 | ?assertMatch({error, _}, 24 | exml:parse(<<"&lt;body&gt;", 16#1B,"&lt;/body&gt;">>)). 
25 | 26 | fail_forbidden_control_char_test() -> 27 | p("All valid xml cdata can be parsed", 28 | ?FORALL(Doc, utf8_doc_bad(), 29 | not is_parseable(Doc))). 30 | 31 | parse_test() -> 32 | p("All valid xml cdata can be parsed", 33 | ?FORALL(Doc, utf8_doc(), 34 | is_parseable(Doc))). 35 | 36 | serialize_test() -> 37 | p("All valid xml cdata can be serialized", 38 | ?FORALL(Doc, utf8_doc(), 39 | is_binary(exml:to_binary(parse(Doc))))). 40 | 41 | inverse_test() -> 42 | p("exml:parse can parse the output of exml:to_binary", 43 | ?FORALL(Doc, utf8_doc(), 44 | ok == element(1, exml:parse(exml:to_binary(parse(Doc)))))). 45 | 46 | size_test() -> 47 | p("exml:size equals actual size of output xml string", 48 | ?FORALL(Doc, utf8_doc(), 49 | iolist_size(exml:to_binary(parse(Doc))) == exml:xml_size(parse(Doc)))). 50 | 51 | is_parseable(Doc) -> 52 | case exml:parse(Doc) of 53 | {ok, _} -> true; 54 | _ -> false 55 | end. 56 | 57 | parse(Doc) -> 58 | case exml:parse(Doc) of 59 | {ok, X} -> X; 60 | {error, E} -> throw(E) 61 | end. 62 | 63 | %% 64 | %% Generators 65 | %% 66 | 67 | utf8_doc() -> 68 | ?LET({{ElOpen,ElClose}, Cdata}, 69 | {xml_open_close(), xml_cdata()}, 70 | unicode:characters_to_binary(ElOpen ++ Cdata ++ ElClose)). 71 | 72 | utf8_doc_bad() -> 73 | ?LET({{ElOpen,ElClose}, Cdata}, 74 | {xml_open_close_maybe_bad(), utf8_text_bad()}, 75 | unicode:characters_to_binary(ElOpen ++ Cdata ++ ElClose)). 76 | 77 | xml_open_close() -> 78 | ?LET(TagName, tagname_text(), 79 | {lists:flatten("<" ++ TagName ++ ">"), 80 | lists:flatten("")}). 81 | 82 | xml_open_close_maybe_bad() -> 83 | ?LET(TagName, tagname_text_maybe_bad(), 84 | {lists:flatten("<" ++ TagName ++ ">"), 85 | lists:flatten("")}). 86 | 87 | tagname_text() -> 88 | non_empty(list(choose($a, $z))). 89 | 90 | tagname_text_maybe_bad() -> 91 | non_empty(list(oneof([$a, $z, xml_c0_forbidden_control()]))). 
92 | 93 | %% see: https://en.wikipedia.org/wiki/Valid_characters_in_XML#XML_1.0 94 | utf8_char() -> 95 | oneof([xml_escaped_entity(), 96 | xml_c0_control(), 97 | xml_utf8_bmp_char()]). 98 | 99 | xml_c0_control() -> 100 | elements([16#0009, 16#000A, 16#000D]). 101 | 102 | xml_c0_forbidden_control() -> 103 | elements([16#0000, 16#0001, 16#0002, 16#0003, 16#0004, 16#0005, 16#0006, 16#0007, 104 | 16#0008, 16#000B, 16#000C, 16#000E, 16#000F, 105 | 16#0010, 16#0011, 16#0012, 16#0013, 16#0014, 16#0015, 16#0016, 16#0017, 106 | 16#0018, 16#0019, 16#001A, 16#001B, 16#001C, 16#001D, 16#001E, 16#001F]). 107 | 108 | utf8_text_bad() -> 109 | non_empty(list(xml_c0_forbidden_control())). 110 | 111 | xml_utf8_bmp_char() -> 112 | ?SUCHTHAT(C, oneof([choose(16#0020,16#D7FF), 113 | choose(16#E000, 16#FFFD)]), 114 | not lists:member(C, [$<,$>,$&])). 115 | 116 | xml_escaped_entity() -> 117 | oneof(["&", "<", ">"]). 118 | 119 | utf8_text() -> 120 | non_empty(list(utf8_char())). 121 | 122 | xml_cdata() -> 123 | utf8_text(). 124 | -------------------------------------------------------------------------------- /test/exml_query_tests.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %%% @author Michal Ptaszek 3 | %%% @copyright (C) 2011, Erlang Solutions Ltd. 4 | %%% @doc Unit tests for exml_query module 5 | %%% @end 6 | %%%------------------------------------------------------------------- 7 | 8 | -module(exml_query_tests). 9 | 10 | -include_lib("eunit/include/eunit.hrl"). 11 | -include("exml.hrl"). 12 | 13 | -compile([export_all, nowarn_export_all]). 14 | 15 | -define(MY_SPOON, xml(<<"", 16 | "is too big", 17 | "is too big", 18 | "is too big", 19 | "">>)). 20 | -define (HTML, xml(<<" 21 |
  • 22 |
      My spoon is too 23 | big
    24 |
      My spoon is too 25 | big
    26 |
      My spoon is too 27 | big
    28 |
  • 29 | ">>)). 30 | 31 | %%-------------------------------------------------------------------- 32 | %% tests 33 | %%-------------------------------------------------------------------- 34 | 35 | element_query_test() -> 36 | %% we return only the first (leftmost) match 37 | ?assertEqual(xml(<<"is too big">>), 38 | exml_query:subelement(?MY_SPOON, <<"problem">>)), 39 | ?assertEqual(xml(<<"is too big">>), 40 | exml_query:path(?MY_SPOON, [{element, <<"problem">>}])). 41 | 42 | elements_query_test() -> 43 | Exemplar = [xml(<<"is too big">>), 44 | xml(<<"is too big">>), 45 | xml(<<"is too big">>)], 46 | ?assertEqual(Exemplar, exml_query:subelements(?MY_SPOON, <<"problem">>)). 47 | 48 | element_with_ns_query_test() -> 49 | ?assertEqual(xml(<<"">>), 51 | exml_query:subelement_with_ns(chat_marker(), 52 | <<"urn:xmpp:chat-markers:0">>)), 53 | 54 | ?assertEqual(xml(<<"">>), 56 | exml_query:path(chat_marker(), 57 | [{element_with_ns, <<"urn:xmpp:chat-markers:0">>}])). 58 | 59 | no_element_with_ns_query_test() -> 60 | ?assertEqual(none, 61 | exml_query:subelement_with_ns(chat_marker(), 62 | <<"wrong">>, none)). 63 | 64 | element_with_attr_query_test() -> 65 | ?assertEqual(xml(<<"">>), 67 | exml_query:subelement_with_attr(chat_marker(), <<"xmlns">>, 68 | <<"urn:xmpp:chat-markers:0">>)), 69 | 70 | ?assertEqual(xml(<<"">>), 72 | exml_query:path(chat_marker(), [{element_with_attr, <<"xmlns">>, 73 | <<"urn:xmpp:chat-markers:0">>}])). 74 | element_with_attr_query_returns_first_match_test() -> 75 | ?assertEqual(xml(<<"">>), 77 | exml_query:subelement_with_attr(chat_markers(), <<"xmlns">>, 78 | <<"urn:xmpp:chat-markers:0">>)), 79 | 80 | ?assertEqual(xml(<<"">>), 82 | exml_query:path(chat_markers(), [{element_with_attr, <<"xmlns">>, 83 | <<"urn:xmpp:chat-markers:0">>}])). 
%% NOTE(review): every xml(<<"">>) expectation in the tests below is an empty
%% binary in this copy of the file; the XML markup appears to have been lost
%% upstream -- restore the expected elements from the original test module.

%% Looking up by the `id` attribute returns the first child carrying that id,
%% both through subelement_with_attr/3 and through the path selector.
element_with_attr_query_returns_first_match_by_id_test() ->
    Id = <<"0e300615-7a77-4b5e-91c5-52d8c44149cf">>,
    ?assertEqual(xml(<<"">>),
                 exml_query:subelement_with_attr(chat_markers(), <<"id">>, Id)),

    ?assertEqual(xml(<<"">>),
                 exml_query:path(chat_markers(),
                                 [{element_with_attr, <<"id">>, Id}])).

%% A non-matching attribute value yields the supplied default.
no_element_with_attr_query_test() ->
    Found = exml_query:subelement_with_attr(chat_marker(),
                                            <<"xmlns">>, <<"wrong">>, none),
    ?assertEqual(none, Found).

%% All children in the chat-markers namespace are returned, in order.
elements_with_ns_query_test() ->
    Ns = <<"urn:xmpp:chat-markers:0">>,
    ValidResult = [xml(<<"">>),
                   xml(<<"">>)],
    ?assertEqual(ValidResult,
                 exml_query:subelements_with_ns(chat_markers(), Ns)),
    ?assertEqual(ValidResult,
                 exml_query:paths(chat_markers(), [{element_with_ns, Ns}])).

%% Same expectation, selecting on the `xmlns` attribute value instead.
elements_with_attr_query_test() ->
    Ns = <<"urn:xmpp:chat-markers:0">>,
    ValidResult = [xml(<<"">>),
                   xml(<<"">>)],
    ?assertEqual(ValidResult,
                 exml_query:subelements_with_attr(chat_markers(), <<"xmlns">>, Ns)),
    ?assertEqual(ValidResult,
                 exml_query:paths(chat_markers(),
                                  [{element_with_attr, <<"xmlns">>, Ns}])).

%% Selecting by element name and namespace together returns a single element.
element_with_name_and_ns_query_test() ->
    Name = <<"displayed">>,
    Ns = <<"urn:xmpp:chat-markers:0">>,
    ValidResult = xml(<<"">>),
    ?assertEqual(ValidResult,
                 exml_query:subelement_with_name_and_ns(chat_markers(), Name, Ns)),
    ?assertEqual(ValidResult,
                 exml_query:path(chat_markers(),
                                 [{element_with_ns, Name, Ns}])).
133 | 134 | element_with_name_and_ns_two_names_only_one_ns_query_test() -> 135 | Elem1 = #xmlel{name = <<"a">>, attrs = #{<<"xmlns">> => <<"ns1">>}}, 136 | Elem2 = #xmlel{name = <<"a">>, attrs = #{<<"xmlns">> => <<"ns2">>}}, 137 | Xml = #xmlel{name = <<"element">>, children = [Elem1, Elem2]}, 138 | ?assertEqual(Elem2, exml_query:subelement_with_name_and_ns(Xml, <<"a">>, <<"ns2">>)), 139 | ?assertEqual(Elem2, exml_query:path(Xml, [{element_with_ns, <<"a">>, <<"ns2">>}])). 140 | 141 | no_element_with_name_and_ns_query_test() -> 142 | ?assertEqual(none, 143 | exml_query:subelement_with_name_and_ns(chat_marker(), 144 | <<"wrong">>, <<"urn:xmpp:chat-markers:0">>, 145 | none)), 146 | ?assertEqual(none, 147 | exml_query:subelement_with_name_and_ns(chat_marker(), 148 | <<"received">>, <<"wrong:xmpp:chat-markers:0">>, 149 | none)). 150 | elements_with_name_and_ns_query_test() -> 151 | ValidResult = [ 152 | xml(<<"">>), 154 | xml(<<"">>) 156 | ], 157 | ?assertEqual(ValidResult, exml_query:subelements_with_name_and_ns(items_with_ns(), <<"item">>, 158 | <<"urn:xmpp:chat-markers:0">>)), 159 | ?assertEqual(ValidResult, exml_query:paths(items_with_ns(), 160 | [{element_with_ns, <<"item">>, 161 | <<"urn:xmpp:chat-markers:0">>}])). 162 | chat_marker() -> 163 | Stanza = 164 | <<" 166 | 168 | ">>, 169 | xml(Stanza). 170 | 171 | %% There shouldn't be more than one chat marker in single message 172 | %% but hey, it's a test to verify a function, right? 173 | chat_markers() -> 174 | Stanza = 175 | <<" 177 | 179 | 181 | ">>, 182 | xml(Stanza). 183 | 184 | items_with_ns() -> 185 | Stanza = 186 | <<" 188 | 190 | 192 | 194 | 196 | ">>, 197 | xml(Stanza). 198 | 199 | attribute_query_test() -> 200 | ?assertEqual(<<"my">>, exml_query:attr(?MY_SPOON, <<"whose">>)), 201 | ?assertEqual(<<"my">>, exml_query:path(?MY_SPOON, [{attr, <<"whose">>}])), 202 | ?assertEqual(undefined, exml_query:attr(?MY_SPOON, <<"banana">>)), 203 | ?assertEqual('IAmA', exml_query:attr(?MY_SPOON, <<"banana">>, 'IAmA')). 
%% cdata/1 and the `cdata` path step return the element's character data.
%% NOTE(review): the literal passed to xml/1 here (and for `Qux` further down)
%% has no markup left in this copy of the file -- confirm against the original.
cdata_query_test() ->
    Spoon = ?MY_SPOON,
    ?assertEqual(<<"">>, exml_query:cdata(Spoon)),
    ?assertEqual(<<"">>, exml_query:path(Spoon, [cdata])),
    IAmA = xml(<<"a banana">>),
    ?assertEqual(<<"a banana">>, exml_query:cdata(IAmA)),
    ?assertEqual(<<"a banana">>, exml_query:path(IAmA, [cdata])).

%% path/2 follows element, namespace, attribute and cdata steps and returns
%% the first match.
path_query_test() ->
    Spoon = ?MY_SPOON,
    ProblemStep = {element, <<"problem">>},
    NoAttrStep = {attr, <<"no">>},
    ?assertEqual(Spoon, exml_query:path(Spoon, [])),
    ?assertEqual(<<"is too big">>,
                 exml_query:path(Spoon, [ProblemStep, cdata])),
    ?assertEqual(<<"1">>,
                 exml_query:path(Spoon, [ProblemStep, NoAttrStep])),
    ?assertEqual(<<"3">>,
                 exml_query:path(Spoon, [{element_with_ns, <<"urn:accidents">>},
                                         NoAttrStep])),

    Msg = #xmlel{name = <<"message">>,
                 children = [#xmlcdata{content = <<"x">>}]},
    ?assertEqual(undefined,
                 exml_query:path(Msg, [{element_with_ns, <<"x">>, <<"urn:wrong_ns">>}])),

    %% I couldn't find anything complex enough in that silly cartoon :[
    Qux = xml(<<"qux">>),
    BarBaz = [{element, <<"bar">>}, {element, <<"baz">>}],
    ?assertEqual(<<"qux">>, exml_query:path(Qux, BarBaz ++ [cdata])),
    ?assertEqual(<<"b">>, exml_query:path(Qux, BarBaz ++ [{attr, <<"a">>}])).

%% A path that matches nothing yields `undefined`, or the caller's default.
failed_path_query_test() ->
    Spoon = ?MY_SPOON,
    Missing = [{element, <<"banana">>}],
    ?assertEqual(undefined, exml_query:path(Spoon, Missing)),
    ?assertEqual('IAmA', exml_query:path(Spoon, Missing, 'IAmA')).
%% paths/2 returns every match for a path, and raises `invalid_path` when a
%% step follows an attribute step.
paths_query_test() ->
    Spoon = ?MY_SPOON,
    ProblemStep = {element, <<"problem">>},
    NoAttrStep = {attr, <<"no">>},
    ?assertEqual([Spoon], exml_query:paths(Spoon, [])),
    ?assertEqual([<<"is too big">>, <<"is too big">>, <<"is too big">>],
                 exml_query:paths(Spoon, [ProblemStep, cdata])),
    ?assertEqual([<<"1">>, <<"2">>, <<"3">>],
                 exml_query:paths(Spoon, [ProblemStep, NoAttrStep])),
    ?assertEqual([<<"1">>, <<"2">>],
                 exml_query:paths(Spoon, [{element_with_ns, <<"urn:issues">>},
                                          NoAttrStep])),
    ?assertEqual([], exml_query:paths(Spoon, [{element, <<"banana">>}])),

    %% The HTML fixture is only built once the spoon checks have passed.
    Html = ?HTML,
    ListSteps = [{element, <<"li">>}, {element, <<"ul">>}],
    ?assertEqual([<<"My">>, <<"spoon">>, <<"is">>],
                 exml_query:paths(Html, ListSteps ++ [{element, <<"i">>}, cdata])),
    ?assertEqual([<<"size">>, <<"size">>, <<"size">>],
                 exml_query:paths(Html, ListSteps ++ [{element, <<"span">>},
                                                      {attr, <<"class">>}])),
    ?assertError(invalid_path,
                 exml_query:paths(Html, [{attr, <<"li">>}, cdata])).

%%--------------------------------------------------------------------
%% helpers
%%--------------------------------------------------------------------

%% Parses Raw with exml:parse/1 and returns the tree; crashes with a badmatch
%% when parsing does not return {ok, _}.
xml(Raw) ->
    {ok, Document} = exml:parse(Raw),
    Document.
--------------------------------------------------------------------------------
/test/exml_stream_tests.erl:
--------------------------------------------------------------------------------
-module(exml_stream_tests).

-include("exml_stream.hrl").
-include_lib("eunit/include/eunit.hrl").

-compile([export_all, nowarn_export_all]).
7 | 8 | basic_parse_test() -> 9 | {ok, Parser0} = exml_stream:new_parser(), 10 | {ok, Parser1, Empty0} = 11 | exml_stream:parse(Parser0, <<">), 12 | ?assertEqual([], Empty0), 13 | {ok, Parser2, StreamStart} = 14 | exml_stream:parse(Parser1, <<" to='i.am.banana.com' xml:lang='en'>>), 15 | ?assertEqual( 16 | [#xmlstreamstart{name = <<"stream:stream">>, 17 | attrs = #{<<"xmlns:stream">> => <<"http://etherx.jabber.org/streams">>, 18 | <<"version">> => <<"1.0">>, 19 | <<"to">> => <<"i.am.banana.com">>, 20 | <<"xml:lang">> => <<"en">>}}], 21 | StreamStart), 22 | {ok, Parser3, Auth} = exml_stream:parse(Parser2, <<" mechanism='DIGEST-MD5'/>">>), 23 | ?assertEqual( 24 | [#xmlel{name = <<"auth">>, attrs = #{<<"mechanism">> => <<"DIGEST-MD5">>}}], 25 | Auth), 26 | {ok, Parser4, Empty1} = exml_stream:parse(Parser3, <<">), 27 | ?assertEqual([], Empty1), 28 | {ok, Parser5, Empty2} = exml_stream:parse(Parser4, <<"/>This is ">>), 29 | ?assertEqual([], Empty2), 30 | {ok, _Parser6, Features} = exml_stream:parse(Parser5, <<"some CData">>), 31 | ?assertMatch( 32 | [#xmlel{name = <<"stream:features">>, 33 | children = [#xmlel{name = <<"bind">>, 34 | attrs = #{<<"xmlns">> := <<"some_ns">>}}, 35 | #xmlel{name = <<"session">>, 36 | attrs = #{<<"xmlns">> := <<"some_other">>}}, 37 | _CData]}], 38 | Features), 39 | [#xmlel{children=[_, _, CData]}] = Features, 40 | ?assertEqual(#xmlcdata{content = <<"This is some CData">>}, CData). 41 | 42 | parser_errors_test() -> 43 | ?assertMatch({error, _}, exml:parse(<<"">>)), 44 | %% it is the special case, because we are wrapping binary in the following way 45 | %% Stream = <<"", XML/binary, "">>, 46 | %% to make it a non-blocking call(?) 47 | ?assertMatch({error, _}, exml:parse(<<"">>)). 48 | 49 | -define(BANANA_STREAM, <<"I am a banana!">>). 
%% Asserts that Elements is the parsed form of ?BANANA_STREAM: a stream start,
%% a <foo attr='bar'> element whose first child is the banana cdata followed by
%% a <baz> element, and a stream end. Evaluates to Elements.
-define(assertIsBanana(Elements), (fun() -> % fun instead of begin/end because we bind CData in unhygienic macro
    ?assertMatch([#xmlstreamstart{name = <<"stream:stream">>,
                                  attrs = #{<<"xmlns:stream">> := <<"something">>}},
                  #xmlel{name = <<"foo">>,
                         attrs = #{<<"attr">> := <<"bar">>},
                         children = [_CData, #xmlel{name = <<"baz">>}]},
                  #xmlstreamend{name = <<"stream:stream">>}],
                 Elements),
    [_, #xmlel{children=[CData|_]}|_] = Elements,
    ?assertEqual(#xmlcdata{content = <<"I am a banana!">>}, CData),
    Elements
end)()).

%% Parses the banana stream, then feeds each serialized form of the result
%% (binary, list, iolist, pretty iolist with its line breaks and indentation
%% removed) through a fresh parser and expects the same structure back.
conv_test() ->
    RoundTrip = fun(Input) ->
                        {ok, Fresh} = exml_stream:new_parser(),
                        {ok, _Used, Parsed} = exml_stream:parse(Fresh, Input),
                        ?assertIsBanana(Parsed)
                end,
    Elements = RoundTrip(?BANANA_STREAM),
    RoundTrip(exml:to_binary(Elements)),
    RoundTrip(list_to_binary(exml:to_list(Elements))),
    RoundTrip(exml:to_iolist(Elements)),
    Pretty = exml:to_pretty_iolist(Elements),
    Compact = re:replace(Pretty, "\n\\s*", "", [global]),
    RoundTrip(iolist_to_binary(Compact)).

%% After reset_parser/1 the same parser accepts a complete stream again.
stream_reopen_test() ->
    {ok, Fresh} = exml_stream:new_parser(),
    {ok, Used, FirstRun} = exml_stream:parse(Fresh, ?BANANA_STREAM),
    ?assertIsBanana(FirstRun),
    {ok, Reset} = exml_stream:reset_parser(Used),
    {ok, _Done, SecondRun} = exml_stream:parse(Reset, ?BANANA_STREAM),
    ?assertIsBanana(SecondRun).
%% With `infinite_stream` and `autoreset` enabled, every serialized top-level
%% element fed to the parser comes back as exactly one element of the same name.
infinit_framed_stream_test() ->
    ParserOpts = [{infinite_stream, true},
                  {autoreset, true}],
    {ok, Parser0} = exml_stream:new_parser(ParserOpts),
    Open = #xmlel{name = <<"open">>,
                  attrs = #{<<"xmlns">> => <<"urn:ietf:params:xml:ns:xmpp-framing">>,
                            <<"to">> => <<"example.com">>,
                            <<"version">> => <<"1.0">>}},
    Foo = #xmlel{name = <<"foo">>},
    Body = #xmlel{name = <<"body">>,
                  children = [#xmlcdata{content = <<"Hi, How Are You?">>}]},
    Message = #xmlel{name = <<"message">>,
                     attrs = #{<<"to">> => <<"ala@example.com">>},
                     children = [Body]},
    FeedOne =
        fun(#xmlel{name = Name} = Elem, Parser) ->
                Bin = exml:to_binary(Elem),
                %% the parser must hand back a one-element list ...
                {ok, NextParser, [Returned]} = exml_stream:parse(Parser, Bin),
                %% ... holding an xmlel with the name that was sent
                #xmlel{name = Name} = Returned,
                NextParser
        end,
    lists:foldl(FeedOne, Parser0, [Open, Foo, Message]).

%% Feeding this input to a fresh stream parser must produce an error tuple.
parse_error_test() ->
    {ok, Parser0} = exml_stream:new_parser(),
    ?assertMatch({error, _},
                 exml_stream:parse(Parser0, <<"top-level non-tag">>)).

%% Wraps Text as the cdata of <tag> inside an <s> stream, serializes it, parses
%% it back and checks that the cdata content survives unchanged.
assert_parses_escape_cdata(Text) ->
    Expected = #xmlcdata{content = Text},
    Tag = #xmlel{name = <<"tag">>, children = [Expected]},
    Stream = [#xmlstreamstart{name = <<"s">>}, Tag, #xmlstreamend{name = <<"s">>}],
    {ok, Parser0} = exml_stream:new_parser(),
    Encoded = exml:to_binary(Stream),
    {ok, _Parser1, Elements} = exml_stream:parse(Parser0, Encoded),
    ?assertMatch([#xmlstreamstart{name = <<"s">>},
                  #xmlel{name = <<"tag">>, children=[_CData]},
                  #xmlstreamend{name = <<"s">>}],
                 Elements),
    [_, #xmlel{children=[CData]}, _] = Elements,
    ?assertEqual(Expected, CData).

%% reset_parser/1 raises function_clause for a tuple that is not a real parser.
reset_parser_error_test() ->
    {ok, _P} = exml_stream:new_parser(),
    ?assertError(function_clause,
                 exml_stream:reset_parser({parser, foo, bar, baz})).
124 | 125 | cdata_is_an_error_when_first_child_of_stream_test() -> 126 | {ok, P} = exml_stream:new_parser(), 127 | ?assertMatch({error, _}, exml_stream:parse(P, <<"hello">>)). 128 | 129 | multiple_cdata_are_joined_test() -> 130 | {ok, P} = exml_stream:new_parser(), 131 | {ok, P1, _} = exml_stream:parse(P, <<"
    ">>), 132 | {ok, P2, E1} = exml_stream:parse(P1, <<", world">>), 133 | {ok, _, _} = exml_stream:parse(P2, <<"">>), 134 | #xmlel{children=[CData]} = hd(E1), 135 | ?assertEqual(#xmlcdata{content = <<"hello, world">>}, CData). 136 | 137 | cdata_test() -> 138 | assert_parses_escape_cdata(<<"I am a banana!">>), 139 | assert_parses_escape_cdata(<<"]:-> ]]> >">>), 140 | assert_parses_escape_cdata(<<">>), 141 | assert_parses_escape_cdata(<<"