├── .gitignore ├── data └── test1.xml ├── README.md ├── CMakeLists.txt ├── appveyor.yml ├── LICENSE.md ├── doc └── XML.md ├── include └── Cats │ ├── Textcat.hpp │ └── Textcat │ ├── XML.hpp │ └── XML │ ├── Handler.hpp │ ├── Serializer.hpp │ ├── Document.hpp │ └── Parser.hpp ├── example ├── XML_DOMWriter │ └── XML_DOMWriter.cpp ├── XML_SAXWriter │ └── XML_SAXWriter.cpp ├── XML_DOMReader │ └── XML_DOMReader.cpp └── XML_SAXReader │ └── XML_SAXReader.cpp └── .travis.yml /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /include/Cats/Corecat 3 | -------------------------------------------------------------------------------- /data/test1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | The quick brown fox jumps over the lazy dog. 10 | I can eat glass, it doesn't hurt me. 11 | 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Textcat: Text data processing library 2 | 3 | Textcat is a *The Cats Project* library for processing text data. 
4 | 5 | CI | Status 6 | :---: | :---: 7 | AppVeyor | [![AppVeyor](https://ci.appveyor.com/api/projects/status/github/SuperSodaSea/Textcat?branch=master&svg=true)](https://ci.appveyor.com/project/SuperSodaSea/Textcat) 8 | Travis CI | [![Travis CI](https://travis-ci.org/SuperSodaSea/Textcat.svg?branch=master)](https://travis-ci.org/SuperSodaSea/Textcat) 9 | 10 | Currently supported format: 11 | 12 | + [XML](/doc/XML.md) 13 | 14 | 15 | ## License 16 | 17 | [MIT License](/LICENSE.md) 18 | 19 | 20 | ## Requirement 21 | 22 | + C++14 23 | + [Corecat][Corecat] 24 | 25 | 26 | [Corecat]: https://github.com/SuperSodaSea/Corecat 27 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1) 2 | 3 | # Only C++ sources are built, so a C toolchain is not required. 4 | project(Textcat LANGUAGES CXX) 5 | 6 | # C++14 is a stated requirement; fail at configure time instead of silently falling back to an older standard. 7 | set(CMAKE_CXX_STANDARD 14) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | 10 | # Per-toolchain warning, optimisation and link settings shared by all examples. 11 | if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") 12 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -O2") 13 | # -Wl,-s asks the linker to strip symbols from the example executables. 14 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-s") 15 | if(WIN32) 16 | # Link the examples statically on Windows. 17 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") 18 | endif() 19 | elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") 20 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_SCL_SECURE_NO_WARNINGS /EHsc /W3 /O2 /MT") 21 | endif() 22 | 23 | # Example programs; each one is built from example/<name>/<name>.cpp. 24 | set(EXAMPLE 25 | XML_SAXReader 26 | XML_SAXWriter 27 | XML_DOMReader 28 | XML_DOMWriter) 29 | 30 | foreach(example ${EXAMPLE}) 31 | add_executable(${example} example/${example}/${example}.cpp) 32 | # Scope the header search path to the target and anchor it at this directory. 33 | target_include_directories(${example} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include") 34 | endforeach() 35 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | os: Visual Studio 2017 2 | version: 0.0.0.{build} 3 | 4 | configuration: 5 | - Debug 6 | - Release 7 | 8 | environment: 9 | matrix: 10 | - VS_VERSION: 15 2017 11 | 
VS_PLATFORM: win32 12 | - VS_VERSION: 15 2017 13 | VS_PLATFORM: x64 14 | 15 | before_build: 16 | - cd .. 17 | - git clone -q --branch=master https://github.com/SuperSodaSea/Corecat.git 18 | - cd Corecat 19 | - git checkout -fq master 20 | - xcopy include ..\Textcat\include /E /I /Q 21 | - cd ..\Textcat 22 | - cmake -H. -Bbuild -G "Visual Studio %VS_VERSION%" -DCMAKE_GENERATOR_PLATFORM=%VS_PLATFORM% 23 | 24 | build: 25 | project: build/Textcat.sln 26 | parallel: true 27 | verbosity: minimal 28 | 29 | test_script: 30 | - build\%CONFIGURATION%\XML_DOMReader.exe data\test1.xml 31 | - build\%CONFIGURATION%\XML_DOMWriter.exe 32 | - build\%CONFIGURATION%\XML_SAXReader.exe data\test1.xml 33 | - build\%CONFIGURATION%\XML_SAXWriter.exe 34 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2016-2018 The Cats Project 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /doc/XML.md: -------------------------------------------------------------------------------- 1 | # Textcat::XML 2 | 3 | Textcat::XML contains the XML parser and serializer. 4 | 5 | 6 | ## About XML 7 | 8 | XML stands for "Extensible Markup Language". 9 | 10 | Specifications for XML: 11 | 12 | * [Extensible Markup Language (XML) 1.0](https://www.w3.org/TR/xml/) 13 | * [Extensible Markup Language (XML) 1.1](https://www.w3.org/TR/xml11/) 14 | 15 | Specification for DOM: 16 | 17 | * [Document Object Model Level 1](https://www.w3.org/TR/REC-DOM-Level-1/) 18 | 19 | ## Features 20 | 21 | * **Easy to use**. Textcat::XML provides both SAX-style and DOM-style APIs. 22 | * **High-performance**. Textcat::XML learned from RapidXml and RapidJSON, which are probably the fastest choices for XML and JSON. Under the same conditions, it is sometimes even faster than RapidXml. 23 | * **Header-only**. Textcat::XML is lightweight and requires only [Corecat][Corecat], which is the core of *The Cats Project* and is also header-only. 
24 | 25 | 26 | ## Start in a minute 27 | 28 | ```cpp 29 | #include <iostream> 30 | 31 | #include "Cats/Textcat/XML.hpp" 32 | 33 | using namespace Cats::Textcat::XML; 34 | 35 | int main() { 36 | 37 | char data[] = R"()"; 38 | 39 | XMLDocument document; 40 | document.parse<>(data); 41 | std::cout << document << std::endl; 42 | 43 | return 0; 44 | 45 | } 46 | ``` 47 | -------------------------------------------------------------------------------- /include/Cats/Textcat.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | * 25 | */ 26 | 27 | #ifndef CATS_TEXTCAT_HPP 28 | #define CATS_TEXTCAT_HPP 29 | 30 | 31 | #include "Textcat/XML.hpp" 32 | 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /include/Cats/Textcat/XML.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | * 25 | */ 26 | 27 | #ifndef CATS_TEXTCAT_XML_HPP 28 | #define CATS_TEXTCAT_XML_HPP 29 | 30 | 31 | #include "XML/Document.hpp" 32 | #include "XML/Handler.hpp" 33 | #include "XML/Parser.hpp" 34 | #include "XML/Serializer.hpp" 35 | 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /example/XML_DOMWriter/XML_DOMWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | * 25 | */ 26 | 27 | #include 28 | 29 | #include "Cats/Textcat/XML.hpp" 30 | 31 | using namespace Cats::Textcat; 32 | 33 | int main() { 34 | 35 | XMLDocument document; 36 | XMLElement list("list"); document.appendChild(list); 37 | XMLElement person("person"); list.appendChild(person); 38 | XMLAttribute name("name", "SuperSodaSea"); person.appendAttribute(name); 39 | XMLAttribute gender("gender", "male"); person.appendAttribute(gender); 40 | XMLAttribute age("age", "17"); person.appendAttribute(age); 41 | std::cout << document << std::endl; 42 | 43 | return 0; 44 | 45 | } 46 | -------------------------------------------------------------------------------- /example/XML_SAXWriter/XML_SAXWriter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | * 25 | */ 26 | 27 | #include 28 | 29 | #include "Cats/Corecat/Data/Stream.hpp" 30 | #include "Cats/Textcat/XML.hpp" 31 | 32 | using namespace Cats::Corecat; 33 | using namespace Cats::Textcat; 34 | 35 | int main() { 36 | 37 | auto wrapper = createWrapperOutputStream(std::cout); 38 | XMLSerializer s(wrapper); 39 | s.startDocument(); 40 | s.startElement("list"); 41 | s.endAttributes(false); 42 | s.startElement("person"); 43 | s.attribute("name", "SuperSodaSea"); 44 | s.attribute("gender", "male"); 45 | s.attribute("age", "17"); 46 | s.endAttributes(true); 47 | s.endElement("list"); 48 | s.endDocument(); 49 | std::cout << std::endl; 50 | 51 | return 0; 52 | 53 | } 54 | -------------------------------------------------------------------------------- /include/Cats/Textcat/XML/Handler.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | * 25 | */ 26 | 27 | #ifndef CATS_TEXTCAT_XML_HANDLER_HPP 28 | #define CATS_TEXTCAT_XML_HANDLER_HPP 29 | 30 | 31 | #include "Cats/Corecat/Text/String.hpp" 32 | 33 | 34 | namespace Cats { 35 | namespace Textcat{ 36 | inline namespace XML { 37 | 38 | class XMLHandlerBase { 39 | 40 | protected: 41 | 42 | using StringView8 = Corecat::StringView8; 43 | 44 | public: 45 | 46 | void startDocument() {} 47 | void endDocument() {} 48 | void startElement(StringView8 /*name*/) {} 49 | void endElement(StringView8 /*name*/) {} 50 | void endAttributes(bool /*empty*/) {} 51 | void doctype() {} 52 | void attribute(StringView8 /*name*/, StringView8 /*value*/) {} 53 | void text(StringView8 /*value*/) {} 54 | void cdata(StringView8 /*value*/) {} 55 | void comment(StringView8 /*value*/) {} 56 | void processingInstruction(StringView8 /*name*/, StringView8 /*value*/) {} 57 | 58 | }; 59 | 60 | } 61 | } 62 | } 63 | 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /example/XML_DOMReader/XML_DOMReader.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to 
use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | * 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #include "Cats/Textcat/XML.hpp" 32 | 33 | using namespace Cats::Corecat; 34 | using namespace Cats::Textcat; 35 | 36 | std::vector readFile(const char* path) { 37 | 38 | std::ifstream is(path, std::ios::binary); 39 | if(!is) throw IOException("Cannot read file"); 40 | is.seekg(0, std::ios::end); 41 | std::size_t size = static_cast(is.tellg()); 42 | is.seekg(0); 43 | std::vector data(size + 1); 44 | is.read(data.data(), size); 45 | data[size] = 0; 46 | return data; 47 | 48 | } 49 | 50 | int main(int argc, char** argv) { 51 | 52 | try { 53 | 54 | if(argc < 2) throw InvalidArgumentException("File name needed"); 55 | 56 | for(int i = 1; i < argc; ++i) { 57 | 58 | auto data = readFile(argv[i]); 59 | XMLDocument document; 60 | document.parse<>(data.data()); 61 | std::cout << document << std::endl; 62 | 63 | } 64 | 65 | } catch(std::exception& e) { std::cerr << e.what() << std::endl; return 1; } 66 | 67 | return 0; 68 | 69 | } 70 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: cpp 2 | 3 | sudo: required 4 | 5 | matrix: 6 | include: 7 | - os: linux 8 | env: OS=Linux EVAL="CXX=g++-5" FLAGS="-m32" CONFIG=Debug 9 | addons: 10 | apt: 11 | sources: ['ubuntu-toolchain-r-test'] 12 | packages: ['g++-5-multilib', 'lib32stdc++6', 'linux-libc-dev'] 13 | - os: linux 14 | env: OS=Linux EVAL="CXX=g++-5" FLAGS="-m32" CONFIG=Release 15 | addons: 16 | apt: 17 | sources: ['ubuntu-toolchain-r-test'] 18 | packages: ['g++-5-multilib', 'lib32stdc++6', 'linux-libc-dev'] 19 | - os: linux 20 | env: OS=Linux EVAL="CXX=g++-5" FLAGS="-m64" CONFIG=Debug 21 | addons: 22 | apt: 23 | sources: ['ubuntu-toolchain-r-test'] 24 | packages: ['g++-5'] 25 | - os: linux 26 | env: OS=Linux EVAL="CXX=g++-5" FLAGS="-m64" CONFIG=Release 27 | addons: 28 | apt: 29 | sources: ['ubuntu-toolchain-r-test'] 30 | packages: ['g++-5'] 31 | - os: linux 32 | env: OS=Linux EVAL="CXX=clang++" FLAGS="-m32" CONFIG=Debug 33 | addons: 34 | apt: 35 | sources: ['ubuntu-toolchain-r-test'] 36 | packages: ['g++-5-multilib', 'lib32stdc++6', 'linux-libc-dev'] 37 | - os: linux 38 | env: OS=Linux EVAL="CXX=clang++" FLAGS="-m32" CONFIG=Release 39 | addons: 40 | apt: 41 | sources: ['ubuntu-toolchain-r-test'] 42 | packages: ['g++-5-multilib', 'lib32stdc++6', 'linux-libc-dev'] 43 | - os: linux 44 | env: OS=Linux EVAL="CXX=clang++" FLAGS="-m64" CONFIG=Debug 45 | addons: 46 | apt: 47 | sources: ['ubuntu-toolchain-r-test'] 48 | packages: ['g++-5'] 49 | - os: linux 50 | env: OS=Linux EVAL="CXX=clang++" FLAGS="-m64" CONFIG=Release 51 | addons: 52 | apt: 53 | sources: ['ubuntu-toolchain-r-test'] 54 | packages: ['g++-5'] 55 | - os: osx 56 | env: OS=MacOS EVAL="CXX=clang++" FLAGS="-m32" CONFIG=Debug 57 | - os: osx 58 | env: OS=MacOS EVAL="CXX=clang++" FLAGS="-m32" CONFIG=Release 59 | - os: osx 60 | env: OS=MacOS EVAL="CXX=clang++" FLAGS="-m64" CONFIG=Debug 61 | - os: osx 62 | env: OS=MacOS EVAL="CXX=clang++" FLAGS="-m64" 
CONFIG=Release 63 | 64 | before_script: 65 | - if [ "$OS" == "Linux" ]; then sudo ln -s /usr/include/asm-generic /usr/include/asm; fi 66 | - eval "${EVAL}" 67 | - $CXX -v 68 | - cd .. 69 | - git clone -q --branch=master https://github.com/SuperSodaSea/Corecat.git 70 | - cd Corecat 71 | - git checkout -fq master 72 | - cp -r include ../Textcat 73 | - cd ../Textcat 74 | - cmake -H. -Bbuild -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_CXX_FLAGS=$FLAGS -DCMAKE_BUILD_TYPE=$CONFIG 75 | 76 | script: 77 | - make -C build 78 | # Run the examples in the script phase so that a failing example fails the job. 79 | - ./build/XML_DOMReader data/test1.xml 80 | - ./build/XML_DOMWriter 81 | - ./build/XML_SAXReader data/test1.xml 82 | - ./build/XML_SAXWriter 83 | -------------------------------------------------------------------------------- /include/Cats/Textcat/XML/Serializer.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | * 25 | */ 26 | 27 | #ifndef CATS_TEXTCAT_XML_SERIALIZER_HPP 28 | #define CATS_TEXTCAT_XML_SERIALIZER_HPP 29 | 30 | 31 | #include 32 | #include 33 | 34 | #include 35 | 36 | #include "Cats/Corecat/Data/Stream/OutputStream.hpp" 37 | #include "Cats/Corecat/Text/String.hpp" 38 | 39 | #include "Handler.hpp" 40 | 41 | 42 | namespace Cats { 43 | namespace Textcat{ 44 | inline namespace XML { 45 | 46 | class XMLSerializer : public XMLHandlerBase { 47 | 48 | private: 49 | 50 | template 51 | using OutputStream = Corecat::OutputStream; 52 | using StringView8 = Corecat::StringView8; 53 | 54 | private: 55 | 56 | OutputStream* stream; 57 | 58 | public: 59 | 60 | XMLSerializer(OutputStream& stream_) : stream(&stream_) {} 61 | 62 | void startDocument() {} 63 | void endDocument() {} 64 | void startElement(StringView8 name) { 65 | 66 | stream->writeAll("<", 1); 67 | stream->writeAll(name.getData(), name.getLength()); 68 | 69 | } 70 | void endElement(StringView8 name) { 71 | 72 | stream->writeAll("writeAll(name.getData(), name.getLength()); 74 | stream->writeAll(">", 1); 75 | 76 | } 77 | void endAttributes(bool empty) { 78 | 79 | if(empty) stream->writeAll("/>", 2); 80 | else stream->writeAll(">", 1); 81 | 82 | } 83 | void doctype() {} 84 | void attribute(StringView8 name, StringView8 value) { 85 | 86 | stream->writeAll(" ", 1); 87 | stream->writeAll(name.getData(), name.getLength()); 88 | stream->writeAll("=\"", 2); 89 | stream->writeAll(value.getData(), value.getLength()); 90 | stream->writeAll("\"", 1); 91 | 92 | } 93 | void text(StringView8 value) { 94 | 95 | stream->writeAll(value.getData(), value.getLength()); 96 | 97 | } 98 | void cdata(StringView8 value) { 99 | 100 | 
stream->writeAll("writeAll(value.getData(), value.getLength()); 102 | stream->writeAll("]]>", 3); 103 | 104 | } 105 | void comment(StringView8 value) { 106 | 107 | stream->writeAll("", 3); 110 | 111 | } 112 | void processingInstruction(StringView8 name, StringView8 value) { 113 | 114 | stream->writeAll("writeAll(name.getData(), name.getLength()); 116 | stream->writeAll(" ", 1); 117 | stream->writeAll(value.getData(), value.getLength()); 118 | stream->writeAll("?>", 2); 119 | 120 | } 121 | 122 | OutputStream& getStream() { return *stream; } 123 | void setStream(OutputStream& stream_) { stream = &stream_; } 124 | 125 | }; 126 | 127 | } 128 | } 129 | } 130 | 131 | 132 | #endif 133 | -------------------------------------------------------------------------------- /example/XML_SAXReader/XML_SAXReader.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | * 25 | */ 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #include "Cats/Corecat/Text.hpp" 32 | #include "Cats/Textcat/XML.hpp" 33 | 34 | using namespace Cats::Corecat; 35 | using namespace Cats::Textcat; 36 | 37 | class Handler : public XMLHandlerBase { 38 | 39 | private: 40 | 41 | int level; 42 | 43 | private: 44 | 45 | void indent() { for(int i = 0; i < level; ++i) std::cout << " "; } 46 | 47 | public: 48 | 49 | void startDocument() { level = 0; std::cout << "startDocument()\n"; } 50 | void endDocument() { std::cout << "endDocument()\n"; } 51 | void startElement(StringView8 name) { 52 | 53 | indent(); std::cout << "startElement(\"" << name << "\")\n"; ++level; 54 | 55 | } 56 | void endElement(StringView8 name) { 57 | 58 | --level; indent(); std::cout << "endElement(\"" << name << "\")\n"; 59 | 60 | } 61 | void endAttributes(bool empty) { 62 | 63 | indent(); std::cout << "endAttributes(" << std::boolalpha << empty << ")\n"; if(empty) --level; 64 | 65 | } 66 | void doctype() { indent(); std::cout << "doctype()\n"; } 67 | void attribute(StringView8 name, StringView8 value) { 68 | 69 | indent(); std::cout << "attribute(\"" << name << "\", \"" << value << "\")\n"; 70 | 71 | } 72 | void text(StringView8 value) { 73 | 74 | indent(); std::cout << "text(\"" << value << "\")\n"; 75 | 76 | } 77 | void cdata(StringView8 value) { 78 | 79 | indent(); std::cout << "cdata(\"" << value << "\")\n"; 80 | 81 | } 82 | void comment(StringView8 value) { 83 | 84 | indent(); std::cout << "comment(\"" << value << "\")\n"; 85 | 86 | } 87 | void processingInstruction(StringView8 name, StringView8 value) { 88 | 89 | indent(); std::cout << "processingInstruction(\"" << name << "\", \"" << value 
<< "\")\n"; 90 | 91 | } 92 | 93 | }; 94 | 95 | std::vector readFile(const char* path) { 96 | 97 | std::ifstream is(path, std::ios::binary); 98 | if(!is) throw IOException("Cannot read file"); 99 | is.seekg(0, std::ios::end); 100 | std::size_t size = static_cast(is.tellg()); 101 | is.seekg(0); 102 | std::vector data(size + 1); 103 | is.read(data.data(), size); 104 | data[size] = 0; 105 | return data; 106 | 107 | } 108 | 109 | int main(int argc, char** argv) { 110 | 111 | try { 112 | 113 | if(argc < 2) throw InvalidArgumentException("File name needed"); 114 | 115 | for(int i = 1; i < argc; ++i) { 116 | 117 | auto data = readFile(argv[i]); 118 | XMLParser parser; 119 | Handler handler; 120 | parser.parse<>(data.data(), handler); 121 | 122 | } 123 | 124 | } catch(std::exception& e) { std::cerr << e.what() << std::endl; return 1; } 125 | 126 | return 0; 127 | 128 | } 129 | -------------------------------------------------------------------------------- /include/Cats/Textcat/XML/Document.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | * 25 | */ 26 | 27 | #ifndef CATS_TEXTCAT_XML_DOCUMENT_HPP 28 | #define CATS_TEXTCAT_XML_DOCUMENT_HPP 29 | 30 | 31 | #include 32 | #include 33 | 34 | #include 35 | #include 36 | 37 | #include "Cats/Corecat/Data/Allocator/FastAllocator.hpp" 38 | #include "Cats/Corecat/Data/Stream.hpp" 39 | #include "Cats/Corecat/Text/String.hpp" 40 | #include "Cats/Corecat/Util/Exception.hpp" 41 | 42 | #include "Handler.hpp" 43 | #include "Parser.hpp" 44 | #include "Serializer.hpp" 45 | 46 | 47 | namespace Cats { 48 | namespace Textcat{ 49 | inline namespace XML { 50 | 51 | class XMLNode; 52 | class XMLElement; 53 | class XMLText; 54 | class XMLCDATA; 55 | class XMLComment; 56 | class XMLProcessingInstruction; 57 | class XMLDocument; 58 | 59 | namespace Impl { 60 | 61 | template 62 | class List { 63 | 64 | public: 65 | 66 | struct ListElement { 67 | 68 | T* prev; 69 | T* next; 70 | XMLNode* parent; 71 | 72 | ListElement() : prev(), next(), parent() {} 73 | 74 | }; 75 | 76 | class Iterator { 77 | 78 | private: 79 | 80 | List* list; 81 | ListElement* p; 82 | 83 | public: 84 | 85 | Iterator(List* list_, ListElement* p_) : list(list_), p(p_) {} 86 | Iterator(const Iterator& src) : list(src.list), p(src.p) {} 87 | 88 | Iterator& operator =(const Iterator& src) { list = src.list, p = src.p; return *this; } 89 | 90 | T& operator *() const { return static_cast(*p); } 91 | T* operator ->() const { return static_cast(p); } 92 | bool operator ==(const Iterator& it) { return p == 
it.p; } 93 | bool operator ==(XMLNode* q) { return p == q; } 94 | bool operator !=(const Iterator& it) { return p != it.p; } 95 | Iterator& operator ++() { p = p->next; return *this; } 96 | Iterator operator ++(int) { Iterator tmp = *this; ++*this; return tmp; } 97 | Iterator& operator --() { p = p ? p->prev : list->last; return *this; } 98 | Iterator operator --(int) { Iterator tmp = *this; --*this; return tmp; } 99 | 100 | }; 101 | 102 | private: 103 | 104 | T* first; 105 | T* last; 106 | 107 | public: 108 | 109 | List() : first(), last() {} 110 | List(const List& src) = delete; 111 | ~List() = default; 112 | 113 | T& append(XMLNode& parent, T& child) { 114 | 115 | assert(!child.parent); 116 | if(first) { child.prev = last; last->next = &child; last = &child; } 117 | else first = last = &child; 118 | child.parent = &parent; 119 | return child; 120 | 121 | } 122 | /* Links child immediately before ref and returns child. child must be detached; ref is assumed to be an element of THIS list (only ref.parent is asserted). */ 123 | T& insertBefore(T& child, T& ref) { 124 | 125 | assert(!child.parent && ref.parent); 126 | auto pPrev = ref.prev; 127 | child.prev = pPrev; 128 | child.next = &ref; 129 | if(pPrev) pPrev->next = &child; else first = &child; /* ref was the head, so child becomes the new head */ 130 | ref.prev = &child; 131 | child.parent = ref.parent; 132 | return child; 133 | 134 | } 135 | /* Unlinks child from the list, clears its prev/next/parent links and returns it. */ 136 | T& remove(T& child) { 137 | 138 | auto pPrev = child.prev; 139 | auto pNext = child.next; 140 | if(pPrev) pPrev->next = pNext; 141 | else first = pNext; 142 | if(pNext) pNext->prev = pPrev; /* repair the successor's BACK link; its forward link must stay intact */ 143 | else last = pPrev; 144 | child.prev = nullptr; 145 | child.next = nullptr; 146 | child.parent = nullptr; 147 | return child; 148 | 149 | } 150 | 151 | T& getFirst() { return *first; } 152 | T& getLast() { return *last; } 153 | 154 | bool empty() const { return !first; } 155 | 156 | Iterator begin() { return Iterator(this, first); } 157 | Iterator end() { return Iterator(this, nullptr); } 158 | 159 | }; 160 | 161 | } 162 | 163 | 164 | class XMLDOMException : public Corecat::Exception { 165 | 166 | public: 167 | 168 | XMLDOMException(const String8& data) : Exception("XMLDOMException: " + data) {} 169 | 170
| }; 171 | 172 | enum class XMLNodeType : uint16_t { 173 | 174 | Element, 175 | Text, 176 | CDATA, 177 | Comment, 178 | ProcessingInstruction, 179 | Document, 180 | 181 | }; 182 | 183 | class XMLNode : public Impl::List::ListElement { 184 | 185 | private: 186 | 187 | const XMLNodeType type; 188 | Impl::List listChild; 189 | 190 | public: 191 | 192 | XMLNode(XMLNodeType type_) : Impl::List::ListElement(), type(type_), listChild() {} 193 | XMLNode(const XMLNode& src) = delete; 194 | 195 | XMLNodeType getType() const { return type; } 196 | 197 | Impl::List& child() { return listChild; } 198 | 199 | XMLNode& getFirstChild() { return listChild.getFirst(); } 200 | XMLNode& getLastChild() { return listChild.getLast(); } 201 | 202 | XMLNode& appendChild(XMLNode& child) { return listChild.append(*this, child); } 203 | XMLNode& insertBefore(XMLNode& child, XMLNode& ref) { return listChild.insertBefore(child, ref); } 204 | XMLNode& removeChild(XMLNode& child) { return listChild.remove(child); } 205 | bool hasChildNodes() { return !listChild.empty(); } 206 | 207 | XMLElement& asElement() noexcept { return reinterpret_cast(*this); } 208 | const XMLElement& asElement() const noexcept { return reinterpret_cast(*this); } 209 | XMLText& asText() noexcept { return reinterpret_cast(*this); } 210 | const XMLText& asText() const noexcept { return reinterpret_cast(*this); } 211 | XMLCDATA& asCDATA() noexcept { return reinterpret_cast(*this); } 212 | const XMLCDATA& asCDATA() const noexcept { return reinterpret_cast(*this); } 213 | XMLComment& asComment() noexcept { return reinterpret_cast(*this); } 214 | const XMLComment& asComment() const noexcept { return reinterpret_cast(*this); } 215 | XMLProcessingInstruction& asProcessingInstruction() noexcept { return reinterpret_cast(*this); } 216 | const XMLProcessingInstruction& asProcessingInstruction() const noexcept { return reinterpret_cast(*this); } 217 | XMLDocument& asDocument() noexcept { return reinterpret_cast(*this); } 218 | const 
XMLDocument& asDocument() const noexcept { return reinterpret_cast(*this); } 219 | 220 | }; 221 | 222 | class XMLAttribute : public Impl::List::ListElement { 223 | 224 | private: 225 | 226 | using StringView8 = Corecat::StringView8; 227 | 228 | private: 229 | 230 | StringView8 name; 231 | StringView8 value; 232 | 233 | public: 234 | 235 | XMLAttribute() : Impl::List::ListElement(), name(), value() {} 236 | XMLAttribute(StringView8 name_, StringView8 value_) : 237 | Impl::List::ListElement(), name(name_), value(value_) {} 238 | XMLAttribute(const XMLAttribute& src) = delete; 239 | 240 | StringView8 getName() const { return name; } 241 | void setName(StringView8 name_) { name = name_; } 242 | StringView8 getValue() const { return value; } 243 | void setValue(StringView8 value_) { value = value_; } 244 | 245 | }; 246 | 247 | class XMLElement : public XMLNode { 248 | 249 | private: 250 | 251 | using StringView8 = Corecat::StringView8; 252 | 253 | private: 254 | 255 | Impl::List listAttr; 256 | StringView8 name; 257 | 258 | public: 259 | 260 | XMLElement() : XMLNode(XMLNodeType::Element), listAttr(), name() {} 261 | XMLElement(StringView8 name_) : XMLNode(XMLNodeType::Element), listAttr(), name(name_) {} 262 | XMLElement(const XMLElement& src) = delete; 263 | 264 | Impl::List& attribute() { return listAttr; } 265 | 266 | StringView8 getName() const { return name; } 267 | void setName(StringView8 name_) { name = name_; } 268 | 269 | XMLAttribute& getFirstAttribute() { return listAttr.getFirst(); } 270 | XMLAttribute& getLastAttribute() { return listAttr.getLast(); } 271 | XMLAttribute& appendAttribute(XMLAttribute& attr) { return listAttr.append(*this, attr); } 272 | XMLAttribute& removeAttribute(XMLAttribute& attr) { return listAttr.remove(attr); } 273 | 274 | }; 275 | 276 | class XMLText : public XMLNode { 277 | 278 | private: 279 | 280 | using StringView8 = Corecat::StringView8; 281 | 282 | private: 283 | 284 | StringView8 value; 285 | 286 | public: 287 | 288 | 
XMLText() : XMLNode(XMLNodeType::Text), value() {} 289 | XMLText(StringView8 value_) : XMLNode(XMLNodeType::Text), value(value_) {} 290 | XMLText(const XMLText& src) = delete; 291 | 292 | StringView8 getValue() const { return value; } 293 | void setValue(StringView8 value_) { value = value_; } 294 | 295 | }; 296 | 297 | class XMLCDATA : public XMLNode { 298 | 299 | private: 300 | 301 | using StringView8 = Corecat::StringView8; 302 | 303 | private: 304 | 305 | StringView8 value; 306 | 307 | public: 308 | 309 | XMLCDATA() : XMLNode(XMLNodeType::CDATA), value() {} 310 | XMLCDATA(StringView8 value_) : XMLNode(XMLNodeType::CDATA), value(value_) {} 311 | XMLCDATA(const XMLCDATA& src) = delete; 312 | 313 | StringView8 getValue() const { return value; } 314 | void setValue(StringView8 value_) { value = value_; } 315 | 316 | }; 317 | 318 | class XMLComment : public XMLNode { 319 | 320 | private: 321 | 322 | using StringView8 = Corecat::StringView8; 323 | 324 | private: 325 | 326 | StringView8 value; 327 | 328 | public: 329 | 330 | XMLComment() : XMLNode(XMLNodeType::Comment), value() {} 331 | XMLComment(StringView8 value_) : XMLNode(XMLNodeType::Comment), value(value_) {} 332 | XMLComment(const XMLComment& src) = delete; 333 | 334 | StringView8 getValue() const { return value; } 335 | void setValue(StringView8 value_) { value = value_; } 336 | 337 | }; 338 | /* DOM node for a processing instruction; stores non-owning views of its target name and content. */ 339 | class XMLProcessingInstruction : public XMLNode { 340 | 341 | private: 342 | 343 | using StringView8 = Corecat::StringView8; 344 | 345 | private: 346 | 347 | StringView8 name; 348 | StringView8 value; 349 | 350 | public: 351 | 352 | XMLProcessingInstruction() : XMLNode(XMLNodeType::ProcessingInstruction), name(), value() {} 353 | /* Views are taken by value, like the other node constructors, so temporaries and const views are accepted too. */ XMLProcessingInstruction(StringView8 name_, StringView8 value_) : 354 | XMLNode(XMLNodeType::ProcessingInstruction), name(name_), value(value_) {} 355 | XMLProcessingInstruction(const XMLProcessingInstruction& src) = delete; 356 | 357 | StringView8 getName() const { return name; } 358
| void setName(StringView8 name_) { name = name_; } 359 | StringView8 getValue() const { return value; } 360 | void setValue(StringView8 value_) { value = value_; } 361 | 362 | }; 363 | 364 | class XMLDocument : public XMLNode { 365 | 366 | private: 367 | 368 | template 369 | using OutputStream = Corecat::OutputStream; 370 | using StringView8 = Corecat::StringView8; 371 | using FastAllocator = Corecat::FastAllocator<>; 372 | 373 | private: 374 | 375 | FastAllocator allocator; 376 | 377 | public: 378 | 379 | XMLDocument() : XMLNode(XMLNodeType::Document), allocator() {} 380 | XMLDocument(const XMLDocument& src) = delete; 381 | 382 | XMLElement& createElement(StringView8 name) { 383 | 384 | return *new(allocator.allocate(sizeof(XMLElement))) XMLElement(name); 385 | 386 | } 387 | XMLAttribute& createAttribute(StringView8 name, StringView8 value) { 388 | 389 | return *new(allocator.allocate(sizeof(XMLAttribute))) XMLAttribute(name, value); 390 | 391 | } 392 | XMLText& createText(StringView8 value) { 393 | 394 | return *new(allocator.allocate(sizeof(XMLText))) XMLText(value); 395 | 396 | } 397 | XMLCDATA& createCDATA(StringView8 value) { 398 | 399 | return *new(allocator.allocate(sizeof(XMLCDATA))) XMLCDATA(value); 400 | 401 | } 402 | XMLComment& createComment(StringView8 value) { 403 | 404 | return *new(allocator.allocate(sizeof(XMLComment))) XMLComment(value); 405 | 406 | } 407 | XMLProcessingInstruction& createProcessingInstruction(StringView8 name, StringView8 value) { 408 | 409 | return *new(allocator.allocate(sizeof(XMLProcessingInstruction))) XMLProcessingInstruction(name, value); 410 | 411 | } 412 | 413 | void clear() { 414 | 415 | allocator.clear(); 416 | 417 | } 418 | 419 | XMLElement& getRootElement() { 420 | 421 | for(auto& node : child()) if(node.getType() == XMLNodeType::Element) return static_cast(node); 422 | throw XMLDOMException("Root element not found"); 423 | 424 | } 425 | 426 | template 427 | void parse(char* data) { 428 | 429 | class Handler : 
public XMLHandlerBase { 430 | 431 | private: 432 | 433 | XMLDocument* document; 434 | XMLNode* cur; 435 | 436 | public: 437 | 438 | Handler(XMLDocument* document_) : document(document_), cur(nullptr) {} 439 | 440 | void startDocument() { cur = document; } 441 | void startElement(StringView8 name) { 442 | 443 | auto& element = document->createElement(name); 444 | cur->appendChild(element); 445 | cur = &element; 446 | 447 | } 448 | void endElement(StringView8 /*name*/) { 449 | 450 | cur = cur->parent; 451 | 452 | } 453 | void endAttributes(bool empty) { 454 | 455 | if(empty) cur = cur->parent; 456 | 457 | } 458 | void attribute(StringView8 name, StringView8 value) { 459 | 460 | static_cast(cur)->appendAttribute(document->createAttribute(name, value)); 461 | 462 | } 463 | void text(StringView8 value) { 464 | 465 | cur->appendChild(document->createText(value)); 466 | 467 | } 468 | void cdata(StringView8 value) { 469 | 470 | cur->appendChild(document->createCDATA(value)); 471 | 472 | } 473 | void comment(StringView8 value) { 474 | 475 | cur->appendChild(document->createComment(value)); 476 | 477 | } 478 | void processingInstruction(StringView8 name, StringView8 value) { 479 | 480 | cur->appendChild(document->createProcessingInstruction(name, value)); 481 | 482 | } 483 | 484 | }; 485 | 486 | assert(data); 487 | 488 | clear(); 489 | XMLParser parser; 490 | Handler handler(this); 491 | parser.parse(data, handler); 492 | 493 | } 494 | 495 | template 496 | void visit(H& handler) { 497 | 498 | handler.startDocument(); 499 | if(hasChildNodes()) { 500 | 501 | XMLNode* cur = &getFirstChild(); 502 | while(true) { 503 | 504 | switch(cur->getType()) { 505 | 506 | case XMLNodeType::Element: { 507 | 508 | auto& element = static_cast(*cur); 509 | handler.startElement(element.getName()); 510 | for(auto& attr : element.attribute()) 511 | handler.attribute(attr.getName(), attr.getValue()); 512 | bool empty = !cur->hasChildNodes(); 513 | handler.endAttributes(empty); 514 | if(!empty) { 
cur = &cur->getFirstChild(); continue; } 515 | break; 516 | 517 | } 518 | case XMLNodeType::Text: { 519 | 520 | auto& text = static_cast(*cur); 521 | handler.text(text.getValue()); 522 | break; 523 | 524 | } 525 | case XMLNodeType::CDATA: { 526 | 527 | auto& cdata = static_cast(*cur); 528 | handler.cdata(cdata.getValue()); 529 | break; 530 | 531 | } 532 | case XMLNodeType::Comment: { 533 | 534 | auto& comment = static_cast(*cur); 535 | handler.comment(comment.getValue()); 536 | break; 537 | 538 | } 539 | case XMLNodeType::ProcessingInstruction: { 540 | 541 | auto& pi = static_cast(*cur); 542 | handler.processingInstruction(pi.getName(), pi.getValue()); 543 | break; 544 | 545 | } 546 | default: throw XMLDOMException("Invalid node type"); 547 | 548 | } 549 | while(!cur->next) { 550 | 551 | cur = cur->parent; 552 | if(cur == this) break; 553 | auto name = static_cast(cur)->getName(); 554 | handler.endElement(name); 555 | 556 | } 557 | if(cur == this) break; 558 | cur = cur->next; 559 | 560 | } 561 | 562 | } 563 | handler.endDocument(); 564 | 565 | } 566 | void serialize(OutputStream& stream) { 567 | 568 | XMLSerializer serializer(stream); 569 | visit(serializer); 570 | 571 | } 572 | 573 | }; 574 | 575 | inline std::ostream& operator <<(std::ostream& stream, XMLDocument& document) { 576 | 577 | auto wrapper = Corecat::createWrapperOutputStream(stream); 578 | document.serialize(wrapper); 579 | return stream; 580 | 581 | } 582 | 583 | } 584 | } 585 | } 586 | 587 | 588 | #endif 589 | -------------------------------------------------------------------------------- /include/Cats/Textcat/XML/Parser.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * MIT License 4 | * 5 | * Copyright (c) 2016-2018 The Cats Project 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without 
restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | * 25 | */ 26 | 27 | #ifndef CATS_TEXTCAT_XML_PARSER_HPP 28 | #define CATS_TEXTCAT_XML_PARSER_HPP 29 | 30 | 31 | #include 32 | #include 33 | 34 | #include 35 | #include 36 | #include 37 | 38 | #include "Cats/Corecat/Util/Sequence.hpp" 39 | 40 | 41 | namespace Cats { 42 | namespace Textcat{ 43 | inline namespace XML { 44 | 45 | namespace Impl { 46 | 47 | template 48 | struct Include { 49 | 50 | static constexpr bool get(T t) { using namespace Corecat; return ContainSequence>::get(t); } 51 | 52 | }; 53 | 54 | template 55 | struct Exclude { 56 | 57 | static constexpr bool get(T t) { using namespace Corecat; return !ContainSequence>::get(t); } 58 | 59 | }; 60 | 61 | 62 | template 63 | struct Skipper { 64 | 65 | static size_t skip(char*& p) { 66 | 67 | using namespace Corecat; 68 | 69 | auto t = p; 70 | while(SequenceTable>>::get(*t)) ++t; 71 | const size_t length = t - p; 72 | p = t; 73 | return length; 74 | 75 | } 76 | 77 | }; 78 | 79 | 80 | using Space = Include; 81 | using Name = Exclude', '?'>; 82 | using AttributeName = Exclude', '?'>; 83 
| using AttributeValue1 = Exclude; 84 | using AttributeValueNoRef1 = Exclude; 85 | using AttributeValue2 = Exclude; 86 | using AttributeValueNoRef2 = Exclude; 87 | using Text = Exclude; 88 | using TextNoSpace = Exclude; 89 | using TextNoRef = Exclude; 90 | using TextNoSpaceRef = Exclude; 91 | 92 | struct Decimal { 93 | 94 | static constexpr unsigned char get(unsigned char t) { 95 | 96 | return (t >= '0' && t <= '9') ? (t - '0') : 255; 97 | 98 | } 99 | 100 | }; 101 | 102 | struct Hexadecimal { 103 | 104 | static constexpr unsigned char get(unsigned char t) { 105 | 106 | return (t >= '0' && t <= '9') ? (t - '0') 107 | : ((t >= 'A' && t <= 'F') ? (t - 'A' + 10) 108 | : ((t >= 'a' && t <= 'f') ? (t - 'a' + 10) : 255)); 109 | 110 | } 111 | 112 | }; 113 | 114 | } 115 | 116 | 117 | class XMLParseException : public Corecat::Exception { 118 | 119 | private: 120 | 121 | std::size_t pos; 122 | 123 | public: 124 | 125 | XMLParseException(const String8& data, std::size_t pos_) : Exception("XMLParseException: " + data), pos(pos_) {} 126 | 127 | }; 128 | 129 | class XMLParser { 130 | 131 | private: 132 | 133 | using StringView8 = Corecat::StringView8; 134 | 135 | public: 136 | 137 | enum class Flag : std::uint32_t { 138 | 139 | None = 0x00000000, 140 | TrimSpace = 0x00000001, 141 | NormalizeSpace = 0x00000002, 142 | EntityTranslation = 0x00000004, 143 | ClosingTagValidate = 0x00000008, 144 | 145 | Default = TrimSpace | EntityTranslation, 146 | 147 | }; 148 | friend constexpr bool operator &(Flag a, Flag b) { 149 | 150 | return static_cast(a) & static_cast(b); 151 | 152 | } 153 | friend constexpr Flag operator |(Flag a, Flag b) { 154 | 155 | return static_cast(static_cast(a) | static_cast(b)); 156 | 157 | } 158 | 159 | private: 160 | 161 | char* s; 162 | char* p; 163 | 164 | private: 165 | 166 | template 167 | void parseReference(char*& q) { 168 | 169 | using namespace Corecat::Util; 170 | 171 | switch(p[1]) { 172 | 173 | case 0: throw XMLParseException("Unexpected end of data", p 
- s); 174 | case '#': { 175 | 176 | if(p[2] == 'x') { 177 | 178 | p += 3; 179 | if(*p == ';') throw XMLParseException("Unexpected ;", p - s); 180 | std::uint32_t code = 0; 181 | for(unsigned char t; (t = SequenceTable>>::get(*p)) != 255; code = code * 16 + t, ++p); 182 | if(*p != ';') throw XMLParseException("Expected ;", p - s); 183 | ++p; 184 | // TODO: Code conversion 185 | *q = code; 186 | ++q; 187 | 188 | } else { 189 | 190 | p += 2; 191 | if(*p == ';') throw XMLParseException("Unexpected ;", p - s); 192 | std::uint32_t code = 0; 193 | for(unsigned char t; (t = SequenceTable>>::get(*p)) != 255; code = code * 10 + t, ++p); 194 | if(*p != ';') throw XMLParseException("Expected ;", p - s); 195 | ++p; 196 | // TODO: Code conversion 197 | *q = code; 198 | ++q; 199 | 200 | } 201 | return; 202 | 203 | } 204 | case 'a': { 205 | 206 | if(p[2] == 'm' && p[3] == 'p' && p[4] == ';') { 207 | 208 | // amp 209 | p += 5; 210 | *q = '&'; 211 | ++q; 212 | return; 213 | 214 | } 215 | if(p[2] == 'p' && p[3] == 'o' && p[4] == 's' && p[5] == ';') { 216 | 217 | // apos 218 | p += 6; 219 | *q = '\''; 220 | ++q; 221 | return; 222 | 223 | } 224 | break; 225 | 226 | } 227 | case 'g': { 228 | 229 | if(p[2] == 't' && p[3] == ';') { 230 | 231 | // gt 232 | p += 4; 233 | *q = '>'; 234 | ++q; 235 | return; 236 | 237 | } 238 | break; 239 | 240 | } 241 | case 'l': { 242 | 243 | if(p[2] == 't' && p[3] == ';') { 244 | 245 | // lt 246 | p += 4; 247 | *q = '<'; 248 | ++q; 249 | return; 250 | 251 | } 252 | break; 253 | 254 | } 255 | case 'q': { 256 | 257 | if(p[2] == 'u' && p[3] == 'o' && p[4] == 't' && p[5] == ';') { 258 | 259 | // quot 260 | p += 6; 261 | *q = '"'; 262 | ++q; 263 | return; 264 | 265 | } 266 | break; 267 | 268 | } 269 | default: { 270 | 271 | break; 272 | 273 | } 274 | 275 | } 276 | throw XMLParseException("Invalid reference", p - s); 277 | 278 | } 279 | template 280 | void parseXMLDeclaration(H& /*handler*/) { 281 | 282 | using namespace Corecat::Util; 283 | 284 | 
Impl::Skipper::skip(p); 285 | 286 | // Parse "version" 287 | if(p[0] != 'v' || p[1] != 'e' || p[2] != 'r' || p[3] != 's' || p[4] != 'i' || p[5] != 'o' || p[6] != 'n') 288 | throw XMLParseException("Expected version", p - s); 289 | p += 7; 290 | Impl::Skipper::skip(p); 291 | if(*p != '=') throw XMLParseException("Expected =", p - s); 292 | ++p; 293 | Impl::Skipper::skip(p); 294 | if(*p == '"') { 295 | 296 | ++p; 297 | Impl::Skipper::skip(p); 298 | if(*p != '"') throw XMLParseException("Expected \"", p - s); 299 | 300 | } else if(*p == '\'') { 301 | 302 | ++p; 303 | Impl::Skipper::skip(p); 304 | if(*p != '\'') throw XMLParseException("Expected '", p - s); 305 | 306 | } else throw XMLParseException("Expected \" or '", p - s); 307 | ++p; 308 | 309 | if(*p != '?' && !SequenceTable>>::get(*p)) 310 | throw XMLParseException("Unexpected character", p - s); 311 | Impl::Skipper::skip(p); 312 | 313 | // Parse "encoding" 314 | if(p[0] == 'e' && p[1] == 'n' && p[2] == 'c' && p[3] == 'o' && p[4] == 'd' && p[5] == 'i' && p[6] == 'n' && p[7] == 'g') { 315 | 316 | p += 8; 317 | Impl::Skipper::skip(p); 318 | if(*p != '=') throw XMLParseException("Expected =", p - s); 319 | ++p; 320 | Impl::Skipper::skip(p); 321 | if(*p == '"') { 322 | 323 | ++p; 324 | Impl::Skipper::skip(p); 325 | if(*p != '"') throw XMLParseException("Expected \"", p - s); 326 | 327 | } else if(*p == '\'') { 328 | 329 | ++p; 330 | Impl::Skipper::skip(p); 331 | if(*p != '\'') throw XMLParseException("Expected '", p - s); 332 | 333 | } else throw XMLParseException("Expected \" or '", p - s); 334 | ++p; 335 | 336 | } 337 | 338 | if(*p != '?' 
&& !SequenceTable>>::get(*p)) 339 | throw XMLParseException("Unexpected character", p - s); 340 | Impl::Skipper::skip(p); 341 | 342 | // Parse "standalone" 343 | if(p[0] == 's' && p[1] == 't' && p[2] == 'a' && p[3] == 'n' && p[4] == 'd' && p[5] == 'a' && p[6] == 'l' && p[7] == 'o' && p[8] == 'n' && p[9] == 'e') { 344 | 345 | p += 10; 346 | Impl::Skipper::skip(p); 347 | if(*p != '=') throw XMLParseException("Expected =", p - s); 348 | ++p; 349 | Impl::Skipper::skip(p); 350 | if(*p == '"') { 351 | 352 | ++p; 353 | Impl::Skipper::skip(p); 354 | if(*p != '"') throw XMLParseException("Expected \"", p - s); 355 | 356 | } else if(*p == '\'') { 357 | 358 | ++p; 359 | Impl::Skipper::skip(p); 360 | if(*p != '\'') throw XMLParseException("Expected '", p - s); 361 | 362 | } else throw XMLParseException("Expected \" or '", p - s); 363 | ++p; 364 | 365 | } 366 | 367 | Impl::Skipper::skip(p); 368 | if(p[0] != '?' || p[1] != '>') throw XMLParseException("Expected ?>", p - s); 369 | p += 2; 370 | 371 | } 372 | template 373 | void parseDoctype(H& /*handler*/) { 374 | 375 | throw XMLParseException("Not implemented", p - s); 376 | 377 | } 378 | template 379 | void parseComment(H& handler) { 380 | 381 | StringView8 comment(p, 1); 382 | // Until "-->" 383 | while(*p && (p[0] != '-' || p[1] != '-' || p[2] != '>')) ++p; 384 | if(!*p) throw XMLParseException("Unexpected end of data", p - s); 385 | comment.setLength(p - comment.getData()); 386 | p += 3; 387 | handler.comment(comment); 388 | 389 | } 390 | template 391 | void parseProcessingInstruction(H& handler) { 392 | 393 | StringView8 target(p, 1); 394 | target.setLength(Impl::Skipper::skip(p)); 395 | if(!target.getLength()) throw XMLParseException("Expected PI target", p - s); 396 | if((p[0] != '?' || p[1] != '>') && !Impl::Skipper::skip(p)) 397 | throw XMLParseException("Expected white space", p - s); 398 | 399 | StringView8 content(p, 1); 400 | // Until "?>" 401 | while(*p && (p[0] != '?' 
|| p[1] != '>')) ++p; 402 | if(!*p) throw XMLParseException("Unexpected end of data", p - s); 403 | content.setLength(p - content.getData()); 404 | p += 2; 405 | 406 | handler.processingInstruction(target, content); 407 | 408 | } 409 | template 410 | void parseCDATA(H& handler) { 411 | 412 | StringView8 text(p, 1); 413 | // Until "]]>" 414 | while(*p && (p[0] != ']' || p[1] != ']' || p[2] != '>')) ++p; 415 | if(!*p) throw XMLParseException("Unexpected end of data", p - s); 416 | text.setLength(p - text.getData()); 417 | p += 3; 418 | handler.cdata(text); 419 | 420 | } 421 | template 422 | void parseElement(H& handler) { 423 | 424 | using namespace Corecat::Util; 425 | 426 | // Parse element type 427 | StringView8 name(p, 1); 428 | name.setLength(Impl::Skipper::skip(p)); 429 | if(!name.getLength()) throw XMLParseException("Expected element type", p - s); 430 | bool empty = false; 431 | if(*p == '>') { 432 | 433 | ++p; 434 | handler.startElement(name); 435 | 436 | } else if(*p == '/') { 437 | 438 | if(p[1] != '>') throw XMLParseException("Expected >", p + 1 - s); 439 | p += 2; 440 | handler.startElement(name); 441 | empty = true; 442 | 443 | } else { 444 | 445 | ++p; 446 | handler.startElement(name); 447 | Impl::Skipper::skip(p); 448 | while(SequenceTable>>::get(*p)) { 449 | 450 | // Parse attribute name 451 | StringView8 name(p, 1); 452 | name.setLength(Impl::Skipper::skip(p)); 453 | if(!name.getLength()) throw XMLParseException("Expected attribute name", p - s); 454 | Impl::Skipper::skip(p); 455 | if(*p != '=') throw XMLParseException("Expected =", p - s); 456 | ++p; 457 | Impl::Skipper::skip(p); 458 | 459 | // Parse attribute value 460 | StringView8 value; 461 | if(*p == '"') { 462 | 463 | ++p; 464 | value.setData(p, 0); 465 | if(F & Flag::EntityTranslation) { 466 | /* In-place decode: p is the read cursor, q the write cursor; q falls behind p once a reference has been replaced. */ 467 | auto q = p; 468 | while(true) { 469 | /* Move the run just scanned, [p - len, p), down to q (same direction as the text branches). */ 470 | auto len = Impl::Skipper::skip(p); 471 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s); 472 | if(p != q + len) std::copy(p - 
len, p, q); 473 | q += len; 474 | if(*p == '&') parseReference(q); 475 | else break; 476 | 477 | } 478 | value.setLength(q - value.getData()); 479 | 480 | } else { 481 | 482 | value.setLength(Impl::Skipper::skip(p)); 483 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s); 484 | 485 | } 486 | ++p; 487 | 488 | } else if(*p == '\'') { 489 | 490 | ++p; 491 | value.setData(p, 0); 492 | if(F & Flag::EntityTranslation) { 493 | /* Same in-place decode as the double-quoted case: p reads, q writes. */ 494 | auto q = p; 495 | while(true) { 496 | 497 | auto len = Impl::Skipper::skip(p); 498 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s); 499 | if(p != q + len) std::copy(p - len, p, q); 500 | q += len; 501 | if(*p == '&') parseReference(q); 502 | else break; 503 | 504 | } 505 | value.setLength(q - value.getData()); 506 | 507 | } else { 508 | 509 | value.setLength(Impl::Skipper::skip(p)); 510 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s); 511 | 512 | } 513 | ++p; 514 | 515 | } else throw XMLParseException("Expected \" or '", p - s); 516 | handler.attribute(name, value); 517 | Impl::Skipper::skip(p); 518 | 519 | } 520 | if(*p == '>') { 521 | 522 | ++p; 523 | 524 | } else if(*p == '/') { 525 | 526 | if(p[1] != '>') throw XMLParseException("Expected >", p + 1 - s); 527 | p += 2; 528 | empty = true; 529 | 530 | } else throw XMLParseException("Unexpected character", p - s); /* the offending character is at p itself */ 531 | 532 | } 533 | handler.endAttributes(empty); 534 | if(!empty) { 535 | 536 | bool c = true; 537 | do { 538 | 539 | // Parse text 540 | if(F & Flag::TrimSpace) Impl::Skipper::skip(p); 541 | if(*p != '<') { 542 | 543 | if(F & Flag::EntityTranslation) { 544 | 545 | if(F & Flag::NormalizeSpace) { 546 | 547 | StringView8 text(p, 1); 548 | auto q = p; 549 | while(true) { 550 | 551 | auto len = Impl::Skipper::skip(p); 552 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s); 553 | if(p != q + len) std::copy(p - len, p, q); 554 | q += len; 555 | if(*p == '&') parseReference(q); 556 | else 
if(*p != '<') { Impl::Skipper::skip(p); *(q++) = ' '; } 557 | else break; 558 | 559 | } 560 | if(F & Flag::TrimSpace && q[-1] == ' ') --q; 561 | text.setLength(q - text.getData()); 562 | handler.text(text); 563 | 564 | } else { 565 | 566 | StringView8 text(p, 1); 567 | auto q = p; 568 | while(true) { 569 | 570 | auto len = Impl::Skipper::skip(p); 571 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s); 572 | if(p != q + len) std::copy(p - len, p, q); 573 | q += len; 574 | if(*p == '&') parseReference(q); 575 | else break; 576 | 577 | } 578 | --q; 579 | if(F & Flag::TrimSpace) 580 | for(; SequenceTable>>::get(*q); --q); 581 | ++q; 582 | text.setLength(q - text.getData()); 583 | handler.text(text); 584 | 585 | } 586 | 587 | } else { 588 | 589 | if(F & Flag::NormalizeSpace) { 590 | 591 | StringView8 text(p, 1); 592 | auto q = p; 593 | while(true) { 594 | 595 | auto len = Impl::Skipper::skip(p); 596 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s); 597 | if(p != q + len) std::copy(p - len, p, q); 598 | q += len; 599 | if(*p != '<') { Impl::Skipper::skip(p); *(q++) = ' '; } 600 | else break; 601 | 602 | } 603 | --q; 604 | if(F & Flag::TrimSpace) 605 | for(; SequenceTable>>::get(*q); --q); 606 | ++q; 607 | text.setLength(q - text.getData()); 608 | handler.text(text); 609 | 610 | } else { 611 | 612 | StringView8 text(p, 1); 613 | Impl::Skipper::skip(p); 614 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s); 615 | auto q = p - 1; 616 | if(F & Flag::TrimSpace) 617 | for(; SequenceTable>>::get(*q); --q); 618 | ++q; 619 | text.setLength(q - text.getData()); 620 | handler.text(text); 621 | 622 | } 623 | 624 | } 625 | 626 | } 627 | 628 | ++p; 629 | switch(*p) { 630 | 631 | case '!': { 632 | 633 | ++p; 634 | if(p[0] == '-' && p[1] == '-') { 635 | 636 | p += 2; 637 | parseComment(handler); 638 | 639 | } else if(p[0] == '[' && p[1] == 'C' && p[2] == 'D' && p[3] == 'A' && p[4] == 'T' && p[5] == 'A' && p[6] == '[') { 640 
| 641 | // "[CDATA[" 642 | p += 7; 643 | parseCDATA(handler); 644 | 645 | } else throw XMLParseException("Unexpected character", p - s); 646 | break; 647 | 648 | } 649 | case '/': { 650 | 651 | ++p; 652 | if(F & Flag::ClosingTagValidate) { 653 | 654 | StringView8 endName(p, 1); 655 | Impl::Skipper::skip(p); 656 | endName.setLength(p - endName.getData()); 657 | Impl::Skipper::skip(p); 658 | if(*p != '>') throw XMLParseException("Expected >", p - s); 659 | ++p; 660 | handler.endElement(endName); 661 | 662 | } else { 663 | 664 | StringView8 endName(p, name.getLength()); 665 | if(endName != name) throw XMLParseException("Unmatch element type", p - s); 666 | p += name.getLength(); 667 | Impl::Skipper::skip(p); 668 | if(*p != '>') throw XMLParseException("Expected >", p - s); 669 | ++p; 670 | handler.endElement(endName); 671 | 672 | } 673 | c = false; 674 | break; 675 | 676 | } 677 | case '?': { 678 | 679 | ++p; 680 | parseProcessingInstruction(handler); 681 | break; 682 | 683 | } 684 | default: { 685 | 686 | parseElement(handler); 687 | break; 688 | 689 | } 690 | 691 | } 692 | 693 | } while(c); 694 | 695 | } 696 | 697 | } 698 | 699 | public: 700 | 701 | XMLParser() = default; 702 | 703 | template 704 | void parse(char* data, H& handler) { 705 | 706 | using namespace Corecat::Util; 707 | 708 | assert(data); 709 | 710 | s = data; 711 | p = data; 712 | handler.startDocument(); 713 | 714 | // Parse BOM 715 | if(static_cast(p[0]) == 0xEF && 716 | static_cast(p[1]) == 0xBB && 717 | static_cast(p[2]) == 0xBF) { 718 | 719 | p += 3; 720 | 721 | } 722 | 723 | // Parse XML declaration 724 | if(p[0] == '<' && p[1] == '?' 
&& p[2] == 'x' && p[3] == 'm' && p[4] == 'l' && SequenceTable>>::get(p[5])) { 725 | 726 | // "(handler); 729 | 730 | } 731 | while(true) { 732 | 733 | Impl::Skipper::skip(p); 734 | if(!*p) break; 735 | else if(*p == '<') { 736 | 737 | ++p; 738 | if(*p == '!') { 739 | 740 | ++p; 741 | if(p[0] == '-' && p[1] == '-') { 742 | 743 | p += 2; 744 | parseComment(handler); 745 | 746 | } else if(p[0] == 'D' && p[1] == 'O' && p[2] == 'C' && p[3] == 'T' && p[4] == 'Y' && p[5] == 'P' && p[6] == 'E') { 747 | 748 | // "DOCTYPE" 749 | p += 7; 750 | parseDoctype(handler); 751 | 752 | } else throw XMLParseException("Unexpected character", p - s); 753 | 754 | } else if(*p == '?') { 755 | 756 | ++p; 757 | parseProcessingInstruction(handler); 758 | 759 | } else { 760 | 761 | parseElement(handler); 762 | 763 | } 764 | 765 | } else throw XMLParseException("Expected <", p - s); 766 | 767 | } 768 | 769 | handler.endDocument(); 770 | 771 | } 772 | 773 | }; 774 | 775 | } 776 | } 777 | } 778 | 779 | 780 | #endif 781 | --------------------------------------------------------------------------------