├── .gitignore
├── data
└── test1.xml
├── README.md
├── CMakeLists.txt
├── appveyor.yml
├── LICENSE.md
├── doc
└── XML.md
├── include
└── Cats
│ ├── Textcat.hpp
│ └── Textcat
│ ├── XML.hpp
│ └── XML
│ ├── Handler.hpp
│ ├── Serializer.hpp
│ ├── Document.hpp
│ └── Parser.hpp
├── example
├── XML_DOMWriter
│ └── XML_DOMWriter.cpp
├── XML_SAXWriter
│ └── XML_SAXWriter.cpp
├── XML_DOMReader
│ └── XML_DOMReader.cpp
└── XML_SAXReader
│ └── XML_SAXReader.cpp
└── .travis.yml
/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | /include/Cats/Corecat
3 |
--------------------------------------------------------------------------------
/data/test1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | The quick brown fox jumps over the lazy dog.
10 | I can eat glass, it doesn't hurt me.
11 |
12 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Textcat: Text data processing library
2 |
3 | Textcat is a *The Cats Project* library for processing text data.
4 |
5 | CI | Status
6 | :---: | :---:
7 | AppVeyor | [Build status](https://ci.appveyor.com/project/SuperSodaSea/Textcat)
8 | Travis CI | [Build status](https://travis-ci.org/SuperSodaSea/Textcat)
9 |
10 | Currently supported formats:
11 |
12 | + [XML](/doc/XML.md)
13 |
14 |
15 | ## License
16 |
17 | [MIT License](/LICENSE.md)
18 |
19 |
20 | ## Requirements
21 |
22 | + C++14
23 | + [Corecat][Corecat]
24 |
25 |
26 | [Corecat]: https://github.com/SuperSodaSea/Corecat
27 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.1)
2 |
3 | # The library is header-only (see doc/XML.md); this file only builds the examples.
4 | project(Textcat LANGUAGES CXX)
5 |
6 | # The library requires C++14. Without CMAKE_CXX_STANDARD_REQUIRED, CMake silently
7 | # falls back to an older standard when the compiler cannot provide the requested one.
8 | set(CMAKE_CXX_STANDARD 14)
9 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
10 |
11 | # Flags are appended to the global variables, so anything passed on the command
12 | # line (the CI uses -DCMAKE_CXX_FLAGS=-m32 / -m64) is kept. Note that -O2 and /O2
13 | # are applied to every build configuration.
14 | if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
15 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -O2")
16 |     # -s strips the symbol table from the linked executables.
17 |     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-s")
18 |     if(WIN32)
19 |         # Link the examples statically when using a GNU-style toolchain on Windows.
20 |         set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
21 |     endif()
22 | elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
23 |     # /EHsc: standard C++ exception handling; /MT: static runtime library.
24 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_SCL_SECURE_NO_WARNINGS /EHsc /W3 /O2 /MT")
25 | endif()
26 |
27 | # Each name is both the target name and the path example/<name>/<name>.cpp.
28 | set(EXAMPLE
29 |     XML_SAXReader
30 |     XML_SAXWriter
31 |     XML_DOMReader
32 |     XML_DOMWriter)
33 |
34 | foreach(example ${EXAMPLE})
35 |     add_executable(${example} "example/${example}/${example}.cpp")
36 |     # Scope the header search path to the target instead of the whole directory.
37 |     target_include_directories(${example} PRIVATE "${PROJECT_SOURCE_DIR}/include")
38 | endforeach()
39 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | os: Visual Studio 2017
2 | version: 0.0.0.{build}
3 |
4 | configuration: # Build configurations; test_script uses %CONFIGURATION% to find the matching output directory.
5 | - Debug
6 | - Release
7 |
8 | environment: # VS_VERSION and VS_PLATFORM are substituted into the cmake command in before_build.
9 | matrix:
10 | - VS_VERSION: 15 2017
11 | VS_PLATFORM: win32
12 | - VS_VERSION: 15 2017
13 | VS_PLATFORM: x64
14 |
15 | before_build: # Clone Corecat beside this checkout, copy its include tree into Textcat's include directory, then generate the solution in build/.
16 | - cd ..
17 | - git clone -q --branch=master https://github.com/SuperSodaSea/Corecat.git
18 | - cd Corecat
19 | - git checkout -fq master
20 | - xcopy include ..\Textcat\include /E /I /Q
21 | - cd ..\Textcat
22 | - cmake -H. -Bbuild -G "Visual Studio %VS_VERSION%" -DCMAKE_GENERATOR_PLATFORM=%VS_PLATFORM%
23 |
24 | build: # Builds the solution generated by the cmake step above.
25 | project: build/Textcat.sln
26 | parallel: true
27 | verbosity: minimal
28 |
29 | test_script: # Runs the four example programs; the two readers are given data\test1.xml.
30 | - build\%CONFIGURATION%\XML_DOMReader.exe data\test1.xml
31 | - build\%CONFIGURATION%\XML_DOMWriter.exe
32 | - build\%CONFIGURATION%\XML_SAXReader.exe data\test1.xml
33 | - build\%CONFIGURATION%\XML_SAXWriter.exe
34 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2016-2018 The Cats Project
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/doc/XML.md:
--------------------------------------------------------------------------------
1 | # Textcat::XML
2 |
3 | Textcat::XML contains the XML parser and serializer.
4 |
5 |
6 | ## About XML
7 |
8 | XML stands for "Extensible Markup Language".
9 |
10 | Specification for XML:
11 |
12 | * [Extensible Markup Language (XML) 1.0](https://www.w3.org/TR/xml/)
13 | * [Extensible Markup Language (XML) 1.1](https://www.w3.org/TR/xml11/)
14 |
15 | Specification for DOM:
16 |
17 | * [Document Object Model Level 1](https://www.w3.org/TR/REC-DOM-Level-1/)
18 |
19 | ## Features
20 |
21 | * **Easy to use**. Textcat::XML provides both SAX and DOM style API.
22 | * **High-performance**. Textcat::XML draws on techniques from RapidXml and RapidJSON, which are probably the fastest choices for XML and JSON. Under the same conditions, it is sometimes even faster than RapidXml.
23 | * **Header-only**. Textcat::XML is lightweight and only requires [Corecat](https://github.com/SuperSodaSea/Corecat), which is the core of *The Cats Project* and is also header-only.
24 |
25 |
26 | ## Start in a minute
27 |
28 | ```cpp
29 | #include <iostream>
30 |
31 | #include "Cats/Textcat/XML.hpp"
32 |
33 | using namespace Cats::Textcat::XML;
34 |
35 | int main() {
36 |
37 | char data[] = R"(<list><person name="SuperSodaSea" gender="male" age="17"/></list>)";
38 |
39 | XMLDocument document;
40 | document.parse<>(data);
41 | std::cout << document << std::endl;
42 |
43 | return 0;
44 |
45 | }
46 | ```
47 |
--------------------------------------------------------------------------------
/include/Cats/Textcat.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #ifndef CATS_TEXTCAT_HPP
28 | #define CATS_TEXTCAT_HPP
29 |
30 |
31 | #include "Textcat/XML.hpp"
32 |
33 |
34 | #endif
35 |
--------------------------------------------------------------------------------
/include/Cats/Textcat/XML.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #ifndef CATS_TEXTCAT_XML_HPP
28 | #define CATS_TEXTCAT_XML_HPP
29 |
30 |
31 | #include "XML/Document.hpp"
32 | #include "XML/Handler.hpp"
33 | #include "XML/Parser.hpp"
34 | #include "XML/Serializer.hpp"
35 |
36 |
37 | #endif
38 |
--------------------------------------------------------------------------------
/example/XML_DOMWriter/XML_DOMWriter.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #include <iostream>
28 |
29 | #include "Cats/Textcat/XML.hpp"
30 |
31 | using namespace Cats::Textcat;
32 | // Builds a small document with the DOM API (a list element holding one person element with three attributes) and prints it to std::cout.
33 | int main() {
34 |
35 |     XMLDocument document;
36 |     XMLElement list("list"); document.appendChild(list);
37 |     XMLElement person("person"); list.appendChild(person);
38 |     XMLAttribute name("name", "SuperSodaSea"); person.appendAttribute(name);
39 |     XMLAttribute gender("gender", "male"); person.appendAttribute(gender);
40 |     XMLAttribute age("age", "17"); person.appendAttribute(age);
41 |     std::cout << document << std::endl;
42 |
43 |     return 0;
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/example/XML_SAXWriter/XML_SAXWriter.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #include <iostream>
28 |
29 | #include "Cats/Corecat/Data/Stream.hpp"
30 | #include "Cats/Textcat/XML.hpp"
31 |
32 | using namespace Cats::Corecat;
33 | using namespace Cats::Textcat;
34 | // Drives XMLSerializer by hand with the same event sequence a SAX parser would emit, writing to std::cout.
35 | int main() {
36 |
37 |     auto wrapper = createWrapperOutputStream(std::cout);
38 |     XMLSerializer s(wrapper);
39 |     s.startDocument();
40 |     s.startElement("list");
41 |     s.endAttributes(false);         // "list" has children, so its start tag stays open
42 |     s.startElement("person");
43 |     s.attribute("name", "SuperSodaSea");
44 |     s.attribute("gender", "male");
45 |     s.attribute("age", "17");
46 |     s.endAttributes(true);          // empty element: closed here, no endElement("person") follows
47 |     s.endElement("list");
48 |     s.endDocument();
49 |     std::cout << std::endl;
50 |
51 |     return 0;
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/include/Cats/Textcat/XML/Handler.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #ifndef CATS_TEXTCAT_XML_HANDLER_HPP
28 | #define CATS_TEXTCAT_XML_HANDLER_HPP
29 |
30 |
31 | #include "Cats/Corecat/Text/String.hpp"
32 |
33 |
34 | namespace Cats {
35 | namespace Textcat{
36 | inline namespace XML {
37 |
38 | class XMLHandlerBase {
39 | // Base class for SAX-style event handlers: every callback below is an empty inline function, so a derived handler only redefines the events it needs.
40 | protected:
41 |
42 | using StringView8 = Corecat::StringView8;
43 |
44 | public:
45 | // The callbacks are not virtual. NOTE(review): presumably the caller is templated on the concrete handler type; confirm in Parser.hpp.
46 | void startDocument() {}
47 | void endDocument() {}
48 | void startElement(StringView8 /*name*/) {}
49 | void endElement(StringView8 /*name*/) {}
50 | void endAttributes(bool /*empty*/) {}
51 | void doctype() {}
52 | void attribute(StringView8 /*name*/, StringView8 /*value*/) {}
53 | void text(StringView8 /*value*/) {}
54 | void cdata(StringView8 /*value*/) {}
55 | void comment(StringView8 /*value*/) {}
56 | void processingInstruction(StringView8 /*name*/, StringView8 /*value*/) {}
57 |
58 | };
59 |
60 | }
61 | }
62 | }
63 |
64 |
65 | #endif
66 |
--------------------------------------------------------------------------------
/example/XML_DOMReader/XML_DOMReader.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #include <fstream>
28 | #include <iostream>
29 | #include <vector>
30 |
31 | #include "Cats/Textcat/XML.hpp"
32 |
33 | using namespace Cats::Corecat;
34 | using namespace Cats::Textcat;
35 |
36 | // Reads the file at `path` in binary mode and returns its bytes followed by a
37 | // terminating NUL byte (main() passes data.data() straight to the parser).
38 | // Throws IOException when the file cannot be opened, sized or read completely.
39 | std::vector<char> readFile(const char* path) {
40 |
41 |     std::ifstream is(path, std::ios::binary);
42 |     if(!is) throw IOException("Cannot read file");
43 |     is.seekg(0, std::ios::end);
44 |     // tellg() reports failure as -1; casting that directly to size_t would request a huge buffer.
45 |     const std::streamoff length = is.tellg();
46 |     if(length < 0) throw IOException("Cannot read file");
47 |     const std::size_t size = static_cast<std::size_t>(length);
48 |     is.seekg(0);
49 |     std::vector<char> data(size + 1);
50 |     // A short read would leave the tail of the buffer zero-filled, so treat it as an error.
51 |     if(!is.read(data.data(), static_cast<std::streamsize>(size))) throw IOException("Cannot read file");
52 |     data[size] = 0;
53 |     return data;
54 |
55 | }
56 |
57 | // Parses every file named on the command line into an XMLDocument and prints it.
58 | // Returns 1 (after printing the message to std::cerr) when no file is given or an exception is thrown.
59 | int main(int argc, char** argv) {
60 |
61 |     try {
62 |
63 |         if(argc < 2) throw InvalidArgumentException("File name needed");
64 |
65 |         for(int i = 1; i < argc; ++i) {
66 |
67 |             auto data = readFile(argv[i]);
68 |             XMLDocument document;
69 |             document.parse<>(data.data());
70 |             std::cout << document << std::endl;
71 |
72 |         }
73 |
74 |     } catch(const std::exception& e) { std::cerr << e.what() << std::endl; return 1; }
75 |
76 |     return 0;
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: cpp
2 |
3 | sudo: required
4 |
5 | matrix: # g++-5 and clang++ on Linux plus clang++ on macOS, each with FLAGS -m32/-m64 and CONFIG Debug/Release; EVAL is eval'ed in before_script.
6 | include:
7 | - os: linux
8 | env: OS=Linux EVAL="CXX=g++-5" FLAGS="-m32" CONFIG=Debug
9 | addons:
10 | apt:
11 | sources: ['ubuntu-toolchain-r-test']
12 | packages: ['g++-5-multilib', 'lib32stdc++6', 'linux-libc-dev']
13 | - os: linux
14 | env: OS=Linux EVAL="CXX=g++-5" FLAGS="-m32" CONFIG=Release
15 | addons:
16 | apt:
17 | sources: ['ubuntu-toolchain-r-test']
18 | packages: ['g++-5-multilib', 'lib32stdc++6', 'linux-libc-dev']
19 | - os: linux
20 | env: OS=Linux EVAL="CXX=g++-5" FLAGS="-m64" CONFIG=Debug
21 | addons:
22 | apt:
23 | sources: ['ubuntu-toolchain-r-test']
24 | packages: ['g++-5']
25 | - os: linux
26 | env: OS=Linux EVAL="CXX=g++-5" FLAGS="-m64" CONFIG=Release
27 | addons:
28 | apt:
29 | sources: ['ubuntu-toolchain-r-test']
30 | packages: ['g++-5']
31 | - os: linux
32 | env: OS=Linux EVAL="CXX=clang++" FLAGS="-m32" CONFIG=Debug
33 | addons:
34 | apt:
35 | sources: ['ubuntu-toolchain-r-test']
36 | packages: ['g++-5-multilib', 'lib32stdc++6', 'linux-libc-dev']
37 | - os: linux
38 | env: OS=Linux EVAL="CXX=clang++" FLAGS="-m32" CONFIG=Release
39 | addons:
40 | apt:
41 | sources: ['ubuntu-toolchain-r-test']
42 | packages: ['g++-5-multilib', 'lib32stdc++6', 'linux-libc-dev']
43 | - os: linux
44 | env: OS=Linux EVAL="CXX=clang++" FLAGS="-m64" CONFIG=Debug
45 | addons:
46 | apt:
47 | sources: ['ubuntu-toolchain-r-test']
48 | packages: ['g++-5']
49 | - os: linux
50 | env: OS=Linux EVAL="CXX=clang++" FLAGS="-m64" CONFIG=Release
51 | addons:
52 | apt:
53 | sources: ['ubuntu-toolchain-r-test']
54 | packages: ['g++-5']
55 | - os: osx
56 | env: OS=MacOS EVAL="CXX=clang++" FLAGS="-m32" CONFIG=Debug
57 | - os: osx
58 | env: OS=MacOS EVAL="CXX=clang++" FLAGS="-m32" CONFIG=Release
59 | - os: osx
60 | env: OS=MacOS EVAL="CXX=clang++" FLAGS="-m64" CONFIG=Debug
61 | - os: osx
62 | env: OS=MacOS EVAL="CXX=clang++" FLAGS="-m64" CONFIG=Release
63 |
64 | before_script: # Selects the compiler via EVAL, copies the Corecat include tree into the Textcat checkout, then configures into build/ with the job's FLAGS and CONFIG.
65 | - if [ "$OS" == "Linux" ]; then sudo ln -s /usr/include/asm-generic /usr/include/asm; fi
66 | - eval "${EVAL}"
67 | - $CXX -v
68 | - cd ..
69 | - git clone -q --branch=master https://github.com/SuperSodaSea/Corecat.git
70 | - cd Corecat
71 | - git checkout -fq master
72 | - cp -r include ../Textcat
73 | - cd ../Textcat
74 | - cmake -H. -Bbuild -DCMAKE_CXX_COMPILER=$CXX -DCMAKE_CXX_FLAGS=$FLAGS -DCMAKE_BUILD_TYPE=$CONFIG
75 |
76 | script:
77 | - make -C build
78 | # The examples run as part of `script` so that a failing example fails the build;
79 | # Travis CI ignores the exit status of commands listed under `after_success`.
80 | - ./build/XML_DOMReader data/test1.xml
81 | - ./build/XML_DOMWriter
82 | - ./build/XML_SAXReader data/test1.xml
83 | - ./build/XML_SAXWriter
84 |
--------------------------------------------------------------------------------
/include/Cats/Textcat/XML/Serializer.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #ifndef CATS_TEXTCAT_XML_SERIALIZER_HPP
28 | #define CATS_TEXTCAT_XML_SERIALIZER_HPP
29 |
30 |
31 | #include
32 | #include
33 |
34 | #include
35 |
36 | #include "Cats/Corecat/Data/Stream/OutputStream.hpp"
37 | #include "Cats/Corecat/Text/String.hpp"
38 |
39 | #include "Handler.hpp"
40 |
41 |
42 | namespace Cats {
43 | namespace Textcat{
44 | inline namespace XML {
45 |
46 | class XMLSerializer : public XMLHandlerBase {
47 |     // Handler that turns SAX-style events back into XML text on an output stream.
48 | private:
49 |
50 |     template <typename T>
51 |     using OutputStream = Corecat::OutputStream<T>;
52 |     using StringView8 = Corecat::StringView8;
53 |
54 | private:
55 |     // Not owned; the stream must outlive the serializer (or be replaced via setStream()).
56 |     OutputStream<char>* stream;
57 |
58 | public:
59 |
60 |     XMLSerializer(OutputStream<char>& stream_) : stream(&stream_) {}
61 |     // Names and values are written verbatim by every method below: no escaping of &, < or quotes is performed here.
62 |     void startDocument() {}
63 |     void endDocument() {}
64 |     void startElement(StringView8 name) {
65 |         // Writes "<name"; the tag is closed later by endAttributes().
66 |         stream->writeAll("<", 1);
67 |         stream->writeAll(name.getData(), name.getLength());
68 |
69 |     }
70 |     void endElement(StringView8 name) {
71 |         // Writes "</name>".
72 |         stream->writeAll("</", 2);
73 |         stream->writeAll(name.getData(), name.getLength());
74 |         stream->writeAll(">", 1);
75 |
76 |     }
77 |     void endAttributes(bool empty) {
78 |         // Closes the start tag: "/>" for an empty element, ">" otherwise.
79 |         if(empty) stream->writeAll("/>", 2);
80 |         else stream->writeAll(">", 1);
81 |
82 |     }
83 |     void doctype() {}
84 |     void attribute(StringView8 name, StringView8 value) {
85 |         // Writes ' name="value"' (with the leading space).
86 |         stream->writeAll(" ", 1);
87 |         stream->writeAll(name.getData(), name.getLength());
88 |         stream->writeAll("=\"", 2);
89 |         stream->writeAll(value.getData(), value.getLength());
90 |         stream->writeAll("\"", 1);
91 |
92 |     }
93 |     void text(StringView8 value) {
94 |
95 |         stream->writeAll(value.getData(), value.getLength());
96 |
97 |     }
98 |     void cdata(StringView8 value) {
99 |         // Writes "<![CDATA[value]]>".
100 |         stream->writeAll("<![CDATA[", 9);
101 |         stream->writeAll(value.getData(), value.getLength());
102 |         stream->writeAll("]]>", 3);
103 |
104 |     }
105 |     void comment(StringView8 value) {
106 |         // Writes "<!--value-->".
107 |         stream->writeAll("<!--", 4);
108 |         stream->writeAll(value.getData(), value.getLength());
109 |         stream->writeAll("-->", 3);
110 |
111 |     }
112 |     void processingInstruction(StringView8 name, StringView8 value) {
113 |         // Writes "<?name value?>".
114 |         stream->writeAll("<?", 2);
115 |         stream->writeAll(name.getData(), name.getLength());
116 |         stream->writeAll(" ", 1);
117 |         stream->writeAll(value.getData(), value.getLength());
118 |         stream->writeAll("?>", 2);
119 |
120 |     }
121 |
122 |     OutputStream<char>& getStream() { return *stream; }
123 |     void setStream(OutputStream<char>& stream_) { stream = &stream_; }
124 |
125 | };
126 |
127 | }
128 | }
129 | }
130 |
131 |
132 | #endif
133 |
--------------------------------------------------------------------------------
/example/XML_SAXReader/XML_SAXReader.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #include <fstream>
28 | #include <iostream>
29 | #include <vector>
30 |
31 | #include "Cats/Corecat/Text.hpp"
32 | #include "Cats/Textcat/XML.hpp"
33 |
34 | using namespace Cats::Corecat;
35 | using namespace Cats::Textcat;
36 | // SAX handler that prints one line per event to std::cout, indented by the current element nesting depth.
37 | class Handler : public XMLHandlerBase {
38 |
39 | private:
40 |     // Nesting depth. Initialised here so that indent() is well defined even if an event arrives before startDocument().
41 |     int level = 0;
42 |
43 | private:
44 |
45 |     void indent() { for(int i = 0; i < level; ++i) std::cout << " "; }
46 |
47 | public:
48 |
49 |     void startDocument() { level = 0; std::cout << "startDocument()\n"; }
50 |     void endDocument() { std::cout << "endDocument()\n"; }
51 |     void startElement(StringView8 name) {
52 |
53 |         indent(); std::cout << "startElement(\"" << name << "\")\n"; ++level;
54 |
55 |     }
56 |     void endElement(StringView8 name) {
57 |
58 |         --level; indent(); std::cout << "endElement(\"" << name << "\")\n";
59 |
60 |     }
61 |     void endAttributes(bool empty) {
62 |         // For an empty element the depth is restored here rather than in endElement().
63 |         indent(); std::cout << "endAttributes(" << std::boolalpha << empty << ")\n"; if(empty) --level;
64 |
65 |     }
66 |     void doctype() { indent(); std::cout << "doctype()\n"; }
67 |     void attribute(StringView8 name, StringView8 value) {
68 |
69 |         indent(); std::cout << "attribute(\"" << name << "\", \"" << value << "\")\n";
70 |
71 |     }
72 |     void text(StringView8 value) {
73 |
74 |         indent(); std::cout << "text(\"" << value << "\")\n";
75 |
76 |     }
77 |     void cdata(StringView8 value) {
78 |
79 |         indent(); std::cout << "cdata(\"" << value << "\")\n";
80 |
81 |     }
82 |     void comment(StringView8 value) {
83 |
84 |         indent(); std::cout << "comment(\"" << value << "\")\n";
85 |
86 |     }
87 |     void processingInstruction(StringView8 name, StringView8 value) {
88 |
89 |         indent(); std::cout << "processingInstruction(\"" << name << "\", \"" << value << "\")\n";
90 |
91 |     }
92 |
93 | };
94 |
95 | // Reads the file at `path` in binary mode and returns its bytes followed by a
96 | // terminating NUL byte (main() passes data.data() straight to the parser).
97 | // Throws IOException when the file cannot be opened, sized or read completely.
98 | std::vector<char> readFile(const char* path) {
99 |
100 |     std::ifstream is(path, std::ios::binary);
101 |     if(!is) throw IOException("Cannot read file");
102 |     is.seekg(0, std::ios::end);
103 |     // tellg() reports failure as -1; casting that directly to size_t would request a huge buffer.
104 |     const std::streamoff length = is.tellg();
105 |     if(length < 0) throw IOException("Cannot read file");
106 |     const std::size_t size = static_cast<std::size_t>(length);
107 |     is.seekg(0);
108 |     std::vector<char> data(size + 1);
109 |     // A short read would leave the tail of the buffer zero-filled, so treat it as an error.
110 |     if(!is.read(data.data(), static_cast<std::streamsize>(size))) throw IOException("Cannot read file");
111 |     data[size] = 0;
112 |     return data;
113 |
114 | }
115 |
116 | // Feeds every file named on the command line through XMLParser with a fresh Handler.
117 | // Returns 1 (after printing the message to std::cerr) when no file is given or an exception is thrown.
118 | int main(int argc, char** argv) {
119 |
120 |     try {
121 |
122 |         if(argc < 2) throw InvalidArgumentException("File name needed");
123 |
124 |         for(int i = 1; i < argc; ++i) {
125 |
126 |             auto data = readFile(argv[i]);
127 |             XMLParser parser;
128 |             Handler handler;
129 |             parser.parse<>(data.data(), handler);
130 |
131 |         }
132 |
133 |     } catch(const std::exception& e) { std::cerr << e.what() << std::endl; return 1; }
134 |
135 |     return 0;
136 |
137 | }
138 |
--------------------------------------------------------------------------------
/include/Cats/Textcat/XML/Document.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #ifndef CATS_TEXTCAT_XML_DOCUMENT_HPP
28 | #define CATS_TEXTCAT_XML_DOCUMENT_HPP
29 |
30 |
31 | #include
32 | #include
33 |
34 | #include
35 | #include
36 |
37 | #include "Cats/Corecat/Data/Allocator/FastAllocator.hpp"
38 | #include "Cats/Corecat/Data/Stream.hpp"
39 | #include "Cats/Corecat/Text/String.hpp"
40 | #include "Cats/Corecat/Util/Exception.hpp"
41 |
42 | #include "Handler.hpp"
43 | #include "Parser.hpp"
44 | #include "Serializer.hpp"
45 |
46 |
47 | namespace Cats {
48 | namespace Textcat{
49 | inline namespace XML {
50 |
51 | class XMLNode;
52 | class XMLElement;
53 | class XMLText;
54 | class XMLCDATA;
55 | class XMLComment;
56 | class XMLProcessingInstruction;
57 | class XMLDocument;
58 |
59 | namespace Impl {
60 |
61 | template
62 | class List {
63 |
64 | public:
65 |
66 | struct ListElement {
67 | // Link fields kept inside each list member: neighbouring siblings and the owning node.
68 | T* prev;
69 | T* next;
70 | XMLNode* parent;
71 | // A new element starts unlinked: all three pointers are null.
72 | ListElement() : prev(nullptr), next(nullptr), parent(nullptr) {}
73 |
74 | };
75 |
76 | class Iterator {
77 | // Bidirectional cursor over a List; p == nullptr is the end position.
78 | private:
79 |
80 | List* list;
81 | ListElement* p;
82 |
83 | public:
84 |
85 | Iterator(List* list_, ListElement* p_) : list(list_), p(p_) {}
86 | Iterator(const Iterator& src) : list(src.list), p(src.p) {}
87 |
88 | Iterator& operator =(const Iterator& src) { list = src.list; p = src.p; return *this; }
89 | // Comparisons look only at the element pointer and are const so they also work on const iterators.
90 | T& operator *() const { return static_cast<T&>(*p); }
91 | T* operator ->() const { return static_cast<T*>(p); }
92 | bool operator ==(const Iterator& it) const { return p == it.p; }
93 | bool operator ==(XMLNode* q) const { return p == q; }
94 | bool operator !=(const Iterator& it) const { return p != it.p; }
95 | Iterator& operator ++() { p = p->next; return *this; }
96 | Iterator operator ++(int) { Iterator tmp = *this; ++*this; return tmp; }
97 | Iterator& operator --() { p = p ? p->prev : list->last; return *this; } // stepping back from end() lands on the last element
98 | Iterator operator --(int) { Iterator tmp = *this; --*this; return tmp; }
99 |
100 | };
101 |
102 | private:
103 |
104 | T* first;
105 | T* last;
106 |
107 | public:
108 |
109 | List() : first(), last() {}
110 | List(const List& src) = delete;
111 | ~List() = default;
112 |
113 | T& append(XMLNode& parent, T& child) {
114 |
115 | assert(!child.parent);
116 | if(first) { child.prev = last; last->next = &child; last = &child; }
117 | else first = last = &child;
118 | child.parent = &parent;
119 | return child;
120 |
121 | }
122 |
123 | T& insertBefore(T& child, T& ref) {
124 |
125 | assert(!child.parent && ref.parent);
126 | auto pPrev = ref.prev;
127 | child.prev = pPrev;
128 | child.next = &ref;
129 | if(pPrev) pPrev->next = &child;
130 | ref.prev = &child;
131 | child.parent = ref.parent;
132 | return child;
133 |
134 | }
135 |
136 | T& remove(T& child) {
137 |
138 | auto pPrev = child.prev;
139 | auto pNext = child.next;
140 | if(pPrev) pPrev->next = pNext;
141 | else first = pNext;
142 | if(pNext) pNext->next = pPrev;
143 | else last = pPrev;
144 | child.prev = nullptr;
145 | child.next = nullptr;
146 | child.parent = nullptr;
147 | return child;
148 |
149 | }
150 |
151 | T& getFirst() { return *first; }
152 | T& getLast() { return *last; }
153 |
154 | bool empty() const { return !first; }
155 |
156 | Iterator begin() { return Iterator(this, first); }
157 | Iterator end() { return Iterator(this, nullptr); }
158 |
159 | };
160 |
161 | }
162 |
163 |
164 | class XMLDOMException : public Corecat::Exception {
165 | // Exception thrown by DOM operations; the message passed to the base class is prefixed with "XMLDOMException: ".
166 | public:
167 |
168 | XMLDOMException(const String8& data) : Exception("XMLDOMException: " + data) {}
169 |
170 | };
171 |
172 | enum class XMLNodeType : uint16_t {
173 | // Tag stored in every XMLNode; each node class passes its own enumerator to the XMLNode constructor.
174 | Element,
175 | Text,
176 | CDATA,
177 | Comment,
178 | ProcessingInstruction,
179 | Document,
180 |
181 | };
182 |
183 | class XMLNode : public Impl::List::ListElement {
184 | // Base class of every DOM node: a type tag, sibling/parent links (inherited from ListElement) and a list of children.
185 | private:
186 |
187 | const XMLNodeType type;
188 | Impl::List listChild;
189 |
190 | public:
191 |
192 | XMLNode(XMLNodeType type_) : Impl::List::ListElement(), type(type_), listChild() {}
193 | XMLNode(const XMLNode& src) = delete;
194 |
195 | XMLNodeType getType() const { return type; }
196 |
197 | Impl::List& child() { return listChild; }
198 | // getFirstChild()/getLastChild() dereference the list's head/tail without a null check; call hasChildNodes() first.
199 | XMLNode& getFirstChild() { return listChild.getFirst(); }
200 | XMLNode& getLastChild() { return listChild.getLast(); }
201 | // Tree edits forward to the child list; appendChild() makes *this the parent of child.
202 | XMLNode& appendChild(XMLNode& child) { return listChild.append(*this, child); }
203 | XMLNode& insertBefore(XMLNode& child, XMLNode& ref) { return listChild.insertBefore(child, ref); }
204 | XMLNode& removeChild(XMLNode& child) { return listChild.remove(child); }
205 | bool hasChildNodes() { return !listChild.empty(); }
206 | // The as*() helpers are unchecked casts: they never look at `type`, so test getType() before calling one.
207 | XMLElement& asElement() noexcept { return reinterpret_cast(*this); }
208 | const XMLElement& asElement() const noexcept { return reinterpret_cast(*this); }
209 | XMLText& asText() noexcept { return reinterpret_cast(*this); }
210 | const XMLText& asText() const noexcept { return reinterpret_cast(*this); }
211 | XMLCDATA& asCDATA() noexcept { return reinterpret_cast(*this); }
212 | const XMLCDATA& asCDATA() const noexcept { return reinterpret_cast(*this); }
213 | XMLComment& asComment() noexcept { return reinterpret_cast(*this); }
214 | const XMLComment& asComment() const noexcept { return reinterpret_cast(*this); }
215 | XMLProcessingInstruction& asProcessingInstruction() noexcept { return reinterpret_cast(*this); }
216 | const XMLProcessingInstruction& asProcessingInstruction() const noexcept { return reinterpret_cast(*this); }
217 | XMLDocument& asDocument() noexcept { return reinterpret_cast(*this); }
218 | const XMLDocument& asDocument() const noexcept { return reinterpret_cast(*this); }
219 |
220 | };
221 |
222 | class XMLAttribute : public Impl::List::ListElement {
223 | // Name/value pair that can be linked into an element's attribute list (see XMLElement::appendAttribute).
224 | private:
225 |
226 | using StringView8 = Corecat::StringView8;
227 |
228 | private:
229 | // NOTE(review): both members are StringView8 values, presumably non-owning views - confirm who keeps the characters alive.
230 | StringView8 name;
231 | StringView8 value;
232 |
233 | public:
234 |
235 | XMLAttribute() : Impl::List::ListElement(), name(), value() {}
236 | XMLAttribute(StringView8 name_, StringView8 value_) :
237 | Impl::List::ListElement(), name(name_), value(value_) {}
238 | XMLAttribute(const XMLAttribute& src) = delete;
239 |
240 | StringView8 getName() const { return name; }
241 | void setName(StringView8 name_) { name = name_; }
242 | StringView8 getValue() const { return value; }
243 | void setValue(StringView8 value_) { value = value_; }
244 |
245 | };
246 |
247 | class XMLElement : public XMLNode {
248 | // Element node: a tag name plus a list of attributes, in addition to the children inherited from XMLNode.
249 | private:
250 |
251 | using StringView8 = Corecat::StringView8;
252 |
253 | private:
254 |
255 | Impl::List listAttr;
256 | StringView8 name;
257 |
258 | public:
259 |
260 | XMLElement() : XMLNode(XMLNodeType::Element), listAttr(), name() {}
261 | XMLElement(StringView8 name_) : XMLNode(XMLNodeType::Element), listAttr(), name(name_) {}
262 | XMLElement(const XMLElement& src) = delete;
263 |
264 | Impl::List& attribute() { return listAttr; }
265 |
266 | StringView8 getName() const { return name; }
267 | void setName(StringView8 name_) { name = name_; }
268 | // getFirstAttribute()/getLastAttribute() dereference the list head/tail unchecked; test attribute().empty() first.
269 | XMLAttribute& getFirstAttribute() { return listAttr.getFirst(); }
270 | XMLAttribute& getLastAttribute() { return listAttr.getLast(); }
271 | XMLAttribute& appendAttribute(XMLAttribute& attr) { return listAttr.append(*this, attr); }
272 | XMLAttribute& removeAttribute(XMLAttribute& attr) { return listAttr.remove(attr); }
273 |
274 | };
275 |
276 | class XMLText : public XMLNode {
277 | // Text node: an XMLNode tagged XMLNodeType::Text that carries one string value.
278 | private:
279 |
280 | using StringView8 = Corecat::StringView8;
281 |
282 | private:
283 |
284 | StringView8 value;
285 |
286 | public:
287 |
288 | XMLText() : XMLNode(XMLNodeType::Text), value() {}
289 | XMLText(StringView8 value_) : XMLNode(XMLNodeType::Text), value(value_) {}
290 | XMLText(const XMLText& src) = delete;
291 |
292 | StringView8 getValue() const { return value; }
293 | void setValue(StringView8 value_) { value = value_; }
294 |
295 | };
296 |
297 | class XMLCDATA : public XMLNode {
298 | // CDATA section node: an XMLNode tagged XMLNodeType::CDATA that carries the section's content as one string value.
299 | private:
300 |
301 | using StringView8 = Corecat::StringView8;
302 |
303 | private:
304 |
305 | StringView8 value;
306 |
307 | public:
308 |
309 | XMLCDATA() : XMLNode(XMLNodeType::CDATA), value() {}
310 | XMLCDATA(StringView8 value_) : XMLNode(XMLNodeType::CDATA), value(value_) {}
311 | XMLCDATA(const XMLCDATA& src) = delete;
312 |
313 | StringView8 getValue() const { return value; }
314 | void setValue(StringView8 value_) { value = value_; }
315 |
316 | };
317 |
318 | class XMLComment : public XMLNode {
319 | // Comment node: an XMLNode tagged XMLNodeType::Comment that carries the comment text as one string value.
320 | private:
321 |
322 | using StringView8 = Corecat::StringView8;
323 |
324 | private:
325 |
326 | StringView8 value;
327 |
328 | public:
329 |
330 | XMLComment() : XMLNode(XMLNodeType::Comment), value() {}
331 | XMLComment(StringView8 value_) : XMLNode(XMLNodeType::Comment), value(value_) {}
332 | XMLComment(const XMLComment& src) = delete;
333 |
334 | StringView8 getValue() const { return value; }
335 | void setValue(StringView8 value_) { value = value_; }
336 |
337 | };
338 |
339 | class XMLProcessingInstruction : public XMLNode {
340 | // Processing-instruction node: an XMLNode tagged XMLNodeType::ProcessingInstruction with a target name and a content value.
341 | private:
342 |
343 | using StringView8 = Corecat::StringView8;
344 |
345 | private:
346 |
347 | StringView8 name;
348 | StringView8 value;
349 |
350 | public:
351 | // Both strings are taken by value, like the other node classes, so temporaries are accepted as well as lvalues.
352 | XMLProcessingInstruction() : XMLNode(XMLNodeType::ProcessingInstruction), name(), value() {}
353 | XMLProcessingInstruction(StringView8 name_, StringView8 value_) :
354 | XMLNode(XMLNodeType::ProcessingInstruction), name(name_), value(value_) {}
355 | XMLProcessingInstruction(const XMLProcessingInstruction& src) = delete;
356 |
357 | StringView8 getName() const { return name; }
358 | void setName(StringView8 name_) { name = name_; }
359 | StringView8 getValue() const { return value; }
360 | void setValue(StringView8 value_) { value = value_; }
361 |
362 | };
363 |
364 | class XMLDocument : public XMLNode {
365 |
366 | private:
367 |
368 | template
369 | using OutputStream = Corecat::OutputStream;
370 | using StringView8 = Corecat::StringView8;
371 | using FastAllocator = Corecat::FastAllocator<>;
372 |
373 | private:
374 |
375 | FastAllocator allocator;
376 |
377 | public:
378 |
379 | XMLDocument() : XMLNode(XMLNodeType::Document), allocator() {}
380 | XMLDocument(const XMLDocument& src) = delete;
381 |
382 | XMLElement& createElement(StringView8 name) {
383 | // Construct an unattached XMLElement (placement new) in storage obtained from this document's allocator.
384 | return *new(allocator.allocate(sizeof(XMLElement))) XMLElement(name);
385 |
386 | }
387 | XMLAttribute& createAttribute(StringView8 name, StringView8 value) {
388 | // Construct an unattached XMLAttribute (placement new) in storage obtained from this document's allocator.
389 | return *new(allocator.allocate(sizeof(XMLAttribute))) XMLAttribute(name, value);
390 |
391 | }
392 | XMLText& createText(StringView8 value) {
393 | // Construct an unattached XMLText (placement new) in storage obtained from this document's allocator.
394 | return *new(allocator.allocate(sizeof(XMLText))) XMLText(value);
395 |
396 | }
397 | XMLCDATA& createCDATA(StringView8 value) {
398 | // Construct an unattached XMLCDATA (placement new) in storage obtained from this document's allocator.
399 | return *new(allocator.allocate(sizeof(XMLCDATA))) XMLCDATA(value);
400 |
401 | }
402 | XMLComment& createComment(StringView8 value) {
403 | // Construct an unattached XMLComment (placement new) in storage obtained from this document's allocator.
404 | return *new(allocator.allocate(sizeof(XMLComment))) XMLComment(value);
405 |
406 | }
407 | XMLProcessingInstruction& createProcessingInstruction(StringView8 name, StringView8 value) {
408 | // Construct an unattached XMLProcessingInstruction (placement new) in storage obtained from this document's allocator.
409 | return *new(allocator.allocate(sizeof(XMLProcessingInstruction))) XMLProcessingInstruction(name, value);
410 |
411 | }
412 |
413 | void clear() { // Empty the document: detach every top-level child, then call allocator.clear().
414 | while(hasChildNodes()) removeChild(getFirstChild()); // otherwise the child list would still refer to the old nodes after the allocator is cleared
415 | allocator.clear(); // NOTE(review): presumably releases the storage of all nodes created by this document - confirm
416 |
417 | }
418 |
419 | XMLElement& getRootElement() {
420 | // Return the first direct child whose type is Element; throws XMLDOMException when the document has none.
421 | for(auto& node : child()) if(node.getType() == XMLNodeType::Element) return static_cast(node);
422 | throw XMLDOMException("Root element not found");
423 |
424 | }
425 |
426 | template
427 | void parse(char* data) {
428 | // Rebuild this document from the XML text in data: clear() the current content, then run an XMLParser with a handler that creates and links nodes.
429 | class Handler : public XMLHandlerBase {
430 | // Parser callback object; `cur` is the node that currently receives newly created children.
431 | private:
432 |
433 | XMLDocument* document;
434 | XMLNode* cur;
435 |
436 | public:
437 |
438 | Handler(XMLDocument* document_) : document(document_), cur(nullptr) {}
439 |
440 | void startDocument() { cur = document; }
441 | void startElement(StringView8 name) {
442 | // Create the element under the current node and make it the current node.
443 | auto& element = document->createElement(name);
444 | cur->appendChild(element);
445 | cur = &element;
446 |
447 | }
448 | void endElement(StringView8 /*name*/) {
449 | // Element finished: later nodes are added to its parent again.
450 | cur = cur->parent;
451 |
452 | }
453 | void endAttributes(bool empty) {
454 | // XMLParser::parseElement issues no endElement() for an empty element, so step back to the parent here.
455 | if(empty) cur = cur->parent;
456 |
457 | }
458 | void attribute(StringView8 name, StringView8 value) {
459 | // Attributes arrive between startElement() and endAttributes(), while cur is still that element.
460 | static_cast(cur)->appendAttribute(document->createAttribute(name, value));
461 |
462 | }
463 | void text(StringView8 value) {
464 |
465 | cur->appendChild(document->createText(value));
466 |
467 | }
468 | void cdata(StringView8 value) {
469 |
470 | cur->appendChild(document->createCDATA(value));
471 |
472 | }
473 | void comment(StringView8 value) {
474 |
475 | cur->appendChild(document->createComment(value));
476 |
477 | }
478 | void processingInstruction(StringView8 name, StringView8 value) {
479 |
480 | cur->appendChild(document->createProcessingInstruction(name, value));
481 |
482 | }
483 |
484 | };
485 | // The buffer goes to the parser as is; XMLParser rewrites it in place when translating references, so data must be writable.
486 | assert(data);
487 | // NOTE(review): node strings are StringView8 values built from pointers into data - presumably data must outlive the document; confirm.
488 | clear();
489 | XMLParser parser;
490 | Handler handler(this);
491 | parser.parse(data, handler);
492 |
493 | }
494 |
495 | template
496 | void visit(H& handler) {
497 | // Replay the tree as handler events in document order without recursion: descend to the first child, else move to the next sibling, climbing when a sibling run ends.
498 | handler.startDocument();
499 | if(hasChildNodes()) {
500 |
501 | XMLNode* cur = &getFirstChild();
502 | while(true) {
503 | // Emit the event(s) for the node under the cursor.
504 | switch(cur->getType()) {
505 |
506 | case XMLNodeType::Element: {
507 |
508 | auto& element = static_cast(*cur);
509 | handler.startElement(element.getName());
510 | for(auto& attr : element.attribute())
511 | handler.attribute(attr.getName(), attr.getValue());
512 | bool empty = !cur->hasChildNodes();
513 | handler.endAttributes(empty);
514 | if(!empty) { cur = &cur->getFirstChild(); continue; } // children are visited before the element's siblings
515 | break; // childless element: no endElement() is emitted for it
516 |
517 | }
518 | case XMLNodeType::Text: {
519 |
520 | auto& text = static_cast(*cur);
521 | handler.text(text.getValue());
522 | break;
523 |
524 | }
525 | case XMLNodeType::CDATA: {
526 |
527 | auto& cdata = static_cast(*cur);
528 | handler.cdata(cdata.getValue());
529 | break;
530 |
531 | }
532 | case XMLNodeType::Comment: {
533 |
534 | auto& comment = static_cast(*cur);
535 | handler.comment(comment.getValue());
536 | break;
537 |
538 | }
539 | case XMLNodeType::ProcessingInstruction: {
540 |
541 | auto& pi = static_cast(*cur);
542 | handler.processingInstruction(pi.getName(), pi.getValue());
543 | break;
544 |
545 | }
546 | default: throw XMLDOMException("Invalid node type");
547 |
548 | }
549 | while(!cur->next) {
550 | // Last sibling: climb to the parent and close it; repeat until a node with a next sibling, or the document itself, is reached.
551 | cur = cur->parent;
552 | if(cur == this) break;
553 | auto name = static_cast(cur)->getName();
554 | handler.endElement(name);
555 |
556 | }
557 | if(cur == this) break;
558 | cur = cur->next;
559 |
560 | }
561 |
562 | }
563 | handler.endDocument();
564 |
565 | }
566 | void serialize(OutputStream& stream) {
567 | // Construct an XMLSerializer on stream and drive it with this document's events through visit().
568 | XMLSerializer serializer(stream);
569 | visit(serializer);
570 |
571 | }
572 |
573 | };
574 |
575 | inline std::ostream& operator <<(std::ostream& stream, XMLDocument& document) {
576 | // Serialize document into stream through a Corecat::createWrapperOutputStream adapter; returns stream for chaining.
577 | auto wrapper = Corecat::createWrapperOutputStream(stream);
578 | document.serialize(wrapper);
579 | return stream;
580 |
581 | }
582 |
583 | }
584 | }
585 | }
586 |
587 |
588 | #endif
589 |
--------------------------------------------------------------------------------
/include/Cats/Textcat/XML/Parser.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * MIT License
4 | *
5 | * Copyright (c) 2016-2018 The Cats Project
6 | *
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy
8 | * of this software and associated documentation files (the "Software"), to deal
9 | * in the Software without restriction, including without limitation the rights
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | * copies of the Software, and to permit persons to whom the Software is
12 | * furnished to do so, subject to the following conditions:
13 | *
14 | * The above copyright notice and this permission notice shall be included in all
15 | * copies or substantial portions of the Software.
16 | *
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | * SOFTWARE.
24 | *
25 | */
26 |
27 | #ifndef CATS_TEXTCAT_XML_PARSER_HPP
28 | #define CATS_TEXTCAT_XML_PARSER_HPP
29 |
30 |
31 | #include
32 | #include
33 |
34 | #include
35 | #include
36 | #include
37 |
38 | #include "Cats/Corecat/Util/Sequence.hpp"
39 |
40 |
41 | namespace Cats {
42 | namespace Textcat{
43 | inline namespace XML {
44 |
45 | namespace Impl {
46 |
47 | template
48 | struct Include {
49 | // Character predicate: get(t) returns the result of the ContainSequence lookup for t (presumably true when t is one of the listed characters - TODO confirm).
50 | static constexpr bool get(T t) { using namespace Corecat; return ContainSequence>::get(t); }
51 |
52 | };
53 |
54 | template
55 | struct Exclude {
56 | // Character predicate: get(t) returns the negation of the ContainSequence lookup for t, i.e. the complement of Include.
57 | static constexpr bool get(T t) { using namespace Corecat; return !ContainSequence>::get(t); }
58 |
59 | };
60 |
61 |
62 | template
63 | struct Skipper {
64 | // Helper that advances a cursor over a run of characters accepted by a SequenceTable lookup.
65 | static size_t skip(char*& p) {
66 | // Moves p forward while the table lookup for *p is truthy; returns how many characters were passed (0 if none).
67 | using namespace Corecat;
68 |
69 | auto t = p;
70 | while(SequenceTable>>::get(*t)) ++t;
71 | const size_t length = t - p;
72 | p = t;
73 | return length;
74 |
75 | }
76 |
77 | };
78 |
79 |
80 | using Space = Include; // character-class predicates built from Include/Exclude (presumably consumed through Skipper/SequenceTable in XMLParser - TODO confirm)
81 | using Name = Exclude', '?'>;
82 | using AttributeName = Exclude', '?'>;
83 | using AttributeValue1 = Exclude;
84 | using AttributeValueNoRef1 = Exclude;
85 | using AttributeValue2 = Exclude;
86 | using AttributeValueNoRef2 = Exclude;
87 | using Text = Exclude;
88 | using TextNoSpace = Exclude;
89 | using TextNoRef = Exclude;
90 | using TextNoSpaceRef = Exclude;
91 |
92 | struct Decimal {
93 | // Digit decoder: maps '0'..'9' to 0..9 and every other byte to 255 (XMLParser::parseReference stops accumulating at 255).
94 | static constexpr unsigned char get(unsigned char t) {
95 |
96 | return (t >= '0' && t <= '9') ? (t - '0') : 255;
97 |
98 | }
99 |
100 | };
101 |
102 | struct Hexadecimal {
103 | // Hex digit decoder: '0'..'9' -> 0..9, 'A'..'F' and 'a'..'f' -> 10..15, every other byte -> 255.
104 | static constexpr unsigned char get(unsigned char t) {
105 |
106 | return (t >= '0' && t <= '9') ? (t - '0')
107 | : ((t >= 'A' && t <= 'F') ? (t - 'A' + 10)
108 | : ((t >= 'a' && t <= 'f') ? (t - 'a' + 10) : 255));
109 |
110 | }
111 |
112 | };
113 |
114 | }
115 |
116 |
117 | class XMLParseException : public Corecat::Exception {
118 | // Parse error: the message is prefixed with "XMLParseException: " and the offset of the problem is kept alongside it.
119 | private:
120 |
121 | std::size_t pos;
122 |
123 | public:
124 |
125 | XMLParseException(const String8& data, std::size_t pos_) : Exception("XMLParseException: " + data), pos(pos_) {}
126 | std::size_t getPosition() const noexcept { return pos; } // offset given by the thrower (XMLParser passes p - s, the distance from the start of the buffer)
127 | };
128 |
129 | class XMLParser {
130 |
131 | private:
132 |
133 | using StringView8 = Corecat::StringView8;
134 |
135 | public:
136 |
137 | enum class Flag : std::uint32_t {
138 | // Bit flags selecting parser behaviour; combine with operator | and test with operator &.
139 | None = 0x00000000,
140 | TrimSpace = 0x00000001, // parseElement skips characters before text content and trims them from its end (presumably whitespace - confirm)
141 | NormalizeSpace = 0x00000002, // parseElement joins the pieces of a text run with a single ' '
142 | EntityTranslation = 0x00000004, // '&' references in attribute values and text are decoded in place (parseReference)
143 | ClosingTagValidate = 0x00000008, // NOTE(review): in parseElement the end-tag/start-tag comparison runs when this bit is NOT set - confirm intent
144 |
145 | Default = TrimSpace | EntityTranslation,
146 |
147 | };
148 | friend constexpr bool operator &(Flag a, Flag b) {
149 | // Flag test: true when a and b have at least one bit in common (the bitwise AND converts to bool).
150 | return static_cast(a) & static_cast(b);
151 |
152 | }
153 | friend constexpr Flag operator |(Flag a, Flag b) {
154 | // Flag union: bitwise OR of the two values, cast back to Flag.
155 | return static_cast(static_cast(a) | static_cast(b));
156 |
157 | }
158 |
159 | private:
160 |
161 | char* s;
162 | char* p;
163 |
164 | private:
165 |
166 | template
167 | void parseReference(char*& q) {
168 | // Decode the reference starting at p (p[0] is '&'): store the character at *q, advance q by one and p past the closing ';'. Throws XMLParseException otherwise.
169 | using namespace Corecat::Util;
170 |
171 | switch(p[1]) {
172 |
173 | case 0: throw XMLParseException("Unexpected end of data", p - s);
174 | case '#': {
175 | // Numeric character reference: "&#x" introduces hexadecimal digits, plain "&#" decimal digits.
176 | if(p[2] == 'x') {
177 |
178 | p += 3;
179 | if(*p == ';') throw XMLParseException("Unexpected ;", p - s);
180 | std::uint32_t code = 0;
181 | for(unsigned char t; (t = SequenceTable>>::get(*p)) != 255; code = code * 16 + t, ++p);
182 | if(*p != ';') throw XMLParseException("Expected ;", p - s);
183 | ++p;
184 | // TODO: Code conversion (for now the 32-bit code is narrowed to a single char by the assignment below)
185 | *q = code;
186 | ++q;
187 |
188 | } else {
189 |
190 | p += 2;
191 | if(*p == ';') throw XMLParseException("Unexpected ;", p - s);
192 | std::uint32_t code = 0;
193 | for(unsigned char t; (t = SequenceTable>>::get(*p)) != 255; code = code * 10 + t, ++p);
194 | if(*p != ';') throw XMLParseException("Expected ;", p - s);
195 | ++p;
196 | // TODO: Code conversion (for now the 32-bit code is narrowed to a single char by the assignment below)
197 | *q = code;
198 | ++q;
199 |
200 | }
201 | return;
202 |
203 | }
204 | case 'a': {
205 | // Named references are matched character by character; a partial match falls out of the switch to the throw at the end.
206 | if(p[2] == 'm' && p[3] == 'p' && p[4] == ';') {
207 |
208 | // amp
209 | p += 5;
210 | *q = '&';
211 | ++q;
212 | return;
213 |
214 | }
215 | if(p[2] == 'p' && p[3] == 'o' && p[4] == 's' && p[5] == ';') {
216 |
217 | // apos
218 | p += 6;
219 | *q = '\'';
220 | ++q;
221 | return;
222 |
223 | }
224 | break;
225 |
226 | }
227 | case 'g': {
228 |
229 | if(p[2] == 't' && p[3] == ';') {
230 |
231 | // gt
232 | p += 4;
233 | *q = '>';
234 | ++q;
235 | return;
236 |
237 | }
238 | break;
239 |
240 | }
241 | case 'l': {
242 |
243 | if(p[2] == 't' && p[3] == ';') {
244 |
245 | // lt
246 | p += 4;
247 | *q = '<';
248 | ++q;
249 | return;
250 |
251 | }
252 | break;
253 |
254 | }
255 | case 'q': {
256 |
257 | if(p[2] == 'u' && p[3] == 'o' && p[4] == 't' && p[5] == ';') {
258 |
259 | // quot
260 | p += 6;
261 | *q = '"';
262 | ++q;
263 | return;
264 |
265 | }
266 | break;
267 |
268 | }
269 | default: {
270 |
271 | break;
272 |
273 | }
274 |
275 | }
276 | throw XMLParseException("Invalid reference", p - s);
277 |
278 | }
279 | template
280 | void parseXMLDeclaration(H& /*handler*/) {
281 | // Parse the attributes of an XML declaration up to and including "?>": version is mandatory, encoding and standalone are optional. Values are skipped, not reported.
282 | using namespace Corecat::Util;
283 |
284 | Impl::Skipper::skip(p);
285 |
286 | // Parse "version"
287 | if(p[0] != 'v' || p[1] != 'e' || p[2] != 'r' || p[3] != 's' || p[4] != 'i' || p[5] != 'o' || p[6] != 'n')
288 | throw XMLParseException("Expected version", p - s);
289 | p += 7;
290 | Impl::Skipper::skip(p);
291 | if(*p != '=') throw XMLParseException("Expected =", p - s);
292 | ++p;
293 | Impl::Skipper::skip(p);
294 | if(*p == '"') {
295 |
296 | ++p;
297 | Impl::Skipper::skip(p);
298 | if(*p != '"') throw XMLParseException("Expected \"", p - s);
299 |
300 | } else if(*p == '\'') {
301 |
302 | ++p;
303 | Impl::Skipper::skip(p);
304 | if(*p != '\'') throw XMLParseException("Expected '", p - s);
305 |
306 | } else throw XMLParseException("Expected \" or '", p - s);
307 | ++p;
308 | // The version value must be followed by '?' or by a separator character.
309 | if(*p != '?' && !SequenceTable>>::get(*p))
310 | throw XMLParseException("Unexpected character", p - s);
311 | Impl::Skipper::skip(p);
312 |
313 | // Parse "encoding"
314 | if(p[0] == 'e' && p[1] == 'n' && p[2] == 'c' && p[3] == 'o' && p[4] == 'd' && p[5] == 'i' && p[6] == 'n' && p[7] == 'g') {
315 |
316 | p += 8;
317 | Impl::Skipper::skip(p);
318 | if(*p != '=') throw XMLParseException("Expected =", p - s);
319 | ++p;
320 | Impl::Skipper::skip(p);
321 | if(*p == '"') {
322 |
323 | ++p;
324 | Impl::Skipper::skip(p);
325 | if(*p != '"') throw XMLParseException("Expected \"", p - s);
326 |
327 | } else if(*p == '\'') {
328 |
329 | ++p;
330 | Impl::Skipper::skip(p);
331 | if(*p != '\'') throw XMLParseException("Expected '", p - s);
332 |
333 | } else throw XMLParseException("Expected \" or '", p - s);
334 | ++p;
335 | if(*p != '?' && !SequenceTable>>::get(*p)) throw XMLParseException("Unexpected character", p - s);
336 | }
337 |
338 | // The separator check above applies only after an encoding value: when encoding is absent, the characters
339 | // following the version value were already consumed and p may sit directly on "standalone".
340 | Impl::Skipper::skip(p);
341 |
342 | // Parse "standalone"
343 | if(p[0] == 's' && p[1] == 't' && p[2] == 'a' && p[3] == 'n' && p[4] == 'd' && p[5] == 'a' && p[6] == 'l' && p[7] == 'o' && p[8] == 'n' && p[9] == 'e') {
344 |
345 | p += 10;
346 | Impl::Skipper::skip(p);
347 | if(*p != '=') throw XMLParseException("Expected =", p - s);
348 | ++p;
349 | Impl::Skipper::skip(p);
350 | if(*p == '"') {
351 |
352 | ++p;
353 | Impl::Skipper::skip(p);
354 | if(*p != '"') throw XMLParseException("Expected \"", p - s);
355 |
356 | } else if(*p == '\'') {
357 |
358 | ++p;
359 | Impl::Skipper::skip(p);
360 | if(*p != '\'') throw XMLParseException("Expected '", p - s);
361 |
362 | } else throw XMLParseException("Expected \" or '", p - s);
363 | ++p;
364 |
365 | }
366 |
367 | Impl::Skipper::skip(p);
368 | if(p[0] != '?' || p[1] != '>') throw XMLParseException("Expected ?>", p - s);
369 | p += 2;
370 |
371 | }
372 | template
373 | void parseDoctype(H& /*handler*/) {
374 | // DOCTYPE declarations are not supported: this always throws XMLParseException("Not implemented").
375 | throw XMLParseException("Not implemented", p - s);
376 |
377 | }
378 | template
379 | void parseComment(H& handler) {
380 | // Report everything from p up to the next "-->" through handler.comment() and leave p just past the "-->"; throws if the data ends first.
381 | StringView8 comment(p, 1);
382 | // Until "-->"
383 | while(*p && (p[0] != '-' || p[1] != '-' || p[2] != '>')) ++p;
384 | if(!*p) throw XMLParseException("Unexpected end of data", p - s);
385 | comment.setLength(p - comment.getData());
386 | p += 3;
387 | handler.comment(comment);
388 |
389 | }
390 | template
391 | void parseProcessingInstruction(H& handler) {
392 | // Parse "target content?>" starting at p and report it through handler.processingInstruction(); p ends just past "?>".
393 | StringView8 target(p, 1);
394 | target.setLength(Impl::Skipper::skip(p));
395 | if(!target.getLength()) throw XMLParseException("Expected PI target", p - s);
396 | if((p[0] != '?' || p[1] != '>') && !Impl::Skipper::skip(p)) // unless "?>" follows at once, at least one separator character is required
397 | throw XMLParseException("Expected white space", p - s);
398 |
399 | StringView8 content(p, 1);
400 | // Until "?>"
401 | while(*p && (p[0] != '?' || p[1] != '>')) ++p;
402 | if(!*p) throw XMLParseException("Unexpected end of data", p - s);
403 | content.setLength(p - content.getData());
404 | p += 2;
405 |
406 | handler.processingInstruction(target, content);
407 |
408 | }
409 | template
410 | void parseCDATA(H& handler) {
411 | // Report everything from p up to the next "]]>" through handler.cdata() and leave p just past the "]]>"; throws if the data ends first.
412 | StringView8 text(p, 1);
413 | // Until "]]>"
414 | while(*p && (p[0] != ']' || p[1] != ']' || p[2] != '>')) ++p;
415 | if(!*p) throw XMLParseException("Unexpected end of data", p - s);
416 | text.setLength(p - text.getData());
417 | p += 3;
418 | handler.cdata(text);
419 |
420 | }
421 | template
422 | void parseElement(H& handler) {
423 | // Parse one element starting at its name: name, attributes, then content up to and including the end tag. Child elements are parsed recursively.
424 | using namespace Corecat::Util;
425 |
426 | // Parse element type
427 | StringView8 name(p, 1);
428 | name.setLength(Impl::Skipper::skip(p));
429 | if(!name.getLength()) throw XMLParseException("Expected element type", p - s);
430 | bool empty = false;
431 | if(*p == '>') {
432 |
433 | ++p;
434 | handler.startElement(name);
435 |
436 | } else if(*p == '/') {
437 |
438 | if(p[1] != '>') throw XMLParseException("Expected >", p + 1 - s);
439 | p += 2;
440 | handler.startElement(name);
441 | empty = true;
442 |
443 | } else {
444 |
445 | ++p;
446 | handler.startElement(name);
447 | Impl::Skipper::skip(p);
448 | while(SequenceTable>>::get(*p)) {
449 |
450 | // Parse attribute name
451 | StringView8 name(p, 1);
452 | name.setLength(Impl::Skipper::skip(p));
453 | if(!name.getLength()) throw XMLParseException("Expected attribute name", p - s);
454 | Impl::Skipper::skip(p);
455 | if(*p != '=') throw XMLParseException("Expected =", p - s);
456 | ++p;
457 | Impl::Skipper::skip(p);
458 |
459 | // Parse attribute value
460 | StringView8 value;
461 | if(*p == '"') {
462 |
463 | ++p;
464 | value.setData(p, 0);
465 | if(F & Flag::EntityTranslation) {
466 | // q is the write cursor: decoded output is compacted in place, always at or behind the read cursor p.
467 | auto q = p;
468 | while(true) {
469 | // Move the literal run [p - len, p) down to q, then decode the reference that ended the run.
470 | auto len = Impl::Skipper::skip(p);
471 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s);
472 | if(p != q + len) std::copy(p - len, p, q);
473 | q += len;
474 | if(*p == '&') parseReference(q);
475 | else break;
476 |
477 | }
478 | value.setLength(q - value.getData());
479 |
480 | } else {
481 |
482 | value.setLength(Impl::Skipper::skip(p));
483 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s);
484 |
485 | }
486 | ++p;
487 |
488 | } else if(*p == '\'') {
489 |
490 | ++p;
491 | value.setData(p, 0);
492 | if(F & Flag::EntityTranslation) {
493 | // Same in-place compaction as the double-quoted case.
494 | auto q = p;
495 | while(true) {
496 |
497 | auto len = Impl::Skipper::skip(p);
498 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s);
499 | if(p != q + len) std::copy(p - len, p, q);
500 | q += len;
501 | if(*p == '&') parseReference(q);
502 | else break;
503 |
504 | }
505 | value.setLength(q - value.getData());
506 |
507 | } else {
508 |
509 | value.setLength(Impl::Skipper::skip(p));
510 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s);
511 |
512 | }
513 | ++p;
514 |
515 | } else throw XMLParseException("Expected \" or '", p - s);
516 | handler.attribute(name, value);
517 | Impl::Skipper::skip(p);
518 |
519 | }
520 | if(*p == '>') {
521 |
522 | ++p;
523 |
524 | } else if(*p == '/') {
525 |
526 | if(p[1] != '>') throw XMLParseException("Expected >", p + 1 - s);
527 | p += 2;
528 | empty = true;
529 |
530 | } else throw XMLParseException("Unexpected character", p + 1 - s);
531 |
532 | }
533 | handler.endAttributes(empty);
534 | if(!empty) {
535 | // Content loop: optional text, then one markup item introduced by '<'; c turns false once the end tag has been consumed.
536 | bool c = true;
537 | do {
538 |
539 | // Parse text
540 | if(F & Flag::TrimSpace) Impl::Skipper::skip(p);
541 | if(*p != '<') {
542 |
543 | if(F & Flag::EntityTranslation) {
544 |
545 | if(F & Flag::NormalizeSpace) {
546 |
547 | StringView8 text(p, 1);
548 | auto q = p;
549 | while(true) {
550 |
551 | auto len = Impl::Skipper::skip(p);
552 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s);
553 | if(p != q + len) std::copy(p - len, p, q);
554 | q += len;
555 | if(*p == '&') parseReference(q);
556 | else if(*p != '<') { Impl::Skipper::skip(p); *(q++) = ' '; }
557 | else break;
558 |
559 | }
560 | if(F & Flag::TrimSpace && q[-1] == ' ') --q;
561 | text.setLength(q - text.getData());
562 | handler.text(text);
563 |
564 | } else {
565 |
566 | StringView8 text(p, 1);
567 | auto q = p;
568 | while(true) {
569 |
570 | auto len = Impl::Skipper::skip(p);
571 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s);
572 | if(p != q + len) std::copy(p - len, p, q);
573 | q += len;
574 | if(*p == '&') parseReference(q);
575 | else break;
576 |
577 | }
578 | --q;
579 | if(F & Flag::TrimSpace)
580 | for(; SequenceTable>>::get(*q); --q);
581 | ++q;
582 | text.setLength(q - text.getData());
583 | handler.text(text);
584 |
585 | }
586 |
587 | } else {
588 |
589 | if(F & Flag::NormalizeSpace) {
590 |
591 | StringView8 text(p, 1);
592 | auto q = p;
593 | while(true) {
594 |
595 | auto len = Impl::Skipper::skip(p);
596 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s);
597 | if(p != q + len) std::copy(p - len, p, q);
598 | q += len;
599 | if(*p != '<') { Impl::Skipper::skip(p); *(q++) = ' '; }
600 | else break;
601 |
602 | }
603 | --q;
604 | if(F & Flag::TrimSpace)
605 | for(; SequenceTable>>::get(*q); --q);
606 | ++q;
607 | text.setLength(q - text.getData());
608 | handler.text(text);
609 |
610 | } else {
611 |
612 | StringView8 text(p, 1);
613 | Impl::Skipper::skip(p);
614 | if(*p == 0) throw XMLParseException("Unexpected end of data", p - s);
615 | auto q = p - 1;
616 | if(F & Flag::TrimSpace)
617 | for(; SequenceTable>>::get(*q); --q);
618 | ++q;
619 | text.setLength(q - text.getData());
620 | handler.text(text);
621 |
622 | }
623 |
624 | }
625 |
626 | }
627 | // p is on '<' here; step over it and dispatch on the character that follows.
628 | ++p;
629 | switch(*p) {
630 |
631 | case '!': {
632 |
633 | ++p;
634 | if(p[0] == '-' && p[1] == '-') {
635 |
636 | p += 2;
637 | parseComment(handler);
638 |
639 | } else if(p[0] == '[' && p[1] == 'C' && p[2] == 'D' && p[3] == 'A' && p[4] == 'T' && p[5] == 'A' && p[6] == '[') {
640 |
641 | // "[CDATA["
642 | p += 7;
643 | parseCDATA(handler);
644 |
645 | } else throw XMLParseException("Unexpected character", p - s);
646 | break;
647 |
648 | }
649 | case '/': {
650 |
651 | ++p;
652 | if(F & Flag::ClosingTagValidate) {
653 | // NOTE(review): with the flag set the end-tag name is only read, not compared with the start tag; the comparison is in the else branch - confirm the intended polarity.
654 | StringView8 endName(p, 1);
655 | Impl::Skipper::skip(p);
656 | endName.setLength(p - endName.getData());
657 | Impl::Skipper::skip(p);
658 | if(*p != '>') throw XMLParseException("Expected >", p - s);
659 | ++p;
660 | handler.endElement(endName);
661 |
662 | } else {
663 |
664 | StringView8 endName(p, name.getLength());
665 | if(endName != name) throw XMLParseException("Unmatch element type", p - s);
666 | p += name.getLength();
667 | Impl::Skipper::skip(p);
668 | if(*p != '>') throw XMLParseException("Expected >", p - s);
669 | ++p;
670 | handler.endElement(endName);
671 |
672 | }
673 | c = false;
674 | break;
675 |
676 | }
677 | case '?': {
678 |
679 | ++p;
680 | parseProcessingInstruction(handler);
681 | break;
682 |
683 | }
684 | default: {
685 |
686 | parseElement(handler);
687 | break;
688 |
689 | }
690 |
691 | }
692 |
693 | } while(c);
694 |
695 | }
696 |
697 | }
698 |
699 | public:
700 |
701 | XMLParser() = default;
702 |
703 | template
704 | void parse(char* data, H& handler) {
705 | // Parse the NUL-terminated buffer data, reporting the document to handler between startDocument() and endDocument(). Throws XMLParseException on malformed input.
706 | using namespace Corecat::Util;
707 |
708 | assert(data);
709 | // s keeps the start of the buffer so that error offsets can be computed as p - s.
710 | s = data;
711 | p = data;
712 | handler.startDocument();
713 |
714 | // Parse BOM
715 | if(static_cast(p[0]) == 0xEF &&
716 | static_cast(p[1]) == 0xBB &&
717 | static_cast(p[2]) == 0xBF) {
718 |
719 | p += 3;
720 |
721 | }
722 |
723 | // Parse XML declaration
724 | if(p[0] == '<' && p[1] == '?' && p[2] == 'x' && p[3] == 'm' && p[4] == 'l' && SequenceTable>>::get(p[5])) {
725 |
726 | // "<?xml"
727 | p += 5;
728 | parseXMLDeclaration(handler);
729 |
730 | }
731 | while(true) {
732 | // Top level: skip separators, stop at the terminating NUL, otherwise require '<' and dispatch on what follows it.
733 | Impl::Skipper::skip(p);
734 | if(!*p) break;
735 | else if(*p == '<') {
736 |
737 | ++p;
738 | if(*p == '!') {
739 |
740 | ++p;
741 | if(p[0] == '-' && p[1] == '-') {
742 |
743 | p += 2;
744 | parseComment(handler);
745 |
746 | } else if(p[0] == 'D' && p[1] == 'O' && p[2] == 'C' && p[3] == 'T' && p[4] == 'Y' && p[5] == 'P' && p[6] == 'E') {
747 |
748 | // "DOCTYPE"
749 | p += 7;
750 | parseDoctype(handler);
751 |
752 | } else throw XMLParseException("Unexpected character", p - s);
753 |
754 | } else if(*p == '?') {
755 |
756 | ++p;
757 | parseProcessingInstruction(handler);
758 |
759 | } else {
760 |
761 | parseElement(handler);
762 |
763 | }
764 |
765 | } else throw XMLParseException("Expected <", p - s);
766 |
767 | }
768 |
769 | handler.endDocument();
770 |
771 | }
772 |
773 | };
774 |
775 | }
776 | }
777 | }
778 |
779 |
780 | #endif
781 |
--------------------------------------------------------------------------------