├── .gitattributes ├── .gitignore ├── .gitmodules ├── AUTHORS ├── CONTRIBUTING.md ├── LICENSE ├── NEWS ├── README.md ├── build ├── .gitignore ├── bootstrap.build ├── export.build └── root.build ├── buildfile ├── libstud └── json │ ├── .gitignore │ ├── buildfile │ ├── event.hxx │ ├── export.hxx │ ├── parser.cxx │ ├── parser.hxx │ ├── parser.ixx │ ├── pdjson.c │ ├── pdjson.h │ ├── serializer.cxx │ ├── serializer.hxx │ ├── serializer.ixx │ └── version.hxx.in ├── manifest ├── repositories.manifest └── tests ├── .gitignore ├── build ├── .gitignore ├── bootstrap.build └── root.build ├── buildfile ├── parser ├── basics │ ├── basics.testscript │ ├── buildfile │ ├── driver.cxx │ └── multi-value.testscript ├── buildfile ├── fuzz-llvm │ ├── README.md │ ├── buildfile │ └── driver.cxx └── peek │ ├── buildfile │ └── driver.cxx └── serializer ├── basics ├── buildfile └── driver.cxx ├── buildfile ├── fuzz-llvm ├── README.md ├── buildfile ├── convert.cxx └── driver.cxx └── roundtrip ├── buildfile ├── driver.cxx └── testscript /.gitattributes: -------------------------------------------------------------------------------- 1 | # This is a good default: files that are auto-detected by git to be text are 2 | # converted to the platform-native line ending (LF on Unix, CRLF on Windows) 3 | # in the working tree and to LF in the repository. 4 | # 5 | * text=auto 6 | 7 | # Use `eol=crlf` for files that should have the CRLF line ending both in the 8 | # working tree (even on Unix) and in the repository. 9 | # 10 | #*.bat text eol=crlf 11 | 12 | # Use `eol=lf` for files that should have the LF line ending both in the 13 | # working tree (even on Windows) and in the repository. 14 | # 15 | #*.sh text eol=lf 16 | 17 | # Use `binary` to make sure certain files are never auto-detected as text. 
18 | # 19 | #*.png binary 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .bdep/ 2 | 3 | # Local default options files. 4 | # 5 | .build2/local/ 6 | 7 | # Compiler/linker output. 8 | # 9 | *.d 10 | *.t 11 | *.i 12 | *.ii 13 | *.o 14 | *.obj 15 | *.so 16 | *.dll 17 | *.a 18 | *.lib 19 | *.exp 20 | *.pdb 21 | *.ilk 22 | *.exe 23 | *.exe.dlls/ 24 | *.exe.manifest 25 | *.pc 26 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pdjson"] 2 | path = pdjson 3 | url = https://github.com/boris-kolpackov/pdjson.git 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | This file contains information about the libstud authors for copyright 2 | purposes. 3 | 4 | Note that the libstud/json/pdjson.* files are from the pdjson project that 5 | released them into the public domain. 6 | 7 | The copyright for the code is held by the contributors of the code. The 8 | revision history in the version control system is the primary source of 9 | authorship information for copyright purposes. Contributors that have 10 | requested to also be noted explicitly in this file are listed below: 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | The copyright for the code is held by the contributors of the code (see the 2 | `AUTHORS` file). The code is licensed under permissive open source licensing 3 | terms (see the `LICENSE` file). When you contribute code to this project, you 4 | license it under these terms. 
Before contributing please make sure that these 5 | terms are acceptable to you (and to your employer(s), if they have rights to 6 | intellectual property that you create) and that the code being contributed is 7 | your original creation. 8 | 9 | The revision history in the version control system is the primary source of 10 | authorship information for copyright purposes. If, however, you would like 11 | to also be noted explicitly, please include the appropriate change to the 12 | `AUTHORS` file along with your contribution. 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2025 the libstud authors (see the AUTHORS file). 4 | Copyright (c) dedicated to the public domain for the libstud/json/pdjson.* files. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | During the pre-1.0.0 development see the revision history. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # libstud-json - JSON parser/serializer library for C++ 2 | 3 | A portable, dependency-free, MIT-licensed JSON pull-parser/push-serializer 4 | library for C++. 5 | 6 | The goal of this library is to provide a *pull*-style parser (instead of 7 | *push*/SAX or DOM) and *push*-style serializer with clean, modern interfaces 8 | and conforming, well-tested (and well-fuzzed, including the serializer) 9 | implementations. In particular, pull-style parsers are not very common, and we 10 | couldn't find any C++ implementations that also satisfy the above 11 | requirements. 12 | 13 | Typical parser usage (low-level API): 14 | 15 | ```c++ 16 | #include 17 | 18 | int main () 19 | { 20 | using namespace stud::json; 21 | 22 | parser p (std::cin, ""); 23 | 24 | for (event e: p) 25 | { 26 | switch (e) 27 | { 28 | case event::begin_object: 29 | // ... 30 | case event::end_object: 31 | // ... 32 | case event::name: 33 | { 34 | const std::string& n (p.name ()); 35 | // ... 36 | } 37 | case event::number: 38 | { 39 | std::int64_t n (p.value ()); 40 | // ... 
41 | } 42 | } 43 | } 44 | } 45 | ``` 46 | 47 | Or using the higher-level API to parse a specific JSON vocabulary: 48 | 49 | ```c++ 50 | #include 51 | 52 | int main () 53 | { 54 | using namespace stud::json; 55 | 56 | parser p (std::cin, ""); 57 | 58 | p.next_expect (event::begin_object); 59 | { 60 | std::string planet (p.next_expect_member_string ("planet")); 61 | 62 | p.next_expect_member_array ("measurements"); 63 | while (p.next_expect (event::number, event::end_array)) 64 | { 65 | std::uint64_t m (p.value ()); 66 | } 67 | } 68 | p.next_expect (event::end_object); 69 | } 70 | ``` 71 | 72 | See the [`libstud/json/parser.hxx`][parser.hxx] header for the parser 73 | interface details and the [`libstud/json/event.hxx`][event.hxx] header for the 74 | complete list of events. 75 | 76 | Typical serializer usage: 77 | 78 | ```c++ 79 | #include 80 | 81 | int main () 82 | { 83 | using namespace stud::json; 84 | 85 | stream_serializer s (std::cout); 86 | 87 | s.begin_object (); 88 | s.member ("planet", "Venus"); 89 | s.member_name ("measurement"); 90 | s.begin_array (); 91 | s.value (123); 92 | s.value (234); 93 | s.value (345); 94 | s.end_array (); 95 | s.end_object (); 96 | } 97 | ``` 98 | 99 | See the [`libstud/json/serializer.hxx`][serializer.hxx] header for the 100 | serializer interface details. 101 | 102 | See the [`NEWS`][news] file for changes and the 103 | [`cppget.org/libstud-json`][pkg] package page for build status. 
104 | 105 | [event.hxx]: https://github.com/libstud/libstud-json/blob/master/libstud/json/event.hxx 106 | [parser.hxx]: https://github.com/libstud/libstud-json/blob/master/libstud/json/parser.hxx 107 | [serializer.hxx]: https://github.com/libstud/libstud-json/blob/master/libstud/json/serializer.hxx 108 | [news]: https://github.com/libstud/libstud-json/blob/master/NEWS 109 | [pkg]: https://cppget.org/libstud-json 110 | -------------------------------------------------------------------------------- /build/.gitignore: -------------------------------------------------------------------------------- 1 | config.build 2 | root/ 3 | bootstrap/ 4 | -------------------------------------------------------------------------------- /build/bootstrap.build: -------------------------------------------------------------------------------- 1 | project = libstud-json 2 | 3 | using version 4 | using config 5 | using test 6 | using install 7 | using dist 8 | -------------------------------------------------------------------------------- /build/export.build: -------------------------------------------------------------------------------- 1 | $out_root/ 2 | { 3 | include libstud/json/ 4 | } 5 | 6 | export $out_root/libstud/json/$import.target 7 | -------------------------------------------------------------------------------- /build/root.build: -------------------------------------------------------------------------------- 1 | cxx.std = latest 2 | 3 | using cxx 4 | 5 | hxx{*}: extension = hxx 6 | ixx{*}: extension = ixx 7 | txx{*}: extension = txx 8 | cxx{*}: extension = cxx 9 | 10 | # Assume headers are importable unless stated otherwise. 11 | # 12 | hxx{*}: cxx.importable = true 13 | 14 | using c # For the c{} target type. 
15 | 16 | if ($cxx.target.system == 'win32-msvc') 17 | cc.poptions += -D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS 18 | 19 | if ($cxx.class == 'msvc') 20 | cc.coptions += /wd4251 /wd4275 /wd4800 21 | 22 | # The test target for cross-testing (running tests under Wine, etc). 23 | # 24 | test.target = $cxx.target 25 | -------------------------------------------------------------------------------- /buildfile: -------------------------------------------------------------------------------- 1 | ./: {*/ -build/ -pdjson/} doc{README.md NEWS} legal{LICENSE AUTHORS} manifest 2 | 3 | # Don't install tests. 4 | # 5 | tests/: install = false 6 | -------------------------------------------------------------------------------- /libstud/json/.gitignore: -------------------------------------------------------------------------------- 1 | # Generated version header. 2 | # 3 | version.hxx 4 | -------------------------------------------------------------------------------- /libstud/json/buildfile: -------------------------------------------------------------------------------- 1 | import intf_libs = libstud-optional%lib{stud-optional} 2 | 3 | lib{stud-json}: {hxx ixx txx cxx}{** -version} hxx{version} $intf_libs 4 | 5 | # The pdjson.c file is included into parser.cxx so exclude it from the 6 | # compilation. Note that pdjson.h must be installed since it is included in 7 | # parser.hxx. 8 | # 9 | lib{stud-json}: c{pdjson}: include = adhoc 10 | lib{stud-json}: h{pdjson} 11 | 12 | # Include the generated version header into the distribution (so that we don't 13 | # pick up an installed one) and don't remove it when cleaning in src (so that 14 | # clean results in a state identical to distributed). 15 | # 16 | hxx{version}: in{version} $src_root/manifest 17 | { 18 | dist = true 19 | clean = ($src_root != $out_root) 20 | } 21 | 22 | hxx{export}@./: cxx.importable = false 23 | 24 | # Build options. 
25 | # 26 | cxx.poptions =+ "-I$out_root" "-I$src_root" 27 | 28 | obja{*}: cxx.poptions += -DLIBSTUD_JSON_STATIC_BUILD 29 | objs{*}: cxx.poptions += -DLIBSTUD_JSON_SHARED_BUILD 30 | 31 | # Export options. 32 | # 33 | lib{stud-json}: 34 | { 35 | cxx.export.poptions = "-I$out_root" "-I$src_root" 36 | cxx.export.libs = $intf_libs 37 | } 38 | 39 | liba{stud-json}: cxx.export.poptions += -DLIBSTUD_JSON_STATIC 40 | libs{stud-json}: cxx.export.poptions += -DLIBSTUD_JSON_SHARED 41 | 42 | # For pre-releases use the complete version to make sure they cannot be used 43 | # in place of another pre-release or the final version. See the version module 44 | # for details on the version.* variable values. 45 | # 46 | if $version.pre_release 47 | lib{stud-json}: bin.lib.version = @"-$version.project_id" 48 | else 49 | lib{stud-json}: bin.lib.version = @"-$version.major.$version.minor" 50 | 51 | # Install into the libstud/json/ subdirectory of, say, /usr/include/ 52 | # recreating subdirectories. 53 | # 54 | {h hxx ixx txx}{*}: 55 | { 56 | install = include/libstud/json/ 57 | install.subdirs = true 58 | } 59 | -------------------------------------------------------------------------------- /libstud/json/event.hxx: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace stud 7 | { 8 | namespace json 9 | { 10 | // Parsing/serialization event. 
11 | // 12 | enum class event: std::uint8_t 13 | { 14 | begin_object = 1, 15 | end_object, 16 | begin_array, 17 | end_array, 18 | name, 19 | string, 20 | number, 21 | boolean, 22 | null 23 | }; 24 | 25 | constexpr std::size_t event_count = 9; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /libstud/json/export.hxx: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Normally we don't export class templates (but do complete specializations), 4 | // inline functions, and classes with only inline member functions. Exporting 5 | // classes that inherit from non-exported/imported bases (e.g., std::string) 6 | // will end up badly. The only known workarounds are to not inherit or to not 7 | // export. Also, MinGW GCC doesn't like seeing non-exported functions being 8 | // used before their inline definition. The workaround is to reorder code. In 9 | // the end it's all trial and error. 10 | 11 | #if defined(LIBSTUD_JSON_STATIC) // Using static. 12 | # define LIBSTUD_JSON_SYMEXPORT 13 | #elif defined(LIBSTUD_JSON_STATIC_BUILD) // Building static. 14 | # define LIBSTUD_JSON_SYMEXPORT 15 | #elif defined(LIBSTUD_JSON_SHARED) // Using shared. 16 | # ifdef _WIN32 17 | # define LIBSTUD_JSON_SYMEXPORT __declspec(dllimport) 18 | # else 19 | # define LIBSTUD_JSON_SYMEXPORT 20 | # endif 21 | #elif defined(LIBSTUD_JSON_SHARED_BUILD) // Building shared. 22 | # ifdef _WIN32 23 | # define LIBSTUD_JSON_SYMEXPORT __declspec(dllexport) 24 | # else 25 | # define LIBSTUD_JSON_SYMEXPORT 26 | # endif 27 | #else 28 | // If none of the above macros are defined, then we assume we are being used 29 | // by some third-party build system that cannot/doesn't signal the library 30 | // type. Note that this fallback works for both static and shared but in case 31 | // of shared will be sub-optimal compared to having dllimport. 
32 | // 33 | # define LIBSTUD_JSON_SYMEXPORT // Using static or shared. 34 | #endif 35 | -------------------------------------------------------------------------------- /libstud/json/parser.cxx: -------------------------------------------------------------------------------- 1 | #define PDJSON_SYMEXPORT static // See below. 2 | 3 | #include 4 | 5 | #include 6 | 7 | // There is an issue (segfault) with using std::current_exception() and 8 | // std::rethrow_exception() with older versions of libc++ on Linux. While the 9 | // exact root cause hasn't been determined, the suspicion is that something 10 | // gets messed up if we "smuggle" std::exception_ptr through extern "C" call 11 | // frames (we cannot even destroy such an exception without a segfault). We 12 | // also could not determine in which version exactly this has been fixed but 13 | // we know that libc++ 6.0.0 doesn't appear to have this issue (though we are 14 | // not entirely sure the issue is (only) in libc++; libgcc_s could also be 15 | // involved). 16 | // 17 | // The workaround is to just catch (and note) the exception and then throw a 18 | // new instance of generic std::istream::failure. In order not to drag the 19 | // below test into the header, we wrap exception_ptr with optional<> and use 20 | // NULL to indicate the presence of the exception when the workaround is 21 | // required. 22 | // 23 | // Note that if/when we drop this workaround, we should also get rid of 24 | // optional<> in stream::exception member. 
25 | // 26 | #undef LIBSTUD_JSON_NO_EXCEPTION_PTR 27 | 28 | #if defined (__linux__) && defined(__clang__) 29 | # if __has_include(<__config>) 30 | # include <__config> // _LIBCPP_VERSION 31 | # if _LIBCPP_VERSION < 6000 32 | # define LIBSTUD_JSON_NO_EXCEPTION_PTR 1 33 | # endif 34 | # endif 35 | #endif 36 | 37 | namespace stud 38 | { 39 | namespace json 40 | { 41 | using namespace std; 42 | 43 | parser:: 44 | ~parser () 45 | { 46 | json_close (impl_); 47 | } 48 | 49 | static int 50 | stream_get (void* x) 51 | { 52 | auto& s (*static_cast (x)); 53 | 54 | // In the multi-value mode reading of whitespaces/separators is split 55 | // between our code and pdjson's. As a result, these functions may end 56 | // up being called more than once after EOF is reached. Which is 57 | // something iostream does not handle gracefully. 58 | // 59 | if (!s.is->eof ()) 60 | { 61 | try 62 | { 63 | // We first peek not to trip failbit on EOF. 64 | // 65 | if (s.is->peek () != istream::traits_type::eof ()) 66 | return static_cast (s.is->get ()); 67 | } 68 | catch (...) 69 | { 70 | #ifndef LIBSTUD_JSON_NO_EXCEPTION_PTR 71 | s.exception = current_exception (); 72 | #else 73 | s.exception = nullptr; 74 | #endif 75 | } 76 | } 77 | 78 | return EOF; 79 | } 80 | 81 | static int 82 | stream_peek (void* x) 83 | { 84 | auto& s (*static_cast (x)); 85 | 86 | if (!s.is->eof ()) 87 | { 88 | try 89 | { 90 | auto c (s.is->peek ()); 91 | if (c != istream::traits_type::eof ()) 92 | return static_cast (c); 93 | } 94 | catch (...) 95 | { 96 | #ifndef LIBSTUD_JSON_NO_EXCEPTION_PTR 97 | s.exception = current_exception (); 98 | #else 99 | s.exception = nullptr; 100 | #endif 101 | } 102 | } 103 | 104 | return EOF; 105 | } 106 | 107 | // NOTE: watch out for exception safety (specifically, doing anything that 108 | // might throw after opening the stream). 
109 | // 110 | parser:: 111 | parser (istream& is, const char* n, bool mv, const char* sep) noexcept 112 | : input_name (n), 113 | stream_ {&is, nullopt}, 114 | multi_value_ (mv), 115 | separators_ (sep), 116 | raw_s_ (nullptr), 117 | raw_n_ (0) 118 | { 119 | json_open_user (impl_, &stream_get, &stream_peek, &stream_); 120 | json_set_streaming (impl_, multi_value_); 121 | } 122 | 123 | parser:: 124 | parser (const void* t, 125 | size_t s, 126 | const char* n, 127 | bool mv, 128 | const char* sep) noexcept 129 | : input_name (n), 130 | stream_ {nullptr, nullopt}, 131 | multi_value_ (mv), 132 | separators_ (sep), 133 | raw_s_ (nullptr), 134 | raw_n_ (0) 135 | { 136 | json_open_buffer (impl_, t, s); 137 | json_set_streaming (impl_, multi_value_); 138 | } 139 | 140 | optional parser:: 141 | next () 142 | { 143 | name_p_ = value_p_ = location_p_ = false; 144 | 145 | // Note that for now we don't worry about the state of the parser if 146 | // next_impl() throws assuming it is not going to be reused. 
147 | // 148 | if (peeked_) 149 | { 150 | parsed_ = peeked_; 151 | peeked_ = nullopt; 152 | } 153 | else 154 | parsed_ = next_impl (); 155 | 156 | return translate (*parsed_); 157 | } 158 | 159 | optional parser:: 160 | peek () 161 | { 162 | if (!peeked_) 163 | { 164 | if (parsed_) 165 | { 166 | cache_parsed_data (); 167 | cache_parsed_location (); 168 | } 169 | peeked_ = next_impl (); 170 | } 171 | return translate (*peeked_); 172 | } 173 | 174 | static inline const char* 175 | event_name (event e) 176 | { 177 | switch (e) 178 | { 179 | case event::begin_object: return "beginning of object"; 180 | case event::end_object: return "end of object"; 181 | case event::begin_array: return "beginning of array"; 182 | case event::end_array: return "end of array"; 183 | case event::name: return "member name"; 184 | case event::string: return "string value"; 185 | case event::number: return "numeric value"; 186 | case event::boolean: return "boolean value"; 187 | case event::null: return "null value"; 188 | } 189 | 190 | return ""; 191 | } 192 | 193 | bool parser:: 194 | next_expect (event p, optional s) 195 | { 196 | optional e (next ()); 197 | bool r; 198 | if (e && ((r = *e == p) || (s && *e == *s))) 199 | return r; 200 | 201 | string d ("expected "); 202 | d += event_name (p); 203 | 204 | if (s) 205 | { 206 | d += " or "; 207 | d += event_name (*s); 208 | } 209 | 210 | if (e) 211 | { 212 | d += " instead of "; 213 | d += event_name (*e); 214 | } 215 | 216 | throw invalid_json_input (input_name != nullptr ? 
input_name : "", 217 | line (), 218 | column (), 219 | position (), 220 | move (d)); 221 | } 222 | 223 | void parser:: 224 | next_expect_name (const char* n, bool su) 225 | { 226 | for (;;) 227 | { 228 | next_expect (event::name); 229 | 230 | if (name () == n) 231 | return; 232 | 233 | if (!su) 234 | break; 235 | 236 | next_expect_value_skip (); 237 | } 238 | 239 | string d ("expected object member name '"); 240 | d += n; 241 | d += "' instead of '"; 242 | d += name (); 243 | d += '\''; 244 | 245 | throw invalid_json_input (input_name != nullptr ? input_name : "", 246 | line (), 247 | column (), 248 | position (), 249 | move (d)); 250 | } 251 | 252 | void parser:: 253 | next_expect_value_skip () 254 | { 255 | optional e (next ()); 256 | 257 | if (e) 258 | { 259 | switch (*e) 260 | { 261 | case event::begin_object: 262 | case event::begin_array: 263 | { 264 | // Skip until matching end_object/array keeping track of nesting. 265 | // We are going to rely on the fact that we should either get such 266 | // an event or next() should throw. 267 | // 268 | event be (*e); 269 | event ee (be == event::begin_object 270 | ? event::end_object 271 | : event::end_array); 272 | 273 | for (size_t n (0);; ) 274 | { 275 | event e (*next ()); 276 | 277 | if (e == ee) 278 | { 279 | if (n == 0) 280 | break; 281 | 282 | --n; 283 | } 284 | else if (e == be) 285 | ++n; 286 | } 287 | 288 | return; 289 | } 290 | case event::string: 291 | case event::number: 292 | case event::boolean: 293 | case event::null: 294 | return; 295 | case event::name: 296 | case event::end_object: 297 | case event::end_array: 298 | break; 299 | } 300 | } 301 | 302 | string d ("expected value"); 303 | 304 | if (e) 305 | { 306 | d += " instead of "; 307 | d += event_name (*e); 308 | } 309 | 310 | throw invalid_json_input (input_name != nullptr ? 
input_name : "", 311 | line (), 312 | column (), 313 | position (), 314 | move (d)); 315 | } 316 | 317 | std::uint64_t parser:: 318 | line () const noexcept 319 | { 320 | if (!location_p_) 321 | { 322 | if (!parsed_) 323 | return 0; 324 | 325 | assert (!peeked_); 326 | 327 | return static_cast ( 328 | json_get_lineno (const_cast (impl_))); 329 | } 330 | 331 | return line_; 332 | } 333 | 334 | std::uint64_t parser:: 335 | column () const noexcept 336 | { 337 | if (!location_p_) 338 | { 339 | if (!parsed_) 340 | return 0; 341 | 342 | assert (!peeked_); 343 | 344 | return static_cast ( 345 | json_get_column (const_cast (impl_))); 346 | } 347 | 348 | return column_; 349 | } 350 | 351 | std::uint64_t parser:: 352 | position () const noexcept 353 | { 354 | if (!location_p_) 355 | { 356 | if (!parsed_) 357 | return 0; 358 | 359 | assert (!peeked_); 360 | 361 | return static_cast ( 362 | json_get_position (const_cast (impl_))); 363 | } 364 | 365 | return position_; 366 | } 367 | 368 | json_type parser:: 369 | next_impl () 370 | { 371 | raw_s_ = nullptr; 372 | raw_n_ = 0; 373 | json_type e; 374 | 375 | // Read characters between values skipping required separators and JSON 376 | // whitespaces. Return whether a required separator was encountered as 377 | // well as the first non-separator/whitespace character (which, if EOF, 378 | // should trigger a check for input/output errors). 379 | // 380 | // Note that the returned non-separator will not have been extracted 381 | // from the input (so position, column, etc. will still refer to its 382 | // predecessor). 383 | // 384 | auto skip_separators = [this] () -> pair 385 | { 386 | bool r (separators_ == nullptr); 387 | 388 | int c; 389 | for (; (c = json_source_peek (impl_)) != EOF; json_source_get (impl_)) 390 | { 391 | // User separator. 
392 | // 393 | if (separators_ != nullptr && *separators_ != '\0') 394 | { 395 | if (strchr (separators_, c) != nullptr) 396 | { 397 | r = true; 398 | continue; 399 | } 400 | } 401 | 402 | // JSON separator. 403 | // 404 | if (json_isspace (c)) 405 | { 406 | if (separators_ != nullptr && *separators_ == '\0') 407 | r = true; 408 | 409 | continue; 410 | } 411 | 412 | break; 413 | } 414 | 415 | return make_pair (r, c); 416 | }; 417 | 418 | // In the multi-value mode skip any instances of required separators 419 | // (and any other JSON whitespace) preceding the first JSON value. 420 | // 421 | if (multi_value_ && !parsed_ && !peeked_) 422 | { 423 | if (skip_separators ().second == EOF && stream_.is != nullptr) 424 | { 425 | if (stream_.exception) goto fail_rethrow; 426 | if (stream_.is->fail ()) goto fail_stream; 427 | } 428 | } 429 | 430 | e = json_next (impl_); 431 | 432 | // First check for a pending input/output error. 433 | // 434 | if (stream_.is != nullptr) 435 | { 436 | if (stream_.exception) goto fail_rethrow; 437 | if (stream_.is->fail ()) goto fail_stream; 438 | } 439 | 440 | // There are two ways to view separation between two values: as following 441 | // the first value or as preceding the second value. And one aspect that 442 | // is determined by this is whether a separation violation is a problem 443 | // with the first value or with the second, which becomes important if 444 | // the user bails out before parsing the second value. 445 | // 446 | // Consider these two unseparated value (yes, in JSON they are two 447 | // values, leading zeros are not allowed in JSON numbers): 448 | // 449 | // 01 450 | // 451 | // If the user bails out after parsing 0 in a stream that should have 452 | // been newline-delimited, they most likely would want to get an error 453 | // since this is most definitely an invalid value rather than two 454 | // values that are not properly separated. So in this light we handle 455 | // separators at the end of the first value. 
456 | // 457 | switch (e) 458 | { 459 | case JSON_DONE: 460 | { 461 | // Deal with the following value separators. 462 | // 463 | // Note that we must not do this for the second JSON_DONE (or the 464 | // first one in case there are no values) that signals the end of 465 | // input. 466 | // 467 | if (multi_value_ && 468 | (parsed_ || peeked_) && 469 | (peeked_ ? *peeked_ : *parsed_) != JSON_DONE) 470 | { 471 | auto p (skip_separators ()); 472 | 473 | if (p.second == EOF && stream_.is != nullptr) 474 | { 475 | if (stream_.exception) goto fail_rethrow; 476 | if (stream_.is->fail ()) goto fail_stream; 477 | } 478 | 479 | // Note that we don't require separators after the last value. 480 | // 481 | if (!p.first && p.second != EOF) 482 | { 483 | json_source_get (impl_); // Consume to update column number. 484 | goto fail_separation; 485 | } 486 | 487 | json_reset (impl_); 488 | } 489 | break; 490 | } 491 | case JSON_ERROR: goto fail_json; 492 | case JSON_STRING: 493 | case JSON_NUMBER: 494 | raw_s_ = json_get_string (impl_, &raw_n_); 495 | raw_n_--; // Includes terminating `\0`. 496 | break; 497 | case JSON_TRUE: raw_s_ = "true"; raw_n_ = 4; break; 498 | case JSON_FALSE: raw_s_ = "false"; raw_n_ = 5; break; 499 | case JSON_NULL: raw_s_ = "null"; raw_n_ = 4; break; 500 | default: break; 501 | } 502 | 503 | return e; 504 | 505 | fail_json: 506 | throw invalid_json_input ( 507 | input_name != nullptr ? input_name : "", 508 | static_cast (json_get_lineno (impl_)), 509 | static_cast (json_get_column (impl_)), 510 | static_cast (json_get_position (impl_)), 511 | json_get_error (impl_)); 512 | 513 | fail_separation: 514 | throw invalid_json_input ( 515 | input_name != nullptr ? input_name : "", 516 | static_cast (json_get_lineno (impl_)), 517 | static_cast (json_get_column (impl_)), 518 | static_cast (json_get_position (impl_)), 519 | "missing separator between JSON values"); 520 | 521 | fail_stream: 522 | throw invalid_json_input ( 523 | input_name != nullptr ? 
input_name : "", 524 | static_cast (json_get_lineno (impl_)), 525 | static_cast (json_get_column (impl_)), 526 | static_cast (json_get_position (impl_)), 527 | "unable to read JSON input text"); 528 | 529 | fail_rethrow: 530 | #ifndef LIBSTUD_JSON_NO_EXCEPTION_PTR 531 | rethrow_exception (move (*stream_.exception)); 532 | #else 533 | throw istream::failure ("unable to read"); 534 | #endif 535 | } 536 | 537 | optional parser:: 538 | translate (json_type e) const noexcept 539 | { 540 | switch (e) 541 | { 542 | case JSON_DONE: return nullopt; 543 | case JSON_OBJECT: return event::begin_object; 544 | case JSON_OBJECT_END: return event::end_object; 545 | case JSON_ARRAY: return event::begin_array; 546 | case JSON_ARRAY_END: return event::end_array; 547 | case JSON_STRING: 548 | { 549 | // This can be a value or, inside an object, a name from the 550 | // name/value pair. 551 | // 552 | size_t n; 553 | return json_get_context (const_cast (impl_), &n) == 554 | JSON_OBJECT && 555 | n % 2 == 1 556 | ? event::name 557 | : event::string; 558 | } 559 | case JSON_NUMBER: return event::number; 560 | case JSON_TRUE: return event::boolean; 561 | case JSON_FALSE: return event::boolean; 562 | case JSON_NULL: return event::null; 563 | case JSON_ERROR: assert (false); // Should've been handled by caller. 564 | } 565 | 566 | return nullopt; // Should never reach. 
567 | } 568 | 569 | void parser:: 570 | cache_parsed_data () 571 | { 572 | name_p_ = value_p_ = false; 573 | if (const optional e = translate (*parsed_)) 574 | { 575 | if (e == event::name) 576 | { 577 | name_.assign (raw_s_, raw_n_); 578 | name_p_ = true; 579 | } 580 | else if (value_event (e)) 581 | { 582 | value_.assign (raw_s_, raw_n_); 583 | value_p_ = true; 584 | } 585 | } 586 | } 587 | 588 | void parser:: 589 | cache_parsed_location () noexcept 590 | { 591 | line_ = static_cast (json_get_lineno (impl_)); 592 | column_ = static_cast (json_get_column (impl_)); 593 | position_ = static_cast (json_get_position (impl_)); 594 | location_p_ = true; 595 | } 596 | 597 | bool parser:: 598 | value_event (optional e) noexcept 599 | { 600 | if (!e) 601 | return false; 602 | 603 | switch (*e) 604 | { 605 | case event::string: 606 | case event::number: 607 | case event::boolean: 608 | case event::null: 609 | return true; 610 | default: 611 | return false; 612 | } 613 | } 614 | 615 | [[noreturn]] void parser:: 616 | throw_invalid_value (const char* type, const char* v, size_t n) const 617 | { 618 | string d (string ("invalid ") + type + " value: '"); 619 | d.append (v, n); 620 | d += '\''; 621 | 622 | throw invalid_json_input (input_name != nullptr ? input_name : "", 623 | line (), 624 | column (), 625 | position (), 626 | move (d)); 627 | } 628 | } // namespace json 629 | } // namespace stud 630 | 631 | // Include the implementation into our translation unit (instead of compiling 632 | // it separately) to (hopefully) get function inlining without LTO. 633 | // 634 | // Let's keep it last since the implementation defines a couple of macros. 
635 | // 636 | #if defined(__clang__) || defined(__GNUC__) 637 | # pragma GCC diagnostic ignored "-Wunused-function" 638 | #endif 639 | 640 | extern "C" 641 | { 642 | #define PDJSON_STACK_INC 16 643 | #define PDJSON_STACK_MAX 2048 644 | #include "pdjson.c" 645 | } 646 | -------------------------------------------------------------------------------- /libstud/json/parser.hxx: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include // size_t 6 | #include // uint64_t 7 | #include // pair 8 | #include // exception_ptr 9 | #include // invalid_argument 10 | 11 | #include // stud::optional is std::optional or similar. 12 | 13 | #include 14 | 15 | #include // Implementation details. 16 | 17 | #include 18 | 19 | namespace stud 20 | { 21 | // Using the RFC8259 terminology: JSON (input) text, JSON value, object 22 | // member. 23 | // 24 | namespace json 25 | { 26 | class invalid_json_input: public std::invalid_argument 27 | { 28 | public: 29 | std::string name; 30 | std::uint64_t line; 31 | std::uint64_t column; 32 | std::uint64_t position; 33 | 34 | invalid_json_input (std::string name, 35 | std::uint64_t line, 36 | std::uint64_t column, 37 | std::uint64_t position, 38 | const std::string& description); 39 | 40 | invalid_json_input (std::string name, 41 | std::uint64_t line, 42 | std::uint64_t column, 43 | std::uint64_t position, 44 | const char* description); 45 | }; 46 | 47 | class LIBSTUD_JSON_SYMEXPORT parser 48 | { 49 | public: 50 | const char* input_name; 51 | 52 | // Construction. 53 | // 54 | 55 | // Parse JSON input text from std::istream. 56 | // 57 | // The name argument is used to identify the input being parsed. Note 58 | // that the stream, name, and separators are kept as references so they 59 | // must outlive the parser instance. 
60 | // 61 | // If stream exceptions are enabled then the std::ios_base::failure 62 | // exception is used to report input/output errors (badbit and failbit). 63 | // Otherwise, those are reported as the invalid_json_input exception. 64 | // 65 | // If multi_value is true, enable the multi-value mode in which case the 66 | // input stream may contain multiple JSON values (more precisely, zero 67 | // or more). If false (the default), parsing will fail unless there is 68 | // exactly one JSON value in the input stream. 69 | // 70 | // If multi_value is true, the separators argument specifies the 71 | // required separator characters between JSON values. At least one of 72 | // them must be present between every pair of JSON values (in addition 73 | // to any number of JSON whitespaces). No separators are required after 74 | // the last JSON value (but any found will be skipped). 75 | // 76 | // Specifically, if it is NULL, then no separation is required (that is, 77 | // both `{...}{...}` and `{...} {...}` would be valid). If it is empty, 78 | // then at least one JSON whitespace is required. And if it is non- 79 | // empty, then at least one of its characters must be present (for 80 | // example, "\n\t" would require at least one newline or TAB character 81 | // between JSON values). 82 | // 83 | // Note that a separator need not be valid JSON whitespace: any 84 | // character is acceptable (though it probably shouldn't be an object, 85 | // array, or string delimiter and should not occur within a non-self- 86 | // delimited top-level value, such as `true`, `false`, `null`, or a 87 | // number). All instances of required separators before and after a 88 | // value are skipped. Therefore JSON Text Sequences (RFC 7464; AKA 89 | // Record Separator-delimited JSON), which requires the RS (0x1E) 90 | // character before each value, can be handled as well. 
91 | // 92 | parser (std::istream&, 93 | const std::string& name, 94 | bool multi_value = false, 95 | const char* separators = nullptr) noexcept; 96 | 97 | parser (std::istream&, 98 | const char* name, 99 | bool multi_value = false, 100 | const char* separators = nullptr) noexcept; 101 | 102 | parser (std::istream&, 103 | std::string&&, 104 | bool = false, 105 | const char* = nullptr) = delete; 106 | 107 | // Parse a memory buffer that contains the entire JSON input text. 108 | // 109 | // The name argument is used to identify the input being parsed. Note 110 | // that the buffer, name, and separators are kept as references so they 111 | // must outlive the parser instance. 112 | // 113 | parser (const void* text, 114 | std::size_t size, 115 | const std::string& name, 116 | bool multi_value = false, 117 | const char* separators = nullptr) noexcept; 118 | 119 | parser (const void* text, 120 | std::size_t size, 121 | const char* name, 122 | bool multi_value = false, 123 | const char* separators = nullptr) noexcept; 124 | 125 | parser (const void*, 126 | std::size_t, 127 | std::string&&, 128 | bool = false, 129 | const char* = nullptr) = delete; 130 | 131 | // Similar to the above but parse a string. 132 | // 133 | parser (const std::string& text, 134 | const std::string& name, 135 | bool multi_value = false, 136 | const char* separators = nullptr) noexcept; 137 | 138 | parser (const std::string& text, 139 | const char* name, 140 | bool multi_value = false, 141 | const char* separators = nullptr) noexcept; 142 | 143 | parser (const std::string&, 144 | std::string&&, 145 | bool = false, 146 | const char* = nullptr) = delete; 147 | 148 | // Similar to the above but parse a C-string. 
149 | // 150 | parser (const char* text, 151 | const std::string& name, 152 | bool multi_value = false, 153 | const char* separators = nullptr) noexcept; 154 | 155 | parser (const char* text, 156 | const char* name, 157 | bool multi_value = false, 158 | const char* separators = nullptr) noexcept; 159 | 160 | parser (const char*, 161 | std::string&&, 162 | bool = false, 163 | const char* = nullptr) = delete; 164 | 165 | parser (parser&&) = delete; 166 | parser (const parser&) = delete; 167 | 168 | parser& operator= (parser&&) = delete; 169 | parser& operator= (const parser&) = delete; 170 | 171 | // Event iteration. 172 | // 173 | 174 | // Return the next event or nullopt if end of input is reached. 175 | // 176 | // In the single-value parsing mode (default) the parsing code could 177 | // look like this: 178 | // 179 | // while (optional e = p.next ()) 180 | // { 181 | // switch (*e) 182 | // { 183 | // // ... 184 | // } 185 | // } 186 | // 187 | // In the multi-value mode the parser additionally returns nullopt after 188 | // every JSON value parsed (so there will be two nullopt's after the 189 | // last JSON value, the second indicating the end of input). 190 | // 191 | // One way to perform multi-value parsing is with the help of the peek() 192 | // function (see below): 193 | // 194 | // while (p.peek ()) 195 | // { 196 | // while (optional e = p.next ()) 197 | // { 198 | // switch (*e) 199 | // { 200 | // //... 201 | // } 202 | // } 203 | // } 204 | // 205 | // Note that while the single-value mode will always parse exactly one 206 | // value, the multi-value mode will accept zero values in which case a 207 | // single nullopt is returned. 208 | // 209 | optional 210 | next (); 211 | 212 | // The range-based for loop support. 213 | // 214 | // In the single-value parsing mode (default) the parsing code could 215 | // look like this: 216 | // 217 | // for (event e: p) 218 | // { 219 | // switch (e) 220 | // { 221 | // //... 
222 | // } 223 | // } 224 | // 225 | // And in the multi-value mode (see next() for more information) like 226 | // this: 227 | // 228 | // while (p.peek ()) 229 | // { 230 | // for (event e: p) 231 | // { 232 | // switch (e) 233 | // { 234 | // //... 235 | // } 236 | // } 237 | // } 238 | // 239 | // Note that generally, the iterator interface doesn't make much sense 240 | // for the parser so for now we have an implementation that is just 241 | // enough for the range-based for. 242 | // 243 | struct iterator; 244 | 245 | iterator begin () {return iterator (this, next ());} 246 | iterator end () {return iterator (nullptr, nullopt);} 247 | 248 | // Return the next event without considering it parsed. In other words, 249 | // after this call, any subsequent calls to peek() and the next call to 250 | // next() (if any) will all return the same event. 251 | // 252 | // Note that the name, value, and line corresponding to the peeked event 253 | // are not accessible with name(), value() and line(); these functions 254 | // will still return values corresponding to the most recent call to 255 | // next(). The peeked values, however, can be accessed in the raw form 256 | // using data(). 257 | // 258 | optional 259 | peek (); 260 | 261 | 262 | // Event data access. 263 | // 264 | 265 | // Return the object member name. 266 | // 267 | const std::string& 268 | name (); 269 | 270 | // Any value (string, number, boolean, and null) can be retrieved as a 271 | // string. Calling this function after any non-value events is illegal. 272 | // 273 | // Note that the value is returned as a non-const string reference and 274 | // you are allowed to move the value out of it. However, this should not 275 | // be done unnecessarily or in cases where the small string optimization 276 | // is likely since the string's buffer is reused to store subsequent 277 | // values. 278 | // 279 | std::string& 280 | value (); 281 | 282 | // Convert the value to an integer, floating point, or bool. 
Throw 283 | // invalid_json_input if the conversion is impossible without a loss. 284 | // 285 | template 286 | T 287 | value () const; 288 | 289 | // Return the value or object member name in the raw form. 290 | // 291 | // Calling this function on non-value/name events is legal in which case 292 | // NULL is returned. Note also that the returned data corresponds to the 293 | // most recent event, whether peeked or parsed. 294 | // 295 | std::pair 296 | data () const {return std::make_pair (raw_s_, raw_n_);} 297 | 298 | 299 | // Higher-level API suitable for parsing specific JSON vocabularies. 300 | // 301 | // The API summary: 302 | // 303 | // void next_expect (event); 304 | // bool next_expect (event primary, event secondary); 305 | // 306 | // void next_expect_name (string name, bool skip_unknown = false); 307 | // 308 | // std::string& next_expect_string (); 309 | // T next_expect_string (); 310 | // std::string& next_expect_number (); 311 | // T next_expect_number (); 312 | // std::string& next_expect_boolean (); 313 | // T next_expect_boolean(); 314 | // 315 | // std::string* next_expect_string_null (); 316 | // optional next_expect_string_null (); 317 | // std::string* next_expect_number_null (); 318 | // optional next_expect_number_null (); 319 | // std::string* next_expect_boolean_null (); 320 | // optional next_expect_boolean_null(); 321 | // 322 | // std::string& next_expect_member_string (string name, bool = false); 323 | // T next_expect_member_string (string name, bool = false); 324 | // std::string& next_expect_member_number (string name, bool = false); 325 | // T next_expect_member_number (string name, bool = false); 326 | // std::string& next_expect_member_boolean (string name, bool = false); 327 | // T next_expect_member_boolean(string name, bool = false); 328 | // 329 | // std::string* next_expect_member_string_null (string, bool = false); 330 | // optional next_expect_member_string_null (string, bool = false); 331 | // std::string* 
next_expect_member_number_null (string, bool = false); 332 | // optional next_expect_member_number_null (string, bool = false); 333 | // std::string* next_expect_member_boolean_null (string, bool = false); 334 | // optional next_expect_member_boolean_null(string, bool = false); 335 | // 336 | // void next_expect_member_object (string name, bool = false); 337 | // bool next_expect_member_object_null(string name, bool = false); 338 | // 339 | // void next_expect_member_array (string name, bool = false); 340 | // bool next_expect_member_array_null(string name, bool = false); 341 | // 342 | // void next_expect_value_skip(); 343 | 344 | // Get the next event and make sure that it's what's expected: primary 345 | // or, if specified, secondary event. If it is not either, then throw 346 | // invalid_json_input with appropriate description. Return true if it is 347 | // primary. 348 | // 349 | // The secondary expected event is primarily useful for handling 350 | // optional members. For example: 351 | // 352 | // while (p.next_expect (event::name, event::end_object)) 353 | // { 354 | // // Handle object member. 355 | // } 356 | // 357 | // Or homogeneous arrays: 358 | // 359 | // while (p.next_expect (event::string, event::end_array)) 360 | // { 361 | // // Handle array element. 362 | // } 363 | // 364 | // Or values that can be null: 365 | // 366 | // if (p.next_expect (event::begin_object, event::null)) 367 | // { 368 | // // Parse object. 369 | // } 370 | // 371 | bool 372 | next_expect (event primary, optional secondary = nullopt); 373 | 374 | // Get the next event and make sure it is event::name and the object 375 | // member matches the specified name. If either is not, then throw 376 | // invalid_json_input with appropriate description. If skip_unknown is 377 | // true, then skip over unknown member names until a match is found. 
378 | // 379 | void 380 | next_expect_name (const char* name, bool skip_unknown = false); 381 | 382 | void 383 | next_expect_name (const std::string&, bool = false); 384 | 385 | // Get the next event and make sure it is event:: returning its 386 | // value similar to the value() functions. If it is not, then throw 387 | // invalid_json_input with appropriate description. 388 | // 389 | std::string& 390 | next_expect_string (); 391 | 392 | template 393 | T 394 | next_expect_string (); 395 | 396 | std::string& 397 | next_expect_number (); 398 | 399 | template 400 | T 401 | next_expect_number (); 402 | 403 | std::string& 404 | next_expect_boolean (); 405 | 406 | template 407 | T 408 | next_expect_boolean (); 409 | 410 | // Similar to next_expect_() but in addition to event:: also 411 | // allow event::null, in which case returning no value. 412 | // 413 | std::string* 414 | next_expect_string_null (); 415 | 416 | template 417 | optional 418 | next_expect_string_null (); 419 | 420 | std::string* 421 | next_expect_number_null (); 422 | 423 | template 424 | optional 425 | next_expect_number_null (); 426 | 427 | std::string* 428 | next_expect_boolean_null (); 429 | 430 | template 431 | optional 432 | next_expect_boolean_null (); 433 | 434 | // Call next_expect_name() followed by next_expect_[_null]() 435 | // returning its result. In other words, parse the entire object member 436 | // with the specifed name and of type , returning its value. 
437 | 438 | // next_expect_member_string() 439 | // 440 | std::string& 441 | next_expect_member_string (const char* name, bool skip_unknown = false); 442 | 443 | std::string& 444 | next_expect_member_string (const std::string&, bool = false); 445 | 446 | template 447 | T 448 | next_expect_member_string (const char*, bool = false); 449 | 450 | template 451 | T 452 | next_expect_member_string (const std::string&, bool = false); 453 | 454 | // next_expect_member_number() 455 | // 456 | std::string& 457 | next_expect_member_number (const char* name, bool skip_unknown = false); 458 | 459 | std::string& 460 | next_expect_member_number (const std::string&, bool = false); 461 | 462 | template 463 | T 464 | next_expect_member_number (const char*, bool = false); 465 | 466 | template 467 | T 468 | next_expect_member_number (const std::string&, bool = false); 469 | 470 | // next_expect_member_boolean() 471 | // 472 | std::string& 473 | next_expect_member_boolean (const char* name, bool skip_unknown = false); 474 | 475 | std::string& 476 | next_expect_member_boolean (const std::string&, bool = false); 477 | 478 | template 479 | T 480 | next_expect_member_boolean (const char*, bool = false); 481 | 482 | template 483 | T 484 | next_expect_member_boolean (const std::string&, bool = false); 485 | 486 | // next_expect_member_string_null() 487 | // 488 | std::string* 489 | next_expect_member_string_null (const char*, bool = false); 490 | 491 | std::string* 492 | next_expect_member_string_null (const std::string&, bool = false); 493 | 494 | template 495 | optional 496 | next_expect_member_string_null (const char*, bool = false); 497 | 498 | template 499 | optional 500 | next_expect_member_string_null (const std::string&, bool = false); 501 | 502 | // next_expect_member_number_null() 503 | // 504 | std::string* 505 | next_expect_member_number_null (const char*, bool = false); 506 | 507 | std::string* 508 | next_expect_member_number_null (const std::string&, bool = false); 509 | 510 | 
template 511 | optional 512 | next_expect_member_number_null (const char*, bool = false); 513 | 514 | template 515 | optional 516 | next_expect_member_number_null (const std::string&, bool = false); 517 | 518 | // next_expect_member_boolean_null() 519 | // 520 | std::string* 521 | next_expect_member_boolean_null (const char*, bool = false); 522 | 523 | std::string* 524 | next_expect_member_boolean_null (const std::string&, bool = false); 525 | 526 | template 527 | optional 528 | next_expect_member_boolean_null (const char*, bool = false); 529 | 530 | template 531 | optional 532 | next_expect_member_boolean_null (const std::string&, bool = false); 533 | 534 | // Call next_expect_name() followed by next_expect(event::begin_object). 535 | // In the _null version also allow event::null, in which case return 536 | // false. 537 | // 538 | void 539 | next_expect_member_object (const char* name, bool skip_unknown = false); 540 | 541 | void 542 | next_expect_member_object (const std::string&, bool = false); 543 | 544 | bool 545 | next_expect_member_object_null (const char*, bool = false); 546 | 547 | bool 548 | next_expect_member_object_null (const std::string&, bool = false); 549 | 550 | // Call next_expect_name() followed by next_expect(event::begin_array). 551 | // In the _null version also allow event::null, in which case return 552 | // false. 553 | // 554 | void 555 | next_expect_member_array (const char* name, bool skip_unknown = false); 556 | 557 | void 558 | next_expect_member_array (const std::string&, bool = false); 559 | 560 | bool 561 | next_expect_member_array_null (const char*, bool = false); 562 | 563 | bool 564 | next_expect_member_array_null (const std::string&, bool = false); 565 | 566 | // Get the next event and make sure it is the beginning of a value 567 | // (begin_object, begin_array, string, number, boolean, null). If it is 568 | // not, then throw invalid_json_input with appropriate description. 
569 | // Otherwise, skip until the end of the value, recursively in case of 570 | // object and array. 571 | // 572 | // This function is primarily useful for skipping unknown object 573 | // members, for example: 574 | // 575 | // while (p.next_expect (event::name, event::end_object)) 576 | // { 577 | // if (p.name () == "known") 578 | // { 579 | // // Handle known member. 580 | // } 581 | // else 582 | // p.next_expect_value_skip (); 583 | // } 584 | // 585 | void 586 | next_expect_value_skip (); 587 | 588 | // Parsing location. 589 | // 590 | 591 | // Return the line number (1-based) corresponding to the most recently 592 | // parsed event or 0 if nothing has been parsed yet. 593 | // 594 | std::uint64_t 595 | line () const noexcept; 596 | 597 | // Return the column number (1-based) corresponding to the beginning of 598 | // the most recently parsed event or 0 if nothing has been parsed yet. 599 | // 600 | std::uint64_t 601 | column () const noexcept; 602 | 603 | // Return the position (byte offset) pointing immediately after the most 604 | // recently parsed event or 0 if nothing has been parsed yet. 605 | // 606 | std::uint64_t 607 | position () const noexcept; 608 | 609 | // Implementation details. 610 | // 611 | public: 612 | struct iterator 613 | { 614 | using value_type = event; 615 | 616 | explicit 617 | iterator (parser* p = nullptr, optional e = nullopt) 618 | : p_ (p), e_ (e) {} 619 | 620 | event operator* () const {return *e_;} 621 | iterator& operator++ () {e_ = p_->next (); return *this;} 622 | 623 | // Comparison only makes sense when comparing to end (eof). 
624 | // 625 | bool operator== (iterator y) const {return !e_ && !y.e_;} 626 | bool operator!= (iterator y) const {return !(*this == y);} 627 | 628 | private: 629 | parser* p_; 630 | optional e_; 631 | }; 632 | 633 | struct stream 634 | { 635 | std::istream* is; 636 | optional exception; 637 | }; 638 | 639 | [[noreturn]] void 640 | throw_invalid_value (const char* type, const char*, std::size_t) const; 641 | 642 | ~parser (); 643 | 644 | private: 645 | // Functionality shared by next() and peek(). 646 | // 647 | json_type 648 | next_impl (); 649 | 650 | // Translate the event produced by the most recent call to next_impl(). 651 | // 652 | // Note that the underlying parser state determines whether name or 653 | // value is returned when translating JSON_STRING. 654 | // 655 | optional 656 | translate (json_type) const noexcept; 657 | 658 | // Cache state (name/value) produced by the most recent call to 659 | // next_impl(). 660 | // 661 | void 662 | cache_parsed_data (); 663 | 664 | // Cache the location numbers as determined by the most recent call to 665 | // next_impl(). 666 | // 667 | void 668 | cache_parsed_location () noexcept; 669 | 670 | // Return true if this is a value event (string, number, boolean, or 671 | // null). 672 | // 673 | static bool 674 | value_event (optional) noexcept; 675 | 676 | stream stream_; 677 | 678 | bool multi_value_; 679 | const char* separators_; 680 | 681 | // The *_p_ members indicate whether the value is present (cached). 682 | // Note: not using optional not to reallocate the string's buffer. 683 | // 684 | std::string name_; bool name_p_ = false; 685 | std::string value_; bool value_p_ = false; 686 | std::uint64_t line_, column_, position_; bool location_p_ = false; 687 | 688 | optional parsed_; // Current parsed event if any. 689 | optional peeked_; // Current peeked event if any. 690 | 691 | ::json_stream impl_[1]; 692 | 693 | // Cached raw value. 
694 | // 695 | const char* raw_s_; 696 | std::size_t raw_n_; 697 | }; 698 | } 699 | } 700 | 701 | #include 702 | -------------------------------------------------------------------------------- /libstud/json/parser.ixx: -------------------------------------------------------------------------------- 1 | #include 2 | #include // numeric_limits 3 | #include // move() 4 | #include 5 | #include // strto*() 6 | #include // enable_if, is_* 7 | #include // strlen() 8 | 9 | namespace stud 10 | { 11 | namespace json 12 | { 13 | inline invalid_json_input:: 14 | invalid_json_input (std::string n, 15 | std::uint64_t l, 16 | std::uint64_t c, 17 | std::uint64_t p, 18 | const std::string& d) 19 | : invalid_json_input (move (n), l, c, p, d.c_str ()) 20 | { 21 | } 22 | 23 | inline invalid_json_input:: 24 | invalid_json_input (std::string n, 25 | std::uint64_t l, 26 | std::uint64_t c, 27 | std::uint64_t p, 28 | const char* d) 29 | : invalid_argument (d), 30 | name (std::move (n)), 31 | line (l), column (c), position (p) 32 | { 33 | } 34 | 35 | inline parser:: 36 | parser (std::istream& is, 37 | const std::string& n, 38 | bool mv, 39 | const char* sep) noexcept 40 | : parser (is, n.c_str (), mv, sep) 41 | { 42 | } 43 | 44 | inline parser:: 45 | parser (const void* t, 46 | std::size_t s, 47 | const std::string& n, 48 | bool mv, 49 | const char* sep) noexcept 50 | : parser (t, s, n.c_str (), mv, sep) 51 | { 52 | } 53 | 54 | inline parser:: 55 | parser (const std::string& t, 56 | const std::string& n, 57 | bool mv, 58 | const char* sep) noexcept 59 | : parser (t.data (), t.size (), n.c_str (), mv, sep) 60 | { 61 | } 62 | 63 | inline parser:: 64 | parser (const std::string& t, 65 | const char* n, 66 | bool mv, 67 | const char* sep) noexcept 68 | : parser (t.data (), t.size (), n, mv, sep) 69 | { 70 | } 71 | 72 | inline parser:: 73 | parser (const char* t, 74 | const std::string& n, 75 | bool mv, 76 | const char* sep) noexcept 77 | : parser (t, std::strlen (t), n.c_str (), mv, sep) 78 | { 
79 | } 80 | 81 | inline parser:: 82 | parser (const char* t, 83 | const char* n, 84 | bool mv, 85 | const char* sep) noexcept 86 | : parser (t, std::strlen (t), n, mv, sep) 87 | { 88 | } 89 | 90 | inline const std::string& parser:: 91 | name () 92 | { 93 | if (!name_p_) 94 | { 95 | assert (parsed_ && !peeked_ && !value_p_); 96 | cache_parsed_data (); 97 | assert (name_p_); 98 | } 99 | return name_; 100 | } 101 | 102 | inline std::string& parser:: 103 | value () 104 | { 105 | if (!value_p_) 106 | { 107 | assert (parsed_ && !peeked_ && !name_p_); 108 | cache_parsed_data (); 109 | assert (value_p_); 110 | } 111 | return value_; 112 | } 113 | 114 | // Note: one day we will be able to use C++17 from_chars() which was made 115 | // exactly for this. 116 | // 117 | template 118 | inline typename std::enable_if::value, T>::type 119 | parse_value (const char* b, size_t, const parser&) 120 | { 121 | return *b == 't'; 122 | } 123 | 124 | template 125 | inline typename std::enable_if< 126 | std::is_integral::value && 127 | std::is_signed::value && 128 | !std::is_same::value, T>::type 129 | parse_value (const char* b, size_t n, const parser& p) 130 | { 131 | char* e (nullptr); 132 | errno = 0; // We must clear it according to POSIX. 133 | std::int64_t v (strtoll (b, &e, 10)); // Can't throw. 134 | 135 | if (e == b || e != b + n || errno == ERANGE || 136 | v < std::numeric_limits::min () || 137 | v > std::numeric_limits::max ()) 138 | p.throw_invalid_value ("signed integer", b, n); 139 | 140 | return static_cast (v); 141 | } 142 | 143 | template 144 | inline typename std::enable_if< 145 | std::is_integral::value && 146 | std::is_unsigned::value && 147 | !std::is_same::value, T>::type 148 | parse_value (const char* b, size_t n, const parser& p) 149 | { 150 | char* e (nullptr); 151 | errno = 0; // We must clear it according to POSIX. 152 | std::uint64_t v (strtoull (b, &e, 10)); // Can't throw. 
153 | 154 | if (e == b || e != b + n || errno == ERANGE || 155 | v > std::numeric_limits::max ()) 156 | p.throw_invalid_value ("unsigned integer", b, n); 157 | 158 | return static_cast (v); 159 | } 160 | 161 | template 162 | inline typename std::enable_if::value, T>::type 163 | parse_value (const char* b, size_t n, const parser& p) 164 | { 165 | char* e (nullptr); 166 | errno = 0; // We must clear it according to POSIX. 167 | T r (std::strtof (b, &e)); 168 | 169 | if (e == b || e != b + n || errno == ERANGE) 170 | p.throw_invalid_value ("float", b, n); 171 | 172 | return r; 173 | } 174 | 175 | template 176 | inline typename std::enable_if::value, T>::type 177 | parse_value (const char* b, size_t n, const parser& p) 178 | { 179 | char* e (nullptr); 180 | errno = 0; // We must clear it according to POSIX. 181 | T r (std::strtod (b, &e)); 182 | 183 | if (e == b || e != b + n || errno == ERANGE) 184 | p.throw_invalid_value ("double", b, n); 185 | 186 | return r; 187 | } 188 | 189 | template 190 | inline typename std::enable_if::value, T>::type 191 | parse_value (const char* b, size_t n, const parser& p) 192 | { 193 | char* e (nullptr); 194 | errno = 0; // We must clear it according to POSIX. 
195 | T r (std::strtold (b, &e)); 196 | 197 | if (e == b || e != b + n || errno == ERANGE) 198 | p.throw_invalid_value ("long double", b, n); 199 | 200 | return r; 201 | } 202 | 203 | template 204 | inline T parser:: 205 | value () const 206 | { 207 | if (!value_p_) 208 | { 209 | assert (parsed_ && !peeked_ && value_event (translate (*parsed_))); 210 | return parse_value (raw_s_, raw_n_, *this); 211 | } 212 | 213 | return parse_value (value_.data (), value_.size (), *this); 214 | } 215 | 216 | inline void parser:: 217 | next_expect_name (const std::string& n, bool su) 218 | { 219 | next_expect_name (n.c_str (), su); 220 | } 221 | 222 | // next_expect_() 223 | // 224 | inline std::string& parser:: 225 | next_expect_string () 226 | { 227 | next_expect (event::string); 228 | return value (); 229 | } 230 | 231 | template 232 | inline T parser:: 233 | next_expect_string () 234 | { 235 | next_expect (event::string); 236 | return value (); 237 | } 238 | 239 | inline std::string& parser:: 240 | next_expect_number () 241 | { 242 | next_expect (event::number); 243 | return value (); 244 | } 245 | 246 | template 247 | inline T parser:: 248 | next_expect_number () 249 | { 250 | next_expect (event::number); 251 | return value (); 252 | } 253 | 254 | inline std::string& parser:: 255 | next_expect_boolean () 256 | { 257 | next_expect (event::boolean); 258 | return value (); 259 | } 260 | 261 | template 262 | inline T parser:: 263 | next_expect_boolean () 264 | { 265 | next_expect (event::boolean); 266 | return value (); 267 | } 268 | 269 | // next_expect__null() 270 | // 271 | inline std::string* parser:: 272 | next_expect_string_null () 273 | { 274 | return next_expect (event::string, event::null) ? &value () : nullptr; 275 | } 276 | 277 | template 278 | inline optional parser:: 279 | next_expect_string_null () 280 | { 281 | return next_expect (event::string, event::null) 282 | ? 
optional (value ()) 283 | : nullopt; 284 | } 285 | 286 | inline std::string* parser:: 287 | next_expect_number_null () 288 | { 289 | return next_expect (event::number, event::null) ? &value () : nullptr; 290 | } 291 | 292 | template 293 | inline optional parser:: 294 | next_expect_number_null () 295 | { 296 | return next_expect (event::number, event::null) 297 | ? optional (value ()) 298 | : nullopt; 299 | } 300 | 301 | inline std::string* parser:: 302 | next_expect_boolean_null () 303 | { 304 | return next_expect (event::boolean, event::null) ? &value () : nullptr; 305 | } 306 | 307 | template 308 | inline optional parser:: 309 | next_expect_boolean_null () 310 | { 311 | return next_expect (event::boolean, event::null) 312 | ? optional (value ()) 313 | : nullopt; 314 | } 315 | 316 | // next_expect_member_string() 317 | // 318 | inline std::string& parser:: 319 | next_expect_member_string (const char* n, bool su) 320 | { 321 | next_expect_name (n, su); 322 | return next_expect_string (); 323 | } 324 | 325 | inline std::string& parser:: 326 | next_expect_member_string (const std::string& n, bool su) 327 | { 328 | return next_expect_member_string (n.c_str (), su); 329 | } 330 | 331 | template 332 | inline T parser:: 333 | next_expect_member_string (const char* n, bool su) 334 | { 335 | next_expect_name (n, su); 336 | return next_expect_string (); 337 | } 338 | 339 | template 340 | inline T parser:: 341 | next_expect_member_string (const std::string& n, bool su) 342 | { 343 | return next_expect_member_string (n.c_str (), su); 344 | } 345 | 346 | // next_expect_member_number() 347 | // 348 | inline std::string& parser:: 349 | next_expect_member_number (const char* n, bool su) 350 | { 351 | next_expect_name (n, su); 352 | return next_expect_number (); 353 | } 354 | 355 | inline std::string& parser:: 356 | next_expect_member_number (const std::string& n, bool su) 357 | { 358 | return next_expect_member_number (n.c_str (), su); 359 | } 360 | 361 | template 362 | inline T 
parser:: 363 | next_expect_member_number (const char* n, bool su) 364 | { 365 | next_expect_name (n, su); 366 | return next_expect_number (); 367 | } 368 | 369 | template 370 | inline T parser:: 371 | next_expect_member_number (const std::string& n, bool su) 372 | { 373 | return next_expect_member_number (n.c_str (), su); 374 | } 375 | 376 | // next_expect_member_boolean() 377 | // 378 | inline std::string& parser:: 379 | next_expect_member_boolean (const char* n, bool su) 380 | { 381 | next_expect_name (n, su); 382 | return next_expect_boolean (); 383 | } 384 | 385 | inline std::string& parser:: 386 | next_expect_member_boolean (const std::string& n, bool su) 387 | { 388 | return next_expect_member_boolean (n.c_str (), su); 389 | } 390 | 391 | template 392 | inline T parser:: 393 | next_expect_member_boolean (const char* n, bool su) 394 | { 395 | next_expect_name (n, su); 396 | return next_expect_boolean (); 397 | } 398 | 399 | template 400 | inline T parser:: 401 | next_expect_member_boolean (const std::string& n, bool su) 402 | { 403 | return next_expect_member_boolean (n.c_str (), su); 404 | } 405 | 406 | // next_expect_member_string_null() 407 | // 408 | inline std::string* parser:: 409 | next_expect_member_string_null (const char* n, bool su) 410 | { 411 | next_expect_name (n, su); 412 | return next_expect_string_null (); 413 | } 414 | 415 | inline std::string* parser:: 416 | next_expect_member_string_null (const std::string& n, bool su) 417 | { 418 | return next_expect_member_string_null (n.c_str (), su); 419 | } 420 | 421 | template 422 | inline optional parser:: 423 | next_expect_member_string_null (const char* n, bool su) 424 | { 425 | next_expect_name (n, su); 426 | return next_expect_string_null (); 427 | } 428 | 429 | template 430 | inline optional parser:: 431 | next_expect_member_string_null (const std::string& n, bool su) 432 | { 433 | return next_expect_member_string_null (n.c_str (), su); 434 | } 435 | 436 | // next_expect_member_number_null() 437 
//
// Call next_expect_name() with the member name n (the su argument is passed
// through to it unchanged) and then return the result of the corresponding
// next_expect_number_null() call: a std::string pointer for the non-template
// overloads and an optional for the template ones. The std::string overloads
// simply forward to their const char* counterparts.
//
inline std::string* parser::
next_expect_member_number_null (const char* n, bool su)
{
  next_expect_name (n, su);
  return next_expect_number_null ();
}

inline std::string* parser::
next_expect_member_number_null (const std::string& n, bool su)
{
  return next_expect_member_number_null (n.c_str (), su);
}

template
inline optional parser::
next_expect_member_number_null (const char* n, bool su)
{
  next_expect_name (n, su);
  return next_expect_number_null ();
}

template
inline optional parser::
next_expect_member_number_null (const std::string& n, bool su)
{
  return next_expect_member_number_null (n.c_str (), su);
}

// next_expect_member_boolean_null()
//
// Same pattern as above but returning the result of the corresponding
// next_expect_boolean_null() call.
//
inline std::string* parser::
next_expect_member_boolean_null (const char* n, bool su)
{
  next_expect_name (n, su);
  return next_expect_boolean_null ();
}

inline std::string* parser::
next_expect_member_boolean_null (const std::string& n, bool su)
{
  return next_expect_member_boolean_null (n.c_str (), su);
}

template
inline optional parser::
next_expect_member_boolean_null (const char* n, bool su)
{
  next_expect_name (n, su);
  return next_expect_boolean_null ();
}

template
inline optional parser::
next_expect_member_boolean_null (const std::string& n, bool su)
{
  return next_expect_member_boolean_null (n.c_str (), su);
}

// next_expect_member_object[_null]()
//
// Call next_expect_name() with the member name n and then next_expect() with
// the begin_object event. The _null version passes both the begin_object and
// null events to next_expect() and returns its boolean result (presumably
// distinguishing the object from the null case; see next_expect() to
// confirm).
//
inline void parser::
next_expect_member_object (const char* n, bool su)
{
  next_expect_name (n, su);
  next_expect (event::begin_object);
}

inline void parser::
next_expect_member_object (const std::string& n, bool su)
{
  next_expect_member_object (n.c_str (), su);
}

inline bool parser::
next_expect_member_object_null (const char* n, bool su)
{
  next_expect_name (n, su);
  return next_expect (event::begin_object, event::null);
}

inline bool parser::
next_expect_member_object_null (const std::string& n, bool su)
{
  return next_expect_member_object_null (n.c_str (), su);
}

// next_expect_member_array[_null]()
//
// Same as the object versions above but with the begin_array event.
//
inline void parser::
next_expect_member_array (const char* n, bool su)
{
  next_expect_name (n, su);
  next_expect (event::begin_array);
}

inline void parser::
next_expect_member_array (const std::string& n, bool su)
{
  next_expect_member_array (n.c_str (), su);
}

inline bool parser::
next_expect_member_array_null (const char* n, bool su)
{
  next_expect_name (n, su);
  return next_expect (event::begin_array, event::null);
}

inline bool parser::
next_expect_member_array_null (const std::string& n, bool su)
{
  return next_expect_member_array_null (n.c_str (), su);
}
}
}

--------------------------------------------------------------------------------
/libstud/json/pdjson.c:
--------------------------------------------------------------------------------
../../pdjson/pdjson.c
--------------------------------------------------------------------------------
/libstud/json/pdjson.h:
--------------------------------------------------------------------------------
../../pdjson/pdjson.h
--------------------------------------------------------------------------------
/libstud/json/serializer.cxx:
--------------------------------------------------------------------------------
#include // snprintf
#include // va_list
#include // memcpy, strlen
#include

#include

using namespace std;

namespace stud
{
namespace json
{
  using buffer = buffer_serializer::buffer;
  using error_code = invalid_json_output::error_code;

  // Overflow function for a resizable container (instantiated below for the
  // string and vector constructors).
  //
  // Grow the container to at least the current buffer capacity plus the
  // requested extra space, then expand it further to whatever capacity the
  // container actually allocated so that none of it is wasted. Finally,
  // re-point the buffer at the (possibly reallocated) storage.
  //
  template
  static void
  dynarray_overflow (void* d, event, buffer& b, size_t ex)
  {
    T& v (*static_cast (d));
    v.resize (b.capacity + ex);
    v.resize (v.capacity ());
    // const_cast is required for std::string pre C++17.
    //
    b.data = const_cast (v.data ());
    b.capacity = v.size ();
  }

  // Flush function for a resizable container.
  //
  // Shrink the container to the number of bytes written so far so that it
  // contains exactly the output text and re-sync the buffer with it.
  //
  template
  static void
  dynarray_flush (void* d, event, buffer& b)
  {
    T& v (*static_cast (d));
    v.resize (b.size);
    b.data = const_cast (v.data ());
    b.capacity = b.size;
  }

  // Serialize to a string.
  //
  // The output size is tracked in the size_ member which is passed to the
  // delegated-to constructor by reference and only then set to the current
  // string size (so that the output is appended to the existing contents).
  //
  buffer_serializer::
  buffer_serializer (string& s, size_t i, const char* mvs)
      : buffer_serializer (const_cast (s.data ()), size_, s.size (),
                           dynarray_overflow,
                           dynarray_flush,
                           &s,
                           i, mvs)
  {
    size_ = s.size ();
  }

  // Serialize to a vector (same approach as for the string above).
  //
  buffer_serializer::
  buffer_serializer (vector& v, size_t i, const char* mvs)
      : buffer_serializer (v.data (), size_, v.size (),
                           dynarray_overflow>,
                           dynarray_flush>,
                           &v,
                           i, mvs)
  {
    size_ = v.size ();
  }

  // Overflow function for an output stream.
  //
  // Write the current buffer contents to the stream and reset the buffer
  // size to 0 so that the same buffer can be reused. The extra space hint is
  // ignored. If the stream ends up in the failed state, throw
  // invalid_json_output.
  //
  static void
  ostream_overflow (void* d, event e, buffer& b, size_t)
  {
    ostream& s (*static_cast (d));
    s.write (static_cast (b.data), b.size);
    if (s.fail ())
      throw invalid_json_output (
        e, error_code::buffer_overflow, "unable to write JSON output text");
    b.size = 0;
  }

  // Flush function for an output stream.
  //
  // Write out whatever is left in the buffer (by reusing the overflow
  // function) and then flush the stream itself, throwing
  // invalid_json_output if the stream ends up in the failed state.
  //
  static void
  ostream_flush (void* d, event e, buffer& b)
  {
    ostream_overflow (d, e, b, 0);

    ostream& s (*static_cast (d));
    s.flush ();
    if (s.fail ())
      throw invalid_json_output (
        e, error_code::buffer_overflow, "unable to write JSON output text");
  }

  // Serialize to an output stream using the tmp_ member as a fixed-size
  // intermediate buffer that is written to the stream on overflow and flush.
  //
  stream_serializer::
  stream_serializer (ostream& os, size_t i, const char* mvs)
      : buffer_serializer (tmp_, sizeof (tmp_),
                           ostream_overflow,
                           ostream_flush,
                           &os,
                           i, mvs)
  {
  }

  // Serialize the next event.
  //
  // The implementation first handles the absent (nullopt) event, then works
  // out the separator to write before this event based on the state of the
  // innermost open object/array (or on the number of top-level values
  // already written), and finally validates and writes the event itself.
  //
  // Return true if the top-level value is still incomplete and false
  // otherwise. All the errors are reported by jumping to one of the fail_*
  // labels at the end of the function, each of which throws
  // invalid_json_output.
  //
  bool buffer_serializer::
  next (optional e, pair val, bool check)
  {
    // Two consecutive absent events have already been serialized: the value
    // sequence is complete and nothing else can be added to it.
    //
    if (absent_ == 2)
      goto fail_complete;

    if (e == nullopt)
    {
      // An absent event is only valid between top-level values.
      //
      if (!state_.empty ())
        goto fail_incomplete;

      absent_++;
      return false;
    }

    absent_ = 0; // Clear inter-value absent event.

    {
      // State of the innermost open object/array, if any.
      //
      state* st (state_.empty () ? nullptr : &state_.back ());

      // Inside an object the events alternate between names (even count)
      // and values (odd count).
      //
      auto name_expected = [] (const state& s)
      {
        return s.type == event::begin_object && s.count % 2 == 0;
      };

      auto make_str = [] (const char* s, size_t n)
      {
        return make_pair (s, n);
      };

      // When it comes to pretty-printing, the common way to do it is along
      // these lines:
      //
      // {
      //   "str": "value",
      //   "obj": {
      //     "arr": [
      //       1,
      //       2,
      //       3
      //     ]
      //   },
      //   "num": 123
      // }
      //
      // Empty objects and arrays are printed without a newline:
      //
      // {
      //   "obj": {},
      //   "arr": []
      // }
      //
      // There are two types of separators: between name and value, which is
      // always ": ", and before/after value inside an object or array which
      // is either newline followed by indentation, or comma followed by
      // newline followed by indentation (we also have separation between
      // top-level values but that's orthogonal to pretty-printing).
      //
      // Based on this observation, we are going to handle the latter case by
      // starting with the ",\n" string (in this->sep_) and pushing/popping
      // indentation spaces as we enter/leave objects and arrays. We handle
      // the cases where we don't need the comma by simply skipping it in the
      // C-string pointer.
      //
      bool pp (indent_ != 0);

      pair sep;
      if (st != nullptr)
      {
        // The name-value separator.
        //
        if (st->type == event::begin_object && st->count % 2 == 1)
        {
          sep = !pp ? make_str (":", 1) : make_str (": ", 2);
        }
        // We don't need the comma if we are closing the object or array.
        //
        else if (e == event::end_array || e == event::end_object)
        {
          // But in this case we need to unindent one level prior to writing
          // the brace. Also handle the empty object/array as a special case.
          //
          sep = !pp || st->count == 0
            ? make_str (nullptr, 0)
            : make_str (sep_.c_str () + 1, sep_.size () - 1 - indent_);
        }
        // Or if this is the first value (note: must come after end_*).
        //
        else if (st->count == 0)
        {
          sep = !pp
            ? make_str (nullptr, 0)
            : make_str (sep_.c_str () + 1, sep_.size () - 1);
        }
        else
        {
          sep = !pp
            ? make_str (",", 1)
            : make_str (sep_.c_str (), sep_.size ());
        }
      }
      else if (values_ != 0) // Subsequent top-level value.
      {
        // Top-level value separation.
        //
        // Note: strlen() is only called for separators longer than one
        // character.
        //
        sep = make_str (
          mv_separator_,
          (mv_separator_ == nullptr || mv_separator_[0] == '\0' ? 0 :
           mv_separator_[1] == '\0' ? 1 :
           strlen (mv_separator_)));
      }

      switch (*e)
      {
      case event::begin_array:
      case event::begin_object:
        {
          // A structured value cannot appear where a member name is
          // expected.
          //
          if (st != nullptr && name_expected (*st))
            goto fail_unexpected_event;

          write (*e,
                 sep,
                 make_str (e == event::begin_array ? "[" : "{", 1),
                 false);

          // Count this value in the enclosing object/array.
          //
          if (st != nullptr)
            st->count++;

          // Indent one more level.
          //
          if (pp)
            sep_.append (indent_, ' ');

          state_.push_back (state {*e, 0});
          break;
        }
      case event::end_array:
      case event::end_object:
        {
          // The closing event must match the innermost open type and an
          // object can only be closed where a name is expected (that is,
          // not between a name and its value).
          //
          if (st == nullptr || (e == event::end_array
                                ? st->type != event::begin_array
                                : !name_expected (*st)))
            goto fail_unexpected_event;

          write (*e,
                 sep,
                 make_str (e == event::end_array ? "]" : "}", 1),
                 false);

          // Unindent one level.
          //
          if (pp)
            sep_.erase (sep_.size () - indent_);

          state_.pop_back ();
          break;
        }
      case event::name:
      case event::string:
        {
          // A name is only valid where a name is expected while a string
          // value is valid anywhere but there.
          //
          if (e == event::name
              ? (st == nullptr || !name_expected (*st))
              : (st != nullptr && name_expected (*st)))
            goto fail_unexpected_event;

          write (*e, sep, val, check, '"');

          if (st != nullptr)
            st->count++;
          break;
        }
      case event::null:
      case event::boolean:
        {
          // Supply the text for a null event without a value. Otherwise, if
          // requested, verify the value is one of the valid literals.
          //
          if (e == event::null && val.first == nullptr)
            val = {"null", 4};
          else if (check)
          {
            auto eq = [&val] (const char* v, size_t n)
            {
              return val.second == n && memcmp (val.first, v, n) == 0;
            };

            if (e == event::null)
            {
              if (!eq ("null", 4))
                goto fail_null;
            }
            else
            {
              if (!eq ("true", 4) && !eq ("false", 5))
                goto fail_bool;
            }
          }
        }
        // Fall through.
      case event::number:
        {
          // Note: this event is also used by value_json_text().

          if (st != nullptr && name_expected (*st))
            goto fail_unexpected_event;

          write (*e, sep, val, check);

          if (st != nullptr)
            st->count++;
          break;
        }
      }
    }

    // No open objects/arrays left: the top-level value is complete.
    //
    if (state_.empty ())
    {
      values_++;
      if (flush_ != nullptr)
        flush_ (data_, *e, buf_);

      return false;
    }

    return true;

  fail_complete:
    throw invalid_json_output (
      e, error_code::invalid_value, "value sequence is complete");
  fail_incomplete:
    throw invalid_json_output (
      e, error_code::invalid_value, "value is incomplete");
  fail_null:
    throw invalid_json_output (
      e, error_code::invalid_value, "invalid null value");
  fail_bool:
    throw invalid_json_output (
      e, error_code::invalid_value, "invalid boolean value");
  fail_unexpected_event:
    throw invalid_json_output (
      e, error_code::unexpected_event, "unexpected event");
  }

  // JSON escape sequences for control characters <= 0x1F.
  //
  // The array is indexed by the character value. The short (two-character)
  // escapes are used where JSON provides them and \u00NN otherwise.
  //
  static const char* json_escapes[] =
    {"\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005",
     "\\u0006", "\\u0007", "\\b", "\\t", "\\n", "\\u000B",
     "\\f", "\\r", "\\u000E", "\\u000F", "\\u0010", "\\u0011",
     "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
     "\\u0018", "\\u0019", "\\u001A", "\\u001B", "\\u001C", "\\u001D",
     "\\u001E", "\\u001F"};

  // Write to the buffer the separator (if any) followed by the value, which
  // is enclosed in double quotes if q is not '\0'. If check is true, then
  // the value is escaped and validated as UTF-8 while being written.
  //
  // The overflow function (if any) is called whenever the buffer runs out of
  // space. Throw invalid_json_output if no more space can be obtained or if
  // the checked value contains invalid UTF-8.
  //
  void buffer_serializer::
  write (event e,
         pair sep,
         pair val,
         bool check,
         char q)
  {
    // Assumptions:
    //
    // 1. A call to overflow should be able to provide enough capacity to
    //    write the entire separator (in other words, we are not going to
    //    bother with chunking the separator).
    //
    // 2. Similarly, a call to overflow should be able to provide enough
    //    capacity to write an entire UTF-8 multi-byte sequence.
    //
    // 3. Performance-wise, we do not expect very long contiguous sequences
    //    of characters that require escaping.

    // Total number of bytes remaining to be written and the capacity
    // currently available.
    //
    size_t size (sep.second + val.second + (q != '\0' ? 2 : 0));
    size_t cap (buf_.capacity - buf_.size);

    // Call the overflow function asking for the space still missing for the
    // remaining output (plus any extra) but for no less than min. Return
    // false if there is no overflow function or if the capacity available
    // afterwards is still less than min.
    //
    auto grow = [this, e, &size, &cap] (size_t min, size_t extra = 0)
    {
      if (overflow_ == nullptr)
        return false;

      extra += size;
      extra -= cap;
      overflow_ (data_, e, buf_, extra > min ? extra : min);
      cap = buf_.capacity - buf_.size;

      return cap >= min;
    };

    // Copy s bytes to the end of the buffer and update the counters. The
    // caller is responsible for making sure there is enough capacity.
    //
    auto append = [this, &cap, &size] (const char* d, size_t s)
    {
      memcpy (static_cast (buf_.data) + buf_.size, d, s);
      buf_.size += s;
      cap -= s;
      size -= s;
    };

    // Return the longest chunk of input that fits into the buffer and does
    // not end in the middle of a multi-byte UTF-8 sequence. Assume value
    // size and capacity are not 0. Return NULL in first if no chunk could
    // be found that fits into the remaining space. In this case, second is
    // the additional (to size) required space (used to handle escapes in
    // the checked version).
    //
    // The basic idea is to seek in the input buffer to the capacity of the
    // output buffer (unless the input is shorter than the output). If we
    // ended up in the middle of a multi-byte UTF-8 sequence, then seek back
    // until we end up at the UTF-8 sequence boundary. Note that this
    // implementation assumes valid UTF-8.
    //
    auto chunk = [&cap, &val] () -> pair
    {
      pair r (nullptr, 0);

      if (cap >= val.second)
        r = val;
      else
      {
        // Start from the character past capacity and search for a UTF-8
        // sequence boundary.
        //
        for (const char* p (val.first + cap); p != val.first; --p)
        {
          const auto u (static_cast (*p));
          if (u < 0x80 || u > 0xBF) // Not a continuation byte
          {
            r = {val.first, p - val.first};
            break;
          }
        }
      }

      // Consume the chunk (nothing is consumed if none was found).
      //
      val.first += r.second;
      val.second -= r.second;

      return r;
    };

    // Escaping and UTF-8-validating version of chunk().
    //
    // There are three classes of mandatory escapes in a JSON string:
    //
    // - \\ and \"
    //
    // - \b \f \n \r \t for popular control characters
    //
    // - \u00NN for other control characters <= 0x1F
    //
    // If the input begins with a character that must be escaped, return
    // only its escape sequence. Otherwise validate and return everything up
    // to the end of input or buffer capacity, but cutting it short before
    // the next character that must be escaped or the first UTF-8 sequence
    // that would not fit.
    //
    // Return string::npos in second in case of a stray continuation byte or
    // any byte in an invalid UTF-8 range (for example, an "overlong" 2-byte
    // encoding of a 7-bit/ASCII character or a 4-, 5-, or 6-byte sequence
    // that would encode a codepoint beyond the U+10FFFF Unicode limit).
    //
    auto chunk_checked = [&cap, &size, &val] () -> pair
    {
      pair r (nullptr, 0);

      // Check whether the first character needs to be escaped.
      //
      const uint8_t c (val.first[0]);
      if (c == '"')
        r = {"\\\"", 2};
      else if (c == '\\')
        r = {"\\\\", 2};
      else if (c <= 0x1F)
      {
        // The escape is either the 6-character \u00NN form or one of the
        // 2-character short forms.
        //
        auto s (json_escapes[c]);
        r = {s, s[1] == 'u' ? 6 : 2};
      }

      if (r.first != nullptr)
      {
        // Return in second the additional (to size) space required.
        //
        if (r.second > cap)
          return {nullptr, r.second - 1};

        // If we had to escape the character then adjust size accordingly
        // (see append() above).
        //
        size += r.second - 1;

        val.first += 1;
        val.second -= 1;
        return r;
      }

      // First character doesn't need to be escaped. Return as much of the
      // rest of the input as possible.
      //
      size_t i (0);
      for (size_t n (min (cap, val.second)); i != n; i++)
      {
        const uint8_t c1 (val.first[i]);

        if (c1 == '"' || c1 == '\\' || c1 <= 0x1F) // Needs to be escaped.
          break;
        else if (c1 >= 0x80) // Not ASCII, so validate as a UTF-8 sequence.
        {
          size_t i1 (i); // Position of the first byte.

          // The control flow here is to continue if valid and to fall
          // through to return on error.
          //
          if (c1 >= 0xC2 && c1 <= 0xDF) // 2-byte sequence.
          {
            if (i + 2 <= val.second) // Sequence is complete in JSON value.
            {
              if (i + 2 > cap) // Sequence won't fit.
                break;

              const uint8_t c2 (val.first[++i]);

              if (c2 >= 0x80 && c2 <= 0xBF)
                continue;
            }
          }
          else if (c1 >= 0xE0 && c1 <= 0xEF) // 3-byte sequence.
          {
            if (i + 3 <= val.second)
            {
              if (i + 3 > cap)
                break;

              const uint8_t c2 (val.first[++i]), c3 (val.first[++i]);

              if (c3 >= 0x80 && c3 <= 0xBF)
              {
                // The valid range of the second byte depends on the first.
                //
                switch (c1)
                {
                case 0xE0: if (c2 >= 0xA0 && c2 <= 0xBF) continue; break;
                case 0xED: if (c2 >= 0x80 && c2 <= 0x9F) continue; break;
                default:   if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
                }
              }
            }
          }
          else if (c1 >= 0xF0 && c1 <= 0xF4) // 4-byte sequence.
          {
            if (i + 4 <= val.second)
            {
              if (i + 4 > cap)
                break;

              const uint8_t c2 (val.first[++i]),
                            c3 (val.first[++i]),
                            c4 (val.first[++i]);

              if (c3 >= 0x80 && c3 <= 0xBF &&
                  c4 >= 0x80 && c4 <= 0xBF)
              {
                // The valid range of the second byte depends on the first.
                //
                switch (c1)
                {
                case 0xF0: if (c2 >= 0x90 && c2 <= 0xBF) continue; break;
                case 0xF4: if (c2 >= 0x80 && c2 <= 0x8F) continue; break;
                default:   if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
                }
              }
            }
          }

          r = {val.first, string::npos};

          // Update val to point to the beginning of the invalid sequence.
          //
          val.first += i1;
          val.second -= i1;

          return r;
        }
      }

      if (i != 0) // We have a chunk.
      {
        r = {val.first, i};

        val.first += i;
        val.second -= i;
      }

      return r;
    };

    // Value's original size (used to calculate the offset of the errant
    // character in case of a validation failure).
    //
    const size_t vn (val.second);

    // Write the separator, if any.
    //
    if (sep.second != 0)
    {
      if (cap < sep.second && !grow (sep.second))
        goto fail_nospace;

      append (sep.first, sep.second);
    }

    // Write the value's opening quote, if requested.
    //
    if (q != '\0')
    {
      if (cap == 0 && !grow (1))
        goto fail_nospace;

      append ("\"", 1);
    }

    // Write the value, unless empty.
    //
    while (val.second != 0)
    {
      pair ch (nullptr, 0);

      if (cap != 0)
        ch = check ? chunk_checked () : chunk ();

      if (ch.first == nullptr)
      {
        // The minimum extra bytes we need the overflow function to be able
        // to provide is based on these sequences that we do not break:
        //
        // - 4 bytes for a UTF-8 sequence
        // - 6 bytes for an escaped Unicode sequence (\uXXXX).
        //
        if (!grow (6, ch.second))
          goto fail_nospace;
      }
      else if (ch.second != string::npos)
        append (ch.first, ch.second);
      else
        goto fail_utf8;
    }

    // Write the value's closing quote, if requested.
    //
    if (q != '\0')
    {
      if (cap == 0 && !grow (1))
        goto fail_nospace;

      append ("\"", 1);
    }

    return;

    // Note: keep descriptions consistent with the parser.
    //
  fail_utf8:
    // The offset is that of the invalid sequence from the beginning of the
    // value (val was advanced as chunks were consumed).
    //
    throw invalid_json_output (e,
                               e == event::name ? error_code::invalid_name
                                                : error_code::invalid_value,
                               "invalid UTF-8 text",
                               vn - val.second);

  fail_nospace:
    throw invalid_json_output (
      e, error_code::buffer_overflow, "insufficient space in buffer");
  }

  // Format a number into the buffer b of size n using the printf-style
  // format f and return the number of characters written (not counting the
  // terminating null character).
  //
  // Throw invalid_json_output if the conversion fails or if the result does
  // not fit into the buffer.
  //
  size_t buffer_serializer::
  to_chars_impl (char* b, size_t n, const char* f, ...)
  {
    va_list a;
    va_start (a, f);
    const int r (vsnprintf (b, n, f, a));
    va_end (a);

    // A negative result indicates an error while a result greater or equal
    // to the buffer size indicates truncation.
    //
    if (r < 0 || r >= static_cast (n))
    {
      throw invalid_json_output (event::number,
                                 error_code::invalid_value,
                                 "unable to convert number to string");
    }

    return static_cast (r);
  }
} // namespace json
} // namespace stud

--------------------------------------------------------------------------------
/libstud/json/serializer.hxx:
--------------------------------------------------------------------------------
#pragma once

#include
#include
#include
#include
#include // size_t, nullptr_t
#include // pair
#include // invalid_argument
#include // enable_if, is_*

#include // stud::optional is std::optional or similar.

#include

#include

namespace stud
{
  // Using the RFC8259 terminology: JSON (output) text, JSON value, object
  // member.
22 | // 23 | namespace json 24 | { 25 | class invalid_json_output: public std::invalid_argument 26 | { 27 | public: 28 | using event_type = json::event; 29 | 30 | enum class error_code 31 | { 32 | buffer_overflow, 33 | unexpected_event, 34 | invalid_name, 35 | invalid_value 36 | }; 37 | 38 | invalid_json_output (optional event, 39 | error_code code, 40 | const char* description, 41 | std::size_t offset = std::string::npos); 42 | 43 | invalid_json_output (optional event, 44 | error_code code, 45 | const std::string& description, 46 | std::size_t offset = std::string::npos); 47 | 48 | // Event that triggered the error. If the error is in the value, then 49 | // offset points to the offending byte (for example, the beginning of an 50 | // invalid UTF-8 byte sequence). Otherwise, offset is string::npos. 51 | // 52 | optional event; 53 | error_code code; 54 | std::size_t offset; 55 | }; 56 | 57 | // The serializer makes sure the resulting JSON is syntactically but not 58 | // necessarily semantically correct. For example, it's possible to 59 | // serialize a number event with non-numeric data. 60 | // 61 | // Note that unlike the parser, the serializer is always in the multi- 62 | // value mode allowing the serialization of zero or more values. Note also 63 | // that while values are by default separated with newlines, there is no 64 | // trailing newline after the last (or only) value and the user is 65 | // expected to add it manually, if needed. 66 | // 67 | // Also note that while RFC8259 recommends object members to have unique 68 | // names, the serializer does not enforce this. 69 | // 70 | class LIBSTUD_JSON_SYMEXPORT buffer_serializer 71 | { 72 | public: 73 | // Serialize to string growing it as necessary. Note that the result is 74 | // appended to any existing data in the string. 75 | // 76 | // The indentation argument specifies the number of indentation spaces 77 | // that should be used for pretty-printing. 
If 0 is passed, no 78 | // pretty-printing is performed. 79 | // 80 | // The multi_value_separator argument specifies the character sequence 81 | // to use to separate multiple top-level values. NULL or empty string 82 | // means no separator. Note that it is kept as a reference and so must 83 | // outlive the serializer instance. 84 | // 85 | explicit 86 | buffer_serializer (std::string&, 87 | std::size_t indentation = 2, 88 | const char* multi_value_separator = "\n"); 89 | 90 | // Serialize to vector of characters growing it as necessary. Note that 91 | // the result is appended to any existing data in the vector. 92 | // 93 | explicit 94 | buffer_serializer (std::vector&, 95 | std::size_t indentation = 2, 96 | const char* multi_value_separator = "\n"); 97 | 98 | // Serialize to a fixed array. 99 | // 100 | // The length of the output text written is tracked in the size 101 | // argument. 102 | // 103 | // If the array is not big enough to store the entire output text, the 104 | // next() call that reaches the limit will throw invalid_json_output. 105 | // 106 | template 107 | buffer_serializer (std::array&, std::size_t& size, 108 | std::size_t indentation = 2, 109 | const char* multi_value_separator = "\n"); 110 | 111 | // Serialize to a fixed buffer. 112 | // 113 | // The length of the output text written is tracked in the size 114 | // argument. 115 | // 116 | // If the buffer is not big enough to store the entire output text, the 117 | // next() call that reaches the limit will throw invalid_json_output. 118 | // 119 | buffer_serializer (void* buf, std::size_t& size, std::size_t capacity, 120 | std::size_t indentation = 2, 121 | const char* multi_value_separator = "\n"); 122 | 123 | // The overflow function is called when the output buffer is out of 124 | // space. The extra argument is a hint indicating the extra space likely 125 | // to be required. 
126 | // 127 | // Possible strategies include re-allocating a larger buffer or flushing 128 | // the contents of the original buffer to the output destination. In 129 | // case of a reallocation, the implementation is responsible for copying 130 | // the contents of the original buffer over. 131 | // 132 | // The flush function is called when the complete JSON value has been 133 | // serialized to the buffer. It can be used to write the contents of the 134 | // buffer to the output destination. Note that flush is not called after 135 | // the second absent (nullopt) event (or the only absent event; see 136 | // next() for details). 137 | // 138 | // Both functions are passed the original buffer, its size (the amount 139 | // of output text), and its capacity. They return (by modifying the 140 | // argument) the replacement buffer and its size and capacity (these may 141 | // refer to the original buffer). If space cannot be made available, the 142 | // implementation can throw an appropriate exception (for example, 143 | // std::bad_alloc or std::ios_base::failure). Any exceptions thrown is 144 | // propagated to the user. 145 | // 146 | struct buffer 147 | { 148 | void* data; 149 | std::size_t& size; 150 | std::size_t capacity; 151 | }; 152 | 153 | using overflow_function = void (void* data, 154 | event, 155 | buffer&, 156 | std::size_t extra); 157 | using flush_function = void (void* data, event, buffer&); 158 | 159 | // Serialize using a custom buffer and overflow/flush functions (both 160 | // are optional). 161 | // 162 | buffer_serializer (void* buf, std::size_t capacity, 163 | overflow_function*, 164 | flush_function*, 165 | void* data, 166 | std::size_t indentation = 2, 167 | const char* multi_value_separator = "\n"); 168 | 169 | // As above but the length of the output text written is tracked in the 170 | // size argument. 
171 | // 172 | buffer_serializer (void* buf, std::size_t& size, std::size_t capacity, 173 | overflow_function*, 174 | flush_function*, 175 | void* data, 176 | std::size_t indentation = 2, 177 | const char* multi_value_separator = "\n"); 178 | 179 | // Begin/end an object. 180 | // 181 | // The member_begin_object() version is a shortcut for: 182 | // 183 | // member_name (name, check); 184 | // begin_object (); 185 | // 186 | void 187 | begin_object (); 188 | 189 | void 190 | member_begin_object (const char*, bool check = true); 191 | 192 | void 193 | member_begin_object (const std::string&, bool check = true); 194 | 195 | void 196 | end_object (); 197 | 198 | // Serialize an object member (name and value). 199 | // 200 | // If check is false, then don't check whether the name (or value, if 201 | // it's a string) is valid UTF-8 and don't escape any characters. 202 | // 203 | template 204 | void 205 | member (const char* name, const T& value, bool check = true); 206 | 207 | template 208 | void 209 | member (const std::string& name, const T& value, bool check = true); 210 | 211 | // Serialize an object member name. 212 | // 213 | // If check is false, then don't check whether the name is valid UTF-8 214 | // and don't escape any characters. 215 | // 216 | void 217 | member_name (const char*, bool check = true); 218 | 219 | void 220 | member_name (const std::string&, bool check = true); 221 | 222 | // Begin/end an array. 223 | // 224 | // The member_begin_array() version is a shortcut for: 225 | // 226 | // member_name (name, check); 227 | // begin_array (); 228 | // 229 | void 230 | begin_array (); 231 | 232 | void 233 | member_begin_array (const char*, bool check = true); 234 | 235 | void 236 | member_begin_array (const std::string&, bool check = true); 237 | 238 | void 239 | end_array (); 240 | 241 | // Serialize a string. 242 | // 243 | // If check is false, then don't check whether the value is valid UTF-8 244 | // and don't escape any characters. 
245 | // 246 | // Note that a NULL C-string pointer is serialized as a null value. 247 | // 248 | void 249 | value (const char*, bool check = true); 250 | 251 | void 252 | value (const std::string&, bool check = true); 253 | 254 | // Serialize a number. 255 | // 256 | template 257 | typename std::enable_if::value || 258 | std::is_floating_point::value>::type 259 | value (T); 260 | 261 | // Serialize a boolean value. 262 | // 263 | void 264 | value (bool); 265 | 266 | // Serialize a null value. 267 | // 268 | void 269 | value (std::nullptr_t); 270 | 271 | // Serialize value as a pre-serialized JSON value. 272 | // 273 | // Note that the value is expected to be a valid (and suitable) UTF-8- 274 | // encoded JSON text. Note also that if pretty-printing is enabled, 275 | // the resulting output may not be correctly indented. 276 | // 277 | void 278 | value_json_text (const char*); 279 | 280 | void 281 | value_json_text (const std::string&); 282 | 283 | // Serialize next JSON event. 284 | // 285 | // If check is false, then don't check whether the value is valid UTF-8 286 | // and don't escape any characters. 287 | // 288 | // Return true if more events are required to complete the (top-level) 289 | // value (that is, it is currently incomplete) and false otherwise. 290 | // Throw invalid_json_output exception in case of an invalid event or 291 | // value. 292 | // 293 | // At the end of the value an optional absent (nullopt) event can be 294 | // serialized to verify the value is complete. If it is incomplete an 295 | // invalid_json_output exception is thrown. An optional followup absent 296 | // event can be serialized to indicate the completion of a multi-value 297 | // sequence (one and only absent event indicates a zero value sequence). 298 | // If anything is serialized to a complete value sequence an 299 | // invalid_json_output exception is thrown. 
300 | // 301 | // Note that this function was designed to be easily invoked with the 302 | // output from parser::next() and parser::data(). For example, for a 303 | // single-value mode: 304 | // 305 | // optional<event> e; 306 | // do 307 | // { 308 | // e = p.next (); 309 | // s.next (e, p.data ()); 310 | // } 311 | // while (e); 312 | // 313 | // For a multi-value mode: 314 | // 315 | // while (p.peek ()) 316 | // { 317 | // optional<event> e; 318 | // do 319 | // { 320 | // e = p.next (); 321 | // s.next (e, p.data ()); 322 | // } 323 | // while (e); 324 | // } 325 | // s.next (nullopt); // End of value sequence. 326 | // 327 | bool 328 | next (optional event, 329 | std::pair value = {}, 330 | bool check = true); 331 | 332 | private: 333 | void 334 | write (event, 335 | std::pair sep, 336 | std::pair val, 337 | bool check, char quote = '\0'); 338 | 339 | // Forward a value(v, check) call to value(v) ignoring the check 340 | // argument. Used in the member() implementation. 341 | // 342 | template 343 | void 344 | value (const T& v, bool /*check*/) 345 | { 346 | value (v); 347 | } 348 | 349 | // Convert numbers to string. 350 | // 351 | static std::size_t to_chars (char*, std::size_t, int); 352 | static std::size_t to_chars (char*, std::size_t, long); 353 | static std::size_t to_chars (char*, std::size_t, long long); 354 | static std::size_t to_chars (char*, std::size_t, unsigned int); 355 | static std::size_t to_chars (char*, std::size_t, unsigned long); 356 | static std::size_t to_chars (char*, std::size_t, unsigned long long); 357 | static std::size_t to_chars (char*, std::size_t, double); 358 | static std::size_t to_chars (char*, std::size_t, long double); 359 | 360 | static std::size_t to_chars_impl (char*, size_t, const char* fmt, ...); 361 | 362 | buffer buf_; 363 | std::size_t size_; 364 | overflow_function* overflow_; 365 | flush_function* flush_; 366 | void* data_; 367 | 368 | // State of a "structured type" (array or object; as per the RFC 369 | // terminology). 
370 | // 371 | struct state 372 | { 373 | const event type; // Type kind (begin_array or begin_object). 374 | std::size_t count; // Number of events serialized inside this type. 375 | }; 376 | 377 | // Stack of nested structured type states. 378 | // 379 | // @@ TODO: would have been nice to use small_vector. 380 | // 381 | std::vector state_; 382 | 383 | // The number of consecutive absent events (nullopt) serialized thus 384 | // far. 385 | // 386 | // Note: initialized to 1 to naturally handle a single absent event 387 | // (declares an empty value sequence complete). 388 | // 389 | std::size_t absent_ = 1; 390 | 391 | // The number of spaces with which to indent (once for each level of 392 | // nesting). If zero, pretty-printing is disabled. 393 | // 394 | std::size_t indent_; 395 | 396 | // Separator and indentation before/after value inside an object or 397 | // array (see pretty-printing implementation for details). 398 | // 399 | std::string sep_; 400 | 401 | // The number of complete top-level values serialized thus far. 402 | // 403 | std::size_t values_ = 0; 404 | 405 | // Multi-value separator. 406 | // 407 | const char* mv_separator_; 408 | }; 409 | 410 | class LIBSTUD_JSON_SYMEXPORT stream_serializer: public buffer_serializer 411 | { 412 | public: 413 | // Serialize to std::ostream. 414 | // 415 | // If stream exceptions are enabled then the std::ios_base::failure 416 | // exception is used to report input/output errors (badbit and failbit). 417 | // Otherwise, those are reported as the invalid_json_output exception. 
418 | // 419 | explicit 420 | stream_serializer (std::ostream&, 421 | std::size_t indentation = 2, 422 | const char* multi_value_separator = "\n"); 423 | 424 | protected: 425 | char tmp_[4096]; 426 | }; 427 | } 428 | } 429 | 430 | #include 431 | -------------------------------------------------------------------------------- /libstud/json/serializer.ixx: -------------------------------------------------------------------------------- 1 | #include // strlen() 2 | 3 | namespace stud 4 | { 5 | namespace json 6 | { 7 | inline invalid_json_output:: 8 | invalid_json_output (optional e, 9 | error_code c, 10 | const char* d, 11 | std::size_t o) 12 | : std::invalid_argument (d), event (e), code (c), offset (o) 13 | { 14 | } 15 | 16 | inline invalid_json_output:: 17 | invalid_json_output (optional e, 18 | error_code c, 19 | const std::string& d, 20 | std::size_t o) 21 | : invalid_json_output (e, c, d.c_str (), o) 22 | { 23 | } 24 | 25 | inline buffer_serializer:: 26 | buffer_serializer (void* b, std::size_t& s, std::size_t c, 27 | overflow_function* o, flush_function* f, void* d, 28 | std::size_t i, const char* mvs) 29 | : buf_ {b, s, c}, 30 | overflow_ (o), 31 | flush_ (f), 32 | data_ (d), 33 | indent_ (i), 34 | sep_ (indent_ != 0 ? 
",\n" : ""), 35 | mv_separator_ (mvs) 36 | { 37 | } 38 | 39 | template 40 | inline buffer_serializer:: 41 | buffer_serializer (std::array& a, std::size_t& s, 42 | std::size_t i, const char* mvs) 43 | : buffer_serializer (a.data (), s, a.size (), 44 | nullptr, nullptr, nullptr, 45 | i, mvs) 46 | { 47 | } 48 | 49 | inline buffer_serializer:: 50 | buffer_serializer (void* b, std::size_t& s, std::size_t c, 51 | std::size_t i, const char* mvs) 52 | : buffer_serializer (b, s, c, nullptr, nullptr, nullptr, i, mvs) 53 | { 54 | } 55 | 56 | inline buffer_serializer:: 57 | buffer_serializer (void* b, std::size_t c, 58 | overflow_function* o, flush_function* f, void* d, 59 | std::size_t i, const char* mvs) 60 | : buffer_serializer (b, size_, c, o, f, d, i, mvs) 61 | { 62 | size_ = 0; 63 | } 64 | 65 | inline void buffer_serializer:: 66 | begin_object () 67 | { 68 | next (event::begin_object); 69 | } 70 | 71 | inline void buffer_serializer:: 72 | end_object () 73 | { 74 | next (event::end_object); 75 | } 76 | 77 | inline void buffer_serializer:: 78 | member_name (const char* n, bool c) 79 | { 80 | next (event::name, {n, n != nullptr ? 
std::strlen (n) : 0}, c); 81 | } 82 | 83 | inline void buffer_serializer:: 84 | member_name (const std::string& n, bool c) 85 | { 86 | next (event::name, {n.c_str (), n.size ()}, c); 87 | } 88 | 89 | inline void buffer_serializer:: 90 | member_begin_object (const char* n, bool c) 91 | { 92 | member_name (n, c); 93 | begin_object (); 94 | } 95 | 96 | inline void buffer_serializer:: 97 | member_begin_object (const std::string& n, bool c) 98 | { 99 | member_name (n, c); 100 | begin_object (); 101 | } 102 | 103 | template 104 | inline void buffer_serializer:: 105 | member (const char* n, const T& v, bool c) 106 | { 107 | member_name (n, c); 108 | value (v, c); 109 | } 110 | 111 | template 112 | inline void buffer_serializer:: 113 | member (const std::string& n, const T& v, bool c) 114 | { 115 | member_name (n, c); 116 | value (v, c); 117 | } 118 | 119 | inline void buffer_serializer:: 120 | begin_array () 121 | { 122 | next (event::begin_array); 123 | } 124 | 125 | inline void buffer_serializer:: 126 | member_begin_array (const char* n, bool c) 127 | { 128 | member_name (n, c); 129 | begin_array (); 130 | } 131 | 132 | inline void buffer_serializer:: 133 | member_begin_array (const std::string& n, bool c) 134 | { 135 | member_name (n, c); 136 | begin_array (); 137 | } 138 | 139 | inline void buffer_serializer:: 140 | end_array () 141 | { 142 | next (event::end_array); 143 | } 144 | 145 | inline void buffer_serializer:: 146 | value (const char* v, bool c) 147 | { 148 | if (v != nullptr) 149 | next (event::string, {v, std::strlen (v)}, c); 150 | else 151 | next (event::null); 152 | } 153 | 154 | inline void buffer_serializer:: 155 | value (const std::string& v, bool c) 156 | { 157 | next (event::string, {v.c_str (), v.size ()}, c); 158 | } 159 | 160 | template 161 | typename std::enable_if::value || 162 | std::is_floating_point::value>::type 163 | buffer_serializer:: 164 | value (T v) 165 | { 166 | // The largest 128-bit integer has 39 digits, and long floating point 167 
| // numbers will fit because they are output in scientific notation. 168 | // 169 | char b[40]; 170 | const std::size_t n (to_chars (b, sizeof (b), v)); 171 | next (event::number, {b, n}); 172 | } 173 | 174 | inline void buffer_serializer:: 175 | value (bool b) 176 | { 177 | next (event::boolean, 178 | b ? std::make_pair ("true", 4) : std::make_pair ("false", 5)); 179 | } 180 | 181 | inline void buffer_serializer:: 182 | value (std::nullptr_t) 183 | { 184 | next (event::null); 185 | } 186 | 187 | inline void buffer_serializer:: 188 | value_json_text (const char* v) 189 | { 190 | // Use event::number (which doesn't involve any quoting) with a disabled 191 | // check. 192 | // 193 | next (event::number, {v, std::strlen (v)}, false /* check */); 194 | } 195 | 196 | inline void buffer_serializer:: 197 | value_json_text (const std::string& v) 198 | { 199 | next (event::number, {v.c_str (), v.size ()}, false /* check */); 200 | } 201 | 202 | inline size_t buffer_serializer:: 203 | to_chars (char* b, size_t s, int v) 204 | { 205 | return to_chars_impl (b, s, "%d", v); 206 | } 207 | 208 | inline size_t buffer_serializer:: 209 | to_chars (char* b, size_t s, long v) 210 | { 211 | return to_chars_impl (b, s, "%ld", v); 212 | } 213 | 214 | inline size_t buffer_serializer:: 215 | to_chars (char* b, size_t s, long long v) 216 | { 217 | return to_chars_impl (b, s, "%lld", v); 218 | } 219 | 220 | inline size_t buffer_serializer:: 221 | to_chars (char* b, size_t s, unsigned v) 222 | { 223 | return to_chars_impl (b, s, "%u", v); 224 | } 225 | 226 | inline size_t buffer_serializer:: 227 | to_chars (char* b, size_t s, unsigned long v) 228 | { 229 | return to_chars_impl (b, s, "%lu", v); 230 | } 231 | 232 | inline size_t buffer_serializer:: 233 | to_chars (char* b, size_t s, unsigned long long v) 234 | { 235 | return to_chars_impl (b, s, "%llu", v); 236 | } 237 | 238 | inline size_t buffer_serializer:: 239 | to_chars (char* b, size_t s, double v) 240 | { 241 | return to_chars_impl (b, 
s, "%.10g", v); 242 | } 243 | 244 | inline size_t buffer_serializer:: 245 | to_chars (char* b, size_t s, long double v) 246 | { 247 | return to_chars_impl (b, s, "%.10Lg", v); 248 | } 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /libstud/json/version.hxx.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // The numeric version format is AAAAABBBBBCCCCCDDDE where: 4 | // 5 | // AAAAA - major version number 6 | // BBBBB - minor version number 7 | // CCCCC - bugfix version number 8 | // DDD - alpha / beta (DDD + 500) version number 9 | // E - final (0) / snapshot (1) 10 | // 11 | // When DDDE is not 0, 1 is subtracted from AAAAABBBBBCCCCC. For example: 12 | // 13 | // Version AAAAABBBBBCCCCCDDDE 14 | // 15 | // 0.1.0 0000000001000000000 16 | // 0.1.2 0000000001000020000 17 | // 1.2.3 0000100002000030000 18 | // 2.2.0-a.1 0000200001999990010 19 | // 3.0.0-b.2 0000299999999995020 20 | // 2.2.0-a.1.z 0000200001999990011 21 | // 22 | #define LIBSTUD_JSON_VERSION $libstud_json.version.project_number$ULL 23 | #define LIBSTUD_JSON_VERSION_STR "$libstud_json.version.project$" 24 | #define LIBSTUD_JSON_VERSION_ID "$libstud_json.version.project_id$" 25 | 26 | #define LIBSTUD_JSON_VERSION_MAJOR $libstud_json.version.major$ 27 | #define LIBSTUD_JSON_VERSION_MINOR $libstud_json.version.minor$ 28 | #define LIBSTUD_JSON_VERSION_PATCH $libstud_json.version.patch$ 29 | 30 | #define LIBSTUD_JSON_PRE_RELEASE $libstud_json.version.pre_release$ 31 | 32 | #define LIBSTUD_JSON_SNAPSHOT_SN $libstud_json.version.snapshot_sn$ULL 33 | #define LIBSTUD_JSON_SNAPSHOT_ID "$libstud_json.version.snapshot_id$" 34 | -------------------------------------------------------------------------------- /manifest: -------------------------------------------------------------------------------- 1 | : 1 2 | name: libstud-json 3 | project: libstud 4 | version: 0.5.1-a.0.z 5 | summary: JSON 
pull-parser/push-serializer library for C++ 6 | license: MIT ; MIT License. 7 | topics: C++, JSON, parsing, serialization, streaming 8 | description-file: README.md 9 | changes-file: NEWS 10 | url: https://github.com/libstud/libstud-json 11 | email: libstud-authors@build2.org 12 | build-warning-email: libstud-authors@build2.org 13 | builds: all 14 | depends: * build2 >= 0.18.0- 15 | depends: * bpkg >= 0.18.0- 16 | depends: libstud-optional ^1.0.0 17 | -------------------------------------------------------------------------------- /repositories.manifest: -------------------------------------------------------------------------------- 1 | : 1 2 | summary: libstud-json project repository 3 | 4 | : 5 | role: prerequisite 6 | location: ../libstud-optional.git##HEAD 7 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | # Test executables. 2 | # 3 | driver 4 | 5 | # Testscript output directories (can be symlinks). 6 | # 7 | test 8 | test-* 9 | -------------------------------------------------------------------------------- /tests/build/.gitignore: -------------------------------------------------------------------------------- 1 | config.build 2 | root/ 3 | bootstrap/ 4 | -------------------------------------------------------------------------------- /tests/build/bootstrap.build: -------------------------------------------------------------------------------- 1 | project = # Unnamed tests subproject. 
2 | 3 | using version 4 | using config 5 | using test 6 | using dist 7 | -------------------------------------------------------------------------------- /tests/build/root.build: -------------------------------------------------------------------------------- 1 | cxx.std = latest 2 | 3 | using cxx 4 | 5 | hxx{*}: extension = hxx 6 | ixx{*}: extension = ixx 7 | txx{*}: extension = txx 8 | cxx{*}: extension = cxx 9 | 10 | if ($cxx.target.system == 'win32-msvc') 11 | cxx.poptions += -D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS 12 | 13 | if ($cxx.class == 'msvc') 14 | cxx.coptions += /wd4251 /wd4275 /wd4800 15 | 16 | # Every exe{} in this subproject is by default a test. 17 | # 18 | exe{*}: test = true 19 | 20 | # The test target for cross-testing (running tests under Wine, etc). 21 | # 22 | test.target = $cxx.target 23 | -------------------------------------------------------------------------------- /tests/buildfile: -------------------------------------------------------------------------------- 1 | ./: {*/ -build/} 2 | -------------------------------------------------------------------------------- /tests/parser/basics/basics.testscript: -------------------------------------------------------------------------------- 1 | : basic 2 | : 3 | $* <>EOO 4 | { 5 | "string": "str", 6 | "number": 123, 7 | "boolean": true, 8 | "null": null, 9 | "array": ["str", 123, false, null], 10 | "subobj": {"foo": "bar"} 11 | } 12 | EOI 13 | 1, 1: { 14 | 2, 3: string 15 | 2, 14: "str" 16 | 3, 3: number 17 | 3, 14: 123 18 | 4, 3: boolean 19 | 4, 14: true 20 | 5, 3: null 21 | 5, 14: NULL 22 | 6, 3: array 23 | 6, 14: [ 24 | 6, 15: "str" 25 | 6, 22: 123 26 | 6, 27: false 27 | 6, 34: NULL 28 | 6, 38: ] 29 | 7, 3: subobj 30 | 7, 14: { 31 | 7, 15: foo 32 | 7, 22: "bar" 33 | 7, 27: } 34 | 8, 1: } 35 | EOO 36 | 37 | : empty 38 | : 39 | $* 2>>EOE != 0 40 | :1:1: error: unexpected end of text 41 | EOE 42 | 43 | : fail-exception 44 | : 45 | $* --fail-exc <'{' >' 1, 1: {' 2>>EOE != 0 46 | 
error: unable to read from stdin 47 | EOE 48 | 49 | : fail-bit 50 | : 51 | $* --fail-bit <'{' >' 1, 1: {' 2>>EOE != 0 52 | :1:1: error: unable to read JSON input text 53 | EOE 54 | 55 | : value-sint 56 | : 57 | {{ 58 | : max 59 | : 60 | $* i <'2147483647' >' 1, 1: 2147483647' 61 | 62 | : min 63 | : 64 | $* i <'-2147483648' >' 1, 1: -2147483648' 65 | 66 | : fail-min 67 | : 68 | $* i <'-2147483649' 2>>EOE != 0 69 | :1:1: error: invalid signed integer value: '-2147483649' 70 | EOE 71 | 72 | : fail-max 73 | : 74 | $* i <'2147483648' 2>>EOE != 0 75 | :1:1: error: invalid signed integer value: '2147483648' 76 | EOE 77 | }} 78 | 79 | : value-uint 80 | : 81 | {{ 82 | : max 83 | : 84 | $* u <'4294967295' >' 1, 1: 4294967295' 85 | 86 | : fail-max 87 | : 88 | $* u <'4294967296' 2>>EOE != 0 89 | :1:1: error: invalid unsigned integer value: '4294967296' 90 | EOE 91 | }} 92 | 93 | # Testing floating point values is a pain. So we only check the success cases 94 | # trusting the strto*() functions to detect range issues as expected. 95 | # 96 | : value-float 97 | : 98 | $* f <'0.123' >~'/ 1, 1: 0.123[0-9]*/' 99 | 100 | : value-double 101 | : 102 | $* d <'0.1234' >~'/ 1, 1: 0.1234[0-9]*/' 103 | 104 | : value-long-double 105 | : 106 | $* l <'0.12345' >~'/ 1, 1: 0.12345[0-9]*/' 107 | 108 | # Test a bunch of corner cases. 109 | # 110 | : leading-zero 111 | : 112 | $* <'01' >' 1, 1: 0' 2>>EOE != 0 113 | :1:2: error: expected end of text instead of byte '1' 114 | EOE 115 | 116 | : wrong-letter 117 | : 118 | $* <'trux' 2>>EOE != 0 119 | :1:4: error: expected 'e' instead of byte 'x' 120 | EOE 121 | 122 | : error-within-value 123 | : Checks that, in case of invalid multi-character token, the column number 124 | : points to the errant character instead of the beginning of the token. 
125 | : 126 | $* <'12a45' >' 1, 1: 12' 2>>EOE != 0 127 | :1:3: error: expected end of text instead of byte 'a' 128 | EOE 129 | 130 | : end-of-text 131 | : 132 | {{ 133 | # Note: suppress trailing newline in input. 134 | 135 | : array 136 | : 137 | $* <:'[2' >- 2>>EOE != 0 138 | :1:2: error: unexpected end of text 139 | EOE 140 | 141 | : literal 142 | : 143 | $* <:'tru' 2>>EOE != 0 144 | :1:3: error: expected 'e' instead of end of text 145 | EOE 146 | }} 147 | 148 | : utf-8 149 | : 150 | {{ 151 | : 2-byte 152 | : 153 | {{ 154 | : literal 155 | : 156 | $* <'"¢"' >' 1, 1: "¢"' 157 | 158 | : escaped 159 | : 160 | $* <'"\u00A2"' >' 1, 1: "¢"' 161 | }} 162 | 163 | : 3-byte 164 | : 165 | {{ 166 | : literal 167 | : 168 | $* <'"ह"' >' 1, 1: "ह"' 169 | 170 | : escaped 171 | : 172 | $* <'"\u0939"' >' 1, 1: "ह"' 173 | }} 174 | 175 | : 2-and-3-byte 176 | : 177 | $* <'"ह\u00A2¢\u0939"' >' 1, 1: "ह¢¢ह"' 178 | 179 | : column-numbers 180 | : Checks that literal/directly-embedded multi-byte codepoints occupy only a 181 | : single column and that escaped codepoints occupy as many columns as there 182 | : are characters in the escape sequence. 183 | : 184 | $* <'["ह","¢","\u20AC"]' >>EOO 185 | 1, 1: [ 186 | 1, 2: "ह" 187 | 1, 6: "¢" 188 | 1, 10: "€" 189 | 1, 18: ] 190 | EOO 191 | 192 | : Invalid byte 193 | : 194 | $* <'"\u0MA2"' 2>>EOE != 0 195 | :1:5: error: invalid escape Unicode byte 'M' 196 | EOE 197 | }} 198 | -------------------------------------------------------------------------------- /tests/parser/basics/buildfile: -------------------------------------------------------------------------------- 1 | import libs = libstud-json%lib{stud-json} 2 | 3 | ./: exe{driver}: {cxx}{driver} $libs 4 | 5 | # Run the tests twice, once as is and another time with a pre-peek of every 6 | # token (as an extra test for the peek logic). If/when we have support for a 7 | # for-loop in Testscript we can handle this cleanly there. For now we use this 8 | # alias trick (or hack, if you wish). 
9 | # 10 | exe{driver}: test = false 11 | 12 | ./: alias{default peek}: exe{driver} testscript{*} 13 | { 14 | test = exe{driver} 15 | } 16 | 17 | alias{peek}: test.options += --peek 18 | -------------------------------------------------------------------------------- /tests/parser/basics/driver.cxx: -------------------------------------------------------------------------------- 1 | // Usage: argv[0] [--multi[=<sep>]] [--peek] --fail-exc|--fail-bit|[<mode>] 2 | // 3 | // --multi=<sep> -- enable multi-value mode with the specified separators 4 | // --peek -- pre-peek every token before parsing (must come first) 5 | // --fail-exc -- fail due to istream exception 6 | // --fail-bit -- fail due to istream badbit 7 | // <mode> -- numeric value parsing mode: i|u|f|d|l| 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #undef NDEBUG 16 | #include 17 | 18 | using namespace std; 19 | namespace json = stud::json; 20 | 21 | static string 22 | number (const string& m, json::parser& p) 23 | { 24 | if (m == "") return p.value (); 25 | if (m == "i") return to_string (p.value ()); 26 | if (m == "u") return to_string (p.value ()); 27 | if (m == "f") return to_string (p.value ()); 28 | if (m == "d") return to_string (p.value ()); 29 | if (m == "l") return to_string (p.value ()); 30 | 31 | assert (false); 32 | return ""; 33 | } 34 | 35 | int main (int argc, const char* argv[]) 36 | { 37 | bool multi (false); 38 | const char* sep (nullptr); 39 | bool peek (false); 40 | bool fail_exc (false); 41 | bool fail_bit (false); 42 | 43 | string nm; 44 | for (int i (1); i < argc; ++i) 45 | { 46 | string o (argv[i]); 47 | 48 | if (o.compare (0, 7, "--multi") == 0) 49 | { 50 | multi = true; 51 | if (o.size () > 7) 52 | sep = argv[i] + 8; 53 | continue; 54 | } 55 | 56 | if (o == "--peek") 57 | { 58 | peek = true; 59 | continue; 60 | } 61 | 62 | if (o == "--fail-exc") fail_exc = true; 63 | else if (o == "--fail-bit") fail_bit = true; 64 | else nm = move (o); 65 | break; // One of these should be 
last. 66 | } 67 | 68 | try 69 | { 70 | using namespace json; 71 | 72 | // It's not easy to cause the stream to fail when called by the parser. 73 | // So we will fail on EOF as the next best thing. 74 | // 75 | if (!fail_bit) 76 | cin.exceptions (istream::badbit | 77 | istream::failbit | 78 | (fail_exc ? istream::eofbit : istream::goodbit)); 79 | 80 | parser p (cin, "", multi, sep); 81 | size_t i (0); // Indentation. 82 | 83 | cout << right << setfill (' '); // Line number formatting. 84 | 85 | auto process_event = [&p, &i, nm, fail_bit] (event e) 86 | { 87 | size_t j (i); 88 | string s; 89 | 90 | switch (e) 91 | { 92 | case event::begin_object: s = "{"; i += 2; break; 93 | case event::end_object: s = "}"; j = i -= 2; break; 94 | case event::begin_array: s = "["; i += 2; break; 95 | case event::end_array: s = "]"; j = i -= 2; break; 96 | case event::name: s = p.name (); break; 97 | case event::string: s = '"' + p.value () + '"'; break; 98 | case event::number: s = number (nm, p); break; 99 | case event::boolean: s = p.value () ? "true" : "false"; break; 100 | case event::null: s = "NULL"; break; 101 | } 102 | 103 | cout << setw (3) << p.line () << "," << setw (3) << p.column () << ": " 104 | << string (j, ' ') << s << '\n'; 105 | 106 | if (fail_bit) 107 | cin.setstate (istream::badbit); 108 | }; 109 | 110 | // Use the "canonical" parsing code for both modes. 
111 | // 112 | if (!multi) 113 | { 114 | if (peek) 115 | p.peek (); 116 | 117 | for (event e: p) 118 | { 119 | process_event (e); 120 | 121 | if (peek) 122 | p.peek (); 123 | } 124 | } 125 | else 126 | { 127 | while (p.peek ()) 128 | for (event e: p) 129 | { 130 | process_event (e); 131 | 132 | if (peek) 133 | p.peek (); 134 | } 135 | } 136 | 137 | return 0; 138 | } 139 | catch (const json::invalid_json_input& e) 140 | { 141 | cerr << e.name << ':' << e.line << ':' << e.column << ": error: " 142 | << e.what () << endl; 143 | } 144 | catch (const istream::failure&) 145 | { 146 | cerr << "error: unable to read from stdin" << endl; 147 | } 148 | 149 | return 1; 150 | } 151 | -------------------------------------------------------------------------------- /tests/parser/basics/multi-value.testscript: -------------------------------------------------------------------------------- 1 | : empty-input 2 | : 3 | $* --multi=" " <'' 4 | 5 | : single-value 6 | : 7 | $* --multi=" " <'1' >' 1, 1: 1' 8 | 9 | : extra-whitespace-ignored 10 | : Extra JSON whitespace (newlines in this case) is ignored before first value, 11 | : between values, and after last value. 12 | : 13 | $* --multi=" " <>EOO 14 | 15 | 1 16 | 2 17 | 18 | EOI 19 | 2, 2: 1 20 | 3, 2: 2 21 | EOO 22 | 23 | : null-separators 24 | : Zero or more JSON whitespaces should be accepted. 25 | : 26 | {{ 27 | test.options += --multi 28 | 29 | : valid 30 | : One unseparated pair, one separated pair. 31 | : 32 | $* <'"1""2" 3' >>EOO 33 | 1, 1: "1" 34 | 1, 4: "2" 35 | 1, 8: 3 36 | EOO 37 | 38 | : invalid-separator 39 | : Non-whitespace separator. 40 | : 41 | $* <'1@2' >' 1, 1: 1' 2>>EOE != 0 42 | :1:2: error: unexpected byte '@' in value 43 | EOE 44 | }} 45 | 46 | : any-whitespace 47 | : One or more JSON whitespaces should be accepted. 
48 | : 49 | {{ 50 | test.options += --multi= 51 | 52 | : valid 53 | : 54 | $* <>EOO 55 | 1 2 56 | 3 57 | EOI 58 | 1, 1: 1 59 | 1, 3: 2 60 | 2, 2: 3 61 | EOO 62 | 63 | : invalid-separator 64 | : 65 | $* <'1@2' >' 1, 1: 1' 2>>EOE != 0 66 | :1:2: error: missing separator between JSON values 67 | EOE 68 | 69 | : unseparated 70 | : 71 | $* <'"1""2"' >' 1, 1: "1"' 2>>EOE != 0 72 | :1:4: error: missing separator between JSON values 73 | EOE 74 | }} 75 | 76 | : specific-separators 77 | : At least one of the specified separators must be present. 78 | : 79 | {{ 80 | test.options += --multi=" " 81 | 82 | : valid 83 | : 84 | $* <'1 2' >>EOO 85 | 1, 1: 1 86 | 1, 3: 2 87 | EOO 88 | 89 | : invalid-separator 90 | : 91 | $* <' 1, 1: 1' 2>>EOE != 0 92 | 1 93 | 2 94 | EOI 95 | :2:1: error: missing separator between JSON values 96 | EOE 97 | 98 | : unseparated 99 | : 100 | $* <'"1""2"' >' 1, 1: "1"' 2>>EOE != 0 101 | :1:4: error: missing separator between JSON values 102 | EOE 103 | }} 104 | 105 | : multiple-separators 106 | : 107 | {{ 108 | # Newline + space. 109 | # 110 | test.options += --multi=" 111 | " 112 | 113 | : valid 114 | : 115 | $* <>EOO 116 | 1 2 117 | 3 118 | 4 119 | 120 | EOI 121 | 1, 1: 1 122 | 1, 3: 2 123 | 2, 1: 3 124 | 3, 2: 4 125 | EOO 126 | 127 | : invalid-separator 128 | : 129 | $* <"1 2" >' 1, 1: 1' 2>>EOE != 0 # Note: it's a TAB. 
130 | :1:3: error: missing separator between JSON values 131 | EOE 132 | 133 | : unseparated 134 | : 135 | $* <'"1""2"' >' 1, 1: "1"' 2>>EOE != 0 136 | :1:4: error: missing separator between JSON values 137 | EOE 138 | }} 139 | 140 | : non-whitespace-separator 141 | : 142 | {{ 143 | test.options += --multi="@" 144 | 145 | : valid 146 | : 147 | $* <'1@2@3' >>EOO 148 | 1, 1: 1 149 | 1, 3: 2 150 | 1, 5: 3 151 | EOO 152 | 153 | : invalid-separator 154 | : 155 | $* <'1 2' >' 1, 1: 1' 2>>EOE != 0 156 | :1:3: error: missing separator between JSON values 157 | EOE 158 | 159 | : unseparated 160 | : 161 | $* <'"1""2"' >' 1, 1: "1"' 2>>EOE != 0 162 | :1:4: error: missing separator between JSON values 163 | EOE 164 | }} 165 | 166 | : record-separator 167 | : 168 | {{ 169 | # @@ Currently it's impossible to specify RS characters in testscript so 170 | # using `@` until that has been addressed. 171 | # 172 | test.options += --multi="@" 173 | 174 | : leading-delimited 175 | : RS-delimited: each value preceded by RS and succeeded by NL/LF. 176 | : 177 | $* <>EOO 178 | @1 179 | @2 180 | 181 | EOI 182 | 1, 2: 1 183 | 2, 2: 2 184 | EOO 185 | 186 | : empty 187 | : While not valid per the RFC, we accept this. 188 | : 189 | $* < 2 | 3 | #include 4 | 5 | #undef NDEBUG 6 | #include 7 | 8 | using namespace std; 9 | using namespace stud::json; 10 | 11 | // Parse the data in the specified mode (default or multi-value) returning 12 | // true if the data is valid JSON and false otherwise. 
13 | // 14 | static bool 15 | parse (const void* data, size_t size, bool multi, const char* sep) 16 | { 17 | parser p (data, size, "", multi, sep); 18 | 19 | auto handle_event = [&p] (event e) 20 | { 21 | assert (p.line () >= 1 && p.column () >= 1 && p.position () >= 1); 22 | 23 | switch (e) 24 | { 25 | case event::begin_object: 26 | case event::end_object: 27 | case event::begin_array: 28 | case event::end_array: break; 29 | case event::string: p.value (); break; 30 | case event::name: p.name (); break; 31 | case event::null: assert (p.value () == "null"); break; 32 | case event::boolean: 33 | { 34 | p.value (); 35 | assert (p.value () == "true" || p.value () == "false"); 36 | break; 37 | } 38 | case event::number: 39 | { 40 | try 41 | { 42 | p.value (); 43 | } 44 | catch (const invalid_json_input&) 45 | { 46 | try 47 | { 48 | p.value (); 49 | } 50 | catch (const invalid_json_input&) 51 | { 52 | p.value (); 53 | } 54 | } 55 | break; 56 | } 57 | } 58 | }; 59 | 60 | try 61 | { 62 | if (!multi) 63 | for (auto e: p) handle_event (e); 64 | else 65 | while (p.peek ()) 66 | for (auto e: p) handle_event (e); 67 | 68 | return true; 69 | } 70 | catch (const invalid_json_input&) 71 | { 72 | return false; 73 | } 74 | } 75 | 76 | extern "C" int 77 | LLVMFuzzerTestOneInput (const uint8_t* data, size_t size) 78 | { 79 | // If it's valid in default mode, don't waste time parsing it in multi-value 80 | // mode. 81 | // 82 | if (!parse (data, size, false, nullptr)) 83 | { 84 | // Multi-value mode enabled and configured to accept zero or more JSON 85 | // whitespaces between values. The longer the list of accepted separator 86 | // characters, the better the balance with the hundreds of invalid 87 | // possibilities. 
88 | // 89 | parse (data, size, true, nullptr); 90 | } 91 | return 0; 92 | } 93 | -------------------------------------------------------------------------------- /tests/parser/peek/buildfile: -------------------------------------------------------------------------------- 1 | import libs = libstud-json%lib{stud-json} 2 | 3 | exe{driver}: {cxx}{driver} $libs 4 | -------------------------------------------------------------------------------- /tests/parser/peek/driver.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #undef NDEBUG 7 | #include 8 | 9 | using namespace std; 10 | using namespace stud::json; 11 | 12 | int 13 | main () 14 | { 15 | using stud::nullopt; 16 | 17 | // Value in initial state. 18 | // 19 | { 20 | parser p ("1", "test"); 21 | // assert (p.value ().empty ()); 22 | assert (p.data ().first == nullptr); 23 | assert (p.data ().second == 0); 24 | } 25 | 26 | // Peek in initial state (before any next()s): no value available except 27 | // through data(). 28 | // 29 | { 30 | parser p ("1", "test"); 31 | assert (p.peek () == event::number); 32 | // assert (p.value ().empty ()); 33 | assert (p.data ().first != nullptr); 34 | assert (string (p.data ().first) == "1"); 35 | } 36 | 37 | // Next in initial state. 38 | // 39 | { 40 | parser p ("1", "test"); 41 | assert (p.next () == event::number); 42 | assert (p.value () == 1); 43 | assert (p.data ().first != nullptr); 44 | assert (string (p.data ().first) == "1"); 45 | } 46 | 47 | // Peek followed by next. 
48 | // 49 | { 50 | parser p ("1", "test"); 51 | assert (p.peek () == event::number); 52 | // assert (p.value ().empty ()); 53 | assert (p.data ().first != nullptr); 54 | assert (string (p.data ().first) == "1"); 55 | assert (p.data ().second == 1); 56 | 57 | assert (p.next () == event::number); 58 | assert (p.value () == 1); 59 | assert (string (p.data ().first) == "1"); 60 | assert (p.data ().second == 1); 61 | } 62 | 63 | // Next followed by peek. 64 | // 65 | { 66 | parser p ("[1,2]", "test"); 67 | assert (p.next () == event::begin_array); 68 | assert (p.next () == event::number); 69 | assert (p.value () == 1); 70 | 71 | assert (p.peek () == event::number); 72 | assert (p.value () == 1); 73 | } 74 | 75 | // Latest value always available via data(). 76 | // 77 | { 78 | parser p ("[1,222]", "test"); 79 | assert (p.peek () == event::begin_array); 80 | assert (p.data ().first == nullptr); 81 | assert (p.data ().second == 0); 82 | 83 | assert (p.next () == event::begin_array); 84 | assert (p.data ().first == nullptr); 85 | assert (p.data ().second == 0); 86 | 87 | // Peeked value accessible in raw form. 88 | // 89 | assert (p.peek () == event::number); 90 | assert (p.data ().first != nullptr); 91 | assert (string (p.data ().first) == "1"); 92 | assert (p.data ().second == 1); 93 | 94 | // Parsed value accessible in raw form. 95 | // 96 | assert (p.next () == event::number); 97 | assert (p.data ().first != nullptr); 98 | assert (string (p.data ().first) == "1"); 99 | assert (p.data ().second == 1); 100 | 101 | // Peeked value once again accessible in raw form. 102 | // 103 | assert (p.peek () == event::number); 104 | assert (p.data ().first != nullptr); 105 | assert (string (p.data ().first) == "222"); 106 | assert (p.data ().second == 3); 107 | } 108 | 109 | // After peek(), value() returns value from previous next(). 
110 | // 111 | { 112 | parser p ("[1, \"hello\", 3]", "test"); 113 | assert (p.next () == event::begin_array); 114 | assert (p.next () == event::number); 115 | assert (p.value () == "1"); 116 | 117 | assert (p.peek () == event::string); 118 | assert (p.value () == "1"); 119 | } 120 | 121 | // Peek is idempotent. 122 | // 123 | { 124 | parser p ("[1, \"hello\"]", "test"); 125 | assert (p.peek () == event::begin_array); 126 | assert (p.peek () == event::begin_array); 127 | 128 | assert (p.next () == event::begin_array); 129 | 130 | // Peek #1. 131 | // 132 | assert (p.peek () == event::number); 133 | // assert (p.value ().empty ()); 134 | assert (p.data ().first != nullptr); 135 | assert (string (p.data ().first) == "1"); 136 | 137 | // Peek #2. 138 | // 139 | assert (p.peek () == event::number); 140 | // assert (p.value ().empty ()); 141 | assert (p.data ().first != nullptr); 142 | assert (string (p.data ().first) == "1"); 143 | 144 | assert (p.next () == event::number); 145 | 146 | // Peek #1. 147 | // 148 | assert (p.peek () == event::string); 149 | assert (p.value () == "1"); 150 | assert (p.data ().first != nullptr); 151 | assert (string (p.data ().first) == "hello"); 152 | 153 | // Peek #2. 154 | // 155 | assert (p.peek () == event::string); 156 | assert (p.value () == "1"); 157 | assert (p.data ().first != nullptr); 158 | assert (string (p.data ().first) == "hello"); 159 | 160 | // Get to last value. 161 | // 162 | assert (p.next () == event::string); 163 | assert (p.next () == event::end_array); 164 | 165 | // Peek past last value. 166 | // 167 | assert (p.peek () == nullopt); 168 | assert (p.data ().first == nullptr); 169 | assert (p.data ().second == 0); 170 | 171 | // Get to EOF. 172 | // 173 | assert (p.next () == nullopt); 174 | 175 | // Peek at (past) EOF is idempotent. 
176 | // 177 | assert (p.peek () == nullopt); 178 | assert (p.data ().first == nullptr); 179 | assert (p.data ().second == 0); 180 | assert (p.peek () == nullopt); 181 | assert (p.data ().first == nullptr); 182 | assert (p.data ().second == 0); 183 | } 184 | 185 | // Peek EOF. 186 | // 187 | { 188 | parser p ("1", "test"); 189 | assert (p.next () == event::number); 190 | assert (p.peek () == nullopt); 191 | assert (p.value () == "1"); 192 | assert (p.value () == 1); 193 | assert (p.next () == nullopt); 194 | assert (p.peek () == nullopt); 195 | } 196 | 197 | // Parse at EOF. 198 | // 199 | { 200 | parser p ("1", "test"); 201 | assert (p.next () == event::number); 202 | 203 | assert (p.next () == nullopt); 204 | assert (p.data ().first == nullptr); 205 | assert (p.data ().second == 0); 206 | 207 | assert (p.next () == nullopt); 208 | assert (p.data ().first == nullptr); 209 | assert (p.data ().second == 0); 210 | } 211 | 212 | // Beginning-to-end: parse only. 213 | // 214 | { 215 | parser p ("[1,2]", "test"); 216 | assert (p.next () == event::begin_array); 217 | assert (p.next () == event::number); 218 | assert (p.value () == 1); 219 | assert (p.next () == event::number); 220 | assert (p.value () == 2); 221 | assert (p.next () == event::end_array); 222 | assert (p.next () == nullopt); 223 | } 224 | 225 | // Beginning-to-end: peek first. 
226 | // 227 | { 228 | parser p ("[1,2,3]", "test"); 229 | assert (p.peek () == event::begin_array); 230 | assert (p.peek () == event::begin_array); 231 | assert (p.next () == event::begin_array); 232 | assert (p.peek () == event::number); // 1 233 | assert (p.peek () == event::number); // 1 234 | assert (p.next () == event::number); // 1 235 | assert (p.next () == event::number); // 2 236 | assert (p.peek () == event::number); // 3 237 | assert (p.peek () == event::number); // 3 238 | assert (p.next () == event::number); // 3 239 | assert (p.peek () == event::end_array); 240 | assert (p.peek () == event::end_array); 241 | assert (p.next () == event::end_array); 242 | assert (p.peek () == nullopt); 243 | assert (p.peek () == nullopt); 244 | assert (p.next () == nullopt); 245 | assert (p.peek () == nullopt); 246 | assert (p.peek () == nullopt); 247 | } 248 | 249 | // Beginning-to-end: parse first. 250 | // 251 | { 252 | parser p ("[1,2,3]", "test"); 253 | assert (p.next () == event::begin_array); 254 | assert (p.peek () == event::number); // 1 255 | assert (p.peek () == event::number); // 1 256 | assert (p.next () == event::number); // 1 257 | assert (p.peek () == event::number); // 2 258 | assert (p.peek () == event::number); // 2 259 | assert (p.next () == event::number); // 2 260 | assert (p.next () == event::number); // 3 261 | assert (p.peek () == event::end_array); 262 | assert (p.peek () == event::end_array); 263 | assert (p.next () == event::end_array); 264 | assert (p.peek () == nullopt); 265 | assert (p.peek () == nullopt); 266 | assert (p.next () == nullopt); 267 | assert (p.peek () == nullopt); 268 | assert (p.peek () == nullopt); 269 | } 270 | 271 | // Don't get caught out by empty JSON string. 
272 | // 273 | { 274 | parser p ("[\"\", \"hello\"]", "test"); 275 | assert (p.next () == event::begin_array); 276 | assert (p.next () == event::string); 277 | assert (p.value () == ""); 278 | assert (p.peek () == event::string); 279 | assert (p.value () == ""); 280 | } 281 | 282 | return 0; 283 | } 284 | -------------------------------------------------------------------------------- /tests/serializer/basics/buildfile: -------------------------------------------------------------------------------- 1 | import libs = libstud-json%lib{stud-json} 2 | 3 | exe{driver}: {cxx}{driver} $libs 4 | -------------------------------------------------------------------------------- /tests/serializer/basics/driver.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include // size_t 3 | #include // memcmp() 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #undef NDEBUG 10 | #include 11 | 12 | using namespace std; 13 | using namespace stud::json; 14 | 15 | // Overflow function which always grows the buffer by exactly N bytes. 16 | // 17 | template 18 | static void 19 | overflow (void*, event, buffer_serializer::buffer& b, size_t) 20 | { 21 | b.capacity = b.size + N; 22 | } 23 | 24 | int 25 | main () 26 | { 27 | using stud::optional; 28 | using stud::nullopt; 29 | 30 | using error = invalid_json_output::error_code; 31 | 32 | // Return true if a call to s.next () with these arguments throws an 33 | // invalid_json_output exception with the specified error code (ec). 34 | // 35 | auto next_throws = [] (error ec, 36 | buffer_serializer& s, 37 | optional e, 38 | pair val = {}, 39 | bool check = true) 40 | { 41 | try 42 | { 43 | s.next (e, val, check); 44 | return false; 45 | } 46 | catch (const invalid_json_output& e) 47 | { 48 | return e.code == ec; 49 | } 50 | }; 51 | 52 | // Return true if the serialization (with checking enabled) of a string 53 | // throws. 
54 | // 55 | auto serialize_throws = [next_throws] (const string& v) 56 | { 57 | string b; 58 | buffer_serializer s (b); 59 | return next_throws ( 60 | error::invalid_value, s, event::string, {v.c_str (), v.size ()}, true); 61 | }; 62 | 63 | // Return the serialized form of a string (with checking enabled). Note that 64 | // the quotes are removed to ease comparisons. 65 | // 66 | auto serialize = [] (const string& v) 67 | { 68 | string b; 69 | buffer_serializer s (b); 70 | s.next (event::string, {v.c_str (), v.size ()}, true); 71 | return b.size () >= 2 ? b.substr (1, b.size () - 2) : ""; 72 | }; 73 | 74 | // Completeness of top-level JSON value sequences. 75 | // 76 | { 77 | // Open array detected as incomplete. 78 | // 79 | { 80 | string b; 81 | buffer_serializer s (b); 82 | s.next (event::begin_array); 83 | assert (next_throws (error::invalid_value, s, nullopt)); 84 | } 85 | 86 | // Open object detected as incomplete. 87 | // 88 | { 89 | string b; 90 | buffer_serializer s (b); 91 | s.next (event::begin_object); 92 | assert (next_throws (error::invalid_value, s, nullopt)); 93 | } 94 | 95 | // Declare top-level value sequence complete by serializing an absent 96 | // event (nullopt). 97 | // 98 | // After that, serializing anything, even nullopt, is an error. 99 | // 100 | { 101 | // Empty top-level value sequence. 102 | // 103 | // If no values have been serialized, the first absent event declares 104 | // the top-level value sequence complete. 105 | // 106 | { 107 | string b; 108 | buffer_serializer s (b); 109 | s.next (nullopt); // Declare this an empty sequence of top-level values. 110 | assert (next_throws (error::invalid_value, s, event::number, {"2", 1})); 111 | assert (next_throws (error::invalid_value, s, nullopt)); 112 | } 113 | 114 | // One top-level value. 115 | // 116 | { 117 | string b; 118 | buffer_serializer s (b); 119 | s.next (event::number, {"1", 1}); 120 | s.next (nullopt); // Check for completeness (throws if not). 
121 | s.next (nullopt); // Declare end of top-level value sequence. 122 | assert (next_throws (error::invalid_value, s, event::number, {"2", 1})); 123 | assert (next_throws (error::invalid_value, s, nullopt)); 124 | } 125 | 126 | // Multiple top-level values. 127 | // 128 | { 129 | string b; 130 | buffer_serializer s (b); 131 | s.next (event::number, {"1", 1}); 132 | s.next (event::number, {"2", 1}); 133 | s.next (nullopt); // Check for completeness (throws if not). 134 | s.next (nullopt); // Declare end of top-level value sequence. 135 | assert (next_throws (error::invalid_value, s, event::number, {"3", 1})); 136 | assert (next_throws (error::invalid_value, s, nullopt)); 137 | } 138 | } 139 | } 140 | 141 | // Array structure. 142 | // 143 | { 144 | // End array outside array. 145 | // 146 | { 147 | string b; 148 | buffer_serializer s (b); 149 | assert (next_throws (error::unexpected_event, s, event::end_array)); 150 | } 151 | 152 | // End object inside array. 153 | // 154 | { 155 | string b; 156 | buffer_serializer s (b); 157 | s.next (event::begin_array); 158 | assert (next_throws (error::unexpected_event, s, event::end_object)); 159 | } 160 | } 161 | 162 | // Object structure. 163 | // 164 | { 165 | // End object outside object. 166 | // 167 | { 168 | string b; 169 | buffer_serializer s (b); 170 | assert (next_throws (error::unexpected_event, s, event::end_object)); 171 | } 172 | 173 | // End object when member value is expected. 174 | // 175 | { 176 | string b; 177 | buffer_serializer s (b); 178 | s.next (event::begin_object); 179 | s.next (event::name, {"n", 1}); 180 | assert (next_throws (error::unexpected_event, s, event::end_object)); 181 | } 182 | 183 | // End array inside object. 184 | // 185 | { 186 | string b; 187 | buffer_serializer s (b); 188 | s.next (event::begin_object); 189 | assert (next_throws (error::unexpected_event, s, event::end_array)); 190 | } 191 | 192 | // Value when expecting a name. 
193 | // 194 | { 195 | { 196 | string b; 197 | buffer_serializer s (b); 198 | s.next (event::begin_object); 199 | assert ( 200 | next_throws (error::unexpected_event, s, event::number, {"1", 1})); 201 | } 202 | { 203 | string b; 204 | buffer_serializer s (b); 205 | s.next (event::begin_object); 206 | assert ( 207 | next_throws (error::unexpected_event, s, event::string, {"1", 1})); 208 | } 209 | { 210 | string b; 211 | buffer_serializer s (b); 212 | s.next (event::begin_object); 213 | assert (next_throws ( 214 | error::unexpected_event, s, event::boolean, {"true", 4})); 215 | } 216 | { 217 | string b; 218 | buffer_serializer s (b); 219 | s.next (event::begin_object); 220 | assert ( 221 | next_throws (error::unexpected_event, s, event::null, {"null", 4})); 222 | } 223 | 224 | // When there is already a complete member. 225 | // 226 | { 227 | string b; 228 | buffer_serializer s (b); 229 | s.next (event::begin_object); 230 | s.next (event::name, {"a", 1}); 231 | s.next (event::number, {"1", 1}); 232 | assert ( 233 | next_throws (error::unexpected_event, s, event::number, {"1", 1})); 234 | } 235 | } 236 | 237 | // Begin object when expecting a name. 238 | // 239 | { 240 | string b; 241 | buffer_serializer s (b); 242 | s.next (event::begin_object); 243 | assert (next_throws (error::unexpected_event, s, event::begin_object)); 244 | } 245 | 246 | // Name when expecting a value. 247 | // 248 | { 249 | string b; 250 | buffer_serializer s (b); 251 | s.next (event::begin_object); 252 | s.next (event::name, {"a", 1}); 253 | assert (next_throws (error::unexpected_event, s, event::name, {"b", 1})); 254 | } 255 | } 256 | 257 | // Buffer management. 258 | // 259 | { 260 | // Fixed-size buffer: capacity exceeded. 261 | // 262 | { 263 | uint8_t b[3]; 264 | size_t n (0); 265 | buffer_serializer s (b, n, 3); 266 | s.next (event::number, {"12", 2}); // 3 bytes written (val + newline). 
267 | assert (next_throws (error::buffer_overflow, s, event::number, {"2", 1})); 268 | } 269 | 270 | // Serialization of value with multiple calls to overflow. 271 | // 272 | { 273 | uint8_t b[100]; 274 | size_t n (0); 275 | buffer_serializer s (b, n, 0, &overflow<6>, nullptr, nullptr); 276 | const string v (50, 'a'); 277 | s.next (event::string, {v.c_str (), v.size ()}); 278 | // +1 skips the opening quote. 279 | // 280 | assert (memcmp (b + 1, v.c_str (), v.size ()) == 0); 281 | } 282 | 283 | // Serializer appends to user buffer (that is, preserves its contents). 284 | // 285 | { 286 | // String. 287 | // 288 | { 289 | string b ("aaa"); 290 | buffer_serializer s (b); 291 | const string v ("bbb"); 292 | s.next (event::string, {v.c_str(), v.size ()}); 293 | assert (b == "aaa\"bbb\""); 294 | } 295 | 296 | // Array. 297 | // 298 | { 299 | uint8_t b[100] {'a', 'a', 'a'}; 300 | size_t n (3); 301 | buffer_serializer s (b, n, 10, nullptr, nullptr, nullptr); 302 | const string v ("bbb"); 303 | s.next (event::string, {v.c_str(), v.size ()}); 304 | assert (n == 8); 305 | assert (memcmp (b, "aaa\"bbb\"", 8) == 0); 306 | } 307 | } 308 | 309 | // Regression tests. 310 | // 311 | { 312 | // This is a regression test for two different but related 313 | // buffer-management bugs. 314 | // 315 | // Whether or not either of these bugs are triggered depends on the 316 | // capacity of the buffer and thus on the allocation patterns of 317 | // std::string, and therefore it's not practical to construct a small 318 | // number of minimal and specific test cases. For both libstdc++ and 319 | // libc++, however, both bugs were triggered in under 20 characters so 320 | // the 100 used here should cover most implementations. (I think the 321 | // crucial value is the size of the SSO buffer.) 
322 | // 323 | { 324 | for (size_t i (1); i < 100; i++) 325 | serialize (string (i, 'a') + "\x01"); 326 | } 327 | 328 | // With this setup and input we get to the first byte of the UTF-8 329 | // sequence with the bytes left to be written (size, value 2) is less 330 | // than the bytes left in the buffer (cap, value 3) (see 331 | // serializer::write()). Thus a value of (size - cap = 2 - 3 = 332 | // underflow) was being passed to the overflow function. See the fake 333 | // overflow implementation above for details. 334 | // 335 | { 336 | uint8_t b[20]; 337 | size_t n (0); 338 | buffer_serializer s (b, n, 0, &overflow<6>, nullptr, nullptr); 339 | // 0xF0 indicates the beginning of a 4-byte UTF-8 sequence. 340 | // 341 | const string v ("12\xF0"); 342 | try 343 | { 344 | s.next (event::string, {v.c_str (), v.size ()}, true); 345 | } 346 | catch (const invalid_json_output& e) 347 | { 348 | assert (e.code == error::invalid_value); 349 | } 350 | } 351 | } 352 | } 353 | 354 | // Validation of literal values (null and boolean). All JSON literals must 355 | // be lower case. 
356 | // 357 | { 358 | string b; 359 | buffer_serializer s (b); 360 | 361 | auto next_throws_invalid_value = 362 | [&next_throws, &s] (event e, pair v) 363 | { 364 | return next_throws (error::invalid_value, s, e, v, true); 365 | }; 366 | 367 | assert (next_throws_invalid_value (event::null, {"Null", 4})); 368 | assert (next_throws_invalid_value (event::null, {"NULL", 4})); 369 | assert (next_throws_invalid_value (event::null, {"nul", 3})); 370 | assert (next_throws_invalid_value (event::null, {"nullX", 5})); 371 | assert (next_throws_invalid_value (event::null, {"null ", 5})); 372 | 373 | assert (next_throws_invalid_value (event::boolean, {"True", 4})); 374 | assert (next_throws_invalid_value (event::boolean, {"TRUE", 4})); 375 | assert (next_throws_invalid_value (event::boolean, {"tru", 3})); 376 | assert (next_throws_invalid_value (event::boolean, {"trueX", 5})); 377 | assert (next_throws_invalid_value (event::boolean, {"true ", 5})); 378 | 379 | assert (next_throws_invalid_value (event::boolean, {"False", 5})); 380 | assert (next_throws_invalid_value (event::boolean, {"FALSE", 5})); 381 | assert (next_throws_invalid_value (event::boolean, {"fals", 4})); 382 | assert (next_throws_invalid_value (event::boolean, {"falseX", 6})); 383 | assert (next_throws_invalid_value (event::boolean, {"false ", 6})); 384 | } 385 | 386 | // null event: the value is supplied if it is unspecified. 387 | // 388 | { 389 | string b; 390 | buffer_serializer s (b); 391 | s.next (event::null); 392 | assert (b == "null"); 393 | } 394 | 395 | // UTF-8 sequences are not split if buffer runs out of space. 396 | // 397 | // Despite there being capacity for the first part of a UTF-8 sequence, none 398 | // of it must be written. 399 | // 400 | { 401 | uint8_t b[100]; 402 | const string v ("\xE2\x82\xAC"); // U+20AC '€' 403 | 404 | // Using the unchecked version of next(). 
405 | // 406 | { 407 | size_t n (0); 408 | buffer_serializer s (b, n, 3); 409 | assert (next_throws (error::buffer_overflow, 410 | s, 411 | event::string, 412 | {v.c_str (), v.size ()}, 413 | false)); 414 | assert (n == 1); // Only the opening quote should've been written. 415 | } 416 | 417 | // Using the checked version of next(). 418 | // 419 | { 420 | size_t n (0); 421 | buffer_serializer s (b, n, 3); 422 | assert (next_throws (error::buffer_overflow, 423 | s, 424 | event::string, 425 | {v.c_str (), v.size ()}, 426 | true)); 427 | assert (n == 1); 428 | } 429 | } 430 | 431 | // UTF-8 validation. 432 | // 433 | { 434 | assert (serialize_throws ("\xC2")); // Truncated 2-byte sequence. 435 | assert (serialize_throws ("\xE1\x80")); // Truncated 3-byte sequence. 436 | assert (serialize_throws ("\xF1\x80\x80")); // Truncated 4-byte sequence. 437 | assert (serialize_throws ("\xC0\xB0")); // Overlong encoding of '0' (0x30). 438 | assert (serialize_throws ("\xC1\xBE")); // Overlong encoding of '~' (0x7E). 439 | assert (serialize_throws ("\xC2\x7F")); // 2nd byte < valid range. 440 | assert (serialize_throws ("\xC2\xC0")); // 2nd byte > valid range. 441 | 442 | // Special second-byte cases. 443 | // 444 | assert (serialize_throws ("\xE0\x9F\x80")); // 2nd byte < valid range. 445 | assert (serialize_throws ("\xED\xA0\x80")); // 2nd byte > valid range. 446 | assert (serialize_throws ("\xF0\x8F\x80\x80")); // 2nd byte < valid range. 447 | assert (serialize_throws ("\xF4\x90\x80\x80")); // 2nd byte > valid range. 448 | } 449 | 450 | // Escaping. 
451 | // 452 | { 453 | assert (serialize ("\"") == "\\\""); 454 | assert (serialize ("\\") == "\\\\"); 455 | assert (serialize ("\t") == "\\t"); 456 | assert (serialize ("\n") == "\\n"); 457 | assert (serialize ("\b") == "\\b"); 458 | assert (serialize ("\r") == "\\r"); 459 | assert (serialize ("\f") == "\\f"); 460 | assert (serialize ("\x01") == "\\u0001"); 461 | assert (serialize ("\x1F") == "\\u001F"); 462 | assert (serialize ("ABC \t DEF \x01\x02 GHI") == 463 | "ABC \\t DEF \\u0001\\u0002 GHI"); 464 | } 465 | 466 | // Exception offset. 467 | // 468 | // The offset stored in the invalid_json_output exception should point to 469 | // the beginning of the invalid UTF-8 sequence (a truncated 3-byte sequence 470 | // in this case). 471 | // 472 | { 473 | string b; 474 | buffer_serializer s (b); 475 | try 476 | { 477 | s.next (event::string, {"abc\xE1\x80", 5}, true); 478 | assert (false); 479 | } 480 | catch (const invalid_json_output& e) 481 | { 482 | assert (e.offset == 3); 483 | } 484 | } 485 | 486 | // High-level interface. 487 | // 488 | { 489 | // All JSON types. 490 | // 491 | { 492 | string b; 493 | buffer_serializer s (b, 0); 494 | s.value ("a"); 495 | s.value (string ("b")); 496 | s.value (999); 497 | s.value (nullptr); 498 | s.value (true); 499 | assert (b == "\"a\"\n\"b\"\n999\nnull\ntrue"); 500 | } 501 | 502 | // Object. 503 | // 504 | { 505 | string b; 506 | buffer_serializer s (b, 0); 507 | s.begin_object (); 508 | s.member ("a", 1); 509 | s.member_name ("b"); s.value ("z"); 510 | s.member ("c", string ("y")); 511 | s.member ("d", nullptr); 512 | s.member ("e", true); 513 | s.end_object (); 514 | assert (b == "{\"a\":1,\"b\":\"z\",\"c\":\"y\",\"d\":null,\"e\":true}"); 515 | } 516 | 517 | // Array. 
518 | { 519 | string b; 520 | buffer_serializer s (b, 0); 521 | s.begin_array (); 522 | s.value (1); 523 | s.value ("a"); 524 | s.end_array (); 525 | assert (b == "[1,\"a\"]"); 526 | } 527 | 528 | // Long floating point numbers should be output in scientific notation. 529 | // (This also tests that numbers with many digits do not break things.) 530 | // 531 | { 532 | string b; 533 | buffer_serializer s (b, 0); 534 | s.value (numeric_limits::max ()); 535 | assert (b.find ("e+") != string::npos); 536 | } 537 | 538 | // A null char* is serialized as a JSON null. 539 | // 540 | { 541 | string b; 542 | buffer_serializer s (b); 543 | const char* cp (nullptr); 544 | s.value (cp); 545 | assert (b == "null"); 546 | } 547 | 548 | // Pre-serialized JSON value. 549 | // 550 | { 551 | string b; 552 | buffer_serializer s (b, 0); 553 | s.begin_array (); 554 | s.value_json_text ("{\"a\":1}"); 555 | s.value_json_text ("{\"a\":2}"); 556 | s.end_array (); 557 | assert (b == "[{\"a\":1},{\"a\":2}]"); 558 | } 559 | } 560 | } 561 | -------------------------------------------------------------------------------- /tests/serializer/buildfile: -------------------------------------------------------------------------------- 1 | ./: {*/ -fuzz-llvm/} 2 | -------------------------------------------------------------------------------- /tests/serializer/fuzz-llvm/README.md: -------------------------------------------------------------------------------- 1 | This is an [LLVM LibFuzzer](https://llvm.org/docs/LibFuzzer.html)-based test. 
2 | 3 | A typical setup could look like this: 4 | 5 | ``` 6 | cd libstud-json 7 | bdep init -C @fuzz cc config.cxx=clang++ config.cxx.coptions="-g -O3 -fsanitize=address,undefined,fuzzer-no-link" 8 | b ../libstud-json-fuzz/libstud-json/tests/serializer/fuzz-llvm/ # Directory may not exist at this point 9 | cd ../libstud-json-fuzz/libstud-json/tests/serializer/fuzz-llvm/ 10 | mkdir corpus 11 | ./driver corpus/ 12 | ``` 13 | 14 | The serializer's driver does not support starting from an empty corpus so the 15 | corpus has to be pre-initialized (that is, populated between the `mkdir corpus` and `./driver corpus/` steps above). It is highly recommended to start with as 16 | many high-quality samples as possible. The following repositories are a good 17 | starting point: 18 | * The `test_parsing/` directory from 19 | [JSONTestSuite](https://github.com/nst/JSONTestSuite) 20 | * The `json/corpus/` directory from 21 | [go-fuzz-corpus](https://github.com/dvyukov/go-fuzz-corpus/tree/master) 22 | 23 | It would also be wise to include a basic multi-value input such as this: 24 | 25 | ``` 26 | 123 27 | "abc" 28 | true 29 | false 30 | null 31 | [] 32 | {} 33 | ``` 34 | 35 | The serializer's fuzz driver uses a custom input format. The included 36 | `convert` utility can be used to convert valid JSON input to this custom 37 | format. 
The following shell command can be used to convert an entire 38 | directory of JSON files: 39 | 40 | ``` 41 | for f in corpus-json/* 42 | do 43 | ./convert $f corpus/`basename $f` || rm corpus/`basename $f` 44 | done 45 | ``` 46 | -------------------------------------------------------------------------------- /tests/serializer/fuzz-llvm/buildfile: -------------------------------------------------------------------------------- 1 | import libs = libstud-json%lib{stud-json} 2 | 3 | ./: exe{driver}: {cxx}{driver} $libs 4 | 5 | exe{driver}: 6 | { 7 | cxx.loptions += -fsanitize=fuzzer 8 | } 9 | 10 | ./: exe{convert}: {cxx}{convert} $libs 11 | -------------------------------------------------------------------------------- /tests/serializer/fuzz-llvm/convert.cxx: -------------------------------------------------------------------------------- 1 | // This utility can be used to create an initial serializer fuzz corpus from 2 | // valid JSON inputs. Usage: 3 | // 4 | // convert input.json output.bin 5 | // 6 | // See driver.cxx for the output format description. 7 | // 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | using namespace std; 15 | using namespace stud::json; 16 | 17 | int 18 | main (int argc, const char** argv) 19 | { 20 | if (argc != 3) 21 | { 22 | cerr << "usage: " << argv[0] << " " << endl; 23 | return 1; 24 | } 25 | 26 | // Setup input. 27 | // 28 | ifstream in (argv[1]); 29 | if (in.fail ()) 30 | { 31 | cerr << "unable to open file '" << argv[1] << "' for reading" << endl; 32 | return 1; 33 | } 34 | in.exceptions (ios::badbit | ios::failbit); 35 | parser p (in, argv[1], true /* multi_value */); 36 | 37 | // Setup output. 38 | // 39 | ofstream out (argv[2], ios::binary); 40 | if (out.fail ()) 41 | { 42 | cerr << "unable to open file '" << argv[2] << "' for writing" << endl; 43 | return 1; 44 | } 45 | out.exceptions (ios::badbit | ios::failbit); 46 | 47 | // Write an event and its value (which may be absent or empty) to the output file. 
48 | // 49 | auto write = [&out] (uint8_t e, const string* v) 50 | { 51 | out.write (reinterpret_cast (&e), sizeof (e)); 52 | 53 | if (v != nullptr) 54 | { 55 | const uint32_t s (v->size ()); 56 | auto sp (reinterpret_cast (&s)); 57 | out.write (sp, sizeof (s)); 58 | out.write (v->data (), v->size ()); 59 | } 60 | }; 61 | 62 | try 63 | { 64 | uint32_t n (0); // Number of events written (itself written out last, after all the events). 65 | 66 | while (p.peek ()) 67 | { 68 | for (event e: p) 69 | { 70 | switch (e) 71 | { 72 | case event::name: 73 | { 74 | write (static_cast (e), &p.name ()); 75 | break; 76 | } 77 | case event::string: 78 | case event::number: 79 | case event::boolean: 80 | case event::null: 81 | { 82 | write (static_cast (e), &p.value ()); 83 | break; 84 | } 85 | default: 86 | { 87 | write (static_cast (e), nullptr); 88 | break; 89 | } 90 | } 91 | n++; 92 | } 93 | write (0, nullptr); // Absent event: terminates this top-level value. 94 | n++; 95 | } 96 | write (0, nullptr); // Absent event: terminates the whole value sequence. 97 | n++; 98 | 99 | out.write (reinterpret_cast (&n), sizeof (n)); 100 | } 101 | catch (const invalid_json_input& e) 102 | { 103 | cerr << e.name << ':' << e.line << ':' << e.column << ": error: " 104 | << e.what () << endl; 105 | return 1; 106 | } 107 | catch (const std::exception& e) 108 | { 109 | cerr << e.what () << endl; 110 | return 1; 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /tests/serializer/fuzz-llvm/driver.cxx: -------------------------------------------------------------------------------- 1 | // Usage: argv[0] [libFuzzer options] 2 | // 3 | // A corpus containing valid inputs must be provided. Starting from an empty 4 | // corpus is not supported. 5 | 6 | // The input format is a sequence of events in the following form: 7 | // 8 | // e[llllv...] 
9 | // 10 | // e: event type (uint8_t), 0 for absent 11 | // l: value length (uint32_t) 12 | // v: value bytes (UTF-8 string) 13 | // 14 | // LLVMFuzzerTestOneInput() takes one file from the fuzz corpus as input and 15 | // feeds the events and values it contains to the serializer one at a 16 | // time. The file will first be passed to LLVMFuzzerCustomMutator() which will 17 | // perform one of a number of different kinds of mutations on it, after which 18 | // it is passed to LLVMFuzzerTestOneInput(). 19 | 20 | #include 21 | #include 22 | #include // abort 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | #undef NDEBUG 30 | #include 31 | 32 | using namespace std; 33 | using namespace stud::json; 34 | 35 | // Return true if a JSON event does not come with a value (this includes the absent event, 0, and any value not matched by the switch below). 36 | // 37 | static bool 38 | valueless (uint8_t e) noexcept 39 | { 40 | if (e != 0) // 0 is the absent event, which is valueless. 41 | { 42 | switch (static_cast (e)) 43 | { 44 | case event::begin_object: 45 | case event::end_object: 46 | case event::begin_array: 47 | case event::end_array: 48 | { 49 | return true; 50 | break; 51 | } 52 | case event::name: 53 | case event::string: 54 | case event::number: 55 | case event::boolean: 56 | case event::null: 57 | { 58 | return false; 59 | break; 60 | } 61 | } 62 | } 63 | 64 | return true; 65 | } 66 | 67 | // Feed the events and values contained in the input buffer to the serializer. 68 | // 69 | extern "C" int 70 | LLVMFuzzerTestOneInput (const uint8_t* data, size_t size) 71 | { 72 | using stud::optional; 73 | 74 | // Note that libFuzzer will invoke this function once with empty input 75 | // before starting the fuzzing run. 76 | // 77 | if (size == 0) 78 | return 0; 79 | 80 | // Detect when we seem to be running without a corpus. 
81 | // 82 | if (size < sizeof (uint32_t)) 83 | { 84 | cerr << "empty corpus" << endl; 85 | exit (1); 86 | } 87 | 88 | string b; 89 | buffer_serializer s (b); 90 | 91 | // Extract the event count (a uint32_t stored in the last sizeof (uint32_t) bytes of the input). 
92 | // 93 | uint32_t en (0); 94 | memcpy (&en, data + size - sizeof (en), sizeof (en)); 95 | 96 | // Parse events and their values from data and pass them to the serializer. Note that offsets are not checked against size here, so the input is assumed to be well-formed. 97 | // 98 | for (size_t ei (0), i (0); ei != en; ei++) 99 | { 100 | const uint8_t e (data[i++]); 101 | 102 | // Extract the value length and the value. 103 | // 104 | const char* v (nullptr); 105 | uint32_t n (0); 106 | if (!valueless (e)) 107 | { 108 | memcpy (&n, data + i, sizeof (n)); 109 | i += sizeof (n); 110 | v = reinterpret_cast (data + i); 111 | i += n; 112 | } 113 | 114 | // Serialize the event and its value. 115 | // 116 | try 117 | { 118 | s.next (e != 0 ? static_cast (e) : optional (), 119 | {v, n}, 120 | true /* check */); 121 | } 122 | catch (const invalid_json_output& e) 123 | { 124 | // If the error code is buffer_overflow the bug must be in the 125 | // serializer's code because this driver serializes to a std::string so 126 | // a real allocation failure would throw bad_alloc. Any other error code just ends the processing of this input. 127 | // 128 | if (e.code == invalid_json_output::error_code::buffer_overflow) 129 | abort (); 130 | 131 | break; 132 | } 133 | } 134 | 135 | return 0; 136 | } 137 | 138 | extern "C" size_t 139 | LLVMFuzzerMutate (uint8_t* data, size_t size, size_t maxsize); 140 | 141 | // Default values for the event insertion mutation, indexed by event value minus one (a nullptr entry means no value is written for that event). 142 | // 143 | static const char* default_values[event_count] { 144 | nullptr, 145 | nullptr, 146 | nullptr, 147 | nullptr, 148 | "fuzz-name", 149 | "fuzz-string", 150 | "1234", 151 | "true", 152 | "null"}; 153 | 154 | // Select an event at random and either mutate its type, mutate its value 155 | // (including updating its length), remove it, or insert a new event after it. 156 | // 157 | // The seed argument is a pseudo-random number which should be used in such a 158 | // way as to cause a different mutation to be performed on each invocation. 
159 | // 160 | // LibFuzzer's main fuzz loop (which is infinite by default) works as follows: 161 | // an input file is selected at random. It then loops over that input a 162 | // maximum of 5 times with each iteration consisting of a mutation (an 163 | // invocation of LLVMFuzzerCustomMutator()) and a test (an invocation of 164 | // LLVMFuzzerTestOneInput()). The same buffer is passed to the mutator each 165 | // time, so mutations are cumulative. If coverage increases or the input was 166 | // reduced, the inner loop is terminated immediately and the outer loop 167 | // selects the next input. Note that each input will be selected for these 168 | // 5-iteration runs repeatedly and thus ultimately be invoked many times (with 169 | // different seed values). 170 | // 171 | // What this all means in the end (and according to our understanding), is 172 | // that we don't want to perform pervasive mutations where the entire input is 173 | // changed. Instead, we want to perform a small, localized mutation on each 174 | // step (at least this is how the default mutation works if we did not provide 175 | // a custom mutator). 176 | // 177 | // This function performs a single mutation per invocation. It selects an 178 | // event to mutate and mutation type based on the seed argument. 179 | // 180 | extern "C" size_t 181 | LLVMFuzzerCustomMutator (uint8_t* data, 182 | size_t size, 183 | size_t maxsize, 184 | unsigned int seed) 185 | { 186 | // Looking at other custom mutator implementations, it seems this is how 187 | // the seed should be used. 188 | // 189 | minstd_rand rand (seed); 190 | 191 | // Read the event count from the end of the input. 192 | // 193 | uint32_t en (0); 194 | memcpy (&en, data + size - sizeof (en), sizeof (en)); 195 | 196 | // The plan is as follows: iterate over events in data copying them over to 197 | // the temporary buffer until we reach the event that we want to mutate. 
198 | // Once we've performed the mutation (and added the result into the buffer), 199 | // we continue iterating over the remaining events copying them over into 200 | // the buffer as long as they fit. 201 | // 202 | uint32_t em (0); // Number of events appended. 203 | vector buf; // Buffer to which mutated input is written. 204 | buf.reserve (maxsize); 205 | 206 | maxsize -= sizeof (en); 207 | 208 | // Append a simple value to the buffer (note: assumes sufficient space). 209 | // 210 | auto append_v = [&buf] (auto v) 211 | { 212 | auto p (reinterpret_cast (&v)); 213 | buf.insert (buf.end (), p, p + sizeof (v)); 214 | }; 215 | 216 | // Append an event and its value, if any, to the buffer. Return false if 217 | // there wasn't enough space. 218 | // 219 | auto append_e = [&buf, &append_v, maxsize, &em] 220 | (uint8_t e, const void* v, uint32_t n) 221 | { 222 | const size_t cap (maxsize - buf.size ()); 223 | 224 | if (cap < (v == nullptr ? sizeof (e) : sizeof (e) + sizeof (n) + n)) 225 | return false; 226 | 227 | append_v (e); 228 | 229 | if (v != nullptr) 230 | { 231 | append_v (n); 232 | auto p (static_cast (v)); 233 | buf.insert (buf.end (), p, p + n); 234 | } 235 | 236 | em++; 237 | return true; 238 | }; 239 | 240 | const uint32_t ej (rand () % en); // Index of event to mutate. 241 | for (size_t ei (0), i (0); ei != en; ei++) 242 | { 243 | const uint8_t e (data[i++]); 244 | 245 | // Extract the value length and the value. 246 | // 247 | const uint8_t* v (nullptr); 248 | uint32_t n (0); 249 | if (!valueless (e)) 250 | { 251 | memcpy (&n, data + i, sizeof (n)); 252 | i += sizeof (n); 253 | v = data + i; 254 | i += n; 255 | } 256 | 257 | // Copy over events that don't need mutation. 258 | // 259 | if (ei != ej) 260 | { 261 | if (!append_e (e, v, n)) // Did not fit. 262 | goto done; 263 | 264 | continue; 265 | } 266 | 267 | // Apply the mutation and append the result to the buffer, except if we're 268 | // removing the current event, in which case we do nothing. 
269 | // 270 | switch (rand () % 4) 271 | { 272 | case 0: // Remove the current event. 273 | { 274 | // If this is the only event, then we fall through to add an event 275 | // instead. 276 | // 277 | if (en != 1) 278 | break; 279 | } 280 | // Fall through. 281 | case 1: // Insert a new event. 282 | { 283 | // Copy the current event to the buffer. 284 | // 285 | if (!append_e (e, v, n)) 286 | goto done; 287 | 288 | // Insert a new event. 289 | // 290 | const uint8_t e1 (rand () % event_count + 1); 291 | const char* v1 (default_values[e1 - 1]); 292 | if (!append_e (e1, v1, v1 == nullptr ? 0 : strlen (v1))) 293 | goto done; 294 | 295 | break; 296 | } 297 | case 2: // Mutate the current event's value. 298 | { 299 | // If the event has no value, then we fall through to mutate the event 300 | // itself. 301 | // 302 | if (!valueless (e)) 303 | { 304 | // Mutate the value, allowing it to grow by up to 100 bytes in size. 305 | // 306 | vector v1 (n + 100); 307 | memcpy (v1.data (), v, n); 308 | const size_t n1 (LLVMFuzzerMutate (v1.data (), n, v1.size ())); 309 | 310 | if (!append_e (e, v1.data (), n1)) 311 | goto done; 312 | 313 | break; 314 | } 315 | } 316 | // Fall through. 317 | case 3: // Mutate the current event (but not the value). 318 | { 319 | // If the new event doesn't need a value, then we drop the old value 320 | // (if any). If the new event needs a value and the old one did not 321 | // have any, then we use the default value as in the insert case 322 | // above. 323 | // 324 | const uint8_t e1 (rand () % event_count + 1); 325 | 326 | if (valueless (e1)) 327 | { 328 | if (!append_e (e1, nullptr, 0)) 329 | goto done; 330 | } 331 | else 332 | { 333 | if (!valueless (e)) 334 | { 335 | if (!append_e (e1, v, n)) 336 | goto done; 337 | } 338 | else 339 | { 340 | const char* v1 (default_values[e1 - 1]); 341 | if (!append_e (e1, v1, v1 == nullptr ? 
0 : strlen (v1))) 342 | goto done; 343 | } 344 | } 345 | 346 | break; 347 | } 348 | } 349 | } 350 | 351 | done: 352 | // Copy the mutated data and event count back into the input buffer. 353 | // 354 | append_v (em); 355 | memcpy (data, buf.data (), buf.size ()); 356 | return buf.size (); 357 | } 358 | -------------------------------------------------------------------------------- /tests/serializer/roundtrip/buildfile: -------------------------------------------------------------------------------- 1 | import libs = libstud-json%lib{stud-json} 2 | 3 | ./: exe{driver}: cxx{driver} $libs 4 | 5 | # Run the tests twice, once with checking disabled and once with checking 6 | # enabled. If/when we have support for a for-loop in Testscript we can handle 7 | # this cleanly there. For now we use this alias trick (or hack, if you wish). 8 | # 9 | exe{driver}: test = false 10 | 11 | ./: alias{unchecked checked}: exe{driver} testscript 12 | { 13 | test = exe{driver} 14 | } 15 | 16 | alias{checked}: test.options += --check 17 | -------------------------------------------------------------------------------- /tests/serializer/roundtrip/driver.cxx: -------------------------------------------------------------------------------- 1 | // Usage: argv[0] [--check] [--pretty] 2 | // 3 | // --check -- enable UTF-8 checking and escaping 4 | // --pretty -- enable pretty-printing 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #undef NDEBUG 13 | #include 14 | 15 | using namespace std; 16 | using namespace stud::json; 17 | 18 | int 19 | main (int argc, const char* argv[]) 20 | { 21 | using stud::nullopt; 22 | 23 | bool check (false); 24 | bool pretty (false); 25 | 26 | for (int i (1); i < argc; i++) 27 | { 28 | const string o (argv[i]); 29 | 30 | if (o == "--check") 31 | check = true; 32 | else if (o == "--pretty") 33 | pretty = true; 34 | } 35 | 36 | parser p (cin, "", true /* multi_value*/); 37 | stream_serializer s (cout, pretty ? 
2 : 0); 38 | 39 | try 40 | { 41 | if (p.peek ()) 42 | { 43 | while (p.peek ()) 44 | { 45 | for (event e: p) 46 | s.next (e, p.data (), check); 47 | s.next (nullopt); 48 | } 49 | s.next (nullopt); 50 | cout << endl; 51 | } 52 | 53 | return 0; 54 | } 55 | catch (const invalid_json_output& e) 56 | { 57 | cerr << e.what () << endl; 58 | } 59 | catch (const invalid_json_input& e) 60 | { 61 | cerr << e.what () << endl; 62 | } 63 | catch (const ios::failure& e) 64 | { 65 | cerr << "io error: " << e.what () << endl; 66 | } 67 | 68 | return 1; 69 | } 70 | -------------------------------------------------------------------------------- /tests/serializer/roundtrip/testscript: -------------------------------------------------------------------------------- 1 | : simple 2 | : 3 | {{ 4 | : number 5 | : 6 | $* <>EOI 7 | 12345 8 | EOI 9 | 10 | : string 11 | : 12 | $* <>EOI 13 | "" 14 | "हab¢" 15 | EOI 16 | 17 | : null 18 | : 19 | $* <>EOI 20 | null 21 | EOI 22 | 23 | : true 24 | : 25 | $* <>EOI 26 | true 27 | EOI 28 | 29 | : false 30 | : 31 | $* <>EOI 32 | false 33 | EOI 34 | }} 35 | 36 | : array 37 | : 38 | {{ 39 | : empty 40 | : 41 | $* <>EOI 42 | [] 43 | EOI 44 | 45 | : empty-pretty 46 | : 47 | $* --pretty <>EOI 48 | [] 49 | EOI 50 | 51 | : single 52 | : 53 | $* <>EOI 54 | [1] 55 | EOI 56 | 57 | : single-pretty 58 | : 59 | $* --pretty <>EOI 60 | [ 61 | 1 62 | ] 63 | EOI 64 | 65 | : multi 66 | : 67 | $* <>EOI 68 | [1,2,3] 69 | EOI 70 | 71 | : multi-pretty 72 | : 73 | $* --pretty <>EOI 74 | [ 75 | 1, 76 | 2, 77 | 3 78 | ] 79 | EOI 80 | 81 | # Strings are given special treatment here because their quotes are 82 | # additional delimiters. 
83 | # 84 | : string 85 | : 86 | $* <>EOI 87 | ["abc","def",""] 88 | EOI 89 | 90 | : string-pretty 91 | : 92 | $* --pretty <>EOI 93 | [ 94 | "abc", 95 | "def", 96 | "" 97 | ] 98 | EOI 99 | 100 | : array 101 | : 102 | $* <>EOI 103 | [[1,2,3],[]] 104 | EOI 105 | 106 | : array-pretty 107 | : 108 | $* --pretty <>EOI 109 | [ 110 | [ 111 | 1, 112 | 2, 113 | 3 114 | ], 115 | [] 116 | ] 117 | EOI 118 | 119 | : object 120 | : 121 | $* <>EOI 122 | [{"a":1,"b":2,"c":3},{}] 123 | EOI 124 | 125 | : object-pretty 126 | : 127 | $* --pretty <>EOI 128 | [ 129 | { 130 | "a": 1, 131 | "b": 2, 132 | "c": 3 133 | }, 134 | {} 135 | ] 136 | EOI 137 | }} 138 | 139 | : object 140 | : 141 | {{ 142 | : empty 143 | : 144 | $* <>EOI 145 | {} 146 | EOI 147 | 148 | : empty-pretty 149 | : 150 | $* --pretty <>EOI 151 | {} 152 | EOI 153 | 154 | : single 155 | : 156 | $* <>EOI 157 | {"a":1} 158 | EOI 159 | 160 | : single-pretty 161 | : 162 | $* --pretty <>EOI 163 | { 164 | "a": 1 165 | } 166 | EOI 167 | 168 | : multi 169 | : 170 | $* <>EOI 171 | {"a":1,"b":2,"c":3} 172 | EOI 173 | 174 | : multi-pretty 175 | : 176 | $* --pretty <>EOI 177 | { 178 | "a": 1, 179 | "b": 2, 180 | "c": 3 181 | } 182 | EOI 183 | 184 | : string 185 | : 186 | $* <>EOI 187 | {"a":"abc","b":"def","c":""} 188 | EOI 189 | 190 | : string-pretty 191 | : 192 | $* --pretty <>EOI 193 | { 194 | "a": "abc", 195 | "b": "def", 196 | "c": "" 197 | } 198 | EOI 199 | 200 | : object 201 | : 202 | $* <>EOI 203 | {"a":{"b":1,"c":2,"d":3},"e":{}} 204 | EOI 205 | 206 | : object-pretty 207 | : 208 | $* --pretty <>EOI 209 | { 210 | "a": { 211 | "b": 1, 212 | "c": 2, 213 | "d": 3 214 | }, 215 | "e": {} 216 | } 217 | EOI 218 | 219 | : array 220 | : 221 | $* <>EOI 222 | {"a":[1,2,3],"b":[]} 223 | EOI 224 | 225 | : array-pretty 226 | : 227 | $* --pretty <>EOI 228 | { 229 | "a": [ 230 | 1, 231 | 2, 232 | 3 233 | ], 234 | "b": [] 235 | } 236 | EOI 237 | }} 238 | 239 | # Multiple levels of nesting covering all of the nesting cases: {{}}, {[]}, 240 | # 
[[]], [{}]. 241 | # 242 | : nested 243 | : 244 | {{ 245 | : pretty 246 | : 247 | $* --pretty <>EOI 248 | { 249 | "a": { 250 | "b": [ 251 | { 252 | "c": 1, 253 | "d": 2, 254 | "e": 3 255 | }, 256 | {}, 257 | [ 258 | 3, 259 | 4, 260 | 5 261 | ], 262 | [] 263 | ], 264 | "f": [] 265 | }, 266 | "g": {} 267 | } 268 | EOI 269 | 270 | : no-pretty 271 | : 272 | $* <>EOI 273 | {"a":{"b":[{"c":1,"d":2,"e":3},{},[3,4,5],[]],"f":[]},"g":{}} 274 | EOI 275 | }} 276 | 277 | : multival 278 | : 279 | {{ 280 | : zero 281 | : Checks that no newline is printed following a complete but empty value 282 | : sequence, independently of whether or not pretty-printing is enabled. 283 | : 284 | {{ 285 | : pretty 286 | : 287 | $* --pretty <'' >:'' 288 | 289 | : no-pretty 290 | : 291 | $* <'' >:'' 292 | }} 293 | 294 | : multi 295 | : Checks that top-level values are always separated, independently of 296 | : whether or not pretty-printing is enabled. 297 | : 298 | {{ 299 | : no-pretty 300 | : 301 | $* <>EOI 302 | 1 303 | "abc" 304 | true 305 | false 306 | null 307 | [] 308 | {} 309 | EOI 310 | 311 | : pretty 312 | : 313 | $* --pretty <>EOI 314 | 1 315 | 2 316 | EOI 317 | }} 318 | }} 319 | --------------------------------------------------------------------------------