├── .gitattributes
├── .gitignore
├── .gitmodules
├── AUTHORS
├── CONTRIBUTING.md
├── LICENSE
├── NEWS
├── README.md
├── build
    ├── .gitignore
    ├── bootstrap.build
    ├── export.build
    └── root.build
├── buildfile
├── libstud
    └── json
    │   ├── .gitignore
    │   ├── buildfile
    │   ├── event.hxx
    │   ├── export.hxx
    │   ├── parser.cxx
    │   ├── parser.hxx
    │   ├── parser.ixx
    │   ├── pdjson.c
    │   ├── pdjson.h
    │   ├── serializer.cxx
    │   ├── serializer.hxx
    │   ├── serializer.ixx
    │   └── version.hxx.in
├── manifest
├── repositories.manifest
└── tests
    ├── .gitignore
    ├── build
        ├── .gitignore
        ├── bootstrap.build
        └── root.build
    ├── buildfile
    ├── parser
        ├── basics
        │   ├── basics.testscript
        │   ├── buildfile
        │   ├── driver.cxx
        │   └── multi-value.testscript
        ├── buildfile
        ├── fuzz-llvm
        │   ├── README.md
        │   ├── buildfile
        │   └── driver.cxx
        └── peek
        │   ├── buildfile
        │   └── driver.cxx
    └── serializer
        ├── basics
            ├── buildfile
            └── driver.cxx
        ├── buildfile
        ├── fuzz-llvm
            ├── README.md
            ├── buildfile
            ├── convert.cxx
            └── driver.cxx
        └── roundtrip
            ├── buildfile
            ├── driver.cxx
            └── testscript


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # This is a good default: files that are auto-detected by git to be text are
 2 | # converted to the platform-native line ending (LF on Unix, CRLF on Windows)
 3 | # in the working tree and to LF in the repository.
 4 | #
 5 | * text=auto
 6 | 
 7 | # Use `eol=crlf` for files that should have the CRLF line ending both in the
 8 | # working tree (even on Unix) and in the repository.
 9 | #
10 | #*.bat text eol=crlf
11 | 
12 | # Use `eol=lf` for files that should have the LF line ending both in the
13 | # working tree (even on Windows) and in the repository.
14 | #
15 | #*.sh text eol=lf
16 | 
17 | # Use `binary` to make sure certain files are never auto-detected as text.
18 | #
19 | #*.png binary
20 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .bdep/
 2 | 
 3 | # Local default options files.
 4 | #
 5 | .build2/local/
 6 | 
 7 | # Compiler/linker output.
 8 | #
 9 | *.d
10 | *.t
11 | *.i
12 | *.ii
13 | *.o
14 | *.obj
15 | *.so
16 | *.dll
17 | *.a
18 | *.lib
19 | *.exp
20 | *.pdb
21 | *.ilk
22 | *.exe
23 | *.exe.dlls/
24 | *.exe.manifest
25 | *.pc
26 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "pdjson"]
2 | 	path = pdjson
3 | 	url = https://github.com/boris-kolpackov/pdjson.git
4 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | This file contains information about the libstud authors for copyright
 2 | purposes.
 3 | 
 4 | Note that the libstud/json/pdjson.* files are from the pdjson project that
 5 | released them into the public domain.
 6 | 
 7 | The copyright for the code is held by the contributors of the code. The
 8 | revision history in the version control system is the primary source of
 9 | authorship information for copyright purposes. Contributors that have
10 | requested to also be noted explicitly in this file are listed below:
11 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | The copyright for the code is held by the contributors of the code (see the
 2 | `AUTHORS` file). The code is licensed under permissive open source licensing
 3 | terms (see the `LICENSE` file). When you contribute code to this project, you
 4 | license it under these terms. Before contributing please make sure that these
 5 | terms are acceptable to you (and to your employer(s), if they have rights to
 6 | intellectual property that you create) and that the code being contributed is
 7 | your original creation.
 8 | 
 9 | The revision history in the version control system is the primary source of
10 | authorship information for copyright purposes. If, however, you would like
11 | to also be noted explicitly, please include the appropriate change to the
12 | `AUTHORS` file along with your contribution.
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021-2025 the libstud authors (see the AUTHORS file).
 4 | Copyright (c) dedicated to the public domain for the libstud/json/pdjson.* files.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | During the pre-1.0.0 development see the revision history.
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # libstud-json - JSON parser/serializer library for C++
  2 | 
  3 | A portable, dependency-free, MIT-licensed JSON pull-parser/push-serializer
  4 | library for C++.
  5 | 
  6 | The goal of this library is to provide a *pull*-style parser (instead of
  7 | *push*/SAX or DOM) and *push*-style serializer with clean, modern interfaces
  8 | and conforming, well-tested (and well-fuzzed, including the serializer)
  9 | implementations. In particular, pull-style parsers are not very common, and we
 10 | couldn't find any C++ implementations that also satisfy the above
 11 | requirements.
 12 | 
 13 | Typical parser usage (low-level API):
 14 | 
 15 | ```c++
 16 | #include <libstud/json/parser.hxx>
 17 | 
 18 | int main ()
 19 | {
 20 |   using namespace stud::json;
 21 | 
 22 |   parser p (std::cin, "<stdin>");
 23 | 
 24 |   for (event e: p)
 25 |   {
 26 |     switch (e)
 27 |     {
 28 |     case event::begin_object:
 29 |       // ...
 30 |     case event::end_object:
 31 |       // ...
 32 |     case event::name:
 33 |       {
 34 |         const std::string& n (p.name ());
 35 |         // ...
 36 |       }
 37 |     case event::number:
 38 |       {
 39 |         std::int64_t n (p.value<std::int64_t> ());
 40 |         // ...
 41 |       }
 42 |     }
 43 |   }
 44 | }
 45 | ```
 46 | 
 47 | Or using the higher-level API to parse a specific JSON vocabulary:
 48 | 
 49 | ```c++
 50 | #include <libstud/json/parser.hxx>
 51 | 
 52 | int main ()
 53 | {
 54 |   using namespace stud::json;
 55 | 
 56 |   parser p (std::cin, "<stdin>");
 57 | 
 58 |   p.next_expect (event::begin_object);
 59 |   {
 60 |     std::string planet (p.next_expect_member_string ("planet"));
 61 | 
 62 |     p.next_expect_member_array ("measurements");
 63 |     while (p.next_expect (event::number, event::end_array))
 64 |     {
 65 |       std::uint64_t m (p.value<std::uint64_t> ());
 66 |     }
 67 |   }
 68 |   p.next_expect (event::end_object);
 69 | }
 70 | ```
 71 | 
 72 | See the [`libstud/json/parser.hxx`][parser.hxx] header for the parser
 73 | interface details and the [`libstud/json/event.hxx`][event.hxx] header for the
 74 | complete list of events.
 75 | 
 76 | Typical serializer usage:
 77 | 
 78 | ```c++
 79 | #include <libstud/json/serializer.hxx>
 80 | 
 81 | int main ()
 82 | {
 83 |   using namespace stud::json;
 84 | 
 85 |   stream_serializer s (std::cout);
 86 | 
 87 |   s.begin_object ();
 88 |   s.member ("planet", "Venus");
 89 |   s.member_name ("measurements");
 90 |   s.begin_array ();
 91 |   s.value (123);
 92 |   s.value (234);
 93 |   s.value (345);
 94 |   s.end_array ();
 95 |   s.end_object ();
 96 | }
 97 | ```
 98 | 
 99 | See the [`libstud/json/serializer.hxx`][serializer.hxx] header for the
100 | serializer interface details.
101 | 
102 | See the [`NEWS`][news] file for changes and the
103 | [`cppget.org/libstud-json`][pkg] package page for build status.
104 | 
105 | [event.hxx]:      https://github.com/libstud/libstud-json/blob/master/libstud/json/event.hxx
106 | [parser.hxx]:     https://github.com/libstud/libstud-json/blob/master/libstud/json/parser.hxx
107 | [serializer.hxx]: https://github.com/libstud/libstud-json/blob/master/libstud/json/serializer.hxx
108 | [news]:           https://github.com/libstud/libstud-json/blob/master/NEWS
109 | [pkg]:            https://cppget.org/libstud-json
110 | 


--------------------------------------------------------------------------------
/build/.gitignore:
--------------------------------------------------------------------------------
1 | config.build
2 | root/
3 | bootstrap/
4 | 


--------------------------------------------------------------------------------
/build/bootstrap.build:
--------------------------------------------------------------------------------
1 | project = libstud-json
2 | 
3 | using version
4 | using config
5 | using test
6 | using install
7 | using dist
8 | 


--------------------------------------------------------------------------------
/build/export.build:
--------------------------------------------------------------------------------
1 | $out_root/
2 | {
3 |   include libstud/json/
4 | }
5 | 
6 | export $out_root/libstud/json/$import.target
7 | 


--------------------------------------------------------------------------------
/build/root.build:
--------------------------------------------------------------------------------
 1 | cxx.std = latest
 2 | 
 3 | using cxx
 4 | 
 5 | hxx{*}: extension = hxx
 6 | ixx{*}: extension = ixx
 7 | txx{*}: extension = txx
 8 | cxx{*}: extension = cxx
 9 | 
10 | # Assume headers are importable unless stated otherwise.
11 | #
12 | hxx{*}: cxx.importable = true
13 | 
14 | using c # For the c{} target type.
15 | 
16 | if ($cxx.target.system == 'win32-msvc')
17 |   cc.poptions += -D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS
18 | 
19 | if ($cxx.class == 'msvc')
20 |   cc.coptions += /wd4251 /wd4275 /wd4800
21 | 
22 | # The test target for cross-testing (running tests under Wine, etc).
23 | #
24 | test.target = $cxx.target
25 | 


--------------------------------------------------------------------------------
/buildfile:
--------------------------------------------------------------------------------
1 | ./: {*/ -build/ -pdjson/} doc{README.md NEWS} legal{LICENSE AUTHORS} manifest
2 | 
3 | # Don't install tests.
4 | #
5 | tests/: install = false
6 | 


--------------------------------------------------------------------------------
/libstud/json/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated version header.
2 | #
3 | version.hxx
4 | 


--------------------------------------------------------------------------------
/libstud/json/buildfile:
--------------------------------------------------------------------------------
 1 | import intf_libs = libstud-optional%lib{stud-optional}
 2 | 
 3 | lib{stud-json}: {hxx ixx txx cxx}{** -version} hxx{version} $intf_libs
 4 | 
 5 | # The pdjson.c file is included into parser.cxx so exclude it from the
 6 | # compilation. Note that pdjson.h must be installed since it is included in
 7 | # parser.hxx.
 8 | #
 9 | lib{stud-json}: c{pdjson}: include = adhoc
10 | lib{stud-json}: h{pdjson}
11 | 
12 | # Include the generated version header into the distribution (so that we don't
13 | # pick up an installed one) and don't remove it when cleaning in src (so that
14 | # clean results in a state identical to distributed).
15 | #
16 | hxx{version}: in{version} $src_root/manifest
17 | {
18 |   dist  = true
19 |   clean = ($src_root != $out_root)
20 | }
21 | 
22 | hxx{export}@./: cxx.importable = false
23 | 
24 | # Build options.
25 | #
26 | cxx.poptions =+ "-I$out_root" "-I$src_root"
27 | 
28 | obja{*}: cxx.poptions += -DLIBSTUD_JSON_STATIC_BUILD
29 | objs{*}: cxx.poptions += -DLIBSTUD_JSON_SHARED_BUILD
30 | 
31 | # Export options.
32 | #
33 | lib{stud-json}:
34 | {
35 |   cxx.export.poptions = "-I$out_root" "-I$src_root"
36 |   cxx.export.libs = $intf_libs
37 | }
38 | 
39 | liba{stud-json}: cxx.export.poptions += -DLIBSTUD_JSON_STATIC
40 | libs{stud-json}: cxx.export.poptions += -DLIBSTUD_JSON_SHARED
41 | 
42 | # For pre-releases use the complete version to make sure they cannot be used
43 | # in place of another pre-release or the final version. See the version module
44 | # for details on the version.* variable values.
45 | #
46 | if $version.pre_release
47 |   lib{stud-json}: bin.lib.version = @"-$version.project_id"
48 | else
49 |   lib{stud-json}: bin.lib.version = @"-$version.major.$version.minor"
50 | 
51 | # Install into the libstud/json/ subdirectory of, say, /usr/include/
52 | # recreating subdirectories.
53 | #
54 | {h hxx ixx txx}{*}:
55 | {
56 |   install         = include/libstud/json/
57 |   install.subdirs = true
58 | }
59 | 


--------------------------------------------------------------------------------
/libstud/json/event.hxx:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstddef>
 4 | #include <cstdint>
 5 | 
 6 | namespace stud
 7 | {
 8 |   namespace json
 9 |   {
10 |     // Parsing/serialization event.
11 |     //
12 |     enum class event: std::uint8_t
13 |     {
14 |       begin_object = 1,
15 |       end_object,
16 |       begin_array,
17 |       end_array,
18 |       name,
19 |       string,
20 |       number,
21 |       boolean,
22 |       null
23 |     };
24 | 
25 |     constexpr std::size_t event_count = 9;
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/libstud/json/export.hxx:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // Normally we don't export class templates (but do complete specializations),
 4 | // inline functions, and classes with only inline member functions. Exporting
 5 | // classes that inherit from non-exported/imported bases (e.g., std::string)
 6 | // will end up badly. The only known workarounds are to not inherit or to not
 7 | // export. Also, MinGW GCC doesn't like seeing non-exported functions being
 8 | // used before their inline definition. The workaround is to reorder code. In
 9 | // the end it's all trial and error.
10 | 
11 | #if defined(LIBSTUD_JSON_STATIC)         // Using static.
12 | #  define LIBSTUD_JSON_SYMEXPORT
13 | #elif defined(LIBSTUD_JSON_STATIC_BUILD) // Building static.
14 | #  define LIBSTUD_JSON_SYMEXPORT
15 | #elif defined(LIBSTUD_JSON_SHARED)       // Using shared.
16 | #  ifdef _WIN32
17 | #    define LIBSTUD_JSON_SYMEXPORT __declspec(dllimport)
18 | #  else
19 | #    define LIBSTUD_JSON_SYMEXPORT
20 | #  endif
21 | #elif defined(LIBSTUD_JSON_SHARED_BUILD) // Building shared.
22 | #  ifdef _WIN32
23 | #    define LIBSTUD_JSON_SYMEXPORT __declspec(dllexport)
24 | #  else
25 | #    define LIBSTUD_JSON_SYMEXPORT
26 | #  endif
27 | #else
28 | // If none of the above macros are defined, then we assume we are being used
29 | // by some third-party build system that cannot/doesn't signal the library
30 | // type. Note that this fallback works for both static and shared but in case
31 | // of shared will be sub-optimal compared to having dllimport.
32 | //
33 | #  define LIBSTUD_JSON_SYMEXPORT         // Using static or shared.
34 | #endif
35 | 


--------------------------------------------------------------------------------
/libstud/json/parser.cxx:
--------------------------------------------------------------------------------
  1 | #define PDJSON_SYMEXPORT static // See below.
  2 | 
  3 | #include <libstud/json/parser.hxx>
  4 | 
  5 | #include <istream>
  6 | 
  7 | // There is an issue (segfault) with using std::current_exception() and
  8 | // std::rethrow_exception() with older versions of libc++ on Linux. While the
  9 | // exact root cause hasn't been determined, the suspicion is that something
 10 | // gets messed up if we "smuggle" std::exception_ptr through extern "C" call
 11 | // frames (we cannot even destroy such an exception without a segfault). We
 12 | // also could not determine in which version exactly this has been fixed but
 13 | // we know that libc++ 6.0.0 doesn't appear to have this issue (though we are
 14 | // not entirely sure the issue is (only) in libc++; libgcc_s could also be
 15 | // involved).
 16 | //
 17 | // The workaround is to just catch (and note) the exception and then throw a
 18 | // new instance of generic std::istream::failure. In order not to drag the
 19 | // below test into the header, we wrap exception_ptr with optional<> and use
 20 | // NULL to indicate the presence of the exception when the workaround is
 21 | // required.
 22 | //
 23 | // Note that if/when we drop this workaround, we should also get rid of
 24 | // optional<> in stream::exception member.
 25 | //
 26 | #undef LIBSTUD_JSON_NO_EXCEPTION_PTR
 27 | 
 28 | #if defined (__linux__) && defined(__clang__)
 29 | #  if __has_include(<__config>)
 30 | #    include <__config> // _LIBCPP_VERSION
 31 | #    if _LIBCPP_VERSION < 6000
 32 | #      define LIBSTUD_JSON_NO_EXCEPTION_PTR 1
 33 | #    endif
 34 | #  endif
 35 | #endif
 36 | 
 37 | namespace stud
 38 | {
 39 |   namespace json
 40 |   {
 41 |     using namespace std;
 42 | 
    // Close the underlying pdjson stream (impl_) that was opened by one of
    // the constructors.
    //
    parser::
    ~parser ()
    {
      json_close (impl_);
    }
 48 | 
 49 |     static int
 50 |     stream_get (void* x)
 51 |     {
 52 |       auto& s (*static_cast<parser::stream*> (x));
 53 | 
 54 |       // In the multi-value mode reading of whitespaces/separators is split
 55 |       // between our code and pdjson's. As a result, these functions may end
 56 |       // up being called more than once after EOF is reached. Which is
 57 |       // something iostream does not handle gracefully.
 58 |       //
 59 |       if (!s.is->eof ())
 60 |       {
 61 |         try
 62 |         {
 63 |           // We first peek not to trip failbit on EOF.
 64 |           //
 65 |           if (s.is->peek () != istream::traits_type::eof ())
 66 |             return static_cast<char> (s.is->get ());
 67 |         }
 68 |         catch (...)
 69 |         {
 70 | #ifndef LIBSTUD_JSON_NO_EXCEPTION_PTR
 71 |           s.exception = current_exception ();
 72 | #else
 73 |           s.exception = nullptr;
 74 | #endif
 75 |         }
 76 |       }
 77 | 
 78 |       return EOF;
 79 |     }
 80 | 
 81 |     static int
 82 |     stream_peek (void* x)
 83 |     {
 84 |       auto& s (*static_cast<parser::stream*> (x));
 85 | 
 86 |       if (!s.is->eof ())
 87 |       {
 88 |         try
 89 |         {
 90 |           auto c (s.is->peek ());
 91 |           if (c != istream::traits_type::eof ())
 92 |             return static_cast<char> (c);
 93 |         }
 94 |         catch (...)
 95 |         {
 96 | #ifndef LIBSTUD_JSON_NO_EXCEPTION_PTR
 97 |           s.exception = current_exception ();
 98 | #else
 99 |           s.exception = nullptr;
100 | #endif
101 |         }
102 |       }
103 | 
104 |       return EOF;
105 |     }
106 | 
107 |     // NOTE: watch out for exception safety (specifically, doing anything that
108 |     // might throw after opening the stream).
109 |     //
110 |     parser::
111 |     parser (istream& is, const char* n, bool mv, const char* sep) noexcept
112 |         : input_name (n),
113 |           stream_ {&is, nullopt},
114 |           multi_value_ (mv),
115 |           separators_ (sep),
116 |           raw_s_ (nullptr),
117 |           raw_n_ (0)
118 |     {
119 |       json_open_user (impl_, &stream_get, &stream_peek, &stream_);
120 |       json_set_streaming (impl_, multi_value_);
121 |     }
122 | 
123 |     parser::
124 |     parser (const void* t,
125 |             size_t s,
126 |             const char* n,
127 |             bool mv,
128 |             const char* sep) noexcept
129 |         : input_name (n),
130 |           stream_ {nullptr, nullopt},
131 |           multi_value_ (mv),
132 |           separators_ (sep),
133 |           raw_s_ (nullptr),
134 |           raw_n_ (0)
135 |     {
136 |       json_open_buffer (impl_, t, s);
137 |       json_set_streaming (impl_, multi_value_);
138 |     }
139 | 
140 |     optional<event> parser::
141 |     next ()
142 |     {
143 |       name_p_ = value_p_ = location_p_ = false;
144 | 
145 |       // Note that for now we don't worry about the state of the parser if
146 |       // next_impl() throws assuming it is not going to be reused.
147 |       //
148 |       if (peeked_)
149 |       {
150 |         parsed_ = peeked_;
151 |         peeked_ = nullopt;
152 |       }
153 |       else
154 |         parsed_ = next_impl ();
155 | 
156 |       return translate (*parsed_);
157 |     }
158 | 
159 |     optional<event> parser::
160 |     peek ()
161 |     {
162 |       if (!peeked_)
163 |       {
164 |         if (parsed_)
165 |         {
166 |           cache_parsed_data ();
167 |           cache_parsed_location ();
168 |         }
169 |         peeked_ = next_impl ();
170 |       }
171 |       return translate (*peeked_);
172 |     }
173 | 
174 |     static inline const char*
175 |     event_name (event e)
176 |     {
177 |       switch (e)
178 |       {
179 |       case event::begin_object: return "beginning of object";
180 |       case event::end_object:   return "end of object";
181 |       case event::begin_array:  return "beginning of array";
182 |       case event::end_array:    return "end of array";
183 |       case event::name:         return "member name";
184 |       case event::string:       return "string value";
185 |       case event::number:       return "numeric value";
186 |       case event::boolean:      return "boolean value";
187 |       case event::null:         return "null value";
188 |       }
189 | 
190 |       return "";
191 |     }
192 | 
193 |     bool parser::
194 |     next_expect (event p, optional<event> s)
195 |     {
196 |       optional<event> e (next ());
197 |       bool r;
198 |       if (e && ((r = *e == p) || (s && *e == *s)))
199 |         return r;
200 | 
201 |       string d ("expected ");
202 |       d += event_name (p);
203 | 
204 |       if (s)
205 |       {
206 |         d += " or ";
207 |         d += event_name (*s);
208 |       }
209 | 
210 |       if (e)
211 |       {
212 |         d += " instead of ";
213 |         d += event_name (*e);
214 |       }
215 | 
216 |       throw invalid_json_input (input_name != nullptr ? input_name : "",
217 |                                 line (),
218 |                                 column (),
219 |                                 position (),
220 |                                 move (d));
221 |     }
222 | 
223 |     void parser::
224 |     next_expect_name (const char* n, bool su)
225 |     {
226 |       for (;;)
227 |       {
228 |         next_expect (event::name);
229 | 
230 |         if (name () == n)
231 |           return;
232 | 
233 |         if (!su)
234 |           break;
235 | 
236 |         next_expect_value_skip ();
237 |       }
238 | 
239 |       string d ("expected object member name '");
240 |       d += n;
241 |       d += "' instead of '";
242 |       d += name ();
243 |       d += '\'';
244 | 
245 |       throw invalid_json_input (input_name != nullptr ? input_name : "",
246 |                                 line (),
247 |                                 column (),
248 |                                 position (),
249 |                                 move (d));
250 |     }
251 | 
252 |     void parser::
253 |     next_expect_value_skip ()
254 |     {
255 |       optional<event> e (next ());
256 | 
257 |       if (e)
258 |       {
259 |         switch (*e)
260 |         {
261 |         case event::begin_object:
262 |         case event::begin_array:
263 |           {
264 |             // Skip until matching end_object/array keeping track of nesting.
265 |             // We are going to rely on the fact that we should either get such
266 |             // an event or next() should throw.
267 |             //
268 |             event be (*e);
269 |             event ee (be == event::begin_object
270 |                       ? event::end_object
271 |                       : event::end_array);
272 | 
273 |             for (size_t n (0);; )
274 |             {
275 |               event e (*next ());
276 | 
277 |               if (e == ee)
278 |               {
279 |                 if (n == 0)
280 |                   break;
281 | 
282 |                 --n;
283 |               }
284 |               else if (e == be)
285 |                 ++n;
286 |             }
287 | 
288 |             return;
289 |           }
290 |         case event::string:
291 |         case event::number:
292 |         case event::boolean:
293 |         case event::null:
294 |           return;
295 |         case event::name:
296 |         case event::end_object:
297 |         case event::end_array:
298 |           break;
299 |         }
300 |       }
301 | 
302 |       string d ("expected value");
303 | 
304 |       if (e)
305 |       {
306 |         d += " instead of ";
307 |         d += event_name (*e);
308 |       }
309 | 
310 |       throw invalid_json_input (input_name != nullptr ? input_name : "",
311 |                                 line (),
312 |                                 column (),
313 |                                 position (),
314 |                                 move (d));
315 |     }
316 | 
317 |     std::uint64_t parser::
318 |     line () const noexcept
319 |     {
320 |       if (!location_p_)
321 |       {
322 |         if (!parsed_)
323 |           return 0;
324 | 
325 |         assert (!peeked_);
326 | 
327 |         return static_cast<uint64_t> (
328 |             json_get_lineno (const_cast<json_stream*> (impl_)));
329 |       }
330 | 
331 |       return line_;
332 |     }
333 | 
334 |     std::uint64_t parser::
335 |     column () const noexcept
336 |     {
337 |       if (!location_p_)
338 |       {
339 |         if (!parsed_)
340 |           return 0;
341 | 
342 |         assert (!peeked_);
343 | 
344 |         return static_cast<uint64_t> (
345 |             json_get_column (const_cast<json_stream*> (impl_)));
346 |       }
347 | 
348 |       return column_;
349 |     }
350 | 
351 |     std::uint64_t parser::
352 |     position () const noexcept
353 |     {
354 |       if (!location_p_)
355 |       {
356 |         if (!parsed_)
357 |           return 0;
358 | 
359 |         assert (!peeked_);
360 | 
361 |         return static_cast<uint64_t> (
362 |             json_get_position (const_cast<json_stream*> (impl_)));
363 |       }
364 | 
365 |       return position_;
366 |     }
367 | 
368 |     json_type parser::
369 |     next_impl ()
370 |     {
371 |       raw_s_ = nullptr;
372 |       raw_n_ = 0;
373 |       json_type e;
374 | 
375 |       // Read characters between values skipping required separators and JSON
376 |       // whitespaces. Return whether a required separator was encountered as
377 |       // well as the first non-separator/whitespace character (which, if EOF,
378 |       // should trigger a check for input/output errors).
379 |       //
380 |       // Note that the returned non-separator will not have been extracted
381 |       // from the input (so position, column, etc. will still refer to its
382 |       // predecessor).
383 |       //
384 |       auto skip_separators = [this] () -> pair<bool, int>
385 |       {
386 |         bool r (separators_ == nullptr);
387 | 
388 |         int c;
389 |         for (; (c = json_source_peek (impl_)) != EOF; json_source_get (impl_))
390 |         {
391 |           // User separator.
392 |           //
393 |           if (separators_ != nullptr && *separators_ != '\0')
394 |           {
395 |             if (strchr (separators_, c) != nullptr)
396 |             {
397 |               r = true;
398 |               continue;
399 |             }
400 |           }
401 | 
402 |           // JSON separator.
403 |           //
404 |           if (json_isspace (c))
405 |           {
406 |             if (separators_ != nullptr && *separators_ == '\0')
407 |               r = true;
408 | 
409 |             continue;
410 |           }
411 | 
412 |           break;
413 |         }
414 | 
415 |         return make_pair (r, c);
416 |       };
417 | 
418 |       // In the multi-value mode skip any instances of required separators
419 |       // (and any other JSON whitespace) preceding the first JSON value.
420 |       //
421 |       if (multi_value_ && !parsed_ && !peeked_)
422 |       {
423 |         if (skip_separators ().second == EOF && stream_.is != nullptr)
424 |         {
425 |           if (stream_.exception)   goto fail_rethrow;
426 |           if (stream_.is->fail ()) goto fail_stream;
427 |         }
428 |       }
429 | 
430 |       e = json_next (impl_);
431 | 
432 |       // First check for a pending input/output error.
433 |       //
434 |       if (stream_.is != nullptr)
435 |       {
436 |         if (stream_.exception)   goto fail_rethrow;
437 |         if (stream_.is->fail ()) goto fail_stream;
438 |       }
439 | 
440 |       // There are two ways to view separation between two values: as following
441 |       // the first value or as preceding the second value. And one aspect that
442 |       // is determined by this is whether a separation violation is a problem
443 |       // with the first value or with the second, which becomes important if
444 |       // the user bails out before parsing the second value.
445 |       //
446 |       // Consider these two unseparated values (yes, in JSON they are two
447 |       // values, leading zeros are not allowed in JSON numbers):
448 |       //
449 |       // 01
450 |       //
451 |       // If the user bails out after parsing 0 in a stream that should have
452 |       // been newline-delimited, they most likely would want to get an error
453 |       // since this is most definitely an invalid value rather than two
454 |       // values that are not properly separated. So in this light we handle
455 |       // separators at the end of the first value.
456 |       //
457 |       switch (e)
458 |       {
459 |       case JSON_DONE:
460 |         {
461 |           // Deal with the following value separators.
462 |           //
463 |           // Note that we must not do this for the second JSON_DONE (or the
464 |           // first one in case there are no values) that signals the end of
465 |           // input.
466 |           //
467 |           if (multi_value_         &&
468 |               (parsed_ || peeked_) &&
469 |               (peeked_ ? *peeked_ : *parsed_) != JSON_DONE)
470 |           {
471 |             auto p (skip_separators ());
472 | 
473 |             if (p.second == EOF && stream_.is != nullptr)
474 |             {
475 |               if (stream_.exception)   goto fail_rethrow;
476 |               if (stream_.is->fail ()) goto fail_stream;
477 |             }
478 | 
479 |             // Note that we don't require separators after the last value.
480 |             //
481 |             if (!p.first && p.second != EOF)
482 |             {
483 |               json_source_get (impl_); // Consume to update column number.
484 |               goto fail_separation;
485 |             }
486 | 
487 |             json_reset (impl_);
488 |           }
489 |           break;
490 |         }
491 |       case JSON_ERROR: goto fail_json;
492 |       case JSON_STRING:
493 |       case JSON_NUMBER:
494 |         raw_s_ = json_get_string (impl_, &raw_n_);
495 |         raw_n_--; // Includes terminating `\0`.
496 |         break;
497 |       case JSON_TRUE:  raw_s_ = "true";  raw_n_ = 4; break;
498 |       case JSON_FALSE: raw_s_ = "false"; raw_n_ = 5; break;
499 |       case JSON_NULL:  raw_s_ = "null";  raw_n_ = 4; break;
500 |       default: break;
501 |       }
502 | 
503 |       return e;
504 | 
505 |     fail_json:
506 |       throw invalid_json_input (
507 |           input_name != nullptr ? input_name : "",
508 |           static_cast<uint64_t> (json_get_lineno (impl_)),
509 |           static_cast<uint64_t> (json_get_column (impl_)),
510 |           static_cast<uint64_t> (json_get_position (impl_)),
511 |           json_get_error (impl_));
512 | 
513 |     fail_separation:
514 |       throw invalid_json_input (
515 |           input_name != nullptr ? input_name : "",
516 |           static_cast<uint64_t> (json_get_lineno (impl_)),
517 |           static_cast<uint64_t> (json_get_column (impl_)),
518 |           static_cast<uint64_t> (json_get_position (impl_)),
519 |           "missing separator between JSON values");
520 | 
521 |     fail_stream:
522 |       throw invalid_json_input (
523 |           input_name != nullptr ? input_name : "",
524 |           static_cast<uint64_t> (json_get_lineno (impl_)),
525 |           static_cast<uint64_t> (json_get_column (impl_)),
526 |           static_cast<uint64_t> (json_get_position (impl_)),
527 |           "unable to read JSON input text");
528 | 
529 |     fail_rethrow:
530 | #ifndef LIBSTUD_JSON_NO_EXCEPTION_PTR
531 |       rethrow_exception (move (*stream_.exception));
532 | #else
533 |       throw istream::failure ("unable to read");
534 | #endif
535 |     }
536 | 
537 |     optional<event> parser::
538 |     translate (json_type t) const noexcept
539 |     {
540 |       optional<event> r; // Stays nullopt for JSON_DONE (and JSON_ERROR).
541 | 
542 |       switch (t)
543 |       {
544 |       case JSON_DONE:       break;
545 |       case JSON_OBJECT:     r = event::begin_object; break;
546 |       case JSON_OBJECT_END: r = event::end_object;   break;
547 |       case JSON_ARRAY:      r = event::begin_array;  break;
548 |       case JSON_ARRAY_END:  r = event::end_array;    break;
549 |       case JSON_NUMBER:     r = event::number;       break;
550 |       case JSON_TRUE:
551 |       case JSON_FALSE:      r = event::boolean;      break;
552 |       case JSON_NULL:       r = event::null;         break;
553 |       case JSON_STRING:
554 |         {
555 |           // A name if in an object context with an odd count, else a value.
556 |           //
557 |           size_t n;
558 |           json_stream* s (const_cast<json_stream*> (impl_));
559 |           const bool m (json_get_context (s, &n) == JSON_OBJECT && n % 2 == 1);
560 |           r = m ? event::name : event::string;
561 |           break;
562 |         }
563 |       case JSON_ERROR: assert (false); break; // Caller should've handled it.
564 |       }
565 | 
566 |       return r;
567 |     }
568 | 
569 |     void parser::
570 |     cache_parsed_data ()
571 |     {
572 |       name_p_ = false; // Drop what was cached for the previous event.
573 |       value_p_ = false;
574 | 
575 |       const optional<event> ev (translate (*parsed_));
576 | 
577 |       // Refill the name or the value buffer unless the event has neither.
578 |       //
579 |       const bool n (ev && *ev == event::name);
580 | 
581 |       if (n || value_event (ev))
582 |       {
583 |         (n ? name_ : value_).assign (raw_s_, raw_n_);
584 |         (n ? name_p_ : value_p_) = true;
585 |       }
586 |     }
587 | 
588 |     void parser::
589 |     cache_parsed_location () noexcept
590 |     {
591 |       line_       = uint64_t (json_get_lineno (impl_));
592 |       column_     = uint64_t (json_get_column (impl_));
593 |       position_   = uint64_t (json_get_position (impl_));
594 |       location_p_ = true; // The three numbers above are now cached.
595 |     }
596 | 
597 |     bool parser::
598 |     value_event (optional<event> e) noexcept
599 |     {
600 |       // Strings, numbers, booleans, and nulls are the value events. Every
601 |       // other event (names, object/array delimiters), as well as the absence
602 |       // of an event, is not.
603 |       //
604 |       if (!e)
605 |         return false;
606 | 
607 |       const event v (*e);
608 | 
609 |       return v == event::string  ||
610 |              v == event::number  ||
611 |              v == event::boolean ||
612 |              v == event::null;
613 |     }
614 | 
615 |     [[noreturn]] void parser::
616 |     throw_invalid_value (const char* type, const char* v, size_t n) const
617 |     {
618 |       // Description: invalid <type> value: '<first n characters of v>'
619 |       //
620 |       string msg ("invalid ");
621 |       msg.append (type).append (" value: '").append (v, n).push_back ('\'');
622 | 
623 |       const char* in (input_name != nullptr ? input_name : "");
624 | 
625 |       throw invalid_json_input (
626 |           in, line (), column (), position (), move (msg));
627 |     }
628 |   } // namespace json
629 | } // namespace stud
630 | 
631 | // Include the implementation into our translation unit (instead of compiling
632 | // it separately) to (hopefully) get function inlining without LTO.
633 | //
634 | // Let's keep it last since the implementation defines a couple of macros.
635 | //
636 | #if defined(__clang__) || defined(__GNUC__)
637 | #  pragma GCC diagnostic ignored "-Wunused-function"
638 | #endif
639 | 
640 | extern "C"
641 | {
642 | #define PDJSON_STACK_INC 16
643 | #define PDJSON_STACK_MAX 2048
644 | #include "pdjson.c"
645 | }
646 | 


--------------------------------------------------------------------------------
/libstud/json/parser.hxx:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <iosfwd>
  4 | #include <string>
  5 | #include <cstddef>   // size_t
  6 | #include <cstdint>   // uint64_t
  7 | #include <utility>   // pair
  8 | #include <exception> // exception_ptr
  9 | #include <stdexcept> // invalid_argument
 10 | 
 11 | #include <libstud/optional.hxx> // stud::optional is std::optional or similar.
 12 | 
 13 | #include <libstud/json/event.hxx>
 14 | 
 15 | #include <libstud/json/pdjson.h> // Implementation details.
 16 | 
 17 | #include <libstud/json/export.hxx>
 18 | 
 19 | namespace stud
 20 | {
 21 |   // Using the RFC8259 terminology: JSON (input) text, JSON value, object
 22 |   // member.
 23 |   //
 24 |   namespace json
 25 |   {
 26 |     class invalid_json_input: public std::invalid_argument
 27 |     {
 28 |     public:
 29 |       std::string   name;     // Input name (presumably the ctor argument).
 30 |       std::uint64_t line;     // Location of the error (presumably the
 31 |       std::uint64_t column;   // corresponding ctor arguments; the ctor
 32 |       std::uint64_t position; // definitions are not in this header).
 33 | 
 34 |       invalid_json_input (std::string name,
 35 |                           std::uint64_t line,
 36 |                           std::uint64_t column,
 37 |                           std::uint64_t position,
 38 |                           const std::string& description);
 39 | 
 40 |       invalid_json_input (std::string name,
 41 |                           std::uint64_t line,
 42 |                           std::uint64_t column,
 43 |                           std::uint64_t position,
 44 |                           const char* description); // Same, C-string text.
 45 |     };
 46 | 
 47 |     class LIBSTUD_JSON_SYMEXPORT parser
 48 |     {
 49 |     public:
 50 |       const char* input_name;
 51 | 
 52 |       // Construction.
 53 |       //
 54 | 
 55 |       // Parse JSON input text from std::istream.
 56 |       //
 57 |       // The name argument is used to identify the input being parsed. Note
 58 |       // that the stream, name, and separators are kept as references so they
 59 |       // must outlive the parser instance.
 60 |       //
 61 |       // If stream exceptions are enabled then the std::ios_base::failure
 62 |       // exception is used to report input/output errors (badbit and failbit).
 63 |       // Otherwise, those are reported as the invalid_json_input exception.
 64 |       //
 65 |       // If multi_value is true, enable the multi-value mode in which case the
 66 |       // input stream may contain multiple JSON values (more precisely, zero
 67 |       // or more). If false (the default), parsing will fail unless there is
 68 |       // exactly one JSON value in the input stream.
 69 |       //
 70 |       // If multi_value is true, the separators argument specifies the
 71 |       // required separator characters between JSON values. At least one of
 72 |       // them must be present between every pair of JSON values (in addition
 73 |       // to any number of JSON whitespaces). No separators are required after
 74 |       // the last JSON value (but any found will be skipped).
 75 |       //
 76 |       // Specifically, if it is NULL, then no separation is required (that is,
 77 |       // both `{...}{...}` and `{...}  {...}` would be valid). If it is empty,
 78 |       // then at least one JSON whitespace is required. And if it is non-
 79 |       // empty, then at least one of its characters must be present (for
 80 |       // example, "\n\t" would require at least one newline or TAB character
 81 |       // between JSON values).
 82 |       //
 83 |       // Note that a separator need not be valid JSON whitespace: any
 84 |       // character is acceptable (though it probably shouldn't be an object,
 85 |       // array, or string delimiter and should not occur within a non-self-
 86 |       // delimited top-level value, such as `true`, `false`, `null`, or a
 87 |       // number). All instances of required separators before and after a
 88 |       // value are skipped. Therefore JSON Text Sequences (RFC 7464; AKA
 89 |       // Record Separator-delimited JSON), which requires the RS (0x1E)
 90 |       // character before each value, can be handled as well.
 91 |       //
 92 |       parser (std::istream&,
 93 |               const std::string& name,
 94 |               bool multi_value = false,
 95 |               const char* separators = nullptr) noexcept;
 96 | 
 97 |       parser (std::istream&,
 98 |               const char* name,
 99 |               bool multi_value = false,
100 |               const char* separators = nullptr) noexcept;
101 | 
102 |       parser (std::istream&,
103 |               std::string&&,
104 |               bool = false,
105 |               const char* = nullptr) = delete;
106 | 
107 |       // Parse a memory buffer that contains the entire JSON input text.
108 |       //
109 |       // The name argument is used to identify the input being parsed. Note
110 |       // that the buffer, name, and separators are kept as references so they
111 |       // must outlive the parser instance.
112 |       //
113 |       parser (const void* text,
114 |               std::size_t size,
115 |               const std::string& name,
116 |               bool multi_value = false,
117 |               const char* separators = nullptr) noexcept;
118 | 
119 |       parser (const void* text,
120 |               std::size_t size,
121 |               const char* name,
122 |               bool multi_value = false,
123 |               const char* separators = nullptr) noexcept;
124 | 
125 |       parser (const void*,
126 |               std::size_t,
127 |               std::string&&,
128 |               bool = false,
129 |               const char* = nullptr) = delete;
130 | 
131 |       // Similar to the above but parse a string.
132 |       //
133 |       parser (const std::string& text,
134 |               const std::string& name,
135 |               bool multi_value = false,
136 |               const char* separators = nullptr) noexcept;
137 | 
138 |       parser (const std::string& text,
139 |               const char* name,
140 |               bool multi_value = false,
141 |               const char* separators = nullptr) noexcept;
142 | 
143 |       parser (const std::string&,
144 |               std::string&&,
145 |               bool = false,
146 |               const char* = nullptr) = delete;
147 | 
148 |       // Similar to the above but parse a C-string.
149 |       //
150 |       parser (const char* text,
151 |               const std::string& name,
152 |               bool multi_value = false,
153 |               const char* separators = nullptr) noexcept;
154 | 
155 |       parser (const char* text,
156 |               const char* name,
157 |               bool multi_value = false,
158 |               const char* separators = nullptr) noexcept;
159 | 
160 |       parser (const char*,
161 |               std::string&&,
162 |               bool = false,
163 |               const char* = nullptr) = delete;
164 | 
165 |       parser (parser&&) = delete;
166 |       parser (const parser&) = delete;
167 | 
168 |       parser& operator= (parser&&) = delete;
169 |       parser& operator= (const parser&) = delete;
170 | 
171 |       // Event iteration.
172 |       //
173 | 
174 |       // Return the next event or nullopt if end of input is reached.
175 |       //
176 |       // In the single-value parsing mode (default) the parsing code could
177 |       // look like this:
178 |       //
179 |       //     while (optional<event> e = p.next ())
180 |       //     {
181 |       //       switch (*e)
182 |       //       {
183 |       //         // ...
184 |       //       }
185 |       //     }
186 |       //
187 |       // In the multi-value mode the parser additionally returns nullopt after
188 |       // every JSON value parsed (so there will be two nullopt's after the
189 |       // last JSON value, the second indicating the end of input).
190 |       //
191 |       // One way to perform multi-value parsing is with the help of the peek()
192 |       // function (see below):
193 |       //
194 |       //     while (p.peek ())
195 |       //     {
196 |       //       while (optional<event> e = p.next ())
197 |       //       {
198 |       //         switch (*e)
199 |       //         {
200 |       //           //...
201 |       //         }
202 |       //       }
203 |       //     }
204 |       //
205 |       // Note that while the single-value mode will always parse exactly one
206 |       // value, the multi-value mode will accept zero values in which case a
207 |       // single nullopt is returned.
208 |       //
209 |       optional<event>
210 |       next ();
211 | 
212 |       // The range-based for loop support.
213 |       //
214 |       // In the single-value parsing mode (default) the parsing code could
215 |       // look like this:
216 |       //
217 |       //     for (event e: p)
218 |       //     {
219 |       //       switch (e)
220 |       //       {
221 |       //         //...
222 |       //       }
223 |       //     }
224 |       //
225 |       // And in the multi-value mode (see next() for more information) like
226 |       // this:
227 |       //
228 |       //     while (p.peek ())
229 |       //     {
230 |       //       for (event e: p)
231 |       //       {
232 |       //         switch (e)
233 |       //         {
234 |       //           //...
235 |       //         }
236 |       //       }
237 |       //     }
238 |       //
239 |       // Note that generally, the iterator interface doesn't make much sense
240 |       // for the parser so for now we have an implementation that is just
241 |       // enough for the range-based for.
242 |       //
243 |       struct iterator;
244 | 
245 |       iterator begin () {return iterator {this, next ()};}
246 |       iterator end ()   {return iterator ();}
247 | 
248 |       // Return the next event without considering it parsed. In other words,
249 |       // after this call, any subsequent calls to peek() and the next call to
250 |       // next() (if any) will all return the same event.
251 |       //
252 |       // Note that the name, value, and line corresponding to the peeked event
253 |       // are not accessible with name(), value() and line(); these functions
254 |       // will still return values corresponding to the most recent call to
255 |       // next(). The peeked values, however, can be accessed in the raw form
256 |       // using data().
257 |       //
258 |       optional<event>
259 |       peek ();
260 | 
261 | 
262 |       // Event data access.
263 |       //
264 | 
265 |       // Return the object member name.
266 |       //
267 |       const std::string&
268 |       name ();
269 | 
270 |       // Any value (string, number, boolean, and null) can be retrieved as a
271 |       // string. Calling this function after any non-value events is illegal.
272 |       //
273 |       // Note that the value is returned as a non-const string reference and
274 |       // you are allowed to move the value out of it. However, this should not
275 |       // be done unnecessarily or in cases where the small string optimization
276 |       // is likely since the string's buffer is reused to store subsequent
277 |       // values.
278 |       //
279 |       std::string&
280 |       value ();
281 | 
282 |       // Convert the value to an integer, floating point, or bool. Throw
283 |       // invalid_json_input if the conversion is impossible without a loss.
284 |       //
285 |       template <typename T>
286 |       T
287 |       value () const;
288 | 
289 |       // Return the value or object member name in the raw form.
290 |       //
291 |       // Calling this function on non-value/name events is legal in which case
292 |       // NULL is returned. Note also that the returned data corresponds to the
293 |       // most recent event, whether peeked or parsed.
294 |       //
295 |       std::pair<const char*, std::size_t>
296 |       data () const {return {raw_s_, raw_n_};}
297 | 
298 | 
299 |       // Higher-level API suitable for parsing specific JSON vocabularies.
300 |       //
301 |       // The API summary:
302 |       //
303 |       // void next_expect (event);
304 |       // bool next_expect (event primary, event secondary);
305 |       //
306 |       // void next_expect_name (string name, bool skip_unknown = false);
307 |       //
308 |       // std::string& next_expect_string    ();
309 |       // T            next_expect_string<T> ();
310 |       // std::string& next_expect_number    ();
311 |       // T            next_expect_number<T> ();
312 |       // std::string& next_expect_boolean   ();
313 |       // T            next_expect_boolean<T>();
314 |       //
315 |       // std::string* next_expect_string_null    ();
316 |       // optional<T>  next_expect_string_null<T> ();
317 |       // std::string* next_expect_number_null    ();
318 |       // optional<T>  next_expect_number_null<T> ();
319 |       // std::string* next_expect_boolean_null   ();
320 |       // optional<T>  next_expect_boolean_null<T>();
321 |       //
322 |       // std::string& next_expect_member_string    (string name, bool = false);
323 |       // T            next_expect_member_string<T> (string name, bool = false);
324 |       // std::string& next_expect_member_number    (string name, bool = false);
325 |       // T            next_expect_member_number<T> (string name, bool = false);
326 |       // std::string& next_expect_member_boolean   (string name, bool = false);
327 |       // T            next_expect_member_boolean<T>(string name, bool = false);
328 |       //
329 |       // std::string* next_expect_member_string_null    (string, bool = false);
330 |       // optional<T>  next_expect_member_string_null<T> (string, bool = false);
331 |       // std::string* next_expect_member_number_null    (string, bool = false);
332 |       // optional<T>  next_expect_member_number_null<T> (string, bool = false);
333 |       // std::string* next_expect_member_boolean_null   (string, bool = false);
334 |       // optional<T>  next_expect_member_boolean_null<T>(string, bool = false);
335 |       //
336 |       // void next_expect_member_object     (string name, bool = false);
337 |       // bool next_expect_member_object_null(string name, bool = false);
338 |       //
339 |       // void next_expect_member_array     (string name, bool = false);
340 |       // bool next_expect_member_array_null(string name, bool = false);
341 |       //
342 |       // void next_expect_value_skip();
343 | 
344 |       // Get the next event and make sure that it's what's expected: primary
345 |       // or, if specified, secondary event. If it is not either, then throw
346 |       // invalid_json_input with appropriate description. Return true if it is
347 |       // primary.
348 |       //
349 |       // The secondary expected event is primarily useful for handling
350 |       // optional members. For example:
351 |       //
352 |       //     while (p.next_expect (event::name, event::end_object))
353 |       //     {
354 |       //       // Handle object member.
355 |       //     }
356 |       //
357 |       // Or homogeneous arrays:
358 |       //
359 |       //     while (p.next_expect (event::string, event::end_array))
360 |       //     {
361 |       //       // Handle array element.
362 |       //     }
363 |       //
364 |       // Or values that can be null:
365 |       //
366 |       //     if (p.next_expect (event::begin_object, event::null))
367 |       //     {
368 |       //       // Parse object.
369 |       //     }
370 |       //
371 |       bool
372 |       next_expect (event primary, optional<event> secondary = nullopt);
373 | 
374 |       // Get the next event and make sure it is event::name and the object
375 |       // member matches the specified name. If either is not, then throw
376 |       // invalid_json_input with appropriate description. If skip_unknown is
377 |       // true, then skip over unknown member names until a match is found.
378 |       //
379 |       void
380 |       next_expect_name (const char* name, bool skip_unknown = false);
381 | 
382 |       void
383 |       next_expect_name (const std::string&, bool = false);
384 | 
385 |       // Get the next event and make sure it is event::<type> returning its
386 |       // value similar to the value() functions. If it is not, then throw
387 |       // invalid_json_input with appropriate description.
388 |       //
389 |       std::string&
390 |       next_expect_string ();
391 | 
392 |       template <typename T>
393 |       T
394 |       next_expect_string ();
395 | 
396 |       std::string&
397 |       next_expect_number ();
398 | 
399 |       template <typename T>
400 |       T
401 |       next_expect_number ();
402 | 
403 |       std::string&
404 |       next_expect_boolean ();
405 | 
406 |       template <typename T>
407 |       T
408 |       next_expect_boolean ();
409 | 
410 |       // Similar to next_expect_<type>() but in addition to event::<type> also
411 |       // allow event::null, in which case returning no value.
412 |       //
413 |       std::string*
414 |       next_expect_string_null ();
415 | 
416 |       template <typename T>
417 |       optional<T>
418 |       next_expect_string_null ();
419 | 
420 |       std::string*
421 |       next_expect_number_null ();
422 | 
423 |       template <typename T>
424 |       optional<T>
425 |       next_expect_number_null ();
426 | 
427 |       std::string*
428 |       next_expect_boolean_null ();
429 | 
430 |       template <typename T>
431 |       optional<T>
432 |       next_expect_boolean_null ();
433 | 
434 |       // Call next_expect_name() followed by next_expect_<type>[_null]()
435 |       // returning its result. In other words, parse the entire object member
436 |       // with the specified name and of type <type>, returning its value.
437 | 
438 |       // next_expect_member_string()
439 |       //
440 |       std::string&
441 |       next_expect_member_string (const char* name, bool skip_unknown = false);
442 | 
443 |       std::string&
444 |       next_expect_member_string (const std::string&, bool = false);
445 | 
446 |       template <typename T>
447 |       T
448 |       next_expect_member_string (const char*, bool = false);
449 | 
450 |       template <typename T>
451 |       T
452 |       next_expect_member_string (const std::string&, bool = false);
453 | 
454 |       // next_expect_member_number()
455 |       //
456 |       std::string&
457 |       next_expect_member_number (const char* name, bool skip_unknown = false);
458 | 
459 |       std::string&
460 |       next_expect_member_number (const std::string&, bool = false);
461 | 
462 |       template <typename T>
463 |       T
464 |       next_expect_member_number (const char*, bool = false);
465 | 
466 |       template <typename T>
467 |       T
468 |       next_expect_member_number (const std::string&, bool = false);
469 | 
470 |       // next_expect_member_boolean()
471 |       //
472 |       std::string&
473 |       next_expect_member_boolean (const char* name, bool skip_unknown = false);
474 | 
475 |       std::string&
476 |       next_expect_member_boolean (const std::string&, bool = false);
477 | 
478 |       template <typename T>
479 |       T
480 |       next_expect_member_boolean (const char*, bool = false);
481 | 
482 |       template <typename T>
483 |       T
484 |       next_expect_member_boolean (const std::string&, bool = false);
485 | 
486 |       // next_expect_member_string_null()
487 |       //
488 |       std::string*
489 |       next_expect_member_string_null (const char*, bool = false);
490 | 
491 |       std::string*
492 |       next_expect_member_string_null (const std::string&, bool = false);
493 | 
494 |       template <typename T>
495 |       optional<T>
496 |       next_expect_member_string_null (const char*, bool = false);
497 | 
498 |       template <typename T>
499 |       optional<T>
500 |       next_expect_member_string_null (const std::string&, bool = false);
501 | 
502 |       // next_expect_member_number_null()
503 |       //
504 |       std::string*
505 |       next_expect_member_number_null (const char*, bool = false);
506 | 
507 |       std::string*
508 |       next_expect_member_number_null (const std::string&, bool = false);
509 | 
510 |       template <typename T>
511 |       optional<T>
512 |       next_expect_member_number_null (const char*, bool = false);
513 | 
514 |       template <typename T>
515 |       optional<T>
516 |       next_expect_member_number_null (const std::string&, bool = false);
517 | 
518 |       // next_expect_member_boolean_null()
519 |       //
520 |       std::string*
521 |       next_expect_member_boolean_null (const char*, bool = false);
522 | 
523 |       std::string*
524 |       next_expect_member_boolean_null (const std::string&, bool = false);
525 | 
526 |       template <typename T>
527 |       optional<T>
528 |       next_expect_member_boolean_null (const char*, bool = false);
529 | 
530 |       template <typename T>
531 |       optional<T>
532 |       next_expect_member_boolean_null (const std::string&, bool = false);
533 | 
534 |       // Call next_expect_name() followed by next_expect(event::begin_object).
535 |       // In the _null version also allow event::null, in which case return
536 |       // false.
537 |       //
538 |       void
539 |       next_expect_member_object (const char* name, bool skip_unknown = false);
540 | 
541 |       void
542 |       next_expect_member_object (const std::string&, bool = false);
543 | 
544 |       bool
545 |       next_expect_member_object_null (const char*, bool = false);
546 | 
547 |       bool
548 |       next_expect_member_object_null (const std::string&, bool = false);
549 | 
550 |       // Call next_expect_name() followed by next_expect(event::begin_array).
551 |       // In the _null version also allow event::null, in which case return
552 |       // false.
553 |       //
554 |       void
555 |       next_expect_member_array (const char* name, bool skip_unknown = false);
556 | 
557 |       void
558 |       next_expect_member_array (const std::string&, bool = false);
559 | 
560 |       bool
561 |       next_expect_member_array_null (const char*, bool = false);
562 | 
563 |       bool
564 |       next_expect_member_array_null (const std::string&, bool = false);
565 | 
566 |       // Get the next event and make sure it is the beginning of a value
567 |       // (begin_object, begin_array, string, number, boolean, null). If it is
568 |       // not, then throw invalid_json_input with appropriate description.
569 |       // Otherwise, skip until the end of the value, recursively in case of
570 |       // object and array.
571 |       //
572 |       // This function is primarily useful for skipping unknown object
573 |       // members, for example:
574 |       //
575 |       //     while (p.next_expect (event::name, event::end_object))
576 |       //     {
577 |       //       if (p.name () == "known")
578 |       //       {
579 |       //         // Handle known member.
580 |       //       }
581 |       //       else
582 |       //         p.next_expect_value_skip ();
583 |       //     }
584 |       //
585 |       void
586 |       next_expect_value_skip ();
587 | 
588 |       // Parsing location.
589 |       //
590 | 
591 |       // Return the line number (1-based) corresponding to the most recently
592 |       // parsed event or 0 if nothing has been parsed yet.
593 |       //
594 |       std::uint64_t
595 |       line () const noexcept;
596 | 
597 |       // Return the column number (1-based) corresponding to the beginning of
598 |       // the most recently parsed event or 0 if nothing has been parsed yet.
599 |       //
600 |       std::uint64_t
601 |       column () const noexcept;
602 | 
603 |       // Return the position (byte offset) pointing immediately after the most
604 |       // recently parsed event or 0 if nothing has been parsed yet.
605 |       //
606 |       std::uint64_t
607 |       position () const noexcept;
608 | 
609 |       // Implementation details.
610 |       //
611 |     public:
612 |       struct iterator
613 |       {
614 |         using value_type = event;
615 | 
616 |         explicit iterator (parser* p = nullptr, optional<event> e = nullopt)
617 |           : p_ (p),
618 |             e_ (e) {}
619 | 
620 |         event     operator*  () const {return *e_;}
621 |         iterator& operator++ ()       {e_ = p_->next (); return *this;}
622 | 
623 |         // Equal only if both are at the end (eof), i.e., hold no event.
624 |         //
625 |         bool operator== (iterator y) const {return !(e_ || y.e_);}
626 |         bool operator!= (iterator y) const {return e_ || y.e_;}
627 | 
628 |       private:
629 |         parser* p_;
630 |         optional<event> e_;
631 |       };
632 | 
633 |       struct stream
634 |       {
635 |         std::istream*                is;        // Can be NULL (see next_impl()).
636 |         optional<std::exception_ptr> exception; // Pending I/O exception, if any.
637 |       };
638 | 
639 |       [[noreturn]] void
640 |       throw_invalid_value (const char* type, const char*, std::size_t) const;
641 | 
642 |       ~parser ();
643 | 
644 |     private:
645 |       // Functionality shared by next() and peek().
646 |       //
647 |       json_type
648 |       next_impl ();
649 | 
650 |       // Translate the event produced by the most recent call to next_impl().
651 |       //
652 |       // Note that the underlying parser state determines whether name or
653 |       // value is returned when translating JSON_STRING.
654 |       //
655 |       optional<event>
656 |       translate (json_type) const noexcept;
657 | 
658 |       // Cache state (name/value) produced by the most recent call to
659 |       // next_impl().
660 |       //
661 |       void
662 |       cache_parsed_data ();
663 | 
664 |       // Cache the location numbers as determined by the most recent call to
665 |       // next_impl().
666 |       //
667 |       void
668 |       cache_parsed_location () noexcept;
669 | 
670 |       // Return true if this is a value event (string, number, boolean, or
671 |       // null).
672 |       //
673 |       static bool
674 |       value_event (optional<event>) noexcept;
675 | 
676 |       stream stream_;
677 | 
678 |       bool multi_value_;
679 |       const char* separators_;
680 | 
681 |       // The *_p_ members indicate whether the value is present (cached).
682 |       // Note: not using optional not to reallocate the string's buffer.
683 |       //
684 |       std::string name_;                       bool name_p_     = false;
685 |       std::string value_;                      bool value_p_    = false;
686 |       std::uint64_t line_, column_, position_; bool location_p_ = false;
687 | 
688 |       optional<json_type> parsed_; // Current parsed event if any.
689 |       optional<json_type> peeked_; // Current peeked event if any.
690 | 
691 |       ::json_stream impl_[1];
692 | 
693 |       // Cached raw value.
694 |       //
695 |       const char* raw_s_;
696 |       std::size_t raw_n_;
697 |     };
698 |   }
699 | }
700 | 
701 | #include <libstud/json/parser.ixx>
702 | 


--------------------------------------------------------------------------------
/libstud/json/parser.ixx:
--------------------------------------------------------------------------------
  1 | #include <cerrno>
  2 | #include <limits>      // numeric_limits
  3 | #include <utility>     // move()
  4 | #include <cassert>
  5 | #include <cstdlib>     // strto*()
  6 | #include <type_traits> // enable_if, is_*
  7 | #include <cstring>     // strlen()
  8 | 
  9 | namespace stud
 10 | {
 11 |   namespace json
 12 |   {
 13 |     inline invalid_json_input:: // Overload taking description as string.
 14 |     invalid_json_input (std::string n,
 15 |                         std::uint64_t l,
 16 |                         std::uint64_t c,
 17 |                         std::uint64_t p,
 18 |                         const std::string& d)
 19 |         : invalid_json_input (std::move (n), l, c, p, d.c_str ())
 20 |     { // Note: std::move is qualified so as not to rely on ADL.
 21 |     }
 22 | 
 23 |     inline invalid_json_input:: // Overload taking description as C-string.
 24 |     invalid_json_input (std::string n,   // Input name.
 25 |                         std::uint64_t l, // Line.
 26 |                         std::uint64_t c, // Column.
 27 |                         std::uint64_t p, // Position.
 28 |                         const char* d)   // Description.
 29 |         : invalid_argument (d), // Description goes to the base class.
 30 |           name (std::move (n)),
 31 |           line (l), column (c), position (p)
 32 |     {
 33 |     }
 34 | 
 35 |     inline parser:: // Stream input, name as std::string.
 36 |     parser (std::istream& is,
 37 |             const std::string& n,
 38 |             bool mv,
 39 |             const char* sep) noexcept
 40 |         : parser (is, n.c_str (), mv, sep) // Forward name as C-string.
 41 |     {
 42 |     }
 43 | 
 44 |     inline parser:: // Memory buffer (t, s), name as std::string.
 45 |     parser (const void* t,
 46 |             std::size_t s,
 47 |             const std::string& n,
 48 |             bool mv,
 49 |             const char* sep) noexcept
 50 |         : parser (t, s, n.c_str (), mv, sep) // Forward name as C-string.
 51 |     {
 52 |     }
 53 | 
 54 |     inline parser:: // std::string text, name as std::string.
 55 |     parser (const std::string& t,
 56 |             const std::string& n,
 57 |             bool mv,
 58 |             const char* sep) noexcept
 59 |         : parser (t.data (), t.size (), n.c_str (), mv, sep) // As a buffer.
 60 |     {
 61 |     }
 62 | 
 63 |     inline parser:: // std::string text, name as C-string.
 64 |     parser (const std::string& t,
 65 |             const char* n,
 66 |             bool mv,
 67 |             const char* sep) noexcept
 68 |         : parser (t.data (), t.size (), n, mv, sep) // As a buffer.
 69 |     {
 70 |     }
 71 | 
 72 |     inline parser:: // C-string text, name as std::string.
 73 |     parser (const char* t,
 74 |             const std::string& n,
 75 |             bool mv,
 76 |             const char* sep) noexcept
 77 |         : parser (t, std::strlen (t), n.c_str (), mv, sep) // t: non-NULL.
 78 |     {
 79 |     }
 80 | 
 81 |     inline parser:: // C-string text, name as C-string.
 82 |     parser (const char* t,
 83 |             const char* n,
 84 |             bool mv,
 85 |             const char* sep) noexcept
 86 |         : parser (t, std::strlen (t), n, mv, sep) // t: non-NULL.
 87 |     {
 88 |     }
 89 | 
 90 |     inline const std::string& parser::
 91 |     name ()
 92 |     {
 93 |       if (name_p_) return name_; // Already cached.
 94 | 
 95 |       // Otherwise cache it from the most recently parsed (not peeked) event.
 96 |       assert (parsed_ && !peeked_ && !value_p_);
 97 |       cache_parsed_data ();
 98 |       assert (name_p_);
 99 |       return name_;
100 |     }
101 | 
102 |     inline std::string& parser::
103 |     value ()
104 |     {
105 |       if (value_p_) return value_; // Already cached.
106 | 
107 |       // Otherwise cache it from the most recently parsed (not peeked) event.
108 |       assert (parsed_ && !peeked_ && !name_p_);
109 |       cache_parsed_data ();
110 |       assert (value_p_);
111 |       return value_;
112 |     }
113 | 
114 |     // parse_value<T>() overloads convert the text [b, b + n) to T (selected
115 |     // via enable_if). Note: C++17 from_chars() was made exactly for this.
116 |     // The bool overload only checks whether the first character is 't'.
117 |     template <typename T>
118 |     inline typename std::enable_if<std::is_same<T, bool>::value, T>::type
119 |     parse_value (const char* b, size_t, const parser&)
120 |     {
121 |       return *b == 't'; // No validation: anything else yields false.
122 |     }
123 | 
124 |     template <typename T> // Signed integers: parse [b, b + n) as decimal.
125 |     inline typename std::enable_if<
126 |       std::is_integral<T>::value &&
127 |       std::is_signed<T>::value &&
128 |       !std::is_same<T, bool>::value, T>::type
129 |     parse_value (const char* b, size_t n, const parser& p)
130 |     {
131 |       errno = 0; // POSIX: must be cleared before the call to detect ERANGE.
132 |       char* end (nullptr);
133 |       const std::int64_t r (strtoll (b, &end, 10)); // Can't throw.
134 |       const bool whole (end != b && end == b + n); // Non-empty, all consumed.
135 |       const bool fits (errno != ERANGE &&
136 |                        r >= std::numeric_limits<T>::min () &&
137 |                        r <= std::numeric_limits<T>::max ());
138 |       if (!whole || !fits)
139 |         p.throw_invalid_value ("signed integer", b, n);
140 |       return static_cast<T> (r);
141 |     }
142 | 
143 |     template <typename T> // Unsigned integers: parse [b, b + n) as decimal.
144 |     inline typename std::enable_if<
145 |       std::is_integral<T>::value &&
146 |       std::is_unsigned<T>::value &&
147 |       !std::is_same<T, bool>::value, T>::type
148 |     parse_value (const char* b, size_t n, const parser& p)
149 |     {
150 |       char* e (nullptr);
151 |       errno = 0; // We must clear it according to POSIX.
152 |       std::uint64_t v (strtoull (b, &e, 10)); // Can't throw.
153 |       // Note: strtoull() accepts "-N" and returns its negation so reject it.
154 |       if (e == b || e != b + n || errno == ERANGE ||
155 |           (v != 0 && std::memchr (b, '-', n) != nullptr) || // Negative.
156 |           v > std::numeric_limits<T>::max ())
157 |         p.throw_invalid_value ("unsigned integer", b, n);
158 |       return static_cast<T> (v); // Fits into T (and "-0" still yields 0).
159 |     }
160 | 
161 |     template <typename T> // float: parse [b, b + n) with strtof().
162 |     inline typename std::enable_if<std::is_same<T, float>::value, T>::type
163 |     parse_value (const char* b, size_t n, const parser& p)
164 |     {
165 |       errno = 0; // POSIX: must be cleared before the call to detect ERANGE.
166 |       char* end (nullptr);
167 |       const T v (std::strtof (b, &end));
168 | 
169 |       // Fail on empty or partially consumed input and on range errors.
170 |       const bool bad (end == b || end != b + n || errno == ERANGE);
171 |       if (bad) p.throw_invalid_value ("float", b, n);
172 |       return v;
173 |     }
174 | 
175 |     template <typename T> // double: parse [b, b + n) with strtod().
176 |     inline typename std::enable_if<std::is_same<T, double>::value, T>::type
177 |     parse_value (const char* b, size_t n, const parser& p)
178 |     {
179 |       errno = 0; // POSIX: must be cleared before the call to detect ERANGE.
180 |       char* end (nullptr);
181 |       const T v (std::strtod (b, &end));
182 | 
183 |       // Fail on empty or partially consumed input and on range errors.
184 |       const bool bad (end == b || end != b + n || errno == ERANGE);
185 |       if (bad) p.throw_invalid_value ("double", b, n);
186 |       return v;
187 |     }
188 | 
189 |     template <typename T> // long double: parse [b, b + n) with strtold().
190 |     inline typename std::enable_if<std::is_same<T, long double>::value, T>::type
191 |     parse_value (const char* b, size_t n, const parser& p)
192 |     {
193 |       errno = 0; // POSIX: must be cleared before the call to detect ERANGE.
194 |       char* end (nullptr);
195 |       const T v (std::strtold (b, &end));
196 | 
197 |       // Fail on empty or partially consumed input and on range errors.
198 |       const bool bad (end == b || end != b + n || errno == ERANGE);
199 |       if (bad) p.throw_invalid_value ("long double", b, n);
200 |       return v;
201 |     }
202 | 
203 |     template <typename T>
204 |     inline T parser::
205 |     value () const
206 |     {
207 |       // Prefer the cached value; otherwise parse the raw text of the most
208 |       // recently parsed (not peeked) event, which must be a value event.
209 |       if (value_p_)
210 |         return parse_value<T> (value_.data (), value_.size (), *this);
211 | 
212 |       assert (parsed_ && !peeked_ && value_event (translate (*parsed_)));
213 |       return parse_value<T> (raw_s_, raw_n_, *this);
214 |     }
215 | 
216 |     inline void parser:: // std::string overload of next_expect_name().
217 |     next_expect_name (const std::string& n, bool su)
218 |     {
219 |       next_expect_name (n.c_str (), su); // Forward to the C-string version.
220 |     }
221 | 
222 |     // next_expect_<type>(): call next_expect() with the corresponding event
223 |     // type and then return the value either as a string or parsed as T.
224 |     inline std::string& parser::
225 |     next_expect_string ()
226 |     {
227 |       next_expect (event::string);
228 |       return value ();
229 |     }
230 | 
231 |     template <typename T>
232 |     inline T parser::
233 |     next_expect_string ()
234 |     {
235 |       next_expect (event::string);
236 |       return value<T> (); // String contents converted with parse_value<T>().
237 |     }
238 | 
239 |     inline std::string& parser::
240 |     next_expect_number ()
241 |     {
242 |       next_expect (event::number);
243 |       return value ();
244 |     }
245 | 
246 |     template <typename T>
247 |     inline T parser::
248 |     next_expect_number ()
249 |     {
250 |       next_expect (event::number);
251 |       return value<T> ();
252 |     }
253 | 
254 |     inline std::string& parser::
255 |     next_expect_boolean ()
256 |     {
257 |       next_expect (event::boolean);
258 |       return value ();
259 |     }
260 | 
261 |     template <typename T>
262 |     inline T parser::
263 |     next_expect_boolean ()
264 |     {
265 |       next_expect (event::boolean);
266 |       return value<T> ();
267 |     }
268 | 
269 |     // next_expect_<type>_null(): pass both <type> and null to next_expect();
270 |     // return the value if it returns true and nullptr/nullopt otherwise.
271 |     inline std::string* parser::
272 |     next_expect_string_null ()
273 |     {
274 |       return next_expect (event::string, event::null) ? &value () : nullptr;
275 |     }
276 | 
277 |     template <typename T>
278 |     inline optional<T> parser::
279 |     next_expect_string_null ()
280 |     {
281 |       return next_expect (event::string, event::null)
282 |         ? optional<T> (value<T> ())
283 |         : nullopt;
284 |     }
285 | 
286 |     inline std::string* parser::
287 |     next_expect_number_null ()
288 |     {
289 |       return next_expect (event::number, event::null) ? &value () : nullptr;
290 |     }
291 | 
292 |     template <typename T>
293 |     inline optional<T> parser::
294 |     next_expect_number_null ()
295 |     {
296 |       return next_expect (event::number, event::null)
297 |         ? optional<T> (value<T> ())
298 |         : nullopt;
299 |     }
300 | 
301 |     inline std::string* parser::
302 |     next_expect_boolean_null ()
303 |     {
304 |       return next_expect (event::boolean, event::null) ? &value () : nullptr;
305 |     }
306 | 
307 |     template <typename T>
308 |     inline optional<T> parser::
309 |     next_expect_boolean_null ()
310 |     {
311 |       return next_expect (event::boolean, event::null)
312 |         ? optional<T> (value<T> ())
313 |         : nullopt;
314 |     }
315 | 
316 |     // next_expect_member_string(): next_expect_name(n, su) followed by
317 |     // next_expect_string(); the std::string overloads forward n.c_str().
318 |     inline std::string& parser::
319 |     next_expect_member_string (const char* n, bool su)
320 |     {
321 |       next_expect_name (n, su);
322 |       return next_expect_string ();
323 |     }
324 | 
325 |     inline std::string& parser::
326 |     next_expect_member_string (const std::string& n, bool su)
327 |     {
328 |       return next_expect_member_string (n.c_str (), su);
329 |     }
330 | 
331 |     template <typename T>
332 |     inline T parser::
333 |     next_expect_member_string (const char* n, bool su)
334 |     {
335 |       next_expect_name (n, su);
336 |       return next_expect_string<T> ();
337 |     }
338 | 
339 |     template <typename T>
340 |     inline T parser::
341 |     next_expect_member_string (const std::string& n, bool su)
342 |     {
343 |       return next_expect_member_string<T> (n.c_str (), su);
344 |     }
345 | 
346 |     // next_expect_member_number(): next_expect_name(n, su) followed by
347 |     // next_expect_number(); the std::string overloads forward n.c_str().
348 |     inline std::string& parser::
349 |     next_expect_member_number (const char* n, bool su)
350 |     {
351 |       next_expect_name (n, su);
352 |       return next_expect_number ();
353 |     }
354 | 
355 |     inline std::string& parser::
356 |     next_expect_member_number (const std::string& n, bool su)
357 |     {
358 |       return next_expect_member_number (n.c_str (), su);
359 |     }
360 | 
361 |     template <typename T>
362 |     inline T parser::
363 |     next_expect_member_number (const char* n, bool su)
364 |     {
365 |       next_expect_name (n, su);
366 |       return next_expect_number<T> ();
367 |     }
368 | 
369 |     template <typename T>
370 |     inline T parser::
371 |     next_expect_member_number (const std::string& n, bool su)
372 |     {
373 |       return next_expect_member_number<T> (n.c_str (), su);
374 |     }
375 | 
376 |     // next_expect_member_boolean(): next_expect_name(n, su) followed by
377 |     // next_expect_boolean(); the std::string overloads forward n.c_str().
378 |     inline std::string& parser::
379 |     next_expect_member_boolean (const char* n, bool su)
380 |     {
381 |       next_expect_name (n, su);
382 |       return next_expect_boolean ();
383 |     }
384 | 
385 |     inline std::string& parser::
386 |     next_expect_member_boolean (const std::string& n, bool su)
387 |     {
388 |       return next_expect_member_boolean (n.c_str (), su);
389 |     }
390 | 
391 |     template <typename T>
392 |     inline T parser::
393 |     next_expect_member_boolean (const char* n, bool su)
394 |     {
395 |       next_expect_name (n, su);
396 |       return next_expect_boolean<T> ();
397 |     }
398 | 
399 |     template <typename T>
400 |     inline T parser::
401 |     next_expect_member_boolean (const std::string& n, bool su)
402 |     {
403 |       return next_expect_member_boolean<T> (n.c_str (), su);
404 |     }
405 | 
406 |     // next_expect_member_string_null(): next_expect_name(n, su) followed by
407 |     // next_expect_string_null(); std::string overloads forward n.c_str().
408 |     inline std::string* parser::
409 |     next_expect_member_string_null (const char* n, bool su)
410 |     {
411 |       next_expect_name (n, su);
412 |       return next_expect_string_null ();
413 |     }
414 | 
415 |     inline std::string* parser::
416 |     next_expect_member_string_null (const std::string& n, bool su)
417 |     {
418 |       return next_expect_member_string_null (n.c_str (), su);
419 |     }
420 | 
421 |     template <typename T>
422 |     inline optional<T> parser::
423 |     next_expect_member_string_null (const char* n, bool su)
424 |     {
425 |       next_expect_name (n, su);
426 |       return next_expect_string_null<T> ();
427 |     }
428 | 
429 |     template <typename T>
430 |     inline optional<T> parser::
431 |     next_expect_member_string_null (const std::string& n, bool su)
432 |     {
433 |       return next_expect_member_string_null<T> (n.c_str (), su);
434 |     }
435 | 
436 |     // next_expect_member_number_null(): next_expect_name(n, su) followed by
437 |     // next_expect_number_null(); std::string overloads forward n.c_str().
438 |     inline std::string* parser::
439 |     next_expect_member_number_null (const char* n, bool su)
440 |     {
441 |       next_expect_name (n, su);
442 |       return next_expect_number_null ();
443 |     }
444 | 
445 |     inline std::string* parser::
446 |     next_expect_member_number_null (const std::string& n, bool su)
447 |     {
448 |       return next_expect_member_number_null (n.c_str (), su);
449 |     }
450 | 
451 |     template <typename T>
452 |     inline optional<T> parser::
453 |     next_expect_member_number_null (const char* n, bool su)
454 |     {
455 |       next_expect_name (n, su);
456 |       return next_expect_number_null<T> ();
457 |     }
458 | 
459 |     template <typename T>
460 |     inline optional<T> parser::
461 |     next_expect_member_number_null (const std::string& n, bool su)
462 |     {
463 |       return next_expect_member_number_null<T> (n.c_str (), su);
464 |     }
465 | 
466 |     // next_expect_member_boolean_null(): next_expect_name(n, su) followed by
467 |     // next_expect_boolean_null(); std::string overloads forward n.c_str().
468 |     inline std::string* parser::
469 |     next_expect_member_boolean_null (const char* n, bool su)
470 |     {
471 |       next_expect_name (n, su);
472 |       return next_expect_boolean_null ();
473 |     }
474 | 
475 |     inline std::string* parser::
476 |     next_expect_member_boolean_null (const std::string& n, bool su)
477 |     {
478 |       return next_expect_member_boolean_null (n.c_str (), su);
479 |     }
480 | 
481 |     template <typename T>
482 |     inline optional<T> parser::
483 |     next_expect_member_boolean_null (const char* n, bool su)
484 |     {
485 |       next_expect_name (n, su);
486 |       return next_expect_boolean_null<T> ();
487 |     }
488 | 
489 |     template <typename T>
490 |     inline optional<T> parser::
491 |     next_expect_member_boolean_null (const std::string& n, bool su)
492 |     {
493 |       return next_expect_member_boolean_null<T> (n.c_str (), su);
494 |     }
495 | 
496 |     // next_expect_member_object[_null](): next_expect_name(n, su) followed by
497 |     // next_expect() for begin_object (the _null version also passes null).
498 |     inline void parser::
499 |     next_expect_member_object (const char* n, bool su)
500 |     {
501 |       next_expect_name (n, su);
502 |       next_expect (event::begin_object);
503 |     }
504 | 
505 |     inline void parser::
506 |     next_expect_member_object (const std::string& n, bool su)
507 |     {
508 |       next_expect_member_object (n.c_str (), su);
509 |     }
510 | 
511 |     inline bool parser::
512 |     next_expect_member_object_null (const char* n, bool su)
513 |     {
514 |       next_expect_name (n, su);
515 |       return next_expect (event::begin_object, event::null); // Result as is.
516 |     }
517 | 
518 |     inline bool parser::
519 |     next_expect_member_object_null (const std::string& n, bool su)
520 |     {
521 |       return next_expect_member_object_null (n.c_str (), su);
522 |     }
523 | 
524 |     // next_expect_member_array[_null](): next_expect_name(n, su) followed by
525 |     // next_expect() for begin_array (the _null version also passes null).
526 |     inline void parser::
527 |     next_expect_member_array (const char* n, bool su)
528 |     {
529 |       next_expect_name (n, su);
530 |       next_expect (event::begin_array);
531 |     }
532 | 
533 |     inline void parser::
534 |     next_expect_member_array (const std::string& n, bool su)
535 |     {
536 |       next_expect_member_array (n.c_str (), su);
537 |     }
538 | 
539 |     inline bool parser::
540 |     next_expect_member_array_null (const char* n, bool su)
541 |     {
542 |       next_expect_name (n, su);
543 |       return next_expect (event::begin_array, event::null); // Result as is.
544 |     }
545 | 
546 |     inline bool parser::
547 |     next_expect_member_array_null (const std::string& n, bool su)
548 |     {
549 |       return next_expect_member_array_null (n.c_str (), su);
550 |     }
551 |   }
552 | }
553 | 


--------------------------------------------------------------------------------
/libstud/json/pdjson.c:
--------------------------------------------------------------------------------
1 | ../../pdjson/pdjson.c


--------------------------------------------------------------------------------
/libstud/json/pdjson.h:
--------------------------------------------------------------------------------
1 | ../../pdjson/pdjson.h


--------------------------------------------------------------------------------
/libstud/json/serializer.cxx:
--------------------------------------------------------------------------------
  1 | #include <cstdio>   // snprintf
  2 | #include <cstdarg>  // va_list
  3 | #include <cstring>  // memcpy, strlen
  4 | #include <ostream>
  5 | 
  6 | #include <libstud/json/serializer.hxx>
  7 | 
  8 | using namespace std;
  9 | 
 10 | namespace stud
 11 | {
 12 |   namespace json
 13 |   {
 14 |     using buffer     = buffer_serializer::buffer;
 15 |     using error_code = invalid_json_output::error_code;
 16 | 
 17 |     template <typename T>
 18 |     static void
 19 |     dynarray_overflow (void* d, event, buffer& b, size_t ex)
 20 |     {
 21 |       // Grow the container to the current capacity plus ex and then to its
 22 |       // own full capacity, and point the buffer to the (new) storage.
 23 |       T* c (static_cast<T*> (d));
 24 |       c->resize (b.capacity + ex);
 25 |       c->resize (c->capacity ());
 26 |       b.capacity = c->size ();
 27 |       b.data = const_cast<typename T::value_type*> (c->data ()); // Pre-C++17.
 28 |     }
 29 | 
 30 |     template <typename T>
 31 |     static void
 32 |     dynarray_flush (void* d, event, buffer& b)
 33 |     { // Resize the container to the bytes written and re-sync the buffer.
 34 |       T* c (static_cast<T*> (d));
 35 |       c->resize (b.size);
 36 |       b.capacity = b.size;
 37 |       b.data = const_cast<typename T::value_type*> (c->data ());
 38 |     }
 39 | 
 40 |     buffer_serializer:: // Serialize into a std::string that grows on demand.
 41 |     buffer_serializer (string& s, size_t i, const char* mvs)
 42 |         : buffer_serializer (const_cast<char*> (s.data ()), size_, s.size (),
 43 |                              dynarray_overflow<string>, // Grows s.
 44 |                              dynarray_flush<string>,    // Resizes s to size.
 45 |                              &s,                        // Callback data.
 46 |                              i, mvs)
 47 |     { // NOTE(review): size_ is passed above before being set (by reference?).
 48 |       size_ = s.size (); // Start with size equal to capacity (s.size ()).
 49 |     }
 50 | 
 51 |     buffer_serializer:: // Serialize into a vector<char> that grows on demand.
 52 |     buffer_serializer (vector<char>& v, size_t i, const char* mvs)
 53 |         : buffer_serializer (v.data (), size_, v.size (),
 54 |                              dynarray_overflow<vector<char>>, // Grows v.
 55 |                              dynarray_flush<vector<char>>,    // Resizes v.
 56 |                              &v,                              // Callback data.
 57 |                              i, mvs)
 58 |     { // NOTE(review): size_ is passed above before being set (by reference?).
 59 |       size_ = v.size (); // Start with size equal to capacity (v.size ()).
 60 |     }
 61 | 
 62 |     static void
 63 |     ostream_overflow (void* d, event e, buffer& b, size_t)
 64 |     { // Write the buffered data to the stream and mark the buffer as empty.
 65 |       ostream* os (static_cast<ostream*> (d));
 66 |       const bool ok (!os->write (static_cast<char*> (b.data), b.size).fail ());
 67 |       if (!ok)
 68 |         throw invalid_json_output (
 69 |             e, error_code::buffer_overflow, "unable to write JSON output text");
 70 |       b.size = 0;
 71 |     }
 72 | 
 73 |     static void
 74 |     ostream_flush (void* d, event e, buffer& b)
 75 |     {
 76 |       // First write out whatever is still buffered (throws on failure).
 77 |       ostream_overflow (d, e, b, 0);
 78 | 
 79 |       // Then flush the stream itself.
 80 |       if (static_cast<ostream*> (d)->flush ().fail ())
 81 |         throw invalid_json_output (
 82 |             e, error_code::buffer_overflow, "unable to write JSON output text");
 83 |     }
 84 | 
 85 |     stream_serializer:: // Serialize to os using tmp_ as the buffer.
 86 |     stream_serializer (ostream& os, size_t i, const char* mvs)
 87 |         : buffer_serializer (tmp_, sizeof (tmp_),
 88 |                              ostream_overflow, // Writes the buffer to os.
 89 |                              ostream_flush,    // Same plus flushes os.
 90 |                              &os,              // Passed to the callbacks.
 91 |                              i, mvs)
 92 |     {
 93 |     }
 94 | 
 95 |     bool buffer_serializer::
 96 |     next (optional<event> e, pair<const char*, size_t> val, bool check)
 97 |     { // Validate event e against the current state and write it out.
 98 |       if (absent_ == 2) // Two absent events in a row: sequence is complete.
 99 |         goto fail_complete;
100 | 
101 |       if (e == nullopt) // Absent event: only valid outside of any value.
102 |       {
103 |         if (!state_.empty ())
104 |           goto fail_incomplete;
105 | 
106 |         absent_++;
107 |         return false;
108 |       }
109 | 
110 |       absent_ = 0; // Clear inter-value absent event.
111 | 
112 |       {
113 |         state* st (state_.empty () ? nullptr : &state_.back ()); // Innermost.
114 |         // In an object names and values alternate so even count means name.
115 |         auto name_expected = [] (const state& s)
116 |         {
117 |           return s.type == event::begin_object && s.count % 2 == 0;
118 |         };
119 | 
120 |         auto make_str = [] (const char* s, size_t n)
121 |         {
122 |           return make_pair (s, n);
123 |         };
124 | 
125 |         // When it comes to pretty-printing, the common way to do it is along
126 |         // these lines:
127 |         //
128 |         // {
129 |         //   "str": "value",
130 |         //   "obj": {
131 |         //     "arr": [
132 |         //       1,
133 |         //       2,
134 |         //       3
135 |         //     ]
136 |         //   },
137 |         //   "num": 123
138 |         // }
139 |         //
140 |         // Empty objects and arrays are printed without a newline:
141 |         //
142 |         // {
143 |         //   "obj": {},
144 |         //   "arr": []
145 |         // }
146 |         //
147 |         // There are two types of separators: between name and value, which is
148 |         // always ": ", and before/after value inside an object or array which
149 |         // is either newline followed by indentation, or comma followed by
150 |         // newline followed by indentation (we also have separation between
151 |         // top-level values but that's orthogonal to pretty-printing).
152 |         //
153 |         // Based on this observation, we are going to handle the latter case by
154 |         // starting with the ",\n" string (in this->sep_) and pushing/popping
155 |         // indentation spaces as we enter/leave objects and arrays. We handle
156 |         // the cases where we don't need the comma by simply skipping it in the
157 |         // C-string pointer.
158 |         //
159 |         bool pp (indent_ != 0);
160 | 
161 |         pair<const char*, size_t> sep;
162 |         if (st != nullptr)
163 |         {
164 |           // The name-value separator.
165 |           //
166 |           if (st->type == event::begin_object && st->count % 2 == 1)
167 |           {
168 |             sep = !pp ? make_str (":", 1) : make_str (": ", 2);
169 |           }
170 |           // We don't need the comma if we are closing the object or array.
171 |           //
172 |           else if (e == event::end_array || e == event::end_object)
173 |           {
174 |             // But in this case we need to unindent one level prior to writing
175 |             // the brace. Also handle the empty object/array as a special case.
176 |             //
177 |             sep = !pp || st->count == 0
178 |               ? make_str (nullptr, 0)
179 |               : make_str (sep_.c_str () + 1, sep_.size () - 1 - indent_);
180 |           }
181 |           // Or if this is the first value (note: must come after end_*).
182 |           //
183 |           else if (st->count == 0)
184 |           {
185 |             sep = !pp
186 |               ? make_str (nullptr, 0)
187 |               : make_str (sep_.c_str () + 1, sep_.size () - 1);
188 |           }
189 |           else
190 |           {
191 |             sep = !pp
192 |               ? make_str (",", 1)
193 |               : make_str (sep_.c_str (), sep_.size ());
194 |           }
195 |         }
196 |         else if (values_ != 0) // Subsequent top-level value.
197 |         {
198 |           // Top-level value separation.
199 |           //
200 |           sep = make_str (
201 |             mv_separator_,
202 |             (mv_separator_ == nullptr || mv_separator_[0] == '\0' ? 0 :
203 |              mv_separator_[1] == '\0'                             ? 1 :
204 |              strlen (mv_separator_)));
205 |         }
206 | 
207 |         switch (*e)
208 |         {
209 |         case event::begin_array:
210 |         case event::begin_object:
211 |           {
212 |             if (st != nullptr && name_expected (*st))
213 |               goto fail_unexpected_event;
214 | 
215 |             write (*e,
216 |                    sep,
217 |                    make_str (e == event::begin_array ? "[" : "{", 1),
218 |                    false);
219 | 
220 |             if (st != nullptr)
221 |               st->count++;
222 | 
223 |             if (pp)
224 |               sep_.append (indent_, ' ');
225 | 
226 |             state_.push_back (state {*e, 0});
227 |             break;
228 |           }
229 |         case event::end_array:
230 |         case event::end_object:
231 |           {
232 |             if (st == nullptr || (e == event::end_array
233 |                                   ? st->type != event::begin_array
234 |                                   : !name_expected (*st)))
235 |               goto fail_unexpected_event;
236 | 
237 |             write (*e,
238 |                    sep,
239 |                    make_str (e == event::end_array ? "]" : "}", 1),
240 |                    false);
241 | 
242 |             if (pp)
243 |               sep_.erase (sep_.size () - indent_);
244 | 
245 |             state_.pop_back ();
246 |             break;
247 |           }
248 |         case event::name:
249 |         case event::string:
250 |           {
251 |             if (e == event::name
252 |                 ? (st == nullptr || !name_expected (*st))
253 |                 : (st != nullptr && name_expected (*st)))
254 |               goto fail_unexpected_event;
255 | 
256 |             write (*e, sep, val, check, '"');
257 | 
258 |             if (st != nullptr)
259 |               st->count++;
260 |             break;
261 |           }
262 |         case event::null:
263 |         case event::boolean:
264 |           {
265 |             if (e == event::null && val.first == nullptr)
266 |               val = {"null", 4};
267 |             else if (check)
268 |             {
269 |               auto eq = [&val] (const char* v, size_t n)
270 |               {
271 |                 return val.second == n && memcmp (val.first, v, n) == 0;
272 |               };
273 | 
274 |               if (e == event::null)
275 |               {
276 |                 if (!eq ("null", 4))
277 |                   goto fail_null;
278 |               }
279 |               else
280 |               {
281 |                 if (!eq ("true", 4) && !eq ("false", 5))
282 |                   goto fail_bool;
283 |               }
284 |             }
285 |           }
286 |           // Fall through.
287 |         case event::number:
288 |           {
289 |             // Note: this event is also used by value_json_text().
290 | 
291 |             if (st != nullptr && name_expected (*st))
292 |               goto fail_unexpected_event;
293 | 
294 |             write (*e, sep, val, check);
295 | 
296 |             if (st != nullptr)
297 |               st->count++;
298 |             break;
299 |           }
300 |         }
301 |       }
302 | 
303 |       if (state_.empty ()) // A top-level value is complete.
304 |       {
305 |         values_++;
306 |         if (flush_ != nullptr)
307 |           flush_ (data_, *e, buf_);
308 | 
309 |         return false;
310 |       }
311 | 
312 |       return true; // Still inside an object or array.
313 | 
314 |     fail_complete:
315 |       throw invalid_json_output (
316 |           e, error_code::invalid_value, "value sequence is complete");
317 |     fail_incomplete:
318 |       throw invalid_json_output (
319 |           e, error_code::invalid_value, "value is incomplete");
320 |     fail_null:
321 |       throw invalid_json_output (
322 |           e, error_code::invalid_value, "invalid null value");
323 |     fail_bool:
324 |       throw invalid_json_output (
325 |           e, error_code::invalid_value, "invalid boolean value");
326 |     fail_unexpected_event:
327 |       throw invalid_json_output (
328 |           e, error_code::unexpected_event, "unexpected event");
329 |     }
330 | 
331 |     // JSON escape sequences for control characters <= 0x1F, ordered by the
332 |     // character code (for example, entry 0x0A is the escape for newline).
333 |     static const char* json_escapes[] =
334 |     {"\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005",
335 |      "\\u0006", "\\u0007", "\\b",     "\\t",     "\\n",     "\\u000B",
336 |      "\\f",     "\\r",     "\\u000E", "\\u000F", "\\u0010", "\\u0011",
337 |      "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
338 |      "\\u0018", "\\u0019", "\\u001A", "\\u001B", "\\u001C", "\\u001D",
339 |      "\\u001E", "\\u001F"};
340 | 
    // Write the separator (if any) followed by the value (if any) to the
    // output buffer, calling the overflow function as necessary.
    //
    // e     - Event being serialized. It is passed to the overflow function
    //         and is used to report errors (a UTF-8 error is reported as
    //         invalid_name for event::name and as invalid_value otherwise).
    //
    // sep   - Separator text (pointer and length) written verbatim before
    //         the value. It is never escaped, validated, or chunked.
    //
    // val   - Value text (pointer and length).
    //
    // check - If true, escape the characters that must be escaped in a JSON
    //         string and validate the value as UTF-8. If false, write the
    //         value as is.
    //
    // q     - If not '\0', enclose the value in double quotes. Note that
    //         only whether q is '\0' matters: the character written is
    //         always '"'.
    //
    // Throw invalid_json_output with error_code::buffer_overflow if the
    // buffer is out of space and there is no overflow function or it could
    // not provide the minimum required capacity. Throw invalid_json_output
    // with error_code::invalid_name or error_code::invalid_value and the
    // offset of the offending byte if check is true and the value is not
    // valid UTF-8. Note that in both cases part of the output may have
    // already been written to the buffer.
    //
    void buffer_serializer::
    write (event e,
           pair<const char*, size_t> sep,
           pair<const char*, size_t> val,
           bool check,
           char q)
    {
      // Assumptions:
      //
      // 1. A call to overflow should be able to provide enough capacity to
      //    write the entire separator (in other words, we are not going to
      //    bother with chunking the separator).
      //
      // 2. Similarly, a call to overflow should be able to provide enough
      //    capacity to write an entire UTF-8 multi-byte sequence.
      //
      // 3. Performance-wise, we do not expect very long contiguous sequences
      //    of characters that require escaping.

      // Total number of bytes remaining to be written and the capacity
      // currently available.
      //
      // Note that size initially counts every value byte as one output byte.
      // It is increased by chunk_checked() when a byte is replaced with a
      // longer escape sequence and decreased by append() as output is
      // written.
      //
      size_t size (sep.second + val.second + (q != '\0' ? 2 : 0));
      size_t cap (buf_.capacity - buf_.size);

      // Call the overflow function to obtain more space and recalculate cap.
      //
      // The min argument is the capacity that must be available after the
      // call. The extra argument is the space required in addition to size
      // (used for escape sequences). The hint passed to the overflow
      // function is the larger of (size + extra - cap) and min.
      //
      // Return false if there is no overflow function or if the capacity
      // after the call is still less than min.
      //
      auto grow = [this, e, &size, &cap] (size_t min, size_t extra = 0)
      {
        if (overflow_ == nullptr)
          return false;

        extra += size;
        extra -= cap;
        overflow_ (data_, e, buf_, extra > min ? extra : min);
        cap = buf_.capacity - buf_.size;

        return cap >= min;
      };

      // Copy s bytes starting at d to the end of the buffer and update the
      // buffer size, the available capacity, and the remaining size. The
      // caller must make sure there is enough capacity.
      //
      auto append = [this, &cap, &size] (const char* d, size_t s)
      {
        memcpy (static_cast<char*> (buf_.data) + buf_.size, d, s);
        buf_.size += s;
        cap -= s;
        size -= s;
      };

      // Return the longest chunk of input that fits into the buffer and does
      // not end in the middle of a multi-byte UTF-8 sequence. Assume value
      // size and capacity are not 0. Return NULL in first if no chunk could
      // be found that fits into the remaining space. In this case, second is
      // the additional (to size) required space (used to handle escapes in
      // the checked version).
      //
      // The basic idea is to seek in the input buffer to the capacity of the
      // output buffer (unless the input is shorter than the output). If we
      // ended up in the middle of a multi-byte UTF-8 sequence, then seek back
      // until we end up at the UTF-8 sequence boundary. Note that this
      // implementation assumes valid UTF-8.
      //
      // On success val is advanced past the returned chunk.
      //
      auto chunk = [&cap, &val] () -> pair<const char*, size_t>
      {
        pair<const char*, size_t> r (nullptr, 0);

        if (cap >= val.second)
          r = val;
        else
        {
          // Start from the character past capacity and search for a UTF-8
          // sequence boundary.
          //
          // Note that reading *p is within the value since cap is less than
          // val.second here. If the loop reaches the beginning of the value
          // without finding a boundary, r stays NULL.
          //
          for (const char* p (val.first + cap); p != val.first; --p)
          {
            const auto u (static_cast<uint8_t> (*p));
            if (u < 0x80 || u > 0xBF) // Not a continuation byte
            {
              r = {val.first, p - val.first};
              break;
            }
          }
        }

        val.first += r.second;
        val.second -= r.second;

        return r;
      };

      // Escaping and UTF-8-validating version of chunk().
      //
      // There are three classes of mandatory escapes in a JSON string:
      //
      // - \\ and \"
      //
      // - \b \f \n \r \t for popular control characters
      //
      // - \u00NN for other control characters <= 0x1F
      //
      // If the input begins with a character that must be escaped, return
      // only its escape sequence. Otherwise validate and return everything up
      // to the end of input or buffer capacity, but cutting it short before
      // the next character that must be escaped or the first UTF-8 sequence
      // that would not fit.
      //
      // Return string::npos in second in case of a stray continuation byte or
      // any byte in an invalid UTF-8 range (for example, an "overlong" 2-byte
      // encoding of a 7-bit/ASCII character or a 4-, 5-, or 6-byte sequence
      // that would encode a codepoint beyond the U+10FFFF Unicode limit).
      // In this case val is advanced to the beginning of the invalid sequence
      // and the valid bytes that precede it are not returned.
      //
      auto chunk_checked = [&cap, &size, &val] () -> pair<const char*, size_t>
      {
        pair<const char*, size_t> r (nullptr, 0);

        // Check whether the first character needs to be escaped.
        //
        const uint8_t c (val.first[0]);
        if (c == '"')
          r = {"\\\"", 2};
        else if (c == '\\')
          r = {"\\\\", 2};
        else if (c <= 0x1F)
        {
          // The entry is either a two-character escape or the six-character
          // \uXXXX form, distinguished by its second character.
          //
          auto s (json_escapes[c]);
          r = {s, s[1] == 'u' ? 6 : 2};
        }

        if (r.first != nullptr)
        {
          // Return in second the additional (to size) space required.
          //
          // Note: minus one because size already accounts for the unescaped
          // character.
          //
          if (r.second > cap)
            return {nullptr, r.second - 1};

          // If we had to escape the character then adjust size accordingly
          // (see append() above).
          //
          size += r.second - 1;

          val.first += 1;
          val.second -= 1;
          return r;
        }

        // First character doesn't need to be escaped. Return as much of the
        // rest of the input as possible.
        //
        // Note that inside the loop a break leaves the loop with i at the
        // end of the chunk while a continue (after i was advanced to the
        // last byte of a valid multi-byte sequence) moves on to the next
        // character.
        //
        size_t i (0);
        for (size_t n (min (cap, val.second)); i != n; i++)
        {
          const uint8_t c1 (val.first[i]);

          if (c1 == '"' || c1 == '\\' || c1 <= 0x1F) // Needs to be escaped.
            break;
          else if (c1 >= 0x80) // Not ASCII, so validate as a UTF-8 sequence.
          {
            size_t i1 (i); // Position of the first byte.

            // The control flow here is to continue if valid and to fall
            // through to return on error.
            //
            if (c1 >= 0xC2 && c1 <= 0xDF) // 2-byte sequence.
            {
              if (i + 2 <= val.second) // Sequence is complete in JSON value.
              {
                if (i + 2 > cap) // Sequence won't fit.
                  break;

                const uint8_t c2 (val.first[++i]);

                if (c2 >= 0x80 && c2 <= 0xBF)
                  continue;
              }
            }
            else if (c1 >= 0xE0 && c1 <= 0xEF) // 3-byte sequence.
            {
              if (i + 3 <= val.second)
              {
                if (i + 3 > cap)
                  break;

                const uint8_t c2 (val.first[++i]), c3 (val.first[++i]);

                // The valid range of the second byte depends on the first
                // byte. Note that a break inside the switch only leaves the
                // switch (and so falls through to the error return).
                //
                if (c3 >= 0x80 && c3 <= 0xBF)
                {
                  switch (c1)
                  {
                  case 0xE0: if (c2 >= 0xA0 && c2 <= 0xBF) continue; break;
                  case 0xED: if (c2 >= 0x80 && c2 <= 0x9F) continue; break;
                  default:   if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
                  }
                }
              }
            }
            else if (c1 >= 0xF0 && c1 <= 0xF4) // 4-byte sequence.
            {
              if (i + 4 <= val.second)
              {
                if (i + 4 > cap)
                  break;

                const uint8_t c2 (val.first[++i]),
                              c3 (val.first[++i]),
                              c4 (val.first[++i]);

                // As above, the valid range of the second byte depends on
                // the first byte.
                //
                if (c3 >= 0x80 && c3 <= 0xBF &&
                    c4 >= 0x80 && c4 <= 0xBF)
                {
                  switch (c1)
                  {
                  case 0xF0: if (c2 >= 0x90 && c2 <= 0xBF) continue; break;
                  case 0xF4: if (c2 >= 0x80 && c2 <= 0x8F) continue; break;
                  default:   if (c2 >= 0x80 && c2 <= 0xBF) continue; break;
                  }
                }
              }
            }

            // Invalid or incomplete sequence.
            //
            r = {val.first, string::npos};

            // Update val to point to the beginning of the invalid sequence.
            //
            val.first += i1;
            val.second -= i1;

            return r;
          }
        }

        // Note that i can be 0 here if the very first character is a
        // multi-byte sequence that does not fit into the remaining capacity
        // (in which case NULL and 0 are returned).
        //
        if (i != 0) // We have a chunk.
        {
          r = {val.first, i};

          val.first += i;
          val.second -= i;
        }

        return r;
      };

      // Value's original size (used to calculate the offset of the errant
      // character in case of a validation failure).
      //
      const size_t vn (val.second);

      // Write the separator, if any.
      //
      if (sep.second != 0)
      {
        if (cap < sep.second && !grow (sep.second))
          goto fail_nospace;

        append (sep.first, sep.second);
      }

      // Write the value's opening quote, if requested.
      //
      if (q != '\0')
      {
        if (cap == 0 && !grow (1))
          goto fail_nospace;

        append ("\"", 1);
      }

      // Write the value, unless empty.
      //
      // Each iteration either appends the next chunk or, if no chunk fits
      // (including when there is no capacity left at all), grows the buffer
      // and tries again.
      //
      while (val.second != 0)
      {
        pair<const char*, size_t> ch (nullptr, 0);

        if (cap != 0)
          ch = check ? chunk_checked () : chunk ();

        if (ch.first == nullptr)
        {
          // The minimum extra bytes we need the overflow function to be able
          // to provide is based on these sequences that we do not break:
          //
          // - 4 bytes for a UTF-8 sequence
          // - 6 bytes for an escaped Unicode sequence (\uXXXX).
          //
          if (!grow (6, ch.second))
            goto fail_nospace;
        }
        else if (ch.second != string::npos)
          append (ch.first, ch.second);
        else
          goto fail_utf8;
      }

      // Write the value's closing quote, if requested.
      //
      if (q != '\0')
      {
        if (cap == 0 && !grow (1))
          goto fail_nospace;

        append ("\"", 1);
      }

      return;

      // Note: keep descriptions consistent with the parser.
      //
      // The offset is the number of value bytes consumed before the invalid
      // sequence.
      //
    fail_utf8:
      throw invalid_json_output (e,
                                 e == event::name ? error_code::invalid_name
                                                  : error_code::invalid_value,
                                 "invalid UTF-8 text",
                                 vn - val.second);

    fail_nospace:
      throw invalid_json_output (
          e, error_code::buffer_overflow, "insufficient space in buffer");
    }
655 | 
656 |     size_t buffer_serializer::
657 |     to_chars_impl (char* b, size_t n, const char* f, ...)
658 |     {
659 |       va_list a;
660 |       va_start (a, f);
661 |       const int r (vsnprintf (b, n, f, a));
662 |       va_end (a);
663 | 
664 |       if (r < 0 || r >= static_cast<int> (n))
665 |       {
666 |         throw invalid_json_output (event::number,
667 |                                    error_code::invalid_value,
668 |                                    "unable to convert number to string");
669 |       }
670 | 
671 |       return static_cast<size_t> (r);
672 |     }
673 |   }
674 | }
675 | 


--------------------------------------------------------------------------------
/libstud/json/serializer.hxx:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <array>
  4 | #include <iosfwd>
  5 | #include <string>
  6 | #include <vector>
  7 | #include <cstddef>     // size_t, nullptr_t
  8 | #include <utility>     // pair
  9 | #include <stdexcept>   // invalid_argument
 10 | #include <type_traits> // enable_if, is_*
 11 | 
 12 | #include <libstud/optional.hxx> // stud::optional is std::optional or similar.
 13 | 
 14 | #include <libstud/json/event.hxx>
 15 | 
 16 | #include <libstud/json/export.hxx>
 17 | 
 18 | namespace stud
 19 | {
 20 |   // Using the RFC8259 terminology: JSON (output) text, JSON value, object
 21 |   // member.
 22 |   //
 23 |   namespace json
 24 |   {
    // Exception thrown by the serializer to report invalid output (an
    // unexpected event, an invalid name or value, or lack of buffer space).
    //
    // The description is available via what() (inherited from
    // std::invalid_argument).
    //
    class invalid_json_output: public std::invalid_argument
    {
    public:
      using event_type = json::event;

      // Error category.
      //
      enum class error_code
      {
        buffer_overflow,  // Insufficient space in the output buffer.
        unexpected_event, // Event is not valid in the current state.
        invalid_name,     // Invalid object member name (e.g., invalid UTF-8).
        invalid_value     // Invalid value (e.g., invalid UTF-8).
      };

      // Construct with the description as a C-string. The offset defaults
      // to string::npos (no offset).
      //
      invalid_json_output (optional<event_type> event,
                           error_code code,
                           const char* description,
                           std::size_t offset = std::string::npos);

      // As above but with the description as std::string.
      //
      invalid_json_output (optional<event_type> event,
                           error_code code,
                           const std::string& description,
                           std::size_t offset = std::string::npos);

      // Event that triggered the error. If the error is in the value, then
      // offset points to the offending byte (for example, the beginning of an
      // invalid UTF-8 byte sequence). Otherwise, offset is string::npos.
      //
      optional<event_type> event;
      error_code           code;
      std::size_t          offset;
    };
 56 | 
 57 |     // The serializer makes sure the resulting JSON is syntactically but not
 58 |     // necessarily semantically correct. For example, it's possible to
 59 |     // serialize a number event with non-numeric data.
 60 |     //
 61 |     // Note that unlike the parser, the serializer is always in the multi-
 62 |     // value mode allowing the serialization of zero or more values. Note also
 63 |     // that while values are by default separated with newlines, there is no
 64 |     // trailing newline after the last (or only) value and the user is
 65 |     // expected to add it manually, if needed.
 66 |     //
 67 |     // Also note that while RFC8259 recommends object members to have unique
 68 |     // names, the serializer does not enforce this.
 69 |     //
 70 |     class LIBSTUD_JSON_SYMEXPORT buffer_serializer
 71 |     {
 72 |     public:
 73 |       // Serialize to string growing it as necessary. Note that the result is
 74 |       // appended to any existing data in the string.
 75 |       //
 76 |       // The indentation argument specifies the number of indentation spaces
 77 |       // that should be used for pretty-printing. If 0 is passed, no
 78 |       // pretty-printing is performed.
 79 |       //
 80 |       // The multi_value_separator argument specifies the character sequence
 81 |       // to use to separate multiple top-level values. NULL or empty string
 82 |       // means no separator. Note that it is kept as a reference and so must
 83 |       // outlive the serializer instance.
 84 |       //
 85 |       explicit
 86 |       buffer_serializer (std::string&,
 87 |                          std::size_t indentation = 2,
 88 |                          const char* multi_value_separator = "\n");
 89 | 
 90 |       // Serialize to vector of characters growing it as necessary. Note that
 91 |       // the result is appended to any existing data in the vector.
 92 |       //
 93 |       explicit
 94 |       buffer_serializer (std::vector<char>&,
 95 |                          std::size_t indentation = 2,
 96 |                          const char* multi_value_separator = "\n");
 97 | 
 98 |       // Serialize to a fixed array.
 99 |       //
100 |       // The length of the output text written is tracked in the size
101 |       // argument.
102 |       //
103 |       // If the array is not big enough to store the entire output text, the
104 |       // next() call that reaches the limit will throw invalid_json_output.
105 |       //
106 |       template <std::size_t N>
107 |       buffer_serializer (std::array<char, N>&, std::size_t& size,
108 |                          std::size_t indentation = 2,
109 |                          const char* multi_value_separator = "\n");
110 | 
111 |       // Serialize to a fixed buffer.
112 |       //
113 |       // The length of the output text written is tracked in the size
114 |       // argument.
115 |       //
116 |       // If the buffer is not big enough to store the entire output text, the
117 |       // next() call that reaches the limit will throw invalid_json_output.
118 |       //
119 |       buffer_serializer (void* buf, std::size_t& size, std::size_t capacity,
120 |                          std::size_t indentation = 2,
121 |                          const char* multi_value_separator = "\n");
122 | 
123 |       // The overflow function is called when the output buffer is out of
124 |       // space. The extra argument is a hint indicating the extra space likely
125 |       // to be required.
126 |       //
127 |       // Possible strategies include re-allocating a larger buffer or flushing
128 |       // the contents of the original buffer to the output destination. In
129 |       // case of a reallocation, the implementation is responsible for copying
130 |       // the contents of the original buffer over.
131 |       //
132 |       // The flush function is called when the complete JSON value has been
133 |       // serialized to the buffer. It can be used to write the contents of the
134 |       // buffer to the output destination. Note that flush is not called after
135 |       // the second absent (nullopt) event (or the only absent event; see
136 |       // next() for details).
137 |       //
138 |       // Both functions are passed the original buffer, its size (the amount
139 |       // of output text), and its capacity. They return (by modifying the
140 |       // argument) the replacement buffer and its size and capacity (these may
141 |       // refer to the original buffer). If space cannot be made available, the
142 |       // implementation can throw an appropriate exception (for example,
143 |       // std::bad_alloc or std::ios_base::failure). Any exceptions thrown is
144 |       // propagated to the user.
145 |       //
146 |       struct buffer
147 |       {
148 |         void*        data;
149 |         std::size_t& size;
150 |         std::size_t  capacity;
151 |       };
152 | 
153 |       using overflow_function = void (void* data,
154 |                                       event,
155 |                                       buffer&,
156 |                                       std::size_t extra);
157 |       using flush_function    = void (void* data, event, buffer&);
158 | 
159 |       // Serialize using a custom buffer and overflow/flush functions (both
160 |       // are optional).
161 |       //
162 |       buffer_serializer (void* buf, std::size_t capacity,
163 |                          overflow_function*,
164 |                          flush_function*,
165 |                          void* data,
166 |                          std::size_t indentation = 2,
167 |                          const char* multi_value_separator = "\n");
168 | 
169 |       // As above but the length of the output text written is tracked in the
170 |       // size argument.
171 |       //
172 |       buffer_serializer (void* buf, std::size_t& size, std::size_t capacity,
173 |                          overflow_function*,
174 |                          flush_function*,
175 |                          void* data,
176 |                          std::size_t indentation = 2,
177 |                          const char* multi_value_separator = "\n");
178 | 
179 |       // Begin/end an object.
180 |       //
181 |       // The member_begin_object() version is a shortcut for:
182 |       //
183 |       //     member_name (name, check);
184 |       //     begin_object ();
185 |       //
186 |       void
187 |       begin_object ();
188 | 
189 |       void
190 |       member_begin_object (const char*, bool check = true);
191 | 
192 |       void
193 |       member_begin_object (const std::string&, bool check = true);
194 | 
195 |       void
196 |       end_object ();
197 | 
198 |       // Serialize an object member (name and value).
199 |       //
200 |       // If check is false, then don't check whether the name (or value, if
201 |       // it's a string) is valid UTF-8 and don't escape any characters.
202 |       //
203 |       template <typename T>
204 |       void
205 |       member (const char* name, const T& value, bool check = true);
206 | 
207 |       template <typename T>
208 |       void
209 |       member (const std::string& name, const T& value, bool check = true);
210 | 
211 |       // Serialize an object member name.
212 |       //
213 |       // If check is false, then don't check whether the name is valid UTF-8
214 |       // and don't escape any characters.
215 |       //
216 |       void
217 |       member_name (const char*, bool check = true);
218 | 
219 |       void
220 |       member_name (const std::string&, bool check = true);
221 | 
222 |       // Begin/end an array.
223 |       //
224 |       // The member_begin_array() version is a shortcut for:
225 |       //
226 |       //     member_name (name, check);
227 |       //     begin_array ();
228 |       //
229 |       void
230 |       begin_array ();
231 | 
232 |       void
233 |       member_begin_array (const char*, bool check = true);
234 | 
235 |       void
236 |       member_begin_array (const std::string&, bool check = true);
237 | 
238 |       void
239 |       end_array ();
240 | 
241 |       // Serialize a string.
242 |       //
243 |       // If check is false, then don't check whether the value is valid UTF-8
244 |       // and don't escape any characters.
245 |       //
246 |       // Note that a NULL C-string pointer is serialized as a null value.
247 |       //
248 |       void
249 |       value (const char*, bool check = true);
250 | 
251 |       void
252 |       value (const std::string&, bool check = true);
253 | 
254 |       // Serialize a number.
255 |       //
256 |       template <typename T>
257 |       typename std::enable_if<std::is_integral<T>::value ||
258 |                               std::is_floating_point<T>::value>::type
259 |       value (T);
260 | 
261 |       // Serialize a boolean value.
262 |       //
263 |       void
264 |       value (bool);
265 | 
266 |       // Serialize a null value.
267 |       //
268 |       void
269 |       value (std::nullptr_t);
270 | 
271 |       // Serialize value as a pre-serialized JSON value.
272 |       //
273 |       // Note that the value is expected to be a valid (and suitable) UTF-8-
274 |       // encoded JSON text. Note also that if pretty-printing is enabled,
275 |       // the resulting output may not be correctly indented.
276 |       //
277 |       void
278 |       value_json_text (const char*);
279 | 
280 |       void
281 |       value_json_text (const std::string&);
282 | 
283 |       // Serialize next JSON event.
284 |       //
285 |       // If check is false, then don't check whether the value is valid UTF-8
286 |       // and don't escape any characters.
287 |       //
288 |       // Return true if more events are required to complete the (top-level)
289 |       // value (that is, it is currently incomplete) and false otherwise.
290 |       // Throw invalid_json_output exception in case of an invalid event or
291 |       // value.
292 |       //
293 |       // At the end of the value an optional absent (nullopt) event can be
294 |       // serialized to verify the value is complete. If it is incomplete an
295 |       // invalid_json_output exception is thrown. An optional followup absent
296 |       // event can be serialized to indicate the completion of a multi-value
297 |       // sequence (one and only absent event indicates a zero value sequence).
298 |       // If anything is serialized to a complete value sequence an
299 |       // invalid_json_output exception is thrown.
300 |       //
301 |       // Note that this function was designed to be easily invoked with the
302 |       // output from parser::next() and parser::data(). For example, for a
303 |       // single-value mode:
304 |       //
305 |       //   optional<event> e;
306 |       //   do
307 |       //   {
308 |       //     e = p.next ();
309 |       //     s.next (e, p.data ());
310 |       //   }
311 |       //   while (e);
312 |       //
313 |       // For a multi-value mode:
314 |       //
315 |       //   while (p.peek ())
316 |       //   {
317 |       //     optional<event> e;
318 |       //     do
319 |       //     {
320 |       //       e = p.next ();
321 |       //       s.next (e, p.data ());
322 |       //     }
323 |       //     while (e);
324 |       //   }
325 |       //   s.next (nullopt); // End of value sequence.
326 |       //
327 |       bool
328 |       next (optional<event> event,
329 |             std::pair<const char*, std::size_t> value = {},
330 |             bool check = true);
331 | 
332 |     private:
333 |       void
334 |       write (event,
335 |              std::pair<const char*, std::size_t> sep,
336 |              std::pair<const char*, std::size_t> val,
337 |              bool check, char quote = '\0');
338 | 
339 |       // Forward a value(v, check) call to value(v) ignoring the check
340 |       // argument. Used in the member() implementation.
341 |       //
342 |       template <typename T>
343 |       void
344 |       value (const T& v, bool /*check*/)
345 |       {
346 |         value (v);
347 |       }
348 | 
349 |       // Convert numbers to string.
350 |       //
351 |       static std::size_t to_chars (char*, std::size_t, int);
352 |       static std::size_t to_chars (char*, std::size_t, long);
353 |       static std::size_t to_chars (char*, std::size_t, long long);
354 |       static std::size_t to_chars (char*, std::size_t, unsigned int);
355 |       static std::size_t to_chars (char*, std::size_t, unsigned long);
356 |       static std::size_t to_chars (char*, std::size_t, unsigned long long);
357 |       static std::size_t to_chars (char*, std::size_t, double);
358 |       static std::size_t to_chars (char*, std::size_t, long double);
359 | 
360 |       static std::size_t to_chars_impl (char*, size_t, const char* fmt, ...);
361 | 
362 |       buffer buf_;
363 |       std::size_t size_;
364 |       overflow_function* overflow_;
365 |       flush_function* flush_;
366 |       void* data_;
367 | 
368 |       // State of a "structured type" (array or object; as per the RFC
369 |       // terminology).
370 |       //
371 |       struct state
372 |       {
373 |         const event type;  // Type kind (begin_array or begin_object).
374 |         std::size_t count; // Number of events serialized inside this type.
375 |       };
376 | 
377 |       // Stack of nested structured type states.
378 |       //
379 |       // @@ TODO: would have been nice to use small_vector.
380 |       //
381 |       std::vector<state> state_;
382 | 
383 |       // The number of consecutive absent events (nullopt) serialized thus
384 |       // far.
385 |       //
386 |       // Note: initialized to 1 to naturally handle a single absent event
387 |       // (declares an empty value sequence complete).
388 |       //
389 |       std::size_t absent_ = 1;
390 | 
391 |       // The number of spaces with which to indent (once for each level of
392 |       // nesting). If zero, pretty-printing is disabled.
393 |       //
394 |       std::size_t indent_;
395 | 
396 |       // Separator and indentation before/after value inside an object or
397 |       // array (see pretty-printing implementation for details).
398 |       //
399 |       std::string sep_;
400 | 
401 |       // The number of complete top-level values serialized thus far.
402 |       //
403 |       std::size_t values_ = 0;
404 | 
405 |       // Multi-value separator.
406 |       //
407 |       const char* mv_separator_;
408 |     };
409 | 
    // Serializer that writes the output text to std::ostream. All the
    // serialization functions are inherited from buffer_serializer.
    //
    class LIBSTUD_JSON_SYMEXPORT stream_serializer: public buffer_serializer
    {
    public:
      // Serialize to std::ostream.
      //
      // If stream exceptions are enabled then the std::ios_base::failure
      // exception is used to report input/output errors (badbit and failbit).
      // Otherwise, those are reported as the invalid_json_output exception.
      //
      // The indentation and multi_value_separator arguments have the same
      // defaults as in the buffer_serializer constructors.
      //
      explicit
      stream_serializer (std::ostream&,
                         std::size_t indentation = 2,
                         const char* multi_value_separator = "\n");

    protected:
      // Fixed-size (4096 bytes) character buffer.
      //
      // NOTE(review): presumably the intermediate output buffer the contents
      // of which are written to the stream on overflow/flush; the constructor
      // is not defined in this header so confirm against the implementation.
      //
      char tmp_[4096];
    };
427 |   }
428 | }
429 | 
430 | #include <libstud/json/serializer.ixx>
431 | 


--------------------------------------------------------------------------------
/libstud/json/serializer.ixx:
--------------------------------------------------------------------------------
  1 | #include <cstring> // strlen()
  2 | 
  3 | namespace stud
  4 | {
  5 |   namespace json
  6 |   {
  7 |     inline invalid_json_output::
  8 |     invalid_json_output (optional<event_type> e,
  9 |                          error_code c,
 10 |                          const char* d,
 11 |                          std::size_t o)
 12 |         : std::invalid_argument (d), event (e), code (c), offset (o)
 13 |     {
 14 |     }
 15 | 
 16 |     inline invalid_json_output::
 17 |     invalid_json_output (optional<event_type> e,
 18 |                          error_code c,
 19 |                          const std::string& d,
 20 |                          std::size_t o)
 21 |         : invalid_json_output (e, c, d.c_str (), o)
 22 |     {
 23 |     }
 24 | 
 25 |     inline buffer_serializer::
 26 |     buffer_serializer (void* b, std::size_t& s, std::size_t c,
 27 |                        overflow_function* o, flush_function* f, void* d,
 28 |                        std::size_t i, const char* mvs)
 29 |         : buf_ {b, s, c},
 30 |           overflow_ (o),
 31 |           flush_ (f),
 32 |           data_ (d),
 33 |           indent_ (i),
 34 |           sep_ (indent_ != 0 ? ",\n" : ""),
 35 |           mv_separator_ (mvs)
 36 |     {
 37 |     }
 38 | 
 39 |     template <std::size_t N>
 40 |     inline buffer_serializer::
 41 |     buffer_serializer (std::array<char, N>& a, std::size_t& s,
 42 |                        std::size_t i, const char* mvs)
 43 |         : buffer_serializer (a.data (), s, a.size (),
 44 |                              nullptr, nullptr, nullptr,
 45 |                              i, mvs)
 46 |     {
 47 |     }
 48 | 
 49 |     inline buffer_serializer::
 50 |     buffer_serializer (void* b, std::size_t& s, std::size_t c,
 51 |                        std::size_t i, const char* mvs)
 52 |         : buffer_serializer (b, s, c, nullptr, nullptr, nullptr, i, mvs)
 53 |     {
 54 |     }
 55 | 
 56 |     inline buffer_serializer::
 57 |     buffer_serializer (void* b, std::size_t c,
 58 |                        overflow_function* o, flush_function* f, void* d,
 59 |                        std::size_t i, const char* mvs)
 60 |         : buffer_serializer (b, size_, c, o, f, d, i, mvs)
 61 |     {
 62 |       size_ = 0;
 63 |     }
 64 | 
 65 |     inline void buffer_serializer::
 66 |     begin_object ()
 67 |     {
 68 |       next (event::begin_object);
 69 |     }
 70 | 
 71 |     inline void buffer_serializer::
 72 |     end_object ()
 73 |     {
 74 |       next (event::end_object);
 75 |     }
 76 | 
 77 |     inline void buffer_serializer::
 78 |     member_name (const char* n, bool c)
 79 |     {
 80 |       next (event::name, {n, n != nullptr ? std::strlen (n) : 0}, c);
 81 |     }
 82 | 
 83 |     inline void buffer_serializer::
 84 |     member_name (const std::string& n, bool c)
 85 |     {
 86 |       next (event::name, {n.c_str (), n.size ()}, c);
 87 |     }
 88 | 
 89 |     inline void buffer_serializer::
 90 |     member_begin_object (const char* n, bool c)
 91 |     {
 92 |       member_name (n, c);
 93 |       begin_object ();
 94 |     }
 95 | 
 96 |     inline void buffer_serializer::
 97 |     member_begin_object (const std::string& n, bool c)
 98 |     {
 99 |       member_name (n, c);
100 |       begin_object ();
101 |     }
102 | 
103 |     template <typename T>
104 |     inline void buffer_serializer::
105 |     member (const char* n, const T& v, bool c)
106 |     {
107 |       member_name (n, c);
108 |       value (v, c);
109 |     }
110 | 
111 |     template <typename T>
112 |     inline void buffer_serializer::
113 |     member (const std::string& n, const T& v, bool c)
114 |     {
115 |       member_name (n, c);
116 |       value (v, c);
117 |     }
118 | 
119 |     inline void buffer_serializer::
120 |     begin_array ()
121 |     {
122 |       next (event::begin_array);
123 |     }
124 | 
125 |     inline void buffer_serializer::
126 |     member_begin_array (const char* n, bool c)
127 |     {
128 |       member_name (n, c);
129 |       begin_array ();
130 |     }
131 | 
132 |     inline void buffer_serializer::
133 |     member_begin_array (const std::string& n, bool c)
134 |     {
135 |       member_name (n, c);
136 |       begin_array ();
137 |     }
138 | 
139 |     inline void buffer_serializer::
140 |     end_array ()
141 |     {
142 |       next (event::end_array);
143 |     }
144 | 
145 |     inline void buffer_serializer::
146 |     value (const char* v, bool c)
147 |     {
148 |       if (v != nullptr)
149 |         next (event::string, {v, std::strlen (v)}, c);
150 |       else
151 |         next (event::null);
152 |     }
153 | 
154 |     inline void buffer_serializer::
155 |     value (const std::string& v, bool c)
156 |     {
157 |       next (event::string, {v.c_str (), v.size ()}, c);
158 |     }
159 | 
160 |     template <typename T>
161 |     typename std::enable_if<std::is_integral<T>::value ||
162 |                             std::is_floating_point<T>::value>::type
163 |     buffer_serializer::
164 |     value (T v)
165 |     {
166 |       // The largest 128-bit integer has 39 digits, and long floating point
167 |       // numbers will fit because they are output in scientific notation.
168 |       //
169 |       char b[40];
170 |       const std::size_t n (to_chars (b, sizeof (b), v));
171 |       next (event::number, {b, n});
172 |     }
173 | 
174 |     inline void buffer_serializer::
175 |     value (bool b)
176 |     {
177 |       next (event::boolean,
178 |             b ? std::make_pair ("true", 4) : std::make_pair ("false", 5));
179 |     }
180 | 
181 |     inline void buffer_serializer::
182 |     value (std::nullptr_t)
183 |     {
184 |       next (event::null);
185 |     }
186 | 
187 |     inline void buffer_serializer::
188 |     value_json_text (const char* v)
189 |     {
190 |       // Use event::number (which doesn't involve any quoting) with a disabled
191 |       // check.
192 |       //
193 |       next (event::number, {v, std::strlen (v)}, false /* check */);
194 |     }
195 | 
196 |     inline void buffer_serializer::
197 |     value_json_text (const std::string& v)
198 |     {
199 |       next (event::number, {v.c_str (), v.size ()}, false /* check */);
200 |     }
201 | 
202 |     inline size_t buffer_serializer::
203 |     to_chars (char* b, size_t s, int v)
204 |     {
205 |       return to_chars_impl (b, s, "%d", v);
206 |     }
207 | 
208 |     inline size_t buffer_serializer::
209 |     to_chars (char* b, size_t s, long v)
210 |     {
211 |       return to_chars_impl (b, s, "%ld", v);
212 |     }
213 | 
214 |     inline size_t buffer_serializer::
215 |     to_chars (char* b, size_t s, long long v)
216 |     {
217 |       return to_chars_impl (b, s, "%lld", v);
218 |     }
219 | 
220 |     inline size_t buffer_serializer::
221 |     to_chars (char* b, size_t s, unsigned v)
222 |     {
223 |       return to_chars_impl (b, s, "%u", v);
224 |     }
225 | 
226 |     inline size_t buffer_serializer::
227 |     to_chars (char* b, size_t s, unsigned long v)
228 |     {
229 |       return to_chars_impl (b, s, "%lu", v);
230 |     }
231 | 
232 |     inline size_t buffer_serializer::
233 |     to_chars (char* b, size_t s, unsigned long long v)
234 |     {
235 |       return to_chars_impl (b, s, "%llu", v);
236 |     }
237 | 
238 |     inline size_t buffer_serializer::
239 |     to_chars (char* b, size_t s, double v)
240 |     {
241 |       return to_chars_impl (b, s, "%.10g", v);
242 |     }
243 | 
244 |     inline size_t buffer_serializer::
245 |     to_chars (char* b, size_t s, long double v)
246 |     {
247 |       return to_chars_impl (b, s, "%.10Lg", v);
248 |     }
249 |   }
250 | }
251 | 


--------------------------------------------------------------------------------
/libstud/json/version.hxx.in:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | // The numeric version format is AAAAABBBBBCCCCCDDDE where:
 4 | //
 5 | // AAAAA - major version number
 6 | // BBBBB - minor version number
 7 | // CCCCC - bugfix version number
 8 | // DDD   - alpha / beta (DDD + 500) version number
 9 | // E     - final (0) / snapshot (1)
10 | //
11 | // When DDDE is not 0, 1 is subtracted from AAAAABBBBBCCCCC. For example:
12 | //
13 | // Version      AAAAABBBBBCCCCCDDDE
14 | //
15 | // 0.1.0        0000000001000000000
16 | // 0.1.2        0000000001000020000
17 | // 1.2.3        0000100002000030000
18 | // 2.2.0-a.1    0000200001999990010
19 | // 3.0.0-b.2    0000299999999995020
20 | // 2.2.0-a.1.z  0000200001999990011
21 | //
22 | #define LIBSTUD_JSON_VERSION       $libstud_json.version.project_number$ULL
23 | #define LIBSTUD_JSON_VERSION_STR   "$libstud_json.version.project$"
24 | #define LIBSTUD_JSON_VERSION_ID    "$libstud_json.version.project_id$"
25 | 
26 | #define LIBSTUD_JSON_VERSION_MAJOR $libstud_json.version.major$
27 | #define LIBSTUD_JSON_VERSION_MINOR $libstud_json.version.minor$
28 | #define LIBSTUD_JSON_VERSION_PATCH $libstud_json.version.patch$
29 | 
30 | #define LIBSTUD_JSON_PRE_RELEASE   $libstud_json.version.pre_release$
31 | 
32 | #define LIBSTUD_JSON_SNAPSHOT_SN   $libstud_json.version.snapshot_sn$ULL
33 | #define LIBSTUD_JSON_SNAPSHOT_ID   "$libstud_json.version.snapshot_id$"
34 | 


--------------------------------------------------------------------------------
/manifest:
--------------------------------------------------------------------------------
 1 | : 1
 2 | name: libstud-json
 3 | project: libstud
 4 | version: 0.5.1-a.0.z
 5 | summary: JSON pull-parser/push-serializer library for C++
 6 | license: MIT ; MIT License.
 7 | topics: C++, JSON, parsing, serialization, streaming
 8 | description-file: README.md
 9 | changes-file: NEWS
10 | url: https://github.com/libstud/libstud-json
11 | email: libstud-authors@build2.org
12 | build-warning-email: libstud-authors@build2.org
13 | builds: all
14 | depends: * build2 >= 0.18.0-
15 | depends: * bpkg >= 0.18.0-
16 | depends: libstud-optional ^1.0.0
17 | 


--------------------------------------------------------------------------------
/repositories.manifest:
--------------------------------------------------------------------------------
1 | : 1
2 | summary: libstud-json project repository
3 | 
4 | :
5 | role: prerequisite
6 | location: ../libstud-optional.git##HEAD
7 | 


--------------------------------------------------------------------------------
/tests/.gitignore:
--------------------------------------------------------------------------------
1 | # Test executables.
2 | #
3 | driver
4 | 
5 | # Testscript output directories (can be symlinks).
6 | #
7 | test
8 | test-*
9 | 


--------------------------------------------------------------------------------
/tests/build/.gitignore:
--------------------------------------------------------------------------------
1 | config.build
2 | root/
3 | bootstrap/
4 | 


--------------------------------------------------------------------------------
/tests/build/bootstrap.build:
--------------------------------------------------------------------------------
1 | project = # Unnamed tests subproject.
2 | 
3 | using version
4 | using config
5 | using test
6 | using dist
7 | 


--------------------------------------------------------------------------------
/tests/build/root.build:
--------------------------------------------------------------------------------
 1 | cxx.std = latest
 2 | 
 3 | using cxx
 4 | 
 5 | hxx{*}: extension = hxx
 6 | ixx{*}: extension = ixx
 7 | txx{*}: extension = txx
 8 | cxx{*}: extension = cxx
 9 | 
10 | if ($cxx.target.system == 'win32-msvc')
11 |   cxx.poptions += -D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS
12 | 
13 | if ($cxx.class == 'msvc')
14 |   cxx.coptions += /wd4251 /wd4275 /wd4800
15 | 
16 | # Every exe{} in this subproject is by default a test.
17 | #
18 | exe{*}: test = true
19 | 
20 | # The test target for cross-testing (running tests under Wine, etc).
21 | #
22 | test.target = $cxx.target
23 | 


--------------------------------------------------------------------------------
/tests/buildfile:
--------------------------------------------------------------------------------
1 | ./: {*/ -build/}
2 | 


--------------------------------------------------------------------------------
/tests/parser/basics/basics.testscript:
--------------------------------------------------------------------------------
  1 | : basic
  2 | :
  3 | $* <<EOI >>EOO
  4 | {
  5 |   "string":  "str",
  6 |   "number":  123,
  7 |   "boolean": true,
  8 |   "null":    null,
  9 |   "array":   ["str", 123, false, null],
 10 |   "subobj":  {"foo": "bar"}
 11 | }
 12 | EOI
 13 |   1,  1: {
 14 |   2,  3:   string
 15 |   2, 14:   "str"
 16 |   3,  3:   number
 17 |   3, 14:   123
 18 |   4,  3:   boolean
 19 |   4, 14:   true
 20 |   5,  3:   null
 21 |   5, 14:   NULL
 22 |   6,  3:   array
 23 |   6, 14:   [
 24 |   6, 15:     "str"
 25 |   6, 22:     123
 26 |   6, 27:     false
 27 |   6, 34:     NULL
 28 |   6, 38:   ]
 29 |   7,  3:   subobj
 30 |   7, 14:   {
 31 |   7, 15:     foo
 32 |   7, 22:     "bar"
 33 |   7, 27:   }
 34 |   8,  1: }
 35 | EOO
 36 | 
 37 | : empty
 38 | :
 39 | $* 2>>EOE != 0
 40 | <stdin>:1:1: error: unexpected end of text
 41 | EOE
 42 | 
 43 | : fail-exception
 44 | :
 45 | $* --fail-exc <'{' >'  1,  1: {' 2>>EOE != 0
 46 | error: unable to read from stdin
 47 | EOE
 48 | 
 49 | : fail-bit
 50 | :
 51 | $* --fail-bit <'{' >'  1,  1: {' 2>>EOE != 0
 52 | <stdin>:1:1: error: unable to read JSON input text
 53 | EOE
 54 | 
 55 | : value-sint
 56 | :
 57 | {{
 58 |   : max
 59 |   :
 60 |   $* i <'2147483647' >'  1,  1: 2147483647'
 61 | 
 62 |   : min
 63 |   :
 64 |   $* i <'-2147483648' >'  1,  1: -2147483648'
 65 | 
 66 |   : fail-min
 67 |   :
 68 |   $* i <'-2147483649' 2>>EOE != 0
 69 |   <stdin>:1:1: error: invalid signed integer value: '-2147483649'
 70 |   EOE
 71 | 
 72 |   : fail-max
 73 |   :
 74 |   $* i <'2147483648' 2>>EOE != 0
 75 |   <stdin>:1:1: error: invalid signed integer value: '2147483648'
 76 |   EOE
 77 | }}
 78 | 
 79 | : value-uint
 80 | :
 81 | {{
 82 |   : max
 83 |   :
 84 |   $* u <'4294967295' >'  1,  1: 4294967295'
 85 | 
 86 |   : fail-max
 87 |   :
 88 |   $* u <'4294967296' 2>>EOE != 0
 89 |   <stdin>:1:1: error: invalid unsigned integer value: '4294967296'
 90 |   EOE
 91 | }}
 92 | 
 93 | # Testing floating point values is a pain. So we only check the success cases
 94 | # trusting the strto*() functions to detect range issues as expected.
 95 | #
 96 | : value-float
 97 | :
 98 | $* f <'0.123' >~'/  1,  1: 0.123[0-9]*/'
 99 | 
100 | : value-double
101 | :
102 | $* d <'0.1234' >~'/  1,  1: 0.1234[0-9]*/'
103 | 
104 | : value-long-double
105 | :
106 | $* l <'0.12345' >~'/  1,  1: 0.12345[0-9]*/'
107 | 
108 | # Test a bunch of corner cases.
109 | #
110 | : leading-zero
111 | :
112 | $* <'01' >'  1,  1: 0' 2>>EOE != 0
113 | <stdin>:1:2: error: expected end of text instead of byte '1'
114 | EOE
115 | 
116 | : wrong-letter
117 | :
118 | $* <'trux' 2>>EOE != 0
119 | <stdin>:1:4: error: expected 'e' instead of byte 'x'
120 | EOE
121 | 
122 | : error-within-value
123 | : Checks that, in case of invalid multi-character token, the column number
124 | : points to the errant character instead of the beginning of the token.
125 | :
126 | $* <'12a45' >'  1,  1: 12' 2>>EOE != 0
127 | <stdin>:1:3: error: expected end of text instead of byte 'a'
128 | EOE
129 | 
130 | : end-of-text
131 | :
132 | {{
133 |   # Note: suppress trailing newline in input.
134 | 
135 |   : array
136 |   :
137 |   $* <:'[2' >- 2>>EOE != 0
138 |   <stdin>:1:2: error: unexpected end of text
139 |   EOE
140 | 
141 |   : literal
142 |   :
143 |   $* <:'tru' 2>>EOE != 0
144 |   <stdin>:1:3: error: expected 'e' instead of end of text
145 |   EOE
146 | }}
147 | 
148 | : utf-8
149 | :
150 | {{
151 |   : 2-byte
152 |   :
153 |   {{
154 |     : literal
155 |     :
156 |     $* <'"¢"' >'  1,  1: "¢"'
157 | 
158 |     : escaped
159 |     :
160 |     $* <'"\u00A2"' >'  1,  1: "¢"'
161 |   }}
162 | 
163 |   : 3-byte
164 |   :
165 |   {{
166 |     : literal
167 |     :
168 |     $* <'"ह"' >'  1,  1: "ह"'
169 | 
170 |     : escaped
171 |     :
172 |     $* <'"\u0939"' >'  1,  1: "ह"'
173 |   }}
174 | 
175 |   : 2-and-3-byte
176 |   :
177 |   $* <'"ह\u00A2¢\u0939"' >'  1,  1: "ह¢¢ह"'
178 | 
179 |   : column-numbers
180 |   : Checks that literal/directly-embedded multi-byte codepoints occupy only a
181 |   : single column and that escaped codepoints occupy as many columns as there
182 |   : are characters in the escape sequence.
183 |   :
184 |   $* <'["ह","¢","\u20AC"]' >>EOO
185 |     1,  1: [
186 |     1,  2:   "ह"
187 |     1,  6:   "¢"
188 |     1, 10:   "€"
189 |     1, 18: ]
190 |   EOO
191 | 
192 |   : Invalid byte
193 |   :
194 |   $* <'"\u0MA2"' 2>>EOE != 0
195 |   <stdin>:1:5: error: invalid escape Unicode byte 'M'
196 |   EOE
197 | }}
198 | 


--------------------------------------------------------------------------------
/tests/parser/basics/buildfile:
--------------------------------------------------------------------------------
 1 | import libs = libstud-json%lib{stud-json}
 2 | 
 3 | ./: exe{driver}: {cxx}{driver} $libs
 4 | 
 5 | # Run the tests twice, once as is and another time with a pre-peek of every
 6 | # token (as an extra test for the peek logic). If/when we have support for a
 7 | # for-loop in Testscript we can handle this cleanly there. For now we use this
 8 | # alias trick (or hack, if you wish).
 9 | #
10 | exe{driver}: test = false
11 | 
12 | ./: alias{default peek}: exe{driver} testscript{*}
13 | {
14 |   test = exe{driver}
15 | }
16 | 
17 | alias{peek}: test.options += --peek
18 | 


--------------------------------------------------------------------------------
/tests/parser/basics/driver.cxx:
--------------------------------------------------------------------------------
  1 | // Usage: argv[0] [--multi[=<sep>]] [--peek] --fail-exc|--fail-bit|[<mode>]
  2 | //
  3 | // --multi=<sep> -- enable multi-value mode with the specified separators
  4 | // --peek        -- pre-peek every token before parsing (must come first)
  5 | // --fail-exc    -- fail due to istream exception
  6 | // --fail-bit    -- fail due to istream badbit
  7 | // <mode>        -- numeric value parsing mode: i|u|f|d|l|
  8 | 
  9 | #include <cstdint>
 10 | #include <iostream>
 11 | #include <iomanip>
 12 | 
 13 | #include <libstud/json/parser.hxx>
 14 | 
 15 | #undef NDEBUG
 16 | #include <cassert>
 17 | 
 18 | using namespace std;
 19 | namespace json = stud::json;
 20 | 
 21 | static string
 22 | number (const string& m, json::parser& p)
 23 | {
 24 |   if (m ==  "") return p.value ();
 25 |   if (m == "i") return to_string (p.value<int32_t> ());
 26 |   if (m == "u") return to_string (p.value<uint32_t> ());
 27 |   if (m == "f") return to_string (p.value<float> ());
 28 |   if (m == "d") return to_string (p.value<double> ());
 29 |   if (m == "l") return to_string (p.value<long double> ());
 30 | 
 31 |   assert (false);
 32 |   return "";
 33 | }
 34 | 
 35 | int main (int argc, const char* argv[])
 36 | {
 37 |   bool multi (false);
 38 |   const char* sep (nullptr);
 39 |   bool peek (false);
 40 |   bool fail_exc (false);
 41 |   bool fail_bit (false);
 42 | 
 43 |   string nm;
 44 |   for (int i (1); i < argc; ++i)
 45 |   {
 46 |     string o (argv[i]);
 47 | 
 48 |     if (o.compare (0, 7, "--multi") == 0)
 49 |     {
 50 |       multi = true;
 51 |       if (o.size () > 7)
 52 |         sep = argv[i] + 8;
 53 |       continue;
 54 |     }
 55 | 
 56 |     if (o == "--peek")
 57 |     {
 58 |       peek = true;
 59 |       continue;
 60 |     }
 61 | 
 62 |     if      (o == "--fail-exc") fail_exc = true;
 63 |     else if (o == "--fail-bit") fail_bit = true;
 64 |     else nm = move (o);
 65 |     break; // One of these should be last.
 66 |   }
 67 | 
 68 |   try
 69 |   {
 70 |     using namespace json;
 71 | 
 72 |     // It's not easy to cause the stream to fail when called by the parser.
 73 |     // So we will fail on EOF as the next best thing.
 74 |     //
 75 |     if (!fail_bit)
 76 |       cin.exceptions (istream::badbit  |
 77 |                       istream::failbit |
 78 |                       (fail_exc ? istream::eofbit : istream::goodbit));
 79 | 
 80 |     parser p (cin, "<stdin>", multi, sep);
 81 |     size_t i (0); // Indentation.
 82 | 
 83 |     cout << right << setfill (' '); // Line number formatting.
 84 | 
 85 |     auto process_event = [&p, &i, nm, fail_bit] (event e)
 86 |     {
 87 |       size_t j (i);
 88 |       string s;
 89 | 
 90 |       switch (e)
 91 |       {
 92 |       case event::begin_object: s = "{";     i += 2;                     break;
 93 |       case event::end_object:   s = "}"; j = i -= 2;                     break;
 94 |       case event::begin_array:  s = "[";     i += 2;                     break;
 95 |       case event::end_array:    s = "]"; j = i -= 2;                     break;
 96 |       case event::name:         s = p.name ();                           break;
 97 |       case event::string:       s = '"' + p.value () + '"';              break;
 98 |       case event::number:       s = number (nm, p);                      break;
 99 |       case event::boolean:      s = p.value<bool> () ? "true" : "false"; break;
100 |       case event::null:         s = "NULL";                              break;
101 |       }
102 | 
103 |       cout << setw (3) << p.line () << "," << setw (3) << p.column () << ": "
104 |            << string (j, ' ') << s << '\n';
105 | 
106 |       if (fail_bit)
107 |         cin.setstate (istream::badbit);
108 |     };
109 | 
110 |     // Use the "canonical" parsing code for both modes.
111 |     //
112 |     if (!multi)
113 |     {
114 |       if (peek)
115 |         p.peek ();
116 | 
117 |       for (event e: p)
118 |       {
119 |         process_event (e);
120 | 
121 |         if (peek)
122 |           p.peek ();
123 |       }
124 |     }
125 |     else
126 |     {
127 |       while (p.peek ())
128 |         for (event e: p)
129 |         {
130 |           process_event (e);
131 | 
132 |           if (peek)
133 |             p.peek ();
134 |         }
135 |     }
136 | 
137 |     return 0;
138 |   }
139 |   catch (const json::invalid_json_input& e)
140 |   {
141 |     cerr << e.name << ':' << e.line << ':' << e.column << ": error: "
142 |          << e.what () << endl;
143 |   }
144 |   catch (const istream::failure&)
145 |   {
146 |     cerr << "error: unable to read from stdin" << endl;
147 |   }
148 | 
149 |   return 1;
150 | }
151 | 


--------------------------------------------------------------------------------
/tests/parser/basics/multi-value.testscript:
--------------------------------------------------------------------------------
  1 | : empty-input
  2 | :
  3 | $* --multi=" " <''
  4 | 
  5 | : single-value
  6 | :
  7 | $* --multi=" " <'1' >'  1,  1: 1'
  8 | 
  9 | : extra-whitespace-ignored
 10 | : Extra JSON whitespace (newlines in this case) is ignored before first value,
 11 | : between values, and after last value.
 12 | :
 13 | $* --multi=" " <<EOI >>EOO
 14 | 
 15 |  1
 16 |  2
 17 | 
 18 | EOI
 19 |   2,  2: 1
 20 |   3,  2: 2
 21 | EOO
 22 | 
 23 | : null-separators
 24 | : Zero or more JSON whitespaces should be accepted.
 25 | :
 26 | {{
 27 |   test.options += --multi
 28 | 
 29 |   : valid
 30 |   : One unseparated pair, one separated pair.
 31 |   :
 32 |   $* <'"1""2" 3' >>EOO
 33 |     1,  1: "1"
 34 |     1,  4: "2"
 35 |     1,  8: 3
 36 |   EOO
 37 | 
 38 |   : invalid-separator
 39 |   : Non-whitespace separator.
 40 |   :
 41 |   $* <'1@2' >'  1,  1: 1' 2>>EOE != 0
 42 |   <stdin>:1:2: error: unexpected byte '@' in value
 43 |   EOE
 44 | }}
 45 | 
 46 | : any-whitespace
 47 | : One or more JSON whitespaces should be accepted.
 48 | :
 49 | {{
 50 |   test.options += --multi=
 51 | 
 52 |   : valid
 53 |   :
 54 |   $* <<EOI >>EOO
 55 |   1 2
 56 |    3
 57 |   EOI
 58 |     1,  1: 1
 59 |     1,  3: 2
 60 |     2,  2: 3
 61 |   EOO
 62 | 
 63 |   : invalid-separator
 64 |   :
 65 |   $* <'1@2' >'  1,  1: 1' 2>>EOE != 0
 66 |   <stdin>:1:2: error: missing separator between JSON values
 67 |   EOE
 68 | 
 69 |   : unseparated
 70 |   :
 71 |   $* <'"1""2"' >'  1,  1: "1"' 2>>EOE != 0
 72 |   <stdin>:1:4: error: missing separator between JSON values
 73 |   EOE
 74 | }}
 75 | 
 76 | : specific-separators
 77 | : At least one of the specified separators must be present.
 78 | :
 79 | {{
 80 |   test.options += --multi=" "
 81 | 
 82 |   : valid
 83 |   :
 84 |   $* <'1 2' >>EOO
 85 |     1,  1: 1
 86 |     1,  3: 2
 87 |   EOO
 88 | 
 89 |   : invalid-separator
 90 |   :
 91 |   $* <<EOI >'  1,  1: 1' 2>>EOE != 0
 92 |   1
 93 |   2
 94 |   EOI
 95 |   <stdin>:2:1: error: missing separator between JSON values
 96 |   EOE
 97 | 
 98 |   : unseparated
 99 |   :
100 |   $* <'"1""2"' >'  1,  1: "1"' 2>>EOE != 0
101 |   <stdin>:1:4: error: missing separator between JSON values
102 |   EOE
103 | }}
104 | 
105 | : multiple-separators
106 | :
107 | {{
108 |   # Newline + space.
109 |   #
110 |   test.options += --multi="
111 |  "
112 | 
113 |   : valid
114 |   :
115 |   $* <<EOI >>EOO
116 |   1 2
117 |   3
118 |    4
119 | 
120 |   EOI
121 |     1,  1: 1
122 |     1,  3: 2
123 |     2,  1: 3
124 |     3,  2: 4
125 |   EOO
126 | 
127 |   : invalid-separator
128 |   :
129 |   $* <"1	2" >'  1,  1: 1' 2>>EOE != 0 # Note: it's a TAB.
130 |   <stdin>:1:3: error: missing separator between JSON values
131 |   EOE
132 | 
133 |   : unseparated
134 |   :
135 |   $* <'"1""2"' >'  1,  1: "1"' 2>>EOE != 0
136 |   <stdin>:1:4: error: missing separator between JSON values
137 |   EOE
138 | }}
139 | 
140 | : non-whitespace-separator
141 | :
142 | {{
143 |   test.options += --multi="@"
144 | 
145 |   : valid
146 |   :
147 |   $* <'1@2@3' >>EOO
148 |     1,  1: 1
149 |     1,  3: 2
150 |     1,  5: 3
151 |   EOO
152 | 
153 |   : invalid-separator
154 |   :
155 |   $* <'1 2' >'  1,  1: 1' 2>>EOE != 0
156 |   <stdin>:1:3: error: missing separator between JSON values
157 |   EOE
158 | 
159 |   : unseparated
160 |   :
161 |   $* <'"1""2"' >'  1,  1: "1"' 2>>EOE != 0
162 |   <stdin>:1:4: error: missing separator between JSON values
163 |   EOE
164 | }}
165 | 
166 | : record-separator
167 | :
168 | {{
169 |   # @@ Currently it's impossible to specify RS characters in testscript so
170 |   #    using `@` until that has been addressed.
171 |   #
172 |   test.options += --multi="@"
173 | 
174 |   : leading-delimited
175 |   : RS-delimited: each value preceded by RS and succeeded by NL/LF.
176 |   :
177 |   $* <<EOI >>EOO
178 |   @1
179 |   @2
180 | 
181 |   EOI
182 |     1,  2: 1
183 |     2,  2: 2
184 |   EOO
185 | 
186 |   : empty
187 |   : While not valid per the RFC, we accept this.
188 |   :
189 |   $* <<EOI
190 | 
191 |     @
192 | 
193 |   EOI
194 | }}
195 | 


--------------------------------------------------------------------------------
/tests/parser/buildfile:
--------------------------------------------------------------------------------
1 | ./: {*/ -fuzz-llvm/}
2 | 


--------------------------------------------------------------------------------
/tests/parser/fuzz-llvm/README.md:
--------------------------------------------------------------------------------
 1 | This is an [LLVM LibFuzzer](https://llvm.org/docs/LibFuzzer.html)-based test.
 2 | 
 3 | A typical setup could look like this:
 4 | 
 5 | ```
 6 | cd libstud-json
 7 | bdep init -C @fuzz cc config.cxx=clang++ config.cxx.coptions="-g -O3 -fsanitize=address,undefined,fuzzer-no-link"
 8 | b ../libstud-json-fuzz/libstud-json/tests/parser/fuzz-llvm/ # Directory may not exist at this point
 9 | cd ../libstud-json-fuzz/libstud-json/tests/parser/fuzz-llvm/
10 | mkdir corpus
11 | ./driver corpus/
12 | ```
13 | 
14 | It is, however, highly recommended to pre-initialize the corpus with as many
15 | samples (both valid and invalid) as possible. The following repositories are a
16 | good starting point:
17 |   * The `test_parsing/` directory from
18 |     [JSONTestSuite](https://github.com/nst/JSONTestSuite)
19 |   * The `json/corpus/` directory from
20 |     [go-fuzz-corpus](https://github.com/dvyukov/go-fuzz-corpus/tree/master)
21 | 


--------------------------------------------------------------------------------
/tests/parser/fuzz-llvm/buildfile:
--------------------------------------------------------------------------------
1 | import libs = libstud-json%lib{stud-json}
2 | 
3 | exe{driver}: {cxx}{driver} $libs
4 | {
5 |   cxx.loptions += -fsanitize=fuzzer
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/parser/fuzz-llvm/driver.cxx:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | 
 3 | #include <libstud/json/parser.hxx>
 4 | 
 5 | #undef NDEBUG
 6 | #include <cassert>
 7 | 
 8 | using namespace std;
 9 | using namespace stud::json;
10 | 
11 | // Parse the data in the specified mode (default or multi-value) returning
12 | // true if the data is valid JSON and false otherwise.
13 | //
14 | static bool
15 | parse (const void* data, size_t size, bool multi, const char* sep)
16 | {
17 |   parser p (data, size, "<buffer>", multi, sep);
18 | 
19 |   auto handle_event = [&p] (event e)
20 |   {
21 |     assert (p.line () >= 1 && p.column () >= 1 && p.position () >= 1);
22 | 
23 |     switch (e)
24 |     {
25 |     case event::begin_object:
26 |     case event::end_object:
27 |     case event::begin_array:
28 |     case event::end_array: break;
29 |     case event::string: p.value (); break;
30 |     case event::name: p.name (); break;
31 |     case event::null: assert (p.value () == "null"); break;
32 |     case event::boolean:
33 |       {
34 |         p.value<bool> ();
35 |         assert (p.value () == "true" || p.value () == "false");
36 |         break;
37 |       }
38 |     case event::number:
39 |       {
40 |         try
41 |         {
42 |           p.value<int64_t> ();
43 |         }
44 |         catch (const invalid_json_input&)
45 |         {
46 |           try
47 |           {
48 |             p.value<double> ();
49 |           }
50 |           catch (const invalid_json_input&)
51 |           {
52 |             p.value ();
53 |           }
54 |         }
55 |         break;
56 |       }
57 |     }
58 |   };
59 | 
60 |   try
61 |   {
62 |     if (!multi)
63 |       for (auto e: p) handle_event (e);
64 |     else
65 |       while (p.peek ())
66 |         for (auto e: p) handle_event (e);
67 | 
68 |     return true;
69 |   }
70 |   catch (const invalid_json_input&)
71 |   {
72 |     return false;
73 |   }
74 | }
75 | 
76 | extern "C" int
77 | LLVMFuzzerTestOneInput (const uint8_t* data, size_t size)
78 | {
79 |   // If it's valid in default mode, don't waste time parsing it in multi-value
80 |   // mode.
81 |   //
82 |   if (!parse (data, size, false, nullptr))
83 |   {
84 |     // Multi-value mode enabled and configured to accept zero or more JSON
85 |     // whitespaces between values. The longer the list of accepted separator
86 |     // characters, the better the balance with the hundreds of invalid
87 |     // possibilities.
88 |     //
89 |     parse (data, size, true, nullptr);
90 |   }
91 |   return 0;
92 | }
93 | 


--------------------------------------------------------------------------------
/tests/parser/peek/buildfile:
--------------------------------------------------------------------------------
1 | import libs = libstud-json%lib{stud-json}
2 | 
3 | exe{driver}: {cxx}{driver} $libs
4 | 


--------------------------------------------------------------------------------
/tests/parser/peek/driver.cxx:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | 
  3 | #include <libstud/optional.hxx>
  4 | #include <libstud/json/parser.hxx>
  5 | 
  6 | #undef NDEBUG
  7 | #include <cassert>
  8 | 
  9 | using namespace std;
 10 | using namespace stud::json;
 11 | 
 12 | int
 13 | main ()
 14 | {
 15 |   using stud::nullopt;
 16 | 
 17 |   // Value in initial state: no raw data before any peek() or next().
 18 |   //
 19 |   {
 20 |     parser p ("1", "test");
 21 |     // assert (p.value ().empty ());
 22 |     assert (p.data ().first == nullptr);
 23 |     assert (p.data ().second == 0);
 24 |   }
 25 | 
 26 |   // Peek in initial state (before any next()s): no value available except
 27 |   // through data().
 28 |   //
 29 |   {
 30 |     parser p ("1", "test");
 31 |     assert (p.peek () == event::number);
 32 |     // assert (p.value ().empty ());
 33 |     assert (p.data ().first != nullptr);
 34 |     assert (string (p.data ().first) == "1");
 35 |   }
 36 | 
 37 |   // Next in initial state.
 38 |   //
 39 |   {
 40 |     parser p ("1", "test");
 41 |     assert (p.next () == event::number);
 42 |     assert (p.value<int> () == 1);
 43 |     assert (p.data ().first != nullptr);
 44 |     assert (string (p.data ().first) == "1");
 45 |   }
 46 | 
 47 |   // Peek followed by next.
 48 |   //
 49 |   {
 50 |     parser p ("1", "test");
 51 |     assert (p.peek () == event::number);
 52 |     // assert (p.value ().empty ());
 53 |     assert (p.data ().first != nullptr);
 54 |     assert (string (p.data ().first) == "1");
 55 |     assert (p.data ().second == 1);
 56 | 
 57 |     assert (p.next () == event::number);
 58 |     assert (p.value<int> () == 1);
 59 |     assert (string (p.data ().first) == "1");
 60 |     assert (p.data ().second == 1);
 61 |   }
 62 | 
 63 |   // Next followed by peek.
 64 |   //
 65 |   {
 66 |     parser p ("[1,2]", "test");
 67 |     assert (p.next () == event::begin_array);
 68 |     assert (p.next () == event::number);
 69 |     assert (p.value<int> () == 1);
 70 | 
 71 |     assert (p.peek () == event::number);
 72 |     assert (p.value<int> () == 1);
 73 |   }
 74 | 
 75 |   // Latest value always available via data().
 76 |   //
 77 |   {
 78 |     parser p ("[1,222]", "test");
 79 |     assert (p.peek () == event::begin_array);
 80 |     assert (p.data ().first == nullptr);
 81 |     assert (p.data ().second == 0);
 82 | 
 83 |     assert (p.next () == event::begin_array);
 84 |     assert (p.data ().first == nullptr);
 85 |     assert (p.data ().second == 0);
 86 | 
 87 |     // Peeked value accessible in raw form.
 88 |     //
 89 |     assert (p.peek () == event::number);
 90 |     assert (p.data ().first != nullptr);
 91 |     assert (string (p.data ().first) == "1");
 92 |     assert (p.data ().second == 1);
 93 | 
 94 |     // Parsed value accessible in raw form.
 95 |     //
 96 |     assert (p.next () == event::number);
 97 |     assert (p.data ().first != nullptr);
 98 |     assert (string (p.data ().first) == "1");
 99 |     assert (p.data ().second == 1);
100 | 
101 |     // Peeked value once again accessible in raw form.
102 |     //
103 |     assert (p.peek () == event::number);
104 |     assert (p.data ().first != nullptr);
105 |     assert (string (p.data ().first) == "222");
106 |     assert (p.data ().second == 3);
107 |   }
108 | 
109 |   // After peek(), value() still returns the value from the previous next().
110 |   //
111 |   {
112 |     parser p ("[1, \"hello\", 3]", "test");
113 |     assert (p.next () == event::begin_array);
114 |     assert (p.next () == event::number);
115 |     assert (p.value () == "1");
116 | 
117 |     assert (p.peek () == event::string);
118 |     assert (p.value () == "1");
119 |   }
120 | 
121 |   // Peek is idempotent.
122 |   //
123 |   {
124 |     parser p ("[1, \"hello\"]", "test");
125 |     assert (p.peek () == event::begin_array);
126 |     assert (p.peek () == event::begin_array);
127 | 
128 |     assert (p.next () == event::begin_array);
129 | 
130 |     // Peek #1 (at the number).
131 |     //
132 |     assert (p.peek () == event::number);
133 |     // assert (p.value ().empty ());
134 |     assert (p.data ().first != nullptr);
135 |     assert (string (p.data ().first) == "1");
136 | 
137 |     // Peek #2 (at the number; same result).
138 |     //
139 |     assert (p.peek () == event::number);
140 |     // assert (p.value ().empty ());
141 |     assert (p.data ().first != nullptr);
142 |     assert (string (p.data ().first) == "1");
143 | 
144 |     assert (p.next () == event::number);
145 | 
146 |     // Peek #1 (at the string; value() still holds the number).
147 |     //
148 |     assert (p.peek () == event::string);
149 |     assert (p.value () == "1");
150 |     assert (p.data ().first != nullptr);
151 |     assert (string (p.data ().first) == "hello");
152 | 
153 |     // Peek #2 (at the string; same result).
154 |     //
155 |     assert (p.peek () == event::string);
156 |     assert (p.value () == "1");
157 |     assert (p.data ().first != nullptr);
158 |     assert (string (p.data ().first) == "hello");
159 | 
160 |     // Get to last value.
161 |     //
162 |     assert (p.next () == event::string);
163 |     assert (p.next () == event::end_array);
164 | 
165 |     // Peek past last value.
166 |     //
167 |     assert (p.peek () == nullopt);
168 |     assert (p.data ().first == nullptr);
169 |     assert (p.data ().second == 0);
170 | 
171 |     // Get to EOF.
172 |     //
173 |     assert (p.next () == nullopt);
174 | 
175 |     // Peek at (past) EOF is idempotent.
176 |     //
177 |     assert (p.peek () == nullopt);
178 |     assert (p.data ().first == nullptr);
179 |     assert (p.data ().second == 0);
180 |     assert (p.peek () == nullopt);
181 |     assert (p.data ().first == nullptr);
182 |     assert (p.data ().second == 0);
183 |   }
184 | 
185 |   // Peek EOF.
186 |   //
187 |   {
188 |     parser p ("1", "test");
189 |     assert (p.next () == event::number);
190 |     assert (p.peek () == nullopt);
191 |     assert (p.value () == "1");
192 |     assert (p.value<int> () == 1);
193 |     assert (p.next () == nullopt);
194 |     assert (p.peek () == nullopt);
195 |   }
196 | 
197 |   // Parse at EOF.
198 |   //
199 |   {
200 |     parser p ("1", "test");
201 |     assert (p.next () == event::number);
202 | 
203 |     assert (p.next () == nullopt);
204 |     assert (p.data ().first == nullptr);
205 |     assert (p.data ().second == 0);
206 | 
207 |     assert (p.next () == nullopt);
208 |     assert (p.data ().first == nullptr);
209 |     assert (p.data ().second == 0);
210 |   }
211 | 
212 |   // Beginning-to-end: parse only.
213 |   //
214 |   {
215 |     parser p ("[1,2]", "test");
216 |     assert (p.next () == event::begin_array);
217 |     assert (p.next () == event::number);
218 |     assert (p.value<int> () == 1);
219 |     assert (p.next () == event::number);
220 |     assert (p.value<int> () == 2);
221 |     assert (p.next () == event::end_array);
222 |     assert (p.next () == nullopt);
223 |   }
224 | 
225 |   // Beginning-to-end: peek first.
226 |   //
227 |   {
228 |     parser p ("[1,2,3]", "test");
229 |     assert (p.peek () == event::begin_array);
230 |     assert (p.peek () == event::begin_array);
231 |     assert (p.next () == event::begin_array);
232 |     assert (p.peek () == event::number); // 1
233 |     assert (p.peek () == event::number); // 1
234 |     assert (p.next () == event::number); // 1
235 |     assert (p.next () == event::number); // 2
236 |     assert (p.peek () == event::number); // 3
237 |     assert (p.peek () == event::number); // 3
238 |     assert (p.next () == event::number); // 3
239 |     assert (p.peek () == event::end_array);
240 |     assert (p.peek () == event::end_array);
241 |     assert (p.next () == event::end_array);
242 |     assert (p.peek () == nullopt);
243 |     assert (p.peek () == nullopt);
244 |     assert (p.next () == nullopt);
245 |     assert (p.peek () == nullopt);
246 |     assert (p.peek () == nullopt);
247 |   }
248 | 
249 |   // Beginning-to-end: parse first.
250 |   //
251 |   {
252 |     parser p ("[1,2,3]", "test");
253 |     assert (p.next () == event::begin_array);
254 |     assert (p.peek () == event::number); // 1
255 |     assert (p.peek () == event::number); // 1
256 |     assert (p.next () == event::number); // 1
257 |     assert (p.peek () == event::number); // 2
258 |     assert (p.peek () == event::number); // 2
259 |     assert (p.next () == event::number); // 2
260 |     assert (p.next () == event::number); // 3
261 |     assert (p.peek () == event::end_array);
262 |     assert (p.peek () == event::end_array);
263 |     assert (p.next () == event::end_array);
264 |     assert (p.peek () == nullopt);
265 |     assert (p.peek () == nullopt);
266 |     assert (p.next () == nullopt);
267 |     assert (p.peek () == nullopt);
268 |     assert (p.peek () == nullopt);
269 |   }
270 | 
271 |   // Don't get caught out by an empty JSON string value.
272 |   //
273 |   {
274 |     parser p ("[\"\", \"hello\"]", "test");
275 |     assert (p.next () == event::begin_array);
276 |     assert (p.next () == event::string);
277 |     assert (p.value () == "");
278 |     assert (p.peek () == event::string);
279 |     assert (p.value () == "");
280 |   }
281 | 
282 |   return 0;
283 | }
284 | 


--------------------------------------------------------------------------------
/tests/serializer/basics/buildfile:
--------------------------------------------------------------------------------
1 | import libs = libstud-json%lib{stud-json}
2 | 
3 | exe{driver}: {cxx}{driver} $libs
4 | 


--------------------------------------------------------------------------------
/tests/serializer/basics/driver.cxx:
--------------------------------------------------------------------------------
  1 | #include <limits>
  2 | #include <cstddef> // size_t
  3 | #include <cstring> // memcmp()
  4 | #include <sstream>
  5 | 
  6 | #include <libstud/optional.hxx>
  7 | #include <libstud/json/serializer.hxx>
  8 | 
  9 | #undef NDEBUG
 10 | #include <cassert>
 11 | 
 12 | using namespace std;
 13 | using namespace stud::json;
 14 | 
 15 | // Overflow callback: sets the capacity to exactly N bytes past the size.
 16 | //
 17 | template <size_t N>
 18 | static void
 19 | overflow (void*, event, buffer_serializer::buffer& buf, size_t)
 20 | {
 21 |   buf.capacity = buf.size + N;
 22 | }
 23 | 
 24 | int
 25 | main ()
 26 | {
 27 |   using stud::optional;
 28 |   using stud::nullopt;
 29 | 
 30 |   using error = invalid_json_output::error_code;
 31 | 
 32 |   // Return true if a call to s.next () with these arguments throws an
 33 |   // invalid_json_output exception with the specified error code (ec).
 34 |   //
 35 |   auto next_throws = [] (error ec,
 36 |                          buffer_serializer& s,
 37 |                          optional<event> e,
 38 |                          pair<const char*, size_t> val = {},
 39 |                          bool check = true)
 40 |   {
 41 |     try
 42 |     {
 43 |       s.next (e, val, check);
 44 |       return false;
 45 |     }
 46 |     catch (const invalid_json_output& e)
 47 |     {
 48 |       return e.code == ec;
 49 |     }
 50 |   };
 51 | 
 52 |   // Return true if the serialization (with checking enabled) of a string
 53 |   // throws invalid_json_output with the invalid_value error code.
 54 |   //
 55 |   auto serialize_throws = [next_throws] (const string& v)
 56 |   {
 57 |     string b;
 58 |     buffer_serializer s (b);
 59 |     return next_throws (
 60 |         error::invalid_value, s, event::string, {v.c_str (), v.size ()}, true);
 61 |   };
 62 | 
 63 |   // Return the serialized form of a string (with checking enabled). Note that
 64 |   // the quotes are removed to ease comparisons.
 65 |   //
 66 |   auto serialize = [] (const string& v)
 67 |   {
 68 |     string b;
 69 |     buffer_serializer s (b);
 70 |     s.next (event::string, {v.c_str (), v.size ()}, true);
 71 |     return b.size () >= 2 ? b.substr (1, b.size () - 2) : "";
 72 |   };
 73 | 
 74 |   // Completeness of top-level JSON value sequences.
 75 |   //
 76 |   {
 77 |     // Open array detected as incomplete.
 78 |     //
 79 |     {
 80 |       string b;
 81 |       buffer_serializer s (b);
 82 |       s.next (event::begin_array);
 83 |       assert (next_throws (error::invalid_value, s, nullopt));
 84 |     }
 85 | 
 86 |     // Open object detected as incomplete.
 87 |     //
 88 |     {
 89 |       string b;
 90 |       buffer_serializer s (b);
 91 |       s.next (event::begin_object);
 92 |       assert (next_throws (error::invalid_value, s, nullopt));
 93 |     }
 94 | 
 95 |     // Declare top-level value sequence complete by serializing an absent
 96 |     // event (nullopt).
 97 |     //
 98 |     // After that, serializing anything, even nullopt, is an error.
 99 |     //
100 |     {
101 |       // Empty top-level value sequence.
102 |       //
103 |       // If no values have been serialized, the first absent event declares
104 |       // the top-level value sequence complete.
105 |       //
106 |       {
107 |         string b;
108 |         buffer_serializer s (b);
109 |         s.next (nullopt); // Declare this an empty sequence of top-level values.
110 |         assert (next_throws (error::invalid_value, s, event::number, {"2", 1}));
111 |         assert (next_throws (error::invalid_value, s, nullopt));
112 |       }
113 | 
114 |       // One top-level value.
115 |       //
116 |       {
117 |         string b;
118 |         buffer_serializer s (b);
119 |         s.next (event::number, {"1", 1});
120 |         s.next (nullopt); // Check for completeness (throws if not).
121 |         s.next (nullopt); // Declare end of top-level value sequence.
122 |         assert (next_throws (error::invalid_value, s, event::number, {"2", 1}));
123 |         assert (next_throws (error::invalid_value, s, nullopt));
124 |       }
125 | 
126 |       // Multiple top-level values.
127 |       //
128 |       {
129 |         string b;
130 |         buffer_serializer s (b);
131 |         s.next (event::number, {"1", 1});
132 |         s.next (event::number, {"2", 1});
133 |         s.next (nullopt); // Check for completeness (throws if not).
134 |         s.next (nullopt); // Declare end of top-level value sequence.
135 |         assert (next_throws (error::invalid_value, s, event::number, {"3", 1}));
136 |         assert (next_throws (error::invalid_value, s, nullopt));
137 |       }
138 |     }
139 |   }
140 | 
141 |   // Array structure.
142 |   //
143 |   {
144 |     // End array outside array.
145 |     //
146 |     {
147 |       string b;
148 |       buffer_serializer s (b);
149 |       assert (next_throws (error::unexpected_event, s, event::end_array));
150 |     }
151 | 
152 |     // End object inside array.
153 |     //
154 |     {
155 |       string b;
156 |       buffer_serializer s (b);
157 |       s.next (event::begin_array);
158 |       assert (next_throws (error::unexpected_event, s, event::end_object));
159 |     }
160 |   }
161 | 
162 |   // Object structure.
163 |   //
164 |   {
165 |     // End object outside object.
166 |     //
167 |     {
168 |       string b;
169 |       buffer_serializer s (b);
170 |       assert (next_throws (error::unexpected_event, s, event::end_object));
171 |     }
172 | 
173 |     // End object when member value is expected.
174 |     //
175 |     {
176 |       string b;
177 |       buffer_serializer s (b);
178 |       s.next (event::begin_object);
179 |       s.next (event::name, {"n", 1});
180 |       assert (next_throws (error::unexpected_event, s, event::end_object));
181 |     }
182 | 
183 |     // End array inside object.
184 |     //
185 |     {
186 |       string b;
187 |       buffer_serializer s (b);
188 |       s.next (event::begin_object);
189 |       assert (next_throws (error::unexpected_event, s, event::end_array));
190 |     }
191 | 
192 |     // Value when expecting a name.
193 |     //
194 |     {
195 |       {
196 |         string b;
197 |         buffer_serializer s (b);
198 |         s.next (event::begin_object);
199 |         assert (
200 |             next_throws (error::unexpected_event, s, event::number, {"1", 1}));
201 |       }
202 |       {
203 |         string b;
204 |         buffer_serializer s (b);
205 |         s.next (event::begin_object);
206 |         assert (
207 |             next_throws (error::unexpected_event, s, event::string, {"1", 1}));
208 |       }
209 |       {
210 |         string b;
211 |         buffer_serializer s (b);
212 |         s.next (event::begin_object);
213 |         assert (next_throws (
214 |             error::unexpected_event, s, event::boolean, {"true", 4}));
215 |       }
216 |       {
217 |         string b;
218 |         buffer_serializer s (b);
219 |         s.next (event::begin_object);
220 |         assert (
221 |             next_throws (error::unexpected_event, s, event::null, {"null", 4}));
222 |       }
223 | 
224 |       // When there is already a complete member.
225 |       //
226 |       {
227 |         string b;
228 |         buffer_serializer s (b);
229 |         s.next (event::begin_object);
230 |         s.next (event::name, {"a", 1});
231 |         s.next (event::number, {"1", 1});
232 |         assert (
233 |             next_throws (error::unexpected_event, s, event::number, {"1", 1}));
234 |       }
235 |     }
236 | 
237 |     // Begin object when expecting a name.
238 |     //
239 |     {
240 |       string b;
241 |       buffer_serializer s (b);
242 |       s.next (event::begin_object);
243 |       assert (next_throws (error::unexpected_event, s, event::begin_object));
244 |     }
245 | 
246 |     // Name when expecting a value.
247 |     //
248 |     {
249 |       string b;
250 |       buffer_serializer s (b);
251 |       s.next (event::begin_object);
252 |       s.next (event::name, {"a", 1});
253 |       assert (next_throws (error::unexpected_event, s, event::name, {"b", 1}));
254 |     }
255 |   }
256 | 
257 |   // Buffer management.
258 |   //
259 |   {
260 |     // Fixed-size buffer: capacity exceeded.
261 |     //
262 |     {
263 |       uint8_t b[3];
264 |       size_t n (0);
265 |       buffer_serializer s (b, n, 3);
266 |       s.next (event::number, {"12", 2}); // 3 bytes written (val + newline).
267 |       assert (next_throws (error::buffer_overflow, s, event::number, {"2", 1}));
268 |     }
269 | 
270 |     // Serialization of value with multiple calls to overflow.
271 |     //
272 |     {
273 |       uint8_t b[100];
274 |       size_t n (0);
275 |       buffer_serializer s (b, n, 0, &overflow<6>, nullptr, nullptr);
276 |       const string v (50, 'a');
277 |       s.next (event::string, {v.c_str (), v.size ()});
278 |       // +1 skips the opening quote.
279 |       //
280 |       assert (memcmp (b + 1, v.c_str (), v.size ()) == 0);
281 |     }
282 | 
283 |     // Serializer appends to user buffer (that is, preserves its contents).
284 |     //
285 |     {
286 |       // String.
287 |       //
288 |       {
289 |         string b ("aaa");
290 |         buffer_serializer s (b);
291 |         const string v ("bbb");
292 |         s.next (event::string, {v.c_str(), v.size ()});
293 |         assert (b == "aaa\"bbb\"");
294 |       }
295 | 
296 |       // Array.
297 |       //
298 |       {
299 |         uint8_t b[100] {'a', 'a', 'a'};
300 |         size_t n (3);
301 |         buffer_serializer s (b, n, 10, nullptr, nullptr, nullptr);
302 |         const string v ("bbb");
303 |         s.next (event::string, {v.c_str(), v.size ()});
304 |         assert (n == 8);
305 |         assert (memcmp (b, "aaa\"bbb\"", 8) == 0);
306 |       }
307 |     }
308 | 
309 |     // Regression tests.
310 |     //
311 |     {
312 |       // This is a regression test for two different but related
313 |       // buffer-management bugs.
314 |       //
315 |       // Whether or not either of these bugs is triggered depends on the
316 |       // capacity of the buffer and thus on the allocation patterns of
317 |       // std::string, and therefore it's not practical to construct a small
318 |       // number of minimal and specific test cases. For both libstdc++ and
319 |       // libc++, however, both bugs were triggered in under 20 characters so
320 |       // the 100 used here should cover most implementations. (I think the
321 |       // crucial value is the size of the SSO buffer.)
322 |       //
323 |       {
324 |         for (size_t i (1); i < 100; i++)
325 |           serialize (string (i, 'a') + "\x01");
326 |       }
327 | 
328 |       // With this setup and input, when we get to the first byte of the UTF-8
329 |       // sequence, the number of bytes left to be written (size, value 2) is
330 |       // less than the bytes left in the buffer (cap, value 3) (see
331 |       // serializer::write()). Thus a value of (size - cap = 2 - 3 =
332 |       // underflow) was being passed to the overflow function. See the fake
333 |       // overflow implementation above for details.
334 |       //
335 |       {
336 |         uint8_t b[20];
337 |         size_t n (0);
338 |         buffer_serializer s (b, n, 0, &overflow<6>, nullptr, nullptr);
339 |         // 0xF0 indicates the beginning of a 4-byte UTF-8 sequence.
340 |         //
341 |         const string v ("12\xF0");
342 |         try
343 |         {
344 |           s.next (event::string, {v.c_str (), v.size ()}, true);
345 |         }
346 |         catch (const invalid_json_output& e)
347 |         {
348 |           assert (e.code == error::invalid_value);
349 |         }
350 |       }
351 |     }
352 |   }
353 | 
354 |   // Validation of literal values (null and boolean). All JSON literals must
355 |   // be lower case.
356 |   //
357 |   {
358 |     string b;
359 |     buffer_serializer s (b);
360 | 
361 |     auto next_throws_invalid_value =
362 |     [&next_throws, &s] (event e, pair<const char*, size_t> v)
363 |     {
364 |       return next_throws (error::invalid_value, s, e, v, true);
365 |     };
366 | 
367 |     assert (next_throws_invalid_value (event::null, {"Null", 4}));
368 |     assert (next_throws_invalid_value (event::null, {"NULL", 4}));
369 |     assert (next_throws_invalid_value (event::null, {"nul", 3}));
370 |     assert (next_throws_invalid_value (event::null, {"nullX", 5}));
371 |     assert (next_throws_invalid_value (event::null, {"null ", 5}));
372 | 
373 |     assert (next_throws_invalid_value (event::boolean, {"True", 4}));
374 |     assert (next_throws_invalid_value (event::boolean, {"TRUE", 4}));
375 |     assert (next_throws_invalid_value (event::boolean, {"tru", 3}));
376 |     assert (next_throws_invalid_value (event::boolean, {"trueX", 5}));
377 |     assert (next_throws_invalid_value (event::boolean, {"true ", 5}));
378 | 
379 |     assert (next_throws_invalid_value (event::boolean, {"False", 5}));
380 |     assert (next_throws_invalid_value (event::boolean, {"FALSE", 5}));
381 |     assert (next_throws_invalid_value (event::boolean, {"fals", 4}));
382 |     assert (next_throws_invalid_value (event::boolean, {"falseX", 6}));
383 |     assert (next_throws_invalid_value (event::boolean, {"false ", 6}));
384 |   }
385 | 
386 |   // null event: the value is supplied if it is unspecified.
387 |   //
388 |   {
389 |     string b;
390 |     buffer_serializer s (b);
391 |     s.next (event::null);
392 |     assert (b == "null");
393 |   }
394 | 
395 |   // UTF-8 sequences are not split if buffer runs out of space.
396 |   //
397 |   // Despite there being capacity for the first part of a UTF-8 sequence, none
398 |   // of it must be written.
399 |   //
400 |   {
401 |     uint8_t b[100];
402 |     const string v ("\xE2\x82\xAC"); // U+20AC '€'
403 | 
404 |     // Using the unchecked version of next().
405 |     //
406 |     {
407 |       size_t n (0);
408 |       buffer_serializer s (b, n, 3);
409 |       assert (next_throws (error::buffer_overflow,
410 |                            s,
411 |                            event::string,
412 |                            {v.c_str (), v.size ()},
413 |                            false));
414 |       assert (n == 1); // Only the opening quote should've been written.
415 |     }
416 | 
417 |     // Using the checked version of next().
418 |     //
419 |     {
420 |       size_t n (0);
421 |       buffer_serializer s (b, n, 3);
422 |       assert (next_throws (error::buffer_overflow,
423 |                            s,
424 |                            event::string,
425 |                            {v.c_str (), v.size ()},
426 |                            true));
427 |       assert (n == 1);
428 |     }
429 |   }
430 | 
431 |   // UTF-8 validation.
432 |   //
433 |   {
434 |     assert (serialize_throws ("\xC2"));     // Truncated 2-byte sequence.
435 |     assert (serialize_throws ("\xE1\x80")); // Truncated 3-byte sequence.
436 |     assert (serialize_throws ("\xF1\x80\x80")); // Truncated 4-byte sequence.
437 |     assert (serialize_throws ("\xC0\xB0")); // Overlong encoding of '0' (0x30).
438 |     assert (serialize_throws ("\xC1\xBE")); // Overlong encoding of '~' (0x7E).
439 |     assert (serialize_throws ("\xC2\x7F")); // 2nd byte < valid range.
440 |     assert (serialize_throws ("\xC2\xC0")); // 2nd byte > valid range.
441 | 
442 |     // Special second-byte cases.
443 |     //
444 |     assert (serialize_throws ("\xE0\x9F\x80")); // 2nd byte < valid range.
445 |     assert (serialize_throws ("\xED\xA0\x80")); // 2nd byte > valid range.
446 |     assert (serialize_throws ("\xF0\x8F\x80\x80")); // 2nd byte < valid range.
447 |     assert (serialize_throws ("\xF4\x90\x80\x80")); // 2nd byte > valid range.
448 |   }
449 | 
450 |   // Escaping.
451 |   //
452 |   {
453 |     assert (serialize ("\"") == "\\\"");
454 |     assert (serialize ("\\") == "\\\\");
455 |     assert (serialize ("\t") == "\\t");
456 |     assert (serialize ("\n") == "\\n");
457 |     assert (serialize ("\b") == "\\b");
458 |     assert (serialize ("\r") == "\\r");
459 |     assert (serialize ("\f") == "\\f");
460 |     assert (serialize ("\x01") == "\\u0001");
461 |     assert (serialize ("\x1F") == "\\u001F");
462 |     assert (serialize ("ABC \t DEF \x01\x02 GHI") ==
463 |             "ABC \\t DEF \\u0001\\u0002 GHI");
464 |   }
465 | 
466 |   // Exception offset.
467 |   //
468 |   // The offset stored in the invalid_json_output exception should point to
469 |   // the beginning of the invalid UTF-8 sequence (a truncated 3-byte sequence
470 |   // in this case).
471 |   //
472 |   {
473 |     string b;
474 |     buffer_serializer s (b);
475 |     try
476 |     {
477 |       s.next (event::string, {"abc\xE1\x80", 5}, true);
478 |       assert (false);
479 |     }
480 |     catch (const invalid_json_output& e)
481 |     {
482 |       assert (e.offset == 3);
483 |     }
484 |   }
485 | 
486 |   // High-level interface.
487 |   //
488 |   {
489 |     // All JSON types.
490 |     //
491 |     {
492 |       string b;
493 |       buffer_serializer s (b, 0);
494 |       s.value ("a");
495 |       s.value (string ("b"));
496 |       s.value (999);
497 |       s.value (nullptr);
498 |       s.value (true);
499 |       assert (b == "\"a\"\n\"b\"\n999\nnull\ntrue");
500 |     }
501 | 
502 |     // Object.
503 |     //
504 |     {
505 |       string b;
506 |       buffer_serializer s (b, 0);
507 |       s.begin_object ();
508 |       s.member ("a", 1);
509 |       s.member_name ("b"); s.value ("z");
510 |       s.member ("c", string ("y"));
511 |       s.member ("d", nullptr);
512 |       s.member ("e", true);
513 |       s.end_object ();
514 |       assert (b == "{\"a\":1,\"b\":\"z\",\"c\":\"y\",\"d\":null,\"e\":true}");
515 |     }
516 | 
517 |     // Array.
518 |     {
519 |       string b;
520 |       buffer_serializer s (b, 0);
521 |       s.begin_array ();
522 |       s.value (1);
523 |       s.value ("a");
524 |       s.end_array ();
525 |       assert (b == "[1,\"a\"]");
526 |     }
527 | 
528 |     // Long floating point numbers should be output in scientific notation.
529 |     // (This also tests that numbers with many digits do not break things.)
530 |     //
531 |     {
532 |       string b;
533 |       buffer_serializer s (b, 0);
534 |       s.value (numeric_limits<long double>::max ());
535 |       assert (b.find ("e+") != string::npos);
536 |     }
537 | 
538 |     // A null char* is serialized as a JSON null.
539 |     //
540 |     {
541 |       string b;
542 |       buffer_serializer s (b);
543 |       const char* cp (nullptr);
544 |       s.value (cp);
545 |       assert (b == "null");
546 |     }
547 | 
548 |     // Pre-serialized JSON value.
549 |     //
550 |     {
551 |       string b;
552 |       buffer_serializer s (b, 0);
553 |       s.begin_array ();
554 |       s.value_json_text ("{\"a\":1}");
555 |       s.value_json_text ("{\"a\":2}");
556 |       s.end_array ();
557 |       assert (b == "[{\"a\":1},{\"a\":2}]");
558 |     }
559 |   }
560 | }
561 | 


--------------------------------------------------------------------------------
/tests/serializer/buildfile:
--------------------------------------------------------------------------------
1 | ./: {*/ -fuzz-llvm/}
2 | 


--------------------------------------------------------------------------------
/tests/serializer/fuzz-llvm/README.md:
--------------------------------------------------------------------------------
 1 | This is an [LLVM LibFuzzer](https://llvm.org/docs/LibFuzzer.html)-based test.
 2 | 
 3 | A typical setup could look like this:
 4 | 
 5 | ```
 6 | cd libstud-json
 7 | bdep init -C @fuzz cc config.cxx=clang++ config.cxx.coptions="-g -O3 -fsanitize=address,undefined,fuzzer-no-link"
 8 | b ../libstud-json-fuzz/libstud-json/tests/serializer/fuzz-llvm/ # Directory may not exist at this point
 9 | cd ../libstud-json-fuzz/libstud-json/tests/serializer/fuzz-llvm/
10 | mkdir corpus
11 | ./driver corpus/
12 | ```
13 | 
14 | The serializer's driver does not support starting from an empty corpus, so
15 | the `corpus` directory created above has to be populated before running the
16 | driver. It is highly recommended to start with as many high-quality samples
17 | as possible. The following repositories are a good starting point:
18 |   * The `test_parsing/` directory from
19 |     [JSONTestSuite](https://github.com/nst/JSONTestSuite)
20 |   * The `json/corpus/` directory from
21 |     [go-fuzz-corpus](https://github.com/dvyukov/go-fuzz-corpus/tree/master)
22 | 
23 | It would also be wise to include a basic multi-value input such as this:
24 | 
25 | ```
26 | 123
27 | "abc"
28 | true
29 | false
30 | null
31 | []
32 | {}
33 | ```
34 | 
35 | The serializer's fuzz driver uses a custom input format. The included
36 | `convert` utility can be used to convert valid JSON input to this custom
37 | format. The following shell command can be used to convert an entire
38 | directory of JSON files:
39 | 
40 | ```
41 | for f in corpus-json/*
42 | do
43 |     ./convert $f corpus/`basename $f` || rm corpus/`basename $f`
44 | done
45 | ```
46 | 


--------------------------------------------------------------------------------
/tests/serializer/fuzz-llvm/buildfile:
--------------------------------------------------------------------------------
 1 | import libs = libstud-json%lib{stud-json}
 2 | 
 3 | ./: exe{driver}: {cxx}{driver} $libs
 4 | 
 5 | exe{driver}:
 6 | {
 7 |   cxx.loptions += -fsanitize=fuzzer
 8 | }
 9 | 
10 | ./: exe{convert}: {cxx}{convert} $libs
11 | 


--------------------------------------------------------------------------------
/tests/serializer/fuzz-llvm/convert.cxx:
--------------------------------------------------------------------------------
  1 | // This utility can be used to create an initial serializer fuzz corpus from
  2 | // valid JSON inputs. Usage:
  3 | //
  4 | // convert input.json output.bin
  5 | //
  6 | // See driver.cxx for the output format description.
  7 | //
  8 | 
  9 | #include <fstream>
 10 | #include <iostream>
 11 | 
 12 | #include <libstud/json/parser.hxx>
 13 | 
 14 | using namespace std;
 15 | using namespace stud::json;
 16 | 
 17 | int
 18 | main (int argc, const char** argv)
 19 | {
 20 |   if (argc != 3)
 21 |   {
 22 |     cerr << "usage: " << argv[0] << " <input-file> <output-file>" << endl;
 23 |     return 1;
 24 |   }
 25 | 
 26 |   // Setup input: a multi-value JSON parser reading from the input file.
 27 |   //
 28 |   ifstream in (argv[1]);
 29 |   if (in.fail ())
 30 |   {
 31 |     cerr << "unable to open file '" << argv[1] << "' for reading" << endl;
 32 |     return 1;
 33 |   }
 34 |   in.exceptions (ios::badbit | ios::failbit);
 35 |   parser p (in, argv[1], true /* multi_value */);
 36 | 
 37 |   // Setup output: binary file with stream errors reported as exceptions.
 38 |   //
 39 |   ofstream out (argv[2], ios::binary);
 40 |   if (out.fail ())
 41 |   {
 42 |     cerr << "unable to open file '" << argv[2] << "' for writing" << endl;
 43 |     return 1;
 44 |   }
 45 |   out.exceptions (ios::badbit | ios::failbit);
 46 | 
 47 |   // Write an event and its (potentially absent or empty) value to out.
 48 |   //
 49 |   auto write = [&out] (uint8_t e, const string* v)
 50 |   {
 51 |     out.write (reinterpret_cast<const char*> (&e), sizeof (e));
 52 | 
 53 |     if (v != nullptr)
 54 |     {
 55 |       const uint32_t s (v->size ());
 56 |       auto sp (reinterpret_cast<const char*> (&s));
 57 |       out.write (sp, sizeof (s));
 58 |       out.write (v->data (), v->size ());
 59 |     }
 60 |   };
 61 | 
 62 |   try
 63 |   {
 64 |     uint32_t n (0); // Number of events.
 65 | 
 66 |     while (p.peek ())
 67 |     {
 68 |       for (event e: p)
 69 |       {
 70 |         switch (e)
 71 |         {
 72 |         case event::name:
 73 |           {
 74 |             write (static_cast<uint8_t> (e), &p.name ());
 75 |             break;
 76 |           }
 77 |         case event::string:
 78 |         case event::number:
 79 |         case event::boolean:
 80 |         case event::null:
 81 |           {
 82 |             write (static_cast<uint8_t> (e), &p.value ());
 83 |             break;
 84 |           }
 85 |         default:
 86 |           {
 87 |             write (static_cast<uint8_t> (e), nullptr);
 88 |             break;
 89 |           }
 90 |         }
 91 |         n++;
 92 |       }
 93 |       write (0, nullptr); // Absent event.
 94 |       n++;
 95 |     }
 96 |     write (0, nullptr); // Absent event.
 97 |     n++;
 98 |     out.write (reinterpret_cast<const char*> (&n), sizeof (n));
 99 |     out.close (); // Flush now: a failure throws instead of being dropped.
100 |   }
101 |   catch (const invalid_json_input& e)
102 |   {
103 |     cerr << e.name << ':' << e.line << ':' << e.column << ": error: "
104 |          << e.what () << endl;
105 |     return 1;
106 |   }
107 |   catch (const std::exception& e)
108 |   {
109 |     cerr << e.what () << endl;
110 |     return 1;
111 |   }
112 | }
113 | 


--------------------------------------------------------------------------------
/tests/serializer/fuzz-llvm/driver.cxx:
--------------------------------------------------------------------------------
  1 | // Usage: argv[0] [libFuzzer options] <corpus-directory>
  2 | //
  3 | // A corpus containing valid inputs must be provided. Starting from an empty
  4 | // corpus is not supported.
  5 | 
  6 | // The input format is a sequence of events in the following form:
  7 | //
  8 | // e[llllv...]
  9 | //
 10 | // e: event type (uint8_t), 0 for absent
 11 | // l: value length (uint32_t)
 12 | // v: value bytes (UTF-8 string)
 13 | //
 14 | // The sequence of events is followed by the number of events (uint32_t).
 15 | //
 16 | // LLVMFuzzerTestOneInput() feeds the events and values from one corpus file
 17 | // to the serializer one at a time. The file is first passed through
 18 | // LLVMFuzzerCustomMutator(), which applies one of several kinds of mutations.
 19 | 
 20 | #include <random>
 21 | #include <cstdint>
 22 | #include <cstdlib> // abort
 23 | #include <cstring>
 24 | #include <iostream>
 25 | 
 26 | #include <libstud/optional.hxx>
 27 | #include <libstud/json/serializer.hxx>
 28 | 
 29 | #undef NDEBUG
 30 | #include <cassert>
 31 | 
 32 | using namespace std;
 33 | using namespace stud::json;
 34 | 
 35 | // Return true if event code e is not followed by a value in the input. This
 36 | // is the case for the absent event, the structural events, and any code
 37 | // that does not match an enumerator of event.
 38 | //
 39 | static bool
 40 | valueless (uint8_t e) noexcept
 41 | {
 42 |   if (e == 0) // Absent event.
 43 |     return true;
 44 | 
 45 |   bool r (true);
 46 | 
 47 |   switch (static_cast<event> (e))
 48 |   {
 49 |   case event::name:
 50 |   case event::string:
 51 |   case event::number:
 52 |   case event::boolean:
 53 |   case event::null:
 54 |     r = false;
 55 |     break;
 56 |   case event::begin_object:
 57 |   case event::end_object:
 58 |   case event::begin_array:
 59 |   case event::end_array:
 60 |     r = true;
 61 |     break;
 62 |   }
 63 | 
 64 |   return r;
 65 | }
 66 | 
 67 | // Feed the events and values contained in the input buffer to the serializer.
 68 | //
 69 | // The data is expected to be a sequence of events (see the top of the file
 70 | // for the format) followed by the event count. If the count or a value
 71 | // length is inconsistent with size, then only the events that are fully
 72 | // contained in the buffer are fed. Always return 0.
 73 | //
 74 | extern "C" int
 75 | LLVMFuzzerTestOneInput (const uint8_t* data, size_t size)
 76 | {
 77 |   using stud::optional;
 78 | 
 79 |   // Note that libFuzzer will invoke this function once with empty input
 80 |   // before starting the fuzzing run.
 81 |   //
 82 |   if (size == 0)
 83 |     return 0;
 84 | 
 85 |   // Detect when we seem to be running without a corpus.
 86 |   //
 87 |   if (size < sizeof (uint32_t))
 88 |   {
 89 |     cerr << "empty corpus" << endl;
 90 |     exit (1);
 91 |   }
 92 | 
 93 |   string b;
 94 |   buffer_serializer s (b);
 95 | 
 96 |   // Extract the event count which is stored after the event data.
 97 |   //
 98 |   uint32_t en (0);
 99 |   const size_t dn (size - sizeof (en)); // Size of the event data.
100 |   memcpy (&en, data + dn, sizeof (en));
101 | 
102 |   // Parse events and their values from data and pass them to the serializer.
103 |   // Stop if we run out of event data before having seen en events.
104 |   //
105 |   for (size_t ei (0), i (0); ei != en && i != dn; ei++)
106 |   {
107 |     const uint8_t e (data[i++]);
108 | 
109 |     // Extract the value length and the value making sure that both are
110 |     // within the event data.
111 |     //
112 |     const char* v (nullptr);
113 |     uint32_t n (0);
114 |     if (!valueless (e))
115 |     {
116 |       if (dn - i < sizeof (n)) // Truncated value length.
117 |         break;
118 | 
119 |       memcpy (&n, data + i, sizeof (n));
120 |       i += sizeof (n);
121 | 
122 |       if (dn - i < n) // Truncated value.
123 |         break;
124 | 
125 |       v = reinterpret_cast<const char*> (data + i);
126 |       i += n;
127 |     }
128 | 
129 |     // Serialize the event and its value.
130 |     //
131 |     try
132 |     {
133 |       s.next (e != 0 ? static_cast<event> (e) : optional<event> (),
134 |               {v, n},
135 |               true /* check */);
136 |     }
137 |     catch (const invalid_json_output& ex)
138 |     {
139 |       // If the error code is buffer_overflow the bug must be in the
140 |       // serializer's code because this driver serializes to a std::string so
141 |       // a real allocation failure would throw bad_alloc.
142 |       //
143 |       if (ex.code == invalid_json_output::error_code::buffer_overflow)
144 |         abort ();
145 | 
146 |       break; // Stop feeding events after any other error.
147 |     }
148 |   }
149 | 
150 |   return 0;
151 | }
137 | 
138 | extern "C" size_t
139 | LLVMFuzzerMutate (uint8_t* data, size_t size, size_t maxsize);
140 | 
141 | // Default values for inserted/changed events indexed by event code minus 1.
142 | //
143 | static const char* default_values[event_count] {
144 |   nullptr,       // The first four events get no value: passing nullptr to
145 |   nullptr,       // append_e() makes it write the event byte only.
146 |   nullptr,
147 |   nullptr,
148 |   "fuzz-name",   // Presumably for name, string, number, boolean, and null,
149 |   "fuzz-string", // in this order (TODO: confirm against the event enum).
150 |   "1234",
151 |   "true",
152 |   "null"};
153 | 
154 | // Select an event at random and either mutate its type, mutate its value
155 | // (including updating its length), remove it, or insert a new event after it.
156 | //
157 | // The seed argument is a pseudo-random number which should be used in such a
158 | // way as to cause a different mutation to be performed on each invocation.
159 | //
160 | // LibFuzzer's main fuzz loop (which is infinite by default) works as follows:
161 | // an input file is selected at random. It then loops over that input a
162 | // maximum of 5 times with each iteration consisting of a mutation (an
163 | // invocation of LLVMFuzzerCustomMutator()) and a test (an invocation of
164 | // LLVMFuzzerTestOneInput()). The same buffer is passed to the mutator each
165 | // time, so mutations are cumulative. If coverage increases or the input was
166 | // reduced, the inner loop is terminated immediately and the outer loop
167 | // selects the next input. Note that each input will be selected for these
168 | // 5-iteration runs repeatedly and thus ultimately be invoked many times (with
169 | // different seed values).
170 | //
171 | // What this all means in the end (and according to our understanding), is
172 | // that we don't want to perform pervasive mutations where the entire input is
173 | // changed. Instead, we want to perform a small, localized mutation on each
174 | // step (at least this is how the default mutation works if we did not provide
175 | // a custom mutator).
176 | //
177 | // This function performs a single mutation per invocation. It selects an
178 | // event to mutate and mutation type based on the seed argument.
179 | //
180 | // The result (events followed by their count) is written back to data and its
181 | // size, which does not exceed maxsize, is returned. Events that no longer fit
182 | // are dropped starting from the first one that does not fit. If the input
183 | // contains no events or if size or maxsize is too small to hold the event
184 | // count, then data is left unchanged and size is returned.
185 | //
186 | extern "C" size_t
187 | LLVMFuzzerCustomMutator (uint8_t* data,
188 |                          size_t size,
189 |                          size_t maxsize,
190 |                          unsigned int seed)
191 | {
192 |   // Looking at other custom mutator implementations, it seems this is how
193 |   // the seed should be used.
194 |   //
195 |   minstd_rand rand (seed);
196 | 
197 |   // Without room for the event count there is nothing we can read or write.
198 |   //
199 |   uint32_t en (0);
200 |   if (size < sizeof (en) || maxsize < sizeof (en))
201 |     return size;
202 | 
203 |   // Read the event count from the end of the input.
204 |   //
205 |   const size_t dn (size - sizeof (en)); // Size of the event data.
206 |   memcpy (&en, data + dn, sizeof (en));
207 | 
208 |   // Without events there is nothing to mutate (and selecting the event below
209 |   // would divide by zero). We can end up with such an input if a previous
210 |   // mutation could not fit even the first event.
211 |   //
212 |   if (en == 0)
213 |     return size;
214 | 
215 |   // The plan is as follows: iterate over events in data copying them over to
216 |   // the temporary buffer until we reach the event that we want to mutate.
217 |   // Once we've performed the mutation (and added the result into the buffer),
218 |   // we continue iterating over the remaining events copying them over into
219 |   // the buffer as long as they fit.
220 |   //
221 |   uint32_t em (0);  // Number of events appended.
222 |   vector<char> buf; // Buffer to which mutated input is written.
223 |   buf.reserve (maxsize);
224 | 
225 |   maxsize -= sizeof (en); // Leave room for the event count.
226 | 
227 |   // Append a simple value to the buffer (note: assumes sufficient space).
228 |   //
229 |   auto append_v = [&buf] (auto v)
230 |   {
231 |     auto p (reinterpret_cast<const char*> (&v));
232 |     buf.insert (buf.end (), p, p + sizeof (v));
233 |   };
234 | 
235 |   // Append an event and its value, if any, to the buffer. Return false if
236 |   // there wasn't enough space.
237 |   //
238 |   auto append_e = [&buf, &append_v, maxsize, &em]
239 |     (uint8_t e, const void* v, uint32_t n)
240 |   {
241 |     const size_t cap (maxsize - buf.size ());
242 | 
243 |     if (cap < (v == nullptr ? sizeof (e) : sizeof (e) + sizeof (n) + n))
244 |       return false;
245 | 
246 |     append_v (e);
247 | 
248 |     if (v != nullptr)
249 |     {
250 |       append_v (n);
251 |       auto p (static_cast<const char*> (v));
252 |       buf.insert (buf.end (), p, p + n);
253 |     }
254 | 
255 |     em++;
256 |     return true;
257 |   };
258 | 
259 |   // Note that we stop early if we run out of event data (which means the
260 |   // event count or a value length is inconsistent with size).
261 |   //
262 |   const uint32_t ej (rand () % en); // Index of event to mutate.
263 |   for (size_t ei (0), i (0); ei != en && i != dn; ei++)
264 |   {
265 |     const uint8_t e (data[i++]);
266 | 
267 |     // Extract the value length and the value.
268 |     //
269 |     const uint8_t* v (nullptr);
270 |     uint32_t n (0);
271 |     if (!valueless (e))
272 |     {
273 |       if (dn - i < sizeof (n)) // Truncated value length.
274 |         goto done;
275 | 
276 |       memcpy (&n, data + i, sizeof (n));
277 |       i += sizeof (n);
278 | 
279 |       if (dn - i < n) // Truncated value.
280 |         goto done;
281 | 
282 |       v = data + i;
283 |       i += n;
284 |     }
285 | 
286 |     // Copy over events that don't need mutation.
287 |     //
288 |     if (ei != ej)
289 |     {
290 |       if (!append_e (e, v, n)) // Did not fit.
291 |         goto done;
292 | 
293 |       continue;
294 |     }
295 | 
296 |     // Apply the mutation and append the result to the buffer, except if we're
297 |     // removing the current event, in which case we do nothing.
298 |     //
299 |     switch (rand () % 4)
300 |     {
301 |     case 0: // Remove the current event.
302 |       {
303 |         // If this is the only event, then we fall through to add an event
304 |         // instead.
305 |         //
306 |         if (en != 1)
307 |           break;
308 |       }
309 |       // Fall through.
310 |     case 1: // Insert a new event.
311 |       {
312 |         // Copy the current event to the buffer.
313 |         //
314 |         if (!append_e (e, v, n))
315 |           goto done;
316 | 
317 |         // Insert a new event.
318 |         //
319 |         const uint8_t e1 (rand () % event_count + 1);
320 |         const char* v1 (default_values[e1 - 1]);
321 |         if (!append_e (e1, v1, v1 == nullptr ? 0 : strlen (v1)))
322 |           goto done;
323 | 
324 |         break;
325 |       }
326 |     case 2: // Mutate the current event's value.
327 |       {
328 |         // If the event has no value, then we fall through to mutate the event
329 |         // itself.
330 |         //
331 |         if (!valueless (e))
332 |         {
333 |           // Mutate the value, allowing it to grow by up to 100 bytes in size.
334 |           //
335 |           vector<uint8_t> v1 (static_cast<size_t> (n) + 100);
336 |           memcpy (v1.data (), v, n);
337 |           const size_t n1 (LLVMFuzzerMutate (v1.data (), n, v1.size ()));
338 | 
339 |           if (!append_e (e, v1.data (), n1))
340 |             goto done;
341 | 
342 |           break;
343 |         }
344 |       }
345 |       // Fall through.
346 |     case 3: // Mutate the current event (but not the value).
347 |       {
348 |         // If the new event doesn't need a value, then we drop the old value
349 |         // (if any). If the new event needs a value and the old one did not
350 |         // have any, then we use the default value as in the insert case
351 |         // above.
352 |         //
353 |         const uint8_t e1 (rand () % event_count + 1);
354 | 
355 |         if (valueless (e1))
356 |         {
357 |           if (!append_e (e1, nullptr, 0))
358 |             goto done;
359 |         }
360 |         else
361 |         {
362 |           if (!valueless (e))
363 |           {
364 |             if (!append_e (e1, v, n))
365 |               goto done;
366 |           }
367 |           else
368 |           {
369 |             const char* v1 (default_values[e1 - 1]);
370 |             if (!append_e (e1, v1, v1 == nullptr ? 0 : strlen (v1)))
371 |               goto done;
372 |           }
373 |         }
374 | 
375 |         break;
376 |       }
377 |     }
378 |   }
379 | 
380 | done:
381 |   // Copy the mutated data and event count back into the input buffer.
382 |   //
383 |   append_v (em);
384 |   memcpy (data, buf.data (), buf.size ());
385 |   return buf.size ();
386 | }
387 | 


--------------------------------------------------------------------------------
/tests/serializer/roundtrip/buildfile:
--------------------------------------------------------------------------------
 1 | import libs = libstud-json%lib{stud-json}
 2 | 
 3 | ./: exe{driver}: cxx{driver} $libs
 4 | 
 5 | # Run the tests twice: as is (alias unchecked) and with --check passed to the
 6 | # driver (alias checked). If/when Testscript gets a for-loop we can handle
 7 | # this cleanly there. For now we use this alias trick (or hack, if you wish).
 8 | #
 9 | exe{driver}: test = false
10 | 
11 | ./: alias{unchecked checked}: exe{driver} testscript
12 | {
13 |   test = exe{driver}
14 | }
15 | 
16 | alias{checked}: test.options += --check
17 | 


--------------------------------------------------------------------------------
/tests/serializer/roundtrip/driver.cxx:
--------------------------------------------------------------------------------
 1 | // Usage: argv[0] [--check] [--pretty]
 2 | //
 3 | // --check   -- enable UTF-8 checking and escaping
 4 | // --pretty  -- enable pretty-printing
 5 | 
 6 | #include <iostream>
 7 | 
 8 | #include <libstud/optional.hxx>
 9 | #include <libstud/json/parser.hxx>
10 | #include <libstud/json/serializer.hxx>
11 | 
12 | #undef NDEBUG
13 | #include <cassert>
14 | 
15 | using namespace std;
16 | using namespace stud::json;
17 | 
18 | // Parse JSON values from stdin and serialize them to stdout (see the usage
19 | // at the top of the file for the options).
20 | //
21 | // Return 0 on success and 1, after printing diagnostics to stderr, if an
22 | // option is unrecognized or if parsing, serialization, or io fails.
23 | //
24 | int
25 | main (int argc, const char* argv[])
26 | {
27 |   using stud::nullopt;
28 | 
29 |   bool check (false);
30 |   bool pretty (false);
31 | 
32 |   for (int i (1); i < argc; i++)
33 |   {
34 |     const string o (argv[i]);
35 | 
36 |     if (o == "--check")
37 |       check = true;
38 |     else if (o == "--pretty")
39 |       pretty = true;
40 |     else
41 |     {
42 |       // Fail rather than silently run in the wrong mode if an option is
43 |       // misspelled.
44 |       //
45 |       cerr << "unknown option '" << o << "'" << endl;
46 |       return 1;
47 |     }
48 |   }
49 | 
50 |   parser p (cin, "<stdin>", true /* multi_value */);
51 |   stream_serializer s (cout, pretty ? 2 : 0);
52 | 
53 |   try
54 |   {
55 |     // Don't print anything, not even the trailing newline, if there are no
56 |     // values in the input.
57 |     //
58 |     if (p.peek ())
59 |     {
60 |       while (p.peek ())
61 |       {
62 |         for (event e: p)
63 |           s.next (e, p.data (), check);
64 |         s.next (nullopt); // Absent event after each value.
65 |       }
66 |       s.next (nullopt); // Another absent event after the last value.
67 |       cout << endl;
68 |     }
69 | 
70 |     return 0;
71 |   }
72 |   catch (const invalid_json_output& e)
73 |   {
74 |     cerr << e.what () << endl;
75 |   }
76 |   catch (const invalid_json_input& e)
77 |   {
78 |     // Include the location of the error in the input.
79 |     //
80 |     cerr << e.name << ':' << e.line << ':' << e.column << ": error: "
81 |          << e.what () << endl;
82 |   }
83 |   catch (const ios::failure& e)
84 |   {
85 |     cerr << "io error: " << e.what () << endl;
86 |   }
87 | 
88 |   return 1;
89 | }
90 | 


--------------------------------------------------------------------------------
/tests/serializer/roundtrip/testscript:
--------------------------------------------------------------------------------
  1 | # Most tests pass the same here-document as the driver's stdin and as its
  2 | # expected stdout, that is, the input must be written back unchanged. Note
  3 | # that buildfile runs the tests both with and without the --check option.
  4 | 
  5 | # Top-level simple values.
  6 | #
  7 | : simple
  8 | :
  9 | {{
 10 |   : number
 11 |   :
 12 |   $* <<EOI >>EOI
 13 |   12345
 14 |   EOI
 15 | 
 16 |   : string
 17 |   :
 18 |   $* <<EOI >>EOI
 19 |   ""
 20 |   "हab¢"
 21 |   EOI
 22 | 
 23 |   : null
 24 |   :
 25 |   $* <<EOI >>EOI
 26 |   null
 27 |   EOI
 28 | 
 29 |   : true
 30 |   :
 31 |   $* <<EOI >>EOI
 32 |   true
 33 |   EOI
 34 | 
 35 |   : false
 36 |   :
 37 |   $* <<EOI >>EOI
 38 |   false
 39 |   EOI
 40 | }}
 41 | 
 42 | # Arrays of numbers, strings, arrays, and objects, each without and with
 43 | # --pretty.
 44 | : array
 45 | :
 46 | {{
 47 |   : empty
 48 |   :
 49 |   $* <<EOI >>EOI
 50 |   []
 51 |   EOI
 52 | 
 53 |   : empty-pretty
 54 |   :
 55 |   $* --pretty <<EOI >>EOI
 56 |   []
 57 |   EOI
 58 | 
 59 |   : single
 60 |   :
 61 |   $* <<EOI >>EOI
 62 |   [1]
 63 |   EOI
 64 | 
 65 |   : single-pretty
 66 |   :
 67 |   $* --pretty <<EOI >>EOI
 68 |   [
 69 |     1
 70 |   ]
 71 |   EOI
 72 | 
 73 |   : multi
 74 |   :
 75 |   $* <<EOI >>EOI
 76 |   [1,2,3]
 77 |   EOI
 78 | 
 79 |   : multi-pretty
 80 |   :
 81 |   $* --pretty <<EOI >>EOI
 82 |   [
 83 |     1,
 84 |     2,
 85 |     3
 86 |   ]
 87 |   EOI
 88 | 
 89 |   # Strings are given special treatment here because their quotes are
 90 |   # additional delimiters.
 91 |   #
 92 |   : string
 93 |   :
 94 |   $* <<EOI >>EOI
 95 |   ["abc","def",""]
 96 |   EOI
 97 | 
 98 |   : string-pretty
 99 |   :
100 |   $* --pretty <<EOI >>EOI
101 |   [
102 |     "abc",
103 |     "def",
104 |     ""
105 |   ]
106 |   EOI
107 | 
108 |   : array
109 |   :
110 |   $* <<EOI >>EOI
111 |   [[1,2,3],[]]
112 |   EOI
113 | 
114 |   : array-pretty
115 |   :
116 |   $* --pretty <<EOI >>EOI
117 |   [
118 |     [
119 |       1,
120 |       2,
121 |       3
122 |     ],
123 |     []
124 |   ]
125 |   EOI
126 | 
127 |   : object
128 |   :
129 |   $* <<EOI >>EOI
130 |   [{"a":1,"b":2,"c":3},{}]
131 |   EOI
132 | 
133 |   : object-pretty
134 |   :
135 |   $* --pretty <<EOI >>EOI
136 |   [
137 |     {
138 |       "a": 1,
139 |       "b": 2,
140 |       "c": 3
141 |     },
142 |     {}
143 |   ]
144 |   EOI
145 | }}
146 | 
147 | # Objects with number, string, object, and array members, each without and
148 | # with --pretty.
149 | : object
150 | :
151 | {{
152 |   : empty
153 |   :
154 |   $* <<EOI >>EOI
155 |   {}
156 |   EOI
157 | 
158 |   : empty-pretty
159 |   :
160 |   $* --pretty <<EOI >>EOI
161 |   {}
162 |   EOI
163 | 
164 |   : single
165 |   :
166 |   $* <<EOI >>EOI
167 |   {"a":1}
168 |   EOI
169 | 
170 |   : single-pretty
171 |   :
172 |   $* --pretty <<EOI >>EOI
173 |   {
174 |     "a": 1
175 |   }
176 |   EOI
177 | 
178 |   : multi
179 |   :
180 |   $* <<EOI >>EOI
181 |   {"a":1,"b":2,"c":3}
182 |   EOI
183 | 
184 |   : multi-pretty
185 |   :
186 |   $* --pretty <<EOI >>EOI
187 |   {
188 |     "a": 1,
189 |     "b": 2,
190 |     "c": 3
191 |   }
192 |   EOI
193 | 
194 |   : string
195 |   :
196 |   $* <<EOI >>EOI
197 |   {"a":"abc","b":"def","c":""}
198 |   EOI
199 | 
200 |   : string-pretty
201 |   :
202 |   $* --pretty <<EOI >>EOI
203 |   {
204 |     "a": "abc",
205 |     "b": "def",
206 |     "c": ""
207 |   }
208 |   EOI
209 | 
210 |   : object
211 |   :
212 |   $* <<EOI >>EOI
213 |   {"a":{"b":1,"c":2,"d":3},"e":{}}
214 |   EOI
215 | 
216 |   : object-pretty
217 |   :
218 |   $* --pretty <<EOI >>EOI
219 |   {
220 |     "a": {
221 |       "b": 1,
222 |       "c": 2,
223 |       "d": 3
224 |     },
225 |     "e": {}
226 |   }
227 |   EOI
228 | 
229 |   : array
230 |   :
231 |   $* <<EOI >>EOI
232 |   {"a":[1,2,3],"b":[]}
233 |   EOI
234 | 
235 |   : array-pretty
236 |   :
237 |   $* --pretty <<EOI >>EOI
238 |   {
239 |     "a": [
240 |       1,
241 |       2,
242 |       3
243 |     ],
244 |     "b": []
245 |   }
246 |   EOI
247 | }}
248 | 
249 | # Multiple levels of nesting covering all of the nesting cases: {{}}, {[]},
250 | # [[]], [{}].
251 | #
252 | : nested
253 | :
254 | {{
255 |   : pretty
256 |   :
257 |   $* --pretty <<EOI >>EOI
258 |   {
259 |     "a": {
260 |       "b": [
261 |         {
262 |           "c": 1,
263 |           "d": 2,
264 |           "e": 3
265 |         },
266 |         {},
267 |         [
268 |           3,
269 |           4,
270 |           5
271 |         ],
272 |         []
273 |       ],
274 |       "f": []
275 |     },
276 |     "g": {}
277 |   }
278 |   EOI
279 | 
280 |   : no-pretty
281 |   :
282 |   $* <<EOI >>EOI
283 |   {"a":{"b":[{"c":1,"d":2,"e":3},{},[3,4,5],[]],"f":[]},"g":{}}
284 |   EOI
285 | }}
286 | 
287 | : multival
288 | :
289 | {{
290 |   : zero
291 |   : Checks that no newline is printed following a complete but empty value
292 |   : sequence, independently of whether or not pretty-printing is enabled.
293 |   :
294 |   {{
295 |     : pretty
296 |     :
297 |     $* --pretty <'' >:''
298 | 
299 |     : no-pretty
300 |     :
301 |     $* <'' >:''
302 |   }}
303 | 
304 |   : multi
305 |   : Checks that top-level values are always separated, independently of
306 |   : whether or not pretty-printing is enabled.
307 |   :
308 |   {{
309 |     : no-pretty
310 |     :
311 |     $* <<EOI >>EOI
312 |     1
313 |     "abc"
314 |     true
315 |     false
316 |     null
317 |     []
318 |     {}
319 |     EOI
320 | 
321 |     : pretty
322 |     :
323 |     $* --pretty <<EOI >>EOI
324 |     1
325 |     2
326 |     EOI
327 |   }}
328 | }}
329 | 


--------------------------------------------------------------------------------