├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── composer.json ├── helpers.php ├── phpstan-baseline.neon ├── phpstan.neon └── src ├── Concerns ├── DetectsEndpoints.php └── GuzzleAware.php ├── Decoders ├── AbstractDecoder.php ├── ConfigurableDecoder.php ├── DecodedValue.php ├── Decoder.php ├── JsonDecoder.php └── SimdjsonDecoder.php ├── Exceptions ├── DecodingException.php ├── GuzzleRequiredException.php ├── IntersectingPointersException.php ├── InvalidPointerException.php ├── JsonParserException.php ├── SyntaxException.php └── UnsupportedSourceException.php ├── JsonParser.php ├── Pointers ├── Pointer.php └── Pointers.php ├── Sources ├── AnySource.php ├── CustomSource.php ├── Endpoint.php ├── Filename.php ├── IterableSource.php ├── Json.php ├── JsonResource.php ├── LaravelClientRequest.php ├── LaravelClientResponse.php ├── Psr7Message.php ├── Psr7Request.php ├── Psr7Stream.php ├── Source.php └── StreamWrapper.php ├── Tokens ├── Colon.php ├── Comma.php ├── CompoundBegin.php ├── CompoundEnd.php ├── Constant.php ├── Lexer.php ├── Parser.php ├── ScalarString.php ├── Token.php ├── Tokenizer.php └── Tokens.php └── ValueObjects ├── Config.php ├── Progress.php ├── State.php └── Tree.php /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to `json-parser` will be documented in this file. 4 | 5 | Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) principles. 
6 | 7 | ## NEXT - YYYY-MM-DD 8 | 9 | ### Added 10 | - Nothing 11 | 12 | ### Deprecated 13 | - Nothing 14 | 15 | ### Fixed 16 | - Nothing 17 | 18 | ### Removed 19 | - Nothing 20 | 21 | ### Security 22 | - Nothing 23 | 24 | 25 | ## 1.1.1 - 2025-01-17 26 | 27 | ### Added 28 | - Support for PHP 8.4 29 | 30 | 31 | ## 1.1.0 - 2023-08-06 32 | 33 | ### Added 34 | - Ability to wrap Parser instances recursively when lazy loading 35 | - Support for turning Parser wrappers into array 36 | - Support for turning sub-trees using wildcards into array 37 | 38 | 39 | ## 1.0.0 - 2023-06-16 40 | 41 | ### Added 42 | - First release of JSON Parser 43 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at `andrea.marco.sartori@gmail.com`. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [http://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: http://contributor-covenant.org 74 | [version]: http://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are **welcome** and will be fully **credited**. 4 | 5 | We accept contributions via Pull Requests on [Github](https://github.com/cerbero90/json-parser). 6 | 7 | 8 | ## Pull Requests 9 | 10 | - **[PSR-12 Coding Standard](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-12-extended-coding-style-guide.md)** - Check the code style with ``$ composer check-style`` and fix it with ``$ composer fix-style``. 11 | 12 | - **Add tests!** - Your patch won't be accepted if it doesn't have tests. 
13 | 14 | - **Document any change in behaviour** - Make sure the `README.md` and any other relevant documentation are kept up-to-date. 15 | 16 | - **Consider our release cycle** - We try to follow [SemVer v2.0.0](http://semver.org/). Randomly breaking public APIs is not an option. 17 | 18 | - **Create feature branches** - Don't ask us to pull from your master branch. 19 | 20 | - **One pull request per feature** - If you want to do more than one thing, send multiple pull requests. 21 | 22 | - **Send coherent history** - Make sure each individual commit in your pull request is meaningful. If you had to make multiple intermediate commits while developing, please [squash them](http://www.git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Changing-Multiple-Commit-Messages) before submitting. 23 | 24 | 25 | ## Running Tests 26 | 27 | ``` bash 28 | $ composer test 29 | ``` 30 | 31 | 32 | **Happy coding**! 33 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Andrea Marco Sartori 4 | 5 | > Permission is hereby granted, free of charge, to any person obtaining a copy 6 | > of this software and associated documentation files (the "Software"), to deal 7 | > in the Software without restriction, including without limitation the rights 8 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | > copies of the Software, and to permit persons to whom the Software is 10 | > furnished to do so, subject to the following conditions: 11 | > 12 | > The above copyright notice and this permission notice shall be included in 13 | > all copies or substantial portions of the Software. 
14 | > 15 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | > THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🧩 JSON Parser 2 | 3 | [![Author][ico-author]][link-author] 4 | [![PHP Version][ico-php]][link-php] 5 | [![Build Status][ico-actions]][link-actions] 6 | [![Coverage Status][ico-scrutinizer]][link-scrutinizer] 7 | [![Quality Score][ico-code-quality]][link-code-quality] 8 | [![PHPStan Level][ico-phpstan]][link-phpstan] 9 | [![Latest Version][ico-version]][link-packagist] 10 | [![Software License][ico-license]](LICENSE.md) 11 | [![PSR-7][ico-psr7]][link-psr7] 12 | [![PSR-12][ico-psr12]][link-psr12] 13 | [![Total Downloads][ico-downloads]][link-downloads] 14 | 15 | Zero-dependencies pull parser to read large JSON from any source in a memory-efficient way. 
16 | 17 | 18 | ## 📦 Install 19 | 20 | Via Composer: 21 | 22 | ``` bash 23 | composer require cerbero/json-parser 24 | ``` 25 | 26 | ## 🔮 Usage 27 | 28 | * [👣 Basics](#-basics) 29 | * [💧 Sources](#-sources) 30 | * [🎯 Pointers](#-pointers) 31 | * [🐼 Lazy pointers](#-lazy-pointers) 32 | * [⚙️ Decoders](#%EF%B8%8F-decoders) 33 | * [💢 Errors handling](#-errors-handling) 34 | * [⏳ Progress](#-progress) 35 | * [🛠 Settings](#-settings) 36 | 37 | 38 | ### 👣 Basics 39 | 40 | JSON Parser provides a minimal API to read large JSON from any source: 41 | 42 | ```php 43 | // a source is anything that can provide a JSON, in this case an endpoint 44 | $source = 'https://randomuser.me/api/1.4?seed=json-parser&results=5'; 45 | 46 | foreach (new JsonParser($source) as $key => $value) { 47 | // instead of loading the whole JSON, we keep in memory only one key and value at a time 48 | } 49 | ``` 50 | 51 | Depending on our code style, we can instantiate the parser in 3 different ways: 52 | 53 | ```php 54 | use Cerbero\JsonParser\JsonParser; 55 | use function Cerbero\JsonParser\parseJson; 56 | 57 | 58 | // classic object instantiation 59 | new JsonParser($source); 60 | 61 | // static instantiation 62 | JsonParser::parse($source); 63 | 64 | // namespaced function 65 | parseJson($source); 66 | ``` 67 | 68 | If we don't want to use `foreach()` to loop through each key and value, we can chain the `traverse()` method: 69 | 70 | ```php 71 | JsonParser::parse($source)->traverse(function (mixed $value, string|int $key, JsonParser $parser) { 72 | // lazily load one key and value at a time, we can also access the parser if needed 73 | }); 74 | 75 | // no foreach needed 76 | ``` 77 | 78 | > ⚠️ Please note the parameters order of the callback: the value is passed before the key. 79 | 80 | ### 💧 Sources 81 | 82 | A JSON source is any data point that provides a JSON. A wide range of sources are supported by default: 83 | - **strings**, e.g. `{"foo":"bar"}` 84 | - **iterables**, i.e. 
arrays or instances of `Traversable` 85 | - **file paths**, e.g. `/path/to/large.json` 86 | - **resources**, e.g. streams 87 | - **API endpoint URLs**, e.g. `https://endpoint.json` or any instance of `Psr\Http\Message\UriInterface` 88 | - **PSR-7 requests**, i.e. any instance of `Psr\Http\Message\RequestInterface` 89 | - **PSR-7 messages**, i.e. any instance of `Psr\Http\Message\MessageInterface` 90 | - **PSR-7 streams**, i.e. any instance of `Psr\Http\Message\StreamInterface` 91 | - **Laravel HTTP client requests**, i.e. any instance of `Illuminate\Http\Client\Request` 92 | - **Laravel HTTP client responses**, i.e. any instance of `Illuminate\Http\Client\Response` 93 | - **user-defined sources**, i.e. any instance of `Cerbero\JsonParser\Sources\Source` 94 | 95 | If the source we need to parse is not supported by default, we can implement our own custom source. 96 | 97 |
Click here to see how to implement a custom source. 98 | 99 | To implement a custom source, we need to extend `Source` and implement 3 methods: 100 | 101 | ```php 102 | use Cerbero\JsonParser\Sources\Source; 103 | use Traversable; 104 | 105 | class CustomSource extends Source 106 | { 107 | public function getIterator(): Traversable 108 | { 109 | // return a Traversable holding the JSON source, e.g. a Generator yielding chunks of JSON 110 | } 111 | 112 | public function matches(): bool 113 | { 114 | // return TRUE if this class can handle the JSON source 115 | } 116 | 117 | protected function calculateSize(): ?int 118 | { 119 | // return the size of the JSON in bytes or NULL if it can't be calculated 120 | } 121 | } 122 | ``` 123 | 124 | The parent class `Source` gives us access to 2 properties: 125 | - `$source`: the JSON source we pass to the parser, i.e.: `new JsonParser($source)` 126 | - `$config`: the configuration we set by chaining methods like `$parser->pointer('/foo')` 127 | 128 | The method `getIterator()` defines the logic to read the JSON source in a memory-efficient way. It feeds the parser with small pieces of JSON. Please refer to the [already existing sources](https://github.com/cerbero90/json-parser/tree/master/src/Sources) to see some implementations. 129 | 130 | The method `matches()` determines whether the JSON source passed to the parser can be handled by our custom implementation. In other words, we are telling the parser if it should use our class for the JSON to parse. 131 | 132 | Finally, `calculateSize()` computes the whole size of the JSON source. It's used to track the [parsing progress](#-progress), however it's not always possible to know the size of a JSON source. In this case, or if we don't need to track the progress, we can return `null`. 
133 | 134 | Now that we have implemented our custom source, we can pass it to the parser: 135 | 136 | ```php 137 | $json = JsonParser::parse(new CustomSource($source)); 138 | 139 | foreach ($json as $key => $value) { 140 | // process one key and value of $source at a time 141 | } 142 | ``` 143 | 144 | If you find yourself implementing the same custom source in different projects, feel free to send a PR and we will consider supporting your custom source by default. Thank you in advance for any contribution! 145 | 
146 | 147 | 148 | ### 🎯 Pointers 149 | 150 | A JSON pointer is a [standard](https://www.rfc-editor.org/rfc/rfc6901) used to point to nodes within a JSON. This package leverages JSON pointers to extract only some sub-trees from large JSONs. 151 | 152 | Consider [this JSON](https://randomuser.me/api/1.4?seed=json-parser&results=5) for example. To extract only the first gender and avoid parsing the rest of the JSON, we can set the `/results/0/gender` pointer: 153 | 154 | ```php 155 | $json = JsonParser::parse($source)->pointer('/results/0/gender'); 156 | 157 | foreach ($json as $key => $value) { 158 | // 1st and only iteration: $key === 'gender', $value === 'female' 159 | } 160 | ``` 161 | 162 | JSON Parser takes advantage of the `-` wildcard to point to any array index, so we can extract all the genders with the `/results/-/gender` pointer: 163 | 164 | ```php 165 | $json = JsonParser::parse($source)->pointer('/results/-/gender'); 166 | 167 | foreach ($json as $key => $value) { 168 | // 1st iteration: $key === 'gender', $value === 'female' 169 | // 2nd iteration: $key === 'gender', $value === 'female' 170 | // 3rd iteration: $key === 'gender', $value === 'male' 171 | // and so on for all the objects in the array... 172 | } 173 | ``` 174 | 175 | If we want to extract more sub-trees, we can set multiple pointers. Let's extract all genders and countries: 176 | 177 | ```php 178 | $json = JsonParser::parse($source)->pointers(['/results/-/gender', '/results/-/location/country']); 179 | 180 | foreach ($json as $key => $value) { 181 | // 1st iteration: $key === 'gender', $value === 'female' 182 | // 2nd iteration: $key === 'country', $value === 'Germany' 183 | // 3rd iteration: $key === 'gender', $value === 'female' 184 | // 4th iteration: $key === 'country', $value === 'Mexico' 185 | // and so on for all the objects in the array... 
186 | } 187 | ``` 188 | 189 | > ⚠️ Intersecting pointers like `/foo` and `/foo/bar` are not allowed, but intersecting wildcards like `/foo/-/bar` and `/foo/0/bar` are possible. 190 | 191 | We can also specify a callback to execute when JSON pointers are found. This is handy when we have different pointers and we need to run custom logic for each of them: 192 | 193 | ```php 194 | $json = JsonParser::parse($source)->pointers([ 195 | '/results/-/gender' => fn (string $gender, string $key) => new Gender($gender), 196 | '/results/-/location/country' => fn (string $country, string $key) => new Country($country), 197 | ]); 198 | 199 | foreach ($json as $key => $value) { 200 | // 1st iteration: $key === 'gender', $value instanceof Gender 201 | // 2nd iteration: $key === 'country', $value instanceof Country 202 | // and so on for all the objects in the array... 203 | } 204 | ``` 205 | 206 | > ⚠️ Please note the parameters order of the callbacks: the value is passed before the key. 207 | 208 | The same can also be achieved by chaining the method `pointer()` multiple times: 209 | 210 | ```php 211 | $json = JsonParser::parse($source) 212 | ->pointer('/results/-/gender', fn (string $gender, string $key) => new Gender($gender)) 213 | ->pointer('/results/-/location/country', fn (string $country, string $key) => new Country($country)); 214 | 215 | foreach ($json as $key => $value) { 216 | // 1st iteration: $key === 'gender', $value instanceof Gender 217 | // 2nd iteration: $key === 'country', $value instanceof Country 218 | // and so on for all the objects in the array... 219 | } 220 | ``` 221 | 222 | Pointer callbacks can also be used to customize a key. 
We can achieve that by updating the key **reference**: 223 | 224 | ```php 225 | $json = JsonParser::parse($source)->pointer('/results/-/name/first', function (string $name, string &$key) { 226 | $key = 'first_name'; 227 | }); 228 | 229 | foreach ($json as $key => $value) { 230 | // 1st iteration: $key === 'first_name', $value === 'Sara' 231 | // 2nd iteration: $key === 'first_name', $value === 'Andrea' 232 | // and so on for all the objects in the array... 233 | } 234 | ``` 235 | 236 | If the callbacks are enough to handle the pointers and we don't need to run any common logic for all pointers, we can avoid to manually call `foreach()` by chaining the method `traverse()`: 237 | 238 | ```php 239 | JsonParser::parse($source) 240 | ->pointer('/-/gender', $this->handleGender(...)) 241 | ->pointer('/-/location/country', $this->handleCountry(...)) 242 | ->traverse(); 243 | 244 | // no foreach needed 245 | ``` 246 | 247 | Otherwise if some common logic for all pointers is needed but we prefer methods chaining to manual loops, we can pass a callback to the `traverse()` method: 248 | 249 | ```php 250 | JsonParser::parse($source) 251 | ->pointer('/results/-/gender', fn (string $gender, string $key) => new Gender($gender)) 252 | ->pointer('/results/-/location/country', fn (string $country, string $key) => new Country($country)) 253 | ->traverse(function (Gender|Country $value, string $key, JsonParser $parser) { 254 | // 1st iteration: $key === 'gender', $value instanceof Gender 255 | // 2nd iteration: $key === 'country', $value instanceof Country 256 | // and so on for all the objects in the array... 257 | }); 258 | 259 | // no foreach needed 260 | ``` 261 | 262 | > ⚠️ Please note the parameters order of the callbacks: the value is passed before the key. 263 | 264 | Sometimes the sub-trees extracted by pointers are small enough to be kept entirely in memory. 
In such cases, we can chain `toArray()` to eager load the extracted sub-trees into an array: 265 | 266 | ```php 267 | // ['gender' => 'female', 'country' => 'Germany'] 268 | $array = JsonParser::parse($source)->pointers(['/results/0/gender', '/results/0/location/country'])->toArray(); 269 | ``` 270 | 271 | ### 🐼 Lazy pointers 272 | 273 | JSON Parser only keeps one key and one value in memory at a time. However, if the value is a large array or object, it may be inefficient or even impossible to keep it all in memory. 274 | 275 | To solve this problem, we can use lazy pointers. These pointers recursively keep in memory only one key and one value at a time for any nested array or object. 276 | 277 | ```php 278 | $json = JsonParser::parse($source)->lazyPointer('/results/0/name'); 279 | 280 | foreach ($json as $key => $value) { 281 | // 1st iteration: $key === 'name', $value instanceof Parser 282 | } 283 | ``` 284 | 285 | Lazy pointers return a lightweight instance of `Cerbero\JsonParser\Tokens\Parser` instead of the actual large value. To lazy load nested keys and values, we can then loop through the parser: 286 | 287 | ```php 288 | $json = JsonParser::parse($source)->lazyPointer('/results/0/name'); 289 | 290 | foreach ($json as $key => $value) { 291 | // 1st iteration: $key === 'name', $value instanceof Parser 292 | foreach ($value as $nestedKey => $nestedValue) { 293 | // 1st iteration: $nestedKey === 'title', $nestedValue === 'Mrs' 294 | // 2nd iteration: $nestedKey === 'first', $nestedValue === 'Sara' 295 | // 3rd iteration: $nestedKey === 'last', $nestedValue === 'Meder' 296 | } 297 | } 298 | ``` 299 | 300 | As mentioned above, lazy pointers are recursive. 
This means that no nested objects or arrays will ever be kept in memory: 301 | 302 | ```php 303 | $json = JsonParser::parse($source)->lazyPointer('/results/0/location'); 304 | 305 | foreach ($json as $key => $value) { 306 | // 1st iteration: $key === 'location', $value instanceof Parser 307 | foreach ($value as $nestedKey => $nestedValue) { 308 | // 1st iteration: $nestedKey === 'street', $nestedValue instanceof Parser 309 | // 2nd iteration: $nestedKey === 'city', $nestedValue === 'Sontra' 310 | // ... 311 | // 6th iteration: $nestedKey === 'coordinates', $nestedValue instanceof Parser 312 | // 7th iteration: $nestedKey === 'timezone', $nestedValue instanceof Parser 313 | } 314 | } 315 | ``` 316 | 317 | To lazily parse the entire JSON, we can simply chain the `lazy()` method: 318 | 319 | ```php 320 | foreach (JsonParser::parse($source)->lazy() as $key => $value) { 321 | // 1st iteration: $key === 'results', $value instanceof Parser 322 | // 2nd iteration: $key === 'info', $value instanceof Parser 323 | } 324 | ``` 325 | 326 | We can recursively wrap any instance of `Cerbero\JsonParser\Tokens\Parser` by chaining `wrap()`. This lets us wrap lazy loaded JSON arrays and objects into classes with advanced functionalities, like mapping or filtering: 327 | 328 | ```php 329 | $json = JsonParser::parse($source) 330 | ->wrap(fn (Parser $parser) => new MyWrapper(fn () => yield from $parser)) 331 | ->lazy(); 332 | 333 | foreach ($json as $key => $value) { 334 | // 1st iteration: $key === 'results', $value instanceof MyWrapper 335 | foreach ($value as $nestedKey => $nestedValue) { 336 | // 1st iteration: $nestedKey === 0, $nestedValue instanceof MyWrapper 337 | // 2nd iteration: $nestedKey === 1, $nestedValue instanceof MyWrapper 338 | // ... 339 | } 340 | } 341 | ``` 342 | 343 | > ℹ️ If your wrapper class implements the method `toArray()`, such method will be called when eager loading sub-trees into an array. 
344 | 345 | Lazy pointers also have all the other functionalities of normal pointers: they accept callbacks, can be set one by one or all together, can be eager loaded into an array and can be mixed with normal pointers as well: 346 | 347 | ```php 348 | // set custom callback to run only when names are found 349 | $json = JsonParser::parse($source)->lazyPointer('/results/-/name', fn (Parser $name) => $this->handleName($name)); 350 | 351 | // set multiple lazy pointers one by one 352 | $json = JsonParser::parse($source) 353 | ->lazyPointer('/results/-/name', fn (Parser $name) => $this->handleName($name)) 354 | ->lazyPointer('/results/-/location', fn (Parser $location) => $this->handleLocation($location)); 355 | 356 | // set multiple lazy pointers all together 357 | $json = JsonParser::parse($source)->lazyPointers([ 358 | '/results/-/name' => fn (Parser $name) => $this->handleName($name), 359 | '/results/-/location' => fn (Parser $location) => $this->handleLocation($location), 360 | ]); 361 | 362 | // eager load lazy pointers into an array 363 | // ['name' => ['title' => 'Mrs', 'first' => 'Sara', 'last' => 'Meder'], 'street' => ['number' => 46, 'name' => 'Römerstraße']] 364 | $array = JsonParser::parse($source)->lazyPointers(['/results/0/name', '/results/0/location/street'])->toArray(); 365 | 366 | // mix pointers and lazy pointers 367 | $json = JsonParser::parse($source) 368 | ->pointer('/results/-/gender', fn (string $gender) => $this->handleGender($gender)) 369 | ->lazyPointer('/results/-/name', fn (Parser $name) => $this->handleName($name)); 370 | ``` 371 | 372 | ### ⚙️ Decoders 373 | 374 | By default JSON Parser uses the built-in PHP function `json_decode()` to decode one key and value at a time. 
375 | 376 | Normally it decodes values to associative arrays but, if we prefer to decode values to objects, we can set a custom decoder: 377 | 378 | ```php 379 | use Cerbero\JsonParser\Decoders\JsonDecoder; 380 | 381 | JsonParser::parse($source)->decoder(new JsonDecoder(decodesToArray: false)); 382 | ``` 383 | 384 | The [simdjson extension](https://github.com/crazyxman/simdjson_php#simdjson_php) offers a decoder [faster](https://github.com/crazyxman/simdjson_php/tree/master/benchmark#run-phpbench-benchmark) than `json_decode()` that can be installed via `pecl install simdjson` if your server satisfies the [requirements](https://github.com/crazyxman/simdjson_php#requirement). JSON Parser leverages the simdjson decoder by default if the extension is loaded. 385 | 386 | If we need a decoder that is not supported by default, we can implement our custom one. 387 | 388 |
Click here to see how to implement a custom decoder. 389 | 390 | To create a custom decoder, we need to implement the `Decoder` interface and implement 1 method: 391 | 392 | ```php 393 | use Cerbero\JsonParser\Decoders\Decoder; 394 | use Cerbero\JsonParser\Decoders\DecodedValue; 395 | 396 | class CustomDecoder implements Decoder 397 | { 398 | public function decode(string $json): DecodedValue 399 | { 400 | // return an instance of DecodedValue both in case of success or failure 401 | } 402 | } 403 | ``` 404 | 405 | The method `decode()` defines the logic to decode the given JSON value and it needs to return an instance of `DecodedValue` both in case of success or failure. 406 | 407 | To make custom decoder implementations even easier, JSON Parser provides an [abstract decoder](https://github.com/cerbero90/json-parser/tree/master/src/Decoders/AbstractDecoder.php) that hydrates `DecodedValue` for us so that we just need to define how a JSON value should be decoded: 408 | 409 | ```php 410 | use Cerbero\JsonParser\Decoders\AbstractDecoder; 411 | 412 | class CustomDecoder extends AbstractDecoder 413 | { 414 | protected function decodeJson(string $json): mixed 415 | { 416 | // decode the given JSON or throw an exception on failure 417 | return json_decode($json, flags: JSON_THROW_ON_ERROR); 418 | } 419 | } 420 | ``` 421 | 422 | > ⚠️ Please make sure to throw an exception in `decodeJson()` if the decoding process fails. 423 | 424 | Now that we have implemented our custom decoder, we can set it like this: 425 | 426 | ```php 427 | JsonParser::parse($source)->decoder(new CustomDecoder()); 428 | ``` 429 | 430 | To see some implementation examples, please refer to the [already existing decoders](https://github.com/cerbero90/json-parser/tree/master/src/Decoders). 431 | 432 | If you find yourself implementing the same custom decoder in different projects, feel free to send a PR and we will consider to support your custom decoder by default. 
Thank you in advance for any contribution! 433 |
434 | 435 | 436 | ### 💢 Errors handling 437 | 438 | Not all JSONs are valid: some may present syntax errors due to an incorrect structure (e.g. `[}`) or decoding errors when values can't be decoded properly (e.g. `[1a]`). JSON Parser allows us to intervene and define the logic to run when these issues occur: 439 | 440 | ```php 441 | use Cerbero\JsonParser\Decoders\DecodedValue; 442 | use Cerbero\JsonParser\Exceptions\SyntaxException; 443 | 444 | $json = JsonParser::parse($source) 445 | ->onSyntaxError(fn (SyntaxException $e) => $this->handleSyntaxError($e)) 446 | ->onDecodingError(fn (DecodedValue $decoded) => $this->handleDecodingError($decoded)); 447 | ``` 448 | 449 | We can even replace invalid values with placeholders to prevent the entire JSON parsing from failing because of them: 450 | 451 | ```php 452 | // instead of failing, replace invalid values with NULL 453 | $json = JsonParser::parse($source)->patchDecodingError(); 454 | 455 | // instead of failing, replace invalid values with '' 456 | $json = JsonParser::parse($source)->patchDecodingError(''); 457 | ``` 458 | 459 | For more advanced decoding errors patching, we can pass a closure that has access to the `DecodedValue` instance: 460 | 461 | ```php 462 | use Cerbero\JsonParser\Decoders\DecodedValue; 463 | 464 | $patches = ['1a' => 1, '2b' => 2]; 465 | $json = JsonParser::parse($source) 466 | ->patchDecodingError(fn (DecodedValue $decoded) => $patches[$decoded->json] ?? null); 467 | ``` 468 | 469 | Any exception thrown by this package implements the `JsonParserException` interface. 
This makes it easy to handle all exceptions in a single catch block: 470 | 471 | ```php 472 | use Cerbero\JsonParser\Exceptions\JsonParserException; 473 | 474 | try { 475 | JsonParser::parse($source)->traverse(); 476 | } catch (JsonParserException) { 477 | // handle any exception thrown by JSON Parser 478 | } 479 | ``` 480 | 481 | For reference, here is a comprehensive table of all the exceptions thrown by this package: 482 | |`Cerbero\JsonParser\Exceptions\`|thrown when| 483 | |---|---| 484 | |`DecodingException`|a value in the JSON can't be decoded| 485 | |`GuzzleRequiredException`|Guzzle is not installed and the JSON source is an endpoint| 486 | |`IntersectingPointersException`|two JSON pointers intersect| 487 | |`InvalidPointerException`|a JSON pointer syntax is not valid| 488 | |`SyntaxException`|the JSON structure is not valid| 489 | |`UnsupportedSourceException`|a JSON source is not supported| 490 | 491 | 492 | ### ⏳ Progress 493 | 494 | When processing large JSONs, it can be helpful to track the parsing progress. JSON Parser provides convenient methods for accessing all the progress details: 495 | 496 | ```php 497 | $json = new JsonParser($source); 498 | 499 | $json->progress(); // the progress object exposing the methods below 500 | $json->progress()->current(); // the already parsed bytes e.g. 86759341 501 | $json->progress()->total(); // the total bytes to parse e.g. 182332642 502 | $json->progress()->fraction(); // the completed fraction e.g. 0.47583 503 | $json->progress()->percentage(); // the completed percentage e.g. 47.583 504 | $json->progress()->format(); // the formatted progress e.g. 47.5% 505 | ``` 506 | 507 | The total size of a JSON is calculated differently depending on the [source](#-sources). 
In some cases, it may not be possible to determine the size of a JSON and only the current progress is known: 508 | 509 | ```php 510 | $json->progress()->current(); // 86759341 511 | $json->progress()->total(); // null 512 | $json->progress()->fraction(); // null 513 | $json->progress()->percentage(); // null 514 | $json->progress()->format(); // null 515 | ``` 516 | 517 | 518 | ### 🛠 Settings 519 | 520 | JSON Parser also provides other settings to fine-tune the parsing process. For example we can set the number of bytes to read when parsing JSON strings or streams: 521 | 522 | ```php 523 | $json = JsonParser::parse($source)->bytes(1024 * 16); // read JSON chunks of 16KB 524 | ``` 525 | 526 | ## 📆 Change log 527 | 528 | Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. 529 | 530 | ## 🧪 Testing 531 | 532 | ``` bash 533 | composer test 534 | ``` 535 | 536 | ## 💞 Contributing 537 | 538 | Please see [CONTRIBUTING](CONTRIBUTING.md) and [CODE_OF_CONDUCT](CODE_OF_CONDUCT.md) for details. 539 | 540 | ## 🧯 Security 541 | 542 | If you discover any security related issues, please email andrea.marco.sartori@gmail.com instead of using the issue tracker. 543 | 544 | ## 🏅 Credits 545 | 546 | - [Andrea Marco Sartori][link-author] 547 | - [All Contributors][link-contributors] 548 | 549 | ## ⚖️ License 550 | 551 | The MIT License (MIT). Please see [License File](LICENSE.md) for more information. 
552 | 553 | [ico-author]: https://img.shields.io/static/v1?label=author&message=cerbero90&color=50ABF1&logo=twitter&style=flat-square 554 | [ico-php]: https://img.shields.io/packagist/php-v/cerbero/json-parser?color=%234F5B93&logo=php&style=flat-square 555 | [ico-version]: https://img.shields.io/packagist/v/cerbero/json-parser.svg?label=version&style=flat-square 556 | [ico-actions]: https://img.shields.io/github/actions/workflow/status/cerbero90/json-parser/build.yml?branch=master&style=flat-square&logo=github 557 | [ico-license]: https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square 558 | [ico-psr7]: https://img.shields.io/static/v1?label=compliance&message=PSR-7&color=blue&style=flat-square 559 | [ico-psr12]: https://img.shields.io/static/v1?label=compliance&message=PSR-12&color=blue&style=flat-square 560 | [ico-scrutinizer]: https://img.shields.io/scrutinizer/coverage/g/cerbero90/json-parser.svg?style=flat-square&logo=scrutinizer 561 | [ico-code-quality]: https://img.shields.io/scrutinizer/g/cerbero90/json-parser.svg?style=flat-square&logo=scrutinizer 562 | [ico-phpstan]: 
https://img.shields.io/badge/level-max-success?style=flat-square&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAAGb0lEQVR42u1Xe1BUZRS/y4Kg8oiR3FCCBUySESZBRCiaBnmEsOzeSzsg+KxYYO9dEEftNRqZjx40FRZkTpqmOz5S2LsXlEZBciatkQnHDGYaGdFy1EpGMHl/p/PdFlt2rk5O+J9n5nA/vtf5ned3lnlISpRhafBlLRLHCtJGVrB/ZBDsaw2lUqzReGAC46DstTYfnSCGUjaaDvgxACo6j3vUenNdImeRXqdnWV5az5rrnzeZznj8J+E5Ftsclhf3s4J4CS/oRx5Bvon8ZU65FGYQxAwcf85a7CeRz+C41THejueydCZ7AAK34nwv3kHP/oUKdOL4K7258fF7Cud427O48RQeGkIGJ77N8fZqlrcfRP4d/x90WQfHXLeBt9dTrSlwl3V65ynWLM1SEA2qbNQckbe4Xmww10Hmy3shid0CMcmlEJtSDsl5VZBdfAgMvI3uuR+moJqN6LaxmpsOBeLCDmTifCB92RcQmbAUJvtqALc5sQr8p86gYBCcFdBq9wOin7NQax6ewlB6rqLZHf23FP10y3lj6uJtEBg2HxiVCtzd3SEwMBCio6Nh9uzZ4O/vLwOZ4OUNM2NyIGPFrvuzBG//lRPs+VQ2k1ki+ePkd84bskz7YFpYgizEz88P8vPzYffu3dDS0gJNTU1QXV0NqampRK1WIwgfiE4qhOyig0rC+pCvK8QUoML7uJVHA5kcQUp3DSpqWjc3d/Dy8oKioiLo6uqCoaEhuHb1KvT09AAhBFpbW4lOpyMyyIBQSCmoUQLQzgniNvz+obB2HS2RwBgE6dOxCyJogmNkP2u1Wrhw4QJ03+iGrR9XEd3CTNBn6eCbo40wPDwMdXV1BF1DVG5qiEtboxSUP6J71+D3NwUAhLOIRQzm7lnnhYUv7QFv/yDZ/Lm5ubK2DVI9iZ8bR8JDtEB57lNzENQN6OjoIGlpabIVZsYaMTO+hrikRRA1JxmSX9hE7/sJtVyF38tKsUCVZxBhz9jI3wGT/QJlADzPAyXrnj0kInzGHQCRMyOg/ed2uHjxIuE4TgYQHq2DLJqumashY+lnsMC4GVC5do6XVuK9l+4SkN8y+GfYeVJn2g++U7QygPT0dBgYGIDvT58mnF5PQcjC83PzSF9fH7S1tZGEhAQZQOT8JaA317oIkM6jS8uVLSDzOQqg23Uh+MlkOf00Gg0cP34c+vv74URzM9n41gby/rvvkc7OThlATU3NCGYJUXt4QaLuTYwBcTSOBmj1RD7D4Tsix4ByOjZRF/zgupDEbgZ3j4ly/qekpND0o5aQ44HS4OAgsVqtI1gTZO01IbG0aP1bknnxCDUvArHi+B0lJSlzglTFYO2udF3Ql9TCrHn5oEIreHp6QlRUFJSUlJCqqipSWVlJ8vLyCGYIFS7HS3zGa87mv4lcjLwLlStlLTKYYUUAlvrlDGcW45wKxXX6aqHZNutM+1oQBHFTewAKkoH4+vqCj48PYAGS5yb5amjNoO+CU2SL53NKpDD0vxHHmOJir7L5xUvZgm0us2R142ScOIyVqYvlpWU4XoHIP8DXL2b+wjdWeXh6U2FjmIIKmbWAYPFRMus62h/geIvjOQYlpuDysQrLL6Ger49HgW8jqvXUhI7UvDb9iaSTDqHtyItiF5Suw5ewF/Nd8VJ6zlhsn06bEhwX4NyfCvuGEeRpTmh4mkG68yDpyuzB9EUcjU5awbAgncPlAeSdAQER0zCndzqVbeXC4qDsMpvGEYBXRnsDx4N3Auf1FCTjTIaVtY/QTmd0I8bBVm1kejEubUfO01vqImn3c49X7qpeqI9inIgtbpxK3YrKfIJCt+OeV2nfUVFR4ca4EkVENyA7gkYcMfB1R5MM
mxZ7ez/2KF5SSN1yV+158UPsJT0ZBcI2bRLtIXGoYu5FerOUiJe1OfsL3XEWH43l2KS+iJF9+S4FpcNgsc+j8cT8H4o1bfPg/qkLt50uJ1RzdMsGg0UqwfEN114Pwb1CtWTGg+Y9U5ClK9x7xUWI7BI5VQVp0AVcQ3bZkQhmnEgdHhKyNSZe16crtBIlc7sIb6cRLft2PCgoKGjijBDtjrAQ7a3EdMsxzIRflAFIhPb6mHYmYwX+WBlPQgskhgVryyJCQyNyBLsBQdQ6fgsQhyt6MSOOsWZ7gbH8wETmgRKAijatNL8Ngm0xx4tLcsps0Wzx4al0jXlI40B/A3pa144MDtSgAAAAAElFTkSuQmCC 563 | [ico-downloads]: https://img.shields.io/packagist/dt/cerbero/json-parser.svg?style=flat-square 564 | 565 | [link-author]: https://twitter.com/cerbero90 566 | [link-php]: https://www.php.net 567 | [link-packagist]: https://packagist.org/packages/cerbero/json-parser 568 | [link-actions]: https://github.com/cerbero90/json-parser/actions?query=workflow%3Abuild 569 | [link-psr7]: https://www.php-fig.org/psr/psr-7/ 570 | [link-psr12]: https://www.php-fig.org/psr/psr-12/ 571 | [link-scrutinizer]: https://scrutinizer-ci.com/g/cerbero90/json-parser/code-structure 572 | [link-code-quality]: https://scrutinizer-ci.com/g/cerbero90/json-parser 573 | [link-phpstan]: https://phpstan.org/ 574 | [link-downloads]: https://packagist.org/packages/cerbero/json-parser 575 | [link-contributors]: ../../contributors 576 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cerbero/json-parser", 3 | "type": "library", 4 | "description": "Zero-dependencies pull parser to read large JSON from any source in a memory-efficient way.", 5 | "keywords": [ 6 | "json", 7 | "parser", 8 | "json-parser", 9 | "lexer", 10 | "memory" 11 | ], 12 | "homepage": "https://github.com/cerbero90/json-parser", 13 | "license": "MIT", 14 | "authors": [{ 15 | "name": "Andrea Marco Sartori", 16 | "email": "andrea.marco.sartori@gmail.com", 17 | "homepage": "https://github.com/cerbero90", 18 | "role": "Developer" 19 | }], 20 | "require": { 21 | "php": "^8.1" 22 | }, 23 | "require-dev": { 24 | "guzzlehttp/guzzle": 
"^7.2", 25 | "illuminate/http": ">=6.20", 26 | "mockery/mockery": "^1.5", 27 | "pestphp/pest": "^2.0", 28 | "phpstan/phpstan": "^1.9", 29 | "scrutinizer/ocular": "^1.8", 30 | "squizlabs/php_codesniffer": "^3.0" 31 | }, 32 | "suggest": { 33 | "guzzlehttp/guzzle": "Required to load JSON from endpoints (^7.2)." 34 | }, 35 | "autoload": { 36 | "psr-4": { 37 | "Cerbero\\JsonParser\\": "src" 38 | }, 39 | "files": [ 40 | "helpers.php" 41 | ] 42 | }, 43 | "autoload-dev": { 44 | "psr-4": { 45 | "Cerbero\\JsonParser\\": "tests" 46 | } 47 | }, 48 | "scripts": { 49 | "test": "pest", 50 | "static": "phpstan analyze", 51 | "check-style": "phpcs --standard=PSR12 src", 52 | "fix-style": "phpcbf --standard=PSR12 src" 53 | }, 54 | "extra": { 55 | "branch-alias": { 56 | "dev-master": "1.0-dev" 57 | } 58 | }, 59 | "config": { 60 | "sort-packages": true, 61 | "allow-plugins": { 62 | "pestphp/pest-plugin": true 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /helpers.php: -------------------------------------------------------------------------------- 1 | guzzleIsInstalled()) { 26 | throw new GuzzleRequiredException(); 27 | } 28 | } 29 | 30 | /** 31 | * Determine whether Guzzle is installed 32 | * 33 | * @return bool 34 | */ 35 | protected function guzzleIsInstalled(): bool 36 | { 37 | return class_exists(Client::class); 38 | } 39 | 40 | /** 41 | * Retrieve the JSON response of the given URL 42 | * 43 | * @param UriInterface|string $url 44 | * @return ResponseInterface 45 | */ 46 | protected function getJson(UriInterface|string $url): ResponseInterface 47 | { 48 | return $this->guzzle()->get($url, [ 49 | 'headers' => [ 50 | 'Accept' => 'application/json', 51 | 'Content-Type' => 'application/json', 52 | ], 53 | ]); 54 | } 55 | 56 | /** 57 | * Retrieve the Guzzle client 58 | * 59 | * @codeCoverageIgnore 60 | * @return Client 61 | */ 62 | protected function guzzle(): Client 63 | { 64 | return new Client(); 65 | } 66 | 67 | /** 68 | 
* Retrieve the JSON response of the given request 69 | * 70 | * @param RequestInterface $request 71 | * @return ResponseInterface 72 | */ 73 | protected function sendRequest(RequestInterface $request): ResponseInterface 74 | { 75 | return $this->guzzle()->sendRequest($request); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/Decoders/AbstractDecoder.php: -------------------------------------------------------------------------------- 1 | decodeJson($json); 32 | } catch (Throwable $e) { 33 | return DecodedValue::failed($e, $json); 34 | } 35 | 36 | return DecodedValue::succeeded($value); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/Decoders/ConfigurableDecoder.php: -------------------------------------------------------------------------------- 1 | config->decoder->decode($value); 36 | 37 | if (!$decoded->succeeded) { 38 | ($this->config->onDecodingError)($decoded); 39 | } 40 | 41 | return $decoded->value; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/Decoders/DecodedValue.php: -------------------------------------------------------------------------------- 1 | getMessage(), $e->getCode(), $e, $json); 34 | } 35 | 36 | /** 37 | * Instantiate the class. 
38 | * 39 | * @param mixed $value 40 | */ 41 | private function __construct( 42 | public readonly bool $succeeded, 43 | public mixed $value = null, 44 | public readonly ?string $error = null, 45 | public readonly ?int $code = null, 46 | public readonly ?Throwable $exception = null, 47 | public readonly ?string $json = null, 48 | ) { 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/Decoders/Decoder.php: -------------------------------------------------------------------------------- 1 | $depth 16 | */ 17 | public function __construct(private readonly bool $decodesToArray = true, private readonly int $depth = 512) 18 | { 19 | } 20 | 21 | /** 22 | * Retrieve the decoded value of the given JSON 23 | * 24 | * @param string $json 25 | * @return mixed 26 | * @throws \Throwable 27 | */ 28 | protected function decodeJson(string $json): mixed 29 | { 30 | return json_decode($json, $this->decodesToArray, $this->depth, JSON_THROW_ON_ERROR); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/Decoders/SimdjsonDecoder.php: -------------------------------------------------------------------------------- 1 | decodesToArray, $this->depth); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/Exceptions/DecodingException.php: -------------------------------------------------------------------------------- 1 | error, (int) $decoded->code); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/Exceptions/GuzzleRequiredException.php: -------------------------------------------------------------------------------- 1 | position = $position; 39 | $this->message .= " at position {$position}"; 40 | 41 | return $this; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/Exceptions/UnsupportedSourceException.php: 
-------------------------------------------------------------------------------- 1 | 22 | */ 23 | final class JsonParser implements IteratorAggregate 24 | { 25 | /** 26 | * The configuration. 27 | * 28 | * @var Config 29 | */ 30 | private readonly Config $config; 31 | 32 | /** 33 | * The lexer. 34 | * 35 | * @var Lexer 36 | */ 37 | private readonly Lexer $lexer; 38 | 39 | /** 40 | * The parser. 41 | * 42 | * @var Parser 43 | */ 44 | private readonly Parser $parser; 45 | 46 | /** 47 | * Instantiate the class statically 48 | * 49 | * @param mixed $source 50 | * @return self 51 | */ 52 | public static function parse(mixed $source): self 53 | { 54 | return new self($source); 55 | } 56 | 57 | /** 58 | * Instantiate the class. 59 | * 60 | * @param mixed $source 61 | */ 62 | public function __construct(mixed $source) 63 | { 64 | $this->config = new Config(); 65 | $this->lexer = new Lexer(new AnySource($source, $this->config)); 66 | $this->parser = new Parser($this->lexer->getIterator(), $this->config); 67 | } 68 | 69 | /** 70 | * Retrieve the lazily iterable JSON 71 | * 72 | * @return Traversable 73 | */ 74 | public function getIterator(): Traversable 75 | { 76 | try { 77 | yield from $this->parser; 78 | } catch (SyntaxException $e) { 79 | $e->setPosition($this->lexer->position()); 80 | ($this->config->onSyntaxError)($e); 81 | } 82 | } 83 | 84 | /** 85 | * Set the JSON pointers 86 | * 87 | * @param string[]|array $pointers 88 | * @return self 89 | */ 90 | public function pointers(array $pointers): self 91 | { 92 | foreach ($pointers as $pointer => $callback) { 93 | $callback instanceof Closure ? 
$this->pointer($pointer, $callback) : $this->pointer($callback); 94 | } 95 | 96 | return $this; 97 | } 98 | 99 | /** 100 | * Set a JSON pointer 101 | * 102 | * @param string $pointer 103 | * @param ?Closure $callback 104 | * @return self 105 | */ 106 | public function pointer(string $pointer, ?Closure $callback = null): self 107 | { 108 | $this->config->pointers->add(new Pointer($pointer, false, $callback)); 109 | 110 | return $this; 111 | } 112 | 113 | /** 114 | * Set the lazy JSON pointers 115 | * 116 | * @param string[]|array $pointers 117 | * @return self 118 | */ 119 | public function lazyPointers(array $pointers): self 120 | { 121 | foreach ($pointers as $pointer => $callback) { 122 | $callback instanceof Closure ? $this->lazyPointer($pointer, $callback) : $this->lazyPointer($callback); 123 | } 124 | 125 | return $this; 126 | } 127 | 128 | /** 129 | * Set a lazy JSON pointer 130 | * 131 | * @param string $pointer 132 | * @param ?Closure $callback 133 | * @return self 134 | */ 135 | public function lazyPointer(string $pointer, ?Closure $callback = null): self 136 | { 137 | $this->config->pointers->add(new Pointer($pointer, true, $callback)); 138 | 139 | return $this; 140 | } 141 | 142 | /** 143 | * Set a lazy JSON pointer for the whole JSON 144 | * 145 | * @return self 146 | */ 147 | public function lazy(): self 148 | { 149 | return $this->lazyPointer(''); 150 | } 151 | 152 | /** 153 | * Traverse the JSON one key and value at a time 154 | * 155 | * @param ?Closure $callback 156 | * @return void 157 | */ 158 | public function traverse(?Closure $callback = null): void 159 | { 160 | foreach ($this as $key => $value) { 161 | $callback && $callback($value, $key, $this); 162 | } 163 | } 164 | 165 | /** 166 | * Eager load the JSON into an array 167 | * 168 | * @return array 169 | */ 170 | public function toArray(): array 171 | { 172 | return $this->parser->toArray(); 173 | } 174 | 175 | /** 176 | * Set the JSON decoder 177 | * 178 | * @param Decoder $decoder 179 | * 
@return self 180 | */ 181 | public function decoder(Decoder $decoder): self 182 | { 183 | $this->config->decoder = $decoder; 184 | 185 | return $this; 186 | } 187 | 188 | /** 189 | * Retrieve the parsing progress 190 | * 191 | * @return Progress 192 | */ 193 | public function progress(): Progress 194 | { 195 | return $this->lexer->progress(); 196 | } 197 | 198 | /** 199 | * The number of bytes to read in each chunk 200 | * 201 | * @param int<1, max> $bytes 202 | * @return self 203 | */ 204 | public function bytes(int $bytes): self 205 | { 206 | $this->config->bytes = $bytes; 207 | 208 | return $this; 209 | } 210 | 211 | /** 212 | * Set the patch to apply during a decoding error 213 | * 214 | * @param mixed $patch 215 | * @return self 216 | */ 217 | public function patchDecodingError(mixed $patch = null): self 218 | { 219 | return $this->onDecodingError(function (DecodedValue $decoded) use ($patch) { 220 | $decoded->value = is_callable($patch) ? $patch($decoded) : $patch; 221 | }); 222 | } 223 | 224 | /** 225 | * Set the logic to run during a decoding error 226 | * 227 | * @param Closure $callback 228 | * @return self 229 | */ 230 | public function onDecodingError(Closure $callback): self 231 | { 232 | $this->config->onDecodingError = $callback; 233 | 234 | return $this; 235 | } 236 | 237 | /** 238 | * Set the logic to run during a syntax error 239 | * 240 | * @param Closure $callback 241 | * @return self 242 | */ 243 | public function onSyntaxError(Closure $callback): self 244 | { 245 | $this->config->onSyntaxError = $callback; 246 | 247 | return $this; 248 | } 249 | 250 | /** 251 | * Set the logic to run for wrapping the parser 252 | * 253 | * @param Closure $callback 254 | * @return self 255 | */ 256 | public function wrap(Closure $callback): self 257 | { 258 | $this->config->wrapper = $callback; 259 | 260 | return $this; 261 | } 262 | } 263 | -------------------------------------------------------------------------------- /src/Pointers/Pointer.php: 
-------------------------------------------------------------------------------- 1 | referenceTokens = $this->toReferenceTokens(); 54 | $this->depth = count($this->referenceTokens); 55 | } 56 | 57 | /** 58 | * Turn the JSON pointer into reference tokens 59 | * 60 | * @return string[] 61 | */ 62 | private function toReferenceTokens(): array 63 | { 64 | if (preg_match('#^(?:/(?:(?:[^/~])|(?:~[01]))*)*$#', $this->pointer) === 0) { 65 | throw new InvalidPointerException($this->pointer); 66 | } 67 | 68 | $tokens = explode('/', $this->pointer); 69 | $referenceTokens = array_map(fn (string $token) => str_replace(['~1', '~0'], ['/', '~'], $token), $tokens); 70 | 71 | return array_slice($referenceTokens, 1); 72 | } 73 | 74 | /** 75 | * Call the pointer callback 76 | * 77 | * @param mixed $value 78 | * @param mixed $key 79 | * @return mixed 80 | */ 81 | public function call(mixed $value, mixed &$key): mixed 82 | { 83 | if ($this->callback === null) { 84 | return $value; 85 | } 86 | 87 | return ($this->callback)($value, $key) ?? $value; 88 | } 89 | 90 | /** 91 | * Determine whether the reference token at the given depth matches the provided key 92 | * 93 | * @param int $depth 94 | * @param string|int $key 95 | * @return bool 96 | */ 97 | public function depthMatchesKey(int $depth, string|int $key): bool 98 | { 99 | $referenceToken = $this->referenceTokens[$depth] ?? 
null; 100 | 101 | return $referenceToken === (string) $key 102 | || (is_int($key) && $referenceToken === '-'); 103 | } 104 | 105 | /** 106 | * Determine whether the pointer matches the given tree 107 | * 108 | * @param Tree $tree 109 | * @return bool 110 | */ 111 | public function matchesTree(Tree $tree): bool 112 | { 113 | return $this->referenceTokens == [] 114 | || $this->referenceTokens == $tree->original() 115 | || $this->referenceTokens == $tree->wildcarded(); 116 | } 117 | 118 | /** 119 | * Determine whether the pointer includes the given tree 120 | * 121 | * @param Tree $tree 122 | * @return bool 123 | */ 124 | public function includesTree(Tree $tree): bool 125 | { 126 | if ($this->pointer == '') { 127 | return true; 128 | } 129 | 130 | return is_int($firstNest = array_search('-', $this->referenceTokens)) 131 | && array_slice($this->referenceTokens, 0, $firstNest) === array_slice($tree->original(), 0, $firstNest); 132 | } 133 | 134 | /** 135 | * Retrieve the underlying JSON pointer 136 | * 137 | * @return string 138 | */ 139 | public function __toString(): string 140 | { 141 | return $this->pointer; 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/Pointers/Pointers.php: -------------------------------------------------------------------------------- 1 | 34 | */ 35 | private array $found = []; 36 | 37 | /** 38 | * Add the given pointer 39 | * 40 | * @param Pointer $pointer 41 | */ 42 | public function add(Pointer $pointer): void 43 | { 44 | foreach ($this->pointers as $existingPointer) { 45 | if (str_starts_with($existingPointer, "$pointer/") || str_starts_with($pointer, "$existingPointer/")) { 46 | throw new IntersectingPointersException($existingPointer, $pointer); 47 | } 48 | } 49 | 50 | $this->pointers[] = $pointer; 51 | } 52 | 53 | /** 54 | * Retrieve the pointer matching the current tree 55 | * 56 | * @return Pointer 57 | */ 58 | public function matching(): Pointer 59 | { 60 | return 
$this->matching ??= $this->pointers[0] ?? new Pointer(''); 61 | } 62 | 63 | /** 64 | * Retrieve the pointer matching the given tree 65 | * 66 | * @param Tree $tree 67 | * @return Pointer 68 | */ 69 | public function matchTree(Tree $tree): Pointer 70 | { 71 | if (count($this->pointers) < 2) { 72 | return $this->matching; 73 | } 74 | 75 | $pointers = []; 76 | $originalTree = $tree->original(); 77 | 78 | foreach ($this->pointers as $pointer) { 79 | if ($pointer->referenceTokens == $originalTree) { 80 | return $this->matching = $pointer; 81 | } 82 | 83 | foreach ($originalTree as $depth => $key) { 84 | if (!$pointer->depthMatchesKey($depth, $key)) { 85 | continue 2; 86 | } elseif (!isset($pointers[$depth])) { 87 | $pointers[$depth] = $pointer; 88 | } 89 | } 90 | } 91 | 92 | return $this->matching = end($pointers) ?: $this->matching; 93 | } 94 | 95 | /** 96 | * Mark the given pointer as found 97 | * 98 | * @return Pointer 99 | */ 100 | public function markAsFound(): Pointer 101 | { 102 | if (!$this->matching->wasFound) { 103 | $this->found[(string) $this->matching] = $this->matching->wasFound = true; 104 | } 105 | 106 | return $this->matching; 107 | } 108 | 109 | /** 110 | * Determine whether all pointers were found in the given tree 111 | * 112 | * @param Tree $tree 113 | * @return bool 114 | */ 115 | public function wereFoundInTree(Tree $tree): bool 116 | { 117 | return count($this->pointers) == count($this->found) 118 | && !empty($this->pointers) 119 | && !$this->matching->includesTree($tree); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/Sources/AnySource.php: -------------------------------------------------------------------------------- 1 | [] 19 | */ 20 | protected array $supportedSources = [ 21 | CustomSource::class, 22 | Endpoint::class, 23 | Filename::class, 24 | IterableSource::class, 25 | Json::class, 26 | JsonResource::class, 27 | LaravelClientResponse::class, 28 | Psr7Message::class, 29 | 
Psr7Request::class, 30 | Psr7Stream::class, 31 | ]; 32 | 33 | /** 34 | * The matching source. 35 | * 36 | * @var Source|null 37 | */ 38 | protected ?Source $matchingSource; 39 | 40 | /** 41 | * Retrieve the JSON fragments 42 | * 43 | * @return Traversable 44 | * @throws UnsupportedSourceException 45 | */ 46 | public function getIterator(): Traversable 47 | { 48 | return $this->matchingSource(); 49 | } 50 | 51 | /** 52 | * Retrieve the matching source 53 | * 54 | * @return Source 55 | * @throws UnsupportedSourceException 56 | */ 57 | protected function matchingSource(): Source 58 | { 59 | if (isset($this->matchingSource)) { 60 | return $this->matchingSource; 61 | } 62 | 63 | foreach ($this->sources() as $source) { 64 | if ($source->matches()) { 65 | return $this->matchingSource = $source; 66 | } 67 | } 68 | 69 | throw new UnsupportedSourceException($this->source); 70 | } 71 | 72 | /** 73 | * Retrieve all available sources 74 | * 75 | * @return Generator 76 | */ 77 | protected function sources(): Generator 78 | { 79 | foreach ($this->supportedSources as $source) { 80 | yield new $source($this->source, $this->config); 81 | } 82 | } 83 | 84 | /** 85 | * Determine whether the JSON source can be handled 86 | * 87 | * @return bool 88 | */ 89 | public function matches(): bool 90 | { 91 | return true; 92 | } 93 | 94 | /** 95 | * Retrieve the calculated size of the JSON source 96 | * 97 | * @return int|null 98 | */ 99 | protected function calculateSize(): ?int 100 | { 101 | return $this->matchingSource()->size(); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/Sources/CustomSource.php: -------------------------------------------------------------------------------- 1 | 18 | */ 19 | public function getIterator(): Traversable 20 | { 21 | yield from $this->source; 22 | } 23 | 24 | /** 25 | * Determine whether the JSON source can be handled 26 | * 27 | * @return bool 28 | */ 29 | public function matches(): bool 30 | { 
31 | return $this->source instanceof Source; 32 | } 33 | 34 | /** 35 | * Retrieve the calculated size of the JSON source 36 | * 37 | * @return int|null 38 | */ 39 | protected function calculateSize(): ?int 40 | { 41 | return $this->source->size(); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/Sources/Endpoint.php: -------------------------------------------------------------------------------- 1 | 34 | * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException 35 | */ 36 | public function getIterator(): Traversable 37 | { 38 | return new Psr7Message($this->response(), $this->config); 39 | } 40 | 41 | /** 42 | * Retrieve the endpoint response 43 | * 44 | * @return ResponseInterface 45 | * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException 46 | */ 47 | protected function response(): ResponseInterface 48 | { 49 | $this->requireGuzzle(); 50 | 51 | return $this->response ??= $this->fetchResponse(); 52 | } 53 | 54 | /** 55 | * Retrieve the fetched HTTP response 56 | * 57 | * @return ResponseInterface 58 | */ 59 | protected function fetchResponse(): ResponseInterface 60 | { 61 | return $this->getJson($this->source); 62 | } 63 | 64 | /** 65 | * Determine whether the JSON source can be handled 66 | * 67 | * @return bool 68 | */ 69 | public function matches(): bool 70 | { 71 | /** @phpstan-ignore-next-line */ 72 | return (is_string($this->source) || $this->source instanceof UriInterface) && $this->isEndpoint($this->source); 73 | } 74 | 75 | /** 76 | * Retrieve the calculated size of the JSON source 77 | * 78 | * @return int|null 79 | * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException 80 | */ 81 | protected function calculateSize(): ?int 82 | { 83 | return $this->response()->getBody()->getSize(); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/Sources/Filename.php: 
-------------------------------------------------------------------------------- 1 | 20 | */ 21 | public function getIterator(): Traversable 22 | { 23 | $handle = fopen($this->source, 'rb'); 24 | 25 | try { 26 | yield from new JsonResource($handle, $this->config); 27 | } finally { 28 | $handle && fclose($handle); 29 | } 30 | } 31 | 32 | /** 33 | * Determine whether the JSON source can be handled 34 | * 35 | * @return bool 36 | */ 37 | public function matches(): bool 38 | { 39 | return is_string($this->source) && is_file($this->source); 40 | } 41 | 42 | /** 43 | * Retrieve the calculated size of the JSON source 44 | * 45 | * @return int|null 46 | */ 47 | protected function calculateSize(): ?int 48 | { 49 | return filesize($this->source) ?: null; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/Sources/IterableSource.php: -------------------------------------------------------------------------------- 1 | 21 | */ 22 | public function getIterator(): Traversable 23 | { 24 | yield from $this->source; 25 | } 26 | 27 | /** 28 | * Determine whether the JSON source can be handled 29 | * 30 | * @return bool 31 | */ 32 | public function matches(): bool 33 | { 34 | return is_iterable($this->source) && !$this->source instanceof Source; 35 | } 36 | 37 | /** 38 | * Retrieve the calculated size of the JSON source 39 | * 40 | * @return int|null 41 | */ 42 | protected function calculateSize(): ?int 43 | { 44 | return is_array($this->source) ? 
count($this->source) : iterator_count(clone $this->source); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/Sources/Json.php: -------------------------------------------------------------------------------- 1 | 24 | */ 25 | public function getIterator(): Traversable 26 | { 27 | for ($i = 0; $i < $this->size(); $i += $this->config->bytes) { 28 | yield substr($this->source, $i, $this->config->bytes); 29 | } 30 | } 31 | 32 | /** 33 | * Determine whether the JSON source can be handled 34 | * 35 | * @return bool 36 | */ 37 | public function matches(): bool 38 | { 39 | return is_string($this->source) && !is_file($this->source) && !$this->isEndpoint($this->source); 40 | } 41 | 42 | /** 43 | * Retrieve the calculated size of the JSON source 44 | * 45 | * @return int|null 46 | */ 47 | protected function calculateSize(): ?int 48 | { 49 | return strlen($this->source); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/Sources/JsonResource.php: -------------------------------------------------------------------------------- 1 | 21 | */ 22 | public function getIterator(): Traversable 23 | { 24 | while (!feof($this->source)) { 25 | if (is_string($chunk = fread($this->source, $this->config->bytes))) { 26 | yield $chunk; 27 | } 28 | } 29 | } 30 | 31 | /** 32 | * Determine whether the JSON source can be handled 33 | * 34 | * @return bool 35 | */ 36 | public function matches(): bool 37 | { 38 | return is_resource($this->source); 39 | } 40 | 41 | /** 42 | * Retrieve the calculated size of the JSON source 43 | * 44 | * @return int|null 45 | */ 46 | protected function calculateSize(): ?int 47 | { 48 | $stats = fstat($this->source); 49 | $size = $stats['size'] ?? 
null; 50 | 51 | return $size ?: null; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/Sources/LaravelClientRequest.php: -------------------------------------------------------------------------------- 1 | sendRequest($this->source->toPsrRequest()); 23 | } 24 | 25 | /** 26 | * Determine whether the JSON source can be handled 27 | * 28 | * @return bool 29 | */ 30 | public function matches(): bool 31 | { 32 | return $this->source instanceof Request; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/Sources/LaravelClientResponse.php: -------------------------------------------------------------------------------- 1 | 19 | */ 20 | public function getIterator(): Traversable 21 | { 22 | return new Psr7Message($this->source->toPsrResponse(), $this->config); 23 | } 24 | 25 | /** 26 | * Determine whether the JSON source can be handled 27 | * 28 | * @return bool 29 | */ 30 | public function matches(): bool 31 | { 32 | return $this->source instanceof Response; 33 | } 34 | 35 | /** 36 | * Retrieve the calculated size of the JSON source 37 | * 38 | * @return int|null 39 | */ 40 | protected function calculateSize(): ?int 41 | { 42 | return $this->source->toPsrResponse()->getBody()->getSize(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/Sources/Psr7Message.php: -------------------------------------------------------------------------------- 1 | 20 | */ 21 | public function getIterator(): Traversable 22 | { 23 | return new Psr7Stream($this->source->getBody(), $this->config); 24 | } 25 | 26 | /** 27 | * Determine whether the JSON source can be handled 28 | * 29 | * @return bool 30 | */ 31 | public function matches(): bool 32 | { 33 | return $this->source instanceof MessageInterface && !$this->source instanceof RequestInterface; 34 | } 35 | 36 | /** 37 | * Retrieve the calculated size of the JSON source 38 | * 39 
| * @return int|null 40 | */ 41 | protected function calculateSize(): ?int 42 | { 43 | return $this->source->getBody()->getSize(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/Sources/Psr7Request.php: -------------------------------------------------------------------------------- 1 | sendRequest($this->source); 23 | } 24 | 25 | /** 26 | * Determine whether the JSON source can be handled 27 | * 28 | * @return bool 29 | */ 30 | public function matches(): bool 31 | { 32 | return $this->source instanceof RequestInterface; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/Sources/Psr7Stream.php: -------------------------------------------------------------------------------- 1 | 21 | */ 22 | public function getIterator(): Traversable 23 | { 24 | if (!in_array(StreamWrapper::NAME, stream_get_wrappers())) { 25 | stream_wrapper_register(StreamWrapper::NAME, StreamWrapper::class); 26 | } 27 | 28 | $stream = fopen(StreamWrapper::NAME . '://stream', 'rb', false, stream_context_create([ 29 | StreamWrapper::NAME => ['stream' => $this->source], 30 | ])); 31 | 32 | return new JsonResource($stream, $this->config); 33 | } 34 | 35 | /** 36 | * Determine whether the JSON source can be handled 37 | * 38 | * @return bool 39 | */ 40 | public function matches(): bool 41 | { 42 | return $this->source instanceof StreamInterface; 43 | } 44 | 45 | /** 46 | * Retrieve the calculated size of the JSON source 47 | * 48 | * @return int|null 49 | */ 50 | protected function calculateSize(): ?int 51 | { 52 | return $this->source->getSize(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/Sources/Source.php: -------------------------------------------------------------------------------- 1 | 13 | */ 14 | abstract class Source implements IteratorAggregate 15 | { 16 | /** 17 | * The cached size of the JSON source. 
/**
 * The cached size of the JSON source.
 *
 * Declared without a default value: it is assigned by size() the first
 * time the size is requested.
 *
 * @var int|null
 */
protected ?int $size;

/**
 * Whether the JSON size has already been calculated.
 * A dedicated flag is needed because NULL is a legitimate cached result
 * (size not computable) and must not trigger a new calculation.
 *
 * @var bool
 */
protected bool $sizeWasSet = false;

/**
 * Retrieve the JSON fragments
 *
 * @return Traversable
 */
abstract public function getIterator(): Traversable;

/**
 * Determine whether the JSON source can be handled
 *
 * @return bool
 */
abstract public function matches(): bool;

/**
 * Retrieve the calculated size of the JSON source
 *
 * @return int|null
 */
abstract protected function calculateSize(): ?int;

/**
 * Instantiate the class.
 *
 * @param mixed $source the raw value to read the JSON from
 * @param Config $config the parsing configuration, a fresh one by default
 */
final public function __construct(
    protected readonly mixed $source,
    protected readonly Config $config = new Config(),
) {
}

/**
 * Retrieve the size of the JSON source and cache it
 *
 * @return int|null
 */
public function size(): ?int
{
    if ($this->sizeWasSet) {
        return $this->size;
    }

    // The flag is raised only after the calculation has completed.
    $this->size = $this->calculateSize();
    $this->sizeWasSet = true;

    return $this->size;
}
/**
 * Determine whether the pointer is at the end of the stream
 *
 * @return bool the end-of-file state reported by the wrapped stream
 */
public function stream_eof(): bool
{
    $reachedEnd = $this->stream->eof();

    return $reachedEnd;
}

/**
 * Read from the stream
 *
 * @param int $count the length passed on to the wrapped stream's read()
 * @return string the data returned by the wrapped stream
 */
public function stream_read(int $count): string
{
    $data = $this->stream->read($count);

    return $data;
}
/**
 * Set the token value
 *
 * Assigning a new value also clears the lazy-loading flag, so a flag raised
 * for a previous value does not carry over to the next one.
 *
 * @param string $value
 * @return static
 */
public function setValue(string $value): static
{
    $this->shouldLazyLoad = false;

    $token = parent::setValue($value);

    return $token;
}

/**
 * Determine whether this token ends a JSON chunk
 *
 * @return bool true only while the token is flagged for lazy loading
 */
public function endsChunk(): bool
{
    $isLazy = $this->shouldLazyLoad;

    return $isLazy;
}
/**
 * The current position, counted in bytes read from the source.
 *
 * @var int
 */
private int $position = 0;

/**
 * Instantiate the class.
 *
 * @param Source $source the JSON source to split into tokens
 */
public function __construct(private readonly Source $source)
{
    $this->progress = new Progress();
}

/**
 * Retrieve the JSON fragments
 *
 * Walks every chunk of the source byte by byte, accumulating characters in a
 * buffer and yielding a token whenever a boundary is met outside a string.
 *
 * NOTE(review): a buffer that is still pending when the source ends is never
 * yielded (nothing flushes it after the loops) - confirm this is intended.
 *
 * @return \Generator
 * @throws SyntaxException when a token starts with a character that has no known type
 */
public function getIterator(): Traversable
{
    $buffer = '';
    $inString = $isEscaping = false;
    $tokenizer = Tokenizer::instance();

    foreach ($this->source as $chunk) {
        // strlen() and offset access are byte-wise, hence the position is a byte offset
        for ($i = 0, $size = strlen($chunk); $i < $size; $i++, $this->position++) {
            $character = $chunk[$i];
            // a double quote flips the in-string flag; when the previous
            // character was an unescaped backslash the flag is forced to true
            $inString = ($character == '"') != $inString || $isEscaping;
            // a backslash starts an escape only if it is not itself escaped
            $isEscaping = $character == '\\' && !$isEscaping;

            if ($inString || !isset(Tokens::BOUNDARIES[$character])) {
                // the first character of a token must map to a known token type
                $buffer == '' && !isset(Tokens::TYPES[$character]) && throw new SyntaxException($character);
                $buffer .= $character;
                continue;
            }

            // a boundary outside a string closes the pending token, if any
            if ($buffer != '') {
                yield $tokenizer->toToken($buffer);
                $buffer = '';
            }

            // boundaries that are not delimiters produce no token of their own
            if (isset(Tokens::DELIMITERS[$character])) {
                yield $tokenizer->toToken($character);
            }
        }
    }
}

/**
 * Retrieve the current position
 *
 * @return int
 */
public function position(): int
{
    return $this->position;
}

/**
 * Retrieve the parsing progress
 *
 * The shared progress object is refreshed with the current position and the
 * size reported by the source before being returned.
 *
 * @return Progress
 */
public function progress(): Progress
{
    return $this->progress->setCurrent($this->position)->setTotal($this->source->size());
}
/**
 * The decoder handling potential errors.
 *
 * @var ConfigurableDecoder
 */
private readonly ConfigurableDecoder $decoder;

/**
 * Whether the parser is fast-forwarding.
 * While true, tokens are consumed without being validated or yielded.
 *
 * @var bool
 */
private bool $isFastForwarding = false;

/**
 * Instantiate the class.
 *
 * @param Generator $tokens the tokens to parse
 * @param Config $config the parsing configuration
 */
public function __construct(private readonly Generator $tokens, private readonly Config $config)
{
    $this->decoder = new ConfigurableDecoder($config);
}

/**
 * Retrieve the JSON fragments
 *
 * Validates each token against the expected one, lets the state buffer the
 * tokens matched by the pointers and yields each decoded key/value pair as
 * soon as a chunk is complete.
 *
 * @return Traversable
 * @throws SyntaxException when a token is not the expected one
 */
public function getIterator(): Traversable
{
    // the closure builds a nested parser that reads the current compound lazily
    $state = new State($this->config->pointers, fn () => new self($this->lazyLoad(), clone $this->config));

    foreach ($this->tokens as $token) {
        if ($this->isFastForwarding) {
            // just drain the tokens
            continue;
        } elseif (!$token->matches($state->expectedToken)) {
            throw new SyntaxException($token);
        }

        $state->mutateByToken($token);

        // keep collecting tokens until a chunk ends outside a nested compound
        if (!$token->endsChunk() || $state->tree->isDeep()) {
            continue;
        }

        if ($state->hasBuffer()) {
            /** @var string|int $key */
            $key = $this->decoder->decode($state->tree->currentKey());
            $value = $this->decoder->decode($state->value());
            // lazy values are nested parsers: let the configured wrapper decorate them
            $wrapper = $value instanceof self ? ($this->config->wrapper)($value) : $value;

            yield $key => $state->callPointer($wrapper, $key);

            // consume whatever the consumer left unread of the lazy compound,
            // so that this parser resumes right after it
            $value instanceof self && $value->fastForward();
        }

        if ($state->canStopParsing()) {
            break;
        }
    }
}

/**
 * Retrieve the generator to lazy load the current compound
 *
 * Yields the tokens of the compound that the shared token generator is
 * currently positioned on. The generator is advanced only while the compound
 * is still open, so its closing token is yielded without moving past it.
 *
 * @return Generator
 */
public function lazyLoad(): Generator
{
    $depth = 0;

    do {
        yield $token = $this->tokens->current();

        if ($token instanceof CompoundBegin) {
            $depth++;
        } elseif ($token instanceof CompoundEnd) {
            $depth--;
        }

        $depth > 0 && $this->tokens->next();
    } while ($depth > 0);
}

/**
 * Eager load the current compound into an array
 *
 * Values exposing a toArray() method are converted recursively. When a key is
 * yielded more than once (as can happen with wildcard pointers), each
 * repetition starts a new row and the list of rows is returned; otherwise the
 * single row is returned as is.
 *
 * @return array
 */
public function toArray(): array
{
    $index = 0;
    $array = [];
    $hasWildcards = false;

    foreach ($this as $key => $value) {
        // array_key_exists() instead of isset(): a key holding NULL must still
        // count as already present, otherwise the NULL value would be
        // overwritten by the next occurrence of the same key
        if (array_key_exists($key, $array[$index] ?? [])) {
            $index++;
            $hasWildcards = true;
        }

        $turnsIntoArray = is_object($value) && method_exists($value, 'toArray');
        $array[$index][$key] = $turnsIntoArray ? $value->toArray() : $value;
    }

    return $hasWildcards || empty($array) ? $array : $array[0];
}

/**
 * Fast-forward the parser
 *
 * Consumes the remaining tokens without yielding any value. Nothing happens
 * when the token generator is already exhausted.
 *
 * @return void
 */
public function fastForward(): void
{
    if (!$this->tokens->valid()) {
        return;
    }

    $this->isFastForwarding = true;

    foreach ($this as $value) {
        $value instanceof self && $value->fastForward(); // @codeCoverageIgnore
    }
}
/**
 * The token instances, indexed by token type.
 *
 * @var array
 */
private array $tokensMap = [];

/**
 * Retrieve the singleton instance
 *
 * @return self
 */
public static function instance(): self
{
    if (!isset(self::$instance)) {
        self::$instance = new self();
    }

    return self::$instance;
}

/**
 * Instantiate the class.
 *
 */
private function __construct()
{
    $this->setTokensMap();
}

/**
 * Set the tokens map
 *
 * One instance is created per token class and shared by every type that
 * maps to that class.
 *
 * @return void
 */
private function setTokensMap(): void
{
    $instancesByClass = [];

    foreach (Tokens::MAP as $type => $class) {
        $instancesByClass[$class] ??= new $class();

        $this->tokensMap[$type] = $instancesByClass[$class];
    }
}

/**
 * Turn the given value into a token
 *
 * The token type is inferred from the first character of the value and the
 * instance registered for that type is returned with the value set on it.
 *
 * @param string $value
 * @return Token
 */
public function toToken(string $value): Token
{
    $firstCharacter = $value[0];
    $token = $this->tokensMap[Tokens::TYPES[$firstCharacter]];

    return $token->setValue($value);
}
/**
 * The token boundaries.
 *
 * Characters that terminate a pending token when met outside a string: the
 * structural characters, the whitespace characters and the three bytes
 * 0xEF 0xBB 0xBF, which together form the UTF-8 byte order mark.
 *
 * @var array
 */
public const BOUNDARIES = [
    '{' => true,
    '}' => true,
    '[' => true,
    ']' => true,
    ',' => true,
    ':' => true,
    ' ' => true,
    "\n" => true,
    "\r" => true,
    "\t" => true,
    "\xEF" => true,
    "\xBB" => true,
    "\xBF" => true,
];

/**
 * The structural boundaries.
 *
 * The subset of boundaries made of JSON structural characters only; unlike
 * the other boundaries, this list has no whitespace or byte order mark bytes.
 *
 * @var array
 */
public const DELIMITERS = [
    '{' => true,
    '}' => true,
    '[' => true,
    ']' => true,
    ',' => true,
    ':' => true,
];

/**
 * The tokens class map.
 *
 * Associates every token type with the class representing it; the begin and
 * end types of objects and arrays share the same compound classes.
 *
 * @var array
 */
public const MAP = [
    self::COMMA => Comma::class,
    self::OBJECT_BEGIN => CompoundBegin::class,
    self::ARRAY_BEGIN => CompoundBegin::class,
    self::OBJECT_END => CompoundEnd::class,
    self::ARRAY_END => CompoundEnd::class,
    self::COLON => Colon::class,
    self::SCALAR_CONST => Constant::class,
    self::SCALAR_STRING => ScalarString::class,
];
/**
 * Instantiate the class
 *
 * Sets the defaults: the simdjson decoder when the extension is loaded (the
 * plain JSON decoder otherwise), an empty set of pointers, error callbacks
 * that throw and a wrapper returning the parser untouched.
 *
 */
public function __construct()
{
    if (extension_loaded('simdjson')) {
        $this->decoder = new SimdjsonDecoder();
    } else {
        $this->decoder = new JsonDecoder();
    }

    $this->pointers = new Pointers();
    $this->onDecodingError = fn (DecodedValue $decoded) => throw new DecodingException($decoded);
    $this->onSyntaxError = fn (SyntaxException $e) => throw $e;
    $this->wrapper = fn (Parser $parser) => $parser;
}

/**
 * Clone the configuration
 *
 * The copy does not share the pointers of the original: it starts over with
 * a new set holding a single pointer built from an empty string.
 *
 * @return void
 */
public function __clone(): void
{
    $this->pointers = new Pointers();
    // NOTE(review): the second argument presumably marks the pointer as lazy - confirm against Pointer
    $this->pointers->add(new Pointer('', true));
}
/**
 * Determine whether the parser can stop parsing
 *
 * @return bool the answer of the pointers collection for the current tree
 */
public function canStopParsing(): bool
{
    return $this->pointers->wereFoundInTree($this->tree);
}

/**
 * Call the current pointer callback
 *
 * @param mixed $value the value handed to the matching pointer
 * @param mixed $key the key, taken by reference and handed to the matching pointer
 * @return mixed the result of the matching pointer call
 */
public function callPointer(mixed $value, mixed &$key): mixed
{
    return $this->pointers->matching()->call($value, $key);
}

/**
 * Mutate state depending on the given token
 *
 * The tree is updated first, then the token is buffered if it belongs to a
 * matched value, and finally the token itself updates the state.
 *
 * @param Token $token
 * @return void
 */
public function mutateByToken(Token $token): void
{
    // move the tree before checking whether it matches the pointer
    $this->tree->traverseToken($token, $this->expectsKey);

    // buffer the token when the tree is matched and either the token is a
    // value in value position (no key expected) or the tree is deep
    if ($this->tree->isMatched() && ((!$this->expectsKey && $token->isValue()) || $this->tree->isDeep())) {
        $pointer = $this->pointers->markAsFound();

        if ($token instanceof CompoundBegin && $pointer->isLazy) {
            // lazy pointer: the buffer becomes whatever the lazy-load callback
            // returns and the token is flagged for lazy loading
            $this->buffer = ($this->lazyLoad)();
            $token->shouldLazyLoad = true;
        } else {
            /** @phpstan-ignore-next-line */
            $this->buffer .= $token;
        }
    }

    // let the token update the state (e.g. the next expected token)
    $token->mutateState($this);
}
/**
 * Determine whether the buffer contains tokens
 *
 * @return bool false only when the buffer loosely equals an empty string
 */
public function hasBuffer(): bool
{
    return !($this->buffer == '');
}

/**
 * Retrieve the value from the buffer and reset it
 *
 * @return Parser|string the buffered content, as it was before the reset
 */
public function value(): Parser|string
{
    [$value, $this->buffer] = [$this->buffer, ''];

    return $value;
}
/**
 * Retrieve the JSON tree depth
 *
 * @return int
 */
public function depth(): int
{
    return $this->depth;
}

/**
 * Increase the tree depth by entering an object or an array
 *
 * @param bool $inObject whether the compound being entered is an object
 * @return void
 */
public function deepen(bool $inObject): void
{
    $deeper = ++$this->depth;

    $this->inObjectByDepth[$deeper] = $inObject;
}

/**
 * Decrease the tree depth
 *
 * @return void
 */
public function emerge(): void
{
    $this->depth -= 1;
}

/**
 * Determine whether the tree is deep
 *
 * With a matching pointer that loosely equals an empty string, the tree is
 * deep below the first level; otherwise it is deep once the depth of the
 * matching pointer is reached.
 *
 * @return bool
 */
public function isDeep(): bool
{
    $pointer = $this->pointers->matching();

    if ($pointer == '') {
        return $this->depth > 0;
    }

    return $this->depth >= $pointer->depth;
}

/**
 * Traverse the given token
 *
 * Nothing is traversed once the depth of a non-empty matching pointer is
 * reached. Otherwise the token is traversed as a key when a key is expected,
 * or as an array item when it is a value outside of an object.
 *
 * @param Token $token
 * @param bool $expectsKey
 * @return void
 */
public function traverseToken(Token $token, bool $expectsKey): void
{
    $pointer = $this->pointers->matching();

    if ($pointer != '' && $this->depth >= $pointer->depth) {
        return;
    }

    if ($expectsKey) {
        $this->traverseKey($token);

        return;
    }

    if ($token->isValue() && !$this->inObject()) {
        $this->traverseArray();
    }
}

/**
 * Determine whether the tree is matched by the JSON pointer
 *
 * @return bool
 */
public function isMatched(): bool
{
    if ($this->depth < 0) {
        return false;
    }

    return $this->pointers->matching()->matchesTree($this);
}
/**
 * Traverse an array
 *
 * Moves to the next item of the array at the current depth, drops what was
 * tracked for deeper levels and records the item in both trees.
 *
 * @return void
 */
public function traverseArray(): void
{
    // the first item gets index 0, the following ones increment the previous
    // index; anything that is not an integer restarts from 0
    $index = $this->original[$this->depth] ?? null;
    $this->original[$this->depth] = $index = is_int($index) ? $index + 1 : 0;

    // forget the levels below the current depth
    if (count($this->original) > $offset = $this->depth + 1) {
        array_splice($this->original, $offset);
        array_splice($this->inObjectByDepth, $offset);
    }

    // the pointers are matched against the updated original tree; the
    // wildcarded tree keeps "-" where the matched pointer has "-" at this depth
    $referenceTokens = $this->pointers->matchTree($this)->referenceTokens;
    $this->wildcarded[$this->depth] = ($referenceTokens[$this->depth] ?? null) == '-' ? '-' : $index;

    if (count($this->wildcarded) > $offset) {
        array_splice($this->wildcarded, $offset);
    }
}

/**
 * Retrieve the current key
 *
 * String keys are returned wrapped in double quotes, other keys are
 * returned unchanged.
 *
 * @return string|int
 */
public function currentKey(): string|int
{
    $key = $this->original[$this->depth];

    return is_string($key) ? "\"$key\"" : $key;
}