├── Makefile ├── streamingjson ├── __init__.py ├── lexer_helper.py ├── lexer_tokens.py └── lexer.py ├── examples ├── simple_json_stream │ └── main.py └── gpt_function_call │ └── main.py ├── pyproject.toml ├── .github └── workflows │ └── coverage.yaml ├── .gitignore ├── LICENSE ├── tox.ini ├── tests ├── test_lexer_helper.py └── test_lexer.py └── README.md /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all manual-build manual-upload-to-testpypi manual-upload-to-pypi test 2 | 3 | all: manual-build 4 | 5 | manual-build: 6 | @python -m build 7 | 8 | manual-upload-to-testpypi: 9 | @python -m twine upload --repository testpypi dist/streamingjson-$(VERSION)* 10 | 11 | manual-upload-to-pypi: 12 | @python -m twine upload --repository pypi dist/streamingjson-$(VERSION)* 13 | 14 | test: 15 | @python -m tox 16 | -------------------------------------------------------------------------------- /streamingjson/__init__.py: -------------------------------------------------------------------------------- 1 | from .lexer import Lexer 2 | 3 | __version__ = "0.0.4" 4 | 5 | __title__ = "streamingjson" 6 | __description__ = ( 7 | "A streamlined, user-friendly JSON streaming preprocessor, crafted in Python." 8 | ) 9 | __url__ = "https://github.com/karminski/streaming-json-py" 10 | __uri__ = __url__ 11 | __doc__ = f"{__description__} <{__uri__}>" 12 | 13 | __author__ = "Karminski" 14 | __email__ = "code.karminski@outlook.com" 15 | 16 | __license__ = "MIT" 17 | __copyright__ = "Copyright 2024 Karminski" 18 | 19 | __all__ = [ 20 | "Lexer", 21 | ] 22 | -------------------------------------------------------------------------------- /streamingjson/lexer_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | helper method for lexer 3 | """ 4 | 5 | 6 | def is_ignore_token(c): 7 | """ 8 | check if target character is ignore token 9 | """ 10 | return c in "\t\n\v\f\r " 11 | 12 | 13 | def match_stack(stack, tokens): 14 | """ 15 | check if target stack match given tokens 16 | """ 17 | pointer = len(stack) 18 | tokens_left = len(tokens) 19 | 20 | while True: 21 | tokens_left -= 1 22 | pointer -= 1 23 | if tokens_left < 0: 24 | break 25 | if pointer < 0: 26 | return False 27 | if stack[pointer] != tokens[tokens_left]: 28 | return False 29 | return True 30 | -------------------------------------------------------------------------------- /examples/simple_json_stream/main.py: -------------------------------------------------------------------------------- 1 | import streamingjson 2 | 3 | 4 | def main(): 5 | # Case A, complete the incomplete JSON object 6 | json_segment_a = '{"a":' # will complete to `{"a":null}` 7 | lexer = streamingjson.Lexer() 8 | lexer.append_string(json_segment_a) 9 | completed_json = lexer.complete_json() 10 | print(f"completedJSON: {completed_json}") 11 | 12 | # Case B, complete the incomplete JSON array 13 | json_segment_b = "[t" # will complete to `[true]` 14 | lexer = streamingjson.Lexer() 15 | lexer.append_string(json_segment_b) 16 | completed_json = lexer.complete_json() 17 | print(f"completedJSON: {completed_json}") 18 | 19 | 20 | if __name__ == "__main__": 21 | main() 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "streamingjson" 7 | version 
= "0.0.5" 8 | authors = [ 9 | { name="Karminski", email="code.karminski@outlook.com" }, 10 | ] 11 | description = "A streamlined, user-friendly JSON streaming preprocessor, crafted in Python." 12 | readme = "README.md" 13 | requires-python = ">=3.7" 14 | classifiers = [ 15 | "Programming Language :: Python :: 3", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | ] 19 | 20 | [project.urls] 21 | Homepage = "https://github.com/karminski/streaming-json-py" 22 | Issues = "https://github.com/karminski/streaming-json-py/issues" 23 | -------------------------------------------------------------------------------- /.github/workflows/coverage.yaml: -------------------------------------------------------------------------------- 1 | name: Coverage 2 | on: [push, pull_request] 3 | jobs: 4 | test: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | matrix: 8 | python: ["3.9", "3.10", "3.11", "3.12"] 9 | steps: 10 | - name: Checkout 11 | uses: actions/checkout@v3 12 | - name: Setup Python ${{ matrix.python }} 13 | uses: actions/setup-python@v4 14 | with: 15 | python-version: ${{ matrix.python }} 16 | - name: Install dependencies 17 | run: pip install poetry tox tox-gh-actions codecov 18 | - name: Run tox 19 | run: tox 20 | - name: Upload coverage reports to Codecov 21 | uses: codecov/codecov-action@v4.0.1 22 | with: 23 | token: ${{ secrets.CODECOV_TOKEN }} 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.gitignore.io 2 | 3 | ### Python ### 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | .pytest_cache 62 | .mypy_cache 63 | pip-wheel-metadata/ 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | 4 | Copyright (c) 2024 Karminski 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -ra 3 | testpaths = tests 4 | filterwarnings = 5 | once::Warning 6 | ignore:::pympler[.*] 7 | 8 | 9 | [gh-actions] 10 | python = 11 | 3.7: py37, docs 12 | 3.8: py38, typing 13 | 3.9: py39 14 | 3.10: py310 15 | 3.11: py311 16 | pypy-3.8: pypy3 17 | pypy-3.9: pypy3 18 | 19 | 20 | [tox] 21 | envlist = 22 | lint 23 | typing 24 | py{37,38,39,310,311,py3} 25 | docs 26 | pypi-description 27 | coverage-report 28 | isolated_build = True 29 | 30 | 31 | [testenv] 32 | # Prevent random setuptools/pip breakages like 33 | # https://github.com/pypa/setuptools/issues/1042 from breaking our builds. 34 | setenv = 35 | VIRTUALENV_NO_DOWNLOAD=1 36 | deps = 37 | coverage 38 | pytest 39 | extras = 40 | tests 41 | commands = {envpython} -b -m coverage run -m pytest {posargs} 42 | 43 | 44 | [testenv:docs] 45 | basepython = python3.9 46 | extras = docs 47 | commands = 48 | sphinx-build -n -T -W -b html -d {envtmpdir}/doctrees docs docs/_build/html 49 | sphinx-build -n -T -W -b doctest -d {envtmpdir}/doctrees docs docs/_build/html 50 | python -m doctest README.rst 51 | 52 | 53 | [testenv:lint] 54 | basepython = python3.9 55 | extras = dev 56 | passenv = HOMEPATH # needed on Windows 57 | commands = pre-commit run --all-files 58 | 59 | 60 | [testenv:pypi-description] 61 | basepython = python3.9 62 | skip_install = true 63 | deps = 64 | twine 65 | pip >= 18.0.0 66 | commands = 67 | pip wheel -w {envtmpdir}/build --no-deps . 
68 | twine check {envtmpdir}/build/* 69 | 70 | 71 | [testenv:coverage-report] 72 | basepython = python3.9 73 | skip_install = true 74 | deps = coverage[toml]==5.0.4 75 | commands = 76 | coverage combine 77 | coverage report 78 | -------------------------------------------------------------------------------- /tests/test_lexer_helper.py: -------------------------------------------------------------------------------- 1 | """ 2 | test cases for lexer_helper 3 | """ 4 | 5 | from streamingjson import lexer_helper 6 | from streamingjson import lexer_tokens 7 | 8 | 9 | class TestMatchStack: 10 | """ 11 | test cases for match_stack method 12 | """ 13 | 14 | def test_match_stack_0(self): 15 | """ 16 | simple match test 17 | """ 18 | stack = [lexer_tokens.TOKEN_LEFT_BRACE] 19 | tokens = [lexer_tokens.TOKEN_LEFT_BRACE] 20 | match_result = lexer_helper.match_stack(stack, tokens) 21 | assert match_result is True 22 | 23 | def test_match_stack_1(self): 24 | """ 25 | match full stack test 26 | """ 27 | stack = [ 28 | lexer_tokens.TOKEN_RIGHT_BRACE, 29 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 30 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 31 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 32 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 33 | lexer_tokens.TOKEN_COLON, 34 | ] 35 | tokens = [ 36 | lexer_tokens.TOKEN_RIGHT_BRACE, 37 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 38 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 39 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 40 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 41 | lexer_tokens.TOKEN_COLON, 42 | ] 43 | match_result = lexer_helper.match_stack(stack, tokens) 44 | assert match_result is True 45 | 46 | def test_match_stack_2(self): 47 | """ 48 | match multi element in stack test 49 | """ 50 | stack = [ 51 | lexer_tokens.TOKEN_LEFT_BRACE, 52 | lexer_tokens.TOKEN_QUOTE, 53 | lexer_tokens.TOKEN_QUOTE, 54 | lexer_tokens.TOKEN_COLON, 55 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 56 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 57 | ] 58 | tokens = [ 59 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 60 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 61 | ] 62 | match_result = lexer_helper.match_stack(stack, tokens) 63 | assert match_result is True 64 | -------------------------------------------------------------------------------- /examples/gpt_function_call/main.py: -------------------------------------------------------------------------------- 1 | import streamingjson 2 | 3 | 4 | # In GPT's chat completion stream mode, the request for tool_calls returns a structure as follows: 5 | # 6 | # { 7 | # "id": "chatcmpl-?", 8 | # "object": "chat.completion.chunk", 9 | # "created": 1712000001, 10 | # "model": "gpt-4-0125-preview", 11 | # "system_fingerprint": "fp_?", 12 | # "choices": [ 13 | # { 14 | # "index": 0, 15 | # "delta": { 16 | # "tool_calls": [ 17 | # { 18 | # "index": 0, 19 | # "function": { 20 | # "arguments": "{\"fi" 21 | # } 22 | # } 23 | # ] 24 | # }, 25 | # "logprobs": null, 26 | # "finish_reason": null 27 | # } 28 | # ] 29 | # } 30 | # 31 | # We need to extract data.choices[0].delta.tool_calls[0].function.arguments. 32 | # The arguments field is a JSON fragment; we can use streaming-json-py to complete it into syntactically correct JSON and then parse it. 33 | 34 | 35 | def main(): 36 | # We use a list of string fragments to simulate the streamed arguments field returned by GPT.
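# In a real client, each fragment comes from the streamed chunk shown above rather than from a hard-coded list.
# As a minimal sketch, assuming an OpenAI-style chunk dict named `chunk`, the fragment could be read as
#     fragment = chunk["choices"][0]["delta"]["tool_calls"][0]["function"]["arguments"]
# and then passed to lexer.append_string(fragment) before calling lexer.complete_json().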
37 | arguments = [ 38 | '{"fu', 39 | "nction", 40 | "_name", 41 | '"', 42 | ":", 43 | '"run', 44 | "_code", 45 | '", ', 46 | '"argu', 47 | 'ments"', 48 | ": ", 49 | '"print(', 50 | '\\"hello', 51 | " world", 52 | '\\"', 53 | ')"', 54 | ] 55 | lexer = streamingjson.Lexer() 56 | 57 | for json_fragment in arguments: 58 | try: 59 | lexer.append_string(json_fragment) 60 | print(lexer.complete_json()) 61 | except ValueError as e: 62 | print(f"invalid JSON string appended: {e}") 63 | 64 | 65 | # will print: 66 | # {"fu":null} 67 | # {"function":null} 68 | # {"function_name":null} 69 | # {"function_name":null} 70 | # {"function_name":null} 71 | # {"function_name":"run"} 72 | # {"function_name":"run_code"} 73 | # {"function_name":"run_code"} 74 | # {"function_name":"run_code", "argu":null} 75 | # {"function_name":"run_code", "arguments":null} 76 | # {"function_name":"run_code", "arguments":null} 77 | # {"function_name":"run_code", "arguments": "print("} 78 | # {"function_name":"run_code", "arguments": "print(\"hello"} 79 | # {"function_name":"run_code", "arguments": "print(\"hello world"} 80 | # {"function_name":"run_code", "arguments": "print(\"hello world\""} 81 | # {"function_name":"run_code", "arguments": "print(\"hello world\")"} 82 | 83 | 84 | if __name__ == "__main__": 85 | main() 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # streaming-json-py 2 | 3 | [![codecov](https://codecov.io/gh/karminski/streaming-json-py/graph/badge.svg?token=1901RX87FK)](https://codecov.io/gh/karminski/streaming-json-py) 4 | 5 | ```python 6 | import streamingjson 7 | ``` 8 | 9 | Welcome to **streaming-json-py**, a groundbreaking library designed to revolutionize the way we handle streaming JSON parsing. 10 | 11 | In an era dominated by LLMs (Large Language Models), the ability to efficiently parse JSON streams is more critical than ever. Traditionally, JSON parsing libraries have fallen short, requiring JSON data to be fully generated before any parsing can begin. streaming-json-py challenges this limitation head-on. 12 | 13 | ### Key Features 14 | 15 | - **Real-Time JSON Parsing**: With streaming-json-py, you no longer need to wait for the entire JSON data to be generated. This library allows JSON to be parsed as it is being streamed (this means the JSON stream can stop at any position), significantly cutting down the time-to-first-token. 16 | - **Seamless Integration**: Designed to complement existing JSON parsing libraries, streaming-json-py preprocesses incomplete JSON strings, transforming them into valid, parseable JSON. This means you can continue using your preferred JSON library with our tool seamlessly. 17 | - **Enhanced User Experience**: By enabling real-time data processing, our library drastically reduces the wait time for end-users. Display JSON structures to users without the delay typically associated with complete JSON generation. 18 | 19 | ### Example Usage 20 | 21 | Basically, this library is used to complete fragmented JSON, making it into syntactically correct JSON. For example: 22 | 23 | ```{"a":``` will complete to ```{"a":null}``` 24 | 25 | and when the JSON stream continues to output: 26 | 27 | ```{"a":[tr``` will complete to ```{"a":[true]}``` 28 | 29 | Do not worry about the JSON stream stopping anywhere, such as at a comma: 30 | 31 | ```{"a":[true],``` will complete to ```{"a":[true]}``` 32 | 33 | Escaped characters?
No problem: 34 | 35 | ```{"a":[true], "b": "this is unicode \u54"``` will complete to ```{"a":[true], "b": "this is unicode "}``` 36 | 37 | (After the stream outputs the complete Unicode, it will then display.) 38 | 39 | 40 | **Here’s a quick example to get you started:** 41 | 42 | install from pypi: 43 | 44 | ```bash 45 | pip install streamingjson 46 | ``` 47 | 48 | run example: 49 | 50 | ```python 51 | # init, @NOTE: We need to assign a new lexer for each JSON stream. 52 | lexer = streamingjson.Lexer() 53 | 54 | # append your JSON segment 55 | lexer.append_string('{"a":') 56 | 57 | # complete the JSON 58 | print(lexer.complete_json()) # will print `{"a":null}` 59 | 60 | # append more JSON segment 61 | lexer.append_string('[tr') 62 | 63 | # complete the JSON again 64 | print(lexer.complete_json()) # will print `{"a":[true]}` 65 | ``` 66 | 67 | 68 | For more examples please see: [examples](./examples/) 69 | 70 | ### Try to Find This Library in Another Programming Language? 71 | 72 | Please see: 73 | 74 | - [streaming-json-go](https://github.com/karminski/streaming-json-go) 75 | - [streaming-json-py](https://github.com/karminski/streaming-json-py) 76 | - [streaming-json-js](https://github.com/karminski/streaming-json-js) 77 | 78 | 79 | ### License 80 | 81 | This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details. 82 | -------------------------------------------------------------------------------- /streamingjson/lexer_tokens.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tokens for lexer 3 | """ 4 | 5 | # Token constants 6 | TOKEN_EOF = 0 # end-of-file 7 | TOKEN_IGNORED = 1 # \t', '\n', '\v', '\f', '\r', ' ' 8 | TOKEN_LEFT_BRACKET = 2 # [ 9 | TOKEN_RIGHT_BRACKET = 3 # ] 10 | TOKEN_LEFT_BRACE = 4 # { 11 | TOKEN_RIGHT_BRACE = 5 # } 12 | TOKEN_COLON = 6 # : 13 | TOKEN_DOT = 7 # . 14 | TOKEN_COMMA = 8 # , 15 | TOKEN_QUOTE = 9 # " 16 | TOKEN_ESCAPE_CHARACTER = 10 # \ 17 | TOKEN_SLASH = 11 # / 18 | TOKEN_NEGATIVE = 12 # - 19 | TOKEN_NULL = 13 # null 20 | TOKEN_TRUE = 14 # true 21 | TOKEN_FALSE = 15 # false 22 | TOKEN_ALPHABET_LOWERCASE_A = 16 # a 23 | TOKEN_ALPHABET_LOWERCASE_B = 17 # b 24 | TOKEN_ALPHABET_LOWERCASE_C = 18 # c 25 | TOKEN_ALPHABET_LOWERCASE_D = 19 # d 26 | TOKEN_ALPHABET_LOWERCASE_E = 20 # e 27 | TOKEN_ALPHABET_LOWERCASE_F = 21 # f 28 | TOKEN_ALPHABET_LOWERCASE_L = 22 # l 29 | TOKEN_ALPHABET_LOWERCASE_N = 23 # n 30 | TOKEN_ALPHABET_LOWERCASE_R = 24 # r 31 | TOKEN_ALPHABET_LOWERCASE_S = 25 # s 32 | TOKEN_ALPHABET_LOWERCASE_T = 26 # t 33 | TOKEN_ALPHABET_LOWERCASE_U = 27 # u 34 | TOKEN_ALPHABET_UPPERCASE_A = 28 # A 35 | TOKEN_ALPHABET_UPPERCASE_B = 29 # B 36 | TOKEN_ALPHABET_UPPERCASE_C = 30 # C 37 | TOKEN_ALPHABET_UPPERCASE_D = 31 # D 38 | TOKEN_ALPHABET_UPPERCASE_E = 32 # E 39 | TOKEN_ALPHABET_UPPERCASE_F = 33 # F 40 | TOKEN_NUMBER = 34 # number 41 | TOKEN_NUMBER_0 = 35 # 0 42 | TOKEN_NUMBER_1 = 36 # 1 43 | TOKEN_NUMBER_2 = 37 # 2 44 | TOKEN_NUMBER_3 = 38 # 3 45 | TOKEN_NUMBER_4 = 39 # 4 46 | TOKEN_NUMBER_5 = 40 # 5 47 | TOKEN_NUMBER_6 = 41 # 6 48 | TOKEN_NUMBER_7 = 42 # 7 49 | TOKEN_NUMBER_8 = 43 # 8 50 | TOKEN_NUMBER_9 = 44 # 9 51 | TOKEN_OTHERS = 45 # anything else in json 52 | 53 | # Token Symbols 54 | TOKEN_LEFT_BRACKET_SYMBOL = "[" 55 | TOKEN_RIGHT_BRACKET_SYMBOL = "]" 56 | TOKEN_LEFT_BRACE_SYMBOL = "{" 57 | TOKEN_RIGHT_BRACE_SYMBOL = "}" 58 | TOKEN_COLON_SYMBOL = ":" 59 | TOKEN_DOT_SYMBOL = "." 
60 | TOKEN_COMMA_SYMBOL = "," 61 | TOKEN_QUOTE_SYMBOL = '"' 62 | TOKEN_ESCAPE_CHARACTER_SYMBOL = "\\" 63 | TOKEN_SLASH_SYMBOL = "/" 64 | TOKEN_NEGATIVE_SYMBOL = "-" 65 | TOKEN_ALPHABET_LOWERCASE_A_SYMBOL = "a" 66 | TOKEN_ALPHABET_LOWERCASE_B_SYMBOL = "b" 67 | TOKEN_ALPHABET_LOWERCASE_C_SYMBOL = "c" 68 | TOKEN_ALPHABET_LOWERCASE_D_SYMBOL = "d" 69 | TOKEN_ALPHABET_LOWERCASE_E_SYMBOL = "e" 70 | TOKEN_ALPHABET_LOWERCASE_F_SYMBOL = "f" 71 | TOKEN_ALPHABET_LOWERCASE_L_SYMBOL = "l" 72 | TOKEN_ALPHABET_LOWERCASE_N_SYMBOL = "n" 73 | TOKEN_ALPHABET_LOWERCASE_R_SYMBOL = "r" 74 | TOKEN_ALPHABET_LOWERCASE_S_SYMBOL = "s" 75 | TOKEN_ALPHABET_LOWERCASE_T_SYMBOL = "t" 76 | TOKEN_ALPHABET_LOWERCASE_U_SYMBOL = "u" 77 | TOKEN_ALPHABET_UPPERCASE_A_SYMBOL = "A" 78 | TOKEN_ALPHABET_UPPERCASE_B_SYMBOL = "B" 79 | TOKEN_ALPHABET_UPPERCASE_C_SYMBOL = "C" 80 | TOKEN_ALPHABET_UPPERCASE_D_SYMBOL = "D" 81 | TOKEN_ALPHABET_UPPERCASE_E_SYMBOL = "E" 82 | TOKEN_ALPHABET_UPPERCASE_F_SYMBOL = "F" 83 | TOKEN_NUMBER_0_SYMBOL = "0" 84 | TOKEN_NUMBER_1_SYMBOL = "1" 85 | TOKEN_NUMBER_2_SYMBOL = "2" 86 | TOKEN_NUMBER_3_SYMBOL = "3" 87 | TOKEN_NUMBER_4_SYMBOL = "4" 88 | TOKEN_NUMBER_5_SYMBOL = "5" 89 | TOKEN_NUMBER_6_SYMBOL = "6" 90 | TOKEN_NUMBER_7_SYMBOL = "7" 91 | TOKEN_NUMBER_8_SYMBOL = "8" 92 | TOKEN_NUMBER_9_SYMBOL = "9" 93 | 94 | 95 | # Token symbol map 96 | token_symbol_map = { 97 | TOKEN_EOF: "EOF", 98 | TOKEN_LEFT_BRACKET: "[", 99 | TOKEN_RIGHT_BRACKET: "]", 100 | TOKEN_LEFT_BRACE: "{", 101 | TOKEN_RIGHT_BRACE: "}", 102 | TOKEN_COLON: ":", 103 | TOKEN_DOT: ".", 104 | TOKEN_COMMA: ",", 105 | TOKEN_QUOTE: '"', 106 | TOKEN_ESCAPE_CHARACTER: "\\", 107 | TOKEN_SLASH: "/", 108 | TOKEN_NEGATIVE: "-", 109 | TOKEN_NULL: "null", 110 | TOKEN_TRUE: "true", 111 | TOKEN_FALSE: "false", 112 | TOKEN_ALPHABET_LOWERCASE_A: "a", 113 | TOKEN_ALPHABET_LOWERCASE_B: "b", 114 | TOKEN_ALPHABET_LOWERCASE_C: "c", 115 | TOKEN_ALPHABET_LOWERCASE_D: "d", 116 | TOKEN_ALPHABET_LOWERCASE_E: "e", 117 | TOKEN_ALPHABET_LOWERCASE_F: "f", 118 | TOKEN_ALPHABET_LOWERCASE_L: "l", 119 | TOKEN_ALPHABET_LOWERCASE_N: "n", 120 | TOKEN_ALPHABET_LOWERCASE_R: "r", 121 | TOKEN_ALPHABET_LOWERCASE_S: "s", 122 | TOKEN_ALPHABET_LOWERCASE_T: "t", 123 | TOKEN_ALPHABET_LOWERCASE_U: "u", 124 | TOKEN_ALPHABET_UPPERCASE_A: "A", 125 | TOKEN_ALPHABET_UPPERCASE_B: "B", 126 | TOKEN_ALPHABET_UPPERCASE_C: "C", 127 | TOKEN_ALPHABET_UPPERCASE_D: "D", 128 | TOKEN_ALPHABET_UPPERCASE_E: "E", 129 | TOKEN_ALPHABET_UPPERCASE_F: "F", 130 | TOKEN_NUMBER_0: "0", 131 | TOKEN_NUMBER_1: "1", 132 | TOKEN_NUMBER_2: "2", 133 | TOKEN_NUMBER_3: "3", 134 | TOKEN_NUMBER_4: "4", 135 | TOKEN_NUMBER_5: "5", 136 | TOKEN_NUMBER_6: "6", 137 | TOKEN_NUMBER_7: "7", 138 | TOKEN_NUMBER_8: "8", 139 | TOKEN_NUMBER_9: "9", 140 | } 141 | -------------------------------------------------------------------------------- /tests/test_lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | test cases for lexer 3 | """ 4 | 5 | import json 6 | from streamingjson import lexer 7 | 8 | 9 | class TestCompleteJSONBase: 10 | """ 11 | lexer test cases 12 | """ 13 | 14 | def test_complete_json_base(self): 15 | """ 16 | base test cases, will test all case in incomplete json 17 | """ 18 | streaming_json_case = { 19 | # test case: basic object properity 20 | "{": "{}", # mirror stack: [], should remove from stack: [], should push into mirror stack: ['}'] 21 | "{}": "{}", # mirror stack: [], should remove from stack: [], should push into mirror stack: [] 22 | '{"': '{"":null}', # mirror stack: 
['}'], should remove from stack: [], should push into mirror stack: ['"', ':', 'n', 'u', 'l', 'l'] 23 | '{""': '{"":null}', # mirror stack: ['"', ':', 'n', 'u', 'l', 'l','}'], should remove from stack: ['"'], should push into mirror stack: [] 24 | '{"a': '{"a":null}', 25 | '{"a"': '{"a":null}', 26 | '{"a":': '{"a":null}', 27 | '{"a":n': '{"a":null}', 28 | '{"a":nu': '{"a":null}', 29 | '{"a":nul': '{"a":null}', 30 | '{"a":null': '{"a":null}', 31 | '{"a":null , "b': '{"a":null , "b":null}', 32 | '{"a":t': '{"a":true}', 33 | '{"a":tr': '{"a":true}', 34 | '{"a":tru': '{"a":true}', 35 | '{"a":true': '{"a":true}', 36 | '{"a":true,': '{"a":true}', 37 | '{"a":true , "b': '{"a":true , "b":null}', 38 | '{"a":f': '{"a":false}', 39 | '{"a":fa': '{"a":false}', 40 | '{"a":fal': '{"a":false}', 41 | '{"a":fals': '{"a":false}', 42 | '{"a":false': '{"a":false}', 43 | '{"a":false,': '{"a":false}', 44 | '{"a":false , "b': '{"a":false , "b":null}', 45 | '{"a":-': '{"a":0}', 46 | '{"a":12': '{"a":12}', 47 | '{"a":-0': '{"a":-0}', # @TODO: should be 0, not -0 48 | '{"a":-12': '{"a":-12}', 49 | '{"a":12,': '{"a":12}', 50 | '{"a":12.': '{"a":12.0}', 51 | '{"a":12.15': '{"a":12.15}', 52 | '{"a":12.15,': '{"a":12.15}', 53 | '{"a":-12.15,': '{"a":-12.15}', 54 | '{"a":-1.215e,': '{"a":-1.215}', 55 | '{"a":-1.215E,': '{"a":-1.215}', 56 | '{"a":-1.215e1,': '{"a":-1.215e1}', 57 | '{"a":-1.215e-1,': '{"a":-1.215e-1}', 58 | '{"a":-1.215e+1,': '{"a":-1.215e+1}', 59 | '{"a":-1.215E1,': '{"a":-1.215E1}', 60 | '{"a":-1.215E-1,': '{"a":-1.215E-1}', 61 | '{"a":-1.215E+1,': '{"a":-1.215E+1}', 62 | '{"a":-1.215e12': '{"a":-1.215e12}', 63 | '{"a":-1.215E12': '{"a":-1.215E12}', 64 | '{"a":-1.215e12,': '{"a":-1.215e12}', 65 | '{"a":-1.215E12,': '{"a":-1.215E12}', 66 | '{"a":"': '{"a":""}', 67 | '{"a":""': '{"a":""}', 68 | '{"a":"",': '{"a":""}', 69 | '{"a":"string': '{"a":"string"}', 70 | '{"a":"string"': '{"a":"string"}', 71 | '{"a":"string",': '{"a":"string"}', 72 | '{"a":"abcdefghijklmnopqrstuvwxyz",': '{"a":"abcdefghijklmnopqrstuvwxyz"}', 73 | '{"a":"ABCDEFGHIJKLMNOPQRSTUVWXYZ",': '{"a":"ABCDEFGHIJKLMNOPQRSTUVWXYZ"}', 74 | '{"a":"0123456789",': '{"a":"0123456789"}', 75 | '{"a":"https://': '{"a":"https://"}', 76 | '{"a":"\\u0': '{"a":""}', 77 | '{"a":"\\u00': '{"a":""}', 78 | '{"a":"\\u004': '{"a":""}', 79 | '{"a":"\\u0049': '{"a":"\\u0049"}', 80 | '{"a":"\\u0049"': '{"a":"\\u0049"}', 81 | '{"a":"\\u0049",': '{"a":"\\u0049"}', 82 | '{"a":"\\u0049","b":"': '{"a":"\\u0049","b":""}', 83 | '{"a":"\\u0049","b":"\\': '{"a":"\\u0049","b":""}', 84 | '{"a":"\\u0049","b":"\\u': '{"a":"\\u0049","b":""}', 85 | '{"a":"\\u0049","b":"\\u0': '{"a":"\\u0049","b":""}', 86 | '{"a":"\\u0049","b":"\\u00': '{"a":"\\u0049","b":""}', 87 | '{"a":"\\u0049","b":"\\u005': '{"a":"\\u0049","b":""}', 88 | '{"a":"\\u0049","b":"\\u0050': '{"a":"\\u0049","b":"\\u0050"}', 89 | '{"a":"\\u0049","b":"\\u0050"': '{"a":"\\u0049","b":"\\u0050"}', 90 | '{"a":"\\u0049","b":"\\u0050"}': '{"a":"\\u0049","b":"\\u0050"}', 91 | '{"a":"\\u0123",': '{"a":"\\u0123"}', 92 | '{"a":"\\u4567",': '{"a":"\\u4567"}', 93 | '{"a":"\\u89ab",': '{"a":"\\u89ab"}', 94 | '{"a":"\\u89AB",': '{"a":"\\u89AB"}', 95 | '{"a":"\\ucdef",': '{"a":"\\ucdef"}', 96 | '{"a":"\\ucdee",': '{"a":"\\ucdee"}', 97 | '{"a":"\\uaaaa",': '{"a":"\\uaaaa"}', 98 | '{"a":"\\uCDEF",': '{"a":"\\uCDEF"}', 99 | # test case: escape character 100 | '{"\\': '{"":null}', 101 | '{"\\"': '{"\\"":null}', 102 | '{"\\""': '{"\\"":null}', 103 | '{"\\"\\': '{"\\"":null}', 104 | '{"\\"\\""': '{"\\"\\"":null}', 105 | '{"\\"":': 
'{"\\"":null}', 106 | '{"a":"\\"': '{"a":"\\""}', 107 | '{"a":"\\""': '{"a":"\\""}', 108 | '{"a":"\\"\\"': '{"a":"\\"\\""}', 109 | '{"a":"\\"\\""': '{"a":"\\"\\""}', 110 | '{"a":"\\"\\"",': '{"a":"\\"\\""}', 111 | '{"a":"\\"\\""}': '{"a":"\\"\\""}', 112 | '{"\\\\': '{"\\\\":null}', 113 | '{"\\/': '{"\\/":null}', 114 | '{"\\b': '{"\\b":null}', 115 | '{"\\f': '{"\\f":null}', 116 | '{"\\n': '{"\\n":null}', 117 | '{"\\r': '{"\\r":null}', 118 | '{"\\t': '{"\\t":null}', 119 | '{"\\u0111': '{"\\u0111":null}', 120 | # test case: token in string 121 | '{"a":"["': '{"a":"["}', 122 | '{"a":"[]"': '{"a":"[]"}', 123 | '{"a":"]"': '{"a":"]"}', 124 | '{"a":"{"': '{"a":"{"}', 125 | '{"a":"{}"': '{"a":"{}"}', 126 | '{"a":"}"': '{"a":"}"}', 127 | '{"a":","': '{"a":","}', 128 | '{"a":"."': '{"a":"."}', 129 | '{"a":"","': '{"a":"","":null}', 130 | '{"a":"","b': '{"a":"","b":null}', 131 | '{"a":"","b"': '{"a":"","b":null}', 132 | '{"a":"","b":': '{"a":"","b":null}', 133 | '{"a":"","b":"': '{"a":"","b":""}', 134 | '{"a":"","b":""': '{"a":"","b":""}', 135 | '{"a":"","b":""}': '{"a":"","b":""}', 136 | '{"1': '{"1":null}', 137 | '{"1.': '{"1.":null}', 138 | '{"1.1': '{"1.1":null}', 139 | '{"1.10': '{"1.10":null}', 140 | '{"1"': '{"1":null}', 141 | '{"1":': '{"1":null}', 142 | '{"1":"': '{"1":""}', 143 | '{"1":"1': '{"1":"1"}', 144 | '{"1":"1.': '{"1":"1."}', 145 | '{"1":"1.1': '{"1":"1.1"}', 146 | '{"1":"1.10': '{"1":"1.10"}', 147 | '{"1":"1"': '{"1":"1"}', 148 | '{"1":"1"}': '{"1":"1"}', 149 | '{"-1":"-1"}': '{"-1":"-1"}', 150 | '{"t': '{"t":null}', 151 | '{"tr': '{"tr":null}', 152 | '{"tru': '{"tru":null}', 153 | '{"true': '{"true":null}', 154 | '{"true"': '{"true":null}', 155 | '{"true":': '{"true":null}', 156 | '{"true":"t': '{"true":"t"}', 157 | '{"true":"tr': '{"true":"tr"}', 158 | '{"true":"tru': '{"true":"tru"}', 159 | '{"true":"true': '{"true":"true"}', 160 | '{"true":"true"': '{"true":"true"}', 161 | '{"true":"true"}': '{"true":"true"}', 162 | '{"f': '{"f":null}', 163 | '{"fa': '{"fa":null}', 164 | '{"fal': '{"fal":null}', 165 | '{"fals': '{"fals":null}', 166 | '{"false': '{"false":null}', 167 | '{"false"': '{"false":null}', 168 | '{"false":': '{"false":null}', 169 | '{"false":"f': '{"false":"f"}', 170 | '{"false":"fa': '{"false":"fa"}', 171 | '{"false":"fal': '{"false":"fal"}', 172 | '{"false":"fals': '{"false":"fals"}', 173 | '{"false":"false': '{"false":"false"}', 174 | '{"false":"false"': '{"false":"false"}', 175 | '{"false":"false"}': '{"false":"false"}', 176 | '{"n': '{"n":null}', 177 | '{"nu': '{"nu":null}', 178 | '{"nul': '{"nul":null}', 179 | '{"null': '{"null":null}', 180 | '{"null"': '{"null":null}', 181 | '{"null":': '{"null":null}', 182 | '{"null":"n': '{"null":"n"}', 183 | '{"null":"nu': '{"null":"nu"}', 184 | '{"null":"nul': '{"null":"nul"}', 185 | '{"null":"null': '{"null":"null"}', 186 | '{"null":"null"': '{"null":"null"}', 187 | '{"null":"null"}': '{"null":"null"}', 188 | # test case: array as object value 189 | '{"a":[': '{"a":[]}', 190 | '{"a":[]': '{"a":[]}', 191 | '{"a":[1': '{"a":[1]}', 192 | '{"a":[1,': '{"a":[1]}', 193 | '{"a":[-0,': '{"a":[-0]}', # @TODO: should be 0, not -0 194 | '{"a":[-1,': '{"a":[-1]}', 195 | '{"a":[1,0': '{"a":[1,0]}', 196 | '{"a":[1,0.0': '{"a":[1,0.0]}', 197 | '{"a":[1,0.01': '{"a":[1,0.01]}', 198 | '{"a":[1,0.01]': '{"a":[1,0.01]}', 199 | '{"a":[1,0.01]}': '{"a":[1,0.01]}', 200 | '{"a":[-1,0.01]}': '{"a":[-1,0.01]}', 201 | '{"a":[-1,-': '{"a":[-1,0]}', 202 | '{"a":[-1,-0': '{"a":[-1,-0]}', # @TODO: should be 0, not -0 203 | '{"a":[1,-0.01]}': 
'{"a":[1,-0.01]}', 204 | '{"a":[-1,-0.01]}': '{"a":[-1,-0.01]}', 205 | '{"a":[n': '{"a":[null]}', 206 | '{"a":[nu': '{"a":[null]}', 207 | '{"a":[nul': '{"a":[null]}', 208 | '{"a":[null': '{"a":[null]}', 209 | '{"a":[null,': '{"a":[null]}', 210 | '{"a":[null]': '{"a":[null]}', 211 | '{"a":[null]}': '{"a":[null]}', 212 | '{"a":[t': '{"a":[true]}', 213 | '{"a":[tr': '{"a":[true]}', 214 | '{"a":[tru': '{"a":[true]}', 215 | '{"a":[true': '{"a":[true]}', 216 | '{"a":[true,': '{"a":[true]}', 217 | '{"a":[true]': '{"a":[true]}', 218 | '{"a":[true]}': '{"a":[true]}', 219 | '{"a":[f': '{"a":[false]}', 220 | '{"a":[fa': '{"a":[false]}', 221 | '{"a":[fal': '{"a":[false]}', 222 | '{"a":[fals': '{"a":[false]}', 223 | '{"a":[false': '{"a":[false]}', 224 | '{"a":[false,': '{"a":[false]}', 225 | '{"a":[false]': '{"a":[false]}', 226 | '{"a":[false]}': '{"a":[false]}', 227 | '{"a":["': '{"a":[""]}', 228 | '{"a":["b': '{"a":["b"]}', 229 | '{"a":["b"': '{"a":["b"]}', 230 | '{"a":["b",': '{"a":["b"]}', 231 | '{"a":["b"]': '{"a":["b"]}', 232 | '{"a":["b"]}': '{"a":["b"]}', 233 | '{"a":[{': '{"a":[{}]}', 234 | '{"a":[{"': '{"a":[{"":null}]}', 235 | '{"a":[{"b': '{"a":[{"b":null}]}', 236 | '{"a":[{"b"': '{"a":[{"b":null}]}', 237 | '{"a":[{"b":': '{"a":[{"b":null}]}', 238 | '{"a":[{"b":"': '{"a":[{"b":""}]}', 239 | '{"a":[{"b":"c': '{"a":[{"b":"c"}]}', 240 | '{"a":[{"b":"c"': '{"a":[{"b":"c"}]}', 241 | '{"a":[{"b":"c",': '{"a":[{"b":"c"}]}', 242 | '{"a":[{"b":"c"}': '{"a":[{"b":"c"}]}', 243 | '{"a":[{"b":"c"}]': '{"a":[{"b":"c"}]}', 244 | '{"a":[{"b":"c"}]}': '{"a":[{"b":"c"}]}', 245 | # test case: object as object value 246 | '{"a":{': '{"a":{}}', 247 | '{"a":{"': '{"a":{"":null}}', 248 | '{"a":{"b': '{"a":{"b":null}}', 249 | '{"a":{"b"': '{"a":{"b":null}}', 250 | '{"a":{"b":': '{"a":{"b":null}}', 251 | '{"a":{"b":"': '{"a":{"b":""}}', 252 | '{"a":{"b":"c': '{"a":{"b":"c"}}', 253 | '{"a":{"b":"c"': '{"a":{"b":"c"}}', 254 | '{"a":{"b":"c",': '{"a":{"b":"c"}}', 255 | '{"a":{"b":"c"}': '{"a":{"b":"c"}}', 256 | '{"a":{"b":"c"}}': '{"a":{"b":"c"}}', 257 | # test case: multiple object properity 258 | '{"a":1,"b":1.20,"c":0.03,"d":-1,"e":-1.20,"f":-0.03,"g":1.997e3,"h":-1.338e19,"i":"a","j":null,"k":true,"l":false,"m":{},"n":[]]}': '{"a":1,"b":1.20,"c":0.03,"d":-1,"e":-1.20,"f":-0.03,"g":1.997e3,"h":-1.338e19,"i":"a","j":null,"k":true,"l":false,"m":{},"n":[]]}', 259 | # test case: basic array element 260 | "[": "[]", 261 | "[]": "[]", 262 | "[n": "[null]", 263 | "[nu": "[null]", 264 | "[nul": "[null]", 265 | "[null": "[null]", 266 | "[null,": "[null]", 267 | "[null,null": "[null,null]", 268 | "[t": "[true]", 269 | "[tr": "[true]", 270 | "[tru": "[true]", 271 | "[true": "[true]", 272 | "[true,": "[true]", 273 | "[true,true": "[true,true]", 274 | "[f": "[false]", 275 | "[fa": "[false]", 276 | "[fal": "[false]", 277 | "[fals": "[false]", 278 | "[false": "[false]", 279 | "[false,": "[false]", 280 | "[false,false": "[false,false]", 281 | "[0": "[0]", 282 | "[-": "[0]", 283 | "[-1": "[-1]", 284 | "[0,": "[0]", 285 | "[-1,": "[-1]", 286 | "[-1,-": "[-1,0]", 287 | "[0.": "[0.0]", 288 | "[-0.": "[-0.0]", 289 | "[0.1": "[0.1]", 290 | "[0.12,": "[0.12]", 291 | "[-0.12,": "[-0.12]", 292 | "[1,2,": "[1,2]", 293 | "[1,2,0": "[1,2,0]", 294 | "[1,2,0.": "[1,2,0.0]", 295 | "[1,2,0.1": "[1,2,0.1]", 296 | "[1,2,0.10": "[1,2,0.10]", 297 | "[-1,2,0.10": "[-1,2,0.10]", 298 | "[-1,-2,0.10": "[-1,-2,0.10]", 299 | "[-1,-2,-0.10": "[-1,-2,-0.10]", 300 | "[1,-2,-0.10": "[1,-2,-0.10]", 301 | "[1,2,-0.10": "[1,2,-0.10]", 302 | "[1,-2,0.10": 
"[1,-2,0.10]", 303 | "[2.998e": "[2.998]", 304 | "[2.998E": "[2.998]", 305 | "[2.998e1": "[2.998e1]", 306 | "[2.998e-1": "[2.998e-1]", 307 | "[2.998e+1": "[2.998e+1]", 308 | "[2.998E1": "[2.998E1]", 309 | "[2.998E-1": "[2.998E-1]", 310 | "[2.998E+1": "[2.998E+1]", 311 | "[2.998e10": "[2.998e10]", 312 | "[2.998E10": "[2.998E10]", 313 | "[2.998e10,": "[2.998e10]", 314 | "[2.998E10,": "[2.998E10]", 315 | "[-2.998e": "[-2.998]", 316 | "[-2.998E": "[-2.998]", 317 | "[-2.998e1": "[-2.998e1]", 318 | "[-2.998e-1": "[-2.998e-1]", 319 | "[-2.998e+1": "[-2.998e+1]", 320 | "[-2.998E1": "[-2.998E1]", 321 | "[-2.998E-1": "[-2.998E-1]", 322 | "[-2.998E+1": "[-2.998E+1]", 323 | "[-2.998e10": "[-2.998e10]", 324 | "[-2.998E10": "[-2.998E10]", 325 | "[2.998e10,1": "[2.998e10,1]", 326 | "[2.998e10,1.0": "[2.998e10,1.0]", 327 | "[2.998e10,1.02": "[2.998e10,1.02]", 328 | "[2.998e10,1.02e": "[2.998e10,1.02]", 329 | "[2.998e10,1.02e8": "[2.998e10,1.02e8]", 330 | "[2.998E10,1.02E8": "[2.998E10,1.02E8]", 331 | "[2.998e10,1.02e8,": "[2.998e10,1.02e8]", 332 | "[2.998E10,1.02E8,": "[2.998E10,1.02E8]", 333 | '["': '[""]', 334 | '[""': '[""]', 335 | '["",': '[""]', 336 | '["a': '["a"]', 337 | '["a"': '["a"]', 338 | '["a",': '["a"]', 339 | '["a","': '["a",""]', 340 | '["a","b': '["a","b"]', 341 | '["a","b"': '["a","b"]', 342 | '["a","b",': '["a","b"]', 343 | '["a","b"]': '["a","b"]', 344 | '["\\u0': '[""]', 345 | '["\\u00': '[""]', 346 | '["\\u004': '[""]', 347 | '["\\u0049': '["\\u0049"]', 348 | '["\\u0049"': '["\\u0049"]', 349 | '["\\u0049",': '["\\u0049"]', 350 | '["\\u0049","': '["\\u0049",""]', 351 | '["\\u0049","\\': '["\\u0049",""]', 352 | '["\\u0049","\\u': '["\\u0049",""]', 353 | '["\\u0049","\\u0': '["\\u0049",""]', 354 | '["\\u0049","\\u00': '["\\u0049",""]', 355 | '["\\u0049","\\u005': '["\\u0049",""]', 356 | '["\\u0049","\\u0050': '["\\u0049","\\u0050"]', 357 | '["\\u0049","\\u0050"': '["\\u0049","\\u0050"]', 358 | '["\\u0049","\\u0050"]': '["\\u0049","\\u0050"]', 359 | '["\\u0123': '["\\u0123"]', 360 | '["\\u4567': '["\\u4567"]', 361 | '["\\u89ab': '["\\u89ab"]', 362 | '["\\u89AB': '["\\u89AB"]', 363 | '["\\ucdef': '["\\ucdef"]', 364 | '["\\uCDEF': '["\\uCDEF"]', 365 | # test case: object as array element 366 | "[{": "[{}]", 367 | '[{"': '[{"":null}]', 368 | '[{""': '[{"":null}]', 369 | '[{"":': '[{"":null}]', 370 | '[{"":"': '[{"":""}]', 371 | '[{"":""': '[{"":""}]', 372 | '[{"":""}': '[{"":""}]', 373 | '[{"":""}]': '[{"":""}]', 374 | '[{"a': '[{"a":null}]', 375 | '[{"a"': '[{"a":null}]', 376 | '[{"a":': '[{"a":null}]', 377 | '[{"a":"': '[{"a":""}]', 378 | '[{"a":"b': '[{"a":"b"}]', 379 | '[{"a":"b"': '[{"a":"b"}]', 380 | '[{"a":"b"}': '[{"a":"b"}]', 381 | '[{"a":"b"}]': '[{"a":"b"}]', 382 | '[{"a":n': '[{"a":null}]', 383 | '[{"a":nu': '[{"a":null}]', 384 | '[{"a":nul': '[{"a":null}]', 385 | '[{"a":null': '[{"a":null}]', 386 | '[{"a":null,': '[{"a":null}]', 387 | '[{"a":null}': '[{"a":null}]', 388 | '[{"a":null}]': '[{"a":null}]', 389 | '[{"a":t': '[{"a":true}]', 390 | '[{"a":tr': '[{"a":true}]', 391 | '[{"a":tru': '[{"a":true}]', 392 | '[{"a":true': '[{"a":true}]', 393 | '[{"a":true,': '[{"a":true}]', 394 | '[{"a":true}': '[{"a":true}]', 395 | '[{"a":true}]': '[{"a":true}]', 396 | '[{"a":f': '[{"a":false}]', 397 | '[{"a":fa': '[{"a":false}]', 398 | '[{"a":fal': '[{"a":false}]', 399 | '[{"a":fals': '[{"a":false}]', 400 | '[{"a":false': '[{"a":false}]', 401 | '[{"a":false,': '[{"a":false}]', 402 | '[{"a":false}': '[{"a":false}]', 403 | '[{"a":false}]': '[{"a":false}]', 404 | '[{"a":-': '[{"a":0}]', 405 | 
'[{"a":0': '[{"a":0}]', 406 | '[{"a":-0': '[{"a":-0}]', # @TODO: should be 0, not -0 407 | '[{"a":0.': '[{"a":0.0}]', 408 | '[{"a":0.1': '[{"a":0.1}]', 409 | '[{"a":0.10': '[{"a":0.10}]', 410 | '[{"a":0.10,': '[{"a":0.10}]', 411 | '[{"a":0.10}': '[{"a":0.10}]', 412 | '[{"a":0.10}]': '[{"a":0.10}]', 413 | '[{"a":-0.10}]': '[{"a":-0.10}]', 414 | '[{"a":[': '[{"a":[]}]', 415 | '[{"a":[1': '[{"a":[1]}]', 416 | '[{"a":[t': '[{"a":[true]}]', 417 | '[{"a":[f': '[{"a":[false]}]', 418 | '[{"a":[n': '[{"a":[null]}]', 419 | '[{"a":["': '[{"a":[""]}]', 420 | '[{"a":[{': '[{"a":[{}]}]', 421 | '[{"a":[{"b":"c"},{': '[{"a":[{"b":"c"},{}]}]', 422 | '[{"a":[{"b":"c"},{"': '[{"a":[{"b":"c"},{"":null}]}]', 423 | '[{"a":[{"b":"c"},{"d"': '[{"a":[{"b":"c"},{"d":null}]}]', 424 | '[{"a":[{"b":"c"},{"d":-': '[{"a":[{"b":"c"},{"d":0}]}]', 425 | '[{"a":[{"b":"c"},{"d":-0': '[{"a":[{"b":"c"},{"d":-0}]}]', # @TODO: should be 0, not -0 426 | '[{"a":[{"b":"c"},{"d":1.': '[{"a":[{"b":"c"},{"d":1.0}]}]', 427 | '[{"a":[{"b":"c"},{"d":1.1': '[{"a":[{"b":"c"},{"d":1.1}]}]', 428 | '[{"a":[{"b":"c"},{"d":-1.1': '[{"a":[{"b":"c"},{"d":-1.1}]}]', 429 | '[{"a":[{"b":"c"},{"d":[': '[{"a":[{"b":"c"},{"d":[]}]}]', 430 | '[{"a":[{"b":"c"},{"d":[{': '[{"a":[{"b":"c"},{"d":[{}]}]}]', 431 | # test case: multiple array element 432 | '[1,1.20,0.03,-1,-1.20,-0.03,1.997e3,-1.338e19,"a",null,true,false,{},[]]': '[1,1.20,0.03,-1,-1.20,-0.03,1.997e3,-1.338e19,"a",null,true,false,{},[]]', 433 | # test case: array as array element 434 | "[[": "[[]]", 435 | "[[]": "[[]]", 436 | "[[]]": "[[]]", 437 | "[[{": "[[{}]]", 438 | '[["': '[[""]]', 439 | '[[""': '[[""]]', 440 | '[["a': '[["a"]]', 441 | '[["a"': '[["a"]]', 442 | '[["a"]': '[["a"]]', 443 | '[["a"],': '[["a"]]', 444 | '[["a"],[': '[["a"],[]]', 445 | '[["a"],[]': '[["a"],[]]', 446 | '[["a"],[]]': '[["a"],[]]', 447 | '[["a"],{': '[["a"],{}]', 448 | '[["a"],{}': '[["a"],{}]', 449 | '[["a"],{}]': '[["a"],{}]', 450 | '[["a"],{"': '[["a"],{"":null}]', 451 | '[["a"],{"b': '[["a"],{"b":null}]', 452 | '[["a"],{"b"': '[["a"],{"b":null}]', 453 | '[["a"],{"b":': '[["a"],{"b":null}]', 454 | '[["a"],{"b":"': '[["a"],{"b":""}]', 455 | '[["a"],{"b":"c': '[["a"],{"b":"c"}]', 456 | '[["a"],{"b":"c"': '[["a"],{"b":"c"}]', 457 | '[["a"],{"b":"c"}': '[["a"],{"b":"c"}]', 458 | '[["a"],{"b":"c"}]': '[["a"],{"b":"c"}]', 459 | # test case: ignore token 460 | "{ }": "{ }", 461 | '{ " a " : -1.2 , ': '{ " a " : -1.2}', 462 | '{ " a " : -1.2 , " b " : " c " ': '{ " a " : -1.2 , " b " : " c "}', 463 | '{ " a " : -1.2 , " b " : " c " , " d" : true ': '{ " a " : -1.2 , " b " : " c " , " d" : true}', 464 | '{ " a " : -1.2 , " b " : " c " , " d" : true , "e " : { } } ': '{ " a " : -1.2 , " b " : " c " , " d" : true , "e " : { } }', 465 | "[ ]": "[ ]", 466 | "[ 1": "[ 1]", 467 | "[ 1 , -1.020 , true , false, null": "[ 1 , -1.020 , true , false, null]", 468 | "[ 1 , -1.020 , true , false, null, { }": "[ 1 , -1.020 , true , false, null, { }]", 469 | } 470 | for test_case, expect in streaming_json_case.items(): 471 | lexer_instance = lexer.Lexer() 472 | err_in_append_string = lexer_instance.append_string(test_case) 473 | ret = lexer_instance.complete_json() 474 | assert err_in_append_string is None 475 | assert expect == ret, "unexpected JSON" 476 | 477 | def test_complete_json_nestad(self): 478 | """ 479 | test nestad JSON by each caracter 480 | """ 481 | streaming_json_content = '{"string": "这是一个字符串", "integer": 42, "float": 3.14159, "boolean_true": true, "boolean_false": false, "null": null, "object": {"empty_object": {}, 
"non_empty_object": {"key": "value"}, "nested_object": {"nested_key": {"sub_nested_key": "sub_nested_value"}}}, "array":["string in array", 123, 45.67, true, false, null, {"object_in_array": "object_value"},["nested_array"]]}' 482 | lexer_instance = lexer.Lexer() 483 | for char in streaming_json_content: 484 | err_in_append_string = lexer_instance.append_string(char) 485 | assert err_in_append_string is None 486 | ret = lexer_instance.complete_json() 487 | interface_for_json = None 488 | err_in_unmarshal = None 489 | try: 490 | interface_for_json = json.loads(ret) 491 | except Exception as e: 492 | err_in_unmarshal = e 493 | assert err_in_unmarshal is None 494 | 495 | def test_complete_json_nestad2(self): 496 | """ 497 | test nestad JSON by each caracter, new line included 498 | """ 499 | streaming_json_content = """{ 500 | "string_with_escape_chars": "This string contains escape characters like \\\"quotes\\\", \\\\backslashes\\\\, \\/forwardslashes/, \\bbackspace\\b, \\fformfeed\\f, \\nnewline\\n, \\rcarriage return\\r, \\ttab\\t.", 501 | "scientific_notation": 2.998e8, 502 | "unicode_characters": "Some unicode characters: \\u0041\\u0042\\u0043\\u0044", 503 | "multiple_lang_strings": { 504 | "english": "Hello, World!", 505 | "chinese": "你好,世界!", 506 | "spanish": "¡Hola, mundo!", 507 | "russian": "Привет, мир!" 508 | }, 509 | "json_tokens_as_strings": "{\\"key_with_invalid_token\\": \\"value_with_invalid_separator\\": \\"a\\"}", 510 | "nested_objects": { 511 | "nested_object1": { 512 | "key1": "value1", 513 | "key2": "value2", 514 | "nested_object2": { 515 | "inner_key1": "inner_value1", 516 | "inner_key2": "inner_value2" 517 | } 518 | }, 519 | "nested_object2": { 520 | "name": "John Doe", 521 | "age": 30, 522 | "address": { 523 | "street": "123 Main St", 524 | "city": "Anytown" 525 | } 526 | } 527 | }, 528 | "array_test": { 529 | "simple_array": [10, 20, 30, 40, 50], 530 | "array_of_objects": [ 531 | { 532 | "name": "Alice", 533 | "age": 25 534 | }, 535 | { 536 | "name": "Bob", 537 | "age": 30 538 | } 539 | ], 540 | "nested_arrays": [ 541 | [1, 2, 3], 542 | [true, false, null] 543 | ], 544 | "empty_objects": {}, 545 | "empty_arrays": [] 546 | } 547 | } 548 | """ 549 | lexer_instance = lexer.Lexer() 550 | for char in streaming_json_content: 551 | err_in_append_string = lexer_instance.append_string(char) 552 | assert err_in_append_string is None 553 | ret = lexer_instance.complete_json() 554 | interface_for_json = None 555 | err_in_unmarshal = None 556 | try: 557 | interface_for_json = json.loads(ret) 558 | except Exception as e: 559 | err_in_unmarshal = e 560 | assert err_in_unmarshal is None 561 | 562 | def test_complete_json_escape_and_etc(self): 563 | """ 564 | test escape caracter and unicode 565 | """ 566 | streaming_json_content = """{ 567 | "string": "含有转义字符的字符串:\\"\\\\\\/\\b\\f\\n\\r\\t", 568 | "string_unicode": "含Unicode字符:\\u6211\\u662F", 569 | "negative_integer": -42, 570 | "float_scientific_notation": 6.02e23, 571 | "negative_float": -3.14159, 572 | "array_with_various_numbers": [ 573 | 0, 574 | -1, 575 | 2.99792458e8, 576 | -6.62607015e-34 577 | ], 578 | "special_characters": "\\u003C\\u003E\\u0026\\u0027\\u0022", 579 | "nested_structure": { 580 | "nested_key_with_escaped_chars": "这是一个带有转义字符的字符串:\\\\n\\\\r\\\\t", 581 | "nested_object": { 582 | "bool_true": true, 583 | "bool_false": false, 584 | "null_value": null, 585 | "complex_number": 3.14e-10 586 | } 587 | } 588 | } 589 | """ 590 | lexer_instance = lexer.Lexer() 591 | for char in streaming_json_content: 592 | 
err_in_append_string = lexer_instance.append_string(char) 593 | assert err_in_append_string is None 594 | ret = lexer_instance.complete_json() 595 | interface_for_json = None 596 | err_in_unmarshal = None 597 | try: 598 | interface_for_json = json.loads(ret) 599 | except Exception as e: 600 | err_in_unmarshal = e 601 | assert err_in_unmarshal is None 602 | -------------------------------------------------------------------------------- /streamingjson/lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | streaming-json-py main lexer method 3 | This method will 4 | """ 5 | 6 | from streamingjson import lexer_tokens 7 | from streamingjson import lexer_helper 8 | 9 | 10 | class Lexer: 11 | """ 12 | lexer for json fragment 13 | """ 14 | 15 | def __init__(self): 16 | self.json_content = [] # input JSON content 17 | self.padding_content = ( 18 | [] 19 | ) # padding content for ignored characters and escape characters, etc. 20 | self.json_segment = "" # appended JSON segment by the AppendString() method. 21 | self.token_stack = [] # token stack for input JSON 22 | self.mirror_token_stack = [] # token stack for auto-completed tokens 23 | 24 | def __get_top_token_on_stack(self): 25 | """ 26 | get token on the stack top 27 | """ 28 | if not self.token_stack: 29 | return lexer_tokens.TOKEN_EOF 30 | return self.token_stack[-1] 31 | 32 | def __get_top_token_on_mirror_stack(self): 33 | """ 34 | get token on the mirror stack top 35 | """ 36 | if not self.mirror_token_stack: 37 | return lexer_tokens.TOKEN_EOF 38 | return self.mirror_token_stack[-1] 39 | 40 | def __pop_token_stack(self): 41 | """ 42 | pop token on the stack top 43 | """ 44 | if not self.token_stack: 45 | return lexer_tokens.TOKEN_EOF 46 | return self.token_stack.pop() 47 | 48 | def __pop_mirror_token_stack(self): 49 | """ 50 | pop token on the mirror stack top 51 | """ 52 | if not self.mirror_token_stack: 53 | return lexer_tokens.TOKEN_EOF 54 | return self.mirror_token_stack.pop() 55 | 56 | def __push_token_stack(self, token): 57 | """ 58 | push token into the stack 59 | """ 60 | self.token_stack.append(token) 61 | 62 | def __push_mirror_token_stack(self, token): 63 | """ 64 | push token into the mirror stack 65 | """ 66 | self.mirror_token_stack.append(token) 67 | 68 | def __dump_mirror_token_stack_to_string(self): 69 | """ 70 | convert mirror stack token into string 71 | """ 72 | return "".join( 73 | [ 74 | lexer_tokens.token_symbol_map[x] 75 | for x in reversed(self.mirror_token_stack) 76 | ] 77 | ) 78 | 79 | def __skip_json_segment(self, n): 80 | """ 81 | skip JSON segment by length n 82 | """ 83 | self.json_segment = self.json_segment[n:] 84 | 85 | def __push_negative_into_json_content(self): 86 | """ 87 | push negative symbol `-` into JSON content 88 | """ 89 | self.json_content.append(lexer_tokens.TOKEN_NEGATIVE_SYMBOL) 90 | 91 | def __push_byte_into_padding_content(self, b): 92 | """ 93 | push byte into JSON content by given 94 | """ 95 | self.padding_content.append(b) 96 | 97 | def __append_padding_content_to_json_content(self): 98 | """ 99 | append padding content into JSON content 100 | """ 101 | self.json_content.extend(self.padding_content) 102 | self.padding_content = [] 103 | 104 | def __have_padding_content(self): 105 | """ 106 | check if padding content is empty 107 | """ 108 | return bool(self.padding_content) 109 | 110 | def __clean_padding_content(self): 111 | """ 112 | set padding content to empty 113 | """ 114 | self.padding_content = [] 115 | 116 | def 
__stream_stopped_in_an_object_key_start(self) -> bool: 117 | """ 118 | check if JSON stream stopped at an object properity's key start, like `{"` 119 | """ 120 | # `{`, `"` in stack, or `,`, `"` in stack 121 | case1 = [lexer_tokens.TOKEN_LEFT_BRACE, lexer_tokens.TOKEN_QUOTE] 122 | case2 = [lexer_tokens.TOKEN_COMMA, lexer_tokens.TOKEN_QUOTE] 123 | # `}` in mirror stack 124 | case3 = [lexer_tokens.TOKEN_RIGHT_BRACE] 125 | return ( 126 | lexer_helper.match_stack(self.token_stack, case1) 127 | or lexer_helper.match_stack(self.token_stack, case2) 128 | ) and lexer_helper.match_stack(self.mirror_token_stack, case3) 129 | 130 | def __stream_stopped_in_an_object_key_end(self) -> bool: 131 | """ 132 | check if JSON stream stopped in an object properity's key, like `{"field` 133 | """ 134 | # // `{`, `"`, `"` in stack, or `,`, `"`, `"` in stack 135 | case1 = [ 136 | lexer_tokens.TOKEN_LEFT_BRACE, 137 | lexer_tokens.TOKEN_QUOTE, 138 | lexer_tokens.TOKEN_QUOTE, 139 | ] 140 | case2 = [ 141 | lexer_tokens.TOKEN_COMMA, 142 | lexer_tokens.TOKEN_QUOTE, 143 | lexer_tokens.TOKEN_QUOTE, 144 | ] 145 | # // `"`, `:`, `n`, `u`, `l`, `l`, `}` in mirror stack 146 | case3 = [ 147 | lexer_tokens.TOKEN_RIGHT_BRACE, 148 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 149 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 150 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 151 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 152 | lexer_tokens.TOKEN_COLON, 153 | lexer_tokens.TOKEN_QUOTE, 154 | ] 155 | return ( 156 | lexer_helper.match_stack(self.token_stack, case1) 157 | or lexer_helper.match_stack(self.token_stack, case2) 158 | ) and lexer_helper.match_stack(self.mirror_token_stack, case3) 159 | 160 | def __stream_stopped_in_an_object_string_value_start(self) -> bool: 161 | """ 162 | check if JSON stream stopped in an object properity's value start, 163 | like `{"field": "` 164 | """ 165 | 166 | # `:`, `"` in stack 167 | case1 = [lexer_tokens.TOKEN_COLON, lexer_tokens.TOKEN_QUOTE] 168 | # // `n`, `u`, `l`, `l`, `}` in mirror stack 169 | case2 = [ 170 | lexer_tokens.TOKEN_RIGHT_BRACE, 171 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 172 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 173 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 174 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 175 | ] 176 | return lexer_helper.match_stack( 177 | self.token_stack, case1 178 | ) and lexer_helper.match_stack(self.mirror_token_stack, case2) 179 | 180 | def __stream_stopped_in_an_object_value_end(self) -> bool: 181 | """ 182 | check if JSON stream stopped in an object properity's value finish, 183 | like `{"field": "value"` 184 | """ 185 | # `"`, `}` left 186 | tokens = [lexer_tokens.TOKEN_RIGHT_BRACE, lexer_tokens.TOKEN_QUOTE] 187 | return lexer_helper.match_stack(self.mirror_token_stack, tokens) 188 | 189 | def __stream_stopped_in_an_object_array_value_start(self) -> bool: 190 | """ 191 | check if JSON stream stopped in an object properity's value start by array, 192 | like `{"field":[` 193 | """ 194 | # `:`, `[` in stack 195 | case1 = [lexer_tokens.TOKEN_COLON, lexer_tokens.TOKEN_LEFT_BRACKET] 196 | # `n`, `u`, `l`, `l`, `}` in mirror stack 197 | case2 = [ 198 | lexer_tokens.TOKEN_RIGHT_BRACE, 199 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 200 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 201 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 202 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 203 | ] 204 | return lexer_helper.match_stack( 205 | self.token_stack, case1 206 | ) and lexer_helper.match_stack(self.mirror_token_stack, case2) 207 | 208 | def 
__stream_stopped_in_an_object_object_value_start(self) -> bool: 209 | """ 210 | check if JSON stream stopped in an object properity's value start by array, 211 | like `{"field":{` 212 | """ 213 | # `:`, `{` in stack 214 | case1 = [lexer_tokens.TOKEN_COLON, lexer_tokens.TOKEN_LEFT_BRACE] 215 | # `n`, `u`, `l`, `l`, `}` in mirror stack 216 | case2 = [ 217 | lexer_tokens.TOKEN_RIGHT_BRACE, 218 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 219 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 220 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 221 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 222 | ] 223 | return lexer_helper.match_stack( 224 | self.token_stack, case1 225 | ) and lexer_helper.match_stack(self.mirror_token_stack, case2) 226 | 227 | def __stream_stopped_in_an_object_negative_number_value_start(self) -> bool: 228 | """ 229 | check if JSON stream stopped in an object properity's negative number value, 230 | like `:-` 231 | """ 232 | # `:`, `-` in stack 233 | case1 = [lexer_tokens.TOKEN_COLON, lexer_tokens.TOKEN_NEGATIVE] 234 | return lexer_helper.match_stack(self.token_stack, case1) 235 | 236 | def __stream_stopped_in_a_negative_number_value_start(self) -> bool: 237 | """ 238 | check if JSON stream stopped in an object properity's negative number value, 239 | like `-` 240 | """ 241 | # `-` in stack 242 | case1 = [lexer_tokens.TOKEN_NEGATIVE] 243 | # `0`in mirror stack 244 | case2 = [lexer_tokens.TOKEN_NUMBER_0] 245 | return lexer_helper.match_stack( 246 | self.token_stack, case1 247 | ) and lexer_helper.match_stack(self.mirror_token_stack, case2) 248 | 249 | def __stream_stopped_in_an_array(self) -> bool: 250 | """ 251 | check if JSON stream stopped in an array 252 | """ 253 | return ( 254 | self.__get_top_token_on_mirror_stack() == lexer_tokens.TOKEN_RIGHT_BRACKET 255 | ) 256 | 257 | def __stream_stopped_in_an_array_string_value_end(self) -> bool: 258 | """ 259 | check if JSON stream stopped in an array's string value end, like `["value"` 260 | """ 261 | # `"`, `"` in stack 262 | case1 = [lexer_tokens.TOKEN_QUOTE, lexer_tokens.TOKEN_QUOTE] 263 | # `"`, `]` in mirror stack 264 | case2 = [lexer_tokens.TOKEN_RIGHT_BRACKET, lexer_tokens.TOKEN_QUOTE] 265 | return lexer_helper.match_stack( 266 | self.token_stack, case1 267 | ) and lexer_helper.match_stack(self.mirror_token_stack, case2) 268 | 269 | def __stream_stopped_in_an_object_null_value_placeholder_start(self) -> bool: 270 | """ 271 | check if JSON stream stopped in an object properity's value start by array, 272 | like `{"field":{` 273 | """ 274 | # `n`, `u`, `l`, `l`, `}` in mirror stack 275 | case1 = [ 276 | lexer_tokens.TOKEN_RIGHT_BRACE, 277 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 278 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 279 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 280 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 281 | ] 282 | return lexer_helper.match_stack(self.mirror_token_stack, case1) 283 | 284 | def __stream_stopped_in_a_string(self) -> bool: 285 | """ 286 | check if JSON stream stopped in a string, like `""` 287 | """ 288 | return ( 289 | self.__get_top_token_on_stack() == lexer_tokens.TOKEN_QUOTE 290 | and self.__get_top_token_on_mirror_stack() == lexer_tokens.TOKEN_QUOTE 291 | ) 292 | 293 | def __stream_stopped_in_an_string_unicode_escape(self) -> bool: 294 | """ 295 | check if JSON stream stopped in a string's unicode escape, like `\u0001` 296 | """ 297 | # `\`, `u` in stack 298 | case1 = [ 299 | lexer_tokens.TOKEN_ESCAPE_CHARACTER, 300 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 301 | ] 302 | # `"` in mirror stack 303 | 
case2 = [lexer_tokens.TOKEN_QUOTE] 304 | return lexer_helper.match_stack( 305 | self.token_stack, case1 306 | ) and lexer_helper.match_stack(self.mirror_token_stack, case2) 307 | 308 | def __stream_stopped_in_a_number(self) -> bool: 309 | """ 310 | check if JSON stream stopped in a number, like `[0-9]` 311 | """ 312 | return self.__get_top_token_on_stack() == lexer_tokens.TOKEN_NUMBER 313 | 314 | def __stream_stopped_in_a_number_decimal_part(self) -> bool: 315 | """ 316 | check if JSON stream stopped in a number first decimal place, like `.?` 317 | """ 318 | # `.`, lexer_tokens.TOKEN_NUMBER in stack 319 | return self.__get_top_token_on_stack() == lexer_tokens.TOKEN_DOT 320 | 321 | def __stream_stopped_in_a_number_decimal_part_middle(self) -> bool: 322 | """ 323 | check if JSON stream stopped in a number other decimal place (except first place), 324 | like `.[0-9]?` 325 | """ 326 | case1 = [lexer_tokens.TOKEN_DOT, lexer_tokens.TOKEN_NUMBER] 327 | return lexer_helper.match_stack(self.token_stack, case1) 328 | 329 | def __stream_stopped_with_leading_escape_character(self) -> bool: 330 | """ 331 | check if JSON stream stopped in escape character, like \ 332 | """ 333 | return self.__get_top_token_on_stack() == lexer_tokens.TOKEN_ESCAPE_CHARACTER 334 | 335 | def __match_token(self): 336 | """ 337 | lexer match JSON token method, convert JSON segment to JSON tokens 338 | """ 339 | # Segment end 340 | if len(self.json_segment) == 0: 341 | return lexer_tokens.TOKEN_EOF, 0 342 | 343 | token_symbol = self.json_segment[0] 344 | 345 | # Check if ignored token 346 | if lexer_helper.is_ignore_token(token_symbol): 347 | self.__skip_json_segment(1) 348 | return lexer_tokens.TOKEN_IGNORED, token_symbol 349 | # Match token 350 | token_mapping = { 351 | lexer_tokens.TOKEN_LEFT_BRACKET_SYMBOL: lexer_tokens.TOKEN_LEFT_BRACKET, 352 | lexer_tokens.TOKEN_RIGHT_BRACKET_SYMBOL: lexer_tokens.TOKEN_RIGHT_BRACKET, 353 | lexer_tokens.TOKEN_LEFT_BRACE_SYMBOL: lexer_tokens.TOKEN_LEFT_BRACE, 354 | lexer_tokens.TOKEN_RIGHT_BRACE_SYMBOL: lexer_tokens.TOKEN_RIGHT_BRACE, 355 | lexer_tokens.TOKEN_COLON_SYMBOL: lexer_tokens.TOKEN_COLON, 356 | lexer_tokens.TOKEN_DOT_SYMBOL: lexer_tokens.TOKEN_DOT, 357 | lexer_tokens.TOKEN_COMMA_SYMBOL: lexer_tokens.TOKEN_COMMA, 358 | lexer_tokens.TOKEN_QUOTE_SYMBOL: lexer_tokens.TOKEN_QUOTE, 359 | lexer_tokens.TOKEN_ESCAPE_CHARACTER_SYMBOL: lexer_tokens.TOKEN_ESCAPE_CHARACTER, 360 | lexer_tokens.TOKEN_SLASH_SYMBOL: lexer_tokens.TOKEN_SLASH, 361 | lexer_tokens.TOKEN_NEGATIVE_SYMBOL: lexer_tokens.TOKEN_NEGATIVE, 362 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_A_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_A, 363 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_B_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_B, 364 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_C_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_C, 365 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_D_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_D, 366 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E, 367 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_F_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_F, 368 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 369 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 370 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_R_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_R, 371 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_S_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_S, 372 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_T_SYMBOL: 
lexer_tokens.TOKEN_ALPHABET_LOWERCASE_T, 373 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U_SYMBOL: lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 374 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_A_SYMBOL: lexer_tokens.TOKEN_ALPHABET_UPPERCASE_A, 375 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_B_SYMBOL: lexer_tokens.TOKEN_ALPHABET_UPPERCASE_B, 376 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_C_SYMBOL: lexer_tokens.TOKEN_ALPHABET_UPPERCASE_C, 377 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_D_SYMBOL: lexer_tokens.TOKEN_ALPHABET_UPPERCASE_D, 378 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_E_SYMBOL: lexer_tokens.TOKEN_ALPHABET_UPPERCASE_E, 379 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_F_SYMBOL: lexer_tokens.TOKEN_ALPHABET_UPPERCASE_F, 380 | lexer_tokens.TOKEN_NUMBER_0_SYMBOL: lexer_tokens.TOKEN_NUMBER_0, 381 | lexer_tokens.TOKEN_NUMBER_1_SYMBOL: lexer_tokens.TOKEN_NUMBER_1, 382 | lexer_tokens.TOKEN_NUMBER_2_SYMBOL: lexer_tokens.TOKEN_NUMBER_2, 383 | lexer_tokens.TOKEN_NUMBER_3_SYMBOL: lexer_tokens.TOKEN_NUMBER_3, 384 | lexer_tokens.TOKEN_NUMBER_4_SYMBOL: lexer_tokens.TOKEN_NUMBER_4, 385 | lexer_tokens.TOKEN_NUMBER_5_SYMBOL: lexer_tokens.TOKEN_NUMBER_5, 386 | lexer_tokens.TOKEN_NUMBER_6_SYMBOL: lexer_tokens.TOKEN_NUMBER_6, 387 | lexer_tokens.TOKEN_NUMBER_7_SYMBOL: lexer_tokens.TOKEN_NUMBER_7, 388 | lexer_tokens.TOKEN_NUMBER_8_SYMBOL: lexer_tokens.TOKEN_NUMBER_8, 389 | lexer_tokens.TOKEN_NUMBER_9_SYMBOL: lexer_tokens.TOKEN_NUMBER_9, 390 | } 391 | 392 | token_result = token_mapping.get(token_symbol, lexer_tokens.TOKEN_OTHERS) 393 | self.__skip_json_segment(1) 394 | return token_result, token_symbol 395 | 396 | def append_string( 397 | self, 398 | string: str, 399 | ): 400 | """ 401 | append JSON string to current JSON stream content 402 | this method will traversal all token and generate mirror token for complete full JSON 403 | """ 404 | 405 | self.json_segment = string 406 | while True: 407 | token, token_symbol = self.__match_token() 408 | 409 | if token == lexer_tokens.TOKEN_EOF: 410 | # nothing to do with TOKEN_EOF 411 | pass 412 | elif token == lexer_tokens.TOKEN_IGNORED: 413 | if self.__stream_stopped_in_a_string(): 414 | self.json_content += token_symbol 415 | continue 416 | self.__push_byte_into_padding_content(token_symbol) 417 | elif token == lexer_tokens.TOKEN_OTHERS: 418 | # check if json stream stopped with padding content 419 | if self.__have_padding_content(): 420 | self.__append_padding_content_to_json_content() 421 | self.__clean_padding_content() 422 | # write current token symbol to JSON content 423 | self.json_content += token_symbol 424 | elif token == lexer_tokens.TOKEN_LEFT_BRACKET: 425 | # check if json stream stopped with padding content 426 | if self.__have_padding_content(): 427 | self.__append_padding_content_to_json_content() 428 | self.__clean_padding_content() 429 | self.json_content += token_symbol 430 | if self.__stream_stopped_in_a_string(): 431 | continue 432 | self.__push_token_stack(token) 433 | if self.__stream_stopped_in_an_object_array_value_start(): 434 | # pop `n`, `u`, `l`, `l` from mirror stack 435 | self.__pop_mirror_token_stack() 436 | self.__pop_mirror_token_stack() 437 | self.__pop_mirror_token_stack() 438 | self.__pop_mirror_token_stack() 439 | # push `]` into mirror stack 440 | self.__push_mirror_token_stack(lexer_tokens.TOKEN_RIGHT_BRACKET) 441 | elif token == lexer_tokens.TOKEN_RIGHT_BRACKET: 442 | if self.__stream_stopped_in_a_string(): 443 | self.json_content += token_symbol 444 | continue 445 | # check if json stream stopped with padding content 446 | if 
self.__have_padding_content(): 447 | self.__append_padding_content_to_json_content() 448 | self.__clean_padding_content() 449 | # write current token symbol to JSON content 450 | self.json_content += token_symbol 451 | # push `]` into stack 452 | self.__push_token_stack(token) 453 | # pop `]` from mirror stack 454 | self.__pop_mirror_token_stack() 455 | elif token == lexer_tokens.TOKEN_LEFT_BRACE: 456 | # check if json stream stopped with padding content 457 | if self.__have_padding_content(): 458 | self.__append_padding_content_to_json_content() 459 | self.__clean_padding_content() 460 | # write current token symbol to JSON content 461 | self.json_content += token_symbol 462 | if self.__stream_stopped_in_a_string(): 463 | continue 464 | self.__push_token_stack(token) 465 | if self.__stream_stopped_in_an_object_object_value_start(): 466 | # pop `n`, `u`, `l`, `l` from mirror stack 467 | self.__pop_mirror_token_stack() 468 | self.__pop_mirror_token_stack() 469 | self.__pop_mirror_token_stack() 470 | self.__pop_mirror_token_stack() 471 | # push `}` into mirror stack 472 | self.__push_mirror_token_stack(lexer_tokens.TOKEN_RIGHT_BRACE) 473 | elif token == lexer_tokens.TOKEN_RIGHT_BRACE: 474 | if self.__stream_stopped_in_a_string(): 475 | self.json_content += token_symbol 476 | continue 477 | # check if json stream stopped with padding content 478 | if self.__have_padding_content(): 479 | self.__append_padding_content_to_json_content() 480 | self.__clean_padding_content() 481 | self.json_content += token_symbol 482 | # push `}` into stack 483 | self.__push_token_stack(token) 484 | # pop `}` from mirror stack 485 | self.__pop_mirror_token_stack() 486 | elif token == lexer_tokens.TOKEN_QUOTE: 487 | # check if escape quote `\"` 488 | if self.__stream_stopped_with_leading_escape_character(): 489 | # push padding escape character `\` into JSON content 490 | self.__append_padding_content_to_json_content() 491 | self.__clean_padding_content() 492 | # write current token symbol to JSON content 493 | self.json_content += token_symbol 494 | # pop `\` from stack 495 | self.__pop_token_stack() 496 | continue 497 | # check if json stream stopped with padding content 498 | if self.__have_padding_content(): 499 | self.__append_padding_content_to_json_content() 500 | self.__clean_padding_content() 501 | 502 | # write current token symbol to JSON content 503 | self.json_content += token_symbol 504 | self.__push_token_stack(token) 505 | if self.__stream_stopped_in_an_array(): 506 | # push `"` into mirror stack 507 | self.__push_mirror_token_stack(lexer_tokens.TOKEN_QUOTE) 508 | elif self.__stream_stopped_in_an_array_string_value_end(): 509 | # pop `"` from mirror stack 510 | self.__pop_mirror_token_stack() 511 | elif self.__stream_stopped_in_an_object_key_start(): 512 | # check if stopped in key of object's properity or value of object's properity 513 | # push `"`, `:`, `n`, `u`, `l`, `l` into mirror stack 514 | self.__push_mirror_token_stack( 515 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L 516 | ) 517 | self.__push_mirror_token_stack( 518 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L 519 | ) 520 | self.__push_mirror_token_stack( 521 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U 522 | ) 523 | self.__push_mirror_token_stack( 524 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N 525 | ) 526 | self.__push_mirror_token_stack(lexer_tokens.TOKEN_COLON) 527 | self.__push_mirror_token_stack(lexer_tokens.TOKEN_QUOTE) 528 | elif self.__stream_stopped_in_an_object_key_end(): 529 | # check if stopped in key of object's properity or value of 
object's properity 530 | # pop `"` from mirror stack 531 | self.__pop_mirror_token_stack() 532 | elif self.__stream_stopped_in_an_object_string_value_start(): 533 | # pop `n`, `u`, `l`, `l` from mirror stack 534 | self.__pop_mirror_token_stack() 535 | self.__pop_mirror_token_stack() 536 | self.__pop_mirror_token_stack() 537 | self.__pop_mirror_token_stack() 538 | # push `"` into mirror stack 539 | self.__push_mirror_token_stack(lexer_tokens.TOKEN_QUOTE) 540 | elif self.__stream_stopped_in_an_object_value_end(): 541 | # pop `"` from mirror stack 542 | self.__pop_mirror_token_stack() 543 | else: 544 | return "Invalid quote token in JSON stream" 545 | elif token == lexer_tokens.TOKEN_COLON: 546 | if self.__stream_stopped_in_a_string(): 547 | self.json_content += token_symbol 548 | continue 549 | # check if json stream stopped with padding content 550 | if self.__have_padding_content(): 551 | self.__append_padding_content_to_json_content() 552 | self.__clean_padding_content() 553 | # write current token symbol to JSON content 554 | self.json_content += token_symbol 555 | self.__push_token_stack(token) 556 | # pop `:` from mirror stack 557 | self.__pop_mirror_token_stack() 558 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_A: 559 | # as hex in unicode 560 | if self.__stream_stopped_in_an_string_unicode_escape(): 561 | self.__push_byte_into_padding_content(token_symbol) 562 | # check if unicode escape is full length 563 | if len(self.padding_content) == 6: 564 | self.__append_padding_content_to_json_content() 565 | self.__clean_padding_content() 566 | # pop `\`, `u` from stack 567 | self.__pop_token_stack() 568 | self.__pop_token_stack() 569 | continue 570 | # write current token symbol to JSON content 571 | self.json_content += token_symbol 572 | # in a string, just skip token 573 | if self.__stream_stopped_in_a_string(): 574 | continue 575 | 576 | # check if `f` in token stack and `a`, `l`, `s`, `e in mirror stack 577 | def it_is_part_of_token_false(): 578 | left = [lexer_tokens.TOKEN_ALPHABET_LOWERCASE_F] 579 | right = [ 580 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E, 581 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_S, 582 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 583 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_A, 584 | ] 585 | return lexer_helper.match_stack( 586 | self.token_stack, left 587 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 588 | 589 | if not it_is_part_of_token_false(): 590 | continue 591 | 592 | self.__push_token_stack(token) 593 | self.__pop_mirror_token_stack() 594 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_B: 595 | # as hex in unicode 596 | if self.__stream_stopped_in_an_string_unicode_escape(): 597 | self.__push_byte_into_padding_content(token_symbol) 598 | # check if unicode escape is full length 599 | if len(self.padding_content) == 6: 600 | self.__append_padding_content_to_json_content() 601 | self.__clean_padding_content() 602 | # pop `\`, `u` from stack 603 | self.__pop_token_stack() 604 | self.__pop_token_stack() 605 | continue 606 | 607 | # \b escape `\`, `b` 608 | if self.__stream_stopped_with_leading_escape_character(): 609 | # push padding escape character `\` into JSON content 610 | self.__append_padding_content_to_json_content() 611 | self.__clean_padding_content() 612 | # write current token symbol to JSON content 613 | self.json_content += token_symbol 614 | # pop `\` from stack 615 | self.__pop_token_stack() 616 | continue 617 | 618 | # write current token symbol to JSON content 619 | self.json_content += token_symbol 620 | 621 | 
# in a string, just skip token 622 | if self.__stream_stopped_in_a_string(): 623 | continue 624 | 625 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E: 626 | # as hex in unicode 627 | if self.__stream_stopped_in_an_string_unicode_escape(): 628 | self.__push_byte_into_padding_content(token_symbol) 629 | # check if unicode escape is full length 630 | if len(self.padding_content) == 6: 631 | self.__append_padding_content_to_json_content() 632 | self.__clean_padding_content() 633 | # pop `\`, `u` from stack 634 | self.__pop_token_stack() 635 | self.__pop_token_stack() 636 | continue 637 | 638 | # check if in a number, as `e` (exponent) in scientific notation 639 | if self.__stream_stopped_in_a_number_decimal_part_middle(): 640 | self.__push_byte_into_padding_content(token_symbol) 641 | continue 642 | 643 | # write current token symbol to JSON content 644 | self.json_content += token_symbol 645 | 646 | # in a string, just skip token 647 | if self.__stream_stopped_in_a_string(): 648 | continue 649 | 650 | # check if `f`, `a`, `l`, `s` in token stack and `e` in mirror stack 651 | def it_is_part_of_token_false(): 652 | left = [ 653 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_F, 654 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_A, 655 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 656 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_S, 657 | ] 658 | right = [ 659 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E, 660 | ] 661 | return lexer_helper.match_stack( 662 | self.token_stack, left 663 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 664 | 665 | # check if `t`, `r`, `u` in token stack and `e` in mirror stack 666 | def it_is_part_of_token_true(): 667 | left = [ 668 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_T, 669 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_R, 670 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 671 | ] 672 | right = [ 673 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E, 674 | ] 675 | return lexer_helper.match_stack( 676 | self.token_stack, left 677 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 678 | 679 | if not it_is_part_of_token_false() and not it_is_part_of_token_true(): 680 | continue 681 | 682 | self.__push_token_stack(token) 683 | self.__pop_mirror_token_stack() 684 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_F: 685 | # as hex in unicode 686 | if self.__stream_stopped_in_an_string_unicode_escape(): 687 | self.__push_byte_into_padding_content(token_symbol) 688 | # check if unicode escape is full length 689 | if len(self.padding_content) == 6: 690 | self.__append_padding_content_to_json_content() 691 | self.__clean_padding_content() 692 | # pop `\`, `u` from stack 693 | self.__pop_token_stack() 694 | self.__pop_token_stack() 695 | continue 696 | 697 | # \f escape `\`, `f` 698 | if self.__stream_stopped_with_leading_escape_character(): 699 | # push padding escape character `\` into JSON content 700 | self.__append_padding_content_to_json_content() 701 | self.__clean_padding_content() 702 | # write current token symbol to JSON content 703 | self.json_content.append(token_symbol) 704 | # pop `\` from stack 705 | self.__pop_token_stack() 706 | continue 707 | 708 | # check if json stream stopped with padding content, like case `[true , f` 709 | if self.__have_padding_content(): 710 | self.__append_padding_content_to_json_content() 711 | self.__clean_padding_content() 712 | 713 | # write current token symbol to JSON content 714 | self.json_content.append(token_symbol) 715 | 716 | # in a string, just skip token 717 | if self.__stream_stopped_in_a_string(): 718 | continue 719 
| 720 | # push `f` into stack 721 | self.__push_token_stack(token) 722 | if self.__stream_stopped_in_an_array(): 723 | # in array 724 | # push `a`, `l`, `s`, `e` 725 | self.__push_mirror_token_stack( 726 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E 727 | ) 728 | self.__push_mirror_token_stack( 729 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_S 730 | ) 731 | self.__push_mirror_token_stack( 732 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L 733 | ) 734 | self.__push_mirror_token_stack( 735 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_A 736 | ) 737 | else: 738 | # in object 739 | # pop `n`, `u`, `l`, `l` 740 | self.__pop_mirror_token_stack() 741 | self.__pop_mirror_token_stack() 742 | self.__pop_mirror_token_stack() 743 | self.__pop_mirror_token_stack() 744 | # push `a`, `l`, `s`, `e` 745 | self.__push_mirror_token_stack( 746 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E 747 | ) 748 | self.__push_mirror_token_stack( 749 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_S 750 | ) 751 | self.__push_mirror_token_stack( 752 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L 753 | ) 754 | self.__push_mirror_token_stack( 755 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_A 756 | ) 757 | 758 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L: 759 | # write current token symbol to JSON content 760 | self.json_content.append(token_symbol) 761 | # in a string, just skip token 762 | if self.__stream_stopped_in_a_string(): 763 | continue 764 | 765 | # check if `f`, `a` in token stack and, `l`, `s`, `e` in mirror stack 766 | def it_is_part_of_token_false(): 767 | left = [ 768 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_F, 769 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_A, 770 | ] 771 | right = [ 772 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E, 773 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_S, 774 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 775 | ] 776 | return lexer_helper.match_stack( 777 | self.token_stack, left 778 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 779 | 780 | # check if `n`, `u` in token stack and `l`, `l` in mirror stack 781 | def it_is_part_of_token_null1(): 782 | left = [ 783 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 784 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 785 | ] 786 | right = [ 787 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 788 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 789 | ] 790 | return lexer_helper.match_stack( 791 | self.token_stack, left 792 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 793 | 794 | # check if `n`, `u`, `l` in token stack and `l` in mirror stack 795 | def it_is_part_of_token_null2(): 796 | left = [ 797 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N, 798 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 799 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 800 | ] 801 | right = [ 802 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 803 | ] 804 | return lexer_helper.match_stack( 805 | self.token_stack, left 806 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 807 | 808 | if ( 809 | not it_is_part_of_token_false() 810 | and not it_is_part_of_token_null1() 811 | and not it_is_part_of_token_null2() 812 | ): 813 | continue 814 | 815 | self.__push_token_stack(token) 816 | self.__pop_mirror_token_stack() 817 | 818 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N: 819 | # \n escape `\`, `n` 820 | if self.__stream_stopped_with_leading_escape_character(): 821 | # push padding escape character `\` into JSON content 822 | self.__append_padding_content_to_json_content() 823 | self.__clean_padding_content() 824 | # write current token symbol to JSON content 825 | 
self.json_content.append(token_symbol) 826 | # pop `\` from stack 827 | self.__pop_token_stack() 828 | continue 829 | 830 | # check if json stream stopped with padding content, like case `[true , n` 831 | if self.__have_padding_content(): 832 | self.__append_padding_content_to_json_content() 833 | self.__clean_padding_content() 834 | 835 | # write current token symbol to JSON content 836 | self.json_content.append(token_symbol) 837 | 838 | # in a string, just skip token 839 | if self.__stream_stopped_in_a_string(): 840 | continue 841 | 842 | # push `n` 843 | self.__push_token_stack(token) 844 | if self.__stream_stopped_in_an_array(): 845 | # in array, push `u`, `l`, `l` 846 | self.__push_mirror_token_stack( 847 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L 848 | ) 849 | self.__push_mirror_token_stack( 850 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L 851 | ) 852 | self.__push_mirror_token_stack( 853 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U 854 | ) 855 | else: 856 | # in object, pop `n` 857 | self.__pop_mirror_token_stack() 858 | 859 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_R: 860 | # \r escape `\`, `r` 861 | if self.__stream_stopped_with_leading_escape_character(): 862 | # push padding escape character `\` into JSON content 863 | self.__append_padding_content_to_json_content() 864 | self.__clean_padding_content() 865 | # write current token symbol to JSON content 866 | self.json_content.append(token_symbol) 867 | # pop `\` from stack 868 | self.__pop_token_stack() 869 | continue 870 | 871 | # write current token symbol to JSON content 872 | self.json_content.append(token_symbol) 873 | 874 | # in a string, just skip token 875 | if self.__stream_stopped_in_a_string(): 876 | continue 877 | 878 | # check if `t` in token stack and `r`, `u`, `e in mirror stack 879 | def it_is_part_of_token_true(): 880 | left = [ 881 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_T, 882 | ] 883 | right = [ 884 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E, 885 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 886 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_R, 887 | ] 888 | return lexer_helper.match_stack( 889 | self.token_stack, left 890 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 891 | 892 | if not it_is_part_of_token_true(): 893 | continue 894 | 895 | self.__push_token_stack(token) 896 | self.__pop_mirror_token_stack() 897 | 898 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_S: 899 | # write current token symbol to JSON content 900 | self.json_content.append(token_symbol) 901 | 902 | # in a string, just skip token 903 | if self.__stream_stopped_in_a_string(): 904 | continue 905 | 906 | # check if `f`, `a`, `l` in token stack and `s`, `e in mirror stack 907 | def it_is_part_of_token_false(): 908 | left = [ 909 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_F, 910 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_A, 911 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 912 | ] 913 | right = [ 914 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E, 915 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_S, 916 | ] 917 | return lexer_helper.match_stack( 918 | self.token_stack, left 919 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 920 | 921 | if not it_is_part_of_token_false(): 922 | continue 923 | 924 | self.__push_token_stack(token) 925 | self.__pop_mirror_token_stack() 926 | 927 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_T: 928 | # \t escape `\`, `t` 929 | if self.__stream_stopped_with_leading_escape_character(): 930 | # push padding escape character `\` into JSON content 931 | 
self.__append_padding_content_to_json_content() 932 | self.__clean_padding_content() 933 | # write current token symbol to JSON content 934 | self.json_content.append(token_symbol) 935 | # pop `\` from stack 936 | self.__pop_token_stack() 937 | continue 938 | 939 | # check if json stream stopped with padding content, like case `[true , t` 940 | if self.__have_padding_content(): 941 | self.__append_padding_content_to_json_content() 942 | self.__clean_padding_content() 943 | 944 | # write current token symbol to JSON content 945 | self.json_content.append(token_symbol) 946 | 947 | # in a string, just skip token 948 | if self.__stream_stopped_in_a_string(): 949 | continue 950 | 951 | # push `t` to stack 952 | self.__push_token_stack(token) 953 | if self.__stream_stopped_in_an_array(): 954 | # in array 955 | # push `r`, `u`, `e` 956 | self.__push_mirror_token_stack( 957 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E 958 | ) 959 | self.__push_mirror_token_stack( 960 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U 961 | ) 962 | self.__push_mirror_token_stack( 963 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_R 964 | ) 965 | else: 966 | # in object 967 | # pop `n`, `u`, `l`, `l` 968 | self.__pop_mirror_token_stack() 969 | self.__pop_mirror_token_stack() 970 | self.__pop_mirror_token_stack() 971 | self.__pop_mirror_token_stack() 972 | # push `r`, `u`, `e` 973 | self.__push_mirror_token_stack( 974 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E 975 | ) 976 | self.__push_mirror_token_stack( 977 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U 978 | ) 979 | self.__push_mirror_token_stack( 980 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_R 981 | ) 982 | 983 | elif token == lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U: 984 | # unicode escape `\`, `u` 985 | if self.__stream_stopped_with_leading_escape_character(): 986 | self.__push_token_stack(token) 987 | self.padding_content.append(token_symbol) 988 | continue 989 | 990 | # write current token symbol to JSON content 991 | self.json_content.append(token_symbol) 992 | 993 | # in a string, just skip token 994 | if self.__stream_stopped_in_a_string(): 995 | continue 996 | 997 | # check if `t`, `r` in token stack and, `u`, `e` in mirror stack 998 | def it_is_part_of_token_true(): 999 | left = [ 1000 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_T, 1001 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_R, 1002 | ] 1003 | right = [ 1004 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_E, 1005 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 1006 | ] 1007 | return lexer_helper.match_stack( 1008 | self.token_stack, left 1009 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 1010 | 1011 | # check if `n` in token stack and `u`, `l`, `l` in mirror stack 1012 | def it_is_part_of_token_null(): 1013 | left = [lexer_tokens.TOKEN_ALPHABET_LOWERCASE_N] 1014 | right = [ 1015 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 1016 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_L, 1017 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_U, 1018 | ] 1019 | return lexer_helper.match_stack( 1020 | self.token_stack, left 1021 | ) and lexer_helper.match_stack(self.mirror_token_stack, right) 1022 | 1023 | if not it_is_part_of_token_true() and not it_is_part_of_token_null(): 1024 | continue 1025 | 1026 | self.__push_token_stack(token) 1027 | self.__pop_mirror_token_stack() 1028 | 1029 | elif token in [ 1030 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_A, 1031 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_B, 1032 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_C, 1033 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_D, 1034 | lexer_tokens.TOKEN_ALPHABET_LOWERCASE_C, 1035 | 
lexer_tokens.TOKEN_ALPHABET_LOWERCASE_D, 1036 | lexer_tokens.TOKEN_ALPHABET_UPPERCASE_F, 1037 | ]: 1038 | # as hex in unicode 1039 | if self.__stream_stopped_in_an_string_unicode_escape(): 1040 | self.__push_byte_into_padding_content(token_symbol) 1041 | # check if unicode escape is full length 1042 | if len(self.padding_content) == 6: 1043 | self.__append_padding_content_to_json_content() 1044 | self.__clean_padding_content() 1045 | # pop `\`, `u` from stack 1046 | self.__pop_token_stack() 1047 | self.__pop_token_stack() 1048 | continue 1049 | 1050 | # write current token symbol to JSON content 1051 | self.json_content.append(token_symbol) 1052 | 1053 | # in a string, just skip token 1054 | if self.__stream_stopped_in_a_string(): 1055 | continue 1056 | 1057 | elif token == lexer_tokens.TOKEN_ALPHABET_UPPERCASE_E: 1058 | # as hex in unicode 1059 | if self.__stream_stopped_in_an_string_unicode_escape(): 1060 | self.__push_byte_into_padding_content(token_symbol) 1061 | # check if unicode escape is full length 1062 | if len(self.padding_content) == 6: 1063 | self.__append_padding_content_to_json_content() 1064 | self.__clean_padding_content() 1065 | # pop `\`, `u` from stack 1066 | self.__pop_token_stack() 1067 | self.__pop_token_stack() 1068 | continue 1069 | 1070 | # check if in a number, as `E` (exponent) in scientific notation 1071 | if self.__stream_stopped_in_a_number_decimal_part_middle(): 1072 | self.__push_byte_into_padding_content(token_symbol) 1073 | continue 1074 | 1075 | # write current token symbol to JSON content 1076 | self.json_content.append(token_symbol) 1077 | 1078 | # in a string, just skip token 1079 | if self.__stream_stopped_in_a_string(): 1080 | continue 1081 | elif token in [ 1082 | lexer_tokens.TOKEN_NUMBER_0, 1083 | lexer_tokens.TOKEN_NUMBER_1, 1084 | lexer_tokens.TOKEN_NUMBER_2, 1085 | lexer_tokens.TOKEN_NUMBER_3, 1086 | lexer_tokens.TOKEN_NUMBER_4, 1087 | lexer_tokens.TOKEN_NUMBER_5, 1088 | lexer_tokens.TOKEN_NUMBER_6, 1089 | lexer_tokens.TOKEN_NUMBER_7, 1090 | lexer_tokens.TOKEN_NUMBER_8, 1091 | lexer_tokens.TOKEN_NUMBER_9, 1092 | ]: 1093 | # as number in unicode 1094 | if self.__stream_stopped_in_an_string_unicode_escape(): 1095 | self.__push_byte_into_padding_content(token_symbol) 1096 | # check if unicode escape is full length 1097 | if len(self.padding_content) == 6: 1098 | self.__append_padding_content_to_json_content() 1099 | self.__clean_padding_content() 1100 | # pop `\`, `u` from stack 1101 | self.__pop_token_stack() 1102 | self.__pop_token_stack() 1103 | continue 1104 | 1105 | # check if json stream stopped with padding content, like `[1 , 1` 1106 | if self.__have_padding_content(): 1107 | self.__append_padding_content_to_json_content() 1108 | self.__clean_padding_content() 1109 | 1110 | # in negative part of a number 1111 | if self.__stream_stopped_in_a_negative_number_value_start(): 1112 | self.__push_negative_into_json_content() 1113 | # pop `0` from mirror stack 1114 | self.__pop_mirror_token_stack() 1115 | 1116 | # write current token symbol to JSON content 1117 | self.json_content.append(token_symbol) 1118 | 1119 | # in a string or a number, just skip token 1120 | if ( 1121 | self.__stream_stopped_in_a_string() 1122 | or self.__stream_stopped_in_a_number() 1123 | ): 1124 | continue 1125 | 1126 | # in decimal part of a number 1127 | if self.__stream_stopped_in_a_number_decimal_part(): 1128 | self.__push_token_stack(lexer_tokens.TOKEN_NUMBER) 1129 | # pop placeholder `0` in decimal part 1130 | self.__pop_mirror_token_stack() 1131 | continue 1132 | 
1133 | # first number type token, push token into stack 1134 | self.__push_token_stack(lexer_tokens.TOKEN_NUMBER) 1135 | 1136 | # check if we are in an object or an array 1137 | if self.__stream_stopped_in_an_array(): 1138 | continue 1139 | elif self.__stream_stopped_in_an_object_null_value_placeholder_start(): 1140 | # pop `n`, `u`, `l`, `l` 1141 | self.__pop_mirror_token_stack() 1142 | self.__pop_mirror_token_stack() 1143 | self.__pop_mirror_token_stack() 1144 | self.__pop_mirror_token_stack() 1145 | 1146 | elif token == lexer_tokens.TOKEN_COMMA: 1147 | # in a string, just skip token 1148 | if self.__stream_stopped_in_a_string(): 1149 | self.json_content.append(token_symbol) 1150 | continue 1151 | # in an object or an array, keep the comma in the stack and do not write it into the JSON content until the next token arrives 1152 | # the comma must be followed by a token: quote, null, true, false, or a number 1153 | self.__push_byte_into_padding_content(token_symbol) 1154 | self.__push_token_stack(token) 1155 | elif token == lexer_tokens.TOKEN_DOT: 1156 | # write current token symbol to JSON content 1157 | self.json_content.append(token_symbol) 1158 | 1159 | # in a string, just skip token 1160 | if self.__stream_stopped_in_a_string(): 1161 | continue 1162 | 1163 | # use `0` as a placeholder for the decimal part 1164 | self.__push_token_stack(token) 1165 | self.__push_mirror_token_stack(lexer_tokens.TOKEN_NUMBER_0) 1166 | 1167 | elif token == lexer_tokens.TOKEN_SLASH: 1168 | # escape character `\`, `/` 1169 | if self.__stream_stopped_with_leading_escape_character(): 1170 | # push padding escape character `\` into JSON content 1171 | self.__append_padding_content_to_json_content() 1172 | self.__clean_padding_content() 1173 | # write current token symbol to JSON content 1174 | self.json_content.append(token_symbol) 1175 | # pop `\` from stack 1176 | self.__pop_token_stack() 1177 | continue 1178 | elif self.__stream_stopped_in_a_string(): 1179 | self.json_content.append(token_symbol) 1180 | continue 1181 | 1182 | elif token == lexer_tokens.TOKEN_ESCAPE_CHARACTER: 1183 | # double escape character `\`, `\` 1184 | if self.__stream_stopped_with_leading_escape_character(): 1185 | # push padding escape character `\` into JSON content 1186 | self.__append_padding_content_to_json_content() 1187 | self.__clean_padding_content() 1188 | # write current token symbol to JSON content 1189 | self.json_content.append(token_symbol) 1190 | # pop `\` from stack 1191 | self.__pop_token_stack() 1192 | continue 1193 | 1194 | # just push the escape character onto the stack and wait for the next token to trigger the escape handling. 1195 | self.__push_token_stack(token) 1196 | self.__push_byte_into_padding_content( 1197 | lexer_tokens.TOKEN_ESCAPE_CHARACTER_SYMBOL 1198 | ) 1199 | elif token == lexer_tokens.TOKEN_NEGATIVE: 1200 | # in a string, just skip token 1201 | if self.__stream_stopped_in_a_string(): 1202 | self.json_content.append(token_symbol) 1203 | continue 1204 | 1205 | # check if json stream stopped with padding content, like `[1 , -` 1206 | if self.__have_padding_content(): 1207 | self.__append_padding_content_to_json_content() 1208 | self.__clean_padding_content() 1209 | 1210 | # just push the negative sign onto the stack and wait for the next token to trigger it; the `0` pushed onto the mirror stack below is a placeholder so that a trailing `-` still completes to a valid number.
1211 | self.__push_token_stack(token) 1212 | if self.__stream_stopped_in_an_object_negative_number_value_start(): 1213 | # pop `n`, `u`, `l`, `l` from mirror stack 1214 | self.__pop_mirror_token_stack() 1215 | self.__pop_mirror_token_stack() 1216 | self.__pop_mirror_token_stack() 1217 | self.__pop_mirror_token_stack() 1218 | 1219 | # push `0` into mirror stack for placeholder 1220 | self.__push_mirror_token_stack(lexer_tokens.TOKEN_NUMBER_0) 1221 | 1222 | else: 1223 | return f"Unexpected token: {token}, token symbol: {token_symbol}" 1224 | 1225 | if token == lexer_tokens.TOKEN_EOF: 1226 | break 1227 | 1228 | return None 1229 | 1230 | def complete_json(self) -> str: 1231 | """ 1232 | complete the incomplete JSON string by concatenating the JSON content with the mirror tokens 1233 | """ 1234 | # This combines json_content and mirror token stack into a complete JSON string 1235 | return "".join(self.json_content) + self.__dump_mirror_token_stack_to_string() 1236 | --------------------------------------------------------------------------------
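Usage sketch (not part of the repository files above): the snippet below illustrates how append_string and complete_json from lexer.py cooperate when a JSON document arrives in chunks. The chunk contents and the printed results are hypothetical examples chosen for illustration; only the streamingjson.Lexer API shown in this dump is assumed.

    import streamingjson

    # Feed fragments of one JSON object; after every fragment the mirror
    # token stack lets complete_json() close whatever is still open.
    lexer = streamingjson.Lexer()
    for chunk in ['{"name": "str', 'eaming", "ok": tr', 'ue, "items": [1, 2']:
        lexer.append_string(chunk)
        print(lexer.complete_json())

    # Expected progression (illustrative):
    #   {"name": "str"}
    #   {"name": "streaming", "ok": true}
    #   {"name": "streaming", "ok": true, "items": [1, 2]}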