├── tests ├── __init__.py ├── test_js.py ├── test_stop.py ├── test_cases.py └── test_miss.py ├── half_json ├── __init__.py ├── main.py ├── json_util.py └── core.py ├── checks ├── oneline.json ├── debug.py ├── runratio.sh ├── runtest.sh ├── runshow.sh ├── broken.py ├── autogen.py └── check.py ├── .flake8 ├── pdm.lock ├── .travis.yml ├── pyproject.toml ├── LICENSE ├── .github └── workflows │ └── python-package.yml ├── .gitignore └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /half_json/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /checks/oneline.json: -------------------------------------------------------------------------------- 1 | {a:1} -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E501 3 | -------------------------------------------------------------------------------- /checks/debug.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | from half_json.main import fixjson 3 | 4 | 5 | if __name__ == '__main__': 6 | import pdb 7 | pdb.set_trace() 8 | fixjson() 9 | -------------------------------------------------------------------------------- /checks/runratio.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mode=$1 3 | if [ ! 
#!/bin/bash
# Run one autogen -> broken -> check round.
#
# Usage: ./runtest.sh [ID]
#   ID  suffix used for the random.<ID>.* work files (defaults to 1).
#
# Abort on the first failing stage so a later stage never runs on the
# incomplete output of an earlier one.
set -euo pipefail

# ${1:-1} covers both "no argument" and "empty argument" without the
# unquoted `[ ! $id ]` test, which misbehaves on arguments containing spaces.
id="${1:-1}"
base_name="random.$id"

# 1. generate random valid JSON documents, one per line
python3 autogen.py > "$base_name.json"
# 2. derive broken variants from every document
python3 broken.py "$base_name.json" "$base_name.broken.json"
# 3. drop duplicate broken records
sort "$base_name.broken.json" | uniq > "$base_name.broken.uniq.json"
# 4. try to fix every broken record and print the fix/hit ratios
python3 check.py "$base_name.broken.uniq.json" "$base_name.broken.uniq.fix.json"
$id ]; then 4 | id=1 5 | fi 6 | base_name=random.$id 7 | cat $base_name.broken.uniq.fix.json|jq -r 'select((.fixed == true) and (.hited == false))|("orgin: "+."origin", "broken:"+."broken","fix: "+."fix")' 8 | # cat $base_name.broken.uniq.fix.json|jq -r 'select((.fixed == false))|("orgin: "+."origin", "broken:"+."broken","fix: "+."fix")' 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | python: 4 | - "2.7" 5 | 6 | install: 7 | - pip install . 8 | - pip install flake8 9 | - pip install coverage 10 | - pip install coveralls 11 | 12 | script: 13 | - flake8 half_json tests --max-line-length=200 14 | - nosetests --cover-package=half_json --with-coverage 15 | - python setup.py install 16 | - jsonfixer checks/oneline.json checks/oneline.fix.json 17 | - cd checks && ./runtest.sh 18 | 19 | after_success: 20 | - coveralls 21 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | 2 | [project] 3 | name = "jsonfixer" 4 | version = "0.2.2" 5 | description = "jsonfixer: fix invalid json: broken-json / truncated-json." 
def fixjson() -> None:
    """Command-line entry point: repair a file of JSON lines.

    Reads the input path from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``.  Every non-blank input line is passed to
    ``JSONFixer.fix``; lines that end up valid are written to the output file,
    the others are printed.  A summary with the share of lines that needed an
    actual repair is printed at the end.

    Raises:
        SystemExit: if fewer than two path arguments were supplied.
    """
    if len(sys.argv) < 3:
        # A usage message is friendlier than the bare IndexError raised before.
        raise SystemExit("usage: jsonfixer INFILE OUTFILE")
    infile = sys.argv[1]
    outfile = sys.argv[2]

    total = 0
    hit = 0

    fixer = JSONFixer()
    # The context manager closes both files even when something unexpected
    # escapes the loop.
    with open(infile, "r") as inf, open(outfile, "w") as outf:
        for line in inf:
            try:
                line = line.strip()
                if not line:
                    continue
                total += 1
                result = fixer.fix(line)
                if result.success:
                    outf.write(result.line + "\n")
                    # origin is True when the line was already valid JSON,
                    # so only real repairs count as hits.
                    if not result.origin:
                        hit += 1
                else:
                    print(result)
            except Exception as e:
                # Best effort: report the offending line and keep going.
                print(e, line)

    # An empty input file used to end in ZeroDivisionError.
    ratio = hit * 1.0 / total if total else 0.0
    print(f"total is {total} and hit {hit} --> ratio:{ratio} \n")
def borken(s):
    """Return *s* with exactly one randomly chosen character removed.

    An empty string is returned unchanged because there is nothing to delete.
    """
    if not s:
        return s
    # randrange excludes len(s), so the index always names a real character.
    # The previous randint(0, len(s) + 1) is inclusive on both ends and could
    # pick two positions past the end, handing the input back unmodified.
    idx = random.randrange(len(s))
    # TODO add count
    return s[:idx] + s[idx + 1:]


def main(inflie, outfile):
    """Write broken variants of every JSON line in *inflie* to *outfile*.

    For each non-blank input line between 3 and 10 variants are produced with
    ``borken``.  A variant is recorded, as one JSON object per output line
    with the keys ``origin`` and ``broken``, only when it is non-empty and no
    longer parses as JSON.
    """
    with open(inflie, 'r') as inf, open(outfile, 'w') as outf:
        for line in inf:
            line = line.strip()
            if not line:
                continue
            for _ in range(random.randint(3, 10)):
                new_line = borken(line)
                if not new_line:
                    continue
                try:
                    json.loads(new_line)
                except ValueError:
                    # json signals a parse failure with JSONDecodeError, a
                    # ValueError subclass: the variant is really broken.
                    out = {
                        'origin': line,
                        'broken': new_line
                    }
                    outf.write(json.dumps(out))
                    outf.write('\n')


if __name__ == '__main__':
    main(sys.argv[1], sys.argv[2])
def gen_bool_and_none():
    """Pick one of the values that serialise to true, false or null."""
    constants = [True, False, None]
    return random.choice(constants)


def get_random_object():
    """Build a random JSON-serialisable value.

    One of five factories is chosen at random: an empty dict, an empty list,
    a random string, a random number or a bool/None constant.  A dict or list
    is then filled with 0 to 3 recursively generated members.
    """
    factories = [dict, list, gen_random_str, gen_random_int, gen_bool_and_none]
    factory = random.choice(factories)
    result = factory()

    if isinstance(result, dict):
        member_count = random.randint(0, 3)
        for _ in range(member_count):
            # Generate the value first and the key second: that is the order
            # in which a subscript assignment evaluates its two sides.
            member = get_random_object()
            result[gen_random_str()] = member
    elif isinstance(result, list):
        member_count = random.randint(0, 3)
        result.extend(get_random_object() for _ in range(member_count))

    return result


def main():
    """Print 1000 random JSON documents, one per line."""
    for _ in range(1000):
        print(json.dumps(get_random_object()))


if __name__ == '__main__':
    main()
def _is_nan(value):
    """Return True when *value* is a float NaN (the only float != itself)."""
    return isinstance(value, float) and value != value


def _values_equal(left, right):
    """Compare two decoded JSON values, treating NaN as equal to NaN."""
    if _is_nan(left) and _is_nan(right):
        return True
    if isinstance(left, dict) and isinstance(right, dict):
        return left.keys() == right.keys() and all(
            _values_equal(value, right[key]) for key, value in left.items()
        )
    if isinstance(left, list) and isinstance(right, list):
        return len(left) == len(right) and all(
            _values_equal(a, b) for a, b in zip(left, right)
        )
    return left == right


def json_equal(line, origin):
    """Return True when the two JSON texts decode to the same value.

    Plain ``==`` on the decoded values reports any document containing NaN as
    different from itself, so the comparison is done member by member with
    NaN considered equal to NaN.
    """
    return _values_equal(json.loads(line), json.loads(origin))


def main(inflie, outfile):
    """Try to fix every broken record of *inflie* and report the ratios.

    Each non-blank input line is a JSON object with the keys ``origin`` and
    ``broken``.  The record is extended with ``fixed``, ``fix`` and ``hited``
    and written to *outfile*; totals and the fix/hit ratios are printed at
    the end.  The fixer used is the module-level ``f``.
    """
    total = 0
    hit = 0
    fix = 0

    with open(inflie, 'r') as inf, open(outfile, 'w') as outf:
        for line in inf:
            if not line.strip():
                # A stray blank line is not a record.
                continue
            info = json.loads(line)
            result = f.fix(info['broken'])
            info['fixed'] = result.success
            info['fix'] = result.line
            info['hited'] = False
            if info['fixed']:
                info['hited'] = json_equal(result.line, info['origin'])

            outf.write(json.dumps(info))
            outf.write('\n')

            if info['fixed']:
                fix += 1
            if info['hited']:
                hit += 1
            total += 1

    print('total: %d fix: %d hit: %d' % (total, fix, hit))
    # An empty input used to end in ZeroDivisionError.
    fix_ratio = fix * 1.0 / total if total else 0.0
    hit_ratio = hit * 1.0 / total if total else 0.0
    print('fix ratio: %f' % fix_ratio)
    print('hit ratio: %f' % hit_ratio)


if __name__ == '__main__':
    main(sys.argv[1], sys.argv[2])
6 | on: 7 | push: 8 | branches: [ "master" ] 9 | pull_request: 10 | branches: [ "master" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 pytest 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | - name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | python -m unittest 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Test JSON file 10 | random.*.json 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | .vscode/ 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | .pdm-python 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Install 2 | 3 | ```bash 4 | pip install jsonfixer 5 | ``` 6 | # Usage 7 | 8 | ```python 9 | In [1]: from half_json.core import JSONFixer 10 | 11 | In [2]: f = JSONFixer() 12 | 13 | In [3]: f.fix('[{') 14 | Out[3]: FixResult(success=True, line='[{}]', origin=False) 15 | 16 | In [4]: f.fix('{"a') 17 | Out[4]: FixResult(success=True, line='{"a":null}', origin=False) 18 | 19 | In [5]: f.fix('{"a":}') 20 | Out[5]: FixResult(success=True, line='{"a":null}', origin=False) 21 | ``` 22 | 23 | ## 目标 24 | 25 | fix invalid/broken/truncated json 26 | 27 | # 修复原理 28 | 29 | 1. JSONDecoderError 30 | 2. 
line context 31 | 32 | ## HitRatio 33 | 34 | 根据 checks 里面的工具来衡量修改效果 35 | 36 | ABC : autogen --> broken --> check 37 | TSR : run test.sh show.sh ratio.sh 38 | 39 | ### FixRatio 40 | 41 | 仅判断 result.success == True 42 | 43 | ```bash 44 | ./runratio.sh fix 45 | ``` 46 | ``` 47 | 1. 0.4269, 0.4287, 0.4303 # 实现完 12 条规则 48 | 2. 0.5037, 0.5084, 0.5077 # string 的 " 补充在末尾 49 | 3. 0.5259, 0.5224, 0.5187 # Array 需要 pos - 2 50 | 4. 0.5433, 0.5311, 0.5381 # Array 细化一下 [, 的情况 51 | 5. 0.7192, 0.7216, 0.7265 # 大改进, FIX 之前的 Bug( parser 被冲掉了) 52 | 6. 0.7732, 0.7686, 0.7701 # case: {"a":1 --> 补充 } 53 | 7. 0.60 , 0.58 # 去掉了空行 54 | 8. 0.6971, 0.6969, 0.6984 # 增加处理 StopIteration 55 | 9. 0.7428, 0.7383, 0.7427 # 增加处理 half parse 56 | 10. 0.7617, 0.7631, 0.7558 # 细化处理 half parse 57 | 11. 0.7608, 0.7612, 0.7650 # 添加从左处理 58 | 12. 0.9817, 0.9827, 0.9819 # 增加对字符串的处理 59 | 13. 0.8314, 0.8302, 0.8312 # 去掉对字符串的额外处理 60 | 14. 0.95X # 已不可参考 61 | ``` 62 | 63 | ### Real HitRatio 64 | 65 | 判断 result.success == True 66 | 67 | 并且解析后的 json 大体和原来一致(equal && dictdiffer) 68 | 69 | ```bash 70 | ./runratio.sh hit 71 | ``` 72 | ``` 73 | 1. 0.5610, 0.5563, 0.5529 # origin 74 | 2. 0.5593, 0.5532, 0.5587 # fix :} --> :null} 75 | ``` 76 | 77 | # TODO 78 | 79 | ## 目前的缺点 && 发现 80 | 81 | 3. 数字的支持比较弱 --> -02 / 0. / .0 82 | 4. 还不支持分支回溯 --> [{] 83 | 7. 突然想到, 应该反思一下, 这个是一个fixer, 而不是一个将任何字符串都转为 json 的东西 84 | 应该明确 JSONFixer 的能力范围, 对 runratio.sh 也应该比较前后两个的 json 相似程度。 85 | (听起来像无能者的辩白?) 86 | 8. 也需要吧 parser 也做成 stack 这样可以解决 ["a] --> ["a"] 这样的 case 87 | 88 | 1. 考虑分支回溯的方式来试探 89 | 2. 解析缺失的 JSON 常量 90 | 9. 91 | 92 | ## BadCase 93 | 1. 
class TestOtherCase(unittest.TestCase):
    """Inputs whose opening part is missing or that carry surplus closers."""

    def _assert_fixed(self, broken, expected):
        # Every case runs on a fresh fixer and must both succeed and produce
        # exactly the expected text.
        fixed, repaired, _ = JSONFixer().fix(broken)
        self.assertTrue(fixed)
        self.assertEqual(expected, repaired)

    def test_patch_left_object(self):
        self._assert_fixed('}', '{}')

    def test_patch_left_array(self):
        self._assert_fixed(']', '[]')

    def test_patch_half_array(self):
        self._assert_fixed('[]]', '[[]]')

    def test_patch_half_object(self):
        self._assert_fixed('{}}', '{"":{}}')

    def test_patch_half_object_array(self):
        self._assert_fixed('{}]', '[{}]')

    def test_patch_half_array_object(self):
        self._assert_fixed('[]}', '{"":[]}')

    def test_patch_half_array_with_coma(self):
        self._assert_fixed('1, [""], -1]', '[1, [""], -1]')

    def test_patch_half_array_with_coma_v2(self):
        self._assert_fixed('1, 2', '[1, 2]')

    def test_patch_half_object_with_colon(self):
        self._assert_fixed('"a":', '{"a":null}')

    def test_patch_many_half_object(self):
        self._assert_fixed('{}[]{}}]', '[{"":{},"":[],"":{}}]')

    def test_patch_string(self):
        self._assert_fixed('E"', '"E"')
line = '{"a' 60 | ok, newline, _ = JSONFixer().fix(line) 61 | self.assertTrue(ok) 62 | self.assertEqual('{"a":null}', newline) 63 | 64 | def test_miss_colon(self): 65 | line = '{"a":1,"b"' 66 | ok, newline, _ = JSONFixer().fix(line) 67 | self.assertTrue(ok) 68 | self.assertEqual('{"a":1,"b":null}', newline) 69 | 70 | def test_many_from_adhocore(self): 71 | line = '{"a":1,' 72 | ok, newline, _ = JSONFixer().fix(line) 73 | self.assertTrue(ok) 74 | self.assertEqual('{"a":1}', newline) 75 | 76 | def test_case_from_stackoverflow(self): 77 | line = '{"title": "Center "ADVANCE"", "text": "Business.English."}' 78 | ok, newline, _ = JSONFixer().fix(line) 79 | self.assertTrue(ok) 80 | self.assertEqual('{"title": "Center ","ADVANCE":", ","text": "Business.English."}', newline) 81 | 82 | def test_case_miss_key(self): 83 | line = '{[' 84 | ok, newline, _ = JSONFixer().fix(line) 85 | self.assertTrue(ok) 86 | self.assertEqual('{"":[]}', newline) 87 | 88 | def test_object_miss_value(self): 89 | line = '{"V":}' 90 | ok, newline, _ = JSONFixer().fix(line) 91 | self.assertTrue(ok) 92 | self.assertEqual('{"V":null}', newline) 93 | 94 | def test_array_miss_value(self): 95 | line = '[,]' 96 | ok, newline, _ = JSONFixer().fix(line) 97 | self.assertTrue(ok) 98 | self.assertEqual('[]', newline) 99 | 100 | def test_array_miss_value_2(self): 101 | line = '[null,]' 102 | ok, newline, _ = JSONFixer().fix(line) 103 | self.assertTrue(ok) 104 | self.assertEqual('[null]', newline) 105 | 106 | def test_unstrict_ok(self): 107 | line = '{"hello": "wor\nld"}' 108 | ok, newline, _ = JSONFixer().fix(line) 109 | self.assertFalse(ok) 110 | ok, newline, _ = JSONFixer().fix(line, strict=False) 111 | self.assertTrue(ok) 112 | self.assertEqual(line, newline) 113 | 114 | def test_unstrict_fix(self): 115 | line = '{"hello": "wor\nld"' 116 | ok, newline, _ = JSONFixer().fix(line) 117 | self.assertFalse(ok) 118 | ok, newline, _ = JSONFixer().fix(line, strict=False) 119 | self.assertTrue(ok) 120 | 
class JSONDecodeError:
    """A (parser, message) pair identifying one kind of decode failure.

    ``parser`` is the name of the parsing routine that failed and ``message``
    the error text, or the fixed leading part of it.
    """

    def __init__(self, parser, message):
        self.message = message
        self.parser = parser

    def __eq__(self, err):
        if not isinstance(err, JSONDecodeError):
            # Let Python fall back to identity instead of raising
            # AttributeError on a foreign operand such as None.
            return NotImplemented
        if err.parser != self.parser:
            return False
        # Some real messages carry a suffix after the template text, e.g.
        # "Invalid control character '\n' at".  Matching in both directions
        # makes the result independent of which operand is the template.
        return self.message in err.message or err.message in self.message

    # Defining __eq__ already makes instances unhashable; say so explicitly.
    __hash__ = None


class errors:
    """Catalogue of the decode failures the fixer knows how to recognise."""

    StringInvalidUXXXXEscape = JSONDecodeError("py_scanstring", "Invalid \\uXXXX escape")
    # 2 different case
    StringUnterminatedString = JSONDecodeError("py_scanstring", "Unterminated string starting at")
    StringInvalidControlCharacter = JSONDecodeError("py_scanstring", "Invalid control character")
    StringInvalidEscape = JSONDecodeError("py_scanstring", "Invalid \\escape")
    ObjectExceptColon = JSONDecodeError("JSONObject", "Expecting ':' delimiter")
    ObjectExceptObject = JSONDecodeError("JSONObject", "Expecting value")
    # 2 different case
    ObjectExceptKey = JSONDecodeError("JSONObject", "Expecting property name enclosed in double quotes")
    ObjectExceptComma = JSONDecodeError("JSONObject", "Expecting ',' delimiter")
    ArrayExceptObject = JSONDecodeError("JSONArray", "Expecting value")
    ArrayExceptComma = JSONDecodeError("JSONArray", "Expecting ',' delimiter")

    @classmethod
    def get_decode_error(cls, parser, message):
        """Return the catalogue entry matching *parser* and *message*.

        Entries are tried in definition order; ``None`` is returned when no
        entry matches.
        """
        err = JSONDecodeError(parser, message)
        for value in cls.__dict__.values():
            if isinstance(value, JSONDecodeError) and err == value:
                return value
        return None


# Notes on the individual cases (numbered as in the original rule list):
#
# 01 not looked at yet
# 02 bad case: " --> "" success
# 03 control characters: pass
# 04 unicode \u escapes: pass
# 05 same as above
# 06 an object is not followed by " ; bad case: {abc":1} --> {"abc":1}
# 07 an object key is not followed by : ; bad case: {"abc"1} --> {"abc":1}
# 08 an object starts scanning a value and gets StopIteration
#    08.1 either nothing follows
#    08.2 or what follows does not start with
#         "/{/[/n[ull]/t[rue]/f[alse]/number/NaN/Infinity/-Infinity
#    -- 08.1 append null}
#    -- 08.2 blindly add a "
# 09 after one pair of an object was parsed the next char is not }, a ',' is
#    expected; bad case {"k":1"s":2}
# 10 building on 09: after {"k":1, the next char is not " ; to be refined
#    later (handled like 06 for now); bad case {"k":1,x":2}
# 11 an array starts scanning a value and gets StopIteration
#    11.1 either nothing follows: append ]
#    11.2 same as 08.2: blindly add a { and see
# 12 after an array parsed the previous object a , is needed;
#    here nextchar is neither ] nor , which means the end index was already
#    advanced by one for this nextchar, hence minus 2


def errmsg_inv(e: ValueError) -> Dict[str, Any]:
    """Describe a decode failure as a plain dict.

    The returned dict has the keys ``parsers`` (names recorded by
    ``record_parser_name``), ``error`` (the matching ``errors`` entry or
    ``None``), ``lineno``, ``colno`` and ``pos``.

    A ``ValueError`` that is not a JSON decode error carries no position
    information; it is reported with ``error`` and all positions set to
    ``None`` instead of tripping an ``assert`` (which would also vanish
    under ``python -O``).
    """
    recorded = e.__dict__.get("parsers", [])
    if not isinstance(e, PyJSONDecodeError):
        return {
            "parsers": recorded,
            "error": None,
            "lineno": None,
            "colno": None,
            "pos": None,
        }
    parser = e.__dict__.get("parser", "")
    localerr = errors.get_decode_error(parser, e.msg)
    return {
        "parsers": recorded,
        "error": localerr,
        "lineno": e.lineno,
        "colno": e.colno,
        "pos": e.pos,
    }


def record_parser_name(parser: Any) -> Any:
    """Wrap *parser* so that exceptions passing through remember its name.

    The first wrapper an exception passes stores its parser's name under
    ``parser``; every wrapper appends its parser's name to ``parsers``.  The
    exception itself is re-raised unchanged.
    """

    def new_parser(*args: Any, **kwargs: Any) -> Any:
        try:
            return parser(*args, **kwargs)
        except Exception as e:
            # setdefault keeps the innermost name already stored in "parser".
            e.__dict__.setdefault("parser", parser.__name__)
            e.__dict__.setdefault("parsers", []).append(parser.__name__)
            raise

    return new_parser
decoder.scan_once = py_make_scanner(decoder) 105 | return decoder 106 | 107 | 108 | decoder = make_decoder() 109 | decoder_unstrict = make_decoder(strict=False) 110 | 111 | 112 | class DecodeResult(NamedTuple): 113 | success: bool 114 | exception: Optional[Exception] 115 | err_info: Optional[Union[Dict[str, Any], Tuple[Any, Any]]] 116 | 117 | 118 | def decode_line(line: str, *, strict: bool = True) -> DecodeResult: 119 | try: 120 | obj, end = (decoder if strict else decoder_unstrict).scan_once(line, 0) 121 | ok = end == len(line) 122 | return DecodeResult(success=ok, exception=None, err_info=(obj, end)) 123 | except StopIteration as e: 124 | return DecodeResult(success=False, exception=e, err_info=None) 125 | except ValueError as e: 126 | err_info = errmsg_inv(e) 127 | return DecodeResult(success=False, exception=e, err_info=err_info) 128 | -------------------------------------------------------------------------------- /half_json/core.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | import json 3 | from typing import Any, List, NamedTuple, Optional, Tuple 4 | 5 | from half_json.json_util import decode_line, errors 6 | 7 | 8 | class FixResult(NamedTuple): 9 | success: bool 10 | line: str 11 | origin: bool 12 | 13 | 14 | class JSONFixer: 15 | def __init__(self, max_try: int = 20, max_stack: int = 3, *, js_style: bool = False) -> None: 16 | self._max_try = max_try 17 | self._max_stack = max_stack 18 | self._js_style = js_style 19 | self.last_fix: Optional[bool] = None 20 | self.fix_stack: List[str] = [] 21 | 22 | def fix(self, line: str, *, strict: bool = True) -> FixResult: 23 | try: 24 | json.loads(line, strict=strict) 25 | return FixResult(success=True, line=line, origin=True) 26 | except Exception: 27 | pass 28 | 29 | ok, new_line = self.fixwithtry(line, strict=strict) 30 | return FixResult(success=ok, line=new_line, origin=False) 31 | 32 | def fixwithtry(self, line: str, *, strict: bool = True) -> 
Tuple[bool, str]: 33 | if self._max_try <= 0: 34 | return False, line 35 | 36 | self.fix_stack = [] 37 | self.last_fix = None 38 | 39 | ok = False 40 | for _ in range(self._max_try): 41 | ok, new_line = self.patch_line(line, strict=strict) 42 | if ok: 43 | return ok, new_line 44 | 45 | self.last_fix = line != new_line 46 | if self.last_fix: 47 | self.fix_stack.insert(0, new_line) 48 | self.fix_stack = self.fix_stack[: self._max_stack] 49 | 50 | line = new_line 51 | return ok, line 52 | 53 | def patch_line(self, line: str, *, strict: bool = True) -> Tuple[bool, str]: 54 | result = decode_line(line, strict=strict) 55 | if result.success: 56 | return True, line 57 | 58 | if isinstance(result.exception, ValueError): 59 | return self.patch_value_error(line, result.err_info) 60 | 61 | if isinstance(result.exception, StopIteration): 62 | return self.patch_stop_iteration(line) 63 | 64 | if result.exception is None: 65 | return self.patch_half_parse(line, result.err_info) 66 | 67 | return False, line 68 | 69 | def patch_value_error(self, line: str, err_info: Any) -> Tuple[bool, str]: 70 | if err_info["error"] is None: 71 | return False, line 72 | 73 | error = err_info["error"] 74 | pos = err_info["pos"] 75 | nextchar = line[pos : pos + 1] 76 | lastchar = line[pos - 1 : pos] 77 | nextline = line[pos:] 78 | lastline = line[:pos] 79 | 80 | if error == errors.StringUnterminatedString: 81 | return False, insert_line(line, '"', len(line)) 82 | if error == errors.ObjectExceptKey: 83 | if nextchar == "": 84 | return False, insert_line(line, "}", pos) 85 | if nextchar == ":": 86 | return False, insert_line(line, '""', pos) 87 | if lastchar in "{," and nextchar == ",": 88 | return False, remove_line(line, pos, pos + 1) 89 | if lastchar == "," and nextchar == "}": 90 | return False, remove_line(line, pos - 1, pos) 91 | if nextchar in "[{": 92 | return False, insert_line(line, '"":', pos) 93 | if self._js_style: 94 | # find 'abc' 95 | if nextchar == "'": 96 | nextline = 
remove_line(nextline, 0, 1) 97 | idx = nextline.find(":") 98 | if idx != -1 and idx != 0 and nextline[idx - 1] == "'": 99 | nextline = remove_line(nextline, idx - 1, idx) 100 | 101 | return False, lastline + nextline 102 | # abc:1 --> "aabc":1 103 | idx = nextline.find(":") 104 | if idx != -1: 105 | line = lastline + insert_line(nextline, '"', idx) 106 | return False, insert_line(line, '"', pos) 107 | # TODO process more case " 108 | return False, insert_line(line, '"', pos) 109 | if error == errors.ObjectExceptColon: 110 | return False, insert_line(line, ":", pos) 111 | if error == errors.ObjectExceptObject: 112 | if nextchar == "": 113 | if lastchar == "{": 114 | return False, insert_line(line, "}", pos) 115 | return False, insert_line(line, "null}", pos) 116 | if nextchar == "}": 117 | return False, insert_line(line, "null", pos) 118 | # TODO guess more 119 | return False, insert_line(line, '"', pos) 120 | if error == errors.ObjectExceptComma: 121 | if nextchar == "": 122 | return False, insert_line(line, "}", pos) 123 | return False, insert_line(line, ",", pos) 124 | if error == errors.ArrayExceptObject: 125 | if nextchar == "," and lastchar == "[": 126 | return False, remove_line(line, pos, pos + 1) 127 | if nextchar == ",": 128 | return False, insert_line(line, "null", pos) 129 | if nextchar == "]": 130 | return False, remove_line(line, pos - 1, pos) 131 | if nextchar == "": 132 | if lastchar == "[": 133 | return False, insert_line(line, "]", pos) 134 | return False, insert_line(line, "null]", pos) 135 | # TODO guess more? 136 | return False, insert_line(line, "{", pos) 137 | if error == errors.ArrayExceptComma: 138 | if len(line) == pos: 139 | return False, insert_line(line, "]", pos) 140 | return False, insert_line(line, ",", pos) 141 | # TODO unknonwn 142 | return False, line 143 | 144 | def patch_stop_iteration(self, line: str) -> Tuple[bool, str]: 145 | # TODO clean 146 | # TODO fix 147 | # 1. }] 148 | # 2. ]} 149 | # 3. constans 150 | # 4. 
# TODO better name
def patch_lastest_left_object_and_array(line: str) -> str:
    """Return the opening brackets that balance the leading closers of ``line``.

    Characters are read up to (not including) the first ``{`` or ``[``. Each
    ``}`` or ``]`` seen contributes its opener, and the openers come back in
    reverse order of appearance so that prepending them nests correctly:
    ``'}]{['`` gives ``'[{'``, i.e. ``'[{}]{['``. Other characters are ignored.
    """
    openers = []
    for ch in line:
        if ch == "{" or ch == "[":
            break
        if ch == "}":
            openers.append("{")
        elif ch == "]":
            openers.append("[")
    return "".join(reversed(openers))


# TODO better name
# TODO: change to "lastest" (presumably: guess from the nearest unmatched closer)
# TODO {}}]]]] --> { not [
def patch_guess_left(line: str) -> str:
    """Guess one opening character to prepend to ``line``.

    Compares raw character counts only. When closers and openers do not
    balance for braces or for brackets, return ``"{"`` if the brace surplus
    is at least the bracket surplus, otherwise ``"["``. When both balance,
    return ``'"'`` if the line ends with its only double quote, else ``""``.
    """
    brace_surplus = line.count("}") - line.count("{")
    bracket_surplus = line.count("]") - line.count("[")
    if brace_surplus or bracket_surplus:
        return "{" if brace_surplus >= bracket_surplus else "["

    lone_trailing_quote = line.endswith('"') and line.count('"') == 1
    return '"' if lone_trailing_quote else ""


def insert_line(line: str, value: str, pos: int) -> str:
    """Return ``line`` with ``value`` spliced in at slice position ``pos``."""
    head, tail = line[:pos], line[pos:]
    return "".join((head, value, tail))


def remove_line(line: str, start: int, end: int) -> str:
    """Return ``line[:start]`` followed by ``line[end:]``."""
    before = line[:start]
    after = line[end:]
    return before + after
class TestMissTailAndHeadCase(unittest.TestCase):
    """Check that ``JSONFixer`` repairs a JSON document cut at a random point.

    The cut positions are drawn from the unseeded ``random`` module, so each
    run covers different inputs. Every assertion therefore reports the cut
    index, which is all that is needed to replay a failing case.
    """

    # by json-generator
    line = '[{"_id":"5cf12ecfb7af6c84da64571b","index":0,"guid":"c2aedc2a-7303-42e2-b5a8-d58afca2149f","isActive":false,"balance":"$1,322.22","picture":"http://placehold.it/32x32","age":24,"eyeColor":"blue","name":{"first":"Gardner","last":"Ford"},"company":"IMAGINART","email":"gardner.ford@imaginart.net","phone":"+1 (874) 563-3237","address":"779 Cortelyou Road, Manchester, North Carolina, 939","about":"Ut dolore commodo qui nisi aliquip. Ad occaecat duis ipsum laborum magna cillum non mollit est eu. Non consectetur consectetur amet sunt reprehenderit tempor ex ea pariatur deserunt magna mollit sint in. Qui cupidatat ad eiusmod laborum ad consectetur elit ut. Quis dolore irure irure mollit aliquip laborum consectetur. Culpa do et id in in eu minim exercitation labore. Anim ex laboris nulla occaecat.","registered":"Saturday, May 18, 2019 3:55 PM","latitude":"-55.587817","longitude":"89.374875","tags":["irure","culpa","sint","sint","aliqua"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Malinda Estes"},{"id":1,"name":"Mueller Ryan"},{"id":2,"name":"Vilma Woods"}],"greeting":"Hello, Gardner! You have 6 unread messages.","favoriteFruit":"banana"},{"_id":"5cf12ecf5f6594406acaf90f","index":1,"guid":"eb7581e2-9471-4435-a689-7615f1324489","isActive":false,"balance":"$3,243.81","picture":"http://placehold.it/32x32","age":28,"eyeColor":"green","name":{"first":"Neva","last":"Frederick"},"company":"ACCRUEX","email":"neva.frederick@accruex.org","phone":"+1 (935) 521-3229","address":"935 Colby Court, Vallonia, Wisconsin, 9524","about":"Eiusmod dolor fugiat proident ex officia Lorem cupidatat cupidatat ut sunt minim. Sit incididunt reprehenderit cupidatat aliqua minim ad. Pariatur deserunt ad ad culpa veniam irure sint dolor quis pariatur eu laboris officia. Ad consequat voluptate cupidatat anim nulla elit veniam ex ipsum mollit. Pariatur est est excepteur laboris incididunt aliquip excepteur elit velit. Sint eu aliqua nulla dolore incididunt dolore nisi quis adipisicing est enim tempor.","registered":"Saturday, April 13, 2019 11:36 AM","latitude":"-29.541751","longitude":"-93.408621","tags":["laborum","eu","non","do","ut"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Ofelia Harmon"},{"id":1,"name":"Phillips Flowers"},{"id":2,"name":"Conner Walker"}],"greeting":"Hello, Neva! You have 8 unread messages.","favoriteFruit":"strawberry"},{"_id":"5cf12ecf20d50e3aa3590acb","index":2,"guid":"f150d306-8ca8-4791-9e99-912a75bb12a1","isActive":false,"balance":"$1,897.79","picture":"http://placehold.it/32x32","age":35,"eyeColor":"green","name":{"first":"Angelia","last":"Daniels"},"company":"DIGIGEN","email":"angelia.daniels@digigen.info","phone":"+1 (924) 451-2569","address":"628 Clymer Street, Whitmer, Delaware, 2210","about":"Enim adipisicing irure nisi nisi cillum voluptate ea commodo deserunt. Labore commodo ea culpa do esse cupidatat commodo consequat. Aliquip aliqua ut enim duis commodo dolore sint incididunt nulla excepteur. Occaecat labore consectetur occaecat ipsum id dolore dolor. Incididunt sint veniam ea dolore officia mollit dolore ullamco excepteur. Esse et esse nostrud aliquip exercitation Lorem. Laboris pariatur duis consectetur tempor reprehenderit ullamco pariatur sit deserunt sint non mollit eu.","registered":"Monday, August 6, 2018 2:54 AM","latitude":"-44.812364","longitude":"-40.811892","tags":["amet","consectetur","consequat","ex","elit"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Kline Delaney"},{"id":1,"name":"Agnes Patterson"},{"id":2,"name":"Donna Weeks"}],"greeting":"Hello, Angelia! You have 6 unread messages.","favoriteFruit":"apple"},{"_id":"5cf12ecf4eaebd02791a0a5d","index":3,"guid":"14fd69a4-6e19-4f46-a95f-ab99f144d383","isActive":false,"balance":"$1,647.55","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":{"first":"Aurelia","last":"Bentley"},"company":"ZYTREK","email":"aurelia.bentley@zytrek.tv","phone":"+1 (859) 530-2393","address":"582 Narrows Avenue, Riceville, Texas, 3979","about":"Sunt do aliquip voluptate sint pariatur adipisicing et. Irure voluptate ad voluptate anim aute ipsum laboris et. Culpa nostrud consequat in ex Lorem ex. Nostrud quis qui cupidatat occaecat incididunt aliqua elit aliqua anim labore voluptate sint consectetur ullamco. Eiusmod fugiat laborum sint velit eu do ex labore sunt labore exercitation voluptate ut aliquip.","registered":"Wednesday, October 24, 2018 10:08 AM","latitude":"-66.524116","longitude":"42.643245","tags":["aliquip","et","tempor","sit","in"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Tameka Whitfield"},{"id":1,"name":"Tessa Shepard"},{"id":2,"name":"Meyers Barr"}],"greeting":"Hello, Aurelia! You have 8 unread messages.","favoriteFruit":"strawberry"},{"_id":"5cf12ecf9508728e7da0a68f","index":4,"guid":"ee0a1b2e-da52-4b3b-a3d5-353495e6098e","isActive":false,"balance":"$3,847.46","picture":"http://placehold.it/32x32","age":40,"eyeColor":"brown","name":{"first":"Sutton","last":"Hess"},"company":"LUXURIA","email":"sutton.hess@luxuria.co.uk","phone":"+1 (928) 456-2632","address":"125 Vista Place, Charco, Indiana, 3164","about":"Deserunt ut ad proident aliqua ipsum laborum officia deserunt ea aliquip. In commodo est et esse sit mollit adipisicing veniam. Nulla eiusmod voluptate minim laborum laboris in dolore in est fugiat dolor exercitation officia. Non mollit tempor id eiusmod ex anim adipisicing qui ea ullamco et cupidatat. Aliquip irure nulla amet Lorem id eiusmod eu velit sit.","registered":"Thursday, March 19, 2015 6:39 PM","latitude":"-35.426884","longitude":"-133.613414","tags":["fugiat","dolore","nostrud","consectetur","anim"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Dorothea Dawson"},{"id":1,"name":"Compton Wilder"},{"id":2,"name":"Kelly Caldwell"}],"greeting":"Hello, Sutton! You have 10 unread messages.","favoriteFruit":"strawberry"},{"_id":"5cf12ecf7bde15b99bdf63b8","index":5,"guid":"fe862564-7ad7-4c8f-9a0d-005657883490","isActive":true,"balance":"$2,323.98","picture":"http://placehold.it/32x32","age":32,"eyeColor":"brown","name":{"first":"Goodman","last":"Spence"},"company":"UNIWORLD","email":"goodman.spence@uniworld.io","phone":"+1 (919) 463-3731","address":"878 High Street, Zortman, Virgin Islands, 9952","about":"Qui non velit nisi est cillum amet fugiat culpa ut anim aliqua nisi. Culpa aliquip eiusmod dolore proident deserunt minim sint officia do pariatur fugiat. Occaecat adipisicing eu esse non in consequat culpa amet fugiat aute sunt aliqua adipisicing proident. Voluptate ea duis pariatur exercitation. Irure officia consequat excepteur eu laborum occaecat amet ipsum laborum in nostrud eiusmod occaecat tempor. Ex amet culpa ipsum eiusmod adipisicing non anim ex veniam aute in ullamco ut.","registered":"Wednesday, January 10, 2018 3:37 PM","latitude":"68.089656","longitude":"29.601085","tags":["velit","ut","qui","Lorem","minim"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Hutchinson Finch"},{"id":1,"name":"Banks Winters"},{"id":2,"name":"Santana Cortez"}],"greeting":"Hello, Goodman! You have 7 unread messages.","favoriteFruit":"banana"},{"_id":"5cf12ecfc62579578b1bc759","index":6,"guid":"76ec16d9-a30f-471a-8c8b-31ea2fc37912","isActive":true,"balance":"$3,657.56","picture":"http://placehold.it/32x32","age":29,"eyeColor":"green","name":{"first":"Corinne","last":"Nguyen"},"company":"REALMO","email":"corinne.nguyen@realmo.name","phone":"+1 (800) 451-3183","address":"412 Beard Street, Kapowsin, New York, 836","about":"Qui fugiat sunt culpa consequat sint cillum veniam ullamco et aute ipsum. Sit ad tempor duis ex pariatur sint aliquip. Proident magna aliquip commodo sit quis. Officia occaecat officia voluptate sit exercitation occaecat qui anim. Excepteur mollit aute proident eu. Eu esse consectetur sunt laboris.","registered":"Friday, February 3, 2017 12:02 PM","latitude":"-38.812149","longitude":"47.887586","tags":["in","aliqua","aliqua","occaecat","id"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Celina Copeland"},{"id":1,"name":"Watson Santos"},{"id":2,"name":"Melba Olsen"}],"greeting":"Hello, Corinne! You have 8 unread messages.","favoriteFruit":"strawberry"},{"_id":"5cf12ecf223310f0f99c3d6e","index":7,"guid":"798e53fb-6590-4fc9-ba1b-cbb597e555de","isActive":false,"balance":"$2,244.77","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":{"first":"Rene","last":"Gillespie"},"company":"SCHOOLIO","email":"rene.gillespie@schoolio.us","phone":"+1 (828) 413-2911","address":"465 Ridgecrest Terrace, Joppa, Connecticut, 8512","about":"Do aliquip consequat nulla esse anim. Nulla cillum tempor labore excepteur voluptate reprehenderit amet. Nisi officia ea nisi fugiat mollit non eiusmod. Proident nulla ea sunt non quis dolor laboris magna cillum laborum eu. Sit nostrud eu enim consequat irure laborum duis et irure Lorem. Ipsum fugiat aliquip aute consequat est culpa sint cillum Lorem fugiat pariatur deserunt. Occaecat esse ea esse officia laborum.","registered":"Sunday, May 19, 2019 12:15 PM","latitude":"-65.435682","longitude":"137.573407","tags":["velit","voluptate","aliqua","consectetur","qui"],"range":[0,1,2,3,4,5,6,7,8,9],"friends":[{"id":0,"name":"Krystal Freeman"},{"id":1,"name":"Jenny Ruiz"},{"id":2,"name":"Heather Wood"}],"greeting":"Hello, Rene! You have 7 unread messages.","favoriteFruit":"banana"}]'  # noqa

    def test_range_tail(self):
        """Every prefix ``line[:idx]`` (tail cut off) must be reported fixed."""
        fixer = JSONFixer()
        for _ in range(1000):
            idx = random.randint(1, len(self.line))
            result = fixer.fix(self.line[:idx])
            # The input is random: name the cut so a failure can be replayed.
            self.assertTrue(
                result.success,
                "could not fix line[:{}]".format(idx),
            )

    def test_range_head(self):
        """Every suffix ``line[idx:]`` (head cut off) must be reported fixed."""
        # NOTE(review): 200 is passed positionally; presumably a larger retry
        # budget than the default -- confirm against JSONFixer.__init__.
        fixer = JSONFixer(200)
        for _ in range(1000):
            idx = random.randint(1, len(self.line))
            result = fixer.fix(self.line[idx:])
            # Carry the cut index and the fixer's output in the failure
            # message instead of printing them to stdout.
            self.assertTrue(
                result.success,
                "could not fix line[{}:]; fixer output: {}".format(idx, result.line),
            )