├── index.rst ├── MANIFEST.in ├── CHANGES.txt ├── .gitignore ├── .travis.yml ├── dirtyjson ├── tests │ ├── test_pass2.py │ ├── test_errors.py │ ├── test_integer.py │ ├── test_decimal.py │ ├── test_pass3.py │ ├── test_float.py │ ├── __init__.py │ ├── test_pass1.py │ ├── test_unicode.py │ ├── test_decode.py │ └── test_fail.py ├── error.py ├── compat.py ├── __init__.py ├── attributed_containers.py └── loader.py ├── scripts └── make_docs.py ├── setup.py ├── conf.py ├── LICENSE.txt ├── simplejson.LICENSE.txt └── README.rst /index.rst: -------------------------------------------------------------------------------- 1 | README.rst -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.py 2 | include *.txt 3 | include *.rst 4 | include scripts/*.py 5 | include MANIFEST.in 6 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- 1 | Version 1.0.0 2 | 3 | * Initial release, forked from simplejson 3.3.0 4 | 5 | Version 1.0.8 6 | 7 | * Add Python 3.10 support 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.egg 3 | *.pyc 4 | *.so 5 | .DS_Store 6 | /MANIFEST 7 | /.coverage 8 | /.idea 9 | /coverage.xml 10 | /htmlcov 11 | /build 12 | /dist 13 | /docs 14 | /env* 15 | atlassian-ide-plugin.xml 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.5" 4 | - "2.6" 5 | - "2.7" 6 | - "3.3" 7 | - "3.4" 8 | - "pypy" 9 | script: 10 | - python setup.py build_ext -i 11 | - python -m compileall -f . 
12 | - python setup.py test 13 | -------------------------------------------------------------------------------- /dirtyjson/tests/test_pass2.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import json 3 | import dirtyjson 4 | 5 | # from http://json.org/JSON_checker/test/pass2.json 6 | JSON = r''' 7 | [[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] 8 | ''' 9 | 10 | 11 | class TestPass2(TestCase): 12 | def test_parse(self): 13 | # test in/out equivalence and parsing 14 | res = dirtyjson.loads(JSON) 15 | out = json.dumps(res) 16 | self.assertEqual(res, dirtyjson.loads(out)) 17 | -------------------------------------------------------------------------------- /scripts/make_docs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import subprocess 4 | 5 | SPHINX_BUILD = 'sphinx-build' 6 | 7 | DOCTREES_DIR = 'build/doctrees' 8 | HTML_DIR = 'docs' 9 | for dirname in DOCTREES_DIR, HTML_DIR: 10 | if not os.path.exists(dirname): 11 | os.makedirs(dirname) 12 | 13 | open(os.path.join(HTML_DIR, '.nojekyll'), 'w').close() 14 | res = subprocess.call([ 15 | SPHINX_BUILD, '-d', DOCTREES_DIR, '-b', 'html', '.', 'docs', 16 | ]) 17 | raise SystemExit(res) 18 | -------------------------------------------------------------------------------- /dirtyjson/tests/test_errors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from unittest import TestCase 3 | 4 | import dirtyjson 5 | from dirtyjson.compat import u, b 6 | 7 | 8 | class TestErrors(TestCase): 9 | def test_scan_error(self): 10 | for t in (u, b): 11 | try: 12 | dirtyjson.loads(t('{"asdf": "')) 13 | except dirtyjson.Error: 14 | err = sys.exc_info()[1] 15 | else: 16 | self.fail('Expected JSONDecodeError') 17 | self.assertEqual(err.lineno, 1) 18 | self.assertEqual(err.colno, 10) 19 | 
-------------------------------------------------------------------------------- /dirtyjson/tests/test_integer.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from dirtyjson.compat import StringIO 3 | 4 | import dirtyjson 5 | 6 | 7 | class TestInteger(TestCase): 8 | NUMS = ("1", 1), ("10", 10), ("077", 63), ("-1000", -1000), ("0x40", 64), ("-0x40", -64) 9 | 10 | def loads(self, s, **kw): 11 | sio = StringIO(s) 12 | res = dirtyjson.loads(s, **kw) 13 | self.assertEqual(res, dirtyjson.load(sio, **kw)) 14 | return res 15 | 16 | def test_decimal_decode(self): 17 | for s, n in self.NUMS: 18 | self.assertEqual(self.loads(s), n) 19 | -------------------------------------------------------------------------------- /dirtyjson/tests/test_decimal.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | from unittest import TestCase 3 | from dirtyjson.compat import StringIO 4 | 5 | import dirtyjson 6 | 7 | 8 | class TestDecimal(TestCase): 9 | NUMS = "1.0", "10.00", "1.1", "1234567890.1234567890", "500" 10 | 11 | def loads(self, s, **kw): 12 | sio = StringIO(s) 13 | res = dirtyjson.loads(s, **kw) 14 | self.assertEqual(res, dirtyjson.load(sio, **kw)) 15 | return res 16 | 17 | def test_decimal_decode(self): 18 | for s in self.NUMS: 19 | self.assertEqual(self.loads(s, parse_float=Decimal), Decimal(s)) 20 | -------------------------------------------------------------------------------- /dirtyjson/tests/test_pass3.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import json 3 | import dirtyjson 4 | 5 | # from http://json.org/JSON_checker/test/pass3.json 6 | JSON = r''' 7 | { 8 | "JSON Test Pattern pass3": { 9 | "The outermost value": "must be an object or array.", 10 | "In this test": "It is an object.", 11 | array_value: ["one", "two", 3], 12 | } 13 | } 14 | ''' 15 | 16 | 
"""JSON decode error class
"""

__all__ = ['Error']


class Error(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The index of doc where parsing failed
    lineno: The 1-based line corresponding to pos
    colno: The 1-based column corresponding to pos

    The exception's own message (``str(err)``) is the formatted text built
    by :func:`errmsg`.
    """
    def __init__(self, msg, doc, pos):
        ValueError.__init__(self, errmsg(msg, doc, pos))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno, self.colno = linecol(doc, pos)

    def __reduce__(self):
        # ValueError only stores the single formatted message in ``args``,
        # so the default reduction would call ``Error(formatted)`` and fail
        # with a TypeError.  Rebuild from the three constructor arguments.
        return self.__class__, (self.msg, self.doc, self.pos)


def linecol(doc, pos):
    """Return the 1-based ``(lineno, colno)`` of index *pos* within *doc*."""
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when there is no newline before pos, which turns the
    # subtraction into ``pos + 1`` -- exactly the first-line column.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno


def errmsg(msg, doc, pos):
    """Format *msg* together with the line, column and offset of *pos*.

    Every ``%r`` in *msg* is replaced by the repr of the character found at
    *pos* (the repr of an empty string when *pos* is past the end of *doc*).
    """
    lineno, colno = linecol(doc, pos)
    msg = msg.replace('%r', repr(doc[pos:pos + 1]))
    fmt = '%s: line %d column %d (char %d)'
    return fmt % (msg, lineno, colno, pos)
"""Python 2 / Python 3 compatibility shims

Exposes one set of names (``b``, ``u``, ``StringIO``, ``BytesIO``,
``text_type``, ``binary_type``, ``string_types``, ``integer_types``,
``unichr``, ``ascii``, ``fromhex``, ``long_type``) whose definitions depend
on the major version of the running interpreter.
"""
import sys

PY2 = sys.version_info[0] < 3

if PY2:
    # noinspection PyUnresolvedReferences
    import cStringIO as StringIO
    # One in-memory file class serves both text and bytes on Python 2.
    # noinspection PyUnresolvedReferences
    StringIO = BytesIO = StringIO.StringIO

    # noinspection PyUnresolvedReferences
    text_type = unicode
    binary_type = str
    # noinspection PyUnresolvedReferences
    string_types = (basestring,)
    # noinspection PyUnresolvedReferences
    integer_types = (int, long)
    # noinspection PyUnresolvedReferences,PyUnboundLocalVariable
    unichr = unichr
    # noinspection PyShadowingBuiltins
    ascii = repr

    def b(s):
        """Return *s* unchanged (a native str is already a byte string)."""
        return s

    def u(s):
        """Decode the byte string *s* with the 'unicode_escape' codec."""
        # noinspection PyUnresolvedReferences
        return unicode(s, 'unicode_escape')

    def fromhex(s):
        """Decode the hexadecimal string *s* into a byte string."""
        return s.decode('hex')

else:
    import codecs
    import io

    StringIO = io.StringIO
    BytesIO = io.BytesIO

    text_type = str
    binary_type = bytes
    string_types = (str,)
    integer_types = (int,)
    ascii = ascii

    def b(s):
        """Encode the text *s* to bytes using latin-1."""
        encoded, _consumed = codecs.latin_1_encode(s)
        return encoded

    def u(s):
        """Return *s* unchanged (a native str is already text)."""
        return s

    def unichr(s):
        """Return the one-character string for code point *s*."""
        return chr(s)

    def fromhex(s):
        """Decode the hexadecimal string *s* into bytes."""
        return bytes.fromhex(s)

# The widest integer type available: ``long`` on Python 2, ``int`` on 3.
long_type = integer_types[-1]
-------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import json 3 | import dirtyjson 4 | 5 | # from http://json.org/JSON_checker/test/pass1.json 6 | JSON = r''' 7 | [ 8 | "JSON Test Pattern pass1", 9 | {"object with 1 member":["array with 1 element"]}, 10 | {}, 11 | [], 12 | -42, 13 | true, 14 | false, 15 | null, 16 | { 17 | "integer": 1234567890, 18 | "real": -9876.543210, 19 | "e": 0.123456789e-12, 20 | "E": 1.234567890E+34, 21 | "": 23456789012E66, 22 | "zero": 0, 23 | "one": 1, 24 | "space": " ", 25 | "quote": "\"", 26 | "backslash": "\\", 27 | "controls": "\b\f\n\r\t", 28 | "slash": "/ & \/", 29 | "alpha": "abcdefghijklmnopqrstuvwyz", 30 | "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", 31 | "digit": "0123456789", 32 | "special": "`1~!@#$%^&*()_+-={':[,]}|;.?", 33 | "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", 34 | "true": true, 35 | "false": false, 36 | "null": null, 37 | "array":[ ], 38 | "object":{ }, 39 | "address": "50 St. James Street", 40 | "url": "http://www.JSON.org/", 41 | "comment": "// /* */": " ", 43 | " s p a c e d " :[1,2 , 3 44 | 45 | , 46 | 47 | 4 , 5 , 6 ,7 ],"compact": [1,2,3,4,5,6,7], 48 | "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", 49 | "quotes": "" \u0022 %22 0x22 034 "", 50 | "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" 
51 | : "A key can be any string" 52 | }, 53 | 0.5 ,98.6 54 | , 55 | 99.44 56 | , 57 | 58 | 1066, 59 | 1e1, 60 | 0.1e1, 61 | 1e-1, 62 | 1e00,2e+00,2e-00 63 | ,"rosebud"] 64 | ''' 65 | 66 | 67 | class TestPass1(TestCase): 68 | def test_parse(self): 69 | # test in/out equivalence and parsing 70 | res = dirtyjson.loads(JSON) 71 | out = json.dumps(res) 72 | self.assertEqual(res, dirtyjson.loads(out)) 73 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import with_statement 3 | 4 | import sys 5 | import subprocess 6 | 7 | try: 8 | from setuptools import setup, Command 9 | except ImportError: 10 | from distutils.core import setup, Command 11 | from distutils.errors import CCompilerError, DistutilsExecError, \ 12 | DistutilsPlatformError 13 | 14 | VERSION = '1.0.8' 15 | DESCRIPTION = "JSON decoder for Python that can extract data from the muck" 16 | 17 | with open('README.rst', 'r') as f: 18 | LONG_DESCRIPTION = f.read() 19 | 20 | setup( 21 | name="dirtyjson", 22 | version=VERSION, 23 | packages=['dirtyjson', 'dirtyjson.tests'], 24 | author="Scott Maxwell", 25 | author_email="scott@codecobblers.com", 26 | url="https://github.com/codecobblers/dirtyjson", 27 | description=DESCRIPTION, 28 | long_description=LONG_DESCRIPTION, 29 | long_description_content_type="text/x-rst", 30 | license="MIT License", 31 | classifiers=["Development Status :: 5 - Production/Stable", 32 | "Intended Audience :: Developers", 33 | "License :: OSI Approved :: MIT License", 34 | "License :: OSI Approved :: Academic Free License (AFL)", 35 | "Programming Language :: Python", 36 | "Programming Language :: Python :: 2", 37 | "Programming Language :: Python :: 2.5", 38 | "Programming Language :: Python :: 2.6", 39 | "Programming Language :: Python :: 2.7", 40 | "Programming Language :: Python :: 3", 41 | "Programming Language :: Python 
:: 3.3", 42 | "Programming Language :: Python :: 3.4", 43 | "Programming Language :: Python :: 3.5", 44 | "Programming Language :: Python :: 3.6", 45 | "Programming Language :: Python :: 3.7", 46 | "Programming Language :: Python :: 3.8", 47 | "Programming Language :: Python :: 3.9", 48 | "Programming Language :: Python :: 3.10", 49 | "Programming Language :: Python :: Implementation :: CPython", 50 | "Programming Language :: Python :: Implementation :: PyPy", 51 | "Topic :: Software Development :: Libraries :: Python Modules"], 52 | platforms=['any'], 53 | test_suite="dirtyjson.tests", 54 | zip_safe=True) 55 | -------------------------------------------------------------------------------- /dirtyjson/tests/test_unicode.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from unittest import TestCase 3 | 4 | import dirtyjson 5 | from dirtyjson.compat import unichr, text_type 6 | 7 | 8 | class TestUnicode(TestCase): 9 | def test_big_unicode_decode(self): 10 | uc = u'z\U0001d120x' 11 | self.assertEqual(dirtyjson.loads('"' + uc + '"'), uc) 12 | self.assertEqual(dirtyjson.loads('"z\\ud834\\udd20x"'), uc) 13 | 14 | def test_unicode_decode(self): 15 | for i in range(0, 0xd7ff): 16 | uc = unichr(i) 17 | s = '"\\u%04x"' % (i,) 18 | self.assertEqual(dirtyjson.loads(s), uc) 19 | 20 | def test_default_encoding(self): 21 | self.assertEqual(dirtyjson.loads(u'{"a": "\xe9"}'.encode('utf-8')), 22 | {'a': u'\xe9'}) 23 | 24 | def test_unicode_preservation(self): 25 | self.assertEqual(type(dirtyjson.loads(u'""')), text_type) 26 | self.assertEqual(type(dirtyjson.loads(u'"a"')), text_type) 27 | self.assertEqual(type(dirtyjson.loads(u'["a"]')[0]), text_type) 28 | 29 | def test_invalid_escape_sequences(self): 30 | # incomplete escape sequence 31 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\u"') 32 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\u1"') 33 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, 
'"\\u12"') 34 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\u123"') 35 | # invalid escape sequence 36 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\u123x"') 37 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\u12x4"') 38 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\u1x34"') 39 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\ux234"') 40 | if sys.maxunicode > 65535: 41 | # invalid escape sequence for low surrogate 42 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\ud800\\u"') 43 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\ud800\\u0"') 44 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\ud800\\u00"') 45 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\ud800\\u000"') 46 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\ud800\\u000x"') 47 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\ud800\\u00x0"') 48 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\ud800\\u0x00"') 49 | self.assertRaises(dirtyjson.Error, dirtyjson.loads, '"\\ud800\\ux000"') 50 | -------------------------------------------------------------------------------- /dirtyjson/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .error import Error 3 | from .loader import DirtyJSONLoader 4 | 5 | r"""JSON (JavaScript Object Notation) is a subset of 6 | JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data 7 | interchange format. 8 | 9 | :mod:`dirtyjson` exposes an API familiar to users of the standard library 10 | :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained 11 | version of the :mod:`json` library contained in Python 2.6, but maintains 12 | compatibility with Python 2.4 and Python 2.5 and (currently) has 13 | significant performance advantages, even without using the optional C 14 | extension for speedups. 
def load(fp, encoding=None, parse_float=None, parse_int=None,
         parse_constant=None, search_for_first_object=False, start_index=0):
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    a JSON document) to a Python object.

    The whole stream is read with a single ``fp.read()`` call and the result
    is handed to :func:`loads` together with every other argument.

    *encoding* determines the encoding used to interpret any
    :class:`str` objects decoded by this instance (``'utf-8'`` by
    default).  It has no effect when decoding :class:`unicode` objects.

    Note that currently only encodings that are a superset of ASCII work,
    strings of other encodings should be passed in as :class:`unicode`.

    *parse_float*, if specified, will be called with the string of every
    JSON float to be decoded.  By default, this is equivalent to
    ``float(num_str)``.  This can be used to use another data type or parser
    for JSON floats (e.g. :class:`decimal.Decimal`).

    *parse_int*, if specified, will be called with the string of every
    JSON int to be decoded.  By default, this is equivalent to
    ``int(num_str)``.  This can be used to use another data type or parser
    for JSON integers (e.g. :class:`float`).

    *parse_constant*, if specified, will be called with one of the
    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
    can be used to raise an exception if invalid JSON numbers are
    encountered.

    *search_for_first_object* and *start_index* are forwarded unchanged to
    :func:`loads`; *start_index* is an offset into the text returned by
    ``fp.read()``.  (Presumably the former skips ahead to the first object
    or array and the latter is where decoding begins -- confirm against the
    loader.)  *start_index* defaults to ``0``, which matches the behaviour
    of earlier versions of this function that could not pass it at all.
    """
    return loads(fp.read(), encoding, parse_float, parse_int, parse_constant,
                 search_for_first_object, start_index)
"""Insertion-ordered dict and list containers that can carry an extra
"attributes" object for each entry.

AttributedDict is adapted from the OrderedDict recipe by Raymond Hettinger:

http://code.activestate.com/recipes/576693/

"""
try:
    try:
        from collections.abc import MutableMapping as DictMixin
    except ImportError:
        from collections import MutableMapping as DictMixin
    py_level = 3
except ImportError:
    # noinspection PyUnresolvedReferences,PyCompatibility
    from UserDict import DictMixin
    py_level = 2

# Modified from original to support Python 2.4, see
# http://code.google.com/p/dirtyjson/issues/detail?id=53
try:
    # noinspection PyStatementEffect
    all
except NameError:
    # noinspection PyShadowingBuiltins
    def all(seq):
        for elem in seq:
            if not elem:
                return False
        return True


class AttributedDict(dict, DictMixin):
    """Dict that iterates in insertion order and can remember one
    "attributes" object per key.

    Ordering is kept in a circular doubly linked list of
    ``[key, prev, next]`` nodes anchored at a sentinel node; a side map
    gives direct access from a key to its node.  Attributes are only
    recorded by :meth:`add_with_attributes`.
    """

    def __init__(self, *args, **kwds):
        """Create the dict, optionally filled like ``dict.update`` would.

        Accepts at most one positional argument (a mapping or an iterable
        of key/value pairs) plus keyword items.  Calling it without
        arguments yields an empty dict.  The optional arguments are what
        :meth:`copy` and the callable returned by :meth:`__reduce__` rely on.
        """
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        super(AttributedDict, self).__init__()
        self.clear()
        self.update(*args, **kwds)

    # noinspection PyAttributeOutsideInit
    def clear(self):
        """Remove every item together with its ordering and attributes."""
        self.__end = end = []
        end += [None, end, end]     # sentinel node for doubly linked list
        self.__map = {}             # key --> [key, prev, next]
        self.__attributes = {}      # key --> attributes object
        dict.clear(self)

    def add_with_attributes(self, key, value, attributes):
        """Store ``key: value`` and remember *attributes* for that key."""
        self.__setitem__(key, value)
        self.__attributes[key] = attributes

    def attributes(self, key):
        """Return the attributes recorded for *key*, or None if there are none."""
        return self.__attributes.get(key)

    def __setitem__(self, key, value):
        # Only a new key is linked in (at the end); re-assigning an existing
        # key keeps its position and whatever attributes were recorded.
        if key not in self:
            end = self.__end
            curr = end[1]
            curr[2] = end[1] = self.__map[key] = [key, curr, end]
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        # dict.__delitem__ goes first so a missing key raises KeyError
        # before any bookkeeping is touched.
        dict.__delitem__(self, key)
        key, prev, next_entry = self.__map.pop(key)
        prev[2] = next_entry
        next_entry[1] = prev
        # Drop the attributes too, so a key added again later does not
        # inherit stale ones.
        self.__attributes.pop(key, None)

    def __iter__(self):
        end = self.__end
        curr = end[2]
        while curr is not end:
            yield curr[0]
            curr = curr[2]

    def __reversed__(self):
        end = self.__end
        curr = end[1]
        while curr is not end:
            yield curr[0]
            curr = curr[1]

    def popitem(self, last=True):
        """Remove and return a ``(key, value)`` pair.

        The newest pair is taken when *last* is true, the oldest otherwise.
        Raises KeyError when the dict is empty.
        """
        if not self:
            raise KeyError('dictionary is empty')
        # Read the key straight from the linked list: the sentinel's "prev"
        # is the newest node and its "next" is the oldest.  This needs
        # neither the next() builtin nor the iterator .next() method.
        end = self.__end
        if last:
            key = end[1][0]
        else:
            key = end[2][0]
        value = self.pop(key)
        return key, value

    def __reduce__(self):
        """Reduce to ``(class, (items,), state)`` for pickle and copy."""
        items = [[k, self[k]] for k in self]
        # The linked list is rebuilt from ``items``, so it is taken out of
        # the instance state temporarily while that state is snapshotted.
        tmp = self.__map, self.__end
        del self.__map, self.__end
        inst_dict = vars(self).copy()
        self.__map, self.__end = tmp
        if inst_dict:
            return self.__class__, (items,), inst_dict
        return self.__class__, (items,)

    def keys(self):
        return list(self)

    setdefault = DictMixin.setdefault
    update = DictMixin.update
    pop = DictMixin.pop
    values = DictMixin.values
    items = DictMixin.items

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%s)' % (self.__class__.__name__, list(self.items()))

    def copy(self):
        """Return a shallow copy that keeps the order and the attributes."""
        clone = self.__class__(self)
        clone.__attributes = dict(self.__attributes)
        return clone

    # noinspection PyMethodOverriding
    @classmethod
    def fromkeys(cls, iterable, value=None):
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        # Equality ignores insertion order and attributes.  Plain dict
        # comparison gives that result without sorting the items, so it
        # also works for keys that cannot be ordered against each other.
        return dict.__eq__(self, other)

    def __ne__(self, other):
        return not self == other


class AttributedList(list):
    """List that can remember one "attributes" object per appended value.

    Only :meth:`append` records attributes; the other mutating methods are
    inherited from ``list`` unchanged and do not update them.
    """

    def __init__(self):
        super(AttributedList, self).__init__()
        self.__attributes = []

    def append(self, value, attributes=None):
        """Append *value* and record *attributes* (None by default) for it."""
        super(AttributedList, self).append(value)
        self.__attributes.append(attributes)

    def attributes(self, index):
        """Return the attributes recorded by the *index*-th append."""
        return self.__attributes[index]
loads(source) 37 | (a, b), (c, d) = sorted(rval[0]), sorted(rval[1]) 38 | self.assertIs(a, c) 39 | self.assertIs(b, d) 40 | 41 | def test_keys_reuse_str(self): 42 | s = u'[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'.encode('utf8') 43 | self.check_keys_reuse(s, dirtyjson.loads) 44 | 45 | def test_keys_reuse_unicode(self): 46 | s = u'[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]' 47 | self.check_keys_reuse(s, dirtyjson.loads) 48 | 49 | def test_empty_strings(self): 50 | self.assertEqual(dirtyjson.loads('""'), "") 51 | self.assertEqual(dirtyjson.loads(u'""'), u"") 52 | self.assertEqual(dirtyjson.loads('[""]'), [""]) 53 | self.assertEqual(dirtyjson.loads(u'[""]'), [u""]) 54 | 55 | def test_empty_strings_with_single_quotes(self): 56 | self.assertEqual(dirtyjson.loads("''"), "") 57 | self.assertEqual(dirtyjson.loads(u"''"), u"") 58 | self.assertEqual(dirtyjson.loads("['']"), [""]) 59 | self.assertEqual(dirtyjson.loads(u"['']"), [u""]) 60 | 61 | def test_object_keys(self): 62 | result = {"key": "value", "k": "v"} 63 | rval = dirtyjson.loads("""{"key": "value", "k": "v"}""") 64 | self.assertEqual(rval, result) 65 | rval = dirtyjson.loads("""{'key': 'value', 'k': 'v'}""") 66 | self.assertEqual(rval, result) 67 | rval = dirtyjson.loads("""{key: 'value', k: 'v'}""") 68 | self.assertEqual(rval, result) 69 | rval = dirtyjson.loads("""{key: 'value', k: 'v',}""") 70 | self.assertEqual(rval, result) 71 | 72 | def test_not_at_beginning(self): 73 | s = """ 74 | // here are some comments 75 | var a = 1; // here is a line of regular JS 76 | 77 | var b = {test: 1, 'aack': 0x80, "bar": [1, 2, 3]}; 78 | console.log(b); 79 | """ 80 | first_object_index = s.index('{') 81 | 82 | rval = dirtyjson.loads(s, start_index=first_object_index) 83 | self.assertEqual(rval, {'test': 1, 'aack': 0x80, 'bar': [1, 2, 3]}) 84 | 85 | rval = dirtyjson.loads(s, start_index=first_object_index + 1, search_for_first_object=True) 86 | self.assertEqual(rval, [1, 2, 3]) 87 | 88 | rval = 
dirtyjson.loads(s, search_for_first_object=True) 89 | self.assertEqual(rval, {'test': 1, 'aack': 0x80, 'bar': [1, 2, 3]}) 90 | 91 | def test_ignore_single_line_comments(self): 92 | s = """ 93 | // here are some comments 94 | { 95 | // comments inside too 96 | test: 1, // and at the end of lines 97 | 'aack': 0x80, 98 | "bar": [ // even inside arrays 99 | 1, 100 | 2, 101 | 3, // and after trailing commas 102 | ], 103 | more: { // and inside objects 104 | once: true, 105 | twice: false, 106 | three_times3: null // and at the end 107 | } 108 | } 109 | console.log(b); 110 | """ 111 | rval = dirtyjson.loads(s) 112 | self.assertEqual(rval, {'test': 1, 'aack': 0x80, 'bar': [1, 2, 3], 'more': {'once': True, 'twice': False, 'three_times3': None}}) 113 | 114 | def test_ignore_inline_comments(self): 115 | s = """ 116 | /* here are some comments 117 | * that should all be skipped 118 | * right up until the terminator */ { 119 | /* comments inside too */ 120 | test: 1, /* and at the end of lines */ 121 | 'aack': 0x80, 122 | "bar": [ // even inside arrays 123 | 1, 124 | 2, 125 | 3, // and after trailing commas 126 | ], 127 | /* comment this block out 128 | more: { // and inside objects 129 | once: true, 130 | twice: false, 131 | three_times3: null // and at the end 132 | } */ 133 | } 134 | console.log(b); 135 | """ 136 | rval = dirtyjson.loads(s) 137 | self.assertEqual(rval, {'test': 1, 'aack': 0x80, 'bar': [1, 2, 3]}) 138 | -------------------------------------------------------------------------------- /dirtyjson/tests/test_fail.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from unittest import TestCase 3 | 4 | import dirtyjson 5 | 6 | # 2007-10-05 7 | JSONDOCS = [ 8 | # http://json.org/JSON_checker/test/fail2.json 9 | '["Unclosed array"', 10 | # http://json.org/JSON_checker/test/fail5.json 11 | '["double extra comma",,]', 12 | # http://json.org/JSON_checker/test/fail6.json 13 | '[ , "<-- missing value"]', 14 | # 
http://json.org/JSON_checker/test/fail11.json 15 | '{"Illegal expression": 1 + 2}', 16 | # http://json.org/JSON_checker/test/fail12.json 17 | '{"Illegal invocation": alert()}', 18 | # http://json.org/JSON_checker/test/fail15.json 19 | '["Illegal backslash escape: \\x15"]', 20 | # http://json.org/JSON_checker/test/fail16.json 21 | '[\\naked]', 22 | # http://json.org/JSON_checker/test/fail17.json 23 | '["Illegal backslash escape: \\017"]', 24 | # http://json.org/JSON_checker/test/fail19.json 25 | '{"Missing colon" null}', 26 | # http://json.org/JSON_checker/test/fail20.json 27 | '{"Double colon":: null}', 28 | # http://json.org/JSON_checker/test/fail21.json 29 | '{"Comma instead of colon", null}', 30 | # http://json.org/JSON_checker/test/fail22.json 31 | '["Colon instead of comma": false]', 32 | # http://json.org/JSON_checker/test/fail23.json 33 | '["Bad value", truth]', 34 | # http://json.org/JSON_checker/test/fail26.json 35 | '["tab\\ character\\ in\\ string\\ "]', 36 | # http://json.org/JSON_checker/test/fail28.json 37 | '["line\\\nbreak"]', 38 | # http://json.org/JSON_checker/test/fail29.json 39 | '[0e]', 40 | # http://json.org/JSON_checker/test/fail30.json 41 | '[0e+]', 42 | # http://json.org/JSON_checker/test/fail31.json 43 | '[0e+-1]', 44 | # http://json.org/JSON_checker/test/fail32.json 45 | '{"Comma instead if closing brace": true,', 46 | # http://json.org/JSON_checker/test/fail33.json 47 | '["mismatch"}', 48 | # misc based on coverage 49 | '{', 50 | '{]', 51 | '{"foo": "bar"]', 52 | '{"foo": "bar"', 53 | 'nul', 54 | 'nulx', 55 | '-', 56 | '-x', 57 | '-e', 58 | '-e0', 59 | '-Infinite', 60 | '-Inf', 61 | 'Infinit', 62 | 'Infinite', 63 | 'NaM', 64 | 'NuN', 65 | 'falsy', 66 | 'fal', 67 | 'trug', 68 | 'tru', 69 | ] 70 | 71 | 72 | # noinspection PyBroadException 73 | class TestFail(TestCase): 74 | def test_failures(self): 75 | for idx, doc in enumerate(JSONDOCS): 76 | idx += 1 77 | try: 78 | dirtyjson.loads(doc) 79 | except dirtyjson.Error: 80 | pass 81 | else: 
82 | self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) 83 | 84 | def test_array_decoder_issue46(self): 85 | # http://code.google.com/p/dirtyjson/issues/detail?id=46 86 | for doc in [u'[,]', '[,]']: 87 | try: 88 | dirtyjson.loads(doc) 89 | except dirtyjson.Error: 90 | e = sys.exc_info()[1] 91 | self.assertEqual(e.pos, 1) 92 | self.assertEqual(e.lineno, 1) 93 | self.assertEqual(e.colno, 2) 94 | except Exception: 95 | e = sys.exc_info()[1] 96 | self.fail("Unexpected exception raised %r %s" % (e, e)) 97 | else: 98 | self.fail("Unexpected success parsing '[,]'") 99 | 100 | def test_truncated_input(self): 101 | test_cases = [ 102 | ('', 'Expecting value', 0), 103 | ('[', "Expecting value or ']'", 1), 104 | ('[42', "Expecting ',' delimiter", 3), 105 | ('[42,', 'Expecting value', 4), 106 | ('["', 'Unterminated string starting at', 1), 107 | ('["spam', 'Unterminated string starting at', 1), 108 | ('["spam"', "Expecting ',' delimiter", 7), 109 | ('["spam",', 'Expecting value', 8), 110 | ('{', 'Expecting property name', 1), 111 | ('{"', 'Unterminated string starting at', 1), 112 | ('{"spam', 'Unterminated string starting at', 1), 113 | ('{"spam"', "Expecting ':' delimiter", 7), 114 | ('{"spam":', 'Expecting value', 8), 115 | ('{"spam":42', "Expecting ',' delimiter", 10), 116 | ('{"spam":42,', 'Expecting property name', 117 | 11), 118 | ('"', 'Unterminated string starting at', 0), 119 | ('"spam', 'Unterminated string starting at', 0), 120 | ('[,', "Expecting value", 1), 121 | ] 122 | for data, msg, idx in test_cases: 123 | try: 124 | dirtyjson.loads(data) 125 | except dirtyjson.Error: 126 | e = sys.exc_info()[1] 127 | self.assertEqual( 128 | e.msg[:len(msg)], 129 | msg, 130 | "%r doesn't start with %r for %r" % (e.msg, msg, data)) 131 | self.assertEqual( 132 | e.pos, idx, 133 | "pos %r != %r for %r" % (e.pos, idx, data)) 134 | except Exception: 135 | e = sys.exc_info()[1] 136 | self.fail("Unexpected exception raised %r %s" % (e, e)) 137 | else: 138 | 
self.fail("Unexpected success parsing '%r'" % (data,)) 139 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # dirtyjson documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Sep 26 18:58:30 2008. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # The contents of this file are pickled, so don't put values in the namespace 9 | # that aren't pickleable (module imports are okay, they're removed automatically). 10 | # 11 | # All configuration values have a default value; values that are commented out 12 | # serve to show the default value. 13 | 14 | # noinspection PyUnresolvedReferences 15 | import sys 16 | # noinspection PyUnresolvedReferences 17 | import os 18 | 19 | # If your extensions are in another directory, add it here. If the directory 20 | # is relative to the documentation root, use os.path.abspath to make it 21 | # absolute, like shown here. 22 | # sys.path.append(os.path.abspath('some/directory')) 23 | 24 | # General configuration 25 | # --------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 29 | extensions = [] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # The suffix of source filenames. 35 | source_suffix = '.rst' 36 | 37 | # The master toctree document. 38 | master_doc = 'index' 39 | 40 | # General substitutions. 41 | project = 'dirtyjson' 42 | # noinspection PyShadowingBuiltins 43 | copyright = '2022, Scott Maxwell' 44 | 45 | # The default replacements for |version| and |release|, also used in various 46 | # other places throughout the built documents. 47 | # 48 | # The short X.Y version. 
49 | version = '1.0' 50 | # The full version, including alpha/beta/rc tags. 51 | release = '1.0.8' 52 | 53 | # There are two options for replacing |today|: either, you set today to some 54 | # non-false value, then it is used: 55 | # today = '' 56 | # Else, today_fmt is used as the format for a strftime call. 57 | today_fmt = '%B %d, %Y' 58 | 59 | # List of documents that shouldn't be included in the build. 60 | # unused_docs = [] 61 | 62 | # List of directories, relative to source directories, that shouldn't be searched 63 | # for source files. 64 | # exclude_dirs = [] 65 | 66 | # The reST default role (used for this markup: `text`) to use for all documents. 67 | # default_role = None 68 | 69 | # If true, '()' will be appended to :func: etc. cross-reference text. 70 | # add_function_parentheses = True 71 | 72 | # If true, the current module name will be prepended to all description 73 | # unit titles (such as .. function::). 74 | # add_module_names = True 75 | 76 | # If true, sectionauthor and moduleauthor directives will be shown in the 77 | # output. They are ignored by default. 78 | # show_authors = False 79 | 80 | # The name of the Pygments (syntax highlighting) style to use. 81 | pygments_style = 'sphinx' 82 | 83 | 84 | # Options for HTML output 85 | # ----------------------- 86 | 87 | # The style sheet to use for HTML and HTML Help pages. A file of that name 88 | # must exist either in Sphinx' static/ path, or in one of the custom paths 89 | # given in html_static_path. 90 | html_style = 'default.css' 91 | 92 | # The name for this set of Sphinx documents. If None, it defaults to 93 | # "<project> v<release> documentation". 94 | # html_title = None 95 | 96 | # A shorter title for the navigation bar. Default is the same as html_title. 97 | # html_short_title = None 98 | 99 | # The name of an image file (within the static path) to place at the top of 100 | # the sidebar.
101 | # html_logo = None 102 | 103 | # The name of an image file (within the static path) to use as favicon of the 104 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 105 | # pixels large. 106 | # html_favicon = None 107 | 108 | # Add any paths that contain custom static files (such as style sheets) here, 109 | # relative to this directory. They are copied after the builtin static files, 110 | # so a file named "default.css" will overwrite the builtin "default.css". 111 | html_static_path = ['_static'] 112 | 113 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 114 | # using the given strftime format. 115 | html_last_updated_fmt = '%b %d, %Y' 116 | 117 | # If true, SmartyPants will be used to convert quotes and dashes to 118 | # typographically correct entities. 119 | # html_use_smartypants = True 120 | 121 | # Custom sidebar templates, maps document names to template names. 122 | # html_sidebars = {} 123 | 124 | # Additional templates that should be rendered to pages, maps page names to 125 | # template names. 126 | # html_additional_pages = {} 127 | 128 | # If false, no module index is generated. 129 | html_use_modindex = False 130 | 131 | # If false, no index is generated. 132 | # html_use_index = True 133 | 134 | # If true, the index is split into individual pages for each letter. 135 | # html_split_index = False 136 | 137 | # If true, the reST sources are included in the HTML build as _sources/. 138 | # html_copy_source = True 139 | 140 | # If true, an OpenSearch description file will be output, and all pages will 141 | # contain a <link> tag referring to it. The value of this option must be the 142 | # base URL from which the finished HTML is served. 143 | # html_use_opensearch = '' 144 | 145 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). 146 | html_file_suffix = '.html' 147 | 148 | # Output file base name for HTML help builder.
149 | htmlhelp_basename = 'dirtyjsondoc' 150 | 151 | 152 | # Options for LaTeX output 153 | # ------------------------ 154 | 155 | # The paper size ('letter' or 'a4'). 156 | # latex_paper_size = 'letter' 157 | 158 | # The font size ('10pt', '11pt' or '12pt'). 159 | # latex_font_size = '10pt' 160 | 161 | # Grouping the document tree into LaTeX files. List of tuples 162 | # (source start file, target name, title, author, document class [howto/manual]). 163 | latex_documents = [ 164 | ('index', 'dirtyjson.tex', 'dirtyjson Documentation', 165 | 'Scott Maxwell', 'manual'), 166 | ] 167 | 168 | # The name of an image file (relative to this directory) to place at the top of 169 | # the title page. 170 | # latex_logo = None 171 | 172 | # For "manual" documents, if this is true, then toplevel headings are parts, 173 | # not chapters. 174 | # latex_use_parts = False 175 | 176 | # Additional stuff for the LaTeX preamble. 177 | # latex_preamble = '' 178 | 179 | # Documents to append as an appendix to all manuals. 180 | # latex_appendices = [] 181 | 182 | # If false, no module index is generated. 183 | # latex_use_modindex = True 184 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | dirtyjson is dual-licensed software. It is available under the terms 2 | of the MIT license, or the Academic Free License version 2.1. The full 3 | text of each license agreement is included below. This code is also 4 | licensed to the Python Software Foundation (PSF) under a Contributor 5 | Agreement. 
6 | 7 | MIT License 8 | =========== 9 | 10 | Copyright (c) 2022 Scott Maxwell 11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | this software and associated documentation files (the "Software"), to deal in 14 | the Software without restriction, including without limitation the rights to 15 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 16 | of the Software, and to permit persons to whom the Software is furnished to do 17 | so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in all 20 | copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 28 | SOFTWARE. 29 | 30 | Academic Free License v. 2.1 31 | ============================ 32 | 33 | Copyright (c) 2022 Scott Maxwell. All rights reserved. 34 | 35 | This Academic Free License (the "License") applies to any original work of authorship (the "Original Work") whose owner (the "Licensor") has placed the following notice immediately following the copyright notice for the Original Work: 36 | 37 | Licensed under the Academic Free License version 2.1 38 | 39 | 1) Grant of Copyright License. 
Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license to do the following: 40 | 41 | a) to reproduce the Original Work in copies; 42 | 43 | b) to prepare derivative works ("Derivative Works") based upon the Original Work; 44 | 45 | c) to distribute copies of the Original Work and Derivative Works to the public; 46 | 47 | d) to perform the Original Work publicly; and 48 | 49 | e) to display the Original Work publicly. 50 | 51 | 2) Grant of Patent License. Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license, under patent claims owned or controlled by the Licensor that are embodied in the Original Work as furnished by the Licensor, to make, use, sell and offer for sale the Original Work and Derivative Works. 52 | 53 | 3) Grant of Source Code License. The term "Source Code" means the preferred form of the Original Work for making modifications to it and all available documentation describing how to modify the Original Work. Licensor hereby agrees to provide a machine-readable copy of the Source Code of the Original Work along with each copy of the Original Work that Licensor distributes. Licensor reserves the right to satisfy this obligation by placing a machine-readable copy of the Source Code in an information repository reasonably calculated to permit inexpensive and convenient access by You for as long as Licensor continues to distribute the Original Work, and by publishing the address of that information repository in a notice immediately following the copyright notice that applies to the Original Work. 54 | 55 | 4) Exclusions From License Grant. Neither the names of Licensor, nor the names of any contributors to the Original Work, nor any of their trademarks or service marks, may be used to endorse or promote products derived from this Original Work without express prior written permission of the Licensor. 
Nothing in this License shall be deemed to grant any rights to trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor except as expressly stated herein. No patent license is granted to make, use, sell or offer to sell embodiments of any patent claims other than the licensed claims defined in Section 2. No right is granted to the trademarks of Licensor even if such marks are included in the Original Work. Nothing in this License shall be interpreted to prohibit Licensor from licensing under different terms from this License any Original Work that Licensor otherwise would have a right to license. 56 | 57 | 5) This section intentionally omitted. 58 | 59 | 6) Attribution Rights. You must retain, in the Source Code of any Derivative Works that You create, all copyright, patent or trademark notices from the Source Code of the Original Work, as well as any notices of licensing and any descriptive text identified therein as an "Attribution Notice." You must cause the Source Code for any Derivative Works that You create to carry a prominent Attribution Notice reasonably calculated to inform recipients that You have modified the Original Work. 60 | 61 | 7) Warranty of Provenance and Disclaimer of Warranty. Licensor warrants that the copyright in and to the Original Work and the patent rights granted herein by Licensor are owned by the Licensor or are sublicensed to You under the terms of this License with the permission of the contributor(s) of those copyrights and patent rights. Except as expressly stated in the immediately proceeding sentence, the Original Work is provided under this License on an "AS IS" BASIS and WITHOUT WARRANTY, either express or implied, including, without limitation, the warranties of NON-INFRINGEMENT, MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK IS WITH YOU. This DISCLAIMER OF WARRANTY constitutes an essential part of this License. 
No license to Original Work is granted hereunder except under this disclaimer. 62 | 63 | 8) Limitation of Liability. Under no circumstances and under no legal theory, whether in tort (including negligence), contract, or otherwise, shall the Licensor be liable to any person for any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or the use of the Original Work including, without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses. This limitation of liability shall not apply to liability for death or personal injury resulting from Licensor's negligence to the extent applicable law prohibits such limitation. Some jurisdictions do not allow the exclusion or limitation of incidental or consequential damages, so this exclusion and limitation may not apply to You. 64 | 65 | 9) Acceptance and Termination. If You distribute copies of the Original Work or a Derivative Work, You must make a reasonable effort under the circumstances to obtain the express assent of recipients to the terms of this License. Nothing else but this License (or another written agreement between Licensor and You) grants You permission to create Derivative Works based upon the Original Work or to exercise any of the rights granted in Section 1 herein, and any attempt to do so except under the terms of this License (or another written agreement between Licensor and You) is expressly prohibited by U.S. copyright law, the equivalent laws of other countries, and by international treaty. Therefore, by exercising any of the rights granted to You in Section 1 herein, You indicate Your acceptance of this License and all of its terms and conditions. 66 | 67 | 10) Termination for Patent Action. 
This License shall terminate automatically and You may no longer exercise any of the rights granted to You by this License as of the date You commence an action, including a cross-claim or counterclaim, against Licensor or any licensee alleging that the Original Work infringes a patent. This termination provision shall not apply for an action alleging patent infringement by combinations of the Original Work with other software or hardware. 68 | 69 | 11) Jurisdiction, Venue and Governing Law. Any action or suit relating to this License may be brought only in the courts of a jurisdiction wherein the Licensor resides or in which Licensor conducts its primary business, and under the laws of that jurisdiction excluding its conflict-of-law provisions. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any use of the Original Work outside the scope of this License or after its termination shall be subject to the requirements and penalties of the U.S. Copyright Act, 17 U.S.C. § 101 et seq., the equivalent laws of other countries, and international treaty. This section shall survive the termination of this License. 70 | 71 | 12) Attorneys Fees. In any action to enforce the terms of this License or seeking damages relating thereto, the prevailing party shall be entitled to recover its costs and expenses, including, without limitation, reasonable attorneys' fees and costs incurred in connection with such action, including any appeal of such action. This section shall survive the termination of this License. 72 | 73 | 13) Miscellaneous. This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. 74 | 75 | 14) Definition of "You" in This License. 
"You" throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with you. For purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 76 | 77 | 15) Right to Use. You may use the Original Work in all ways not otherwise restricted or conditioned by this License or by law, and Licensor promises not to interfere with or be responsible for such uses by You. 78 | 79 | This license is Copyright (C) 2003-2004 Lawrence E. Rosen. All rights reserved. Permission is hereby granted to copy and distribute this license without modification. This license may not be modified without the express written permission of its copyright owner. 80 | -------------------------------------------------------------------------------- /simplejson.LICENSE.txt: -------------------------------------------------------------------------------- 1 | simplejson is dual-licensed software. It is available under the terms 2 | of the MIT license, or the Academic Free License version 2.1. The full 3 | text of each license agreement is included below. This code is also 4 | licensed to the Python Software Foundation (PSF) under a Contributor 5 | Agreement. 
6 | 7 | MIT License 8 | =========== 9 | 10 | Copyright (c) 2006 Bob Ippolito 11 | 12 | Permission is hereby granted, free of charge, to any person obtaining a copy of 13 | this software and associated documentation files (the "Software"), to deal in 14 | the Software without restriction, including without limitation the rights to 15 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 16 | of the Software, and to permit persons to whom the Software is furnished to do 17 | so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be included in all 20 | copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 25 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 28 | SOFTWARE. 29 | 30 | Academic Free License v. 2.1 31 | ============================ 32 | 33 | Copyright (c) 2006 Bob Ippolito. All rights reserved. 34 | 35 | This Academic Free License (the "License") applies to any original work of authorship (the "Original Work") whose owner (the "Licensor") has placed the following notice immediately following the copyright notice for the Original Work: 36 | 37 | Licensed under the Academic Free License version 2.1 38 | 39 | 1) Grant of Copyright License. 
Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license to do the following: 40 | 41 | a) to reproduce the Original Work in copies; 42 | 43 | b) to prepare derivative works ("Derivative Works") based upon the Original Work; 44 | 45 | c) to distribute copies of the Original Work and Derivative Works to the public; 46 | 47 | d) to perform the Original Work publicly; and 48 | 49 | e) to display the Original Work publicly. 50 | 51 | 2) Grant of Patent License. Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license, under patent claims owned or controlled by the Licensor that are embodied in the Original Work as furnished by the Licensor, to make, use, sell and offer for sale the Original Work and Derivative Works. 52 | 53 | 3) Grant of Source Code License. The term "Source Code" means the preferred form of the Original Work for making modifications to it and all available documentation describing how to modify the Original Work. Licensor hereby agrees to provide a machine-readable copy of the Source Code of the Original Work along with each copy of the Original Work that Licensor distributes. Licensor reserves the right to satisfy this obligation by placing a machine-readable copy of the Source Code in an information repository reasonably calculated to permit inexpensive and convenient access by You for as long as Licensor continues to distribute the Original Work, and by publishing the address of that information repository in a notice immediately following the copyright notice that applies to the Original Work. 54 | 55 | 4) Exclusions From License Grant. Neither the names of Licensor, nor the names of any contributors to the Original Work, nor any of their trademarks or service marks, may be used to endorse or promote products derived from this Original Work without express prior written permission of the Licensor. 
Nothing in this License shall be deemed to grant any rights to trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor except as expressly stated herein. No patent license is granted to make, use, sell or offer to sell embodiments of any patent claims other than the licensed claims defined in Section 2. No right is granted to the trademarks of Licensor even if such marks are included in the Original Work. Nothing in this License shall be interpreted to prohibit Licensor from licensing under different terms from this License any Original Work that Licensor otherwise would have a right to license. 56 | 57 | 5) This section intentionally omitted. 58 | 59 | 6) Attribution Rights. You must retain, in the Source Code of any Derivative Works that You create, all copyright, patent or trademark notices from the Source Code of the Original Work, as well as any notices of licensing and any descriptive text identified therein as an "Attribution Notice." You must cause the Source Code for any Derivative Works that You create to carry a prominent Attribution Notice reasonably calculated to inform recipients that You have modified the Original Work. 60 | 61 | 7) Warranty of Provenance and Disclaimer of Warranty. Licensor warrants that the copyright in and to the Original Work and the patent rights granted herein by Licensor are owned by the Licensor or are sublicensed to You under the terms of this License with the permission of the contributor(s) of those copyrights and patent rights. Except as expressly stated in the immediately proceeding sentence, the Original Work is provided under this License on an "AS IS" BASIS and WITHOUT WARRANTY, either express or implied, including, without limitation, the warranties of NON-INFRINGEMENT, MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK IS WITH YOU. This DISCLAIMER OF WARRANTY constitutes an essential part of this License. 
No license to Original Work is granted hereunder except under this disclaimer. 62 | 63 | 8) Limitation of Liability. Under no circumstances and under no legal theory, whether in tort (including negligence), contract, or otherwise, shall the Licensor be liable to any person for any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or the use of the Original Work including, without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses. This limitation of liability shall not apply to liability for death or personal injury resulting from Licensor's negligence to the extent applicable law prohibits such limitation. Some jurisdictions do not allow the exclusion or limitation of incidental or consequential damages, so this exclusion and limitation may not apply to You. 64 | 65 | 9) Acceptance and Termination. If You distribute copies of the Original Work or a Derivative Work, You must make a reasonable effort under the circumstances to obtain the express assent of recipients to the terms of this License. Nothing else but this License (or another written agreement between Licensor and You) grants You permission to create Derivative Works based upon the Original Work or to exercise any of the rights granted in Section 1 herein, and any attempt to do so except under the terms of this License (or another written agreement between Licensor and You) is expressly prohibited by U.S. copyright law, the equivalent laws of other countries, and by international treaty. Therefore, by exercising any of the rights granted to You in Section 1 herein, You indicate Your acceptance of this License and all of its terms and conditions. 66 | 67 | 10) Termination for Patent Action. 
This License shall terminate automatically and You may no longer exercise any of the rights granted to You by this License as of the date You commence an action, including a cross-claim or counterclaim, against Licensor or any licensee alleging that the Original Work infringes a patent. This termination provision shall not apply for an action alleging patent infringement by combinations of the Original Work with other software or hardware. 68 | 69 | 11) Jurisdiction, Venue and Governing Law. Any action or suit relating to this License may be brought only in the courts of a jurisdiction wherein the Licensor resides or in which Licensor conducts its primary business, and under the laws of that jurisdiction excluding its conflict-of-law provisions. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any use of the Original Work outside the scope of this License or after its termination shall be subject to the requirements and penalties of the U.S. Copyright Act, 17 U.S.C. § 101 et seq., the equivalent laws of other countries, and international treaty. This section shall survive the termination of this License. 70 | 71 | 12) Attorneys Fees. In any action to enforce the terms of this License or seeking damages relating thereto, the prevailing party shall be entitled to recover its costs and expenses, including, without limitation, reasonable attorneys' fees and costs incurred in connection with such action, including any appeal of such action. This section shall survive the termination of this License. 72 | 73 | 13) Miscellaneous. This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. 74 | 75 | 14) Definition of "You" in This License. 
"You" throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with you. For purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 76 | 77 | 15) Right to Use. You may use the Original Work in all ways not otherwise restricted or conditioned by this License or by law, and Licensor promises not to interfere with or be responsible for such uses by You. 78 | 79 | This license is Copyright (C) 2003-2004 Lawrence E. Rosen. All rights reserved. Permission is hereby granted to copy and distribute this license without modification. This license may not be modified without the express written permission of its copyright owner. 80 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | `dirtyjson` --- JSON decoder 2 | ============================ 3 | 4 | .. dirtyjson 5 | Decode JSON data from dirty files. 6 | .. Scott Maxwell 7 | 8 | JSON (JavaScript Object Notation) is a subset of JavaScript 9 | syntax (ECMA-262 3rd edition) used as a lightweight data interchange format. 10 | 11 | `dirtyjson` is a JSON decoder meant for extracting JSON-type data from .js 12 | files. The returned data structure includes information about line and column 13 | numbers, so you can output more useful error messages. The input can also 14 | include single quotes, line comments, inline comments, dangling commas, 15 | unquoted single-word keys, and hexadecimal and octal numbers. 
16 | 17 | The goal of `dirtyjson` is to read JSON objects out of files that are 18 | littered with elements that do not fit the official JSON standard. By providing 19 | line and column number contexts, a dirty JSON file can be used as source input 20 | for a complex data parser or compiler. 21 | 22 | `dirtyjson` exposes an API familiar to users of the standard library 23 | `marshal` and `pickle` modules. However, `dirtyjson` provides 24 | only the `load(s)` capability. To write JSON, use either the standard 25 | `json` library or `simplejson`. 26 | 27 | .. note:: 28 | 29 | The code for `dirtyjson` is a fairly drastically rewritten version 30 | of the loader in `simplejson` so thanks go to Bob Ippolito of the 31 | `simplejson` project for providing such a nice starting point. 32 | 33 | Development of dirtyjson happens on Github: 34 | https://github.com/codecobblers/dirtyjson 35 | 36 | Decoding JSON and getting position information:: 37 | 38 | >>> import dirtyjson 39 | >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] 40 | >>> d = dirtyjson.loads("""["foo", /* not fu*/ {bar: ['baz', null, 1.0, 2,]}] and then ignore this junk""") 41 | >>> d == obj 42 | True 43 | >>> pos = d.attributes(0) # line/column position of first element in array 44 | >>> pos.line == 1 45 | True 46 | >>> pos.column == 2 47 | True 48 | >>> pos = d[1].attributes('bar') # line/column position of 'bar' key/value pair 49 | >>> pos.key.line == 1 50 | True 51 | >>> pos.key.column == 22 52 | True 53 | >>> pos.value.line == 1 54 | True 55 | >>> pos.value.column == 27 56 | True 57 | 58 | Decoding unicode from JSON:: 59 | 60 | >>> dirtyjson.loads('"\\"foo\\bar"') == u'"foo\x08ar' 61 | True 62 | 63 | Decoding JSON from streams:: 64 | 65 | >>> from dirtyjson.compat import StringIO 66 | >>> io = StringIO('["streaming API"]') 67 | >>> dirtyjson.load(io)[0] == 'streaming API' 68 | True 69 | 70 | Using Decimal instead of float:: 71 | 72 | >>> import dirtyjson 73 | >>> from decimal import Decimal 74 | >>> 
dirtyjson.loads('1.1', parse_float=Decimal) == Decimal('1.1') 75 | True 76 | 77 | 78 | Basic Usage 79 | ----------- 80 | 81 | load(fp[, encoding[, parse_float[, parse_int[, parse_constant[, search_for_first_object]]]]]) 82 | 83 | Performs the following translations in decoding by default: 84 | 85 | +---------------+-------------------------+ 86 | | JSON | Python | 87 | +===============+=========================+ 88 | | object | `AttributedDict` | 89 | +---------------+-------------------------+ 90 | | array | `AttributedList` | 91 | +---------------+-------------------------+ 92 | | string | unicode | 93 | +---------------+-------------------------+ 94 | | number (int) | int, long | 95 | +---------------+-------------------------+ 96 | | number (real) | float | 97 | +---------------+-------------------------+ 98 | | true | True | 99 | +---------------+-------------------------+ 100 | | false | False | 101 | +---------------+-------------------------+ 102 | | null | None | 103 | +---------------+-------------------------+ 104 | 105 | It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as their 106 | corresponding ``float`` values, which is outside the JSON spec. 107 | 108 | Deserialize *fp* (a ``.read()``-supporting file-like object containing a JSON 109 | document) to a Python object. `dirtyjson.Error` will be 110 | raised if the given document is not valid. 111 | 112 | If the contents of *fp* are encoded with an ASCII based encoding other than 113 | UTF-8 (e.g. latin-1), then an appropriate *encoding* name must be specified. 114 | Encodings that are not ASCII based (such as UCS-2) are not allowed, and 115 | should be wrapped with ``codecs.getreader(encoding)(fp)``, or simply decoded 116 | to a `unicode` object and passed to `loads`. The default 117 | setting of ``'utf-8'`` is fastest and should be used whenever possible.
118 | 119 | If *fp.read()* returns `str` then decoded JSON strings that contain 120 | only ASCII characters may be parsed as `str` for performance and 121 | memory reasons. If your code expects only `unicode` the appropriate 122 | solution is to wrap fp with a reader as demonstrated above. 123 | 124 | *parse_float*, if specified, will be called with the string of every JSON 125 | float to be decoded. By default, this is equivalent to ``float(num_str)``. 126 | This can be used to use another datatype or parser for JSON floats 127 | (e.g. `decimal.Decimal`). 128 | 129 | *parse_int*, if specified, will be called with the int of the string of every 130 | JSON int to be decoded. By default, this is equivalent to ``int(num_str)``. 131 | This can be used to use another datatype or parser for JSON integers 132 | (e.g. `float`). 133 | 134 | .. note:: 135 | 136 | Unlike the standard `json` module, `dirtyjson` always does 137 | ``int(num_str, 0)`` before passing through to the converter passed in as 138 | the *parse_int* parameter. This is to enable automatic handling of hex 139 | and octal numbers. 140 | 141 | *parse_constant*, if specified, will be called with one of the following 142 | strings: ``true``, ``false``, ``null``, ``'-Infinity'``, ``'Infinity'``, 143 | ``'NaN'``. This can be used to raise an exception if invalid JSON numbers are 144 | encountered or to provide alternate values for any of these constants. 145 | 146 | *search_for_first_object*, if ``True``, will cause the parser to search for 147 | the first occurrence of either ``{`` or ``[``. This is very useful for 148 | reading an object from a JavaScript file. 149 | 150 | loads(s[, encoding[, parse_float[, parse_int[, parse_constant[, search_for_first_object[, start_index]]]]]]) 151 | 152 | Deserialize *s* (a `str` or `unicode` instance containing a JSON 153 | document) to a Python object. `dirtyjson.Error` will be 154 | raised if the given JSON document is not valid.
155 | 156 | If *s* is a `str` instance and is encoded with an ASCII based encoding 157 | other than UTF-8 (e.g. latin-1), then an appropriate *encoding* name must be 158 | specified. Encodings that are not ASCII based (such as UCS-2) are not 159 | allowed and should be decoded to `unicode` first. 160 | 161 | If *s* is a `str` then decoded JSON strings that contain 162 | only ASCII characters may be parsed as `str` for performance and 163 | memory reasons. If your code expects only `unicode` the appropriate 164 | solution is to decode *s* to `unicode` prior to calling loads. 165 | 166 | *start_index*, if non-zero, will cause the parser to start processing from 167 | the specified offset, while maintaining the correct line and column numbers. 168 | This is very useful for reading an object from the middle of a JavaScript 169 | file. 170 | 171 | The other arguments have the same meaning as in `load`. 172 | 173 | Exceptions 174 | ---------- 175 | 176 | dirtyjson.Error(msg, doc, pos) 177 | 178 | Subclass of `ValueError` with the following additional attributes: 179 | 180 | msg 181 | 182 | The unformatted error message 183 | 184 | doc 185 | 186 | The JSON document being parsed 187 | 188 | pos 189 | 190 | The start index of doc where parsing failed 191 | 192 | lineno 193 | 194 | The line corresponding to pos 195 | 196 | colno 197 | 198 | The column corresponding to pos 199 | 200 | AttributedDict and AttributedList 201 | --------------------------------- 202 | 203 | The `dirtyjson` module uses `AttributedDict` and 204 | `AttributedList` instead of ``dict`` and ``list``. Each is actually a 205 | subclass of its base type (``dict`` or ``list``) and can be used as if they were 206 | the standard class, but these have been enhanced to store attributes with each 207 | element. We use those attributes to store line and column numbers. You can use 208 | that information to refer users back to the exact location in the original 209 | source file.
210 | 211 | Position() 212 | 213 | This is a very simple utility class that contains ``line`` and ``column``. 214 | It is used for storing the position attributes for `AttributedList` 215 | and `KeyValuePosition` 216 | 217 | KeyValuePosition() 218 | 219 | This is another very simple utility class that contains ``key`` and 220 | ``value``. Each of those is a `Position` object specifying the 221 | location in the original source string/file of the key and value. It is used 222 | for storing the position attributes for `AttributedDict`. 223 | 224 | AttributedDict() 225 | 226 | A subclass of ``dict`` that behaves exactly like a ``dict`` except that it 227 | maintains order like an ``OrderedDict`` and allows storing attributes for 228 | each key/value pair. 229 | 230 | add_with_attributes(self, key, value, attributes) 231 | 232 | Set the *key* in the underlying ``dict`` to the *value* and also store 233 | whatever is passed in as *attributes* for later retrieval. In our case, 234 | we store `KeyValuePosition`. 235 | 236 | attributes(self, key) 237 | 238 | Return the attributes associated with the specified *key* or ``None`` if 239 | no attributes exist for the key. In our case, we store 240 | `KeyValuePosition`. Retrieve position info like this:: 241 | 242 | pos = d.attributes(key) 243 | key_line = pos.key.line 244 | key_column = pos.key.column 245 | value_line = pos.value.line 246 | value_column = pos.value.column 247 | 248 | AttributedList() 249 | 250 | A subclass of ``list`` that behaves exactly like a ``list`` except that it 251 | allows storing attributes for each value. 252 | 253 | append(self, value, attributes=None): 254 | 255 | Appends *value* to the list and *attributes* to the associated location. 256 | In our case, we store `Position`. 257 | 258 | attributes(self, index) 259 | 260 | Returns the attributes for the value at the given *index*. In our case, 261 | we store `Position`. 
Retrieve position info like this:: 262 | 263 | pos = l.attributes(index) 264 | value_line = pos.line 265 | value_column = pos.column 266 | 267 | .. note:: 268 | 269 | This class is *NOT* robust. If you insert or delete items, the attributes 270 | will get out of sync. Making this a non-naive class would be a nice 271 | enhancement. 272 | -------------------------------------------------------------------------------- /dirtyjson/loader.py: -------------------------------------------------------------------------------- 1 | """Implementation of JSONDecoder 2 | """ 3 | from __future__ import absolute_import 4 | import re 5 | import sys 6 | import struct 7 | from .compat import fromhex, u, text_type, binary_type, PY2, unichr, ascii 8 | from dirtyjson.attributed_containers import AttributedDict, AttributedList 9 | from .error import Error 10 | 11 | 12 | def _floatconstants(): 13 | _BYTES = fromhex('7FF80000000000007FF0000000000000') 14 | # The struct module in Python 2.4 would get frexp() out of range here 15 | # when an endian is specified in the format string. 
def _floatconstants():
    """Return ``(nan, inf, -inf)`` built from their IEEE-754 bit patterns.

    The hex literal holds two big-endian doubles: a quiet NaN followed by
    positive infinity.
    """
    _BYTES = fromhex('7FF80000000000007FF0000000000000')
    # The struct module in Python 2.4 would get frexp() out of range here
    # when an endian is specified in the format string. Fixed in Python 2.5+
    if sys.byteorder != 'big':
        # 'dd' below unpacks in native order, so flip each 8-byte double.
        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
    nan, inf = struct.unpack('dd', _BYTES)
    return nan, inf, -inf

NaN, PosInf, NegInf = _floatconstants()

# Literal tokens recognised by the default ``parse_constant``.
_CONSTANTS = {
    'null': None,
    'true': True,
    'false': False,
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

CONSTANT_RE = re.compile('(%s)' % '|'.join(_CONSTANTS))
# Groups: integer part (decimal or 0x-hex, optional sign), fraction, exponent.
NUMBER_RE = re.compile(r'(-?(?:0x[\da-fA-F]+|\d+))(\.\d+)?([eE][-+]?\d+)?')
# Character whitelist for simple arithmetic expressions handed to eval().
EQUATION_RE = re.compile(r'[0-9.+\-]*[()[0-9+\-*/eEx&|]+')
# A run of ordinary characters followed by a quote, backslash or control char.
STRINGCHUNK_DOUBLEQUOTE = re.compile(r'(.*?)(["\\\x00-\x1f])')
STRINGCHUNK_SINGLEQUOTE = re.compile(r"(.*?)(['\\\x00-\x1f])")
UNQUOTED_KEYNAME = re.compile(r"([\w_$]+[\w\d_$]*)")
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile('[%s]*' % WHITESPACE_STR, re.VERBOSE | re.MULTILINE | re.DOTALL)

# Single-character escapes accepted after a backslash inside a string.
BACKSLASH = {
    '"': u('"'), '\'': u('\''), '\\': u('\u005c'), '/': u('/'),
    'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
}
DEFAULT_ENCODING = "utf-8"


class Position(object):
    """A 1-based ``line``/``column`` location in the parsed document."""

    def __init__(self, line, column):
        self.line = line
        self.column = column

    def __lt__(self, other):
        # Lexicographic order: compare lines first, then columns.
        if self.line > other.line:
            return False
        return self.line < other.line or self.column < other.column

    def __repr__(self):
        return '%s(line=%r, column=%r)' % (
            type(self).__name__, self.line, self.column)


class KeyValuePosition(object):
    """Pair of `Position` objects locating an object key and its value."""

    def __init__(self, key_position, value_position):
        self.key = key_position
        self.value = value_position

    def __repr__(self):
        return '%s(key=%r, value=%r)' % (
            type(self).__name__, self.key, self.value)


class DirtyJSONLoader(object):
    """JSON decoder that can handle muck in the file

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | AttributedDict    |
    +---------------+-------------------+
    | array         | AttributedList    |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, content, encoding=None, parse_float=None, parse_int=None,
                 parse_constant=None):
        """Prepare a loader over *content*.

        *content* is the document text (``bytes`` are decoded on Python 3).
        *encoding* defaults to ``DEFAULT_ENCODING``; for any other value the
        text is checked and `Error` is raised at the first character that
        the encoding cannot represent.  *parse_float*, *parse_int* and
        *parse_constant* override the default converters.
        """
        self.encoding = encoding or DEFAULT_ENCODING
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.memo = {}
        if not PY2 and isinstance(content, binary_type):
            try:
                # Honour the caller's encoding instead of always assuming
                # the interpreter default.
                self.content = content.decode(self.encoding)
            except UnicodeDecodeError:
                # Keep the historical behaviour: decode with the default
                # codec and let the check below report the offending
                # character through Error.
                self.content = content.decode()
        else:
            self.content = content
        if self.encoding != DEFAULT_ENCODING:
            # Round-trip through the declared encoding, dropping whatever it
            # cannot represent; any difference pinpoints a bad character.
            fixed = self.content.encode(self.encoding, 'ignore').decode(self.encoding)
            if self.content != fixed:
                fixed_len = len(fixed)
                for index, character in enumerate(self.content):
                    # ``fixed`` is shorter than the content, so guard the
                    # index before comparing.
                    if index >= fixed_len or character != fixed[index]:
                        raise Error('Non-{} character {}'.format(self.encoding, ascii(character)), self.content, index)
        self.end = len(self.content)
        self.lineno = 1
        self.current_line_pos = 0
        self.pos = 0
        self.expecting = 'Expecting value'

    def _next_character(self):
        """Consume and return the character at ``self.pos``.

        Raises `Error` with the current ``self.expecting`` message at end of
        input.
        """
        try:
            nextchar = self.content[self.pos]
            self.pos += 1
            return nextchar
        except IndexError:
            raise Error(self.expecting, self.content, self.pos)

    def _next_character_after_whitespace(self):
        """Skip whitespace/comments, then consume and return one character.

        Returns ``''`` (without advancing) at end of input.
        """
        try:
            nextchar = self.content[self.pos]
            # '/' may open a comment that is not preceded by whitespace;
            # _skip_whitespace() is a no-op when it is not a comment.
            if nextchar in WHITESPACE_STR or nextchar == '/':
                self._skip_whitespace()
                nextchar = self.content[self.pos]
            self.pos += 1
            return nextchar
        except IndexError:
            return ''

    def _skip_whitespace(self):
        """Advance past whitespace, ``//`` line and ``/* */`` block comments.

        An unterminated comment swallows the rest of the input.
        """
        while True:
            self._skip_forward_to(WHITESPACE.match(self.content, self.pos).end())
            if self.pos > self.end - 2:
                break
            two_chars = self.content[self.pos:self.pos + 2]
            if two_chars == '//' or two_chars == '/*':
                terminator = '\n' if two_chars == '//' else '*/'
                # find() returns -1 when the terminator is missing, whereas
                # index() would raise a bare ValueError.  Start after the
                # opener so "/*/" is not taken for a complete comment.
                lf = self.content.find(terminator, self.pos + 2)
                if lf >= 0:
                    self._skip_forward_to(lf + len(terminator))
                else:
                    self._skip_forward_to(self.end)
                    break
            else:
                break

    def _skip_forward_to(self, end):
        """Move ``self.pos`` to *end*, updating line bookkeeping on the way."""
        if end != self.pos:
            linefeeds = self.content.count('\n', self.pos, end)
            if linefeeds:
                self.lineno += linefeeds
                rpos = self.content.rfind('\n', self.pos, end)
                self.current_line_pos = rpos + 1
            self.pos = end

    def _current_position(self, offset=0):
        """Return a `Position` for ``self.pos`` shifted by *offset* columns."""
        return Position(self.lineno, self.pos - self.current_line_pos + 1 + offset)

    def scan(self):
        """Parse and return the value starting at ``self.pos``.

        Handles strings, objects, arrays, the named constants, numbers
        (decimal, hex, legacy octal) and simple arithmetic expressions.
        Raises `Error` when no value can be parsed.
        """
        self.expecting = 'Expecting value'
        nextchar = self._next_character()

        if nextchar == '"' or nextchar == "'":
            return self.parse_string(nextchar)
        if nextchar == '{':
            return self.parse_object()
        if nextchar == '[':
            return self.parse_array()

        # Not a delimiter: un-read the character and try the regex scanners.
        self.pos -= 1
        m = CONSTANT_RE.match(self.content, self.pos)
        if m:
            self.pos = m.end()
            return self.parse_constant(m.groups()[0])

        m = NUMBER_RE.match(self.content, self.pos)
        # A number followed by an operator is an expression, handled below.
        if m and (m.end() == len(self.content) or self.content[m.end()] not in '+-/*()'):
            integer, frac, exp = m.groups()
            if frac or exp:
                res = self.parse_float(integer + (frac or '') + (exp or ''))
            else:
                try:
                    res = self.parse_int(int(integer, 0))
                except ValueError:
                    # int(..., 0) rejects legacy C-style octal ("017",
                    # "-017") on Python 3; retry with the "0o" prefix while
                    # preserving the sign.
                    sign = '-' if integer[0] == '-' else ''
                    digits = integer[len(sign):]
                    if digits[0] != '0':
                        raise
                    res = self.parse_int(int(sign + '0o' + digits[1:], 0))
            self.pos = m.end()
            return res
        m = EQUATION_RE.match(self.content, self.pos)
        if m:
            # SECURITY NOTE: this evaluates text taken from the document.
            # EQUATION_RE restricts it to digits, operators and the letters
            # e/E/x, and builtins are withheld, but an expression such as
            # 9**9**9 can still exhaust CPU or memory.  Do not feed
            # untrusted input without an external resource limit.
            try:
                res = eval(m.string[m.pos:m.end()], {'__builtins__': {}}, {})
            except (SyntaxError, NameError, TypeError, ArithmeticError):
                # TypeError ("1(2)") and ArithmeticError ("1/0", overflow)
                # are malformed input too; report them like any other.
                raise Error('Cannot evaluate expression', self.content, self.pos)
            self.pos = m.end()
            return res
        raise Error(self.expecting, self.content, self.pos)

    def parse_string(self, terminating_character,
                     _b=BACKSLASH, _join=u('').join,
                     _py2=PY2, _maxunicode=sys.maxunicode):
        """Parse a quoted string whose opening quote was just consumed.

        *terminating_character* is the quote (``"`` or ``'``) that opened
        the string; ``self.pos`` points at the first character after it.
        Escape sequences are decoded and literal control characters are
        passed through.  On return ``self.pos`` is just past the closing
        quote.

        Returns the decoded string.  Raises `Error` for an unterminated
        string or an invalid escape sequence.
        """
        _m = STRINGCHUNK_DOUBLEQUOTE.match if terminating_character == '"' else STRINGCHUNK_SINGLEQUOTE.match
        chunks = []
        _append = chunks.append
        begin = self.pos - 1
        while 1:
            chunk = _m(self.content, self.pos)
            if chunk is None:
                raise Error(
                    "Unterminated string starting at", self.content, begin)
            # Advance through _skip_forward_to so a literal newline inside
            # the string keeps lineno/column tracking correct afterwards.
            self._skip_forward_to(chunk.end())
            content, terminator = chunk.groups()
            # Content contains zero or more unescaped string characters
            if content:
                if _py2 and not isinstance(content, text_type):
                    content = text_type(content, self.encoding)
                _append(content)
            # Terminator is the end of string, a literal control character,
            # or a backslash denoting that an escape sequence follows
            if terminator == terminating_character:
                break
            elif terminator != '\\':
                _append(terminator)
                continue
            try:
                esc = self.content[self.pos]
            except IndexError:
                raise Error(
                    "Unterminated string starting at", self.content, begin)
            # If not a unicode escape sequence, must be in the lookup table
            if esc != 'u':
                try:
                    char = _b[esc]
                except KeyError:
                    # NOTE(review): '%r' is passed through unformatted;
                    # presumably Error substitutes the offending character
                    # (as simplejson's formatter does) -- confirm in error.py.
                    msg = "Invalid \\X escape sequence %r"
                    raise Error(msg, self.content, self.pos)
                self.pos += 1
            else:
                # Unicode escape sequence
                msg = "Invalid \\uXXXX escape sequence"
                esc = self.content[self.pos + 1:self.pos + 5]
                esc_x = esc[1:2]
                # int(..., 16) would accept "0x41", so reject it explicitly.
                if len(esc) != 4 or esc_x == 'x' or esc_x == 'X':
                    raise Error(msg, self.content, self.pos - 1)
                try:
                    uni = int(esc, 16)
                except ValueError:
                    raise Error(msg, self.content, self.pos - 1)
                self.pos += 5
                # Check for surrogate pair on UCS-4 systems
                # Note that this will join high/low surrogate pairs
                # but will also pass unpaired surrogates through
                if _maxunicode > 65535 and uni & 0xfc00 == 0xd800 and self.content[self.pos:self.pos + 2] == '\\u':
                    esc2 = self.content[self.pos + 2:self.pos + 6]
                    esc_x = esc2[1:2]
                    if len(esc2) == 4 and not (esc_x == 'x' or esc_x == 'X'):
                        try:
                            uni2 = int(esc2, 16)
                        except ValueError:
                            raise Error(msg, self.content, self.pos)
                        if uni2 & 0xfc00 == 0xdc00:
                            uni = 0x10000 + (((uni - 0xd800) << 10) |
                                             (uni2 - 0xdc00))
                            self.pos += 6
                char = unichr(uni)
            # Append the unescaped character
            _append(char)
        return _join(chunks)

    def parse_object(self):
        """Parse an object whose opening ``{`` was just consumed.

        Keys may be quoted (either quote style) or bare identifiers, and a
        dangling comma before ``}`` is accepted.  Returns an
        `AttributedDict` whose attributes are `KeyValuePosition` objects.
        Raises `Error` on malformed input.
        """
        # Intern keys so repeated names share one string object.
        memo_get = self.memo.setdefault
        obj = AttributedDict()
        nextchar = self._next_character_after_whitespace()
        while True:
            # Empty object, or a dangling comma right before the brace.
            if nextchar == '}':
                break
            key_pos = self._current_position(-len(nextchar))
            if nextchar == '"' or nextchar == "'":
                key = self.parse_string(nextchar)
            else:
                # nextchar is '' at end of input, in which case nothing was
                # consumed and the match must start at self.pos itself.
                chunk = UNQUOTED_KEYNAME.match(self.content, self.pos - len(nextchar))
                if chunk is None:
                    raise Error(
                        "Expecting property name",
                        self.content, self.pos)
                self.pos = chunk.end()
                key = chunk.groups()[0]
            key = memo_get(key, key)

            if self._next_character_after_whitespace() != ':':
                raise Error("Expecting ':' delimiter", self.content, self.pos)

            self._skip_whitespace()
            key_value_pos = KeyValuePosition(key_pos, self._current_position())
            value = self.scan()
            obj.add_with_attributes(key, value, key_value_pos)

            nextchar = self._next_character_after_whitespace()
            if nextchar == '}':
                break
            elif nextchar != ',':
                raise Error("Expecting ',' delimiter or '}'", self.content, self.pos - len(nextchar))

            nextchar = self._next_character_after_whitespace()

        return obj

    def parse_array(self):
        """Parse an array whose opening ``[`` was just consumed.

        A dangling comma before ``]`` is accepted.  Returns an
        `AttributedList` whose attributes are `Position` objects.  Raises
        `Error` on malformed input.
        """
        values = AttributedList()
        nextchar = self._next_character_after_whitespace()
        # Look-ahead for trivial empty array
        if nextchar == ']':
            return values
        elif nextchar == '':
            raise Error("Expecting value or ']'", self.content, self.pos)
        while True:
            # Dangling comma right before the closing bracket.
            if nextchar == ']':
                break
            # Un-read the look-ahead character so scan() sees the value start.
            self.pos -= len(nextchar)
            value_pos = self._current_position()
            value = self.scan()
            values.append(value, value_pos)
            nextchar = self._next_character_after_whitespace()
            if nextchar == ']':
                break
            elif nextchar != ',':
                raise Error("Expecting ',' delimiter or ']'", self.content, self.pos - len(nextchar))

            nextchar = self._next_character_after_whitespace()

        return values

    def decode(self, search_for_first_object=False, start_index=0):
        """Return the Python representation of the loaded document.

        *start_index*, if non-zero, is the offset at which parsing starts;
        line and column numbers still refer to the whole document.
        *search_for_first_object*, if true, skips ahead to the first ``[``
        or ``{`` at or after the starting offset before parsing.
        """
        if start_index:
            self._skip_forward_to(start_index)

        if search_for_first_object:
            i = self.content.find('[', self.pos)
            o = self.content.find('{', self.pos)
            # Pick whichever opener comes first; -1 means "not found".
            if i > o >= self.pos or i < 0:
                i = o
            if i >= self.pos:
                self._skip_forward_to(i)

        self._skip_whitespace()
        obj = self.scan()
        return obj