├── upload.bat ├── json_compare ├── __init__.py ├── json_compare.pyi ├── test_json_compare.py └── json_compare.py ├── setup.py ├── LICENSE ├── .gitignore └── README.md /upload.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | python setup.py sdist upload -r pypi 3 | pause -------------------------------------------------------------------------------- /json_compare/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # author: Rainy Chan mail to: rainydew@qq.com 3 | from .json_compare import compare, check 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | from distutils.core import setup 4 | from os import path, chdir, system 5 | from sys import argv 6 | 7 | if "upload" in argv: 8 | chdir("json_compare") 9 | print("running test") 10 | assert system("python test_json_compare.py") == 0 11 | chdir("..") 12 | 13 | this_directory = path.abspath(path.dirname(__file__)) 14 | 15 | try: 16 | import pypandoc 17 | long_description = pypandoc.convert('README.md', 'rst') 18 | except: 19 | long_description = "" 20 | 21 | setup( 22 | name='json-compare-deep', 23 | version='2.0', 24 | description='A recursive json comparison library that handles list orders and fuzzy types', 25 | author='Rainy Chan', 26 | author_email='rainydew@qq.com', 27 | url='https://github.com/rainydew/jsoncomparedeep', 28 | packages=['json_compare'], 29 | install_requires=['six>=1.12.0'], 30 | keywords='json comparison order unicode fuzzy', 31 | long_description=long_description, 32 | python_requires=">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 33 | ) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Rainy Chan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | # Jetbrains 133 | .idea 134 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # json_compare 2 | 3 | A library to compare any json string/bytes/json-like-objects. 4 | 5 | Version 2.0 is a rewrite to support more clear and easy-to-use functions(backward incompatible, so I started a new repo). 6 | 7 | Version 1.20 is an enhance version that supports *omit_path* to ignore omitted keys in dict, and fixes many bugs, esp counting message inaccurate issue when same length & element collections; and wrong not_found info when different length under *ignore_list_seq*=**True** 8 | 9 | Version 1.19 is an enhance version that fixes small bugs with strict_number_type supported to make 1 != 1.0. 10 | 11 | Version 1.18 is a quickfix + enhance version that fixes custom handlers cannot be passed recursively bug, and *float_fuzzy_digits* can be passed directly. 12 | 13 | Version 1.17 is an enhance version that supports custom handlers to handle outputs. 14 | 15 | Version 1.16 is a quickfix version that supports Python 3.8 (and 3.9 as well). 16 | 17 | ## Features 18 | 19 | * Compare jsons and print the differences (what and where they are, recursion supported). Useful for interface testing. 20 | * Config whether it will ignore the order of items in a list or not, recursively. 21 | * Both python 2.6-2.7 and 3.5-3.9 supported. (New) 22 | * Regular expressions supported for string to skip unconcerned keys or just to assert the format. 23 | * Compact **str** and **unicode** (or **bytes** and **str** in python3), they are considered equal. Good for non ascii coding languages. 24 | * Both **json string** (**unicode** or **binary** str) and **json object** (**dict**, **list** or **tuple**) are supported. 25 | * Support tuples, so results from pymysql.cursors.DictCursor can compare with interface response directly. 26 | * Json type legal check(strict_json). 27 | * Support skipping anywhere using argument like *ignore_path=["/a/1/k", "/a/1/l"]*, dict keys or list indexes. Skipped fields are regarded as match. 28 | * The ignore_path list now support regular expressions too. You can use *[r"^(/\d+/a)"]* as ignore_path to skip all keys named "a" in *[{"a": 1, "b": 2}, {"a": 1, "b": 4}]* but still compare the value of "b". (New) 29 | * Useful when compare multi records in database query result (dictionary cursor) with some fields unconcerned. 30 | * Fuzzy equal when handling floats. 31 | * Custom handlers supported. 32 | * Strict_number_type option to make int(1) != float(1.0) supported. 33 | * Emit keys in dict compare supported. (New) 34 | 35 | ## QuickStart 36 | 37 | install 38 | 39 | ```shell 40 | pip install jsoncomparedeep 41 | ``` 42 | 43 | or update 44 | 45 | ```shell 46 | pip install -U jsoncomparedeep 47 | ``` 48 | 49 | a simple example 50 | 51 | ```python 52 | from json_compare import compare 53 | print(compare({"key1":["v1","v2"],"key2":{"key3":1}},{"key1":["v2","v1"],"key2":{"key3":2}})) 54 | ``` 55 | 56 | to see 57 | 58 | ``` 59 | a is {'key1': ['v1', 'v2'], 'key2': {'key3': 1}} 60 | b is {'key1': ['v2', 'v1'], 'key2': {'key3': 2}} 61 | ignore_list_seq = True, re_compare = True, ignore_path = None, omit_path = None, float_fuzzy_digits = 0 62 | different value at /key2/key3 63 | a: 1 64 | b: 2 65 | False 66 | ``` 67 | 68 | For more demos and information, just install it and visit the test file **test_json_compare.py** in **Your_Python_Path/Lib/site-packages/json_compare/** 69 | 70 | ## Small Hints 71 | 72 | * Datetime in SQL result is not JSON serializable type, use something like **CAST(create_time as CHAR) 'create_time'** in SQL statement to solve it. 73 | 74 | ## Bug report 75 | 76 | * Issues and bugs report to rainydew@qq.com. 77 | * Homepage icon leads to my Github project page, issues / PRs / stars are welcomed :) 78 | -------------------------------------------------------------------------------- /json_compare/json_compare.pyi: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # author: Rainy Chan mail to: rainydew@qq.com 3 | 4 | def compare(a, b, ignore_list_seq=True, re_compare=True, ignore_path=None, callback=print, strict_json=False, 5 | float_fuzzy_digits=0, strict_number_type=False, omit_path=None): 6 | """this function returns whether a is matched with b 7 | :param str or unicode or list or tuple or dict a: The first json string/json-like object to compare 8 | :param str or unicode or list or tuple or dict b: The second one to be compared 9 | :param bool ignore_list_seq: Set True to ignore the order when comparing arrays(lists), recursively 10 | :param bool re_compare: Set True to enable regular expressions for assertion. The pattern MUST contains ONE 11 | bracket, start with ^ or end with $, otherwise it won't be considered as an re-pattern. You can use ^.*?(sth) or 12 | ().*$ or so on to extract something from middle of the string. ^(.*)$ can just match any string, make this item 13 | ignored. Comparing two re-patterns makes no sense so it isn't allowed 14 | :param list[str or unicode] or None ignore_path: a list of element-path to be ignored when comparing value. e.g. 15 | ["/key1/key2", "/key3/1"] means all "ignored" in {"key1":{"key2":"ignored"},"key3":["not ignored","ignored"]} 16 | :param function callback: A one-arg function to hold the difference, default to `print` 17 | :param bool strict_json: Set True to ensure that all dict/list objects are JSON serializable. You may set it to 18 | False to make some special types comparable, e.g. Decimal, bytes and struct_time, useful for db assertion. 19 | BEAWARE !!! Bytes-like str (str in python2) is not supported. Since you should use json.dumps(u"hello") instead 20 | of json.dumps("hello") It may raise UnicodeDecodeError if there are Chinese characters or so on. 21 | :param int float_fuzzy_digits: 0(default) means disable. Set it to N means we consider number a == b if abs(a-b) 22 | < 10**(-N) 23 | :param bool strict_number_type: False(default) means allow 1(int)==1.0(float). Set True to ensure type equality 24 | :param list[str or unicode] or None omit_path: a list of element-paths to be ignored even if they are absent. 25 | ["/key1/key2"] means {"key1":{"key2":"ignored"}} can match {"key1":{}}. the last path-segament MUSTBE a map key, 26 | you must use * to shadow list index and it doesn't support regular expression. e.g. /key1/*/key2 27 | :return bool: Whether two json string or json-like objects are equal. If not, print the differences 28 | """ 29 | return True 30 | 31 | def check(a, b, ignore_list_seq=True, re_compare=True, ignore_path=None, callback=print, strict_json=False, 32 | float_fuzzy_digits=0, strict_number_type=False, omit_path=None): 33 | """this function raise an AssertionError when matching failed 34 | :param str or unicode or list or tuple or dict a: The first json string/json-like object to compare 35 | :param str or unicode or list or tuple or dict b: The second one to be compared 36 | :param bool ignore_list_seq: Set True to ignore the order when comparing arrays(lists), recursively 37 | :param bool re_compare: Set True to enable regular expressions for assertion. The pattern MUST contains ONE 38 | bracket, start with ^ or end with $, otherwise it won't be considered as an re-pattern. You can use ^.*?(sth) or 39 | ().*$ or so on to extract something from middle of the string. ^(.*)$ can just match any string, make this item 40 | ignored. Comparing two re-patterns makes no sense so it isn't allowed 41 | :param list[str or unicode] or None ignore_path: a list of element-path to be ignored when comparing value. e.g. 42 | ["/key1/key2", "/key3/1"] means all "ignored" in {"key1":{"key2":"ignored"},"key3":["not ignored","ignored"]} 43 | :param function callback: A one-arg function to hold the difference, default to `print` 44 | :param bool strict_json: Set True to ensure that all dict/list objects are JSON serializable. You may set it to 45 | False to make some special types comparable, e.g. Decimal, bytes and struct_time, useful for db assertion. 46 | BEAWARE !!! Bytes-like str (str in python2) is not supported. Since you should use json.dumps(u"hello") instead 47 | of json.dumps("hello") It may raise UnicodeDecodeError if there are Chinese characters or so on. 48 | :param int float_fuzzy_digits: 0(default) means disable. Set it to N means we consider number a == b if abs(a-b) 49 | < 10**(-N) 50 | :param bool strict_number_type: False(default) means allow 1(int)==1.0(float). Set True to ensure type equality 51 | :param list[str or unicode] or None omit_path: a list of element-paths to be ignored even if they are absent. 52 | ["/key1/key2"] means {"key1":{"key2":"ignored"}} can match {"key1":{}}. the last path-segament MUSTBE a map key, 53 | you must use * to shadow list index and it doesn't support regular expression. e.g. /key1/*/key2 54 | """ 55 | return True 56 | -------------------------------------------------------------------------------- /json_compare/test_json_compare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | from json_compare import compare, check 4 | import six 5 | 6 | 7 | def long_line(): 8 | print("-" * 120) 9 | 10 | 11 | def run_tests(): 12 | a = {"姓名": "王大锤"} # str and unicode (or bytes and str in python3) are compatible, useful in Chinese words... 13 | b = {u"姓名": u"王大锤"} if six.PY2 else {"姓名".encode("utf-8"): "王大锤".encode("utf-8")} 14 | check(a, b) 15 | 16 | long_line() 17 | 18 | a = [[1, 2, 3], [4, 5, 6]] 19 | b = ([6, 5, 4], [3, 2, 1]) # tuples (useful in pymysql & DictCursor) and different order of arrays are supported 20 | check(a, b) 21 | 22 | long_line() 23 | 24 | a = [[1, 2, 3], [4, 5, 6]] 25 | b = [[3, 2, 1], [6, 5, 4]] # ignore_list_seq=False makes these two different, however 26 | assert not compare(a, b, ignore_list_seq=False) 27 | 28 | long_line() 29 | 30 | a = {"a": 1, "b": 3, "c": False, "d": "ok"} 31 | b = {"a": 1, "b": 2, "c": "False", "e": "ok"} # False != "False" 32 | assert not compare(a, b) 33 | 34 | long_line() 35 | 36 | a = {"a": [1, {"k": ["ok"]}]} 37 | b = {"a": [1, {"k": ["error"]}]} # ignoring list order, we aren't sure to pair {"k": ["ok"]} with {"k": ["error"]} 38 | assert not compare(a, b) 39 | 40 | long_line() 41 | 42 | assert not compare(a, b, ignore_list_seq=False) # however, if we consider the order, we can locate differences deeper 43 | 44 | long_line() 45 | 46 | a = {"a": [1, {"k": [0]}]} # we ignore this path now, test will pass. 47 | b = '{"a": [1, {"k": [1]}]}' # notice we can't specify path deeper in a list when ignore_list_seq is enabled 48 | check(a, b, ignore_list_seq=False, ignore_path=["/a/1/k"]) 49 | 50 | long_line() 51 | 52 | a = [{"a": 1, "b": 2}, {"a": 5, "b": 4}] # now we finally support regular expressions in ignore_path list 53 | b = [{"a": 3, "b": 2}, {"a": 6, "b": 4}] # in this case, only value of "b" concerned 54 | check(a, b, ignore_list_seq=False, ignore_path=[r"^(/\d+/a)"]) 55 | 56 | long_line() 57 | 58 | a = [{"a": 1, "b": 2}, {"a": 1, "b": 4}] # also useful under list_seq ignored 59 | b = [{"a": 2, "b": 4}, {"a": 2, "b": 2}] 60 | check(a, b, ignore_path=[r"^(/\d+/a)"]) 61 | 62 | long_line() 63 | 64 | a = [{"a": 1, "b": 3}, {"a": 1, "b": 4}] # this time, 3 and 2 cannot match 65 | b = [{"a": 2, "b": 4}, {"a": 2, "b": 2}] 66 | assert not compare(a, b, ignore_path=[r"^(/\d+/a)"]) 67 | 68 | long_line() 69 | 70 | a = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 4}] # this time, only different frequency found 71 | b = [{"a": 6, "b": 4}, {"a": 7, "b": 2}, {"a": 8, "b": 2}] # but it will choose a random value of "a" to display 72 | assert not compare(a, b, ignore_path=[r"^(/\d+/a)"]) # it's caused by logic restriction, don't get confused 73 | 74 | long_line() 75 | 76 | a = {"a": [1, {"k": [0], "l": None}, 2]} # ignore two paths this time, only difference at /a/2 will be shown 77 | b = {"a": [1, {"k": [1], "l": False}, 3]} 78 | assert not compare(a, b, ignore_list_seq=False, ignore_path=["/a/1/k", "/a/1/l"]) 79 | 80 | long_line() 81 | 82 | a = '{"rtn": 0, "msg": "ok"}' # can compare json string with python dict/list objects 83 | b = {"rtn": 1, "msg": "username not exist"} 84 | assert not compare(a, b) 85 | 86 | long_line() 87 | 88 | a = u'{"body":{"text":"你好"}}' # both text and binary json strings are supported 89 | b = '{"body":{"text":"你好啊"}}' 90 | assert not compare(a, b) 91 | 92 | long_line() 93 | 94 | a = [1, 2, 2] # even we ignore the order, the frequency of elements are concerned 95 | b = [1, 1, 2] 96 | assert not compare(a, b) 97 | 98 | long_line() 99 | 100 | a = [1, 2, 3] 101 | b = [1, 3, 4, 5] # even if the length of lists are not equal, we can still know the difference 102 | assert not compare(a, b) 103 | 104 | long_line() 105 | 106 | a = [1, 2, 3] 107 | b = [1, 3, 4, 5] # but we CANNOT keep the order of elements under different length even if ignore_list_seq is False 108 | assert not compare(a, b, ignore_list_seq=False) 109 | 110 | long_line() 111 | 112 | a = [1.0] # in face cp.compare(1, 1.0) is allowed, however non-standard jsons are not recommend 113 | b = [1 if six.PY3 else eval("1L")] # Integers and floats are compatible, including long of python 2 114 | check(a, b) 115 | 116 | long_line() 117 | 118 | a = [r"^(.*)$"] # re-comparing enabled as default. Be careful bare r"^(.*)$" without list is considered as json-str 119 | b = ["anything"] # use this to skip any unconcerned fields 120 | check(a, b, ignore_list_seq=False) 121 | 122 | long_line() 123 | 124 | a = [r"(.*)"] # without ^-start or $-end, this won't be regarded as re-pattern 125 | b = ["anything"] 126 | assert not compare(a, b, ignore_list_seq=False) 127 | 128 | long_line() 129 | 130 | a = [r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})$"] # we can use re-comparing to confine formats but not values 131 | b = ["anything"] 132 | assert not compare(a, b, ignore_list_seq=False) 133 | 134 | long_line() 135 | 136 | a = [r"^(2019-07-01 \d{2}:\d{2}:\d{2})$"] # e.g. this assertion will pass 137 | b = ["2019-07-01 12:13:14"] 138 | check(a, b, ignore_list_seq=False) 139 | 140 | long_line() 141 | 142 | a = [r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})$", r"^(.*)$"] 143 | b = ["anything", "otherthing"] # when using re with order-ignored list, it will be crossing compare 144 | # be careful, potential chance of messy 145 | assert not compare(a, b) 146 | 147 | long_line() 148 | 149 | a = [r"^(.*)$"] # two re-pattern is not allowed 150 | b = [r"^(.+)$"] 151 | try: 152 | compare(a, b, ignore_list_seq=False) 153 | except Exception as e: 154 | print(e) 155 | else: 156 | raise AssertionError() 157 | 158 | long_line() 159 | 160 | a = [r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})$", "otherthing"] 161 | b = ["anything", r"^(.*)$"] # this errors when comparing a[0] with b[1] due to the above rule 162 | try: 163 | compare(a, b) 164 | except Exception as e: 165 | print(e) 166 | else: 167 | raise AssertionError() 168 | 169 | long_line() 170 | 171 | a = r'["^(2019-07-01 \\d{2}:\\d{2}:\\d{2})$"]' # double slashes are needed because this is a json-string, not list 172 | # or use '["^(2019-07-01 \\\\\d{2}:\\\\\d{2}:\\\\\d{2})$"]' will also work 173 | b = ["2019-07-01 12:13:14"] 174 | check(a, b, ignore_list_seq=False) 175 | 176 | long_line() 177 | 178 | a = r'[r"^(2019-07-01 \d{2}:\d{2}:\d{2})$"]' # json cannot parse inner 'r' notation, so this won't work 179 | b = ["2019-07-01 12:13:14"] 180 | try: 181 | compare(a, b, ignore_list_seq=False) 182 | except Exception as e: 183 | print(e) 184 | else: 185 | raise AssertionError() 186 | 187 | long_line() 188 | 189 | a = [r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})"] # only fully match will pass re-comparing 190 | b = ["2019-07-01 12:13:14.567"] 191 | assert not compare(a, b, ignore_list_seq=False) 192 | 193 | long_line() 194 | 195 | a = [r"^.*?(\d)-(\d)"] # two or more brackets will result certain False 196 | b = ["2019-07-01 12:13:14.567"] 197 | assert not compare(a, b, ignore_list_seq=False) 198 | 199 | long_line() 200 | 201 | a = [0.1 + 0.1 + 0.1] # default we use accurate compare, since float compute causes accumulative errors 202 | b = [0.3] 203 | assert not compare(a, b, ignore_list_seq=False) 204 | 205 | long_line() 206 | 207 | check(a, b, ignore_list_seq=False, float_fuzzy_digits=6) # so we can bear margin < 10e-6 now in float comparing 208 | 209 | long_line() 210 | 211 | a = {"key": 1} 212 | b = {"key": 1.0} 213 | check(a, b) 214 | 215 | long_line() 216 | 217 | assert not compare(a, b, strict_number_type=True) 218 | 219 | long_line() 220 | 221 | a = {"a": {"b": 1, "c": 2, "d": 3}} 222 | b = {"a": {"c": 2}} 223 | check(a, b, omit_path=["/a/b", "/a/d"]) 224 | 225 | long_line() 226 | 227 | a = {"a": {"b": 1, "c": 2}} 228 | b = {"a": {"c": 2, "d": 3}} 229 | assert not compare(a, b, omit_path=["/a/b"]) 230 | 231 | long_line() 232 | 233 | a = {"a": [{"b": 1, "c": 2}, {"d": 3, "c": 4}]} 234 | b = {"a": [{"c": 2}, {"c": 4}]} 235 | check(a, b, omit_path=["/a/*/b", "/a/*/d"]) # omit_path only supports * expression to ignore path contains list 236 | 237 | long_line() 238 | 239 | a = {"a": [{"b": 1, "c": 2}, {"c": 2}, {"d": 3, "c": 4}]} 240 | b = {"a": [{"c": 2}, {"c": 4}, {"c": 4}]} 241 | assert not compare(a, b, omit_path=["/a/*/b", "/a/*/d"]) # freq are different now 242 | 243 | 244 | if __name__ == "__main__": 245 | run_tests() 246 | -------------------------------------------------------------------------------- /json_compare/json_compare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # author: Rainy Chan rainydew@qq.com 4 | # platform: python 2.6-2.7, 3.5-3.8+ 5 | # demos are provided in test_json_compare.py 6 | from __future__ import print_function 7 | import json 8 | import re 9 | import traceback 10 | import six 11 | import codecs 12 | 13 | _NUMBER_TYPES = list(six.integer_types) + [float] 14 | 15 | 16 | class _Compare(object): 17 | def __init__(self): 18 | self._float_fuzzy_digits = None 19 | self._strict_number_type = None 20 | self._res = None 21 | self._ignore_list_seq = None 22 | self._re_compare = True 23 | self._ignore_path = None 24 | self._omit_path = None 25 | self._handle = None 26 | 27 | @staticmethod 28 | def _tuple_append(t, i): 29 | return tuple(list(t) + [six.text_type(i)]) 30 | 31 | @staticmethod 32 | def _to_unicode_if_string(strlike): 33 | if type(strlike) == six.binary_type: 34 | try: 35 | return strlike.decode('utf-8') 36 | except UnicodeDecodeError: 37 | raise ValueError("decoding string {} failed, may be local encoded".format(repr(strlike))) 38 | else: 39 | return strlike 40 | 41 | @staticmethod 42 | def _to_list_if_tuple(listlike): 43 | if type(listlike) == tuple: 44 | return list(listlike) 45 | else: 46 | return listlike 47 | 48 | def _common_warp(self, anylike): 49 | return self._to_list_if_tuple(self._to_unicode_if_string(anylike)) 50 | 51 | def _fuzzy_float_equal(self, a, b): 52 | if self._float_fuzzy_digits: 53 | return abs(a - b) < 10 ** (-self._float_fuzzy_digits) 54 | else: 55 | return a == b 56 | 57 | @staticmethod 58 | def _modify_a_key(dic, from_key, to_key): 59 | assert not any([type(to_key) == type(exist_key) and to_key == exist_key for exist_key in 60 | dic.keys()]), 'cannot change the key due to key conflicts' 61 | # cannot use IN here `to_key in dic.keys()`, because u"a" in ["a"] == True 62 | dic[to_key] = dic.pop(from_key) 63 | 64 | def _fuzzy_number_type(self, value): 65 | if not self._strict_number_type: 66 | type_dict = {x: float for x in six.integer_types} 67 | else: 68 | type_dict = {x: int for x in six.integer_types} 69 | res = type(value) 70 | return type_dict.get(res, res) 71 | 72 | def _turn_dict_keys_to_unicode(self, dic): 73 | keys = dic.keys() 74 | modifiers = [] 75 | for key in keys: # a.keys() returns a constant, so it is safe because ak won't change 76 | if type(key) == six.binary_type: 77 | modifiers.append((key, self._to_unicode_if_string(key))) 78 | else: 79 | assert type(key) == six.text_type, 'key {} must be string or unicode in dict {}'.format(key, dic) 80 | 81 | for from_key, to_key in modifiers: 82 | self._modify_a_key(dic, from_key, to_key) 83 | 84 | def _set_false(self): 85 | self._res = False 86 | 87 | @staticmethod 88 | def _escape(s): 89 | """ 90 | :param s: binary if py2 else unicode 91 | :return: 92 | """ 93 | if r'\x' in s: 94 | s = s.decode('string-escape') if six.PY2 else codecs.escape_decode(s)[0].decode('utf-8') # no string-escape 95 | if r'\u' in s: 96 | s = s.decode('unicode-escape') if six.PY2 else s.encode().decode('unicode-escape') 97 | if type(s) == six.binary_type: 98 | s = s.decode('utf-8') # This often comes from unix servers 99 | return s 100 | 101 | # difference_print methods 102 | def _different_type(self, a, b, root): 103 | self._set_false() 104 | self._handle("different type at /{}".format("/".join(root))) 105 | self._handle("a {}: ".format(type(a)) + repr(a)) 106 | self._handle("b {}: ".format(type(b)) + repr(b)) 107 | 108 | def _different_value(self, a, b, root): 109 | self._set_false() 110 | self._handle("different value at /{}".format("/".join(root))) 111 | self._handle("a: " + repr(a)) 112 | self._handle("b: " + repr(b)) 113 | 114 | def _different_length(self, a, b, root): 115 | self._set_false() 116 | self._handle("different length of list at /{}".format("/".join(root))) 117 | self._handle("len(a)={} : ".format(len(a)) + repr(a)) 118 | self._handle("len(b)={} : ".format(len(b)) + repr(b)) 119 | 120 | def _list_item_not_found(self, ele, which, root): 121 | self._set_false() 122 | self._handle("list {} at /{}".format(which, "/".join(root))) 123 | self._handle("has element that another list hasn't :") 124 | self._handle(repr(ele)) 125 | 126 | def _list_freq_not_match(self, root, aplace, bplace, ele, counta, countb): 127 | self._set_false() 128 | self._handle( 129 | "list at /{}, index {}, has different frequency from b at index {}:".format("/".join(root), aplace, bplace)) 130 | self._handle("element is {}".format(ele)) 131 | self._handle("count of list a: {}".format(counta)) 132 | self._handle("count of list b: {}".format(countb)) 133 | 134 | def _dict_key_not_found(self, keys, which, root): 135 | self._set_false() 136 | self._handle("dict {} at /{}".format(which, "/".join(root))) 137 | self._handle("has key(s) that another dict hasn't :") 138 | self._handle(keys) 139 | 140 | # internal compare methods 141 | def _list_comp(self, a, b, root, printdiff): 142 | if len(a) != len(b): 143 | if not printdiff: 144 | return False 145 | self._different_length(a, b, root) 146 | found_b = [False] * len(b) 147 | 148 | for i, a_i in enumerate(a): 149 | found = False 150 | for j, b_j in enumerate(b): 151 | if self._common_comp(a_i, b_j, printdiff=False): 152 | found_b[j] = True 153 | found = True 154 | break 155 | if not found: 156 | buff = self._tuple_append(root, i) 157 | self._list_item_not_found(a_i, "a", buff) 158 | 159 | found_a = [False] * len(a) 160 | for j, b_j in enumerate(b): 161 | found = False 162 | for i, a_i in enumerate(a): 163 | if self._common_comp(a_i, b_j, printdiff=False): 164 | found_a[i] = True 165 | found = True 166 | break 167 | if not found: 168 | buff = self._tuple_append(root, j) 169 | self._list_item_not_found(b_j, "b", buff) 170 | return 171 | 172 | if not self._ignore_list_seq: 173 | for i in range(min(len(a), len(b))): 174 | buff = self._tuple_append(root, i) 175 | if not self._common_comp(a[i], b[i], buff, printdiff): 176 | if not printdiff: 177 | return False 178 | else: 179 | counts_a = [[0, None] for _ in range(len(a))] 180 | counts_b = [[0, None] for _ in range(len(a))] 181 | need_to_compare_number = True 182 | 183 | for i in range(len(a)): 184 | for j in range(len(a)): 185 | buff = self._tuple_append(root, len(a) * 10) 186 | if self._common_comp(a[i], b[j], buff, printdiff=False): 187 | counts_a[i][1] = j 188 | counts_a[i][0] += 1 189 | if self._common_comp(b[i], a[j], buff, printdiff=False): 190 | counts_b[i][1] = j 191 | counts_b[i][0] += 1 192 | 193 | if not counts_a[i][0]: 194 | if not printdiff: 195 | return False 196 | need_to_compare_number = False 197 | buff = self._tuple_append(root, i) 198 | self._list_item_not_found(a[i], "a", buff) 199 | 200 | if not counts_b[i][0]: 201 | if not printdiff: 202 | return False 203 | need_to_compare_number = False 204 | buff = self._tuple_append(root, i) 205 | self._list_item_not_found(b[i], "b", buff) 206 | 207 | if need_to_compare_number: 208 | for i in range(len(counts_a)): 209 | counta, place = counts_a[i] 210 | countb = counts_b[place][0] 211 | if countb != counta and counts_b[place][1] == i: # to prevent printing twice 212 | if not printdiff: 213 | return False 214 | self._list_freq_not_match(root, i, place, a[i], countb, counta) # need to swap counter here:) 215 | 216 | if not printdiff: 217 | return True 218 | 219 | def _dict_comp(self, a, b, root, printdiff): 220 | self._turn_dict_keys_to_unicode(a) 221 | self._turn_dict_keys_to_unicode(b) 222 | 223 | if self._omit_path: 224 | omit_dict = {} 225 | for x in self._omit_path: 226 | pre, tat = x.split(u"/")[1:-1], x.split(u"/")[-1] 227 | for i, v in enumerate(pre): 228 | if v == u"*" and i < len(root): 229 | pre[i] = root[i] 230 | pre = tuple(pre) 231 | if pre not in omit_dict: 232 | omit_dict[pre] = [tat] 233 | else: 234 | omit_dict[pre].append(tat) 235 | if root in omit_dict: 236 | a = {k: v for k, v in a.items() if k not in omit_dict[root]} 237 | b = {k: v for k, v in b.items() if k not in omit_dict[root]} 238 | 239 | ak = a.keys() # refresh again to make sure it's unicode now 240 | bk = b.keys() 241 | diffak = [x for x in ak if x not in bk] 242 | diffbk = [x for x in bk if x not in ak] 243 | if diffak: 244 | if not printdiff: 245 | return False 246 | self._dict_key_not_found(diffak, "a", root) 247 | if diffbk: 248 | if not printdiff: 249 | return False 250 | self._dict_key_not_found(diffbk, "b", root) 251 | samekeys = [x for x in ak if x in bk] 252 | 253 | for key in samekeys: 254 | buff = self._tuple_append(root, key) 255 | if not self._common_comp(a[key], b[key], buff, printdiff): 256 | if not printdiff: 257 | return False 258 | 259 | if not printdiff: 260 | return True 261 | 262 | def _common_comp(self, a, b, root=(), printdiff=True): 263 | if self._ignore_path: 264 | current_path = u"/{}".format(u"/".join(root)) 265 | 266 | for ignore_item in self._ignore_path: 267 | if ignore_item[0] == u"^" or ignore_item[-1] == u"$": 268 | find = re.findall(ignore_item, current_path) 269 | assert len(find) < 2, "shouldn't be this" 270 | if find and find[0] == current_path: 271 | return True 272 | else: 273 | if u"/{}".format(u"/".join(root)) == ignore_item: 274 | return True 275 | 276 | a = self._common_warp(a) 277 | b = self._common_warp(b) 278 | 279 | if self._fuzzy_number_type(a) != self._fuzzy_number_type(b): 280 | if not printdiff: 281 | return False 282 | self._different_type(a, b, root) 283 | return 284 | 285 | if type(a) not in [dict, list]: 286 | if not self._value_comp(a, b, printdiff): 287 | if not printdiff: 288 | return False 289 | self._different_value(a, b, root) 290 | elif not printdiff: 291 | return True 292 | return 293 | 294 | if type(a) == list: 295 | return self._list_comp(a, b, root, printdiff) 296 | 297 | if type(a) == dict: 298 | return self._dict_comp(a, b, root, printdiff) 299 | 300 | raise TypeError("shouldn't be here") 301 | 302 | def _value_comp(self, a, b, printdiff=True): # the most base comparison 303 | if not self._re_compare or type(a) != six.text_type or type(b) != six.text_type: 304 | if (type(a) == float and type(b) in _NUMBER_TYPES) or (type(b) == float and type(a) in _NUMBER_TYPES): 305 | return self._fuzzy_float_equal(a, b) 306 | else: 307 | return a == b 308 | else: 309 | a_is_re = len(a) > 0 and (a[0] == u"^" or a[-1] == u"$") 310 | b_is_re = len(b) > 0 and (b[0] == u"^" or b[-1] == u"$") # lazy eval prevents index out of range error 311 | if not a_is_re and not b_is_re: 312 | return a == b 313 | assert not (a_is_re and b_is_re), "can't compare two regular expressions" 314 | if b_is_re: # let a be re 315 | a, b = b, a 316 | find = re.findall(a, b) 317 | assert len(find) < 2, "shouldn't be this" 318 | if not find: 319 | if printdiff: 320 | self._handle("re compare failed, empty match, see next line") 321 | return False 322 | if not find[0] == b: 323 | if printdiff: 324 | self._handle("re compare failed, found {}, expect {}, see next line".format(find[0], b)) 325 | return False 326 | return True 327 | 328 | def compare(self, a, b, ignore_list_seq=True, re_compare=True, ignore_path=None, callback=print, strict_json=False, 329 | float_fuzzy_digits=0, strict_number_type=False, omit_path=None): 330 | """ 331 | real compare entrance 332 | """ 333 | self._handle = callback 334 | flag = False # transferred str to object, need recursion 335 | 336 | if type(a) in [six.text_type, six.binary_type]: 337 | json_loaded_a = json.loads(a) # json only, should use eval when using python dict/list-like strings instead 338 | flag = True 339 | else: 340 | json_loaded_a = a 341 | if type(b) in [six.text_type, six.binary_type]: 342 | json_loaded_b = json.loads(b) 343 | flag = True 344 | else: 345 | json_loaded_b = b 346 | if flag: 347 | return self.compare(json_loaded_a, json_loaded_b, ignore_list_seq, re_compare, ignore_path, callback, 348 | strict_json, float_fuzzy_digits, strict_number_type, omit_path) 349 | 350 | if strict_json: 351 | try: 352 | json.dumps(a, ensure_ascii=False) 353 | json.dumps(b, ensure_ascii=False) 354 | except TypeError: 355 | self._handle(traceback.format_exc()) 356 | raise TypeError("unsupported type found during strict json check") 357 | 358 | self._res = True 359 | self._ignore_list_seq = ignore_list_seq 360 | self._re_compare = re_compare 361 | self._float_fuzzy_digits = float_fuzzy_digits 362 | self._strict_number_type = strict_number_type 363 | self._ignore_path = None if ignore_path is None else [self._to_unicode_if_string(path) for path in ignore_path] 364 | self._omit_path = None if omit_path is None else [self._to_unicode_if_string(path) for path in omit_path] 365 | 366 | if self._ignore_path: 367 | assert all([path[0] == u"/" or u"(/" in path for path in self._ignore_path]), "invalid ignore path" 368 | if self._omit_path: 369 | assert all([path[0] == u"/" and path.split(u"/")[-1] not in (u"", u"*") and not path.split(u"/")[-1]. 370 | isdigit() for path in self._omit_path]), "invalid omit path" 371 | 372 | self._handle(self._escape("a is {}".format(a))) 373 | self._handle(self._escape("b is {}".format(b))) 374 | self._handle("ignore_list_seq = {}, re_compare = {}, ignore_path = {}, omit_path = {}, float_fuzzy_digits = {}" 375 | .format(ignore_list_seq, re_compare, ignore_path, omit_path, self._float_fuzzy_digits)) 376 | 377 | self._common_comp(a, b) 378 | return self._res 379 | 380 | 381 | def compare(a, b, *args, **kwargs): 382 | return _Compare().compare(a, b, *args, **kwargs) 383 | 384 | 385 | def check(a, b, *args, **kwargs): 386 | assert _Compare().compare(a, b, *args, **kwargs) 387 | --------------------------------------------------------------------------------