├── upload.bat
├── json_compare
    ├── __init__.py
    ├── json_compare.pyi
    ├── test_json_compare.py
    └── json_compare.py
├── setup.py
├── LICENSE
├── .gitignore
└── README.md


/upload.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | rem Build a source distribution and upload it to the repository named "pypi".
3 | rem setup.py runs test_json_compare.py first because "upload" is on the command line.
4 | python setup.py sdist upload -r pypi
5 | rem Wait for a key press so the console output can be read before the window closes.
6 | pause


--------------------------------------------------------------------------------
/json_compare/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # author: Rainy Chan  mail to: rainydew@qq.com
3 | from .json_compare import compare, check
4 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # coding: utf-8
 3 | from distutils.core import setup
 4 | from os import path, chdir, system
 5 | from sys import argv
 6 | 
 7 | if "upload" in argv:
 8 |     chdir("json_compare")
 9 |     print("running test")
10 |     assert system("python test_json_compare.py") == 0
11 |     chdir("..")
12 | 
13 | this_directory = path.abspath(path.dirname(__file__))
14 | 
15 | try:
16 |     import pypandoc
17 |     long_description = pypandoc.convert('README.md', 'rst')
18 | except:
19 |     long_description = ""
20 | 
21 | setup(
22 |     name='json-compare-deep',
23 |     version='2.0',
24 |     description='A recursive json comparison library that handles list orders and fuzzy types',
25 |     author='Rainy Chan',
26 |     author_email='rainydew@qq.com',
27 |     url='https://github.com/rainydew/jsoncomparedeep',
28 |     packages=['json_compare'],
29 |     install_requires=['six>=1.12.0'],
30 |     keywords='json comparison order unicode fuzzy',
31 |     long_description=long_description,
32 |     python_requires=">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
33 | )
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Rainy Chan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | 
132 | # Jetbrains
133 | .idea
134 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # json_compare
 2 | 
 3 | A library to compare any json string/bytes/json-like-objects.
 4 | 
 5 | Version 2.0 is a rewrite that provides clearer, easier-to-use functions (backward incompatible, so I started a new repo).
 6 | 
 7 | Version 1.20 is an enhanced version that supports *omit_path* to ignore omitted keys in dicts, and fixes many bugs, especially inaccurate count messages for collections with the same length and elements, and wrong not_found info for lists of different lengths under *ignore_list_seq*=**True**.
 8 | 
 9 | Version 1.19 is an enhanced version that fixes small bugs and supports strict_number_type to make <int> 1 != <float> 1.0.
10 | 
11 | Version 1.18 is a quickfix + enhanced version that fixes the bug where custom handlers could not be passed recursively, and lets *float_fuzzy_digits* be passed directly.
12 | 
13 | Version 1.17 is an enhanced version that supports custom handlers to handle outputs.
14 | 
15 | Version 1.16 is a quickfix version that supports Python 3.8 (and 3.9 as well).
16 | 
17 | ## Features
18 | 
19 | * Compare jsons and print the differences (what and where they are, recursion supported). Useful for interface testing.
20 | * Config whether it will ignore the order of items in a list or not, recursively.
21 | * Both python 2.6-2.7 and 3.5-3.9 supported. (New)
22 | * Regular expressions supported for string to skip unconcerned keys or just to assert the format.
23 | * Compatible **str** and **unicode** (or **bytes** and **str** in python3): they are considered equal. Good for non-ASCII languages.
24 | * Both **json string** (**unicode** or **binary** str) and **json object** (**dict**, **list** or **tuple**) are supported.
25 | * Support tuples, so results from pymysql.cursors.DictCursor can compare with interface response directly.
26 | * Json type legal check(strict_json).
27 | * Support skipping anywhere using argument like *ignore_path=["/a/1/k", "/a/1/l"]*, dict keys or list indexes. Skipped fields are regarded as match.
28 | * The ignore_path list now support regular expressions too. You can use *[r"^(/\d+/a)"]* as ignore_path to skip all keys named "a" in *[{"a": 1, "b": 2}, {"a": 1, "b": 4}]* but still compare the value of "b". (New)
29 |   * Useful when compare multi records in database query result (dictionary cursor) with some fields unconcerned.
30 | * Fuzzy equal when handling floats.
31 | * Custom handlers supported.
32 | * Strict_number_type option to make int(1) != float(1.0) supported.
33 | * Omitting keys in dict comparison (*omit_path*) supported. (New)
34 | 
35 | ## QuickStart
36 | 
37 | install
38 | 
39 | ```shell
40 | pip install json-compare-deep
41 | ```
42 | 
43 | or update
44 | 
45 | ```shell
46 | pip install -U json-compare-deep
47 | ```
48 | 
49 | a simple example
50 | 
51 | ```python
52 | from json_compare import compare
53 | print(compare({"key1":["v1","v2"],"key2":{"key3":1}},{"key1":["v2","v1"],"key2":{"key3":2}}))
54 | ```
55 | 
56 | to see
57 | 
58 | ```
59 | a is {'key1': ['v1', 'v2'], 'key2': {'key3': 1}}
60 | b is {'key1': ['v2', 'v1'], 'key2': {'key3': 2}}
61 | ignore_list_seq = True, re_compare = True, ignore_path = None, omit_path = None, float_fuzzy_digits = 0
62 | different value at /key2/key3
63 | a: 1
64 | b: 2
65 | False
66 | ```
67 | 
68 | For more demos and information, just install it and visit the test file **test_json_compare.py** in **Your_Python_Path/Lib/site-packages/json_compare/**
69 | 
70 | ## Small Hints
71 | 
72 | * Datetime in SQL result is not JSON serializable type, use something like **CAST(create_time as CHAR) 'create_time'** in SQL statement to solve it.
73 | 
74 | ## Bug report
75 | 
76 | * Issues and bugs report to rainydew@qq.com.
77 | * Homepage icon leads to my Github project page, issues / PRs / stars are welcomed :)
78 | 


--------------------------------------------------------------------------------
/json_compare/json_compare.pyi:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # author: Rainy Chan  mail to: rainydew@qq.com
 3 | 
 4 | def compare(a, b, ignore_list_seq=True, re_compare=True, ignore_path=None, callback=print, strict_json=False,
 5 |             float_fuzzy_digits=0, strict_number_type=False, omit_path=None):
 6 |     """Return whether a matches b
 7 |     :param str or unicode or list or tuple or dict a: The first json string/json-like object to compare
 8 |     :param str or unicode or list or tuple or dict b: The second one to be compared
 9 |     :param bool ignore_list_seq: Set True to ignore the order when comparing arrays(lists), recursively
10 |     :param bool re_compare: Set True to enable regular expressions for assertion. The pattern MUST contain ONE
11 |     bracket, start with ^ or end with $, otherwise it won't be considered as an re-pattern. You can use ^.*?(sth) or
12 |     ().*$ or so on to extract something from middle of the string. ^(.*)$ can just match any string, make this item
13 |     ignored. Comparing two re-patterns makes no sense so it isn't allowed
14 |     :param list[str or unicode] or None ignore_path: a list of element-path to be ignored when comparing value. e.g.
15 |     ["/key1/key2", "/key3/1"] means all "ignored" in {"key1":{"key2":"ignored"},"key3":["not ignored","ignored"]}
16 |     :param function callback: A one-arg function to hold the difference, default to `print`
17 |     :param bool strict_json: Set True to ensure that all dict/list objects are JSON serializable. You may set it to
18 |     False to make some special types comparable, e.g. Decimal, bytes and struct_time, useful for db assertion.
19 |     BE AWARE !!! Bytes-like str (str in python2) is not supported. Since you should use json.dumps(u"hello") instead
20 |         of json.dumps("hello"). It may raise UnicodeDecodeError if there are Chinese characters or so on.
21 |     :param int float_fuzzy_digits: 0(default) means disable. Set it to N means we consider number a == b if abs(a-b)
22 |         < 10**(-N)
23 |     :param bool strict_number_type: False(default) means allow 1(int)==1.0(float). Set True to ensure type equality
24 |     :param list[str or unicode] or None omit_path: a list of element-paths to be ignored even if they are absent.
25 |     ["/key1/key2"] means {"key1":{"key2":"ignored"}} can match {"key1":{}}. The last path segment MUST BE a map key,
26 |     you must use * to shadow list index and it doesn't support regular expression. e.g. /key1/*/key2
27 |     :return bool: Whether the two json strings or json-like objects are equal. If not, the differences are reported
28 |     """
29 |     return True
30 | 
31 | def check(a, b, ignore_list_seq=True, re_compare=True, ignore_path=None, callback=print, strict_json=False,
32 |             float_fuzzy_digits=0, strict_number_type=False, omit_path=None):
33 |     """Raise an AssertionError when a does not match b
34 |     :param str or unicode or list or tuple or dict a: The first json string/json-like object to compare
35 |     :param str or unicode or list or tuple or dict b: The second one to be compared
36 |     :param bool ignore_list_seq: Set True to ignore the order when comparing arrays(lists), recursively
37 |     :param bool re_compare: Set True to enable regular expressions for assertion. The pattern MUST contain ONE
38 |     bracket, start with ^ or end with $, otherwise it won't be considered as an re-pattern. You can use ^.*?(sth) or
39 |     ().*$ or so on to extract something from middle of the string. ^(.*)$ can just match any string, make this item
40 |     ignored. Comparing two re-patterns makes no sense so it isn't allowed
41 |     :param list[str or unicode] or None ignore_path: a list of element-path to be ignored when comparing value. e.g.
42 |     ["/key1/key2", "/key3/1"] means all "ignored" in {"key1":{"key2":"ignored"},"key3":["not ignored","ignored"]}
43 |     :param function callback: A one-arg function to hold the difference, default to `print`
44 |     :param bool strict_json: Set True to ensure that all dict/list objects are JSON serializable. You may set it to
45 |     False to make some special types comparable, e.g. Decimal, bytes and struct_time, useful for db assertion.
46 |     BE AWARE !!! Bytes-like str (str in python2) is not supported. Since you should use json.dumps(u"hello") instead
47 |         of json.dumps("hello"). It may raise UnicodeDecodeError if there are Chinese characters or so on.
48 |     :param int float_fuzzy_digits: 0(default) means disable. Set it to N means we consider number a == b if abs(a-b)
49 |         < 10**(-N)
50 |     :param bool strict_number_type: False(default) means allow 1(int)==1.0(float). Set True to ensure type equality
51 |     :param list[str or unicode] or None omit_path: a list of element-paths to be ignored even if they are absent.
52 |     ["/key1/key2"] means {"key1":{"key2":"ignored"}} can match {"key1":{}}. The last path segment MUST BE a map key,
53 |     you must use * to shadow list index and it doesn't support regular expression. e.g. /key1/*/key2
54 |     """
55 |     return True
56 | 


--------------------------------------------------------------------------------
/json_compare/test_json_compare.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | from json_compare import compare, check
  4 | import six
  5 | 
  6 | 
  7 | def long_line():  # print a 120-character dashed separator between demo cases
  8 |     print("-" * 120)
  9 | 
 10 | 
 11 | def run_tests():  # demo cases that double as tests: check() must not raise, compare() must be falsy where asserted
 12 |     a = {"姓名": "王大锤"}  # str and unicode (or bytes and str in python3) are compatible, useful in Chinese words...
 13 |     b = {u"姓名": u"王大锤"} if six.PY2 else {"姓名".encode("utf-8"): "王大锤".encode("utf-8")}
 14 |     check(a, b)
 15 | 
 16 |     long_line()
 17 | 
 18 |     a = [[1, 2, 3], [4, 5, 6]]
 19 |     b = ([6, 5, 4], [3, 2, 1])  # tuples (useful in pymysql & DictCursor) and different order of arrays are supported
 20 |     check(a, b)
 21 | 
 22 |     long_line()
 23 | 
 24 |     a = [[1, 2, 3], [4, 5, 6]]
 25 |     b = [[3, 2, 1], [6, 5, 4]]  # ignore_list_seq=False makes these two different, however
 26 |     assert not compare(a, b, ignore_list_seq=False)
 27 | 
 28 |     long_line()
 29 | 
 30 |     a = {"a": 1, "b": 3, "c": False, "d": "ok"}
 31 |     b = {"a": 1, "b": 2, "c": "False", "e": "ok"}  # False != "False"
 32 |     assert not compare(a, b)
 33 | 
 34 |     long_line()
 35 | 
 36 |     a = {"a": [1, {"k": ["ok"]}]}
 37 |     b = {"a": [1, {"k": ["error"]}]}  # ignoring list order, we aren't sure to pair {"k": ["ok"]} with {"k": ["error"]}
 38 |     assert not compare(a, b)
 39 | 
 40 |     long_line()
 41 | 
 42 |     assert not compare(a, b, ignore_list_seq=False)  # however, if we consider the order, we can locate differences deeper
 43 | 
 44 |     long_line()
 45 | 
 46 |     a = {"a": [1, {"k": [0]}]}  # we ignore this path now, test will pass.
 47 |     b = '{"a": [1, {"k": [1]}]}'  # notice we can't specify path deeper in a list when ignore_list_seq is enabled
 48 |     check(a, b, ignore_list_seq=False, ignore_path=["/a/1/k"])
 49 | 
 50 |     long_line()
 51 | 
 52 |     a = [{"a": 1, "b": 2}, {"a": 5, "b": 4}]  # now we finally support regular expressions in ignore_path list
 53 |     b = [{"a": 3, "b": 2}, {"a": 6, "b": 4}]  # in this case, only value of "b" concerned
 54 |     check(a, b, ignore_list_seq=False, ignore_path=[r"^(/\d+/a)"])
 55 | 
 56 |     long_line()
 57 | 
 58 |     a = [{"a": 1, "b": 2}, {"a": 1, "b": 4}]  # also useful under list_seq ignored
 59 |     b = [{"a": 2, "b": 4}, {"a": 2, "b": 2}]
 60 |     check(a, b, ignore_path=[r"^(/\d+/a)"])
 61 | 
 62 |     long_line()
 63 | 
 64 |     a = [{"a": 1, "b": 3}, {"a": 1, "b": 4}]  # this time, 3 and 2 cannot match
 65 |     b = [{"a": 2, "b": 4}, {"a": 2, "b": 2}]
 66 |     assert not compare(a, b, ignore_path=[r"^(/\d+/a)"])
 67 | 
 68 |     long_line()
 69 | 
 70 |     a = [{"a": 1, "b": 2}, {"a": 3, "b": 4}, {"a": 5, "b": 4}]  # this time, only different frequency found
 71 |     b = [{"a": 6, "b": 4}, {"a": 7, "b": 2}, {"a": 8, "b": 2}]  # but it will choose a random value of "a" to display
 72 |     assert not compare(a, b, ignore_path=[r"^(/\d+/a)"]) # it's caused by logic restriction, don't get confused
 73 | 
 74 |     long_line()
 75 | 
 76 |     a = {"a": [1, {"k": [0], "l": None}, 2]}  # ignore two paths this time, only difference at /a/2 will be shown
 77 |     b = {"a": [1, {"k": [1], "l": False}, 3]}
 78 |     assert not compare(a, b, ignore_list_seq=False, ignore_path=["/a/1/k", "/a/1/l"])
 79 | 
 80 |     long_line()
 81 | 
 82 |     a = '{"rtn": 0, "msg": "ok"}'  # can compare json string with python dict/list objects
 83 |     b = {"rtn": 1, "msg": "username not exist"}
 84 |     assert not compare(a, b)
 85 | 
 86 |     long_line()
 87 | 
 88 |     a = u'{"body":{"text":"你好"}}'  # both text and binary json strings are supported
 89 |     b = '{"body":{"text":"你好啊"}}'
 90 |     assert not compare(a, b)
 91 | 
 92 |     long_line()
 93 | 
 94 |     a = [1, 2, 2]  # even if we ignore the order, the frequency of elements still matters
 95 |     b = [1, 1, 2]
 96 |     assert not compare(a, b)
 97 | 
 98 |     long_line()
 99 | 
100 |     a = [1, 2, 3]
101 |     b = [1, 3, 4, 5]  # even if the length of lists are not equal, we can still know the difference
102 |     assert not compare(a, b)
103 | 
104 |     long_line()
105 | 
106 |     a = [1, 2, 3]
107 |     b = [1, 3, 4, 5]  # but we CANNOT keep the order of elements under different length even if ignore_list_seq is False
108 |     assert not compare(a, b, ignore_list_seq=False)
109 | 
110 |     long_line()
111 | 
112 |     a = [1.0]  # in fact compare(1, 1.0) is allowed; however, non-standard jsons are not recommended
113 |     b = [1 if six.PY3 else eval("1L")]  # Integers and floats are compatible, including long of python 2
114 |     check(a, b)
115 | 
116 |     long_line()
117 | 
118 |     a = [r"^(.*)$"]  # re-comparing enabled as default. Be careful bare r"^(.*)$" without list is considered as json-str
119 |     b = ["anything"]  # use this to skip any unconcerned fields
120 |     check(a, b, ignore_list_seq=False)
121 | 
122 |     long_line()
123 | 
124 |     a = [r"(.*)"]  # without ^-start or $-end, this won't be regarded as re-pattern
125 |     b = ["anything"]
126 |     assert not compare(a, b, ignore_list_seq=False)
127 | 
128 |     long_line()
129 | 
130 |     a = [r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})$"]  # we can use re-comparing to confine formats but not values
131 |     b = ["anything"]
132 |     assert not compare(a, b, ignore_list_seq=False)
133 | 
134 |     long_line()
135 | 
136 |     a = [r"^(2019-07-01 \d{2}:\d{2}:\d{2})$"]  # e.g. this assertion will pass
137 |     b = ["2019-07-01 12:13:14"]
138 |     check(a, b, ignore_list_seq=False)
139 | 
140 |     long_line()
141 | 
142 |     a = [r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})$", r"^(.*)$"]
143 |     b = ["anything", "otherthing"]  # when using re with order-ignored list, it will be crossing compare
144 |     # be careful, potential chance of messy
145 |     assert not compare(a, b)
146 | 
147 |     long_line()
148 | 
149 |     a = [r"^(.*)$"]  # comparing two re-patterns is not allowed
150 |     b = [r"^(.+)$"]
151 |     try:
152 |         compare(a, b, ignore_list_seq=False)
153 |     except Exception as e:
154 |         print(e)
155 |     else:
156 |         raise AssertionError()
157 | 
158 |     long_line()
159 | 
160 |     a = [r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})$", "otherthing"]
161 |     b = ["anything", r"^(.*)$"]  # this errors when comparing a[0] with b[1] due to the above rule
162 |     try:
163 |         compare(a, b)
164 |     except Exception as e:
165 |         print(e)
166 |     else:
167 |         raise AssertionError()
168 | 
169 |     long_line()
170 | 
171 |     a = r'["^(2019-07-01 \\d{2}:\\d{2}:\\d{2})$"]'  # double slashes are needed because this is a json-string, not list
172 |     # or use '["^(2019-07-01 \\\\\d{2}:\\\\\d{2}:\\\\\d{2})$"]' will also work
173 |     b = ["2019-07-01 12:13:14"]
174 |     check(a, b, ignore_list_seq=False)
175 | 
176 |     long_line()
177 | 
178 |     a = r'[r"^(2019-07-01 \d{2}:\d{2}:\d{2})$"]'  # json cannot parse inner 'r' notation, so this won't work
179 |     b = ["2019-07-01 12:13:14"]
180 |     try:
181 |         compare(a, b, ignore_list_seq=False)
182 |     except Exception as e:
183 |         print(e)
184 |     else:
185 |         raise AssertionError()
186 | 
187 |     long_line()
188 | 
189 |     a = [r"^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})"]  # only fully match will pass re-comparing
190 |     b = ["2019-07-01 12:13:14.567"]
191 |     assert not compare(a, b, ignore_list_seq=False)
192 | 
193 |     long_line()
194 | 
195 |     a = [r"^.*?(\d)-(\d)"]  # two or more brackets will certainly result in False
196 |     b = ["2019-07-01 12:13:14.567"]
197 |     assert not compare(a, b, ignore_list_seq=False)
198 | 
199 |     long_line()
200 | 
201 |     a = [0.1 + 0.1 + 0.1]  # default we use accurate compare, since float compute causes accumulative errors
202 |     b = [0.3]
203 |     assert not compare(a, b, ignore_list_seq=False)
204 | 
205 |     long_line()
206 | 
207 |     check(a, b, ignore_list_seq=False, float_fuzzy_digits=6)  # a margin below 10**-6 (1e-6) is tolerated now in float comparing
208 | 
209 |     long_line()
210 | 
211 |     a = {"key": 1}
212 |     b = {"key": 1.0}
213 |     check(a, b)
214 | 
215 |     long_line()
216 | 
217 |     assert not compare(a, b, strict_number_type=True)
218 | 
219 |     long_line()
220 | 
221 |     a = {"a": {"b": 1, "c": 2, "d": 3}}
222 |     b = {"a": {"c": 2}}
223 |     check(a, b, omit_path=["/a/b", "/a/d"])
224 | 
225 |     long_line()
226 | 
227 |     a = {"a": {"b": 1, "c": 2}}
228 |     b = {"a": {"c": 2, "d": 3}}
229 |     assert not compare(a, b, omit_path=["/a/b"])
230 | 
231 |     long_line()
232 | 
233 |     a = {"a": [{"b": 1, "c": 2}, {"d": 3, "c": 4}]}
234 |     b = {"a": [{"c": 2}, {"c": 4}]}
235 |     check(a, b, omit_path=["/a/*/b", "/a/*/d"])   # omit_path only supports * expression to ignore path contains list
236 | 
237 |     long_line()
238 | 
239 |     a = {"a": [{"b": 1, "c": 2}, {"c": 2}, {"d": 3, "c": 4}]}
240 |     b = {"a": [{"c": 2}, {"c": 4}, {"c": 4}]}
241 |     assert not compare(a, b, omit_path=["/a/*/b", "/a/*/d"])    # freq are different now
242 | 
243 | 
244 | if __name__ == "__main__":  # run every demo case when the file is executed as a script
245 |     run_tests()
246 | 


--------------------------------------------------------------------------------
/json_compare/json_compare.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | # author: Rainy Chan rainydew@qq.com
  4 | # platform: python 2.6-2.7, 3.5-3.8+
  5 | # demos are provided in test_json_compare.py
  6 | from __future__ import print_function
  7 | import json
  8 | import re
  9 | import traceback
 10 | import six
 11 | import codecs
 12 | 
 13 | _NUMBER_TYPES = list(six.integer_types) + [float]  # integer types of the running Python (via six) plus float
 14 | 
 15 | 
 16 | class _Compare(object):
 17 |     def __init__(self):  # create a comparer with every option and the result still unset
 18 |         self._float_fuzzy_digits = None  # read by _fuzzy_float_equal; falsy means exact == comparison
 19 |         self._strict_number_type = None  # read by _fuzzy_number_type
 20 |         self._res = None  # set to False by _set_false() whenever a difference is reported
 21 |         self._ignore_list_seq = None  # read by _list_comp; falsy means lists are compared index by index
 22 |         self._re_compare = True
 23 |         self._ignore_path = None
 24 |         self._omit_path = None
 25 |         self._handle = None  # called with one argument for every line of a difference report
 26 | 
 27 |     @staticmethod
 28 |     def _tuple_append(t, i):  # return a new tuple: the items of t followed by the text form of i
 29 |         return tuple(list(t) + [six.text_type(i)])  # t itself is not modified
 30 | 
 31 |     @staticmethod
 32 |     def _to_unicode_if_string(strlike):  # decode binary strings as UTF-8; return any other value unchanged
 33 |         if type(strlike) == six.binary_type:  # exact type check: subclasses of the binary type are not decoded
 34 |             try:
 35 |                 return strlike.decode('utf-8')
 36 |             except UnicodeDecodeError:  # re-raised as ValueError carrying the repr of the input
 37 |                 raise ValueError("decoding string {} failed, may be local encoded".format(repr(strlike)))
 38 |         else:
 39 |             return strlike
 40 | 
 41 |     @staticmethod
 42 |     def _to_list_if_tuple(listlike):  # turn a tuple into a list; return any other value unchanged
 43 |         if type(listlike) == tuple:  # exact type check: tuple subclasses are returned as they are
 44 |             return list(listlike)
 45 |         else:
 46 |             return listlike
 47 | 
 48 |     def _common_warp(self, anylike):  # normalise one value: binary string -> text first, then tuple -> list
 49 |         return self._to_list_if_tuple(self._to_unicode_if_string(anylike))
 50 | 
 51 |     def _fuzzy_float_equal(self, a, b):  # equality of two numbers, with a tolerance when one is configured
 52 |         if self._float_fuzzy_digits:  # None and 0 both disable the tolerance
 53 |             return abs(a - b) < 10 ** (-self._float_fuzzy_digits)  # strict '<': a gap of exactly 10**-N is unequal
 54 |         else:
 55 |             return a == b
 56 | 
 57 |     @staticmethod
 58 |     def _modify_a_key(dic, from_key, to_key):  # rename from_key to to_key in dic, in place
 59 |         assert not any([type(to_key) == type(exist_key) and to_key == exist_key for exist_key in
 60 |                         dic.keys()]), 'cannot change the key due to key conflicts'
 61 |         # `to_key in dic.keys()` cannot be used above, because u"a" in ["a"] == True (python 2); types must match too
 62 |         dic[to_key] = dic.pop(from_key)  # NOTE(review): the assert guard above is skipped under python -O
 63 | 
 64 |     def _fuzzy_number_type(self, value):  # return the type of value after collapsing the integer types
 65 |         if not self._strict_number_type:
 66 |             type_dict = {x: float for x in six.integer_types}  # non-strict: every integer type counts as float
 67 |         else:
 68 |             type_dict = {x: int for x in six.integer_types}  # strict: integer types only collapse to int
 69 |         res = type(value)
 70 |         return type_dict.get(res, res)  # types that are not in six.integer_types are returned as they are
 71 | 
 72 |     def _turn_dict_keys_to_unicode(self, dic):  # convert binary keys of dic to text, in place
 73 |         keys = dic.keys()
 74 |         modifiers = []  # (old_key, new_key) pairs, applied after the loop
 75 |         for key in keys:  # dic is not mutated inside this loop; the renames are deferred through modifiers
 76 |             if type(key) == six.binary_type:
 77 |                 modifiers.append((key, self._to_unicode_if_string(key)))
 78 |             else:  # any key that is neither binary nor text fails this assertion
 79 |                 assert type(key) == six.text_type, 'key {} must be string or unicode in dict {}'.format(key, dic)
 80 | 
 81 |         for from_key, to_key in modifiers:
 82 |             self._modify_a_key(dic, from_key, to_key)
 83 | 
 84 |     def _set_false(self):  # record that a difference was found; called by every difference reporter
 85 |         self._res = False
 86 | 
 87 |     @staticmethod
 88 |     def _escape(s):
 89 |         """Decode backslash-x and backslash-u escape sequences that appear literally in s.
 90 |         :param s: binary if py2 else unicode
 91 |         :return: the decoded value; a binary result is decoded as UTF-8 before it is returned
 92 |         """
 93 |         if r'\x' in s:
 94 |             s = s.decode('string-escape') if six.PY2 else codecs.escape_decode(s)[0].decode('utf-8')  # no string-escape
 95 |         if r'\u' in s:  # NOTE(review): py3 branch below may garble non-ASCII chars already in s (UTF-8 read as Latin-1) - confirm
 96 |             s = s.decode('unicode-escape') if six.PY2 else s.encode().decode('unicode-escape')
 97 |         if type(s) == six.binary_type:
 98 |             s = s.decode('utf-8')  # This often comes from unix servers
 99 |         return s
100 | 
101 |     # difference_print methods
102 |     def _different_type(self, a, b, root):  # report that a and b have different types at the path root
103 |         self._set_false()  # a reported difference makes the overall result False
104 |         self._handle("different type at /{}".format("/".join(root)))  # root: iterable of text path segments
105 |         self._handle("a {}: ".format(type(a)) + repr(a))
106 |         self._handle("b {}: ".format(type(b)) + repr(b))
107 | 
108 |     def _different_value(self, a, b, root):  # report that a and b have different values at the path root
109 |         self._set_false()  # a reported difference makes the overall result False
110 |         self._handle("different value at /{}".format("/".join(root)))  # root: iterable of text path segments
111 |         self._handle("a: " + repr(a))
112 |         self._handle("b: " + repr(b))
113 | 
114 |     def _different_length(self, a, b, root):  # report that the lists a and b at the path root differ in length
115 |         self._set_false()  # a reported difference makes the overall result False
116 |         self._handle("different length of list at /{}".format("/".join(root)))
117 |         self._handle("len(a)={} : ".format(len(a)) + repr(a))
118 |         self._handle("len(b)={} : ".format(len(b)) + repr(b))
119 | 
120 |     def _list_item_not_found(self, ele, which, root):  # report an element of one list that has no match in the other
121 |         self._set_false()  # a reported difference makes the overall result False
122 |         self._handle("list {} at /{}".format(which, "/".join(root)))  # _list_comp passes "a" or "b" as which
123 |         self._handle("has element that another list hasn't :")
124 |         self._handle(repr(ele))
125 | 
126 |     def _list_freq_not_match(self, root, aplace, bplace, ele, counta, countb):  # report unequal occurrence counts
127 |         self._set_false()  # a reported difference makes the overall result False
128 |         self._handle(
129 |             "list at /{}, index {}, has different frequency from b at index {}:".format("/".join(root), aplace, bplace))
130 |         self._handle("element is {}".format(ele))  # NOTE(review): str.format here, the other reporters use repr()
131 |         self._handle("count of list a: {}".format(counta))
132 |         self._handle("count of list b: {}".format(countb))
133 | 
134 |     def _dict_key_not_found(self, keys, which, root):  # report keys that only one of the two dicts has
135 |         self._set_false()  # a reported difference makes the overall result False
136 |         self._handle("dict {} at /{}".format(which, "/".join(root)))
137 |         self._handle("has key(s) that another dict hasn't :")
138 |         self._handle(keys)  # the keys object is handed over as it is, not converted to a string first
139 | 
140 |     # internal compare methods
141 |     def _list_comp(self, a, b, root, printdiff):
142 |         if len(a) != len(b):
143 |             if not printdiff:
144 |                 return False
145 |             self._different_length(a, b, root)
146 |             found_b = [False] * len(b)
147 | 
148 |             for i, a_i in enumerate(a):
149 |                 found = False
150 |                 for j, b_j in enumerate(b):
151 |                     if self._common_comp(a_i, b_j, printdiff=False):
152 |                         found_b[j] = True
153 |                         found = True
154 |                         break
155 |                 if not found:
156 |                     buff = self._tuple_append(root, i)
157 |                     self._list_item_not_found(a_i, "a", buff)
158 | 
159 |             found_a = [False] * len(a)
160 |             for j, b_j in enumerate(b):
161 |                 found = False
162 |                 for i, a_i in enumerate(a):
163 |                     if self._common_comp(a_i, b_j, printdiff=False):
164 |                         found_a[i] = True
165 |                         found = True
166 |                         break
167 |                 if not found:
168 |                     buff = self._tuple_append(root, j)
169 |                     self._list_item_not_found(b_j, "b", buff)
170 |             return
171 | 
172 |         if not self._ignore_list_seq:
173 |             for i in range(min(len(a), len(b))):
174 |                 buff = self._tuple_append(root, i)
175 |                 if not self._common_comp(a[i], b[i], buff, printdiff):
176 |                     if not printdiff:
177 |                         return False
178 |         else:
179 |             counts_a = [[0, None] for _ in range(len(a))]
180 |             counts_b = [[0, None] for _ in range(len(a))]
181 |             need_to_compare_number = True
182 | 
183 |             for i in range(len(a)):
184 |                 for j in range(len(a)):
185 |                     buff = self._tuple_append(root, len(a) * 10)
186 |                     if self._common_comp(a[i], b[j], buff, printdiff=False):
187 |                         counts_a[i][1] = j
188 |                         counts_a[i][0] += 1
189 |                     if self._common_comp(b[i], a[j], buff, printdiff=False):
190 |                         counts_b[i][1] = j
191 |                         counts_b[i][0] += 1
192 | 
193 |                 if not counts_a[i][0]:
194 |                     if not printdiff:
195 |                         return False
196 |                     need_to_compare_number = False
197 |                     buff = self._tuple_append(root, i)
198 |                     self._list_item_not_found(a[i], "a", buff)
199 | 
200 |                 if not counts_b[i][0]:
201 |                     if not printdiff:
202 |                         return False
203 |                     need_to_compare_number = False
204 |                     buff = self._tuple_append(root, i)
205 |                     self._list_item_not_found(b[i], "b", buff)
206 | 
207 |             if need_to_compare_number:
208 |                 for i in range(len(counts_a)):
209 |                     counta, place = counts_a[i]
210 |                     countb = counts_b[place][0]
211 |                     if countb != counta and counts_b[place][1] == i:  # to prevent printing twice
212 |                         if not printdiff:
213 |                             return False
214 |                         self._list_freq_not_match(root, i, place, a[i], countb, counta)  # need to swap counter here:)
215 | 
216 |         if not printdiff:
217 |             return True
218 | 
219 |     def _dict_comp(self, a, b, root, printdiff):
220 |         self._turn_dict_keys_to_unicode(a)
221 |         self._turn_dict_keys_to_unicode(b)
222 | 
223 |         if self._omit_path:
224 |             omit_dict = {}
225 |             for x in self._omit_path:
226 |                 pre, tat = x.split(u"/")[1:-1], x.split(u"/")[-1]
227 |                 for i, v in enumerate(pre):
228 |                     if v == u"*" and i < len(root):
229 |                         pre[i] = root[i]
230 |                 pre = tuple(pre)
231 |                 if pre not in omit_dict:
232 |                     omit_dict[pre] = [tat]
233 |                 else:
234 |                     omit_dict[pre].append(tat)
235 |             if root in omit_dict:
236 |                 a = {k: v for k, v in a.items() if k not in omit_dict[root]}
237 |                 b = {k: v for k, v in b.items() if k not in omit_dict[root]}
238 | 
239 |         ak = a.keys()  # refresh again to make sure it's unicode now
240 |         bk = b.keys()
241 |         diffak = [x for x in ak if x not in bk]
242 |         diffbk = [x for x in bk if x not in ak]
243 |         if diffak:
244 |             if not printdiff:
245 |                 return False
246 |             self._dict_key_not_found(diffak, "a", root)
247 |         if diffbk:
248 |             if not printdiff:
249 |                 return False
250 |             self._dict_key_not_found(diffbk, "b", root)
251 |         samekeys = [x for x in ak if x in bk]
252 | 
253 |         for key in samekeys:
254 |             buff = self._tuple_append(root, key)
255 |             if not self._common_comp(a[key], b[key], buff, printdiff):
256 |                 if not printdiff:
257 |                     return False
258 | 
259 |         if not printdiff:
260 |             return True
261 | 
262 |     def _common_comp(self, a, b, root=(), printdiff=True):
263 |         if self._ignore_path:
264 |             current_path = u"/{}".format(u"/".join(root))
265 | 
266 |             for ignore_item in self._ignore_path:
267 |                 if ignore_item[0] == u"^" or ignore_item[-1] == u"$":
268 |                     find = re.findall(ignore_item, current_path)
269 |                     assert len(find) < 2, "shouldn't be this"
270 |                     if find and find[0] == current_path:
271 |                         return True
272 |                 else:
273 |                     if u"/{}".format(u"/".join(root)) == ignore_item:
274 |                         return True
275 | 
276 |         a = self._common_warp(a)
277 |         b = self._common_warp(b)
278 | 
279 |         if self._fuzzy_number_type(a) != self._fuzzy_number_type(b):
280 |             if not printdiff:
281 |                 return False
282 |             self._different_type(a, b, root)
283 |             return
284 | 
285 |         if type(a) not in [dict, list]:
286 |             if not self._value_comp(a, b, printdiff):
287 |                 if not printdiff:
288 |                     return False
289 |                 self._different_value(a, b, root)
290 |             elif not printdiff:
291 |                 return True
292 |             return
293 | 
294 |         if type(a) == list:
295 |             return self._list_comp(a, b, root, printdiff)
296 | 
297 |         if type(a) == dict:
298 |             return self._dict_comp(a, b, root, printdiff)
299 | 
300 |         raise TypeError("shouldn't be here")
301 | 
302 |     def _value_comp(self, a, b, printdiff=True):  # the most base comparison
303 |         if not self._re_compare or type(a) != six.text_type or type(b) != six.text_type:
304 |             if (type(a) == float and type(b) in _NUMBER_TYPES) or (type(b) == float and type(a) in _NUMBER_TYPES):
305 |                 return self._fuzzy_float_equal(a, b)
306 |             else:
307 |                 return a == b
308 |         else:
309 |             a_is_re = len(a) > 0 and (a[0] == u"^" or a[-1] == u"$")
310 |             b_is_re = len(b) > 0 and (b[0] == u"^" or b[-1] == u"$")  # lazy eval prevents index out of range error
311 |             if not a_is_re and not b_is_re:
312 |                 return a == b
313 |             assert not (a_is_re and b_is_re), "can't compare two regular expressions"
314 |             if b_is_re:  # let a be re
315 |                 a, b = b, a
316 |             find = re.findall(a, b)
317 |             assert len(find) < 2, "shouldn't be this"
318 |             if not find:
319 |                 if printdiff:
320 |                     self._handle("re compare failed, empty match, see next line")
321 |                 return False
322 |             if not find[0] == b:
323 |                 if printdiff:
324 |                     self._handle("re compare failed, found {}, expect {}, see next line".format(find[0], b))
325 |                 return False
326 |             return True
327 | 
328 |     def compare(self, a, b, ignore_list_seq=True, re_compare=True, ignore_path=None, callback=print, strict_json=False,
329 |                 float_fuzzy_digits=0, strict_number_type=False, omit_path=None):
330 |         """
331 |         real compare entrance
332 |         """
333 |         self._handle = callback
334 |         flag = False  # transferred str to object, need recursion
335 | 
336 |         if type(a) in [six.text_type, six.binary_type]:
337 |             json_loaded_a = json.loads(a)  # json only, should use eval when using python dict/list-like strings instead
338 |             flag = True
339 |         else:
340 |             json_loaded_a = a
341 |         if type(b) in [six.text_type, six.binary_type]:
342 |             json_loaded_b = json.loads(b)
343 |             flag = True
344 |         else:
345 |             json_loaded_b = b
346 |         if flag:
347 |             return self.compare(json_loaded_a, json_loaded_b, ignore_list_seq, re_compare, ignore_path, callback,
348 |                                 strict_json, float_fuzzy_digits, strict_number_type, omit_path)
349 | 
350 |         if strict_json:
351 |             try:
352 |                 json.dumps(a, ensure_ascii=False)
353 |                 json.dumps(b, ensure_ascii=False)
354 |             except TypeError:
355 |                 self._handle(traceback.format_exc())
356 |                 raise TypeError("unsupported type found during strict json check")
357 | 
358 |         self._res = True
359 |         self._ignore_list_seq = ignore_list_seq
360 |         self._re_compare = re_compare
361 |         self._float_fuzzy_digits = float_fuzzy_digits
362 |         self._strict_number_type = strict_number_type
363 |         self._ignore_path = None if ignore_path is None else [self._to_unicode_if_string(path) for path in ignore_path]
364 |         self._omit_path = None if omit_path is None else [self._to_unicode_if_string(path) for path in omit_path]
365 | 
366 |         if self._ignore_path:
367 |             assert all([path[0] == u"/" or u"(/" in path for path in self._ignore_path]), "invalid ignore path"
368 |         if self._omit_path:
369 |             assert all([path[0] == u"/" and path.split(u"/")[-1] not in (u"", u"*") and not path.split(u"/")[-1].
370 |                        isdigit() for path in self._omit_path]), "invalid omit path"
371 | 
372 |         self._handle(self._escape("a is {}".format(a)))
373 |         self._handle(self._escape("b is {}".format(b)))
374 |         self._handle("ignore_list_seq = {}, re_compare = {}, ignore_path = {}, omit_path = {}, float_fuzzy_digits = {}"
375 |                      .format(ignore_list_seq, re_compare, ignore_path, omit_path, self._float_fuzzy_digits))
376 | 
377 |         self._common_comp(a, b)
378 |         return self._res
379 | 
380 | 
def compare(a, b, *args, **kwargs):
    """
    module level shortcut: run a single comparison on a fresh _Compare instance

    :param a: first value, handed to _Compare.compare unchanged
    :param b: second value, handed to _Compare.compare unchanged
    :param args: extra positional arguments for _Compare.compare
    :param kwargs: extra keyword arguments for _Compare.compare
    :return: whatever _Compare.compare returns
    """
    comparer = _Compare()
    outcome = comparer.compare(a, b, *args, **kwargs)
    return outcome
383 | 
384 | 
def check(a, b, *args, **kwargs):
    """
    like compare(), but raise instead of returning a falsy result

    The error is raised explicitly rather than through an ``assert`` statement, because
    assert statements are removed when Python runs with -O, which would turn this
    function into a silent no-op.

    :param a: first value, handed to _Compare.compare unchanged
    :param b: second value, handed to _Compare.compare unchanged
    :param args: extra positional arguments for _Compare.compare
    :param kwargs: extra keyword arguments for _Compare.compare
    :return: None
    :raises AssertionError: when _Compare.compare returns a falsy result
    """
    if not _Compare().compare(a, b, *args, **kwargs):
        raise AssertionError("json comparison failed, see the callback output for details")
387 | 


--------------------------------------------------------------------------------