├── MANIFEST.in ├── LICENSE ├── setup.py ├── demo.py ├── README.md ├── .gitignore └── pyjsonpath.py /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE *.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 hitplum 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = "hitplum" 4 | 5 | from setuptools import setup, find_packages 6 | 7 | with open("README.md", "r") as fh: 8 | long_description = fh.read() 9 | 10 | setup(name='pyjsonpath', 11 | version='1.2.3', 12 | description='A Python toolkit for parsing JSON document', 13 | author='hitplum', 14 | author_email='ycx921101@163.com', 15 | url='https://github.com/hitplum/pyjsonpath', 16 | py_modules=["pyjsonpath"], 17 | # packages=['pyjsonpath'], 18 | # packages=find_packages(), 19 | long_description=long_description, 20 | long_description_content_type="text/markdown", 21 | license="MIT", 22 | classifiers=[ 23 | "Programming Language :: Python :: 3.5", 24 | "Programming Language :: Python :: 3.6", 25 | "Programming Language :: Python :: 3.7", 26 | "Programming Language :: Python :: 3.8", 27 | "Programming Language :: Python :: 3.9", 28 | "Programming Language :: Python :: Implementation :: PyPy", 29 | "License :: OSI Approved :: MIT License", 30 | ], 31 | python_requires='>=3.5' 32 | ) 33 | 34 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | obj = { 4 | "store": { 5 | "book": [ 6 | { 7 | "category": "reference", 8 | "author": "Nigel Rees", 9 | "title": "Sayings of the Century", 10 | "price": 8.95 11 | }, 12 | { 13 | "category": "fiction", 14 | "author": "Evelyn Waugh", 15 | "title": "Sword of Honour", 16 | "price": 12.99 17 | }, 18 | { 19 | "category": "fiction", 20 | "author": "Herman Melville", 21 | "title": "Moby Dick", 22 | "isbn": "0-553-21311-3", 23 | "price": 8.99 24 | }, 25 | { 26 | "category": "fiction", 27 | "author": "J. R. R. 
Tolkien", 28 | "title": "The Lord of the Rings", 29 | "isbn": "0-395-19395-8", 30 | "price": 22.99 31 | } 32 | ], 33 | "bicycle": { 34 | "color": "red", 35 | "price": 19.95 36 | } 37 | }, 38 | "expensive": 10 39 | } 40 | expr = "$.store.book[1,3]" 41 | 42 | 43 | from pyjsonpath import JsonPath 44 | result = JsonPath(obj, expr).load() 45 | print(len(result), result) 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python JsonPath 2 | A Python tool that parses JSON documents using JsonPath 3 | 4 | Inspired by: https://github.com/json-path/JsonPath 5 | 6 | ## Quick Start 7 | install it using the pip command 8 | ``` 9 | pip install pyjsonpath 10 | ``` 11 | Then: 12 | ``` 13 | >>> from pyjsonpath import JsonPath 14 | >>> obj = {"name": "jsonpath"} 15 | >>> JsonPath(obj, "$.name").load() 16 | ['jsonpath'] 17 | >>> 18 | ``` 19 | 20 | ## JsonPath Syntax 21 | - General Operators 22 | 23 | |operators|description| 24 | |---|---| 25 | |$| The root element to query. This starts all path expressions.| 26 | |@| The current node being processed by a filter predicate.| 27 | |*| Wildcard. Available anywhere a name or numeric are required.| 28 | |..| Deep scan. Available anywhere a name is required.| 29 | |.|Dot-notated child| 30 | |['\' (, '\\')]|Bracket-notated child or children| 31 | |[\ (, \)]|Array index or indexes| 32 | |[start:end]|Array slice operator| 33 | |[?(\)]|Filter expression. 
Expression must evaluate to a boolean value.| 34 | 35 | - Filter Operators 36 | 37 | |operators|description| 38 | |---|---| 39 | |== |left is equal to right (note that 1 is not equal to '1')| 40 | |!= |left is not equal to right| 41 | |< |left is less than right| 42 | |<= |left is less than or equal to right| 43 | |> |left is greater than right| 44 | |>= |left is greater than or equal to right| 45 | |=~ |left matches regular expression [?(@.name =~ /foo.*?/i)]| 46 | |in |left exists in right [?(@.size in ['S', 'M'])]| 47 | |nin|left does not exist in right| 48 | |subsetof|left is a subset of right [?(@.sizes subsetof ['S', 'M', 'L'])]| 49 | |anyof|left has an intersection with right [?(@.sizes anyof ['M', 'L'])]| 50 | |noneof|left has no intersection with right [?(@.sizes noneof ['M', 'L'])]| 51 | |size|size of left (array or string) should match right| 52 | |empty|left (array or string) should be empty| 53 | 54 | - Functions 55 | 56 | |operators|description| 57 | |---|---| 58 | |min()|Provides the min value of an array of numbers| 59 | |max()|Provides the max value of an array of numbers| 60 | |avg()|Provides the average value of an array of numbers| 61 | |stddev()|Provides the standard deviation value of an array of numbers| 62 | |length()|Provides the length of an array| 63 | |sum()|Provides the sum value of an array of numbers| 64 | |keys()|Provides the property keys (An alternative for terminal tilde ~)| 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26
| .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .pypirc 131 | -------------------------------------------------------------------------------- /pyjsonpath.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | import traceback 4 | from math import sqrt 5 | from copy import deepcopy 6 | 7 | 8 | pattern_dict = r'\[("|\').+?("|\')\]' 9 | pattern_index = r'\[[0-9]+[, 0-9]*\]' 10 | pattern_tuple_keys = r'\[[0-9a-zA-Z_]+[, 0-9a-zA-Z_]*\]' 11 | pattern_split = r'\[((\*)|((-)?[0-9]*:(-)?[0-9]*))\]' 12 | pattern_dot = r'\.\*?' 13 | pattern_double_dot = r'\.\.((\*)|(\[\*\]))?' 14 | pattern_normal_type = r'[\-\_0-9a-zA-Z\u4e00-\u9fa5]+(\(\))?' 15 | pattern_controller_type = r'\[\?\(.+?\)\]' 16 | pattern_filter_type = r'\sin\s|\snin\s|\ssubsetof\s|\sanyof\s|\snoneof\s|\ssize\s|\sempty|\s?[\!\=\>\<\~]+\s?' 
def math_avg(L):
    """Return the arithmetic mean of the numbers in ``L``.

    Raises:
        ValueError: if ``L`` is empty.  ``JsonPath.load`` already turns a
            ``ValueError`` into an empty result, which is how ``min()`` and
            ``max()`` on an empty array behave as well.
    """
    if not L:
        raise ValueError("avg() requires at least one value")
    return sum(L) / len(L)


def math_stddev(L):
    """Return the sample standard deviation (``n - 1`` denominator) of ``L``.

    Raises:
        ValueError: if ``L`` holds fewer than two values, for which the
            sample standard deviation is undefined.
    """
    if len(L) < 2:
        raise ValueError("stddev() requires at least two values")
    a = math_avg(L)
    return sqrt(sum([(i - a) * (i - a) for i in L]) / (len(L) - 1))


# Maps the function tokens accepted at the end of a path to their
# implementation.
func_dict = {'min()': min, 'max()': max, 'avg()': math_avg, 'stddev()': math_stddev, 'length()': len, 'sum()': sum}


class UnExpectJsonPathError(Exception):
    """Raised when a path or filter expression cannot be parsed."""


class JsonPath(object):
    """Evaluate a JsonPath expression against a decoded JSON document.

    Usage::

        JsonPath({"name": "jsonpath"}, "$.name").load()  # -> ['jsonpath']

    The parser consumes the expression token by token.  Every ``*_parsing``
    method receives the current list of matched nodes plus the remaining
    expression and returns ``(new_nodes, remaining_expression)``.

    NOTE: filter expressions (``[?(...)]``) are translated to Python source
    and run with ``eval``; never evaluate path expressions that come from an
    untrusted source.
    """

    # Aggregate functions that are only applied to arrays of numbers.
    _NUMERIC_FUNCS = ('min()', 'max()', 'avg()', 'stddev()', 'sum()')
    # JSON literals and their Python spelling inside generated filter code.
    _JSON_LITERALS = {'true': 'True', 'false': 'False', 'null': 'None'}

    def __init__(self, obj, expr):
        """Store the document ``obj`` and the path expression ``expr``."""
        self.obj = obj
        self.expr = expr

    def load(self):
        """Evaluate the expression and return the list of matched values.

        Returns:
            list: the matches; an empty list when nothing matches or when
            parsing fails part-way (the traceback is printed in that case).

        Raises:
            ValueError: if the expression does not start with ``$``.
        """
        expr = self.expr.strip()
        if not expr.startswith('$'):
            raise ValueError("'expr' is not a parsable JsonPath format")

        result = []
        try:
            self.start_parsing(self.obj, expr, result)
        except (KeyError, IndexError, ValueError, UnExpectJsonPathError):
            fmt = traceback.format_exc()
            print(fmt)
            return []
        except RecursionError:
            # Deeply nested documents can exhaust the stack during a deep scan.
            return []
        return result

    def start_parsing(self, obj, expr, result):
        """Consume ``expr`` token by token, leaving the matches in ``result``.

        Args:
            obj: current list of matched nodes (the document itself on entry).
            expr: remaining path expression.
            result: list updated in place; after the call it holds the nodes
                matched by the whole expression.  It is left untouched when
                ``expr`` is empty.

        Raises:
            UnExpectJsonPathError: if a token cannot be parsed.
        """
        while expr:
            remaining = len(expr)
            result.clear()
            if expr.startswith('$'):
                # '$' always restarts from the document root.
                obj, expr = self.match_parsing(self.obj, expr)
            elif expr.startswith('[?('):
                obj, expr = self.controller_parsing(obj, expr)
            elif expr.startswith('['):
                obj, expr = self.index_parsing(obj, expr)
            elif expr.startswith('..'):
                obj, expr = self.scan_parsing(obj, expr)
            elif expr.startswith('.['):
                # ".[" (including ".[?") is handled as a deep scan followed
                # by the bracket token.
                obj, expr = self.scan_parsing(obj, '..[' + expr[2:])
            elif expr.startswith('.'):
                obj, expr = self.dot_parsing(obj, expr)
            else:
                obj, expr = self.normal_parsing(obj, expr)

            if len(expr) >= remaining:
                # No handler consumed anything: stop instead of looping forever.
                raise UnExpectJsonPathError('Ungrammatical JsonPath')
            result.extend(obj)

    def match_parsing(self, obj, expr):
        """Consume the leading ``$`` and return ``obj`` as the only node."""
        expr = expr[1:]
        result = [obj]
        return result, expr

    @staticmethod
    def _parse_indexes(token):
        """Turn an index token such as ``[1, 3]`` into a list of ints."""
        indexes = []
        for part in token[1:-1].split(','):
            part = part.strip()
            if not part:
                continue
            if not part.isdigit():
                raise UnExpectJsonPathError('Ungrammatical JsonPath')
            indexes.append(int(part))
        return indexes

    def index_parsing(self, obj, expr):
        """Handle a bracket token at the start of ``expr``.

        Supported forms, tried in this order: quoted key(s) ``['a']`` /
        ``['a','b']``, numeric indexes ``[0]`` / ``[1,3]``, bare keys
        ``[a,b]``, wildcard ``[*]`` and slices ``[start:end]``.

        Returns:
            tuple: ``(matched_nodes, remaining_expression)``.

        Raises:
            UnExpectJsonPathError: if the token has none of those forms.
        """
        result = []

        m = re.match(pattern_dict, expr)
        if m:
            g = m.group()
            key = g[2:-2]
            # "['a','b']" arrives here as the single text "a','b"; split it
            # so that every quoted name is looked up on its own.
            names = re.split(r"""["']\s*,\s*["']""", key)
            for item in obj:
                if not isinstance(item, dict):
                    continue
                if key in item:
                    result.append(item[key])
                elif len(names) > 1:
                    result.extend(item[name] for name in names if name in item)
            return result, expr[len(g):]

        m = re.match(pattern_index, expr)
        if m:
            g = m.group()
            index_list = self._parse_indexes(g)
            for item in obj:
                if isinstance(item, list):
                    # Out-of-range indexes are skipped silently.
                    result.extend(item[i] for i in index_list if i < len(item))
            return result, expr[len(g):]

        m = re.match(pattern_tuple_keys, expr)
        if m:
            g = m.group()
            for key in g[1:-1].split(','):
                key = key.strip()
                for item in obj:
                    if isinstance(item, dict) and key in item:
                        result.append(item[key])
            return result, expr[len(g):]

        m = re.match(pattern_split, expr)
        if m:
            g = m.group()
            if g == '[*]':
                for item in obj:
                    if isinstance(item, list):
                        result.extend(item)
                    elif isinstance(item, dict):
                        result.extend(list(item.values()))
            else:
                start, end = [part.strip() for part in g[1:-1].split(':')]
                for item in obj:
                    if isinstance(item, list):
                        # Bounds are resolved per array so that an open end
                        # always means "to the end of *this* array".
                        lo = int(start) if start else 0
                        hi = int(end) if end else len(item)
                        result.extend(item[lo:hi])
            return result, expr[len(g):]

        raise UnExpectJsonPathError('Ungrammatical JsonPath')

    def scan_parsing(self, obj, expr):
        """Handle a deep-scan token (``..``, ``..*`` or ``..[*]``).

        ``..`` collects every container (and ``None`` leaf) below the
        current nodes, the nodes themselves included; the wildcard forms
        collect every descendant value, scalars included, but not the
        current nodes.

        Returns:
            tuple: ``(matched_nodes, remaining_expression)``.
        """
        result = []

        def scan(value, x=''):
            if isinstance(value, list):
                result.append(value)
                for item in value:
                    scan(item, x)
            elif isinstance(value, dict):
                result.append(value)
                for item in value.values():
                    scan(item, x)
            elif x == '*' or x == '[*]' or value is None:
                result.append(value)

        m = re.match(pattern_double_dot, expr)
        if m:
            g = m.group()
            x = g[2:]
            if not x:
                for item in obj:
                    scan(item, x)
            elif x == '*' or x == '[*]':
                for node in obj:
                    if isinstance(node, dict):
                        children = node.values()
                    elif isinstance(node, list):
                        children = node
                    else:
                        continue
                    for item in children:
                        scan(item, x)
            expr = expr[len(g):]

        return result, expr

    def dot_parsing(self, obj, expr):
        """Handle a ``.`` or ``.*`` token.

        A plain ``.`` keeps the current nodes (as a deep copy); ``.*``
        replaces them with all of their children.

        Returns:
            tuple: ``(matched_nodes, remaining_expression)``.
        """
        result = []
        m = re.match(pattern_dot, expr)
        if m:
            g = m.group()
            if g == '.':
                result = deepcopy(obj)
            else:
                for item in obj:
                    if isinstance(item, list):
                        result.extend(item)
                    elif isinstance(item, dict):
                        result.extend(list(item.values()))
            expr = expr[len(g):]
        return result, expr

    def normal_parsing(self, obj, expr):
        """Handle a bare name, a numeric index or a function token.

        For dict nodes the token is a key (or ``keys()``); for list nodes it
        is an element index, ``length()`` or one of the numeric aggregate
        functions.

        Returns:
            tuple: ``(matched_nodes, remaining_expression)``.

        Raises:
            UnExpectJsonPathError: if ``expr`` does not start with a name.
        """
        m = re.match(pattern_normal_type, expr)
        if not m:
            raise UnExpectJsonPathError('Ungrammatical JsonPath')

        result = []
        g = m.group()
        for item in obj:
            if isinstance(item, dict) and g in item:
                result.append(item[g])
            elif isinstance(item, list):
                if re.fullmatch(r"[0-9]+", g):
                    # Index into the current array (not into the node list);
                    # out-of-range indexes are skipped.
                    position = int(g)
                    if position < len(item):
                        result.append(item[position])
                elif g == 'length()':
                    result.append(func_dict[g](item))
                elif g in self._NUMERIC_FUNCS and all(
                        isinstance(i, (int, float)) for i in item):
                    result.append(func_dict[g](item))
            elif isinstance(item, dict) and g == 'keys()':
                result.extend(list(item.keys()))
        return result, expr[len(g):]

    def parse_value(self, value, compare):
        """Resolve a filter operand.

        A ``$``-rooted path is evaluated against the document and its matches
        are returned; a regular-expression operand (``compare == '=~'``) is
        returned unchanged; anything else is evaluated as a Python literal.

        WARNING: the last case uses ``eval``; do not pass untrusted input.
        """
        if value.startswith('$'):
            res = []
            self.start_parsing(self.obj, value, res)
            return res
        return value if compare == '=~' else eval(value)

    def _translate_term(self, s):
        """Translate one filter term into Python source using ``child``.

        Returns an empty string when the term has no known shape; the caller
        then keeps the original text.

        Raises:
            UnExpectJsonPathError: for an operator that is not valid in that
                position or a malformed regular-expression operand.
        """
        name_step = r"\.([_0-9a-zA-Z\u4e00-\u9fa5]+)"
        spt = re.split(pattern_filter_type, s)
        if len(spt) == 2:
            left, right = spt
            # The operator is whatever sits between the two operands.
            compare = s[len(left):len(s) - len(right)].strip()
            compare = compare.replace('nin', 'not in')
            if left.startswith('@'):
                right = self._JSON_LITERALS.get(right, right)
                # "@.a.b" -> "child['a']['b']"
                target = "child" + re.sub(name_step, r"['\1']", left)[1:]
                if compare in ('empty', 'size'):
                    right = 0 if compare == 'empty' else right
                    return f"len({target}) == {right}"
                if compare == "subsetof":
                    return f"(set({target}).issubset(set({right})) if isinstance({target}, list) else False)"
                if compare == "anyof":
                    return f"(set({target}) & set({right}) if isinstance({target}, list) else False)"
                if compare == "noneof":
                    return f"(not set({target}) & set({right}) if isinstance({target}, list) else True)"
                if compare == "=~":
                    if not right.startswith("/") or not right.endswith("/i"):
                        raise UnExpectJsonPathError('Ungrammatical JsonPath')
                    # Strip the leading "/" and the trailing "/i"; repr()
                    # keeps quotes and backslashes in the pattern intact.
                    return f"re.match({right[1:-2]!r}, {target}, re.I)"
                return f"{target} {compare} {right}"
            if right.startswith('@'):
                left = self._JSON_LITERALS.get(left, left)
                target = "child" + re.sub(name_step, r"['\1']", right)[1:]
                if compare in ('empty', 'size', '=~'):
                    raise UnExpectJsonPathError('不符合语法的JsonPath')
                if compare == "subsetof":
                    return f"set({left}).issubset(set({target}))"
                if compare == "anyof":
                    return f"set({left}) & set({target})"
                if compare == "noneof":
                    return f"not set({left}) & set({target})"
                return f"{left} {compare} {target}"
            return ''
        if s.startswith("!@"):
            return "not child" + re.sub(name_step, r"['\1']", s)[2:]
        if s.startswith("@"):
            return "child" + re.sub(name_step, r"['\1']", s)[1:]
        return ''

    def normalize(self, value, index=0, replaced_dict=None):
        """Translate a JsonPath filter body into a Python expression.

        Every term that mentions the current node (``@...``) is rewritten in
        terms of a variable named ``child``; ``&&`` and ``||`` become ``and``
        and ``or``.

        Args:
            value: text between ``[?(`` and ``)]``.
            index: number of the last placeholder already in use.
            replaced_dict: placeholder name -> translated term, for
                placeholders already present in ``value``.

        Returns:
            str: Python source for ``start_filtering`` to evaluate.

        Raises:
            UnExpectJsonPathError: if a term uses an operator that is not
                valid in that position.
        """
        replaced_dict = replaced_dict if replaced_dict else {}

        name = r"[_0-9a-zA-Z\u4e00-\u9fa5]+"
        path = rf"((\.{name})|(\[[\"'].+?[\"']\]))"
        # NOTE(review): the "$" below is not escaped, so it acts as an
        # end-of-input anchor and a root-relative operand ("$.a.b") can never
        # match this alternative.  Kept as in the original expression.
        literal = rf"(true|false|null|\d+\.?\d*|\/.*?/i|'.*?'|\[.*?\]|${path}+)"
        term = (rf"(@{path}+({pattern_filter_type})?{literal}?)"
                rf"|({literal}({pattern_filter_type})@{path})")

        while True:
            m = re.search(term, value)
            if not m:
                break
            begin, end = m.span()
            s = m.group()
            new_s = self._translate_term(s)
            index += 1
            k = f"param_{index}"
            replaced_dict[k] = new_s if new_s else s
            # Park the translated term behind a placeholder so that it is
            # not matched again on the next pass.
            value = value[:begin] + '{' + k + '}' + value[end:]

        value = value.replace("&&", "and").replace("||", "or")
        return value.format(**replaced_dict) if replaced_dict else value

    def start_filtering(self, obj, expr):
        """Return the candidates for which the filter body ``expr`` is true.

        When every current node is a list, the candidates are the elements of
        those lists; otherwise the candidates are the dict nodes themselves
        and deep copies of the matching ones are returned.  A candidate whose
        evaluation raises is treated as not matching.

        WARNING: the translated filter is run with ``eval`` and can reach the
        Python builtins; do not evaluate untrusted filter expressions.
        """
        result = []
        expr = self.normalize(expr, index=0, replaced_dict=None)
        if not expr:
            return result

        try:
            # eval() ignores leading blanks in a source string, compile()
            # does not; strip them to accept the same input.
            code = compile(expr.lstrip(' \t'), '<jsonpath-filter>', 'eval')
        except Exception:
            # A filter that is not valid Python matches nothing.
            return result

        # The generated code only needs ``re`` and ``child``.
        scope = {'re': re}

        def matches(child):
            scope['child'] = child
            try:
                return bool(eval(code, scope))
            except Exception:
                return False

        if all(isinstance(i, list) for i in obj):
            for item in obj:
                result.extend(child for child in item if matches(child))
        else:
            for item in obj:
                if isinstance(item, dict) and matches(item):
                    result.append(deepcopy(item))

        return result

    def controller_parsing(self, obj, expr):
        """Handle a filter token ``[?(...)]`` at the start of ``expr``.

        Returns:
            tuple: ``(matched_nodes, remaining_expression)``.

        Raises:
            UnExpectJsonPathError: if the filter token is not terminated.
        """
        m = re.match(pattern_controller_type, expr)
        if not m:
            raise UnExpectJsonPathError('Ungrammatical JsonPath')
        g = m.group()
        # Drop the surrounding "[?(" and ")]".
        result = self.start_filtering(obj, g[3:-2])
        return result, expr[len(g):]