├── .github └── workflows │ ├── python-publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── dev-requirements.txt ├── pyproject.toml ├── pytest.ini ├── setup.cfg ├── src └── json_normalize │ ├── __init__.py │ ├── key_joiners.py │ └── main.py └── tests ├── __init__.py ├── test_combine_lists.py ├── test_drop_nodes.py ├── test_freeze_nodes.py ├── test_general_functionality.py ├── test_key_joiner.py └── test_other_input_types.py /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | jobs: 16 | deploy: 17 | 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: '3.x' 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade build pip 29 | - name: Build package 30 | run: python -m build 31 | - name: Publish package 32 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 33 | with: 34 | user: __token__ 35 | password: ${{ secrets.PYPI_API_TOKEN }} 36 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | python -m pip install flake8 pytest 26 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 27 | - name: Lint with flake8 28 | run: | 29 | # stop the build if there are Python syntax errors or undefined names 30 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 31 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 32 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=100 --statistics 33 | - name: Test with pytest 34 | run: | 35 | PYTHONPATH=src/ pytest --doctest-modules --doctest-continue-on-failure 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2024 Funnel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON_VERSION ?= 3.9 2 | 3 | dist: clean-dist venv 4 | . venv/bin/activate && python3 -m build . 5 | 6 | .PHONY: setup 7 | setup: venv/setup.txt 8 | 9 | venv: 10 | virtualenv venv --python=${PYTHON_VERSION} 11 | 12 | venv/setup.txt: venv dev-requirements.txt 13 | . venv/bin/activate && \ 14 | pip3 install --upgrade pip && \ 15 | pip3 install --requirement dev-requirements.txt 16 | touch venv/setup.txt 17 | 18 | .PHONY: clean 19 | clean: clean-dist 20 | rm -rf venv 21 | 22 | .PHONY: clean-dist 23 | clean-dist: 24 | rm -rf build 25 | rm -rf src/json_normalize.egg-info 26 | rm -rf dist 27 | 28 | .PHONY: test 29 | test: setup 30 | @ . venv/bin/activate && PYTHONPATH=src/ pytest -vv -rsx tests/ src/ --cov ./src/json_normalize/ --no-cov-on-fail --cov-report term-missing --doctest-modules --doctest-continue-on-failure 31 | @ . venv/bin/activate && flake8 src --exclude '#*,~*,.#*' 32 | @ . venv/bin/activate && black --check src/ tests/ 33 | 34 | .PHONY: test-focus 35 | test-focus: setup 36 | @ . venv/bin/activate && PYTHONPATH=src/ pytest -vv -m focus -rsx tests/ src/ --cov ./src/json_normalize/ --no-cov-on-fail --cov-report term-missing --doctest-modules --doctest-continue-on-failure 37 | @ . venv/bin/activate && flake8 src --exclude '#*,~*,.#*' 38 | @ . venv/bin/activate && black --check src/ tests/ 39 | 40 | .PHONY: release 41 | release: test dist 42 | . venv/bin/activate && twine upload dist/* 43 | 44 | .PHONY: test-release 45 | test-release: test dist 46 | . 
venv/bin/activate && twine upload -r testpypi dist/* 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JSON Normalize 2 | 3 | ![PyPI](https://img.shields.io/pypi/v/json_normalize) 4 | ![PyPI - License](https://img.shields.io/pypi/l/json_normalize) 5 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/json_normalize) 6 | ![PyPI - Status](https://img.shields.io/pypi/status/json_normalize) 7 | 8 | This package contains a function, json_normalize. It will take a json-like structure and convert it to a map object which returns dicts. Output dicts will have their path joined by ".", this can of course be customized. 9 | 10 | Data association flows up and down inside dicts, whereas in iterables, e.g. lists, data in different items is not associated with each other. 11 | 12 | ## Installation 13 | 14 | Install the package `json_normalize` version `1.1+` from PyPI. 15 | The recommended `requirements.txt` line is `json_normalize~=1.1`. 16 | 17 | ## json_normalize.json_normalize 18 | 19 | ```python 20 | json_normalize.json_normalize( 21 | tree: Union[dict, Iterable], 22 | combine_lists: Literal["chain", "product"] = None, 23 | drop_nodes: Iterable[str] = (), 24 | freeze_nodes: Iterable[str] = (), 25 | key_joiner: Union[str, Callable] = ".", 26 | ) 27 | ``` 28 | 29 | - *`tree`* - A json like structure. Any iterable inside the object that is not a dict or a string will be treated as a list. 30 | - *`combine_lists`*`=None` - If there are two different branches in the json like object the function will have to know how to combine these. If the default `None` is used the function does not know how to handle them and will raise an error. However, `combine_lists="chain"` simply puts them after each other, similar to `itertools.chain`. The other option, `combine_lists="product"`, uses `itertools.product` to combine the different branches. 
31 | - *`drop_nodes`*`=()` - This makes it possible to ignore nodes with certain names. 32 | - *`freeze_nodes`*`=()` - This makes it possible to preserve nodes with certain names, the function will not recursively keep normalizing anything below this node. If this node contains a dict it will be a dict in the end as well. 33 | - *`key_joiner`*`="."` - If you want to customize the path. `key_joiner` takes either a function or a string as input. If it is a function, it will receive the path to a certain node in the form of a tuple. If `key_joiner` is a string it will be converted to a function like this: `lambda p: key_joiner.join(p)` 34 | 35 | 36 | ## Examples 37 | 38 | A general use case: 39 | 40 | ```python 41 | >>> from json_normalize import json_normalize 42 | >>> json_like = { 43 | ... "city": "Stockholm", 44 | ... "coords": { 45 | ... "lat": 59.331924, 46 | ... "long": 18.062297 47 | ... }, 48 | ... "measurements": [ 49 | ... { 50 | ... "time": 1624363200, 51 | ... "temp": {"val": 28, "unit": "C"}, 52 | ... "wind": {"val": 2.8, "dir": 290, "unit": "m/s"}, 53 | ... }, 54 | ... { 55 | ... "time": 1624366800, 56 | ... "temp": {"val": 26, "unit": "C"}, 57 | ... } 58 | ... ] 59 | ... 
} 60 | >>> normal_json = json_normalize(json_like) 61 | >>> normal_json 62 | 63 | 64 | >>> list(normal_json) 65 | [ 66 | { 67 | 'city': 'Stockholm', 68 | 'coords.lat': 59.331924, 69 | 'coords.long': 18.062297, 70 | 'measurements.time': 1624363200, 71 | 'measurements.temp.val': 28, 72 | 'measurements.temp.unit': 'C', 73 | 'measurements.wind.val': 2.8, 74 | 'measurements.wind.dir': 290, 75 | 'measurements.wind.unit': 'm/s' 76 | }, 77 | { 78 | 'city': 'Stockholm', 79 | 'coords.lat': 59.331924, 80 | 'coords.long': 18.062297, 81 | 'measurements.time': 1624366800, 82 | 'measurements.temp.val': 26, 83 | 'measurements.temp.unit': 'C' 84 | } 85 | ] 86 | ``` 87 | 88 | 89 | 90 | 91 | Information always flows both in and out of each dict. Here the data in the `a` and `c` nodes is associated, as their closest common node (the root) is a dict; they are linked via `b`. 92 | 93 | ```python 94 | >>> json_like = { 95 | ... "a": 1, 96 | ... "b": { 97 | ... "c": "x", 98 | ... "d": 2 99 | ... } 100 | ... } 101 | >>> list(json_normalize(json_like)) 102 | [ 103 | { 104 | "a": 1, 105 | "b.c": "x", 106 | "b.d": 2 107 | } 108 | ] 109 | ``` 110 | 111 | However, if the closest common node is a list-like object the information is not associated with each other, e.g. the closest common node of the nodes `g=2` and `h=3` is a list and therefore, in the output, that data ends up in different objects. 112 | 113 | ```python 114 | >>> tree = { 115 | ... "a": 1, 116 | ... "b": [ 117 | ... { 118 | ... "c": "x", 119 | ... "g": 2 120 | ... }, 121 | ... { 122 | ... "c": "y", 123 | ... "h": 3 124 | ... } 125 | ... ] 126 | ... } 127 | >>> list(json_normalize(tree)) 128 | [ 129 | { 130 | "a": 1, 131 | "b.c": "x", 132 | "b.g": 2 133 | }, 134 | { 135 | "a": 1, 136 | "b.c": "y", 137 | "b.h": 3 138 | } 139 | ] 140 | 141 | ``` 142 | 143 | Even if a branch contains more data in a deeper layer, as long as that data is contained inside a `dict` that data will be associated with the data in other branches. 
144 | 145 | ```python 146 | >>> tree = { 147 | ... "a": { 148 | ... "j": 1.1, 149 | ... "k": 1.2 150 | ... }, 151 | ... "b": [ 152 | ... { 153 | ... "c": "x", 154 | ... "d": 2 155 | ... }, 156 | ... { 157 | ... "c": "y", 158 | ... "d": 3 159 | ... } 160 | ... ] 161 | ... } 162 | >>> list(json_normalize(tree)) 163 | [ 164 | { 165 | "a.j": 1.1, 166 | "a.k": 1.2, 167 | "b.c": "x", 168 | "b.d": 2 169 | }, 170 | { 171 | "a.j": 1.1, 172 | "a.k": 1.2, 173 | "b.c": "y", 174 | "b.d": 3 175 | } 176 | ] 177 | 178 | ``` 179 | 180 | When there are multiple lists in different branches the function will have to know how to combine them. The default is `None`, which will raise an error in case this happens. `"chain"` will put the information after each other and `"product"` will combine the information as shown below. 181 | 182 | ```python 183 | >>> tree = { 184 | ... "a": 1, 185 | ... "b": [ 186 | ... {"x": "1"}, 187 | ... {"x": "2"} 188 | ... ], 189 | ... "c": [ 190 | ... {"y": "3"}, 191 | ... {"y": "4"} 192 | ... ] 193 | ... } 194 | >>> list(json_normalize(tree)) 195 | ValueError() 196 | 197 | >>> list(json_normalize(tree, combine_lists="chain")) 198 | [ 199 | {"a": 1, "b.x": "1"}, 200 | {"a": 1, "b.x": "2"}, 201 | {"a": 1, "c.y": "3"}, 202 | {"a": 1, "c.y": "4"}, 203 | ] 204 | 205 | >>> list(json_normalize(tree, combine_lists="product")) 206 | [ 207 | {"a": 1, "b.x": "1", "c.y": "3"}, 208 | {"a": 1, "b.x": "1", "c.y": "4"}, 209 | {"a": 1, "b.x": "2", "c.y": "3"}, 210 | {"a": 1, "b.x": "2", "c.y": "4"}, 211 | ] 212 | 213 | ``` 214 | 215 | If you want to make sure you do not copy information into too many branches you can leave `combine_lists=None` and instead drop problematic nodes with the argument `drop_nodes=("b",)`. 216 | ```python 217 | >>> tree = { 218 | ... "a": 1, 219 | ... "b": [ 220 | ... {"x": "1"}, 221 | ... {"x": "2"} 222 | ... ], 223 | ... "c": [ 224 | ... {"y": "1"}, 225 | ... {"y": "2"} 226 | ... ] 227 | ... 
} 228 | >>> list(json_normalize(tree, drop_nodes=("b",))) 229 | [ 230 | {"a": 1, "c.y": "1"}, 231 | {"a": 1, "c.y": "2"}, 232 | ] 233 | ``` 234 | 235 | 236 | If you wish to customize the generated path you can do that by passing the `key_joiner` argument. 237 | ```python 238 | >>> tree = { 239 | ... "a": 1, 240 | ... "b": [ 241 | ... {"x": "1"}, 242 | ... {"x": "2"} 243 | ... ], 244 | ... } 245 | 246 | >>> def key_joiner(path: tuple) -> str: 247 | ... return path[-1] 248 | 249 | >>> list(json_normalize(tree, key_joiner=key_joiner)) 250 | [ 251 | {"a": 1, "x": "1"}, 252 | {"a": 1, "x": "2"}, 253 | ] 254 | 255 | >>> list(json_normalize(tree, key_joiner=" -> ")) 256 | [ 257 | {"a": 1, "b -> x": "1"}, 258 | {"a": 1, "b -> x": "2"}, 259 | ] 260 | ``` 261 | 262 | 263 | The function will also accept generators and similar objects. 264 | ```python 265 | >>> from itertools import chain 266 | 267 | 268 | >>> def meta_generator(): 269 | ... yield {"who": "generator", "val": a_generator(1)} 270 | ... yield {"who": "range", "val": range(10, 12)} 271 | ... yield {"who": "map", "val": map(lambda x: x**2, range(20, 22))} 272 | ... yield {"who": "chain", "val": chain([30], [31])} 273 | 274 | 275 | >>> def a_generator(n): 276 | ... yield n 277 | ... 
yield 2 * n 278 | 279 | 280 | >>> list(json_normalize(meta_generator())) 281 | [ 282 | {'who': 'generator', 'val': 1}, 283 | {'who': 'generator', 'val': 2}, 284 | {'who': 'range', 'val': 10}, 285 | {'who': 'range', 'val': 11}, 286 | {'who': 'map', 'val': 400}, 287 | {'who': 'map', 'val': 441}, 288 | {'who': 'chain', 'val': 30}, 289 | {'who': 'chain', 'val': 31}, 290 | ] 291 | ``` 292 | 293 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | black 2 | build 3 | flake8 4 | pytest 5 | pytest-clarity 6 | pytest-cov 7 | twine 8 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.black] 6 | line-length = 100 7 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | focus: the test to work on now 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 0 3 | 4 | [flake8] 5 | max-line-length = 100 6 | 7 | [metadata] 8 | name = json-normalize 9 | version = attr: json_normalize.__version__ 10 | author = The Funnel Dev Team 11 | author_email = open-source@funnel.io 12 | description = Recursively flattens a JSON-like structure into a list of flat dicts. 
13 | classifiers = 14 | Development Status :: 5 - Production/Stable 15 | License :: OSI Approved :: MIT License 16 | Programming Language :: Python :: 3 17 | Programming Language :: Python :: 3.8 18 | Programming Language :: Python :: 3.9 19 | Programming Language :: Python :: 3.10 20 | Programming Language :: Python :: 3.11 21 | Programming Language :: Python :: 3.12 22 | Topic :: Software Development :: Libraries 23 | keywords = JSON 24 | license = MIT 25 | long_description = file: README.md 26 | long_description_content_type = text/markdown 27 | requires_python = ~=3.8 28 | project_urls = 29 | Bug Reports = https://github.com/funnel-io/json-normalize/issues 30 | Source = https://github.com/funnel-io/json-normalize 31 | url = https://github.com/funnel-io/json-normalize 32 | 33 | [options] 34 | package_dir = 35 | =src 36 | packages=find: 37 | 38 | [options.packages.find] 39 | where = src 40 | -------------------------------------------------------------------------------- /src/json_normalize/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import json_normalize 2 | from .key_joiners import last_node_name 3 | 4 | __all__ = [ 5 | "json_normalize", 6 | "last_node_name", 7 | ] 8 | 9 | __version__ = "1.1.0" 10 | 11 | VERSION = __version__ 12 | -------------------------------------------------------------------------------- /src/json_normalize/key_joiners.py: -------------------------------------------------------------------------------- 1 | def last_node_name(node_names): 2 | return node_names[-1] 3 | 4 | 5 | def n_last_node_name(n, delimiter="."): 6 | def temp(node_names): 7 | return delimiter.join(node_names[-n:]) 8 | 9 | return temp 10 | -------------------------------------------------------------------------------- /src/json_normalize/main.py: -------------------------------------------------------------------------------- 1 | from itertools import chain, product 2 | from collections import defaultdict 3 
| from typing import Literal, Iterable, Callable, Generator, Any, Dict, Tuple, Union 4 | 5 | 6 | def json_normalize( 7 | tree: Union[dict, Iterable], 8 | combine_lists: Literal["chain", "product"] = None, 9 | drop_nodes: Iterable[str] = (), 10 | freeze_nodes: Iterable[str] = (), 11 | key_joiner: Union[str, Callable] = ".", 12 | ) -> Generator[dict, None, None]: 13 | """ 14 | Will flatten a dict-list structure to a list of flat dicts. 15 | >>> list(json_normalize({'a': {'A': 123}, 'b':[{'c':1},{'c':2}]})) 16 | [{'a.A': 123, 'b.c': 1}, {'a.A': 123, 'b.c': 2}] 17 | 18 | Dropping certain branches 19 | >>> list(json_normalize({'a': {'A': 123}, 'b':[{'c':1},{'c':2}]}, drop_nodes=('a',))) 20 | [{'b.c': 1}, {'b.c': 2}] 21 | 22 | Custom paths 23 | >>> list(json_normalize({'a': {'b':1}}, key_joiner=lambda p: p[-1].upper())) 24 | [{'B': 1}] 25 | """ 26 | _validate_input( 27 | tree, 28 | combine_lists, 29 | drop_nodes, 30 | freeze_nodes, 31 | key_joiner, 32 | ) 33 | 34 | if isinstance(key_joiner, str): 35 | key_joiner_str = key_joiner 36 | 37 | def key_joiner(path): 38 | return key_joiner_str.join(path) 39 | 40 | elif isinstance(key_joiner, Callable): 41 | pass 42 | else: 43 | raise TypeError(f"key_joiner has to be either a Callable or a str, got {type(key_joiner)}") 44 | 45 | flattened = _json_normalize( 46 | tree, 47 | combine_lists=combine_lists, 48 | drop_nodes=set(drop_nodes), 49 | freeze_nodes=set(freeze_nodes), 50 | key_joiner=key_joiner, 51 | tree_name=(), 52 | ) 53 | return map( 54 | lambda p: _apply_key_joiner(key_joiner, p), 55 | flattened, 56 | ) 57 | 58 | 59 | def _validate_input( 60 | tree, 61 | combine_lists, 62 | drop_nodes, 63 | freeze_nodes, 64 | key_joiner, 65 | ): 66 | allowed_values = ("chain", "product", None) 67 | if combine_lists not in allowed_values: 68 | raise ValueError(f"combine_lists allowed values: {allowed_values}, got {combine_lists}") 69 | 70 | 71 | def _json_normalize(tree, **kwargs) -> Generator[Dict[Tuple, Any], None, None]: 72 | if 
isinstance(tree, dict): 73 | current_node_name = kwargs.pop("tree_name", ()) 74 | freeze_nodes = kwargs.get("freeze_nodes", ()) 75 | drop_nodes = kwargs.get("drop_nodes", ()) 76 | 77 | tree = _flatten_dict(tree, current_node_name, drop_nodes, freeze_nodes) 78 | leaves = _leaves(tree, freeze_nodes) 79 | branches = _branches(tree, kwargs, freeze_nodes) 80 | 81 | for branch_data in _combine_branches(branches, kwargs["combine_lists"]): 82 | yield _merge_dicts(leaves, *branch_data) 83 | 84 | elif not _is_leaf(tree): 85 | for branch in tree: 86 | for flatted_branch in _json_normalize(branch, **kwargs): 87 | yield flatted_branch 88 | 89 | else: 90 | yield {kwargs["tree_name"]: tree} 91 | 92 | 93 | def _flatten_dict(a_dict, name, drop_nodes, freeze_nodes) -> Dict[Tuple[str], Any]: 94 | out = {} 95 | for k, v in a_dict.items(): 96 | node_name = (*name, k) 97 | if k in drop_nodes: 98 | pass 99 | elif isinstance(v, dict) and k not in freeze_nodes: 100 | out.update(_flatten_dict(v, node_name, drop_nodes, freeze_nodes)) 101 | else: 102 | out[node_name] = v 103 | return out 104 | 105 | 106 | def _branches(tree, kwargs, freeze_nodes): 107 | return [ 108 | _json_normalize(v, **kwargs, tree_name=k) 109 | for k, v in tree.items() 110 | if not _is_leaf(v) and k[-1] not in freeze_nodes 111 | ] 112 | 113 | 114 | def _leaves(tree, freeze_nodes): 115 | return {k: v for k, v in tree.items() if (_is_leaf(v) or k[-1] in freeze_nodes)} 116 | 117 | 118 | def _is_leaf(node): 119 | return isinstance(node, str) or not isinstance(node, Iterable) 120 | 121 | 122 | def _combine_branches(branches, method) -> Iterable[Tuple[Dict]]: 123 | if not branches: 124 | return ({},) 125 | elif len(branches) == 1: 126 | return map(lambda x: (x,), branches[0]) 127 | elif method == "product": 128 | return product(*branches) 129 | elif method == "chain": 130 | return map(lambda x: (x,), chain.from_iterable(branches)) 131 | else: 132 | raise ValueError("Multiple branches dont know how to handle these, either ") 133 
| 134 | 135 | def _apply_key_joiner(key_joiner, raw): 136 | out = {key_joiner(k): v for k, v in raw.items()} 137 | 138 | if len(out) != len(raw): 139 | msg = _build_helper_message(key_joiner, raw) 140 | raise ValueError(f"Multiple raw keys were writtern to the same key. \n{msg}") 141 | return out 142 | 143 | 144 | def _build_helper_message(key_joiner, raw): 145 | helper = defaultdict(list) 146 | for k in raw: 147 | helper[key_joiner(k)].append(k) 148 | 149 | helper = {k: v for k, v in helper.items() if len(v) > 1} 150 | msg = "\n\n".join("\n".join(f"\t{v_i} -> {k}" for v_i in v) for k, v in helper.items()) 151 | return msg 152 | 153 | 154 | def _merge_dicts(*dicts): 155 | output = {} 156 | for d in dicts: 157 | output.update(d) 158 | return output 159 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/funnel-io/json-normalize/350fff866720a7a88f47294229187f8096bc87af/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_combine_lists.py: -------------------------------------------------------------------------------- 1 | from json_normalize import json_normalize 2 | import pytest 3 | 4 | 5 | def test_combine_lists_chain(): 6 | tree = { 7 | "a": [1, 2], 8 | "b": [ 9 | {"x": "hej"}, 10 | {"x": "san"}, 11 | ], 12 | } 13 | expected = [ 14 | {"a": 1}, 15 | {"a": 2}, 16 | {"b.x": "hej"}, 17 | {"b.x": "san"}, 18 | ] 19 | actual = json_normalize(tree, combine_lists="chain") 20 | assert list(actual) == expected 21 | 22 | 23 | def test_combine_lists_product(): 24 | tree = { 25 | "a": [1, 2], 26 | "b": [3, 4], 27 | } 28 | expected = [ 29 | {"a": 1, "b": 3}, 30 | {"a": 1, "b": 4}, 31 | {"a": 2, "b": 3}, 32 | {"a": 2, "b": 4}, 33 | ] 34 | actual = json_normalize(tree, combine_lists="product") 35 | assert list(actual) == expected 36 | 37 | 38 | def 
test_multiple_dependant_lists(): 39 | tree = { 40 | "a": 1, 41 | "b": [{"x": "1"}, {"x": "2"}], 42 | "c": { 43 | "d": [ 44 | {"y": "1"}, 45 | {"y": "2"}, 46 | ], 47 | }, 48 | } 49 | expected = [ 50 | {"a": 1, "b.x": "1", "c.d.y": "1"}, 51 | {"a": 1, "b.x": "1", "c.d.y": "2"}, 52 | {"a": 1, "b.x": "2", "c.d.y": "1"}, 53 | {"a": 1, "b.x": "2", "c.d.y": "2"}, 54 | ] 55 | actual = json_normalize(tree, combine_lists="product") 56 | assert list(actual) == expected 57 | 58 | 59 | def test_combine_lists_default_none_should_raise_error(): 60 | tree = { 61 | "a": [1, 2], 62 | "b": [1, 2], 63 | } 64 | with pytest.raises(ValueError): 65 | list(json_normalize(tree)) 66 | 67 | 68 | def test_improper_input(): 69 | with pytest.raises(ValueError): 70 | list(json_normalize({}, combine_lists="This should not work")) 71 | -------------------------------------------------------------------------------- /tests/test_drop_nodes.py: -------------------------------------------------------------------------------- 1 | from json_normalize import json_normalize 2 | 3 | 4 | def test_ignore_nodes_single_node(): 5 | tree = { 6 | "a": 1, 7 | "b": [ 8 | {"c": "x", "d": 2}, 9 | {"c": "y", "d": 3}, 10 | ], 11 | } 12 | expected = [ 13 | {"b.c": "x", "b.d": 2}, 14 | {"b.c": "y", "b.d": 3}, 15 | ] 16 | actual = json_normalize(tree, drop_nodes=("a",)) 17 | assert list(actual) == expected 18 | 19 | 20 | def test_ignore_nodes_entire_branch(): 21 | tree = { 22 | "a": 1, 23 | "b": [ 24 | {"c": "x", "d": 2}, 25 | {"c": "y", "d": 3}, 26 | ], 27 | } 28 | expected = [ 29 | {"a": 1}, 30 | ] 31 | actual = json_normalize(tree, drop_nodes=("b",)) 32 | assert list(actual) == expected 33 | -------------------------------------------------------------------------------- /tests/test_freeze_nodes.py: -------------------------------------------------------------------------------- 1 | from json_normalize import json_normalize 2 | 3 | 4 | def test_freeze_nodes_single_node(): 5 | tree = { 6 | "a": 1, 7 | "b": [ 8 | {"c": 
"x", "d": 2}, 9 | {"c": "y", "d": 3}, 10 | ], 11 | } 12 | expected = [ 13 | { 14 | "a": 1, 15 | "b.c": "x", 16 | "b.d": 2, 17 | }, 18 | { 19 | "a": 1, 20 | "b.c": "y", 21 | "b.d": 3, 22 | }, 23 | ] 24 | actual = json_normalize(tree, freeze_nodes=("a",)) 25 | assert list(actual) == expected 26 | 27 | 28 | def test_freeze_nodes_entire_branch(): 29 | tree = { 30 | "a": 1, 31 | "b": [1, 2, 3], 32 | "c": ["x", "y"], 33 | } 34 | expected = [ 35 | { 36 | "a": 1, 37 | "b": [1, 2, 3], 38 | "c": "x", 39 | }, 40 | { 41 | "a": 1, 42 | "b": [1, 2, 3], 43 | "c": "y", 44 | }, 45 | ] 46 | actual = json_normalize(tree, freeze_nodes=("b",)) 47 | assert list(actual) == expected 48 | -------------------------------------------------------------------------------- /tests/test_general_functionality.py: -------------------------------------------------------------------------------- 1 | from json_normalize import json_normalize 2 | 3 | 4 | def test_listless_tree(): 5 | tree = { 6 | "a": 1, 7 | "b": { 8 | "c": "x", 9 | "d": 2, 10 | }, 11 | } 12 | expected = [ 13 | { 14 | "a": 1, 15 | "b.c": "x", 16 | "b.d": 2, 17 | } 18 | ] 19 | actual = json_normalize(tree) 20 | assert list(actual) == expected 21 | 22 | 23 | def test_tree_with_list_independant_items(): 24 | tree = { 25 | "a": 1, 26 | "b": [ 27 | {"c": "x", "d": 2}, 28 | {"c": "y", "d": 3}, 29 | ], 30 | } 31 | expected = [ 32 | { 33 | "a": 1, 34 | "b.c": "x", 35 | "b.d": 2, 36 | }, 37 | { 38 | "a": 1, 39 | "b.c": "y", 40 | "b.d": 3, 41 | }, 42 | ] 43 | actual = json_normalize(tree) 44 | assert list(actual) == expected 45 | 46 | 47 | def test_list_of_values(): 48 | tree = { 49 | "a": 1, 50 | "b": [1, 2, 3], 51 | } 52 | expected = [ 53 | {"a": 1, "b": 1}, 54 | {"a": 1, "b": 2}, 55 | {"a": 1, "b": 3}, 56 | ] 57 | actual = json_normalize(tree) 58 | assert list(actual) == expected 59 | 60 | 61 | def test_list_input(): 62 | tree = [ 63 | {"a": 1, "b": {"d": 2}}, 64 | { 65 | "a": 2, 66 | "b": {"d": 4}, 67 | }, 68 | ] 69 | expected = [ 70 | {"a": 
1, "b.d": 2}, 71 | {"a": 2, "b.d": 4}, 72 | ] 73 | actual = json_normalize(tree) 74 | assert list(actual) == expected 75 | 76 | 77 | def test_nested_list_input(): 78 | tree = [ 79 | {"a": 1, "b": [1, 2]}, 80 | { 81 | "a": 2, 82 | "b": [3, 4], 83 | }, 84 | ] 85 | expected = [ 86 | {"a": 1, "b": 1}, 87 | {"a": 1, "b": 2}, 88 | {"a": 2, "b": 3}, 89 | {"a": 2, "b": 4}, 90 | ] 91 | actual = json_normalize(tree) 92 | assert list(actual) == expected 93 | -------------------------------------------------------------------------------- /tests/test_key_joiner.py: -------------------------------------------------------------------------------- 1 | from json_normalize import json_normalize, last_node_name 2 | import pytest 3 | 4 | 5 | def test_custom_key_joiner_string(): 6 | tree = { 7 | "a": 1, 8 | "b": [ 9 | {"c": "x", "d": 2}, 10 | ], 11 | } 12 | expected = [ 13 | {"a": 1, "b-c": "x", "b-d": 2}, 14 | ] 15 | actual = json_normalize(tree, key_joiner="-") 16 | assert list(actual) == expected 17 | 18 | 19 | def test_custom_key_joiner_function(): 20 | tree = { 21 | "a": 1, 22 | "b": [ 23 | {"c": "x", "d": 2}, 24 | ], 25 | } 26 | expected = [ 27 | {"a": 1, "b-c": "x", "b-d": 2}, 28 | ] 29 | actual = json_normalize(tree, key_joiner=lambda p: "-".join(p)) 30 | assert list(actual) == expected 31 | 32 | 33 | def test_custom_key_joiner_function_last_node_name(): 34 | tree = { 35 | "a": 1, 36 | "b": [ 37 | {"c": "x", "d": 2}, 38 | ], 39 | } 40 | expected = [ 41 | {"a": 1, "c": "x", "d": 2}, 42 | ] 43 | actual = json_normalize(tree, key_joiner=last_node_name) 44 | assert list(actual) == expected 45 | 46 | 47 | def test_overwriting_names_should_raise_error(): 48 | tree = { 49 | "a": 1, 50 | "b": { 51 | "a": "1", 52 | }, 53 | } 54 | with pytest.raises(ValueError): 55 | list(json_normalize(tree, key_joiner=last_node_name)) 56 | 57 | 58 | def test_improper_input_type(): 59 | with pytest.raises(TypeError): 60 | list(json_normalize({}, key_joiner=dict())) 61 | 
-------------------------------------------------------------------------------- /tests/test_other_input_types.py: -------------------------------------------------------------------------------- 1 | from json_normalize import json_normalize 2 | 3 | 4 | def test_generator_input(): 5 | def a_dict_generator(): 6 | yield {"a": 1} 7 | yield {"a": 2} 8 | 9 | expected = [ 10 | {"a": 1}, 11 | {"a": 2}, 12 | ] 13 | actual = json_normalize(a_dict_generator()) 14 | assert list(actual) == expected 15 | 16 | 17 | def test_nested_generators(): 18 | def a_dict_generator(n): 19 | yield {"a": n} 20 | yield {"a": 2 * n} 21 | 22 | def another_dict_generator(): 23 | yield {"b": 3, "c": a_dict_generator(1)} 24 | yield {"b": 4, "c": a_dict_generator(2)} 25 | 26 | expected = [ 27 | {"b": 3, "c.a": 1}, 28 | {"b": 3, "c.a": 2}, 29 | {"b": 4, "c.a": 2}, 30 | {"b": 4, "c.a": 4}, 31 | ] 32 | actual = json_normalize(another_dict_generator()) 33 | assert list(actual) == expected 34 | --------------------------------------------------------------------------------