├── .github
    └── workflows
    │   ├── python-publish.yml
    │   └── test.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── dev-requirements.txt
├── pyproject.toml
├── pytest.ini
├── setup.cfg
├── src
    └── json_normalize
    │   ├── __init__.py
    │   ├── key_joiners.py
    │   └── main.py
└── tests
    ├── __init__.py
    ├── test_combine_lists.py
    ├── test_drop_nodes.py
    ├── test_freeze_nodes.py
    ├── test_general_functionality.py
    ├── test_key_joiner.py
    └── test_other_input_types.py


/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]
14 | 
15 | jobs:
16 |   deploy:
17 | 
18 |     runs-on: ubuntu-latest
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v4
22 |     - name: Set up Python
23 |       uses: actions/setup-python@v5
24 |       with:
25 |         python-version: '3.x'
26 |     - name: Install dependencies
27 |       run: |
28 |         python -m pip install --upgrade build pip
29 |     - name: Build package
30 |       run: python -m build
31 |     - name: Publish package
32 |       uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
33 |       with:
34 |         user: __token__
35 |         password: ${{ secrets.PYPI_API_TOKEN }}
36 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       fail-fast: false
14 |       matrix:
15 |         python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
16 |     steps:
17 |     - uses: actions/checkout@v4
18 |     - name: Set up Python ${{ matrix.python-version }}
19 |       uses: actions/setup-python@v5
20 |       with:
21 |         python-version: ${{ matrix.python-version }}
22 |     - name: Install dependencies
23 |       run: |
24 |         python -m pip install --upgrade pip
25 |         python -m pip install flake8 pytest
26 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
27 |     - name: Lint with flake8
28 |       run: |
29 |         # stop the build if there are Python syntax errors or undefined names
30 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
31 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
32 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=100 --statistics
33 |     - name: Test with pytest
34 |       run: |
35 |         PYTHONPATH=src/ pytest --doctest-modules --doctest-continue-on-failure
36 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021-2024 Funnel
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PYTHON_VERSION ?= 3.9
 2 | 
 3 | dist: clean-dist venv
 4 | 	. venv/bin/activate && python3 -m build .
 5 | 
 6 | .PHONY: setup
 7 | setup: venv/setup.txt
 8 | 
 9 | venv:
10 | 	virtualenv venv --python=${PYTHON_VERSION}
11 | 
12 | venv/setup.txt: venv dev-requirements.txt
13 | 	. venv/bin/activate && \
14 | 	pip3 install --upgrade pip && \
15 | 	pip3 install --requirement dev-requirements.txt
16 | 	touch venv/setup.txt
17 | 
18 | .PHONY: clean
19 | clean: clean-dist
20 | 	rm -rf venv
21 | 
22 | .PHONY: clean-dist
23 | clean-dist:
24 | 	rm -rf build
25 | 	rm -rf src/json_normalize.egg-info
26 | 	rm -rf dist
27 | 
28 | .PHONY: test
29 | test: setup
30 | 	@ . venv/bin/activate && PYTHONPATH=src/ pytest -vv -rsx tests/ src/ --cov ./src/json_normalize/ --no-cov-on-fail --cov-report term-missing --doctest-modules --doctest-continue-on-failure
31 | 	@ . venv/bin/activate && flake8 src --exclude '#*,~*,.#*'
32 | 	@ . venv/bin/activate && black --check src/ tests/
33 | 
34 | .PHONY: test-focus
35 | test-focus: setup
36 | 	@ . venv/bin/activate && PYTHONPATH=src/ pytest -vv -m focus -rsx tests/ src/ --cov ./src/json_normalize/ --no-cov-on-fail --cov-report term-missing --doctest-modules --doctest-continue-on-failure
37 | 	@ . venv/bin/activate && flake8 src --exclude '#*,~*,.#*'
38 | 	@ . venv/bin/activate && black --check src/ tests/
39 | 
40 | .PHONY: release
41 | release: test dist
42 | 	. venv/bin/activate && twine upload dist/*
43 | 
44 | .PHONY: test-release
45 | test-release: test dist
46 | 	. venv/bin/activate && twine upload -r testpypi dist/*
47 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # JSON Normalize
  2 | 
  3 | ![PyPI](https://img.shields.io/pypi/v/json_normalize)
  4 | ![PyPI - License](https://img.shields.io/pypi/l/json_normalize)
  5 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/json_normalize)
  6 | ![PyPI - Status](https://img.shields.io/pypi/status/json_normalize)
  7 | 
  8 | This package contains a function, json_normalize. It will take a json-like structure and convert it to a map object which returns dicts. Output dicts will have their path joined by ".", this can of course be customized.
  9 | 
 10 | Data association flows up and down inside dicts, whereas data inside iterables, e.g. lists, is not associated between the items and ends up in separate output dicts.
 11 | 
 12 | ## Installation
 13 | 
 14 | Install the package `json_normalize` version `1.1+` from PyPI.  
 15 | The recommended `requirements.txt` line is `json_normalize~=1.1`.
 16 | 
 17 | ## json_normalize.json_normalize
 18 | 
 19 | ```python
 20 | json_normalize.json_normalize(
 21 |     tree: Union[dict, Iterable],
 22 |     combine_lists: Literal["chain", "product"] = None,
 23 |     drop_nodes: Iterable[str] = (),
 24 |     freeze_nodes: Iterable[str] = (),
 25 |     key_joiner: Union[str, Callable] = ".",
 26 | )
 27 | ```
 28 | 
 29 | - *`tree`* - A json like structure. Any iterable inside the object that is not a dict or a string will be treated as a list.
 30 | - *`combine_lists`*`=None` - If there are lists in two or more different branches of the json like object, the function has to know how to combine them. With the default `None` the function does not know how to handle them and will raise an error. With `combine_lists="chain"` the branches are simply put after each other, similar to `itertools.chain`. The other option, `combine_lists="product"`, uses `itertools.product` to combine the different branches.
 31 | - *`drop_nodes`*`=()` - This makes it possible to ignore nodes with certain names
 32 | - *`freeze_nodes`*`=()` - This makes it possible to preserve nodes with certain names; the function will not recursively keep normalizing anything below such a node. If the node contains a dict it will be a dict in the end as well.
 33 | - *`key_joiner`*`="."` - If you want to customize the path. `key_joiner` takes either a function or a string as input. If it is a function, it will receive the path to a certain node in the form of a tuple. If `key_joiner` is a string it will be converted to a function like this: `lambda p: key_joiner.join(p)`
 34 | 
 35 | 
 36 | ## Examples
 37 | 
 38 | A General use case:
 39 | 
 40 | ```python
 41 | >>> from json_normalize import json_normalize
 42 | >>> json_like = {
 43 | ...     "city": "Stockholm",
 44 | ...     "coords": {
 45 | ...         "lat": 59.331924,
 46 | ...         "long": 18.062297
 47 | ...     },
 48 | ...     "measurements": [
 49 | ...         {
 50 | ...             "time": 1624363200,
 51 | ...             "temp": {"val": 28, "unit": "C"},
 52 | ...             "wind": {"val": 2.8, "dir": 290, "unit": "m/s"},
 53 | ...         },
 54 | ...         {
 55 | ...             "time": 1624366800,
 56 | ...             "temp": {"val": 26, "unit": "C"},
 57 | ...         }
 58 | ...     ]
 59 | ... }
 60 | >>> normal_json = json_normalize(json_like)
 61 | >>> normal_json
 62 | <map object at ...>
 63 | 
 64 | >>> list(normal_json)
 65 | [
 66 |     {
 67 |         'city': 'Stockholm',
 68 |         'coords.lat': 59.331924,
 69 |         'coords.long': 18.062297,
 70 |         'measurements.time': 1624363200,
 71 |         'measurements.temp.val': 28,
 72 |         'measurements.temp.unit': 'C',
 73 |         'measurements.wind.val': 2.8,
 74 |         'measurements.wind.dir': 290,
 75 |         'measurements.wind.unit': 'm/s'
 76 |     },
 77 |     {
 78 |         'city': 'Stockholm',
 79 |         'coords.lat': 59.331924,
 80 |         'coords.long': 18.062297,
 81 |         'measurements.time': 1624366800,
 82 |         'measurements.temp.val': 26,
 83 |         'measurements.temp.unit': 'C'
 84 |     }
 85 | ]
 86 | ```
 87 | 
 88 | 
 89 | 
 90 | 
 91 | Information always flows both in and out of each container. Here the data in the `a` and `c` nodes are associated, linked via `b`, because their closest common node (the root) is a dict.
 92 | 
 93 | ```python
 94 | >>> json_like = {
 95 | ...     "a": 1,
 96 | ...     "b": {
 97 | ...         "c": "x",
 98 | ...         "d": 2
 99 | ...     }
100 | ... }
101 | >>> list(json_normalize(json_like))
102 | [
103 |     {
104 |         "a": 1,
105 |         "b.c": "x",
106 |         "b.d": 2
107 |     }
108 | ]
109 | ```
110 | 
111 | However, if the closest common node is a list-like object, the information is not associated with each other, e.g. the closest common node of the nodes `g=2` and `h=3` is a list and therefore, in the output, that data ends up in different objects.
112 | 
113 | ```python
114 | >>> tree = {
115 | ...     "a": 1,
116 | ...     "b": [
117 | ...         {
118 | ...             "c": "x",
119 | ...             "g": 2
120 | ...         },
121 | ...         {
122 | ...             "c": "y",
123 | ...             "h": 3
124 | ...         }
125 | ...     ]
126 | ... }
127 | >>> list(json_normalize(tree))
128 | [
129 |     {
130 |         "a": 1,
131 |         "b.c": "x",
132 |         "b.g": 2
133 |     },
134 |     {
135 |         "a": 1,
136 |         "b.c": "y",
137 |         "b.h": 3
138 |     }
139 | ]
140 | 
141 | ```
142 | 
143 | Even if a branch contains more data in a deeper layer as long as that data is contained inside a `dict` that data will be associated with the data in other branches.
144 | 
145 | ```python
146 | >>> tree = {
147 | ...     "a": {
148 | ...         "j": 1.1,
149 | ...         "k": 1.2
150 | ...     },
151 | ...     "b": [
152 | ...         {
153 | ...             "c": "x",
154 | ...             "d": 2
155 | ...         },
156 | ...         {
157 | ...             "c": "y",
158 | ...             "d": 3
159 | ...         }
160 | ...     ]
161 | ... }
162 | >>> list(json_normalize(tree))
163 | [
164 |     {
165 |         "a.j": 1.1,
166 |         "a.k": 1.2,
167 |         "b.c": "x",
168 |         "b.d": 2
169 |     },
170 |     {
171 |         "a.j": 1.1,
172 |         "a.k": 1.2,
173 |         "b.c": "y",
174 |         "b.d": 3
175 |     }
176 | ]
177 | 
178 | ```
179 | 
180 | When there are multiple lists in different branches the function has to know how to combine them. The default is `None`, which will raise an error in case this happens. `"chain"` will put the information after each other and `"product"` will combine the information as shown below.
181 | 
182 | ```python
183 | >>> tree = {
184 | ...     "a": 1,
185 | ...     "b": [
186 | ...         {"x": "1"},
187 | ...         {"x": "2"}
188 | ...     ],
189 | ...     "c": [
190 | ...         {"y": "3"},
191 | ...         {"y": "4"}
192 | ...     ]
193 | ... }
194 | >>> list(json_normalize(tree))
195 | ValueError()
196 | 
197 | >>> list(json_normalize(tree, combine_lists="chain"))
198 | [
199 |     {"a": 1, "b.x": "1"},
200 |     {"a": 1, "b.x": "2"},
201 |     {"a": 1, "c.y": "3"},
202 |     {"a": 1, "c.y": "4"},
203 | ]
204 | 
205 | >>> list(json_normalize(tree, combine_lists="product"))
206 | [
207 |     {"a": 1, "b.x": "1", "c.y": "3"},
208 |     {"a": 1, "b.x": "1", "c.y": "4"},
209 |     {"a": 1, "b.x": "2", "c.y": "3"},
210 |     {"a": 1, "b.x": "2", "c.y": "4"},
211 | ]
212 | 
213 | ```
214 | 
215 | If you want to make sure you do not copy information into too many branches you can leave `combine_lists=None` and instead drop problematic nodes with the argument `drop_nodes=("b",)`.
216 | ```python
217 | >>> tree = {
218 | ...     "a": 1,
219 | ...     "b": [
220 | ...         {"x": "1"},
221 | ...         {"x": "2"}
222 | ...     ],
223 | ...     "c": [
224 | ...         {"y": "1"},
225 | ...         {"y": "2"}
226 | ...     ]
227 | ... }
228 | >>> list(json_normalize(tree, drop_nodes=("b",)))
229 | [
230 |     {"a": 1, "c.y": "1"},
231 |     {"a": 1, "c.y": "2"},
232 | ]
233 | ```
234 | 
235 | 
236 | If you wish to customize the generated path you can do that by passing the `key_joiner` argument.
237 | ```python
238 | >>> tree = {
239 | ...     "a": 1,
240 | ...     "b": [
241 | ...         {"x": "1"},
242 | ...         {"x": "2"}
243 | ...     ],
244 | ... }
245 | 
246 | >>> def key_joiner(path: tuple) -> str:
247 | ...     return path[-1]
248 | 
249 | >>> list(json_normalize(tree, key_joiner=key_joiner))
250 | [
251 |     {"a": 1, "x": "1"},
252 |     {"a": 1, "x": "2"},
253 | ]
254 | 
255 | >>> list(json_normalize(tree, key_joiner=" -> "))
256 | [
257 |     {"a": 1, "b -> x": "1"},
258 |     {"a": 1, "b -> x": "2"},
259 | ]
260 | ```
261 | 
262 | 
263 | The function will also accept generators and similar objects.
264 | ```python
265 | >>> from itertools import chain
266 | 
267 | 
268 | >>> def meta_generator():
269 | ...     yield {"who": "generator", "val": a_generator(1)}
270 | ...     yield {"who": "range", "val": range(10, 12)}
271 | ...     yield {"who": "map", "val": map(lambda x: x**2, range(20, 22))}
272 | ...     yield {"who": "chain", "val": chain([30], [31])}
273 | 
274 | 
275 | >>> def a_generator(n):
276 | ...     yield n
277 | ...     yield 2 * n
278 | 
279 | 
280 | >>> list(json_normalize(meta_generator()))
281 | [
282 |     {'who': 'generator', 'val': 1},
283 |     {'who': 'generator', 'val': 2},
284 |     {'who': 'range', 'val': 10},
285 |     {'who': 'range', 'val': 11},
286 |     {'who': 'map', 'val': 400},
287 |     {'who': 'map', 'val': 441},
288 |     {'who': 'chain', 'val': 30},
289 |     {'who': 'chain', 'val': 31},
290 | ]
291 | ```
292 | 
293 | 


--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | black
2 | build
3 | flake8
4 | pytest
5 | pytest-clarity
6 | pytest-cov
7 | twine
8 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [tool.black]
6 | line-length = 100
7 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | markers =
3 |     focus: the test to work on now
4 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [bdist_wheel]
 2 | universal = 0
 3 | 
 4 | [flake8]
 5 | max-line-length = 100
 6 | 
 7 | [metadata]
 8 | name = json-normalize
 9 | version = attr: json_normalize.__version__
10 | author = The Funnel Dev Team
11 | author_email = open-source@funnel.io
12 | description = Recursively flattens a JSON-like structure into a list of flat dicts.
13 | classifiers =
14 |   Development Status :: 5 - Production/Stable
15 |   License :: OSI Approved :: MIT License
16 |   Programming Language :: Python :: 3
17 |   Programming Language :: Python :: 3.8
18 |   Programming Language :: Python :: 3.9
19 |   Programming Language :: Python :: 3.10
20 |   Programming Language :: Python :: 3.11
21 |   Programming Language :: Python :: 3.12
22 |   Topic :: Software Development :: Libraries
23 | keywords = JSON
24 | license = MIT
25 | long_description = file: README.md
26 | long_description_content_type = text/markdown
27 | requires_python = ~=3.8
28 | project_urls =
29 |   Bug Reports = https://github.com/funnel-io/json-normalize/issues
30 |   Source = https://github.com/funnel-io/json-normalize
31 | url = https://github.com/funnel-io/json-normalize
32 | 
33 | [options]
34 | package_dir =
35 |   =src
36 | packages=find:
37 | 
38 | [options.packages.find]
39 | where = src
40 | 


--------------------------------------------------------------------------------
/src/json_normalize/__init__.py:
--------------------------------------------------------------------------------
 1 | from .main import json_normalize
 2 | from .key_joiners import last_node_name
 3 | 
 4 | __all__ = [
 5 |     "json_normalize",
 6 |     "last_node_name",
 7 | ]
 8 | 
 9 | __version__ = "1.1.0"
10 | 
11 | VERSION = __version__
12 | 


--------------------------------------------------------------------------------
/src/json_normalize/key_joiners.py:
--------------------------------------------------------------------------------
 1 | def last_node_name(node_names):
 2 |     return node_names[-1]
 3 | 
 4 | 
 5 | def n_last_node_name(n, delimiter="."):
 6 |     def temp(node_names):
 7 |         return delimiter.join(node_names[-n:])
 8 | 
 9 |     return temp
10 | 


--------------------------------------------------------------------------------
/src/json_normalize/main.py:
--------------------------------------------------------------------------------
  1 | from itertools import chain, product
  2 | from collections import defaultdict
  3 | from typing import Literal, Iterable, Callable, Generator, Any, Dict, Tuple, Union
  4 | 
  5 | 
  6 | def json_normalize(
  7 |     tree: Union[dict, Iterable],
  8 |     combine_lists: Literal["chain", "product"] = None,
  9 |     drop_nodes: Iterable[str] = (),
 10 |     freeze_nodes: Iterable[str] = (),
 11 |     key_joiner: Union[str, Callable] = ".",
 12 | ) -> Generator[dict, None, None]:
 13 |     """
 14 |     Will flatten a dict-list structure to a list of flat dicts.
 15 |     >>> list(json_normalize({'a': {'A': 123}, 'b':[{'c':1},{'c':2}]}))
 16 |     [{'a.A': 123, 'b.c': 1}, {'a.A': 123, 'b.c': 2}]
 17 | 
 18 |     Dropping certain branches
 19 |     >>> list(json_normalize({'a': {'A': 123}, 'b':[{'c':1},{'c':2}]}, drop_nodes=('a',)))
 20 |     [{'b.c': 1}, {'b.c': 2}]
 21 | 
 22 |     Custom paths
 23 |     >>> list(json_normalize({'a': {'b':1}}, key_joiner=lambda p: p[-1].upper()))
 24 |     [{'B': 1}]
 25 |     """
 26 |     _validate_input(
 27 |         tree,
 28 |         combine_lists,
 29 |         drop_nodes,
 30 |         freeze_nodes,
 31 |         key_joiner,
 32 |     )
 33 | 
 34 |     if isinstance(key_joiner, str):
 35 |         key_joiner_str = key_joiner
 36 | 
 37 |         def key_joiner(path):
 38 |             return key_joiner_str.join(path)
 39 | 
 40 |     elif isinstance(key_joiner, Callable):
 41 |         pass
 42 |     else:
 43 |         raise TypeError(f"key_joiner has to be either a Callable or a str, got {type(key_joiner)}")
 44 | 
 45 |     flattened = _json_normalize(
 46 |         tree,
 47 |         combine_lists=combine_lists,
 48 |         drop_nodes=set(drop_nodes),
 49 |         freeze_nodes=set(freeze_nodes),
 50 |         key_joiner=key_joiner,
 51 |         tree_name=(),
 52 |     )
 53 |     return map(
 54 |         lambda p: _apply_key_joiner(key_joiner, p),
 55 |         flattened,
 56 |     )
 57 | 
 58 | 
 59 | def _validate_input(
 60 |     tree,
 61 |     combine_lists,
 62 |     drop_nodes,
 63 |     freeze_nodes,
 64 |     key_joiner,
 65 | ):
 66 |     allowed_values = ("chain", "product", None)
 67 |     if combine_lists not in allowed_values:
 68 |         raise ValueError(f"combine_lists allowed values: {allowed_values}, got {combine_lists}")
 69 | 
 70 | 
 71 | def _json_normalize(tree, **kwargs) -> Generator[Dict[Tuple, Any], None, None]:
 72 |     if isinstance(tree, dict):
 73 |         current_node_name = kwargs.pop("tree_name", ())
 74 |         freeze_nodes = kwargs.get("freeze_nodes", ())
 75 |         drop_nodes = kwargs.get("drop_nodes", ())
 76 | 
 77 |         tree = _flatten_dict(tree, current_node_name, drop_nodes, freeze_nodes)
 78 |         leaves = _leaves(tree, freeze_nodes)
 79 |         branches = _branches(tree, kwargs, freeze_nodes)
 80 | 
 81 |         for branch_data in _combine_branches(branches, kwargs["combine_lists"]):
 82 |             yield _merge_dicts(leaves, *branch_data)
 83 | 
 84 |     elif not _is_leaf(tree):
 85 |         for branch in tree:
 86 |             for flatted_branch in _json_normalize(branch, **kwargs):
 87 |                 yield flatted_branch
 88 | 
 89 |     else:
 90 |         yield {kwargs["tree_name"]: tree}
 91 | 
 92 | 
 93 | def _flatten_dict(a_dict, name, drop_nodes, freeze_nodes) -> Dict[Tuple[str], Any]:
 94 |     out = {}
 95 |     for k, v in a_dict.items():
 96 |         node_name = (*name, k)
 97 |         if k in drop_nodes:
 98 |             pass
 99 |         elif isinstance(v, dict) and k not in freeze_nodes:
100 |             out.update(_flatten_dict(v, node_name, drop_nodes, freeze_nodes))
101 |         else:
102 |             out[node_name] = v
103 |     return out
104 | 
105 | 
def _branches(tree, kwargs, freeze_nodes):
    """Return one ``_json_normalize`` generator per non-leaf, non-frozen value of ``tree``.

    ``tree`` is keyed by path tuples; each path is passed on as ``tree_name``.
    """
    pending = []
    for path, value in tree.items():
        if _is_leaf(value) or path[-1] in freeze_nodes:
            continue
        pending.append(_json_normalize(value, **kwargs, tree_name=path))
    return pending
112 | 
113 | 
def _leaves(tree, freeze_nodes):
    """Return the entries of ``tree`` whose value is a leaf or whose last path name is frozen."""
    kept = {}
    for path, value in tree.items():
        if _is_leaf(value) or path[-1] in freeze_nodes:
            kept[path] = value
    return kept
116 | 
117 | 
118 | def _is_leaf(node):
119 |     return isinstance(node, str) or not isinstance(node, Iterable)
120 | 
121 | 
122 | def _combine_branches(branches, method) -> Iterable[Tuple[Dict]]:
123 |     if not branches:
124 |         return ({},)
125 |     elif len(branches) == 1:
126 |         return map(lambda x: (x,), branches[0])
127 |     elif method == "product":
128 |         return product(*branches)
129 |     elif method == "chain":
130 |         return map(lambda x: (x,), chain.from_iterable(branches))
131 |     else:
132 |         raise ValueError("Multiple branches dont know how to handle these, either ")
133 | 
134 | 
135 | def _apply_key_joiner(key_joiner, raw):
136 |     out = {key_joiner(k): v for k, v in raw.items()}
137 | 
138 |     if len(out) != len(raw):
139 |         msg = _build_helper_message(key_joiner, raw)
140 |         raise ValueError(f"Multiple raw keys were writtern to the same key. \n{msg}")
141 |     return out
142 | 
143 | 
144 | def _build_helper_message(key_joiner, raw):
145 |     helper = defaultdict(list)
146 |     for k in raw:
147 |         helper[key_joiner(k)].append(k)
148 | 
149 |     helper = {k: v for k, v in helper.items() if len(v) > 1}
150 |     msg = "\n\n".join("\n".join(f"\t{v_i} -> {k}" for v_i in v) for k, v in helper.items())
151 |     return msg
152 | 
153 | 
154 | def _merge_dicts(*dicts):
155 |     output = {}
156 |     for d in dicts:
157 |         output.update(d)
158 |     return output
159 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/funnel-io/json-normalize/350fff866720a7a88f47294229187f8096bc87af/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_combine_lists.py:
--------------------------------------------------------------------------------
 1 | from json_normalize import json_normalize
 2 | import pytest
 3 | 
 4 | 
 5 | def test_combine_lists_chain():
 6 |     tree = {
 7 |         "a": [1, 2],
 8 |         "b": [
 9 |             {"x": "hej"},
10 |             {"x": "san"},
11 |         ],
12 |     }
13 |     expected = [
14 |         {"a": 1},
15 |         {"a": 2},
16 |         {"b.x": "hej"},
17 |         {"b.x": "san"},
18 |     ]
19 |     actual = json_normalize(tree, combine_lists="chain")
20 |     assert list(actual) == expected
21 | 
22 | 
def test_combine_lists_product():
    """With combine_lists="product" every item of "a" is paired with every item of "b"."""
    source = {"a": [1, 2], "b": [3, 4]}
    rows = list(json_normalize(source, combine_lists="product"))
    assert rows == [
        {"a": 1, "b": 3},
        {"a": 1, "b": 4},
        {"a": 2, "b": 3},
        {"a": 2, "b": 4},
    ]
36 | 
37 | 
def test_multiple_dependant_lists():
    """A list nested inside a dict ("c.d") is combined with a top-level list ("b")."""
    source = {
        "a": 1,
        "b": [{"x": "1"}, {"x": "2"}],
        "c": {"d": [{"y": "1"}, {"y": "2"}]},
    }
    rows = list(json_normalize(source, combine_lists="product"))
    assert rows == [
        {"a": 1, "b.x": "1", "c.d.y": "1"},
        {"a": 1, "b.x": "1", "c.d.y": "2"},
        {"a": 1, "b.x": "2", "c.d.y": "1"},
        {"a": 1, "b.x": "2", "c.d.y": "2"},
    ]
57 | 
58 | 
def test_combine_lists_default_none_should_raise_error():
    """Two list branches without a combine_lists method raise ValueError."""
    source = {"a": [1, 2], "b": [1, 2]}
    with pytest.raises(ValueError):
        list(json_normalize(source))
66 | 
67 | 
def test_improper_input():
    """An unknown combine_lists value raises ValueError."""
    invalid_method = "This should not work"
    with pytest.raises(ValueError):
        list(json_normalize({}, combine_lists=invalid_method))
71 | 


--------------------------------------------------------------------------------
/tests/test_drop_nodes.py:
--------------------------------------------------------------------------------
 1 | from json_normalize import json_normalize
 2 | 
 3 | 
 4 | def test_ignore_nodes_single_node():
 5 |     tree = {
 6 |         "a": 1,
 7 |         "b": [
 8 |             {"c": "x", "d": 2},
 9 |             {"c": "y", "d": 3},
10 |         ],
11 |     }
12 |     expected = [
13 |         {"b.c": "x", "b.d": 2},
14 |         {"b.c": "y", "b.d": 3},
15 |     ]
16 |     actual = json_normalize(tree, drop_nodes=("a",))
17 |     assert list(actual) == expected
18 | 
19 | 
def test_ignore_nodes_entire_branch():
    """Dropping the list branch "b" leaves a single row holding only "a"."""
    source = {
        "a": 1,
        "b": [{"c": "x", "d": 2}, {"c": "y", "d": 3}],
    }
    rows = list(json_normalize(source, drop_nodes=("b",)))
    assert rows == [{"a": 1}]
33 | 


--------------------------------------------------------------------------------
/tests/test_freeze_nodes.py:
--------------------------------------------------------------------------------
 1 | from json_normalize import json_normalize
 2 | 
 3 | 
 4 | def test_freeze_nodes_single_node():
 5 |     tree = {
 6 |         "a": 1,
 7 |         "b": [
 8 |             {"c": "x", "d": 2},
 9 |             {"c": "y", "d": 3},
10 |         ],
11 |     }
12 |     expected = [
13 |         {
14 |             "a": 1,
15 |             "b.c": "x",
16 |             "b.d": 2,
17 |         },
18 |         {
19 |             "a": 1,
20 |             "b.c": "y",
21 |             "b.d": 3,
22 |         },
23 |     ]
24 |     actual = json_normalize(tree, freeze_nodes=("a",))
25 |     assert list(actual) == expected
26 | 
27 | 
28 | def test_freeze_nodes_entire_branch():
29 |     tree = {
30 |         "a": 1,
31 |         "b": [1, 2, 3],
32 |         "c": ["x", "y"],
33 |     }
34 |     expected = [
35 |         {
36 |             "a": 1,
37 |             "b": [1, 2, 3],
38 |             "c": "x",
39 |         },
40 |         {
41 |             "a": 1,
42 |             "b": [1, 2, 3],
43 |             "c": "y",
44 |         },
45 |     ]
46 |     actual = json_normalize(tree, freeze_nodes=("b",))
47 |     assert list(actual) == expected
48 | 


--------------------------------------------------------------------------------
/tests/test_general_functionality.py:
--------------------------------------------------------------------------------
 1 | from json_normalize import json_normalize
 2 | 
 3 | 
 4 | def test_listless_tree():
 5 |     tree = {
 6 |         "a": 1,
 7 |         "b": {
 8 |             "c": "x",
 9 |             "d": 2,
10 |         },
11 |     }
12 |     expected = [
13 |         {
14 |             "a": 1,
15 |             "b.c": "x",
16 |             "b.d": 2,
17 |         }
18 |     ]
19 |     actual = json_normalize(tree)
20 |     assert list(actual) == expected
21 | 
22 | 
23 | def test_tree_with_list_independant_items():
24 |     tree = {
25 |         "a": 1,
26 |         "b": [
27 |             {"c": "x", "d": 2},
28 |             {"c": "y", "d": 3},
29 |         ],
30 |     }
31 |     expected = [
32 |         {
33 |             "a": 1,
34 |             "b.c": "x",
35 |             "b.d": 2,
36 |         },
37 |         {
38 |             "a": 1,
39 |             "b.c": "y",
40 |             "b.d": 3,
41 |         },
42 |     ]
43 |     actual = json_normalize(tree)
44 |     assert list(actual) == expected
45 | 
46 | 
47 | def test_list_of_values():
48 |     tree = {
49 |         "a": 1,
50 |         "b": [1, 2, 3],
51 |     }
52 |     expected = [
53 |         {"a": 1, "b": 1},
54 |         {"a": 1, "b": 2},
55 |         {"a": 1, "b": 3},
56 |     ]
57 |     actual = json_normalize(tree)
58 |     assert list(actual) == expected
59 | 
60 | 
61 | def test_list_input():
62 |     tree = [
63 |         {"a": 1, "b": {"d": 2}},
64 |         {
65 |             "a": 2,
66 |             "b": {"d": 4},
67 |         },
68 |     ]
69 |     expected = [
70 |         {"a": 1, "b.d": 2},
71 |         {"a": 2, "b.d": 4},
72 |     ]
73 |     actual = json_normalize(tree)
74 |     assert list(actual) == expected
75 | 
76 | 
77 | def test_nested_list_input():
78 |     tree = [
79 |         {"a": 1, "b": [1, 2]},
80 |         {
81 |             "a": 2,
82 |             "b": [3, 4],
83 |         },
84 |     ]
85 |     expected = [
86 |         {"a": 1, "b": 1},
87 |         {"a": 1, "b": 2},
88 |         {"a": 2, "b": 3},
89 |         {"a": 2, "b": 4},
90 |     ]
91 |     actual = json_normalize(tree)
92 |     assert list(actual) == expected
93 | 


--------------------------------------------------------------------------------
/tests/test_key_joiner.py:
--------------------------------------------------------------------------------
 1 | from json_normalize import json_normalize, last_node_name
 2 | import pytest
 3 | 
 4 | 
 5 | def test_custom_key_joiner_string():
 6 |     tree = {
 7 |         "a": 1,
 8 |         "b": [
 9 |             {"c": "x", "d": 2},
10 |         ],
11 |     }
12 |     expected = [
13 |         {"a": 1, "b-c": "x", "b-d": 2},
14 |     ]
15 |     actual = json_normalize(tree, key_joiner="-")
16 |     assert list(actual) == expected
17 | 
18 | 
19 | def test_custom_key_joiner_function():
20 |     tree = {
21 |         "a": 1,
22 |         "b": [
23 |             {"c": "x", "d": 2},
24 |         ],
25 |     }
26 |     expected = [
27 |         {"a": 1, "b-c": "x", "b-d": 2},
28 |     ]
29 |     actual = json_normalize(tree, key_joiner=lambda p: "-".join(p))
30 |     assert list(actual) == expected
31 | 
32 | 
33 | def test_custom_key_joiner_function_last_node_name():
34 |     tree = {
35 |         "a": 1,
36 |         "b": [
37 |             {"c": "x", "d": 2},
38 |         ],
39 |     }
40 |     expected = [
41 |         {"a": 1, "c": "x", "d": 2},
42 |     ]
43 |     actual = json_normalize(tree, key_joiner=last_node_name)
44 |     assert list(actual) == expected
45 | 
46 | 
47 | def test_overwriting_names_should_raise_error():
48 |     tree = {
49 |         "a": 1,
50 |         "b": {
51 |             "a": "1",
52 |         },
53 |     }
54 |     with pytest.raises(ValueError):
55 |         list(json_normalize(tree, key_joiner=last_node_name))
56 | 
57 | 
58 | def test_improper_input_type():
59 |     with pytest.raises(TypeError):
60 |         list(json_normalize({}, key_joiner=dict()))
61 | 


--------------------------------------------------------------------------------
/tests/test_other_input_types.py:
--------------------------------------------------------------------------------
 1 | from json_normalize import json_normalize
 2 | 
 3 | 
 4 | def test_generator_input():
 5 |     def a_dict_generator():
 6 |         yield {"a": 1}
 7 |         yield {"a": 2}
 8 | 
 9 |     expected = [
10 |         {"a": 1},
11 |         {"a": 2},
12 |     ]
13 |     actual = json_normalize(a_dict_generator())
14 |     assert list(actual) == expected
15 | 
16 | 
17 | def test_nested_generators():
18 |     def a_dict_generator(n):
19 |         yield {"a": n}
20 |         yield {"a": 2 * n}
21 | 
22 |     def another_dict_generator():
23 |         yield {"b": 3, "c": a_dict_generator(1)}
24 |         yield {"b": 4, "c": a_dict_generator(2)}
25 | 
26 |     expected = [
27 |         {"b": 3, "c.a": 1},
28 |         {"b": 3, "c.a": 2},
29 |         {"b": 4, "c.a": 2},
30 |         {"b": 4, "c.a": 4},
31 |     ]
32 |     actual = json_normalize(another_dict_generator())
33 |     assert list(actual) == expected
34 | 


--------------------------------------------------------------------------------