├── .github └── workflows │ └── tests.yaml ├── .gitignore ├── .readthedocs.yaml ├── .tox-coveragerc ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── docs ├── Makefile ├── _static │ ├── comet.png │ └── comet_multi.png ├── _templates │ └── page.html ├── api.rst ├── by_analogy.rst ├── cli.rst ├── conf.py ├── custom_spec_types.rst ├── debugging.rst ├── faq.rst ├── grouping.rst ├── index.rst ├── make.bat ├── matching.rst ├── modes.rst ├── mutation.rst ├── outreach.md ├── requirements-rtd.txt ├── snippets.rst ├── streaming.rst └── tutorial.rst ├── glom ├── __init__.py ├── __main__.py ├── _version.py ├── cli.py ├── core.py ├── grouping.py ├── matching.py ├── mutation.py ├── reduction.py ├── streaming.py ├── test │ ├── __init__.py │ ├── data │ │ ├── test_invalid.toml │ │ ├── test_invalid.yaml │ │ ├── test_valid.toml │ │ └── test_valid.yaml │ ├── perf_report.py │ ├── test_basic.py │ ├── test_check.py │ ├── test_cli.py │ ├── test_error.py │ ├── test_fill.py │ ├── test_grouping.py │ ├── test_match.py │ ├── test_mutation.py │ ├── test_path_and_t.py │ ├── test_reduction.py │ ├── test_scope_vars.py │ ├── test_snippets.py │ ├── test_spec.py │ ├── test_streaming.py │ ├── test_target_types.py │ └── test_tutorial.py └── tutorial.py ├── pytest.ini ├── requirements.in ├── requirements.txt ├── setup.py └── tox.ini /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | paths-ignore: 5 | - "docs/**" 6 | - "*.md" 7 | - "*.rst" 8 | pull_request: 9 | paths-ignore: 10 | - "docs/**" 11 | - "*.md" 12 | - "*.rst" 13 | jobs: 14 | tests: 15 | name: ${{ matrix.name }} 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | include: 21 | - { name: Linux, python: "3.12", os: ubuntu-latest, tox: py312 } 22 | - { name: Windows, python: "3.12", os: windows-latest, tox: py312 } 23 | - { name: Mac, python: "3.12", os: macos-latest, tox: py312 } 24 | - 
{ name: "3.11", python: "3.11", os: ubuntu-latest, tox: py311 } 25 | - { name: "3.10", python: "3.10", os: ubuntu-latest, tox: py310 } 26 | - { name: "3.9", python: "3.9", os: ubuntu-latest, tox: py39 } 27 | - { name: "3.8", python: "3.8", os: ubuntu-latest, tox: py38 } 28 | - { name: "3.7", python: "3.7", os: ubuntu-22.04, tox: py37 } 29 | - { name: "PyPy3", python: "pypy-3.9", os: ubuntu-latest, tox: pypy3 } 30 | steps: 31 | - uses: actions/checkout@v4 32 | - uses: actions/setup-python@v4 33 | with: 34 | python-version: ${{ matrix.python }} 35 | - name: update pip 36 | run: | 37 | pip install -U wheel 38 | pip install -U setuptools 39 | python -m pip install -U pip 40 | - name: get pip cache dir 41 | id: pip-cache 42 | run: echo "::set-output name=dir::$(pip cache dir)" 43 | - name: cache pip 44 | uses: actions/cache@v3 45 | with: 46 | path: ${{ steps.pip-cache.outputs.dir }} 47 | key: pip|${{ runner.os }}|${{ matrix.python }}|${{ hashFiles('setup.py') }}|${{ hashFiles('requirements/*.txt') }} 48 | - run: pip install tox 49 | - run: tox -e ${{ matrix.tox }},coverage-report 50 | - name: "Upload coverage to Codecov" 51 | uses: "codecov/codecov-action@v3" 52 | with: 53 | fail_ci_if_error: true 54 | files: ./.tox/coverage.xml 55 | token: ${{ secrets.CODECOV_TOKEN }} 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs/_build 2 | tmp.py 3 | htmlcov/ 4 | .coverage.* 5 | 6 | *.py[cod] 7 | .pytest_cache 8 | venv* 9 | 10 | 11 | # emacs 12 | *~ 13 | ._* 14 | .\#* 15 | \#*\# 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Packages 21 | *.egg 22 | *.egg-info 23 | dist 24 | build 25 | eggs 26 | parts 27 | bin 28 | var 29 | sdist 30 | develop-eggs 31 | .installed.cfg 32 | lib 33 | lib64 34 | 35 | # Installer logs 36 | pip-log.txt 37 | 38 | # Unit test / coverage reports 39 | .coverage 40 | .tox 41 | nosetests.xml 42 | 43 | # Translations 44 | 
*.mo 45 | 46 | # Mr Developer 47 | .mr.developer.cfg 48 | .project 49 | .pydevproject 50 | 51 | # Vim 52 | *.sw[op] 53 | 54 | .cache/ 55 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.10" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/conf.py 17 | 18 | # If using Sphinx, optionally build your docs in additional formats such as PDF 19 | # formats: 20 | # - pdf 21 | 22 | # Optionally declare the Python requirements required to build your docs 23 | python: 24 | install: 25 | - requirements: docs/requirements-rtd.txt 26 | - method: pip 27 | path: . 
-------------------------------------------------------------------------------- /.tox-coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = 4 | glom 5 | ../glom 6 | omit = 7 | */flycheck_*.py 8 | */chainmap_backport.py 9 | */perf_report.py 10 | 11 | [paths] 12 | source = 13 | ../glom 14 | */lib/python*/site-packages/glom 15 | */Lib/site-packages/glom 16 | */pypy/site-packages/glom 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018, Mahmoud Hashemi 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following 14 | disclaimer in the documentation and/or other materials provided 15 | with the distribution. 16 | 17 | * The names of the contributors may not be used to endorse or 18 | promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE CHANGELOG.md tox.ini requirements.txt requirements-rtd.txt .coveragerc Makefile pytest.ini .tox-coveragerc 2 | exclude TODO.md codecov.yml .readthedocs.yaml requirements.in 3 | global-exclude flycheck_* 4 | 5 | graft glom/test/data 6 | graft docs 7 | prune docs/_build 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # glom 2 | 3 | *Restructuring data, the Python way* 4 | 5 | 6 | 7 | 8 | 9 | 10 | Real applications have real data, and real data nests. Objects inside 11 | of objects inside of lists of objects. 12 | 13 | glom is a new and powerful way to handle real-world data, featuring: 14 | 15 | * Path-based access for nested data structures 16 | * Readable, meaningful error messages 17 | * Declarative data transformation, using lightweight, Pythonic specifications 18 | * Built-in data exploration and debugging features 19 | 20 | All of that and more, available as a [fully-documented][rtd], 21 | pure-Python package, tested on Python 3.7+, as well as 22 | PyPy3. 
Installation is as easy as: 23 | 24 | ``` 25 | pip install glom 26 | ``` 27 | 28 | And when you install glom, you also get [the `glom` command-line 29 | interface][cli_rtd], letting you experiment at the console, but never limiting 30 | you to shell scripts: 31 | 32 | ``` 33 | Usage: glom [FLAGS] [spec [target]] 34 | 35 | Command-line interface to the glom library, providing nested data access and data 36 | restructuring with the power of Python. 37 | 38 | Flags: 39 | 40 | --help / -h show this help message and exit 41 | --target-file TARGET_FILE path to target data source (optional) 42 | --target-format TARGET_FORMAT 43 | format of the source data (json, python, toml, 44 | or yaml) (defaults to 'json') 45 | --spec-file SPEC_FILE path to glom spec definition (optional) 46 | --spec-format SPEC_FORMAT format of the glom spec definition (json, python, 47 | python-full) (defaults to 'python') 48 | --indent INDENT number of spaces to indent the result, 0 to disable 49 | pretty-printing (defaults to 2) 50 | --debug interactively debug any errors that come up 51 | --inspect interactively explore the data 52 | 53 | ``` 54 | 55 | Anything you can do at the command line readily translates to Python 56 | code, so you've always got a path forward when complexity starts to 57 | ramp up. 58 | 59 | 60 | ## Examples 61 | #### Without glom 62 | ```python 63 | >>> data = {'a': {'b': {'c': 'd'}}} 64 | >>> data['a']['b']['c'] 65 | 'd' 66 | >>> data2 = {'a': {'b': None}} 67 | >>> data2['a']['b']['c'] 68 | Traceback (most recent call last): 69 | ... 70 | TypeError: 'NoneType' object is not subscriptable 71 | ``` 72 | 73 | #### With glom 74 | ```python 75 | >>> glom(data, 'a.b.c') 76 | 'd' 77 | >>> glom(data2, 'a.b.c') 78 | Traceback (most recent call last): 79 | ... 80 | PathAccessError: could not access 'c', index 2 in path Path('a', 'b', 'c'), got error: ... 
81 | ``` 82 | 83 | ## Learn more 84 | 85 | 86 | 87 | If all this seems interesting, continue exploring glom below: 88 | 89 | * [glom Tutorial][tutorial] 90 | * [Full API documentation at Read the Docs][rtd] 91 | * [Original announcement blog post (2018-05-09)][glom_announce] 92 | * [Frequently Asked Questions][faq] 93 | * [PyCon 2018 Lightning Talk (2018-05-11)][pycon_talk] 94 | 95 | All of the links above are overflowing with examples, but should you 96 | find anything about the docs, or glom itself, lacking, [please submit 97 | an issue][gh_issues]! 98 | 99 | [rtd]: https://glom.readthedocs.io 100 | [cli_rtd]: http://glom.readthedocs.io/en/latest/cli.html 101 | [tutorial]: https://glom.readthedocs.io/en/latest/tutorial.html 102 | [faq]: https://glom.readthedocs.io/en/latest/faq.html 103 | [glom_announce]: https://sedimental.org/glom_restructured_data.html 104 | [gh_issues]: https://github.com/mahmoud/glom/issues/ 105 | [pycon_talk]: https://www.youtube.com/watch?v=bTAFl8P2DkE&t=18m07s 106 | 107 | In the meantime, just remember: When you've got nested data, glom it! ☄️ 108 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | By default we only support the latest version for all bugfixes, though we're open to discussion if there's evidence of wider deployment. 6 | [Libraries.io statistics](https://libraries.io/pypi/glom/usage) can be useful in assessing exposure, though we acknowledge there are private pools not represented by public stats. 7 | 8 | ## Reporting a Vulnerability 9 | 10 | You can use GitHub to privately report a vulnerability [here](https://github.com/mahmoud/glom/security/advisories), 11 | or if you do not have a GitHub account, contact the repository owner via the email on the About section of the website linked from their profile page. 
12 | At the time of writing: https://sedimental.org/about.html 13 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = glom 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_static/comet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoud/glom/c225e2abeb234be7119911b96b4378cc9d8d6478/docs/_static/comet.png -------------------------------------------------------------------------------- /docs/_static/comet_multi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoud/glom/c225e2abeb234be7119911b96b4378cc9d8d6478/docs/_static/comet_multi.png -------------------------------------------------------------------------------- /docs/_templates/page.html: -------------------------------------------------------------------------------- 1 | {% extends "!page.html" %} 2 | {% block menu %} 3 | {{ super() }} 4 | 5 | {% endblock %} 6 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | Core 
``glom`` API 2 | ================= 3 | 4 | .. automodule:: glom.core 5 | 6 | .. seealso:: 7 | 8 | As the glom API grows, we've refactored the docs into separate 9 | domains. The core API is below. More specialized types can also be 10 | found in the following docs: 11 | 12 | .. hlist:: 13 | :columns: 2 14 | 15 | * :doc:`mutation` 16 | * :doc:`streaming` 17 | * :doc:`grouping` 18 | * :doc:`matching` 19 | 20 | Longtime glom docs readers: thanks in advance for reporting/fixing 21 | any broken links you may find. 22 | 23 | .. contents:: Contents 24 | :local: 25 | 26 | 27 | .. _glom-func: 28 | 29 | The ``glom`` Function 30 | --------------------- 31 | 32 | Where it all happens. The reason for the season. The eponymous 33 | function, :func:`~glom.glom`. 34 | 35 | .. autofunction:: glom.glom 36 | 37 | Basic Specifiers 38 | ---------------- 39 | 40 | Basic glom specifications consist of ``dict``, ``list``, ``tuple``, 41 | ``str``, and ``callable`` objects. However, as data calls for more 42 | complicated interactions, ``glom`` provides specialized specifier 43 | types that can be used with the basic set of Python builtins. 44 | 45 | 46 | .. autoclass:: glom.Path 47 | .. autoclass:: glom.Val 48 | .. autoclass:: glom.Spec 49 | 50 | .. _advanced-specifiers: 51 | 52 | .. seealso:: 53 | 54 | Note that many of the Specifier types previously mentioned here 55 | have moved into their own docs, among them: 56 | 57 | .. hlist:: 58 | :columns: 2 59 | 60 | * :doc:`mutation` 61 | * :doc:`streaming` 62 | * :doc:`grouping` 63 | * :doc:`matching` 64 | 65 | Object-Oriented Access and Method Calls with T 66 | ---------------------------------------------- 67 | 68 | glom's shortest-named feature may be its most powerful. 69 | 70 | .. autodata:: glom.T 71 | 72 | 73 | Defaults with Coalesce 74 | ---------------------- 75 | 76 | Data isn't always where or what you want it to be. Use these 77 | specifiers to declare away overly branchy procedural code. 78 | 79 | .. 
autoclass:: glom.Coalesce 80 | 81 | .. autodata:: glom.SKIP 82 | .. autodata:: glom.STOP 83 | 84 | 85 | Calling Callables with Invoke 86 | ----------------------------- 87 | 88 | .. versionadded:: 19.10.0 89 | 90 | From calling functions to constructing objects, it's hardly Python if 91 | you're not invoking callables. By default, single-argument functions 92 | work great on their own in glom specs. The function gets passed the 93 | target and it just works: 94 | 95 | >>> glom(['1', '3', '5'], [int]) 96 | [1, 3, 5] 97 | 98 | Zero-argument and multi-argument functions get a lot trickier, 99 | especially when more than one of those arguments comes from the 100 | target, thus the :class:`Invoke` spec. 101 | 102 | .. autoclass:: glom.Invoke 103 | :members: 104 | 105 | Alternative approach to functions: Call 106 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 107 | 108 | An earlier, more primitive approach to callables in glom was the Call 109 | specifier type. 110 | 111 | .. warning:: 112 | 113 | Given the superiority of its successor, :class:`Invoke`, 114 | the :class:`Call` type may be deprecated in a future release. 115 | 116 | .. autoclass:: glom.Call 117 | 118 | 119 | Self-Referential Specs 120 | ---------------------- 121 | 122 | Sometimes nested data repeats itself, either through recursive structure or 123 | just through redundancy. 124 | 125 | .. autoclass:: glom.Ref 126 | 127 | .. _scope: 128 | 129 | The ``glom`` Scope 130 | ------------------ 131 | 132 | Sometimes data transformation involves more than a single target and 133 | spec. For those times, glom has a *scope* system designed to manage 134 | additional state. 135 | 136 | Basic usage 137 | ~~~~~~~~~~~ 138 | 139 | On its surface, the glom scope is a dictionary of extra values that 140 | can be passed in to the top-level glom call. These values can then be 141 | addressed with the **S** object, which behaves 142 | similarly to the :data:`~glom.T` object. 
143 | 144 | Here's an example case, counting the occurrences of a value in the 145 | target, using the scope: 146 | 147 | >>> count_spec = T.count(S.search) 148 | >>> glom(['a', 'c', 'a', 'b'], count_spec, scope={'search': 'a'}) 149 | 2 150 | 151 | Note how **S** supports attribute-style dot-access for its keys. For 152 | keys which are not valid attribute names, key-style access is also 153 | supported. 154 | 155 | .. note:: 156 | 157 | glom itself uses certain keys in the scope to manage internal 158 | state. Consider the namespace of strings, integers, builtin types, 159 | and other common Python objects open for your usage. Read 160 | :doc:`the custom spec doc <custom_spec_types>` to learn about more 161 | advanced, reserved cases. 162 | 163 | Updating the scope - ``S()`` & ``A`` 164 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 165 | 166 | glom's scope isn't only set once when the top-level :func:`glom` 167 | function is called. It's dynamic and updatable. 168 | 169 | If your use case requires saving a value from one part of the target 170 | for usage elsewhere, then **S** will allow you to save values 171 | to the scope:: 172 | 173 | >>> target = {'data': {'val': 9}} 174 | >>> spec = (S(value=T['data']['val']), {'val': S['value']}) 175 | >>> glom(target, spec) 176 | {'val': 9} 177 | 178 | Any keyword arguments to the **S** will have their values evaluated as 179 | a spec, with the result being saved to the keyword argument name in 180 | the scope. 181 | 182 | When only the target is being assigned, you can use the **A** as a 183 | shortcut:: 184 | 185 | >>> target = {'data': {'val': 9}} 186 | >>> spec = ('data.val', A.value, {'val': S.value}) 187 | >>> glom(target, spec) 188 | {'val': 9} 189 | 190 | **A** enables a shorthand which assigns the current target to a 191 | location in the scope. 192 | 193 | 194 | Sensible saving - ``Vars`` & ``S.globals`` 195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 196 | 197 | Of course, glom's scopes do not last forever. 
Much like function calls 198 | in Python, new child scopes can see and read values in parent 199 | scopes. When a child spec saves a new value to the scope, it's lost 200 | when the child spec completes. 201 | 202 | If you need values to be saved beyond a spec's local scope, the best 203 | way to do that is to create a :class:`~glom.Vars` object in a common 204 | ancestor scope. :class:`~glom.Vars` acts as a mutable namespace where 205 | child scopes can store state and have it persist beyond their local 206 | scope. Choose a location in the spec such that all involved child 207 | scopes can see and share the value. 208 | 209 | .. note:: 210 | 211 | glom precreates a *global* :class:`~glom.Vars` object at 212 | ``S.globals``. Any values saved there will be accessible 213 | throughout that given :func:`~glom.glom` call:: 214 | 215 | >>> last_spec = ([A.globals.last], S.globals.last) 216 | >>> glom([3, 1, 4, 1, 5], last_spec) 217 | 5 218 | 219 | While not shared across calls, most of the same care prescribed 220 | about using global state still applies. 221 | 222 | .. autoclass:: glom.Vars 223 | 224 | 225 | Core Exceptions 226 | --------------- 227 | 228 | Not all data is going to match specifications. Luckily, glom errors 229 | are designed to be as readable and actionable as possible. 230 | 231 | All glom exceptions inherit from :exc:`GlomError`, described below, 232 | along with other core exception types. For more details about handling 233 | and debugging exceptions, see ":doc:`debugging`". 234 | 235 | .. autoclass:: glom.PathAccessError 236 | 237 | .. autoclass:: glom.CoalesceError 238 | 239 | .. autoclass:: glom.UnregisteredTarget 240 | 241 | .. autoclass:: glom.BadSpec 242 | 243 | .. autoclass:: glom.GlomError 244 | 245 | 246 | .. _setup-and-registration: 247 | 248 | Setup and Registration 249 | ---------------------- 250 | 251 | When it comes to targets, :func:`~glom.glom()` will operate on the 252 | vast majority of objects out there in Python-land. 
However, for that 253 | very special remainder, glom is readily extensible! 254 | 255 | .. autofunction:: glom.register 256 | .. autofunction:: glom.register_op 257 | .. autoclass:: glom.Glommer 258 | -------------------------------------------------------------------------------- /docs/by_analogy.rst: -------------------------------------------------------------------------------- 1 | ``glom`` by Analogy 2 | =================== 3 | 4 | ``glom`` is pure Python, and you don't need to know anything but 5 | Python to use it effectively. 6 | 7 | Still, most everyone who encounters ``glom`` for the first time finds 8 | analogies to tools they already know. Whether SQL, list 9 | comprehensions, or HTML templates, there seems to be no end to the 10 | similarities. Many of them intentional! 11 | 12 | While ``glom`` is none of those tools, and none of those tools are ``glom``, a 13 | little comparison doesn't hurt. This document collects analogies to 14 | help guide understanding along. 15 | 16 | 17 | Similarity to list comprehensions 18 | --------------------------------- 19 | 20 | One of the key inspirations for ``glom`` was the humble list 21 | comprehension, one of my favorite Python features. 22 | 23 | List comprehensions make your code look like its output, and that goes 24 | a long way in readability. ``glom`` itself does list processing with 25 | square brackets like ``[lambda x: x % 2]``, which actually makes it 26 | more like a list comp and the old ``filter()`` function. 27 | 28 | ``glom``'s list processing differs in two ways: 29 | 30 | * Required use of a callable or other ``glom`` spec, to enable deferred processing. 31 | * Ability to return :data:`~glom.SKIP`, which can exclude items from a list. 
32 | 33 | 34 | Similarity to templating (Jinja, Django, Mustache) 35 | -------------------------------------------------- 36 | 37 | ``glom`` is a lot like templating engines, including modern formatters 38 | like gofmt, but with all the format affordances distilled out. glom 39 | doesn't just work on HTML, XML, JSON, or even just strings. 40 | 41 | ``glom`` works on objects, including functions, dicts, and all other 42 | primitives. In fact, it would be safe to call ``glom`` an "object 43 | templating" system. 44 | 45 | A lot of insights for ``glom`` came (and continue to come) from writing ashes_. 46 | 47 | .. _ashes: https://github.com/mahmoud/ashes 48 | 49 | 50 | Similarity to SQL and GraphQL 51 | ----------------------------- 52 | 53 | In some ways, ``glom`` is a Python query language for Python 54 | objects. But thanks to its restructuring capabilities, it's much more 55 | than SQL or GraphQL. 56 | 57 | With SQL the primary abstraction is a table, or table-like 58 | resultset. With GraphQL, the analogous answer to this is, of course, 59 | the graph. 60 | 61 | glom goes further, not only offering the Python object tree as a 62 | graph, but also allowing you to change the shape of the data, 63 | restructuring it while fetching and transforming values, which GraphQL 64 | only minimally supports, and SQL barely supports at all. Table targets 65 | get you table outputs. 66 | 67 | Similarity to validation (jsonschema, schema, cerberus) 68 | -------------------------------------------------------- 69 | 70 | ``glom`` is a generalized form of intake libraries `including validation`_. 71 | We definitely took `schema`_ 72 | becoming successful as a sign that others shared our appetite for 73 | succinct, declarative Python datastructure manipulation. 74 | 75 | More importantly, these libraries seem to excel at structuring and 76 | parsing data, and don't solve much on the other end. 
Translating 77 | valid, structured objects like database models to JSON serializable 78 | objects is glom's forté. 79 | 80 | .. _schema: matching.rst 81 | .. _including validation: https://github.com/mahmoud/glom/issues/7 82 | 83 | Similarity to jq 84 | ---------------- 85 | 86 | :doc:`The CLI <cli>` that ``glom`` packs is very similar in function 87 | to jq_, except it uses Python as its query language, instead of making 88 | its own. Most importantly, glom gives you `a programmatic way forward`_. 89 | 90 | .. _jq: https://stedolan.github.io/jq/ 91 | .. _a programmatic way forward: http://sedimental.org/glom_restructured_data.html#library-first-then-cli 92 | 93 | Similarity to XPath/XSLT 94 | ------------------------ 95 | 96 | These hallowed technologies of yore, they were way ahead of the game 97 | in many ways. glom intentionally avoids their purity and verbosity, 98 | while trying to take as much inspiration as possible from their 99 | function. 100 | 101 | Others 102 | ------ 103 | 104 | Beyond what's listed above, several other packages and language 105 | features exist in glom's ballpark, including: 106 | 107 | * `Specter (for Clojure) `_ 108 | * `Lenses (for Haskell) `_ 109 | * `Dig (for Ruby Hashmaps) `_ 110 | 111 | If you know of other useful comparisons, `let us know 112 | <https://github.com/mahmoud/glom/issues/>`_! 113 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | ``glom`` Command-Line Interface 2 | =============================== 3 | 4 | .. note:: 5 | 6 | glom's CLI is usable and useful, but keep in mind glom is a library *first*. 7 | 8 | 9 | All the power of ``glom``, without even opening your text editor! 10 | 11 | .. code-block:: text 12 | 13 | $ glom --help 14 | Usage: /home/mahmoud/bin/glom [FLAGS] [spec [target]] 15 | 16 | Command-line interface to the glom library, providing nested data 17 | access and data restructuring with the power of Python. 
18 | 19 | Flags: 20 | 21 | --help / -h show this help message and exit 22 | --target-file TARGET_FILE path to target data source (optional) 23 | --target-format TARGET_FORMAT 24 | format of the source data (json, python, toml, 25 | or yaml) (defaults to 'json') 26 | --spec-file SPEC_FILE path to glom spec definition (optional) 27 | --spec-format SPEC_FORMAT format of the glom spec definition (json, python, 28 | python-full) (defaults to 'python') 29 | --indent INDENT number of spaces to indent the result, 0 to disable 30 | pretty-printing (defaults to 2) 31 | --debug interactively debug any errors that come up 32 | --inspect interactively explore the data 33 | 34 | The ``glom`` command will also read from standard input (stdin) and 35 | process that data as the *target*. 36 | 37 | Here's an example, filtering a GitHub API example to something much 38 | more flat and readable: 39 | 40 | .. code-block:: bash 41 | 42 | $ pip install glom 43 | $ curl -s https://api.github.com/repos/mahmoud/glom/events \ 44 | | glom '[{"type": "type", "date": "created_at", "user": "actor.login"}]' 45 | 46 | This yields: 47 | 48 | .. code-block:: javascript 49 | 50 | [ 51 | { 52 | "date": "2018-05-09T03:39:44Z", 53 | "type": "WatchEvent", 54 | "user": "asapzacy" 55 | }, 56 | { 57 | "date": "2018-05-08T22:51:46Z", 58 | "type": "WatchEvent", 59 | "user": "CameronCairns" 60 | }, 61 | { 62 | "date": "2018-05-08T03:27:27Z", 63 | "type": "PushEvent", 64 | "user": "mahmoud" 65 | }, 66 | { 67 | "date": "2018-05-08T03:27:27Z", 68 | "type": "PullRequestEvent", 69 | "user": "mahmoud" 70 | } 71 | ... 72 | ] 73 | 74 | By default the CLI *target* is JSON and the *spec* is a Python 75 | literal. 76 | 77 | .. note:: 78 | 79 | Because the default CLI spec is a Python literal, there are no 80 | lambdas and other Python/glom constructs available. These features 81 | are gated behind the ``--spec-format python-full`` option to avoid 82 | code injection and other unwanted consequences. 
83 | 84 | The ``--debug`` and ``--inspect`` flags are useful for exploring 85 | data. Note that they are not available when piping data through 86 | stdin. Save that API response to a file and use ``--target-file`` to 87 | do your interactive experimenting. 88 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | 15 | import os 16 | import sys 17 | import sphinx 18 | from pprint import pprint 19 | 20 | # If extensions (or modules to document with autodoc) are in another directory, 21 | # add these directories to sys.path here. If the directory is relative to the 22 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
CUR_PATH = os.path.dirname(os.path.abspath(__file__))
PROJECT_PATH = os.path.abspath(CUR_PATH + '/../')
PACKAGE_PATH = os.path.abspath(CUR_PATH + '/../glom/')
# Make both the repository root and the package directory importable so
# autodoc can find ``glom`` without it being installed.
sys.path.insert(0, PROJECT_PATH)
sys.path.insert(0, PACKAGE_PATH)

# NOTE: this file used to ``pprint(os.environ)`` here. That writes every
# environment variable (including any CI / Read the Docs tokens) into the
# build log, so the dump has been removed. Re-add it temporarily and locally
# if the build environment needs to be inspected.


# -- Project information -----------------------------------------------------

project = u'glom'
copyright = u'2024, Mahmoud Hashemi'
author = u'Mahmoud Hashemi'

# The short X.Y version
version = u'24.11'
# The full version, including alpha/beta/rc tags
release = u'24.11.0'


# Placeholder for a theme snippet; not referenced anywhere in this file.
# NOTE(review): the string body appears blank in the reviewed copy -- confirm
# against the checked-in file before applying.
todo_add_to_theme_to_keep_menus_expanded = """





"""


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.intersphinx',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
]

# Napoleon moved into Sphinx proper in 1.3; older versions need the
# sphinxcontrib package instead.
if sphinx.version_info[:2] < (1, 3):
    extensions.append('sphinxcontrib.napoleon')
else:
    extensions.append('sphinx.ext.napoleon')


# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Sphinx >= 5 no longer accepts ``None`` here (it warns and falls back to
# 'en'), so the default is spelled out explicitly.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'default'


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#

on_rtd = os.environ.get('READTHEDOCS', None) == 'True'

# The theme name is the same everywhere; only local builds need to import the
# theme package and point Sphinx at its install location.
html_theme = 'sphinx_rtd_theme'

if not on_rtd:
    import sphinx_rtd_theme
    html_theme_path = ['_themes', sphinx_rtd_theme.get_html_theme_path()]

html_theme_options = {
    'navigation_depth': 4,
    'collapse_navigation': False,
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'glomdoc'


# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'glom.tex', u'glom Documentation',
     u'Mahmoud Hashemi', 'manual'),
]


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'glom', u'glom Documentation',
     [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files.
List of tuples 188 | # (source start file, target name, title, author, 189 | # dir menu entry, description, category) 190 | texinfo_documents = [ 191 | (master_doc, 'glom', u'glom Documentation', 192 | author, 'glom', 'One line description of project.', 193 | 'Miscellaneous'), 194 | ] 195 | 196 | 197 | # -- Extension configuration ------------------------------------------------- 198 | 199 | # -- Options for intersphinx extension --------------------------------------- 200 | 201 | # Example configuration for intersphinx: refer to the Python standard library. 202 | intersphinx_mapping = {'https://docs.python.org/': None} 203 | -------------------------------------------------------------------------------- /docs/custom_spec_types.rst: -------------------------------------------------------------------------------- 1 | Writing a custom Specifier Type 2 | =============================== 3 | 4 | While glom comes with a lot of built-in features, no library can ever 5 | encompass all data manipulation operations. 6 | 7 | To cover every case out there, glom provides a way to extend its 8 | functionality with your own data handling hooks. This document 9 | explains glom's execution model and how to integrate with it when 10 | writing a custom Specifier Type. 11 | 12 | When to write a Specifier Type 13 | ------------------------------ 14 | 15 | ``glom`` has always supported arbitrary callables, like so: 16 | 17 | .. code:: 18 | 19 | glom({'nums': range(5)}, ('nums', sum)) 20 | # 10 21 | 22 | With this built-in extensibility, what does a glom specifier type add? 23 | 24 | Custom specifier types are useful when you want to: 25 | 26 | 1. Perform validation at spec construction time 27 | 2. Enable users to interact with new target types and operations 28 | 3. Improve readability and reusability of your data transformations 29 | 4. 
Temporarily change the glom runtime behavior 30 | 31 | If you're just building a one-off spec for transforming your own data, 32 | there's no reason to reach for an extension. ``glom``'s extension API 33 | is easy, but a good old Python ``lambda`` is even easier. 34 | 35 | Building your Specifier Type 36 | ---------------------------- 37 | 38 | Any object instance with a ``glomit`` method can participate in a glom 39 | call. By way of example, here is a programming cliché implemented as a 40 | glom specifier type, with comments referencing notes below. 41 | 42 | .. code:: 43 | 44 | class HelloWorldSpec(object): # 1 45 | def glomit(self, target, scope): # 2 46 | print("Hello, world!") 47 | return target 48 | 49 | And now let's put it to use! 50 | 51 | .. code:: 52 | 53 | from glom import glom 54 | 55 | target = {'example': 'object'} 56 | 57 | glom(target, HelloWorldSpec()) # 3 58 | # prints "Hello, world!" and returns target 59 | 60 | There are a few things to note from this example: 61 | 62 | 1. Specifier types do not need to inherit from any type. Just 63 | implement the ``glomit`` method. 64 | 2. The ``glomit`` signature takes two parameters, ``target`` and 65 | ``scope``. The ``target`` should be familiar from using 66 | :func:`~glom.glom`, and it's the ``scope`` that makes glom really 67 | tick. 68 | 3. By convention, instances are used in specs passed to 69 | :func:`~glom.glom` calls, not the types themselves. 70 | 71 | .. _glom_scope: 72 | 73 | The glom Scope 74 | -------------- 75 | 76 | The :ref:`glom scope` is also used to expose runtime state to the specifier 77 | type. Let's take a look inside a scope: 78 | 79 | .. code:: 80 | 81 | from glom import glom 82 | from pprint import pprint 83 | 84 | class ScopeInspectorSpec(object): 85 | def glomit(self, target, scope): 86 | pprint(dict(scope)) 87 | return target 88 | 89 | glom(target, ScopeInspectorSpec()) 90 | 91 | Which gives us: 92 | 93 | .. 
code:: 94 | 95 | {T: {'example': 'object'}, 96 | : , 97 | : [], 98 | : <__main__.ScopeInspectorSpec object at 0x7f208bf58690>, 99 | : None, 100 | : } 101 | 102 | As you can see, all glom's core workings are present, all under familiar keys: 103 | 104 | * The current *target*, accessible using :data:`~glom.T` as a scope key. 105 | * The current *spec*, accessible under :class:`~glom.Spec`. 106 | * The current *path*, accessible under :class:`~glom.Path`. 107 | * The ``TargetRegistry``, used to :ref:`register new operations and target types `. 108 | * Even the ``glom()`` function itself, filed under :func:`~glom.glom`. 109 | 110 | To learn how to use the scope's powerful features idiomatically, let's 111 | reimplement one of glom's standard specifier types. 112 | 113 | Specifiers by example 114 | --------------------- 115 | 116 | While we've technically created a couple of extensions above, let's 117 | really dig into the features of the scope using an example. 118 | 119 | :class:`~glom.Sum` is a standard extension that ships with glom, and 120 | it works like this: 121 | 122 | .. code:: 123 | 124 | from glom import glom, Sum 125 | 126 | glom([1, 2, 3], Sum()) 127 | # 6 128 | 129 | The version below does not have as much error handling, but reproduces 130 | all the same basic principles. This version of ``Sum()`` code also 131 | contains comments with references to explanatory notes below. 132 | 133 | .. 
code:: 134 | 135 | from glom import glom, Path, T 136 | from glom.core import TargetRegistry, UnregisteredTarget # 1 137 | 138 | class Sum(object): 139 | def __init__(self, subspec=T, init=int): # 2 140 | self.subspec = subspec 141 | self.init = init 142 | 143 | def glomit(self, target, scope): 144 | if self.subspec is not T: 145 | target = scope[glom](target, self.subspec, scope) # 3 146 | 147 | try: 148 | # 4 149 | iterate = scope[TargetRegistry].get_handler('iterate', target, path=scope[Path]) 150 | except UnregisteredTarget as ut: 151 | # 5 152 | raise TypeError('can only %s on iterable targets, not %s type (%s)' 153 | % (self.__class__.__name__, type(target).__name__, ut)) 154 | 155 | try: 156 | iterator = iterate(target) 157 | except Exception as e: 158 | raise TypeError('failed to iterate on instance of type %r at %r (got %r)' 159 | % (target.__class__.__name__, Path(*scope[Path]), e)) 160 | 161 | return self._sum(iterator) 162 | 163 | def _sum(self, iterator): # 6 164 | ret = self.init() 165 | 166 | for v in iterator: 167 | ret += v 168 | 169 | return ret 170 | 171 | Now, let's take a look at the interesting parts, referencing the comments above: 172 | 173 | 1. Specifier types often reference the :class:`TargetRegistry`, 174 | which is not part of the top-level ``glom`` API, and must be 175 | imported from ``glom.core``. More on this in #4. 176 | 2. Specifier type ``__init__`` methods may take as many or as few 177 | arguments as desired, but many glom specifier types take a first 178 | parameter of a *subspec*, meant to be fetched right before the 179 | actual specifier's operation. This helps readability of 180 | glomspecs. See :class:`~glom.Coalesce` for an example of this 181 | idiom. 182 | 3. Specifier types should not reference the 183 | :func:`~glom.glom()` function directly, instead use the 184 | :func:`~glom.glom` function as a key to the ``scope`` map to get the 185 | currently active ``glom()``. 
This ensures that the extension type is 186 | compatible with advanced specifier types which override the 187 | ``glom()`` function. 188 | 4. To maximize compatibility with new target types, ``glom`` allows 189 | :ref:`new types and operations to be registered 190 | ` with the ``TargetRegistry``. Specifier types 191 | should respect this by contextually fetching these standard 192 | operators as demonstrated above. At the time of writing, the 193 | primary operators used by glom itself are ``"get"``, 194 | ``"iterate"``, ``"keys"``, ``"assign"``, and ``"delete"``. 195 | 5. In the event that the current target does not support your 196 | Specifier type's desired operation, it's customary to raise a helpful 197 | error. Consider creating your own exception type and inheriting 198 | from :class:`~glom.GlomError`. 199 | 6. Specifier types may have other methods and members in addition to 200 | the primary ``glomit()`` method. This ``_sum()`` method 201 | implements most of the core of our custom specifier type. 202 | 203 | Check out the implementation of the real :class:`glom.Sum` specifier for more details. 204 | 205 | Summing up 206 | ---------- 207 | 208 | ``glom`` Specifier Types are more than just add-ons; the extension 209 | architecture is how most of ``glom`` itself is implemented. Build 210 | knowing that the paradigm is as powerful as anything built-in. 211 | 212 | If you need more examples, another simple one can be found in 213 | :ref:`this snippet `. ``glom``'s source code itself 214 | contains many specifiers more advanced than the above. Simply search 215 | the codebase for ``glomit()`` methods and you will find no shortage. 216 | 217 | Happy extending! 
218 | -------------------------------------------------------------------------------- /docs/debugging.rst: -------------------------------------------------------------------------------- 1 | Exceptions & Debugging 2 | ====================== 3 | 4 | While glom works well when all goes as intended, it really shines when 5 | data doesn't match expectations. glom's error messages and exception 6 | hierarchy have been designed to maximize readability and 7 | debuggability. Read on for a listing of glom's exceptions and how to 8 | debug them. 9 | 10 | .. contents:: Contents 11 | :local: 12 | 13 | .. _exceptions: 14 | 15 | Exceptions 16 | ---------- 17 | 18 | glom introduces several new exception types designed to maximize 19 | readability and debuggability. Note that all these errors derive from 20 | :exc:`~glom.GlomError`, and are only raised from :func:`~glom.glom` calls, not 21 | from spec construction or glom type registration. Those declarative 22 | and setup operations raise :exc:`ValueError`, :exc:`TypeError`, and 23 | other standard Python exceptions as appropriate. 24 | 25 | Here is a short list of links to all public exception types in glom. 26 | 27 | .. hlist:: 28 | :columns: 3 29 | 30 | * :exc:`~glom.GlomError` 31 | * :exc:`~glom.PathAccessError` 32 | * :exc:`~glom.PathAssignError` 33 | * :exc:`~glom.PathDeleteError` 34 | * :exc:`~glom.CoalesceError` 35 | * :exc:`~glom.FoldError` 36 | * :exc:`~glom.MatchError` 37 | * :exc:`~glom.TypeMatchError` 38 | * :exc:`~glom.CheckError` 39 | * :exc:`~glom.UnregisteredTarget` 40 | * :exc:`~glom.BadSpec` 41 | 42 | .. _reading-exceptions: 43 | 44 | Reading a glom Exception 45 | ------------------------ 46 | 47 | glom errors are regular Python exceptions, but may look a little 48 | different from other Python errors. Because glom is a data 49 | manipulation library, glom errors include a data traceback, 50 | interleaving spec and target data. 
51 | 52 | For example, let's raise an error by glomming up some data that doesn't exist: 53 | 54 | .. code-block:: default 55 | :linenos: 56 | 57 | >>> target = {'planets': [{'name': 'earth', 'moons': 1}]} 58 | >>> glom(target, ('planets', ['rings'])) 59 | Traceback (most recent call last): 60 | File "<stdin>", line 1, in <module> 61 | File "/home/mahmoud/projects/glom/glom/core.py", line 1787, in glom 62 | raise err 63 | glom.core.PathAccessError: error raised while processing, details below. 64 | Target-spec trace (most recent last): 65 | - Target: {'planets': [{'name': 'earth', 'moons': 1}]} 66 | - Spec: ('planets', ['rings']) 67 | - Spec: 'planets' 68 | - Target: [{'name': 'earth', 'moons': 1}] 69 | - Spec: ['rings'] 70 | - Target: {'name': 'earth', 'moons': 1} 71 | - Spec: 'rings' 72 | glom.core.PathAccessError: could not access 'rings', part 0 of Path('rings'), got error: KeyError('rings') 73 | 74 | Let's step through this output: 75 | 76 | 77 | * Line **1**: We created a planet registry, similar to the one in the :doc:`tutorial`. 78 | * Line **2-3**: We try to get a listing of ``rings`` of all the planets. Instead, we get a Python traceback. 79 | * Line **7**: We see we have a :exc:`~glom.PathAccessError`. 80 | * Line **8-9**: The "target-spec trace", our data stack, begins. It always starts with the target data as it was passed in. 81 | * Line **10**: Next is the top-level spec, as passed in: ``('planets', ['rings'])`` 82 | * Line **11**: glom takes the first part of the spec from line 10, ``'planets'``, to get the next target. 83 | * Line **12**: Because the spec on line 11 updated the current target, glom outputs it. When a spec is evaluated but the target value is unchanged, the target is skipped in the trace. 84 | * Line **14-15**: We get to the last two lines of the trace, which include the culprit target and spec. 85 | * Line **16**: Finally, our familiar :exc:`~glom.PathAccessError` message, 86 | with more details about the error, including the original ``KeyError('rings')``. 
87 | 88 | This view of glom evaluation answers many of the questions 89 | a developer or user would ask upon encountering the error: 90 | 91 | * What was the data? 92 | * Which part of the spec failed? 93 | * What was the original error? 94 | 95 | The data trace does this by peeling away at the target and spec until 96 | it hones in on the failure. Both targets and specs in traces are 97 | truncated to terminal width to maximize readability. 98 | 99 | .. note:: 100 | 101 | If for some reason you need the full Python stack instead of the 102 | glom data traceback, pass ``glom_debug=True`` to the top-level glom 103 | call. 104 | 105 | .. _branched-exceptions: 106 | 107 | Reading Branched Exceptions 108 | --------------------------- 109 | 110 | Some glom spec types, like :class:`~glom.Coalesce` and 111 | :class:`~glom.Switch`, can try multiple specs in succession. These 112 | "branching" specs can also get multiple exceptions. 113 | 114 | Initially, debugging data for these branching specs was limited. But 115 | in v20.7.0, branching error trees were introduced, exposing 116 | information about every spec and target attempted before raising the 117 | final exception. 118 | 119 | All the exception reading advice in the ":ref:`reading-exceptions`" 120 | section applies, but there's a bit of extra formatting to visualize 121 | the error tree in the target-spec trace. 122 | 123 | Let's step line by line through a :class:`~glom.Coalesce` error tree: 124 | 125 | .. code-block:: default 126 | :linenos: 127 | 128 | >>> target = {'n': 'nope', 'xxx': {'z': {'v': 0}}} 129 | >>> glom(target, Coalesce(('xxx', 'z', 'n'), 'yyy')) 130 | Traceback (most recent call last): 131 | File "tmp.py", line 9, in _make_stack 132 | glom(target, spec) 133 | File "/home/mahmoud/projects/glom/glom/core.py", line 2029, in glom 134 | raise err 135 | glom.core.CoalesceError: error raised while processing, details below. 
136 | Target-spec trace (most recent last): 137 | - Target: {'n': 'nope', 'xxx': {'z': {'v': 0}}} 138 | + Spec: Coalesce(('xxx', 'z', 'n'), 'yyy') 139 | |\ Spec: ('xxx', 'z', 'n') 140 | || Spec: 'xxx' 141 | || Target: {'z': {'v': 0}} 142 | || Spec: 'z' 143 | || Target: {'v': 0} 144 | || Spec: 'n' 145 | |X glom.core.PathAccessError: could not access 'n', part 0 of Path('n'), got error: KeyError('n') 146 | |\ Spec: 'yyy' 147 | |X glom.core.PathAccessError: could not access 'yyy', part 0 of Path('yyy'), got error: KeyError('yyy') 148 | glom.core.CoalesceError: no valid values found. Tried (('xxx', 'z', 'n'), 'yyy') and got (PathAccessError, PathAccessError) (at path ['xxx', 'z']) 149 | 150 | * Line **1-10**: Standard fare for glom use and error behavior, see ":ref:`reading-exceptions`" 151 | * Line **11**: We see a "**+**" when starting a branching spec. Each level of branch adds a "**|**" on the left to help track nesting level. 152 | * Line **12**: We see a "**\\**" indicating a new branch of the root branching spec. 153 | * Line **13-17**: Traversing downward as usual until... 154 | * Line **18**: We see an "**X**" indicating our first exception, causing the failure of this branch. 155 | * Line **19**: We see a "**\\**" which starts our next branch. 156 | * Line **20**: We see an "**X**" indicating our second and last exception, causing the failure of this branch. 157 | * Line **21**: The last line is our root level exception, dedented, same as any other glom error. 158 | 159 | Apart from the formatting, error branching doesn't change any other 160 | semantics of the glom exception being raised. 161 | 162 | .. _debugging: 163 | 164 | Debugging 165 | --------- 166 | 167 | Good error messages are great when the data has a problem, but what 168 | about when a spec is incorrect? 169 | 170 | Even the most carefully-constructed specifications eventually need 171 | debugging. 
If the error message isn't enough to fix your glom issues, 172 | that's where **Inspect** comes in. 173 | 174 | .. autoclass:: glom.Inspect 175 | -------------------------------------------------------------------------------- /docs/faq.rst: -------------------------------------------------------------------------------- 1 | Frequently Asked Questions 2 | ========================== 3 | 4 | Paradigm shifts always raise a question or two. 5 | 6 | .. contents:: Contents 7 | :local: 8 | 9 | What does "glom" mean? 10 | ---------------------- 11 | 12 | "glom" is short for "conglomerate", which means "gather into a compact 13 | form", coming from the Latin "glom-" meaning *ball*, like *globe*. 14 | 15 | glom can be used as a noun or verb. A developer might say, "I glommed 16 | together this API response." An astronomer might say, "these gloms of 17 | space dust are forming planets and comets." 18 | 19 | Got some data you need to transform? **glom it! ☄️** 20 | 21 | Any other glom terminology worth knowing? 22 | ----------------------------------------- 23 | 24 | A couple of conventional terms that help navigate around glom's 25 | semantics: 26 | 27 | * **target** - ``glom`` operates on a variety of inputs, so we simply 28 | refer to the object being accessed (i.e., the first argument to 29 | ``glom()``) as the "target" 30 | * **spec** - *(aka "glomspec")* The accompanying template used to 31 | specify the structure and sources of the output. 32 | * **output** - The value retrieved or created and returned by 33 | ``glom()``. 34 | 35 | All of these can be seen in the conventional call to :func:`~glom.glom`:: 36 | 37 | output = glom(target, spec) 38 | 39 | Nothing too wild, but these standard terms really do help clarify the 40 | complex situations ``glom`` was built to handle. 41 | 42 | What is glom's public API? 43 | -------------------------- 44 | 45 | Obviously, the primary glom API is the ``glom()`` function 46 | itself. 
Beyond this, there's other functionality at various degrees of 47 | readiness, ranging from production to alpha within the ``glom`` 48 | package. We try to keep the public API as production-ready as 49 | possible. That also means, if functionality is not public, it may 50 | change or disappear without advance notice or even a CHANGELOG entry. 51 | 52 | First, if it's not in the top-level ``glom`` package, it's not part of 53 | glom's public API. Another good indicator is that if a type or object 54 | is not in these glom docs, then it's not public. 55 | 56 | If functionality in the top-level package is not documented, please 57 | file an issue or pull request so we can get that sorted out. Thanks in 58 | advance! 59 | 60 | What's a convenience function? 61 | ------------------------------ 62 | 63 | The primary entrypoint for glom is the ``glom()`` function, but over the years 64 | several other single-purpose functions were added, mostly for readability. 65 | 66 | If you see a function with the same name as a specifier type, but lowercased, 67 | that's a convenience function. Take :class:`~glom.Assign` and :func:`~glom.assign` 68 | as examples: 69 | 70 | .. code-block:: python 71 | 72 | glom({}, Assign('a', 'b')) 73 | # is equivalent to 74 | assign({}, 'a', 'b') 75 | 76 | At the time of writing, other convenience functions include :func:`~glom.delete`, 77 | :func:`~glom.flatten`, and :func:`~glom.merge`. Note that when performing multiple 78 | glom operations (access, assignment, delete, etc.), it's clearer and more efficient to 79 | create a spec and execute it with the :func:`~glom.glom` top-level function. 80 | 81 | Other glom tips? 82 | ---------------- 83 | 84 | Just a few (for now): 85 | 86 | * Specs don't have to live in the glom call. You can put them 87 | anywhere. Commonly-used specs work as class attributes and globals. 88 | * Using glom's declarative approach does wonders for code coverage, 89 | much like `attrs`_ which goes great with ``glom``. 
90 | * Advanced tips 91 | * glom is designed to support all of Python's built-ins as targets, 92 | and is readily extensible to other types and special handling, through 93 | :func:`~glom.register()`. 94 | * If you're trying to minimize global state, consider 95 | instantiating your own :class:`~glom.Glommer` object to 96 | encapsulate any type registration changes. 97 | 98 | If you've got more tips or patterns, `send them our way`_! 99 | 100 | .. _attrs: https://github.com/python-attrs/attrs 101 | .. _send them our way: https://github.com/mahmoud/glom/issues 102 | 103 | Why not just write more Python? 104 | ------------------------------- 105 | 106 | The answer is more than just DRY ("Don't Repeat Yourself"). 107 | 108 | Here on the glom team, we're big fans of Python. Have been for 109 | years. In fact, Python is one of a tiny handful of languages that 110 | could support something as powerful as glom. 111 | 112 | But not all Python code is the same. We built glom to replace the kind 113 | of Python that is about as un-Pythonic as code gets: simultaneously 114 | fluffy, but also fragile. Simple transformations requiring countless 115 | lines. 116 | 117 | Before glom, the "right" way to write this transformation code was 118 | verbose. Whether trying to fetch values nested within objects that may 119 | contain attributes set to ``None``, or performing a list comprehension 120 | which may raise an exception, the *correct* code was many lines of 121 | repetitious ``try-except`` blocks with a lot of hand-written exception 122 | messages. 123 | 124 | Written any more compactly, this Python would produce failures 125 | expressed in errors too low-level to associate with the higher-level 126 | transformation. 127 | 128 | So the glom-less code was hard to change, hard to debug, or 129 | both. 
``glom`` specifications are none of the above, thanks to 130 | meaningful, high-level error messages, a :class:`built-in debugging 131 | facility <glom.Inspect>`, and a compact, composable design. 132 | 133 | In short, thanks to Python, glom can provide a Pythonic solution for 134 | those times when pure Python wasn't Pythonic enough. 135 | 136 | Should I use glom or remap? 137 | --------------------------- 138 | 139 | These days, you have a lot of choices when it comes to nested data manipulation. 140 | One choice is between glom and `remap`_, the recursive ``map()``. 141 | Given that the same people wrote both utilities, we recommend: 142 | 143 | * If you know the shape of the output ahead of time, then go with glom. 144 | * If your output shape is determined by the input, then use remap. 145 | 146 | Remap performs a full traversal of a nested data structure, walking it like a tree. 147 | In contrast, glom only goes where it's told by the spec. 148 | 149 | For example, imagine an error reporting service. 150 | Users send you an arbitrary dictionary of metadata related to the error. 151 | But you have a requirement that you don't store secrets. 152 | 153 | Remap is a great way to traverse that full structure, 154 | looking for all keys containing the substring "secret", 155 | replacing the associated value with "[REDACTED]". 156 | The output shape will be the same as the input shape. 157 | 158 | At the time of writing (2023), glom isn't designed for this use case. 159 | 160 | .. _remap: https://boltons.readthedocs.io/en/latest/iterutils.html#nested 161 | 162 | How does glom work? 163 | ------------------- 164 | 165 | The core conceptual engine of glom is a very simple recursive loop. It 166 | could fit on a business card. OK maybe a postcard. 167 | 168 | In fact, here it is, in literate form, modified from this `early point 169 | in glom history`_: 170 | 171 | .. 
code-block:: python 172 | 173 | def glom(target, spec): 174 | 175 | # if the spec is a string or a Path, perform a deep-get on the target 176 | if isinstance(spec, (basestring, Path)): 177 | return _get_path(target, spec) 178 | 179 | # if the spec is callable, call it on the target 180 | elif callable(spec): 181 | return spec(target) 182 | 183 | # if the spec is a dict, assign the result of 184 | # the glom on the right to the field key on the left 185 | elif isinstance(spec, dict): 186 | ret = {} 187 | for field, subspec in spec.items(): 188 | ret[field] = glom(target, subspec) 189 | return ret 190 | 191 | # if the spec is a list, run the spec inside the list on every 192 | # element in the list and return the new list 193 | elif isinstance(spec, list): 194 | subspec = spec[0] 195 | iterator = _get_iterator(target) 196 | return [glom(t, subspec) for t in iterator] 197 | 198 | # if the spec is a tuple of specs, chain the specs by running the 199 | # first spec on the target, then running the second spec on the 200 | # result of the first, and so on. 201 | elif isinstance(spec, tuple): 202 | res = target 203 | for subspec in spec: 204 | res = glom(res, subspec) 205 | return res 206 | else: 207 | raise TypeError('expected one of the above types') 208 | 209 | 210 | .. _early point in glom history: https://github.com/mahmoud/glom/blob/186757b47af3d33901df4bf715874b5f3c781d8f/glom/__init__.py#L74-L91 211 | 212 | Does Python need a null-coalescing operator? 213 | -------------------------------------------- 214 | 215 | Not technically a glom question, but it is frequently_ asked_! 216 | 217 | `Null coalescing operators`_ traverse nested objects and return null 218 | (or ``None`` for us) on the first null or non-traversable object, 219 | depending on implementation. 220 | 221 | It's basically a compact way of doing a deep :func:`getattr()` with a 222 | default set to ``None``. 
223 | 224 | Suffice to say that ``glom(target, T.a.b.c, default=None)`` achieves 225 | this with ease, but I still want to revisit the question, since it's 226 | part of what got me thinking about ``glom`` in the first place. 227 | 228 | First off, working in PayPal's SOA environment, my team dealt with 229 | literally tens of thousands of service objects, with object 230 | definitions (from other teams) nested so deep as to make an 231 | 80-character line length laughable. 232 | 233 | But null coalescing wouldn't have helped, because in most of those 234 | cases ``None`` wasn't what we needed. We needed a good, automatically 235 | generated error message when a deeply-nested field wasn't accessible. Not 236 | ``NoneType has no attribute 'x'``, but not plain old ``None`` either. 237 | 238 | To solve this, I wrote my share of deep-gets before ``glom``, 239 | including the open-source `boltons.iterutils.get_path()`_. For 240 | whatever reason, it took me years of usage to realize just how often 241 | the deep-gets were coupled with the other transformations that 242 | ``glom`` enables. Now, I can never go back to a simple deep-get. 243 | 244 | Another years-in-the-making observation, from my time doing JavaScript 245 | then PHP then Django templates: all were much more lax on typing than 246 | Python. Not because of a fierce belief in weak types, though. More 247 | because when you're templating, it's inherently safer to return a 248 | blank value on lookup failures. You're so close to text formats that 249 | this default achieves a pretty desirable result. While implicitly 250 | doing this isn't my cup of tea, and ``glom`` opts for explicit 251 | :class:`~glom.Coalesce` specifiers, this connection contributed to the 252 | concept of ``glom`` as an "object templating" system. 253 | 254 | 255 | 256 | 257 | .. _frequently: https://mail.python.org/pipermail/python-ideas/2015-September/036289.html 258 | .. 
_asked: https://mail.python.org/pipermail/python-ideas/2016-November/043517.html 259 | .. _Null coalescing operators: https://en.wikipedia.org/wiki/Null_coalescing_operator 260 | .. _boltons.iterutils.get_path(): http://boltons.readthedocs.io/en/latest/iterutils.html#boltons.iterutils.get_path 261 | -------------------------------------------------------------------------------- /docs/grouping.rst: -------------------------------------------------------------------------------- 1 | Reduction & Grouping 2 | ==================== 3 | 4 | This document contains glom techniques for transforming a collection 5 | of data to a smaller set, otherwise known as "grouping" or 6 | "reduction". 7 | 8 | Combining iterables with Flatten and Merge 9 | ------------------------------------------ 10 | 11 | .. versionadded:: 19.1.0 12 | 13 | Got lists of lists? Sets of tuples? A sequence of dicts (but only want 14 | one)? Do you find yourself reaching for Python's builtin :func:`sum` 15 | and :func:`reduce`? To handle these situations and more, glom has five 16 | specifier types and two convenience functions: 17 | 18 | .. autofunction:: glom.flatten 19 | 20 | .. autoclass:: glom.Flatten 21 | 22 | .. autofunction:: glom.merge 23 | 24 | .. autoclass:: glom.Merge 25 | 26 | .. autoclass:: glom.Sum 27 | 28 | .. autoclass:: glom.Fold 29 | 30 | Exceptions 31 | ---------- 32 | 33 | .. 
autoclass:: glom.FoldError 34 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | glom 2 | ==== 3 | 4 | *Restructuring data, the Python way.* 5 | 6 | |release| |calver| |changelog| 7 | 8 | **glom** is a new approach to working with data in Python, featuring: 9 | 10 | * :ref:`Path-based access ` for nested structures 11 | * :ref:`Declarative data transformation ` using lightweight, Pythonic specifications 12 | * Readable, meaningful :ref:`error messages ` 13 | * Built-in :ref:`debugging ` features 14 | * Plus, :doc:`deep assignment `, :doc:`streaming `, :doc:`data validation `, and *more*! 15 | 16 | While it may sound like a lot, glom's straightforward approach becomes 17 | second-nature very quickly. Start with the :doc:`tutorial`, 18 | or `try glom in your browser now`__! 19 | 20 | .. __: https://yak.party/glompad/#spec=%22a.b.c%22%0A&target=%7B%22a%22%3A+%7B%22b%22%3A+%7B%22c%22%3A+%22d%22%7D%7D%7D%0A&v=1 21 | 22 | Installation 23 | ------------ 24 | 25 | glom is pure Python, and tested on Python 3.7+, as well as 26 | PyPy3. Installation is easy:: 27 | 28 | pip install glom 29 | 30 | Then you're ready to get glomming! 31 | 32 | .. code-block:: python 33 | 34 | from glom import glom 35 | 36 | target = {'a': {'b': {'c': 'd'}}} 37 | glom(target, 'a.b.c') # returns 'd' 38 | 39 | There's much, much more to glom, check out the :doc:`tutorial` and :doc:`API reference`! 40 | 41 | 42 | *Just glom it! ☄️* 43 | 44 | 45 | .. |release| image:: https://img.shields.io/pypi/v/glom.svg 46 | :target: https://pypi.org/project/glom/ 47 | 48 | .. |calver| image:: https://img.shields.io/badge/calver-YY.MM.MICRO-22bfda.svg 49 | :target: https://calver.org 50 | 51 | .. |changelog| image:: https://img.shields.io/badge/CHANGELOG-UPDATED-b84ad6.svg 52 | :target: https://github.com/mahmoud/glom/blob/master/CHANGELOG.md 53 | 54 | 55 | .. 
toctree:: 56 | :maxdepth: 1 57 | :caption: Learning glom 58 | 59 | tutorial 60 | faq 61 | by_analogy 62 | snippets 63 | cli 64 | 65 | .. toctree:: 66 | :maxdepth: 2 67 | :caption: API Reference 68 | 69 | api 70 | mutation 71 | streaming 72 | grouping 73 | matching 74 | debugging 75 | 76 | .. toctree:: 77 | :maxdepth: 1 78 | :caption: Extending glom 79 | 80 | custom_spec_types 81 | modes 82 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=glom 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/matching.rst: -------------------------------------------------------------------------------- 1 | Matching & Validation 2 | ===================== 3 | 4 | .. automodule:: glom.matching 5 | 6 | .. contents:: Contents 7 | :local: 8 | 9 | Validation with Match 10 | ~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | For matching whole data structures, use a :class:`~glom.Match` spec. 13 | 14 | .. 
autoclass:: glom.Match 15 | :members: 16 | 17 | Optional and required ``dict`` key matching 18 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | Note that our four :class:`~glom.Match` rules above imply that 21 | :class:`object` is a match-anything pattern. Because 22 | ``isinstance(val, object)`` is true for all values in Python, 23 | ``object`` is a useful stopping case. For instance, if we wanted to 24 | extend an example above to allow additional keys and values in the 25 | user dict above we could add :class:`object` as a generic pass through:: 26 | 27 | >>> target = [{'id': 1, 'email': 'alice@example.com', 'extra': 'val'}] 28 | >>> spec = Match([{'id': int, 'email': str, object: object}]) 29 | >>> glom(target, spec) == \ 30 | ... [{'id': 1, 'email': 'alice@example.com', 'extra': 'val'}] 31 | True 32 | 33 | The fact that ``{object: object}`` will match any dictionary exposes 34 | the subtlety in :class:`~glom.Match` dictionary evaluation. 35 | 36 | By default, value match keys are required, and other keys are 37 | optional. For example, ``'id'`` and ``'email'`` above are required 38 | because they are matched via ``==``. If either was not present, it 39 | would raise :class:`~glom.MatchError`. :class:`object` however is matched 40 | with :func:`isinstance()`. Since it is not a value-match comparison, 41 | it is not required. 42 | 43 | This default behavior can be modified with :class:`~glom.Required` 44 | and :class:`~glom.Optional`. 45 | 46 | .. autoclass:: glom.Optional 47 | 48 | .. autoclass:: glom.Required 49 | 50 | ``M`` Expressions 51 | ~~~~~~~~~~~~~~~~~ 52 | 53 | The most concise way to express validation and guards. 54 | 55 | .. autodata:: glom.M 56 | 57 | Boolean operators and matching 58 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 59 | 60 | While ``M`` is an easy way to construct expressions, sometimes a more 61 | object-oriented approach can be more suitable. 62 | 63 | .. autoclass:: glom.Or 64 | 65 | .. autoclass:: glom.And 66 | 67 | ..
autoclass:: glom.Not 68 | 69 | String matching 70 | ~~~~~~~~~~~~~~~ 71 | 72 | .. autoclass:: glom.Regex 73 | 74 | Control flow with ``Switch`` 75 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 76 | 77 | Match becomes even more powerful when combined with the ability to 78 | branch spec execution. 79 | 80 | .. autoclass:: glom.Switch 81 | 82 | Exceptions 83 | ~~~~~~~~~~ 84 | 85 | .. autoclass:: glom.MatchError 86 | 87 | .. autoclass:: glom.TypeMatchError 88 | 89 | Validation with Check 90 | ~~~~~~~~~~~~~~~~~~~~~ 91 | 92 | .. warning:: 93 | 94 | Given the suite of tools introduced with :class:`~glom.Match`, the 95 | :class:`Check` specifier type may be deprecated in a future 96 | release. 97 | 98 | .. autoclass:: glom.Check 99 | 100 | .. autoclass:: glom.CheckError 101 | -------------------------------------------------------------------------------- /docs/modes.rst: -------------------------------------------------------------------------------- 1 | ``glom`` Modes 2 | ============== 3 | 4 | .. note:: 5 | 6 | Be sure to read ":doc:`custom_spec_types`" before diving into the 7 | deep details below. 8 | 9 | A glom "mode" determines how Python built-in data structures are 10 | evaluated. Think of it like a dialect for how :class:`dict`, 11 | :class:`tuple`, :class:`list`, etc., are interpreted in a spec. Modes 12 | do not change the behavior of `T`, or many other core 13 | specifiers. Modes are one of the keys to keeping glom specs short and 14 | readable. 15 | 16 | A mode is used similar to a spec: whatever Python data structure is 17 | passed to the mode type constructor will be evaluated under that 18 | mode. Once set, the mode remains in place until it is overridden by 19 | another mode. 20 | 21 | glom only has a few modes: 22 | 23 | 1. :class:`~glom.Auto` - The default glom behavior, used for data 24 | transformation, with the spec acting as a template. 25 | 2. 
:class:`~glom.Fill` - A variant of the default transformation 26 | behavior; preferring to "fill" containers instead of 27 | iterating, chaining, etc. 28 | 3. :class:`~glom.Match` - Treats the spec as a pattern, checking 29 | that the target matches. 30 | 31 | Adding a new mode is relatively rare, but when it comes up this 32 | document includes relevant details. 33 | 34 | 35 | Writing custom Modes 36 | -------------------- 37 | 38 | A mode is a spec which sets ``scope[MODE]`` to a function which 39 | accepts ``target``, ``spec``, and ``scope`` and returns a result, a 40 | signature very similar to the top-level :func:`~glom.glom` method 41 | itself. 42 | 43 | For example, here is an abbreviated version of the :class:`~glom.Fill` 44 | mode: 45 | 46 | 47 | .. code-block:: python 48 | 49 | class Fill(object): 50 | def __init__(self, spec): 51 | self.spec = spec 52 | 53 | def glomit(self, target, scope): 54 | scope[MODE] = _fill 55 | return scope[glom](target, self.spec, scope) 56 | 57 | def _fill(target, spec, scope): 58 | recurse = lambda val: scope[glom](target, val, scope) 59 | if type(spec) is dict: 60 | return {recurse(key): recurse(val) 61 | for key, val in spec.items()} 62 | if type(spec) in (list, tuple, set, frozenset): 63 | result = [recurse(val) for val in spec] 64 | if type(spec) is list: 65 | return result 66 | return type(spec)(result) 67 | if callable(spec): 68 | return spec(target) 69 | return spec 70 | 71 | Like any other :doc:`Specifier Type `, ``Fill`` has 72 | a ``glomit()`` method, and this method sets the ``MODE`` key in the 73 | :ref:`glom scope ` to our ``_fill`` function. The name 74 | itself doesn't matter, but the signature must match exactly: 75 | ``(target, spec, scope)``. 76 | 77 | As mentioned above, custom modes are relatively rare for glom. If you 78 | write one, `let us know `_! 
79 | -------------------------------------------------------------------------------- /docs/mutation.rst: -------------------------------------------------------------------------------- 1 | Assignment & Mutation 2 | ===================== 3 | 4 | .. automodule:: glom.mutation 5 | 6 | .. contents:: Contents 7 | :local: 8 | 9 | Assignment 10 | ---------- 11 | 12 | Deeply assign within an existing structure, given a path and a value. 13 | 14 | .. autofunction:: glom.assign 15 | 16 | .. autoclass:: glom.Assign 17 | 18 | Deletion 19 | -------- 20 | 21 | Delete attributes from objects and keys from containers. 22 | 23 | .. autofunction:: glom.delete 24 | 25 | .. autoclass:: glom.Delete 26 | 27 | 28 | Exceptions 29 | ---------- 30 | 31 | .. autoclass:: glom.PathAssignError 32 | 33 | .. autoclass:: glom.PathDeleteError 34 | -------------------------------------------------------------------------------- /docs/outreach.md: -------------------------------------------------------------------------------- 1 | # glom outreach 2 | 3 | ## Interesting stack overflow questions to answer 4 | 5 | Some of these may be more amenable to remapping. 6 | 7 | 1. https://stackoverflow.com/questions/1602934/check-if-a-given-key-already-exists-in-a-dictionary 2,627,182 8 | 1. https://stackoverflow.com/questions/38987/how-to-merge-two-dictionaries-in-a-single-expression 1,391,000 9 | 1. https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists 1,388,600 10 | 1. https://stackoverflow.com/questions/4984647/accessing-dict-keys-like-an-attribute 128,380 11 | 1. https://stackoverflow.com/questions/6027558/flatten-nested-python-dictionaries-compressing-keys 75,500 12 | 1. https://stackoverflow.com/questions/651794/whats-the-best-way-to-initialize-a-dict-of-dicts-in-python 66,390 13 | 1. https://stackoverflow.com/questions/2213923/removing-duplicates-from-a-list-of-lists 60,962 14 | 1. 
https://stackoverflow.com/questions/9285086/access-dict-key-and-return-none-if-doesnt-exist 33,140 15 | 1. https://stackoverflow.com/questions/10632839/transform-list-of-tuples-into-a-flat-list-or-a-matrix 23,128 16 | 1. https://stackoverflow.com/questions/16204076/pep8-compliant-deep-dictionary-access 2,647 17 | 1. https://stackoverflow.com/questions/21297475/set-a-value-deep-in-a-dict-dynamically 1,017 18 | 1. https://stackoverflow.com/questions/22377725/python-linq-like-methods 947 19 | 1. https://stackoverflow.com/questions/11515620/safely-access-objects-in-python-without-try-catch 368 20 | 1. https://stackoverflow.com/questions/48262059/recursively-flatten-nested-list-in-python 89 21 | 1. https://stackoverflow.com/questions/23106895/how-to-use-a-string-to-safely-access-deep-python-properties 55 22 | 23 | ## Completed answers 24 | 25 | If you're reading this, upvote! 26 | 27 | 1. https://stackoverflow.com/a/54656024/178013 28 | 1. https://stackoverflow.com/a/53354398/178013 29 | 1. https://stackoverflow.com/questions/53108624/using-python-module-glom-extract-irregular-nested-lists-into-a-flattened-list-o 30 | 1. https://stackoverflow.com/questions/54078102/get-value-of-nested-attribute-by-filtering-list-on-other-attribute-with-python-g 31 | 1. https://stackoverflow.com/questions/38220370/is-there-a-way-to-execute-jq-from-python/55017219#55017219 <- great answer! 
32 | -------------------------------------------------------------------------------- /docs/requirements-rtd.txt: -------------------------------------------------------------------------------- 1 | -r ../requirements.txt 2 | alabaster==0.7.13 3 | atomicwrites==1.4.1 4 | Babel==2.11.0 5 | certifi==2022.12.7 6 | charset-normalizer==3.0.1 7 | docutils==0.17.1 8 | idna==3.4 9 | imagesize==1.4.1 10 | Jinja2==3.1.2 11 | livereload==2.6.3 12 | MarkupSafe==2.1.1 13 | more-itertools==9.0.0 14 | Pygments==2.14.0 15 | pytz==2022.7.1 16 | requests==2.28.2 17 | six==1.16.0 18 | snowballstemmer==2.2.0 19 | Sphinx==5.3.0 20 | sphinx-autobuild==2021.3.14 21 | sphinx-rtd-theme==1.1.1 22 | sphinxcontrib-devhelp==1.0.2 23 | sphinxcontrib-htmlhelp==2.0.0 24 | sphinxcontrib-jsmath==1.0.1 25 | sphinxcontrib-qthelp==1.0.3 26 | sphinxcontrib-serializinghtml==1.1.5 27 | sphinxcontrib.applehelp==1.0.2 28 | toml==0.10.2 29 | tornado==6.2 30 | urllib3==1.26.14 31 | -------------------------------------------------------------------------------- /docs/snippets.rst: -------------------------------------------------------------------------------- 1 | Examples & Snippets 2 | =================== 3 | 4 | glom can do a lot of things, in the right hands. This doc makes those 5 | hands yours, through sample code of useful building blocks and common 6 | glom tasks. 7 | 8 | .. contents:: Contents 9 | :local: 10 | 11 | .. note:: 12 | 13 | All samples below assume ``from glom import glom, T, Call`` and any 14 | other dependencies. 15 | 16 | Reversing a Target 17 | ------------------ 18 | 19 | Here are a couple ways to reverse the current target. The first uses 20 | basic Python builtins, the second uses the :data:`~glom.T` object. 21 | 22 | 23 | .. 
code-block:: python 24 | 25 | glom([1, 2, 3], (reversed, list)) 26 | glom([1, 2, 3], T[::-1]) 27 | 28 | 29 | Iteration Result as Tuple 30 | ------------------------- 31 | 32 | The default glom iteration specifier returns a list, but it's easy to 33 | turn that list into a tuple. The following returns a tuple of 34 | absolute-valued integers: 35 | 36 | 37 | .. code-block:: python 38 | 39 | glom([-1, 2, -3], ([abs], tuple)) 40 | 41 | 42 | Data-Driven Assignment 43 | ---------------------- 44 | 45 | glom's dict specifier interprets the keys as constants. A different 46 | technique is required if the dict keys are part of the target data 47 | rather than spec. 48 | 49 | 50 | .. code-block:: python 51 | 52 | glom({1:2, 2:3}, Call(dict, args=(T.items(),))) 53 | glom({1:2, 2:3}, lambda t: dict(t.items())) 54 | glom({1:2, 2:3}, dict) 55 | 56 | 57 | Construct Instance 58 | ------------------ 59 | 60 | A common use case is to construct an instance. In the most basic 61 | case, the default behavior on callable will suffice. 62 | 63 | 64 | The following converts a list of ints to a list of 65 | :class:`decimal.Decimal` objects. 66 | 67 | 68 | .. code-block:: python 69 | 70 | glom([1, 2, 3], [Decimal]) 71 | 72 | 73 | If additional arguments are required, :class:`~glom.Call` or ``lambda`` 74 | are good options. 75 | 76 | This converts a list to a :class:`collections.deque`, 77 | while specifying a max size of 10. 78 | 79 | 80 | .. code-block:: python 81 | 82 | glom([1, 2, 3], Call(deque, args=[T, 10])) 83 | glom([1, 2, 3], lambda t: deque(t, 10)) 84 | 85 | 86 | Filtered Iteration 87 | ------------------ 88 | Sometimes in addition to stepping through an iterable, 89 | you'd like to omit some of the items from the result 90 | set altogether. Here are two ways 91 | to keep only the odd numbers from a list. 92 | 93 | 94 | ..
code-block:: python 95 | 96 | glom([1, 2, 3, 4, 5, 6], lambda t: [i for i in t if i % 2]) 97 | glom([1, 2, 3, 4, 5, 6], [lambda i: i if i % 2 else SKIP]) 98 | 99 | 100 | The second approach demonstrates the use of ``glom.SKIP`` to 101 | back out of an execution. 102 | 103 | This can also be combined with :class:`~glom.Coalesce` to 104 | filter items which are missing sub-attributes. 105 | 106 | Here is an example of extracting the primary email from a group 107 | of contacts, skipping where the email is empty string, None, 108 | or the attribute is missing. 109 | 110 | .. code-block:: python 111 | 112 | glom(contacts, [Coalesce('primary_email.email', skip=('', None), default=SKIP)]) 113 | 114 | 115 | Preserve Type 116 | ------------- 117 | The iteration specifier will walk lists and tuples. In some cases it 118 | would be convenient to preserve the target type in the result type. 119 | 120 | This glomspec iterates over a tuple or list, adding one to each 121 | element, and uses :class:`~glom.T` to return a tuple or list depending 122 | on the target input's type. 123 | 124 | 125 | .. code-block:: python 126 | 127 | glom((1, 2, 3), ( 128 | { 129 | "type": type, 130 | "result": [T + 1] # arbitrary operation 131 | }, T['type'](T['result']))) 132 | 133 | 134 | This demonstrates an advanced technique -- just as a tuple 135 | can be used to process sub-specs "in series", a dict 136 | can be used to store intermediate results while processing 137 | sub-specs "in parallel" so they can then be recombined later on. 138 | 139 | 140 | Automatic Django ORM type handling 141 | ---------------------------------- 142 | 143 | In day-to-day Django ORM usage, Managers_ and QuerySets_ are 144 | everywhere. They work great with glom, too, but they work even better 145 | when you don't have to call ``.all()`` all the time. Enable automatic 146 | iteration using the following :meth:`~glom.register` technique: 147 | 148 | .. 
code-block:: python 149 | 150 | import glom 151 | import django.db.models 152 | 153 | glom.register(django.db.models.Manager, iterate=lambda m: m.all()) 154 | glom.register(django.db.models.QuerySet, iterate=lambda qs: qs.all()) 155 | 156 | Call this in ``settings`` or somewhere similarly early in your 157 | application setup for the best results. 158 | 159 | .. _Managers: https://docs.djangoproject.com/en/2.0/topics/db/managers/ 160 | .. _QuerySets: https://docs.djangoproject.com/en/2.0/ref/models/querysets/ 161 | 162 | 163 | Filter Iterable 164 | --------------- 165 | 166 | An iteration specifier can filter items out by using 167 | :data:`~glom.SKIP` as the default of a :class:`~glom.Check` object. 168 | 169 | 170 | .. code-block:: python 171 | 172 | glom(['cat', 1, 'dog', 2], [Check(type=str, default=SKIP)]) 173 | # ['cat', 'dog'] 174 | 175 | You can also truncate the list at the first failing check by using 176 | :data:`~glom.STOP`. 177 | 178 | .. _lisp-style-if: 179 | 180 | Lisp-style If Extension 181 | ----------------------- 182 | 183 | Any class with a glomit method will be treated as a spec by glom. 184 | As an example, here is a lisp-style If expression custom spec type: 185 | 186 | .. code-block:: python 187 | 188 | class If(object): 189 | def __init__(self, cond, if_, else_=None): 190 | self.cond, self.if_, self.else_ = cond, if_, else_ 191 | 192 | def glomit(self, target, scope): 193 | g = lambda spec: scope[glom](target, spec, scope) 194 | if g(self.cond): 195 | return g(self.if_) 196 | elif self.else_: 197 | return g(self.else_) 198 | else: 199 | return None 200 | 201 | glom(1, If(bool, {'yes': T}, {'no': T})) 202 | # {'yes': 1} 203 | glom(0, If(bool, {'yes': T}, {'no': T})) 204 | # {'no': 0} 205 | 206 | 207 | Parallel Evaluation of Sub-Specs 208 | -------------------------------- 209 | 210 | This is another example of a simple glom extension.
211 | Sometimes it is convenient to execute multiple glom-specs 212 | in parallel against a target, and get a sequence of their 213 | results. 214 | 215 | .. code-block:: python 216 | 217 | class Seq(object): 218 | def __init__(self, *subspecs): 219 | self.subspecs = subspecs 220 | 221 | def glomit(self, target, scope): 222 | return [scope[glom](target, spec, scope) for spec in self.subspecs] 223 | 224 | glom('1', Seq(float, int)) 225 | # [1.0, 1] 226 | 227 | 228 | Without this extension, the simplest way to achieve the same result is 229 | with a dict: 230 | 231 | .. code-block:: python 232 | 233 | glom('1', ({1: float, 2: int}, T.values())) 234 | 235 | 236 | Clamp Values 237 | ------------ 238 | 239 | A common numerical operation is to clamp values -- if they 240 | are above or below a certain value, assign them to that value. 241 | 242 | Using a pattern-matching glom idiom, this can be implemented 243 | simply: 244 | 245 | .. code-block:: python 246 | 247 | glom(range(10), [(M < 7) | Val(7)]) 248 | # [0, 1, 2, 3, 4, 5, 6, 7, 7, 7] 249 | 250 | 251 | What if you want to drop rather than clamp out-of-range values? 252 | 253 | .. code-block:: python 254 | 255 | glom(range(10), [(M < 7) | Val(SKIP)]) 256 | # [0, 1, 2, 3, 4, 5, 6] 257 | 258 | 259 | Transform Tree 260 | -------------- 261 | 262 | With an arbitrary depth tree, :class:`~glom.Ref` can be used to 263 | express a recursive spec. 264 | 265 | For example, this `etree2dicts` spec will recursively walk an `ElementTree` 266 | instance and transform it from nested objects to nested dicts. 267 | 268 | .. code-block:: python 269 | 270 | etree2dicts = Ref('ElementTree', 271 | {"tag": "tag", "text": "text", "attrib": "attrib", "children": (iter, [Ref('ElementTree')])}) 272 | 273 | 274 | Alternatively, say we only wanted to generate tuples of tag and children: 275 | 276 | .. 
code-block:: python 277 | 278 | etree2tuples = Fill(Ref('ElementTree', (T.tag, Iter(Ref('ElementTree')).all()))) 279 | 280 | 281 | (Note also the use of :class:`~glom.Fill` mode to easily construct a tuple.) 282 | 283 | .. code-block:: html 284 | 285 | 286 | 287 | the title 288 | 289 | 290 |

A paragraph

291 | 292 | 293 | 294 | 295 | Will translate to the following tuples: 296 | 297 | .. code-block:: python 298 | 299 | >>> etree = ElementTree.fromstring(html_text) 300 | >>> glom(etree, etree2tuples) 301 | ('html', [('head', [('title', [])]), ('body', [('p', [])])]) 302 | 303 | 304 | Fix Up Strings in Parsed JSON 305 | ----------------------------- 306 | 307 | Tree-walking with :class:`~glom.Ref()` combines powerfully with 308 | pattern matching from :class:`~glom.Match()`. 309 | 310 | In this case, consider that we want to transform parsed JSON recursively, 311 | such that all unicodes are converted to native strings. 312 | 313 | 314 | .. code-block:: python 315 | 316 | glom(json.loads(data), 317 | Ref('json', 318 | Match(Switch({ 319 | dict: {Ref('json'): Ref('json')}, 320 | list: [Ref('json')], 321 | type(u''): Auto(str), 322 | object: T})) 323 | ) 324 | ) 325 | 326 | 327 | :class:`~glom.Match()` above splits the :class:`~glom.Ref()` evaluation into 4 cases: 328 | 329 | * on :class:`dict`, use :class:`~glom.Ref()` to recurse for all keys and values 330 | * on :class:`list`, use :class:`~glom.Ref()` to recurse on each item 331 | * on text objects (``type(u'')``) -- py3 :class:`str` or py2 332 | :class:`unicode` -- transform the target with :class:`str` 333 | * for all other values (``object``), pass them through 334 | 335 | As motivation for why this might come up: attributes, class names, 336 | function names, and identifiers must be the native string type for a 337 | given Python, i.e., bytestrings in Python 2 and unicode in Python 3. 338 | 339 | 340 | Store and Retrieve Current Target 341 | --------------------------------- 342 | 343 | The :data:`~glom.A` scope assignment helper makes it 344 | convenient to hold on to the current target and then reset it. 345 | 346 | The ``(A.t, ..., S.t)`` "sandwich" is a convenient idiom for these 347 | cases. 348 | 349 | For example, we could use this to update a ``dict``: 350 | 351 | 352 | .. 
code-block:: python 353 | 354 | glom({}, (A.t, T.update({1: 1}), S.t)) 355 | 356 | 357 | Accessing Ancestry 358 | ------------------ 359 | 360 | The technique above can be useful when you want to flatten an object structure by combining child, 361 | parent, and/or grandparent data. For instance: 362 | 363 | .. code-block:: python 364 | 365 | input_data = {"a": {"b": {"c": 1}}} 366 | # transform to: 367 | output_data = [{"value": 1, "grandparent": "a"}] 368 | 369 | We can do this by leveraging glom's Scopes_. Here's the spec to get the results above: 370 | 371 | .. code-block:: python 372 | 373 | ( 374 | T.items(), 375 | [ 376 | ( 377 | A.globals.gp_item, # save the grandparent item to the global scope 378 | T[1].values(), # access the values as usual 379 | [{"value": "c", "grandparent": S.globals.gp_item[0]}], # access the grandparent item 380 | ) 381 | ], 382 | Flatten(), 383 | ) 384 | 385 | You can play with glom scopes `in your browser here`__. 386 | 387 | .. __: https://yak.party/glompad/#spec=%28%0A++++T.items%28%29%2C%0A++++%5B%28%0A++++++++++++A.globals.gp_item%2C%0A++++++++++++T%5B1%5D.values%28%29%2C%0A++++++++++++%5B%7B%22val%22%3A+%22c%22%2C+%22path%22%3A+S.globals.gp_item%5B0%5D%7D%5D%2C%0A++++%29%5D%2C%0A++++Flatten%28%29%2C%0A%29%0A&target=%7B%0A++%22a%22%3A+%7B%0A++++%22b%22%3A+%7B%0A++++++%22c%22%3A+1%0A++++%7D%0A++%7D%0A%7D&v=1 388 | 389 | .. _Scopes: https://glom.readthedocs.io/en/latest/api.html#updating-the-scope-s-a 390 | 391 | Note that at the time of writing, glom doesn't yet have full tree traversal, so the nesting of 392 | the spec is going to roughly match the nesting of your data. If you need this to work in an 393 | arbitrarily nested structure, we recommend `remap `_, 394 | the recursive map function. 
-------------------------------------------------------------------------------- /docs/streaming.rst: -------------------------------------------------------------------------------- 1 | Streaming & Iteration 2 | ===================== 3 | 4 | .. versionadded:: 19.10.0 5 | 6 | .. automodule:: glom.streaming 7 | 8 | .. contents:: Contents 9 | :local: 10 | 11 | .. autoclass:: glom.Iter 12 | 13 | .. automethod:: map 14 | .. automethod:: filter 15 | .. automethod:: chunked 16 | .. automethod:: split 17 | .. automethod:: flatten 18 | .. automethod:: unique 19 | .. automethod:: limit 20 | .. automethod:: slice 21 | .. automethod:: takewhile 22 | .. automethod:: dropwhile 23 | .. automethod:: all 24 | .. automethod:: first 25 | -------------------------------------------------------------------------------- /docs/tutorial.rst: -------------------------------------------------------------------------------- 1 | ``glom`` Tutorial 2 | ================= 3 | 4 | *Learn to use glom in no time!* 5 | 6 | Basic use of glom requires only a glance, not a whole tutorial. The 7 | case studies below take a wider look at day-to-day data and object 8 | manipulation, helping you develop an eye for writing robust, 9 | declarative data transformations. 10 | 11 | Go beyond basic with 10 minutes or less, and even further if you 12 | can spare a half hour. 13 | 14 | .. contents:: Contents 15 | :local: 16 | 17 | ..
automodule:: glom.tutorial 18 | -------------------------------------------------------------------------------- /glom/__init__.py: -------------------------------------------------------------------------------- 1 | from glom.core import (glom, 2 | Fill, 3 | Auto, 4 | register, 5 | register_op, 6 | Glommer, 7 | Call, 8 | Invoke, 9 | Spec, 10 | Ref, 11 | OMIT, # backwards compat 12 | SKIP, 13 | STOP, 14 | UP, 15 | ROOT, 16 | MODE, 17 | Path, 18 | Vars, 19 | Val, 20 | Literal, # backwards compat 2020-07 21 | Let, # backwards compat 2020-07 22 | Coalesce, 23 | Inspect, 24 | Pipe, 25 | GlomError, 26 | BadSpec, 27 | PathAccessError, 28 | PathAssignError, 29 | CoalesceError, 30 | UnregisteredTarget, 31 | T, S, A) 32 | 33 | from glom.reduction import Sum, Fold, Flatten, flatten, FoldError, Merge, merge 34 | from glom.matching import (M, 35 | Or, 36 | And, 37 | Not, 38 | Match, 39 | MatchError, 40 | TypeMatchError, 41 | Regex, 42 | Optional, 43 | Required, 44 | Switch, 45 | Check, 46 | CheckError) 47 | from glom.mutation import Assign, Delete, assign, delete, PathDeleteError 48 | 49 | # there's no -ion word that really fits what "streaming" means. 50 | # generation, production, iteration, all have more relevant meanings 51 | # elsewhere. 
(maybe procrastination :P) 52 | from glom.streaming import Iter 53 | 54 | from glom._version import __version__ 55 | -------------------------------------------------------------------------------- /glom/__main__.py: -------------------------------------------------------------------------------- 1 | from glom.cli import console_main 2 | 3 | if __name__ == '__main__': 4 | console_main() 5 | -------------------------------------------------------------------------------- /glom/_version.py: -------------------------------------------------------------------------------- 1 | version_info = (24, 11, 1, 'dev') 2 | __version__ = '.'.join([str(part) for part in version_info if part or part == 0]) 3 | -------------------------------------------------------------------------------- /glom/cli.py: -------------------------------------------------------------------------------- 1 | """like jq, but with the full power of python in the spec. 2 | 3 | Usage: python -m glom [FLAGS] [spec [target]] 4 | 5 | Command-line interface to the glom library, providing nested data 6 | access and data restructuring with the power of Python. 
7 | 8 | 9 | Flags: 10 | 11 | --help / -h show this help message and exit 12 | --target-file TARGET_FILE path to target data source (optional) 13 | --target-format TARGET_FORMAT 14 | format of the source data (json, python, toml, 15 | or yaml) (defaults to 'json') 16 | --spec-file SPEC_FILE path to glom spec definition (optional) 17 | --spec-format SPEC_FORMAT format of the glom spec definition (json, python, 18 | python-full) (defaults to 'python') 19 | --indent INDENT number of spaces to indent the result, 0 to disable 20 | pretty-printing (defaults to 2) 21 | --debug interactively debug any errors that come up 22 | --inspect interactively explore the data 23 | 24 | try out: 25 | ` 26 | curl -s https://api.github.com/repos/mahmoud/glom/events | python -m glom '[{"type": "type", "date": "created_at", "user": "actor.login"}]' 27 | 28 | """ 29 | 30 | 31 | 32 | import os 33 | import ast 34 | import sys 35 | import json 36 | 37 | from face import (Command, 38 | Flag, 39 | face_middleware, 40 | PosArgSpec, 41 | PosArgDisplay, 42 | CommandLineError, 43 | UsageError) 44 | from face.utils import isatty 45 | from boltons.iterutils import is_scalar 46 | 47 | import glom 48 | from glom import Path, GlomError, Inspect 49 | 50 | # TODO: --default? 51 | 52 | def glom_cli(target, spec, indent, debug, inspect, scalar): 53 | """Command-line interface to the glom library, providing nested data 54 | access and data restructuring with the power of Python. 
def glom_cli(target, spec, indent, debug, inspect, scalar):
    """Command-line interface to the glom library, providing nested data
    access and data restructuring with the power of Python.
    """
    if debug or inspect:
        # Interactive features (breakpoint / post-mortem) are only enabled
        # while stdin is still open.
        stdin_open = not sys.stdin.closed
        spec = Inspect(spec,
                       echo=inspect,
                       recursive=inspect,
                       breakpoint=inspect and stdin_open,
                       post_mortem=debug and stdin_open)

    try:
        result = glom.glom(target, spec)
    except GlomError as ge:
        # Report the failure and return a non-zero status to the caller.
        print(f'{ge.__class__.__name__}: {ge}')
        return 1

    # --indent 0 disables pretty-printing; json.dumps expects None for that.
    if not indent:
        indent = None

    if scalar and is_scalar(result):
        # Raw value, no JSON quoting and no trailing newline.
        print(result, end='')
    else:
        print(json.dumps(result, indent=indent, sort_keys=True))
    return


def get_command():
    """Build the ``face`` Command for the glom CLI.

    Registers the positional ``[spec [target]]`` arguments, the
    ``mw_get_target`` middleware, and every flag accepted by ``glom_cli``.

    Returns:
        The configured ``Command`` instance.
    """
    posargs = PosArgSpec(str, max_count=2, display={'label': '[spec [target]]'})
    cmd = Command(glom_cli, posargs=posargs, middlewares=[mw_get_target])
    cmd.add('--target-file', str, missing=None, doc='path to target data source')
    # Help text lists every format mw_handle_target accepts.
    cmd.add('--target-format', str, missing='json',
            doc='format of the source data (json, python, toml, or yaml)')
    cmd.add('--spec-file', str, missing=None, doc='path to glom spec definition')
    cmd.add('--spec-format', str, missing='python',
            doc='format of the glom spec definition (json, python, python-full)')

    cmd.add('--indent', int, missing=2,
            doc='number of spaces to indent the result, 0 to disable pretty-printing')

    cmd.add('--scalar', parse_as=True,
            doc="if the result is a single value (not a collection), output it"
            " without quotes or whitespace, for easier usage in scripts")
    cmd.add('--debug', parse_as=True, doc='interactively debug any errors that come up')
    cmd.add('--inspect', parse_as=True, doc='interactively explore the data')
    return cmd


def main(argv):
    """Run the glom command against ``argv``.

    Returns:
        The command's return value, or ``0`` when that value is falsy.
    """
    cmd = get_command()
    return cmd.run(argv) or 0


def console_main():
    """Console entry point: run ``main(sys.argv)`` and exit with its status.

    When the ``GLOM_CLI_DEBUG`` environment variable is set, ``sys.argv`` is
    echoed first and any ``Exception`` drops into a pdb post-mortem before
    being re-raised.
    """
    _enable_debug = os.getenv('GLOM_CLI_DEBUG')
    if _enable_debug:
        print(sys.argv)
    try:
        sys.exit(main(sys.argv) or 0)
    except Exception:
        if _enable_debug:
            import pdb
            pdb.post_mortem()
        raise
pdb;pdb.post_mortem() 115 | raise 116 | 117 | 118 | def mw_handle_target(target_text, target_format): 119 | """ Handles reading in a file specified in cli command. 120 | 121 | Args: 122 | target_text (str): The target data to load, as text 123 | target_format (str): Valid formats include `json`, `toml`, and `yml`/`yaml` 124 | Returns: 125 | The content of the file that you specified 126 | Raises: 127 | CommandLineError: Issue with file format or appropriate file reading package not installed. 128 | """ 129 | if not target_text: 130 | return {} 131 | target = {} 132 | if target_format == 'json': 133 | load_func = json.loads 134 | elif target_format in ('yaml', 'yml'): 135 | try: 136 | import yaml 137 | load_func = yaml.safe_load 138 | except ImportError: # pragma: no cover 139 | raise UsageError('No YAML package found. To process yaml files, run: pip install PyYAML') # pragma: no cover 140 | elif target_format == 'toml': 141 | missing = UsageError('No TOML package found. To process toml files, upgrade to Python 3.11 or run: pip install tomli') 142 | try: 143 | import tomllib 144 | load_func = tomllib.loads 145 | except ImportError: 146 | try: 147 | import tomli 148 | load_func = tomli.loads 149 | except ImportError: # pragma: no cover 150 | raise missing # pragma: no cover 151 | elif target_format == 'python': 152 | load_func = ast.literal_eval 153 | else: 154 | raise UsageError('expected target-format to be one of python, json, toml, or yaml') 155 | 156 | 157 | try: 158 | target = load_func(target_text) 159 | except Exception as e: 160 | raise UsageError('could not load target data, got: %s: %s' 161 | % (e.__class__.__name__, e)) 162 | 163 | 164 | return target 165 | 166 | 167 | @face_middleware(provides=['spec', 'target']) 168 | def mw_get_target(next_, posargs_, target_file, target_format, spec_file, spec_format): 169 | spec_text, target_text = None, None 170 | if len(posargs_) == 2: 171 | spec_text, target_text = posargs_ 172 | elif len(posargs_) == 1: 173 | 
spec_text, target_text = posargs_[0], None 174 | 175 | if spec_text and spec_file: 176 | raise UsageError('expected spec file or spec argument, not both') 177 | elif spec_file: 178 | try: 179 | with open(spec_file) as f: 180 | spec_text = f.read() 181 | except OSError as ose: 182 | raise UsageError(f'could not read spec file {spec_file!r}, got: {ose}') 183 | 184 | if not spec_text: 185 | spec = Path() 186 | elif spec_format == 'python': 187 | if spec_text[0] not in ('"', "'", "[", "{", "("): 188 | # intention: handle trivial path access, assume string 189 | spec_text = repr(spec_text) 190 | spec = ast.literal_eval(spec_text) 191 | elif spec_format == 'json': 192 | spec = json.loads(spec_text) 193 | elif spec_format == 'python-full': 194 | spec = _eval_python_full_spec(spec_text) 195 | else: 196 | raise UsageError('expected spec-format to be one of json, python, or python-full') 197 | 198 | if target_text and target_file: 199 | raise UsageError('expected target file or target argument, not both') 200 | elif target_text == '-' or target_file == '-': 201 | target_text = sys.stdin.read() 202 | elif target_file: 203 | try: 204 | target_text = open(target_file).read() 205 | except OSError as ose: 206 | raise UsageError(f'could not read target file {target_file!r}, got: {ose}') 207 | elif not target_text and not isatty(sys.stdin): 208 | target_text = sys.stdin.read() 209 | 210 | target = mw_handle_target(target_text, target_format) 211 | 212 | return next_(spec=spec, target=target) 213 | 214 | 215 | def _from_glom_import_star(): 216 | ret = dict(glom.__dict__) 217 | for k in ('__builtins__', '__name__', '__doc__', '__package__'): 218 | ret.pop(k, None) 219 | for k, v in list(ret.items()): 220 | if type(v) == type(glom): 221 | ret.pop(k) 222 | return ret 223 | 224 | 225 | def _eval_python_full_spec(py_text): 226 | name = '__cli_glom_spec__' 227 | code_str = f'{name} = {py_text}' 228 | env = _from_glom_import_star() 229 | spec = _compile_code(code_str, name=name, env=env) 
230 | return spec 231 | 232 | 233 | def _compile_code(code_str, name, env=None, verbose=False): 234 | code = compile(code_str, '', 'single') 235 | if verbose: 236 | print(code_str) 237 | if env is None: 238 | env = {} 239 | exec(code, env) 240 | 241 | return env[name] 242 | -------------------------------------------------------------------------------- /glom/grouping.py: -------------------------------------------------------------------------------- 1 | """ 2 | Group mode 3 | """ 4 | 5 | import random 6 | 7 | from boltons.typeutils import make_sentinel 8 | 9 | from .core import glom, MODE, SKIP, STOP, TargetRegistry, Path, T, BadSpec, _MISSING 10 | 11 | 12 | ACC_TREE = make_sentinel('ACC_TREE') 13 | ACC_TREE.__doc__ = """ 14 | tree of accumulators for aggregation; 15 | structure roughly corresponds to the result, 16 | but is not 1:1; instead the main purpose is to ensure 17 | data is kept until the Group() finishes executing 18 | """ 19 | 20 | CUR_AGG = make_sentinel('CUR_AGG') 21 | CUR_AGG.__doc__ = """ 22 | the spec which is currently performing aggregation -- 23 | useful for specs that want to work in either "aggregate" 24 | mode, or "spec" mode depending on if they are in Group mode 25 | or not; this sentinel in the Scope allows a spec to decide 26 | if it is "closest" to the Group and so should behave 27 | like an aggregate, or if it is further away and so should 28 | have normal spec behavior. 
29 | """ 30 | 31 | 32 | def target_iter(target, scope): 33 | iterate = scope[TargetRegistry].get_handler('iterate', target, path=scope[Path]) 34 | 35 | try: 36 | iterator = iterate(target) 37 | except Exception as e: 38 | raise TypeError('failed to iterate on instance of type %r at %r (got %r)' 39 | % (target.__class__.__name__, Path(*scope[Path]), e)) 40 | return iterator 41 | 42 | 43 | class Group: 44 | """supports nesting grouping operations -- 45 | think of a glom-style recursive boltons.iterutils.bucketize 46 | 47 | the "branches" of a Group spec are dicts; 48 | the leaves are lists, or an Aggregation object 49 | an Aggregation object is any object that defines the 50 | method agg(target, accumulator) 51 | 52 | For example, here we get a map of even and odd counts:: 53 | 54 | >>> glom(range(10), Group({T % 2: T})) 55 | {0: 8, 1: 9} 56 | 57 | And here we create a `"bucketized" 58 | `_ 59 | map of even and odd numbers:: 60 | 61 | >>> glom(range(10), Group({T % 2: [T]})) 62 | {0: [0, 2, 4, 6, 8], 1: [1, 3, 5, 7, 9]} 63 | 64 | target is the current target, accumulator is a dict 65 | maintained by Group mode 66 | 67 | unlike Iter(), Group() converts an iterable target 68 | into a single result; Iter() converts an iterable 69 | target into an iterable result 70 | 71 | """ 72 | def __init__(self, spec): 73 | self.spec = spec 74 | 75 | def glomit(self, target, scope): 76 | scope[MODE] = GROUP 77 | scope[CUR_AGG] = None # reset aggregation tripwire for sub-specs 78 | scope[ACC_TREE] = {} 79 | 80 | # handle the basecase where the spec stops immediately 81 | # TODO: something smarter 82 | if type(self.spec) in (dict, list): 83 | ret = type(self.spec)() 84 | else: 85 | ret = None 86 | 87 | for t in target_iter(target, scope): 88 | last, ret = ret, scope[glom](t, self.spec, scope) 89 | if ret is STOP: 90 | return last 91 | return ret 92 | 93 | def __repr__(self): 94 | cn = self.__class__.__name__ 95 | return f'{cn}({self.spec!r})' 96 | 97 | 98 | def GROUP(target, spec, 
def GROUP(target, spec, scope):
    """
    Group mode dispatcher; also sentinel for current mode = group

    Dispatches on the kind of *spec*:

    * objects with a callable ``agg`` attribute get
      ``spec.agg(target, tree)``, where ``tree`` is ``scope[ACC_TREE]``
    * other callables are simply called with *target*
    * dicts and lists accumulate into a container of the same type,
      stored in the tree under ``id(spec)``

    Returns the accumulator (or the aggregator / callable result), or
    ``STOP`` when the spec signals that no more values are wanted.
    Raises ``BadSpec`` for any other spec type, and for a dict placed
    directly inside a list.
    """
    recurse = lambda spec: scope[glom](target, spec, scope)
    tree = scope[ACC_TREE]  # current accumulator support structure
    if callable(getattr(spec, "agg", None)):
        return spec.agg(target, tree)
    elif callable(spec):
        return spec(target)
    _spec_type = type(spec)
    if _spec_type not in (dict, list):
        raise BadSpec("Group mode expected dict, list, callable, or"
                      " aggregator, not: %r" % (spec,))
    _spec_id = id(spec)
    try:
        acc = tree[_spec_id]  # current accumulator
    except KeyError:
        acc = tree[_spec_id] = _spec_type()
    if _spec_type is dict:
        # done stays True only if every key/value pair has STOPped
        done = True
        for keyspec, valspec in spec.items():
            # a keyspec that already STOPped is remembered in the tree
            if tree.get(keyspec, None) is STOP:
                continue
            key = recurse(keyspec)
            if key is SKIP:
                done = False  # SKIP means we still want more vals
                continue
            if key is STOP:
                tree[keyspec] = STOP
                continue
            if key not in acc:
                # TODO: guard against key == id(spec)
                tree[key] = {}
            # the value spec accumulates into the sub-tree for this key
            scope[ACC_TREE] = tree[key]
            result = recurse(valspec)
            if result is STOP:
                tree[keyspec] = STOP
                continue
            done = False  # SKIP or returning a value means we still want more vals
            if result is not SKIP:
                acc[key] = result
        if done:
            return STOP
        return acc
    elif _spec_type is list:
        for valspec in spec:
            if type(valspec) is dict:
                # doesn't make sense due to arity mismatch. did you mean [Auto({...})] ?
                raise BadSpec('dicts within lists are not'
                              ' allowed while in Group mode: %r' % spec)
            result = recurse(valspec)
            if result is STOP:
                return STOP
            if result is not SKIP:
                acc.append(result)
        return acc
    raise ValueError(f"{_spec_type} not a valid spec type for Group mode")  # pragma: no cover


class First:
    """
    holds onto the first value

    >>> glom([1, 2, 3], Group(First()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Return *target* on the first call for this tree, ``STOP`` after."""
        if self not in tree:
            # the tree entry only marks "already seen"
            tree[self] = STOP
            return target
        return STOP

    def __repr__(self):
        return '%s()' % self.__class__.__name__


class Avg:
    """
    takes the numerical average of all values;
    raises exception on non-numeric value

    >>> glom([1, 2, 3], Group(Avg()))
    2.0
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Fold *target* into the running sum/count and return the mean so far."""
        try:
            avg_acc = tree[self]
        except KeyError:
            # format is [sum, count]
            avg_acc = tree[self] = [0.0, 0]
        avg_acc[0] += target
        avg_acc[1] += 1
        return avg_acc[0] / avg_acc[1]

    def __repr__(self):
        return '%s()' % self.__class__.__name__


class Max:
    """
    takes the maximum of all values;
    raises exception on values that are not comparable

    >>> glom([1, 2, 3], Group(Max()))
    3
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Keep the largest value seen so far in ``tree[self]`` and return it."""
        if self not in tree or target > tree[self]:
            tree[self] = target
        return tree[self]

    def __repr__(self):
        return '%s()' % self.__class__.__name__


class Min:
    """
    takes the minimum of all values;
    raises exception on values that are not comparable

    >>> glom([1, 2, 3], Group(Min()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Keep the smallest value seen so far in ``tree[self]`` and return it."""
        if self not in tree or target < tree[self]:
            tree[self] = target
        return tree[self]

    def __repr__(self):
        return '%s()' % self.__class__.__name__


class Sample:
    """takes a random sample of the values

    >>> glom([1, 2, 3], Group(Sample(2)))  # doctest: +SKIP
    [1, 3]
    >>> glom(range(5000), Group(Sample(2)))  # doctest: +SKIP
    [272, 2901]

    The advantage of this over :func:`random.sample` is that this can
    take an arbitrarily-sized, potentially-very-long streaming input
    and returns a fixed-size output. Note that this does not stream
    results out, so your streaming input must have finite length.
    """
    __slots__ = ('size',)

    def __init__(self, size):
        self.size = size

    def agg(self, target, tree):
        """Offer *target* to the reservoir and return the current sample list."""
        # simple reservoir sampling scheme
        # https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
        if self not in tree:
            # format is [num_seen, sample]
            tree[self] = [0, []]
        num_seen, sample = tree[self]
        if len(sample) < self.size:
            sample.append(target)
        else:
            # num_seen has not yet been incremented for this item
            pos = random.randint(0, num_seen)
            if pos < self.size:
                sample[pos] = target
        tree[self][0] += 1
        return sample

    def __repr__(self):
        return f'{self.__class__.__name__}({self.size!r})'



class Limit:
    """
    Limits the number of values passed to sub-accumulator

    >>> glom([1, 2, 3], Group(Limit(2)))
    [1, 2]

    To override the default untransformed list output, set the subspec kwarg:

    >>> glom(range(10), Group(Limit(3, subspec={(lambda x: x % 2): [T]})))
    {0: [0, 2], 1: [1]}

    You can even nest Limits in other ``Group`` specs:

    >>> glom(range(10), Group(Limit(5, {(lambda x: x % 2): Limit(2)})))
    {0: [0, 2], 1: [1, 3]}

    """
    __slots__ = ('n', 'subspec')

    def __init__(self, n, subspec=_MISSING):
        # the default subspec collects the values unchanged into a list
        if subspec is _MISSING:
            subspec = [T]
        self.n = n
        self.subspec = subspec

    def glomit(self, target, scope):
        """Pass *target* on to the subspec until ``n`` values have been seen.

        Raises ``BadSpec`` when used outside Group mode; returns ``STOP``
        once the count exceeds ``n``.
        """
        if scope[MODE] is not GROUP:
            raise BadSpec("Limit() only valid in Group mode")
        tree = scope[ACC_TREE]  # current accumulator support structure
        if self not in tree:
            # format is [count, sub-tree for the subspec]
            tree[self] = [0, {}]
        scope[ACC_TREE] = tree[self][1]
        tree[self][0] += 1
        if tree[self][0] > self.n:
            return STOP
        return scope[glom](target, self.subspec, scope)

    def __repr__(self):
        return f'{self.__class__.__name__}({self.n!r}, {self.subspec!r})'
[T] 300 | self.n = n 301 | self.subspec = subspec 302 | 303 | def glomit(self, target, scope): 304 | if scope[MODE] is not GROUP: 305 | raise BadSpec("Limit() only valid in Group mode") 306 | tree = scope[ACC_TREE] # current accumulator support structure 307 | if self not in tree: 308 | tree[self] = [0, {}] 309 | scope[ACC_TREE] = tree[self][1] 310 | tree[self][0] += 1 311 | if tree[self][0] > self.n: 312 | return STOP 313 | return scope[glom](target, self.subspec, scope) 314 | 315 | def __repr__(self): 316 | return f'{self.__class__.__name__}({self.n!r}, {self.subspec!r})' 317 | -------------------------------------------------------------------------------- /glom/mutation.py: -------------------------------------------------------------------------------- 1 | """By default, glom aims to safely return a transformed copy of your 2 | data. But sometimes you really need to transform an existing object. 3 | 4 | When you already have a large or complex bit of nested data that you 5 | are sure you want to modify in-place, glom has you covered, with the 6 | :func:`~glom.assign` function, and the :func:`~glom.Assign` specifier 7 | type. 8 | 9 | .. warning:: 10 | 11 | glom's deep assignment is powerful, and incorrect use can result in 12 | unintended assignments to global state, including class and module 13 | attributes, as well as function defaults. 14 | 15 | Be careful when writing assignment specs, and especially careful when 16 | any part of the spec is data-driven or provided by an end user. 
17 | 18 | """ 19 | import operator 20 | from pprint import pprint 21 | 22 | from .core import Path, T, S, Spec, glom, UnregisteredTarget, GlomError, PathAccessError, UP 23 | from .core import TType, register_op, TargetRegistry, bbrepr, PathAssignError, arg_val, _assign_op 24 | 25 | 26 | try: 27 | basestring 28 | except NameError: 29 | basestring = str 30 | 31 | 32 | if getattr(__builtins__, '__dict__', None) is not None: 33 | # pypy's __builtins__ is a module, as is CPython's REPL, but at 34 | # normal execution time it's a dict? 35 | __builtins__ = __builtins__.__dict__ 36 | 37 | 38 | class PathDeleteError(PathAssignError): 39 | """This :exc:`GlomError` subtype is raised when an assignment fails, 40 | stemming from an :func:`~glom.delete` call or other 41 | :class:`~glom.Delete` usage. 42 | 43 | One example would be deleting an out-of-range position in a list:: 44 | 45 | >>> delete(["short", "list"], Path(5)) 46 | Traceback (most recent call last): 47 | ... 48 | PathDeleteError: could not delete 5 on object at Path(), got error: IndexError(... 49 | 50 | Other assignment failures could be due to deleting a read-only 51 | ``@property`` or exception being raised inside a ``__delattr__()``. 52 | 53 | """ 54 | def get_message(self): 55 | return ('could not delete %r on object at %r, got error: %r' 56 | % (self.dest_name, self.path, self.exc)) 57 | 58 | 59 | def _apply_for_each(func, path, val): 60 | layers = path.path_t.__stars__() 61 | if layers: 62 | for i in range(layers - 1): 63 | val = sum(val, []) # flatten out the extra layers 64 | for inner in val: 65 | func(inner) 66 | else: 67 | func(val) 68 | 69 | 70 | class Assign: 71 | """*New in glom 18.3.0* 72 | 73 | The ``Assign`` specifier type enables glom to modify the target, 74 | performing a "deep-set" to mirror glom's original deep-get use 75 | case. 
class Assign:
    """*New in glom 18.3.0*

    The ``Assign`` specifier type enables glom to modify the target,
    performing a "deep-set" to mirror glom's original deep-get use
    case.

    ``Assign`` can be used to perform spot modifications of large data
    structures when making a copy is not desired::

      # deep assignment into a nested dictionary
      >>> target = {'a': {}}
      >>> spec = Assign('a.b', 'value')
      >>> _ = glom(target, spec)
      >>> pprint(target)
      {'a': {'b': 'value'}}

    The value to be assigned can also be a :class:`~glom.Spec`, which
    is useful for copying values around within the data structure::

      # copying one nested value to another
      >>> _ = glom(target, Assign('a.c', Spec('a.b')))
      >>> pprint(target)
      {'a': {'b': 'value', 'c': 'value'}}

    Another handy use of Assign is to deep-apply a function::

      # sort a deep nested list
      >>> target={'a':{'b':[3,1,2]}}
      >>> _ = glom(target, Assign('a.b', Spec(('a.b',sorted))))
      >>> pprint(target)
      {'a': {'b': [1, 2, 3]}}

    Like many other specifier types, ``Assign``'s destination path can be
    a :data:`~glom.T` expression, for maximum control::

      # changing the error message of an exception in an error list
      >>> err = ValueError('initial message')
      >>> target = {'errors': [err]}
      >>> _ = glom(target, Assign(T['errors'][0].args, ('new message',)))
      >>> str(err)
      'new message'

    ``Assign`` has built-in support for assigning to attributes of
    objects, keys of mappings (like dicts), and indexes of sequences
    (like lists). Additional types can be registered through
    :func:`~glom.register()` using the ``"assign"`` operation name.

    Attempting to assign to an immutable structure, like a
    :class:`tuple`, will result in a
    :class:`~glom.PathAssignError`. Attempting to assign to a path
    that doesn't exist will raise a :class:`~PathAccessError`.

    To automatically backfill missing structures, you can pass a
    callable to the *missing* argument. This callable will be called
    for each path segment along the assignment which is not
    present.

       >>> target = {}
       >>> assign(target, 'a.b.c', 'hi', missing=dict)
       {'a': {'b': {'c': 'hi'}}}

    """
    def __init__(self, path, val, missing=None):
        # TODO: an option like require_preexisting or something to
        # ensure that a value is mutated, not just added. Current
        # workaround is to do a Check().
        if isinstance(path, basestring):
            path = Path.from_text(path)
        elif type(path) is TType:
            path = Path(path)
        elif not isinstance(path, Path):
            raise TypeError('path argument must be a .-delimited string, Path, T, or S')

        # The final path segment is the assignment itself; everything
        # before it is the path to the container being assigned into.
        try:
            self.op, self.arg = path.items()[-1]
        except IndexError:
            raise ValueError('path must have at least one element')
        self._orig_path = path
        self.path = path[:-1]

        if self.op not in '[.P':
            # maybe if we add null-coalescing this should do something?
            raise ValueError('last part of path must be setattr or setitem')
        self.val = val

        if missing is not None:
            if not callable(missing):
                raise TypeError(f'expected missing to be callable, not {missing!r}')
        self.missing = missing

    def glomit(self, target, scope):
        # Resolve the value to assign, then fetch the destination
        # container, then apply the final op to it.
        val = arg_val(target, self.val, scope)

        op, arg, path = self.op, self.arg, self.path
        if self.path.startswith(S):
            # S-rooted paths are resolved against the enclosing scope
            dest_target = scope[UP]
            dest_path = self.path.from_t()
        else:
            dest_target = target
            dest_path = self.path
        try:
            dest = scope[glom](dest_target, dest_path, scope)
        except PathAccessError as pae:
            if not self.missing:
                raise

            # Backfill: build the absent tail of the path inside a fresh
            # self.missing() container, and make that container the value
            # assigned at the segment where the lookup failed.
            remaining_path = self._orig_path[pae.part_idx + 1:]
            val = scope[glom](self.missing(), Assign(remaining_path, val, missing=self.missing), scope)

            op, arg = self._orig_path.items()[pae.part_idx]
            path = self._orig_path[:pae.part_idx]
            dest = scope[glom](dest_target, path, scope)

        # TODO: forward-detect immutable dest?
        _apply = lambda dest: _assign_op(
            dest=dest, op=op, arg=arg, val=val, path=path, scope=scope)
        _apply_for_each(_apply, path, dest)

        # the (mutated) original target is returned, not the destination
        return target

    def __repr__(self):
        cn = self.__class__.__name__
        if self.missing is None:
            return f'{cn}({self._orig_path!r}, {self.val!r})'
        return f'{cn}({self._orig_path!r}, {self.val!r}, missing={bbrepr(self.missing)})'


def assign(obj, path, val, missing=None):
    """*New in glom 18.3.0*

    The ``assign()`` function provides convenient "deep set"
    functionality, modifying nested data structures in-place::

      >>> target = {'a': [{'b': 'c'}, {'d': None}]}
      >>> _ = assign(target, 'a.1.d', 'e')  # let's give 'd' a value of 'e'
      >>> pprint(target)
      {'a': [{'b': 'c'}, {'d': 'e'}]}

    Missing structures can also be automatically created with the
    *missing* parameter. For more information and examples, see the
    :class:`~glom.Assign` specifier type, which this function wraps.
    """
    return glom(obj, Assign(path, val, missing=missing))


# Every class object found among the builtins.
_ALL_BUILTIN_TYPES = [v for v in __builtins__.values() if isinstance(v, type)]
# Builtin types that do not subclass any other builtin type
# (ignoring themselves, type, and object).
_BUILTIN_BASE_TYPES = [v for v in _ALL_BUILTIN_TYPES
                       if not issubclass(v, tuple([t for t in _ALL_BUILTIN_TYPES
                                                   if t not in (v, type, object)]))]
# Base types the assign/delete autodiscovery refuses; dict, list,
# BaseException, object, and type are deliberately left out of this set.
_UNASSIGNABLE_BASE_TYPES = tuple(set(_BUILTIN_BASE_TYPES)
                                 - {dict, list, BaseException, object, type})


def _set_sequence_item(target, idx, val):
    """Item assignment for sequences: the index is coerced with ``int()``."""
    target[int(idx)] = val


def _assign_autodiscover(type_obj):
    """Pick the 'assign' handler for *type_obj*.

    Returns ``False`` for subclasses of the unassignable builtin base
    types. Otherwise returns an item setter when the type has a callable
    ``__setitem__`` (the int-coercing variant if it also has a callable
    ``index``), and ``setattr`` for everything else.
    """
    # TODO: issubclass or "in"?
    if issubclass(type_obj, _UNASSIGNABLE_BASE_TYPES):
        return False

    if callable(getattr(type_obj, '__setitem__', None)):
        if callable(getattr(type_obj, 'index', None)):
            return _set_sequence_item
        return operator.setitem

    return setattr
230 | if issubclass(type_obj, _UNASSIGNABLE_BASE_TYPES): 231 | return False 232 | 233 | if callable(getattr(type_obj, '__setitem__', None)): 234 | if callable(getattr(type_obj, 'index', None)): 235 | return _set_sequence_item 236 | return operator.setitem 237 | 238 | return setattr 239 | 240 | 241 | register_op('assign', auto_func=_assign_autodiscover, exact=False) 242 | 243 | 244 | class Delete: 245 | """ 246 | In addition to glom's core "deep-get" and ``Assign``'s "deep-set", 247 | the ``Delete`` specifier type performs a "deep-del", which can 248 | remove items from larger data structures by key, attribute, and 249 | index. 250 | 251 | >>> target = {'dict': {'x': [5, 6, 7]}} 252 | >>> glom(target, Delete('dict.x.1')) 253 | {'dict': {'x': [5, 7]}} 254 | >>> glom(target, Delete('dict.x')) 255 | {'dict': {}} 256 | 257 | If a target path is missing, a :exc:`PathDeleteError` will be 258 | raised. To ignore missing targets, use the ``ignore_missing`` 259 | flag: 260 | 261 | >>> glom(target, Delete('does_not_exist', ignore_missing=True)) 262 | {'dict': {}} 263 | 264 | ``Delete`` has built-in support for deleting attributes of 265 | objects, keys of dicts, and indexes of sequences 266 | (like lists). Additional types can be registered through 267 | :func:`~glom.register()` using the ``"delete"`` operation name. 268 | 269 | .. 
class Delete:
    """
    In addition to glom's core "deep-get" and ``Assign``'s "deep-set",
    the ``Delete`` specifier type performs a "deep-del", which can
    remove items from larger data structures by key, attribute, and
    index.

    >>> target = {'dict': {'x': [5, 6, 7]}}
    >>> glom(target, Delete('dict.x.1'))
    {'dict': {'x': [5, 7]}}
    >>> glom(target, Delete('dict.x'))
    {'dict': {}}

    If a target path is missing, a :exc:`PathDeleteError` will be
    raised. To ignore missing targets, use the ``ignore_missing``
    flag:

    >>> glom(target, Delete('does_not_exist', ignore_missing=True))
    {'dict': {}}

    ``Delete`` has built-in support for deleting attributes of
    objects, keys of dicts, and indexes of sequences
    (like lists). Additional types can be registered through
    :func:`~glom.register()` using the ``"delete"`` operation name.

    .. versionadded:: 20.5.0
    """
    def __init__(self, path, ignore_missing=False):
        if isinstance(path, basestring):
            path = Path.from_text(path)
        elif type(path) is TType:
            path = Path(path)
        elif not isinstance(path, Path):
            raise TypeError('path argument must be a .-delimited string, Path, T, or S')

        # The final path segment names what to delete; the segments
        # before it locate the container to delete it from.
        try:
            self.op, self.arg = path.items()[-1]
        except IndexError:
            raise ValueError('path must have at least one element')
        self._orig_path = path
        self.path = path[:-1]

        if self.op not in '[.P':
            raise ValueError('last part of path must be an attribute or index')

        self.ignore_missing = ignore_missing

    def _del_one(self, dest, op, arg, scope):
        """Delete *arg* from the single container *dest* according to *op*.

        ``'['`` deletes an item, ``'.'`` deletes an attribute, and ``'P'``
        uses the handler registered for the ``"delete"`` operation.
        A missing item or attribute raises :exc:`PathDeleteError`,
        unless ``ignore_missing`` is set, in which case it is a no-op.
        """
        if op == '[':
            try:
                del dest[arg]
            except (IndexError, KeyError) as e:
                # KeyError covers mappings: a missing dict key is just as
                # "missing" as an out-of-range list index.
                if not self.ignore_missing:
                    raise PathDeleteError(e, self.path, arg)
        elif op == '.':
            try:
                delattr(dest, arg)
            except AttributeError as e:
                if not self.ignore_missing:
                    raise PathDeleteError(e, self.path, arg)
        elif op == 'P':
            _delete = scope[TargetRegistry].get_handler('delete', dest)
            try:
                _delete(dest, arg)
            except Exception as e:
                if not self.ignore_missing:
                    raise PathDeleteError(e, self.path, arg)

    def glomit(self, target, scope):
        op, arg, path = self.op, self.arg, self.path
        if self.path.startswith(S):
            # S-rooted paths are resolved against the enclosing scope
            dest_target = scope[UP]
            dest_path = self.path.from_t()
        else:
            dest_target = target
            dest_path = self.path
        try:
            dest = scope[glom](dest_target, dest_path, scope)
        except PathAccessError:
            # the container itself is missing: nothing to delete
            if not self.ignore_missing:
                raise
        else:
            _apply_for_each(lambda dest: self._del_one(dest, op, arg, scope), path, dest)

        # the (mutated) original target is returned
        return target

    def __repr__(self):
        cn = self.__class__.__name__
        return f'{cn}({self._orig_path!r})'


def delete(obj, path, ignore_missing=False):
    """
    The ``delete()`` function provides "deep del" functionality,
    modifying nested data structures in-place::

      >>> target = {'a': [{'b': 'c'}, {'d': None}]}
      >>> delete(target, 'a.0.b')
      {'a': [{}, {'d': None}]}

    Attempting to delete missing keys, attributes, and indexes will
    raise a :exc:`PathDeleteError`. To ignore these errors, use the
    *ignore_missing* argument::

      >>> delete(target, 'does_not_exist', ignore_missing=True)
      {'a': [{}, {'d': None}]}

    For more information and examples, see the :class:`~glom.Delete`
    specifier type, which this convenience function wraps.

    .. versionadded:: 20.5.0
    """
    return glom(obj, Delete(path, ignore_missing=ignore_missing))
delete(obj, path, ignore_missing=False): 336 | """ 337 | The ``delete()`` function provides "deep del" functionality, 338 | modifying nested data structures in-place:: 339 | 340 | >>> target = {'a': [{'b': 'c'}, {'d': None}]} 341 | >>> delete(target, 'a.0.b') 342 | {'a': [{}, {'d': None}]} 343 | 344 | Attempting to delete missing keys, attributes, and indexes will 345 | raise a :exc:`PathDeleteError`. To ignore these errors, use the 346 | *ignore_missing* argument:: 347 | 348 | >>> delete(target, 'does_not_exist', ignore_missing=True) 349 | {'a': [{}, {'d': None}]} 350 | 351 | For more information and examples, see the :class:`~glom.Delete` 352 | specifier type, which this convenience function wraps. 353 | 354 | .. versionadded:: 20.5.0 355 | """ 356 | return glom(obj, Delete(path, ignore_missing=ignore_missing)) 357 | 358 | 359 | def _del_sequence_item(target, idx): 360 | del target[int(idx)] 361 | 362 | 363 | def _delete_autodiscover(type_obj): 364 | if issubclass(type_obj, _UNASSIGNABLE_BASE_TYPES): 365 | return False 366 | 367 | if callable(getattr(type_obj, '__delitem__', None)): 368 | if callable(getattr(type_obj, 'index', None)): 369 | return _del_sequence_item 370 | return operator.delitem 371 | return delattr 372 | 373 | 374 | register_op('delete', auto_func=_delete_autodiscover, exact=False) 375 | -------------------------------------------------------------------------------- /glom/reduction.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import itertools 3 | from pprint import pprint 4 | 5 | from boltons.typeutils import make_sentinel 6 | 7 | from .core import T, glom, GlomError, format_invocation, bbrepr, UnregisteredTarget, MODE 8 | from .grouping import GROUP, target_iter, ACC_TREE, CUR_AGG 9 | 10 | _MISSING = make_sentinel('_MISSING') 11 | 12 | 13 | try: 14 | basestring 15 | except NameError: 16 | basestring = str 17 | 18 | 19 | class FoldError(GlomError): 20 | """Error raised when Fold() 
class FoldError(GlomError):
    """Error raised when Fold() is called on non-iterable
    targets, and possibly other uses in the future."""
    pass


class Fold:
    """The `Fold` specifier type is glom's building block for reducing
    iterables in data, implementing the classic `fold
    <https://en.wikipedia.org/wiki/Fold_(higher-order_function)>`_
    from functional programming, similar to Python's built-in
    :func:`reduce`.

    Args:
       subspec: A spec representing the target to fold, which must be
          an iterable, or otherwise registered to 'iterate' (with
          :func:`~glom.register`).
       init (callable): A function or type which will be invoked to
          initialize the accumulator value.
       op (callable): A function to call on the accumulator value and
          every value, the result of which will become the new
          accumulator value. Defaults to :func:`operator.iadd`.

    Usage is as follows:

       >>> target = [set([1, 2]), set([3]), set([2, 4])]
       >>> result = glom(target, Fold(T, init=frozenset, op=frozenset.union))
       >>> result == frozenset([1, 2, 3, 4])
       True

    Note the required ``spec`` and ``init`` arguments. ``op`` is
    optional, but here must be used because the :class:`set` and
    :class:`frozenset` types do not work with addition.

    While :class:`~glom.Fold` is powerful, :class:`~glom.Flatten` and
    :class:`~glom.Sum` are subtypes with more convenient defaults for
    day-to-day use.
    """
    def __init__(self, subspec, init, op=operator.iadd):
        self.subspec = subspec
        self.init = init
        self.op = op
        if not callable(op):
            raise TypeError('expected callable for %s op param, not: %r' %
                            (self.__class__.__name__, op))
        if not callable(init):
            raise TypeError('expected callable for %s init param, not: %r' %
                            (self.__class__.__name__, init))

    def glomit(self, target, scope):
        # Inside Group mode, the first Fold to run claims the CUR_AGG
        # slot and acts as an aggregator, folding one target per call;
        # otherwise it folds the whole iterable target at once.
        is_agg = False
        if scope[MODE] is GROUP and scope.get(CUR_AGG) is None:
            scope[CUR_AGG] = self
            is_agg = True

        if self.subspec is not T:
            target = scope[glom](target, self.subspec, scope)

        if is_agg:
            return self._agg(target, scope[ACC_TREE])
        try:
            return self._fold(target_iter(target, scope))
        except UnregisteredTarget as ut:
            raise FoldError('can only %s on iterable targets, not %s type (%s)'
                            % (self.__class__.__name__, type(target).__name__, ut))

    def _fold(self, iterator):
        """Reduce *iterator* with ``op``, starting from a fresh ``init()``."""
        ret, op = self.init(), self.op

        for v in iterator:
            ret = op(ret, v)

        return ret

    def _agg(self, target, tree):
        """Fold a single *target* into the accumulator kept in ``tree[self]``."""
        if self not in tree:
            tree[self] = self.init()
        tree[self] = self.op(tree[self], target)
        return tree[self]

    def __repr__(self):
        cn = self.__class__.__name__
        kwargs = {'init': self.init}
        # op is only shown when it differs from the default
        if self.op is not operator.iadd:
            kwargs['op'] = self.op
        return format_invocation(cn, (self.subspec,), kwargs, repr=bbrepr)


class Sum(Fold):
    """The `Sum` specifier type is used to aggregate integers and other
    numericals using addition, much like the :func:`sum()` builtin.

       >>> glom(range(5), Sum())
       10

    Note that this specifier takes a callable *init* parameter like
    its friends, so to change the start value, be sure to wrap it in a
    callable::

       >>> glom(range(5), Sum(init=lambda: 5.0))
       15.0

    To "sum" lists and other iterables, see the :class:`Flatten`
    spec. For other objects, see the :class:`Fold` specifier type.

    """
    def __init__(self, subspec=T, init=int):
        super().__init__(subspec=subspec, init=init, op=operator.iadd)

    def __repr__(self):
        cn = self.__class__.__name__
        # arguments left at their defaults are omitted from the repr
        args = () if self.subspec is T else (self.subspec,)
        kwargs = {'init': self.init} if self.init is not int else {}
        return format_invocation(cn, args, kwargs, repr=bbrepr)


class Count(Fold):
    """
    takes a count of how many values occurred

    >>> glom([1, 2, 3], Count())
    3
    """
    __slots__ = ()

    def __init__(self):
        # the op ignores the value itself and just increments the count
        super().__init__(
            subspec=T, init=int, op=lambda cur, val: cur + 1)

    def __repr__(self):
        return '%s()' % self.__class__.__name__


class Flatten(Fold):
    """The `Flatten` specifier type is used to combine iterables. By
    default it flattens an iterable of iterables into a single list
    containing items from all iterables.

    >>> target = [[1], [2, 3]]
    >>> glom(target, Flatten())
    [1, 2, 3]

    You can also set *init* to ``"lazy"``, which returns a generator
    instead of a list. Use this to avoid making extra lists and other
    collections during intermediate processing steps.
    """
    def __init__(self, subspec=T, init=list):
        if init == 'lazy':
            # "lazy" is a flag, not a callable: remember it and hand the
            # base class a real callable so its validation still passes
            self.lazy = True
            init = list
        else:
            self.lazy = False
        super().__init__(subspec=subspec, init=init, op=operator.iadd)

    def _fold(self, iterator):
        if self.lazy:
            return itertools.chain.from_iterable(iterator)
        return super()._fold(iterator)

    def __repr__(self):
        cn = self.__class__.__name__
        # arguments left at their defaults are omitted from the repr
        args = () if self.subspec is T else (self.subspec,)
        kwargs = {}
        if self.lazy:
            kwargs['init'] = 'lazy'
        elif self.init is not list:
            kwargs['init'] = self.init
        return format_invocation(cn, args, kwargs, repr=bbrepr)


def flatten(target, **kwargs):
    """At its most basic, ``flatten()`` turns an iterable of iterables
    into a single list. But it has a few arguments which give it more
    power:

    Args:

       init (callable): A function or type which gives the initial
          value of the return. The value must support addition. Common
          values might be :class:`list` (the default), :class:`tuple`,
          or even :class:`int`. You can also pass ``init="lazy"`` to
          get a generator.
       levels (int): A positive integer representing the number of
          nested levels to flatten. Defaults to 1.
       spec: The glomspec to fetch before flattening. This defaults to
          the root level of the object.

    Usage is straightforward.

      >>> target = [[1, 2], [3], [4]]
      >>> flatten(target)
      [1, 2, 3, 4]

    Because integers themselves support addition, we actually have two
    levels of flattening possible, to get back a single integer sum:

      >>> flatten(target, init=int, levels=2)
      10

    However, flattening a non-iterable like an integer will raise an
    exception:

      >>> target = 10
      >>> flatten(target)
      Traceback (most recent call last):
      ...
      FoldError: can only Flatten on iterable targets, not int type (...)

    By default, ``flatten()`` will add a mix of iterables together,
    making it a more-robust alternative to the built-in
    ``sum(list_of_lists, list())`` trick most experienced Python
    programmers are familiar with using:

      >>> list_of_iterables = [range(2), [2, 3], (4, 5)]
      >>> sum(list_of_iterables, [])
      Traceback (most recent call last):
      ...
      TypeError: can only concatenate list (not "tuple") to list

    Whereas flatten() handles this just fine:

      >>> flatten(list_of_iterables)
      [0, 1, 2, 3, 4, 5]

    The ``flatten()`` function is a convenient wrapper around the
    :class:`Flatten` specifier type. For embedding in larger specs,
    and more involved flattening, see :class:`Flatten` and its base,
    :class:`Fold`.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', list)
    levels = kwargs.pop('levels', 1)
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))

    if levels == 0:
        return target
    if levels < 0:
        raise ValueError('expected levels >= 0, not %r' % levels)
    # All levels but the last are flattened lazily; only the final
    # level is built with the requested init.
    spec = (subspec,)
    spec += (Flatten(init="lazy"),) * (levels - 1)
    spec += (Flatten(init=init),)

    return glom(target, spec)
225 | FoldError: can only Flatten on iterable targets, not int type (...) 226 | 227 | By default, ``flatten()`` will add a mix of iterables together, 228 | making it a more-robust alternative to the built-in 229 | ``sum(list_of_lists, list())`` trick most experienced Python 230 | programmers are familiar with using: 231 | 232 | >>> list_of_iterables = [range(2), [2, 3], (4, 5)] 233 | >>> sum(list_of_iterables, []) 234 | Traceback (most recent call last): 235 | ... 236 | TypeError: can only concatenate list (not "tuple") to list 237 | 238 | Whereas flatten() handles this just fine: 239 | 240 | >>> flatten(list_of_iterables) 241 | [0, 1, 2, 3, 4, 5] 242 | 243 | The ``flatten()`` function is a convenient wrapper around the 244 | :class:`Flatten` specifier type. For embedding in larger specs, 245 | and more involved flattening, see :class:`Flatten` and its base, 246 | :class:`Fold`. 247 | 248 | """ 249 | subspec = kwargs.pop('spec', T) 250 | init = kwargs.pop('init', list) 251 | levels = kwargs.pop('levels', 1) 252 | if kwargs: 253 | raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys())) 254 | 255 | if levels == 0: 256 | return target 257 | if levels < 0: 258 | raise ValueError('expected levels >= 0, not %r' % levels) 259 | spec = (subspec,) 260 | spec += (Flatten(init="lazy"),) * (levels - 1) 261 | spec += (Flatten(init=init),) 262 | 263 | return glom(target, spec) 264 | 265 | 266 | class Merge(Fold): 267 | """By default, Merge turns an iterable of mappings into a single, 268 | merged :class:`dict`, leveraging the behavior of the 269 | :meth:`~dict.update` method. The start state can be customized 270 | with *init*, as well as the update operation, with *op*. 271 | 272 | Args: 273 | subspec: The location of the iterable of mappings. Defaults to ``T``. 274 | init (callable): A type or callable which returns a base 275 | instance into which all other values will be merged. 
276 | op (callable): A callable, which takes two arguments, and 277 | performs a merge of the second into the first. Can also be 278 | the string name of a method to fetch on the instance created 279 | from *init*. Defaults to ``"update"``. 280 | 281 | .. note:: 282 | 283 | Besides the differing defaults, the primary difference between 284 | :class:`Merge` and other :class:`Fold` subtypes is that its 285 | *op* argument is assumed to be a two-argument function which 286 | has no return value and modifies the left parameter 287 | in-place. Because the initial state is a new object created with 288 | the *init* parameter, none of the target values are modified. 289 | 290 | """ 291 | def __init__(self, subspec=T, init=dict, op=None): 292 | if op is None: 293 | op = 'update' 294 | if isinstance(op, basestring): 295 | test_init = init() 296 | op = getattr(type(test_init), op, None) 297 | if not callable(op): 298 | raise ValueError('expected callable "op" arg or an "init" with an .update()' 299 | ' method not %r and %r' % (op, init)) 300 | super().__init__(subspec=subspec, init=init, op=op) 301 | 302 | def _fold(self, iterator): 303 | # the difference here is that ret is mutated in-place, the 304 | # variable not being reassigned, as in base Fold. 305 | ret, op = self.init(), self.op 306 | 307 | for v in iterator: 308 | op(ret, v) 309 | 310 | return ret 311 | 312 | 313 | def _agg(self, target, tree): 314 | if self not in tree: 315 | acc = tree[self] = self.init() 316 | else: 317 | acc = tree[self] 318 | self.op(acc, target) 319 | return acc 320 | 321 | 322 | def merge(target, **kwargs): 323 | """By default, ``merge()`` turns an iterable of mappings into a 324 | single, merged :class:`dict`, leveraging the behavior of the 325 | :meth:`~dict.update` method. A new mapping is created and none of 326 | the passed mappings are modified. 
327 | 328 | >>> target = [{'a': 'alpha'}, {'b': 'B'}, {'a': 'A'}] 329 | >>> res = merge(target) 330 | >>> pprint(res) 331 | {'a': 'A', 'b': 'B'} 332 | 333 | Args: 334 | target: The list of dicts, or some other iterable of mappings. 335 | 336 | The start state can be customized with the *init* keyword 337 | argument, as well as the update operation, with the *op* keyword 338 | argument. For more on those customizations, see the :class:`Merge` 339 | spec. 340 | 341 | """ 342 | subspec = kwargs.pop('spec', T) 343 | init = kwargs.pop('init', dict) 344 | op = kwargs.pop('op', None) 345 | if kwargs: 346 | raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys())) 347 | spec = Merge(subspec, init, op) 348 | return glom(target, spec) 349 | -------------------------------------------------------------------------------- /glom/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mahmoud/glom/c225e2abeb234be7119911b96b4378cc9d8d6478/glom/test/__init__.py -------------------------------------------------------------------------------- /glom/test/data/test_invalid.toml: -------------------------------------------------------------------------------- 1 | # invalid 2 | toml = { 3 | -------------------------------------------------------------------------------- /glom/test/data/test_invalid.yaml: -------------------------------------------------------------------------------- 1 | - Invalid 2 | Yaml: -------------------------------------------------------------------------------- /glom/test/data/test_valid.toml: -------------------------------------------------------------------------------- 1 | Hello = ["World"] 2 | -------------------------------------------------------------------------------- /glom/test/data/test_valid.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | Hello: 3 | - World 4 | 
-------------------------------------------------------------------------------- /glom/test/perf_report.py: -------------------------------------------------------------------------------- 1 | """ 2 | slow gloms that came up organically, used as performance metrics 3 | """ 4 | import time 5 | import gc 6 | 7 | import attr 8 | 9 | from glom import glom, T 10 | 11 | 12 | 13 | STR_SPEC = [{ 14 | 'id': ('id', str), 15 | 'name': 'short_name', 16 | 'external_id': 'external_id', 17 | 'created_date': 'created_date', 18 | }] 19 | 20 | 21 | T_SPEC = [{ 22 | 'id': (T.id, str), 23 | 'name': T.short_name, 24 | 'external_id': T.external_id, 25 | 'created_date': T.created_date, 26 | }] 27 | 28 | 29 | def func(data): 30 | return [{ 31 | 'id': str(t.id), 32 | 'name': t.short_name, 33 | 'external_id': t.external_id, 34 | 'created_date': t.created_date 35 | } for t in data] 36 | 37 | 38 | def setup_list_of_dict(num=100): 39 | """ 40 | a common use case is list-of-dicts object processing 41 | to prepare internal objects for JSON serialization 42 | """ 43 | Obj = attr.make_class( 44 | 'Obj', ['id', 'short_name', 'external_id', 'created_date']) 45 | 46 | data = [ 47 | Obj(i, 'name' + str(i), 'external' + str(i), 'now') for i in range(num)] 48 | 49 | return data 50 | 51 | 52 | def run(spec, data): 53 | start = time.time() 54 | glom(data, spec) 55 | end = time.time() 56 | print(f"{(end - start) / len(data) * 1e6} us per object") 57 | 58 | 59 | def ratio(spec, func, data): 60 | glom_dur = [] 61 | py_dur = [] 62 | for i in range(10): 63 | t1 = time.perf_counter_ns() 64 | glom(data, spec) 65 | t2 = time.perf_counter_ns() 66 | func(data) 67 | t3 = time.perf_counter_ns() 68 | glom_dur.append(t2 - t1) 69 | py_dur.append(t3 - t2) 70 | 71 | glom_avg = sum(sorted(glom_dur)[2:-2]) 72 | py_avg = sum(sorted(py_dur)[2:-2]) 73 | 74 | return 1.0 * glom_avg / py_avg 75 | 76 | 77 | if __name__ == "__main__": 78 | import cProfile 79 | data = setup_list_of_dict(100000) 80 | run(STR_SPEC, data) 81 | 
run(STR_SPEC, data) 82 | print(ratio(STR_SPEC, func, setup_list_of_dict(1000))) 83 | print(ratio(STR_SPEC, func, setup_list_of_dict(1000))) 84 | 85 | 86 | # suggest using scalene to profile with: 87 | # $ scalene glom/test/perf_report.py --profile-all --reduced-profile --cpu-only --outfile SCALENE-CPU.txt 88 | -------------------------------------------------------------------------------- /glom/test/test_check.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | 3 | from glom import glom, Check, CheckError, Coalesce, SKIP, STOP, T 4 | 5 | try: 6 | unicode 7 | except NameError: 8 | unicode = str 9 | 10 | 11 | def test_check_basic(): 12 | assert glom([0, SKIP], [T]) == [0] # sanity check SKIP 13 | 14 | target = [{'id': 0}, {'id': 1}, {'id': 2}] 15 | 16 | # check that skipping non-passing values works 17 | assert glom(target, ([Coalesce(Check('id', equal_to=0), default=SKIP)], T[0])) == {'id': 0} 18 | assert glom(target, ([Check('id', equal_to=0, default=SKIP)], T[0])) == {'id': 0} 19 | 20 | # check that stopping iteration on non-passing values works 21 | assert glom(target, [Check('id', equal_to=0, default=STOP)]) == [{'id': 0}] 22 | 23 | # check that stopping chain execution on non-passing values works 24 | spec = (Check(validate=lambda x: len(x) > 0, default=STOP), T[0]) 25 | assert glom('hello', spec) == 'h' 26 | assert glom('', spec) == '' # would fail with IndexError if STOP didn't work 27 | 28 | assert repr(Check()) == 'Check()' 29 | assert repr(Check(T.a)) == 'Check(T.a)' 30 | assert repr(Check(equal_to=1)) == 'Check(equal_to=1)' 31 | assert repr(Check(instance_of=dict)) == 'Check(instance_of=dict)' 32 | assert repr(Check(T(len), validate=sum)) == 'Check(T(len), validate=sum)' 33 | 34 | target = [1, 'a'] 35 | assert glom(target, [Check(type=unicode, default=SKIP)]) == ['a'] 36 | assert glom(target, [Check(type=(unicode, int))]) == [1, 'a'] 37 | assert glom(target, [Check(instance_of=unicode, 
default=SKIP)]) == ['a'] 38 | assert glom(target, [Check(instance_of=(unicode, int))]) == [1, 'a'] 39 | 40 | target = ['1'] 41 | assert glom(target, [Check(validate=(int, float))]) 42 | assert glom(target, [Check()]) # bare check does a truthy check 43 | 44 | failing_checks = [({'a': {'b': 1}}, {'a': ('a', 'b', Check(type=str))}, 45 | '''target at path ['a', 'b'] failed check, got error: "expected type to be 'str', found type 'int'"'''), 46 | ({'a': {'b': 1}}, {'a': ('a', Check('b', type=str))}, 47 | '''target at path ['a'] failed check, subtarget at 'b' got error: "expected type to be 'str', found type 'int'"'''), 48 | (1, Check(type=(unicode, bool))), 49 | (1, Check(instance_of=unicode)), 50 | (1, Check(instance_of=(unicode, bool))), 51 | (1, Check(equal_to=0)), 52 | (1, Check(one_of=(0,))), 53 | (1, Check(one_of=(0, 2))), 54 | ('-3.14', Check(validate=int)), 55 | ('', Check(validate=lambda x: False)),] 56 | 57 | for fc in failing_checks: 58 | if len(fc) == 2: 59 | target, check = fc 60 | msg = None 61 | else: 62 | target, check, msg = fc 63 | 64 | with raises(CheckError) as exc_info: 65 | glom(target, check) 66 | 67 | if msg is not None: 68 | assert str(exc_info.value).find(msg) != -1 69 | assert repr(exc_info.value) 70 | 71 | 72 | def test_check_multi(): 73 | target = 1 74 | with raises(CheckError) as exc_info: 75 | glom(target, Check(instance_of=float, validate=lambda x: x > 3.14)) 76 | 77 | assert "2 errors" in str(exc_info.value) 78 | 79 | 80 | 81 | def test_check_signature(): 82 | with raises(ValueError): 83 | Check(instance_of=()) 84 | with raises(ValueError): 85 | Check(type=()) 86 | 87 | with raises(TypeError): 88 | Check(fake_kwarg=True) 89 | 90 | with raises(ValueError): 91 | Check(one_of=1) 92 | with raises(ValueError): 93 | Check(one_of=()) 94 | with raises(TypeError): 95 | Check(one_of=(1, 2), equal_to=3) 96 | 97 | with raises(ValueError): 98 | Check(validate='bad, not callable, value') 99 | 
-------------------------------------------------------------------------------- /glom/test/test_cli.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | import pytest 5 | from face import CommandChecker, CommandLineError 6 | 7 | from glom import cli 8 | 9 | 10 | BASIC_TARGET = '{"a": {"b": "c"}}' 11 | BASIC_SPEC = '{"a": "a.b"}' 12 | BASIC_OUT = '{"a": "c"}\n' 13 | 14 | @pytest.fixture 15 | def cc(): 16 | cmd = cli.get_command() 17 | # TODO: don't mix stderr 18 | return CommandChecker(cmd, mix_stderr=True) 19 | 20 | @pytest.fixture 21 | def basic_spec_path(tmp_path): 22 | spec_path = str(tmp_path) + '/basic_spec.txt' 23 | with open(spec_path, 'w') as f: 24 | f.write(BASIC_SPEC) 25 | return spec_path 26 | 27 | @pytest.fixture 28 | def basic_target_path(tmp_path): 29 | target_path = str(tmp_path) + '/basic_target.txt' 30 | with open(target_path, 'w') as f: 31 | f.write(BASIC_TARGET) 32 | return target_path 33 | 34 | 35 | def test_cli_blank(cc): 36 | res = cc.run(['glom']) 37 | assert res.stdout == '{}\n' 38 | 39 | 40 | def test_cli_spec_target_argv_basic(cc): 41 | res = cc.run(['glom', '--indent', '0', BASIC_SPEC, BASIC_TARGET]) 42 | assert res.stdout == BASIC_OUT 43 | 44 | # json format, too 45 | res = cc.run(['glom', '--indent', '0', '--spec-format', 'json', BASIC_SPEC, BASIC_TARGET]) 46 | assert res.stdout == BASIC_OUT 47 | 48 | 49 | def test_cli_spec_argv_target_stdin_basic(cc): 50 | res = cc.run(['glom', '--indent', '0', BASIC_SPEC], 51 | input=BASIC_TARGET) 52 | assert res.stdout == BASIC_OUT 53 | 54 | res = cc.run(['glom', '--indent', '0', BASIC_SPEC, '-'], 55 | input=BASIC_TARGET) 56 | assert res.stdout == BASIC_OUT 57 | 58 | res = cc.run(['glom', '--indent', '0', '--target-file', '-', BASIC_SPEC], 59 | input=BASIC_TARGET) 60 | assert res.stdout == BASIC_OUT 61 | 62 | 63 | def test_cli_scalar(cc): 64 | res = cc.run(['glom', 'a.b.c', '{"a": {"b": {"c": "d"}}}']) 65 | assert res.stdout 
== '"d"\n' 66 | 67 | res = cc.run(['glom', '--scalar', 'a.b.c', '{"a": {"b": {"c": "d"}}}']) 68 | assert res.stdout == 'd' 69 | 70 | 71 | def test_cli_spec_target_files_basic(cc, basic_spec_path, basic_target_path): 72 | res = cc.run(['glom', '--indent', '0', '--target-file', 73 | basic_target_path, '--spec-file', basic_spec_path]) 74 | assert res.stdout == BASIC_OUT 75 | 76 | 77 | def test_usage_errors(cc, basic_spec_path, basic_target_path): 78 | # bad target json 79 | res = cc.fail_1(['glom', BASIC_SPEC, '{' + BASIC_TARGET]) 80 | assert 'could not load target data' in res.stdout # TODO: stderr 81 | 82 | # bad target yaml 83 | res = cc.fail_1(['glom', '--target-format', 'yaml', BASIC_SPEC, '{' + BASIC_TARGET]) 84 | assert 'could not load target data' in res.stdout # TODO: stderr 85 | 86 | # bad target toml 87 | res = cc.fail_1(['glom', '--target-format', 'toml', BASIC_SPEC, '{' + BASIC_TARGET]) 88 | assert 'could not load target data' in res.stdout # TODO: stderr 89 | 90 | # TODO: bad target python? 91 | 92 | # bad target format TODO: fail_2 93 | res = cc.fail_1(['glom', '--target-format', 'lol', BASIC_SPEC, BASIC_TARGET]) 94 | assert 'target-format to be one of' in res.stdout # TODO: stderr 95 | 96 | # bad spec format TODO: fail_2 97 | res = cc.fail_1(['glom', '--spec-format', 'lol', BASIC_SPEC, BASIC_TARGET]) 98 | assert 'spec-format to be one of' in res.stdout # TODO: stderr 99 | 100 | # test conflicting spec file and spec posarg 101 | res = cc.fail_1(['glom', '--spec-file', basic_spec_path, BASIC_SPEC, BASIC_TARGET]) 102 | assert 'spec' in res.stdout 103 | assert 'not both' in res.stdout # TODO: stderr 104 | 105 | # test conflicting target file and target posarg 106 | res = cc.fail_1(['glom', '--target-file', basic_target_path, BASIC_SPEC, BASIC_TARGET]) 107 | assert 'target' in res.stdout 108 | assert 'not both' in res.stdout # TODO: stderr 109 | 110 | 111 | # TODO: if spec-file is present, maybe single posarg should become target? 
112 | res = cc.fail_1(['glom', '--spec-file', basic_spec_path + 'abra', '--target-file', basic_target_path]) 113 | assert 'could not read spec file' in res.stdout # TODO: stderr 114 | 115 | res = cc.fail_1(['glom', '--spec-file', basic_spec_path, '--target-file', basic_target_path + 'abra']) 116 | assert 'could not read target file' in res.stdout # TODO: stderr 117 | 118 | 119 | def test_main_basic(): 120 | argv = ['__', 'a.b.fail', '{"a": {"b": "c"}}'] 121 | assert cli.main(argv) == 1 122 | 123 | argv = ['__', 'a.b.c', '{"a": {"b": {"c": "d"}}}'] 124 | assert cli.main(argv) == 0 125 | 126 | 127 | def test_main_yaml_target(): 128 | cwd = os.path.dirname(os.path.abspath(__file__)) 129 | # Handles the filepath if running tox 130 | if '.tox' in cwd: 131 | cwd = os.path.join(cwd.split('.tox')[0] + '/glom/test/') 132 | path = os.path.join(cwd, 'data/test_valid.yaml') 133 | argv = ['__', '--target-file', path, '--target-format', 'yml', 'Hello'] 134 | assert cli.main(argv) == 0 135 | 136 | path = os.path.join(cwd, 'data/test_invalid.yaml') 137 | argv = ['__', '--target-file', path, '--target-format', 'yml', 'Hello'] 138 | # Makes sure correct improper yaml exception is raised 139 | with pytest.raises(CommandLineError) as excinfo: 140 | cli.main(argv) 141 | assert 'expected , but found' in str(excinfo.value) 142 | 143 | 144 | def test_main_toml_target(): 145 | cwd = os.path.dirname(os.path.abspath(__file__)) 146 | # Handles the filepath if running tox 147 | if '.tox' in cwd: 148 | cwd = os.path.join(cwd.split('.tox')[0] + '/glom/test/') 149 | path = os.path.join(cwd, 'data/test_valid.toml') 150 | argv = ['__', '--target-file', path, '--target-format', 'toml', 'Hello'] 151 | assert cli.main(argv) == 0 152 | 153 | path = os.path.join(cwd, 'data/test_invalid.toml') 154 | argv = ['__', '--target-file', path, '--target-format', 'toml', 'Hello'] 155 | # Makes sure correct improper toml exception is raised 156 | with pytest.raises(CommandLineError) as excinfo: 157 | 
cli.main(argv) 158 | assert 'Invalid initial character for a key part' in str(excinfo.value) 159 | 160 | 161 | def test_main_python_full_spec_python_target(): 162 | argv = ['__', '--target-format', 'python', '--spec-format', 'python-full', 'T[T[3].bit_length()]', '{1: 2, 2: 3, 3: 4}'] 163 | assert cli.main(argv) == 0 164 | 165 | argv = ['__', '--target-format', 'python', '--spec-format', 'python-full', '(T.values(), [T])', '{1: 2, 2: 3, 3: 4}'] 166 | assert cli.main(argv) == 0 167 | 168 | 169 | def test_main(tmp_path): 170 | # TODO: pytest-cov knows how to make coverage work across 171 | # subprocess boundaries... 172 | os.chdir(str(tmp_path)) 173 | res = subprocess.check_output(['glom', 'a', '{"a": 3}']) 174 | assert res.decode('utf8') in ('3\n', '3\r\n') # unix or windows line end okay 175 | -------------------------------------------------------------------------------- /glom/test/test_fill.py: -------------------------------------------------------------------------------- 1 | from glom import Auto, Fill, T, glom 2 | 3 | def test(): 4 | assert glom('abc', Fill((T[0], {T[1]: T[2]}))) == ('a', {'b': 'c'}) 5 | assert glom('123', Fill({T[0], frozenset([T[1], T[2]])})) == {'1', frozenset(['2', '3'])} 6 | assert glom('xyz', Fill([T[0], T[1], T[2]])) 7 | assert glom('abc', Fill(lambda t: t.upper())) == 'ABC' 8 | assert glom('a', Fill(1)) == 1 9 | assert Fill((T, T, T)).fill(1) == (1, 1, 1) 10 | 11 | target = {'data': [0, 2, 4]} 12 | assert glom(target, Fill((T['data'][-1], Auto('data.-2')))) == (4, 2) 13 | 14 | assert repr(Auto()) == 'Auto()' 15 | assert repr(Auto(T)) == 'Auto(T)' 16 | 17 | assert repr(Fill()) == 'Fill()' 18 | assert repr(Fill(T)) == 'Fill(T)' 19 | 20 | assert repr(Fill(len)) == 'Fill(len)' 21 | -------------------------------------------------------------------------------- /glom/test/test_grouping.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | 3 | from glom import glom, T, SKIP, 
STOP, Auto, BadSpec, Val 4 | from glom.grouping import Group, First, Avg, Max, Min, Sample, Limit 5 | 6 | from glom.reduction import Merge, Flatten, Sum, Count 7 | 8 | 9 | def test_bucketing(): 10 | assert glom(range(4), Group({T % 2: [T]})) == {0: [0, 2], 1: [1, 3]} 11 | assert (glom(range(6), Group({T % 3: {T % 2: [T / 10.0]}})) == 12 | {0: {0: [0.0], 1: [0.3]}, 1: {1: [0.1], 0: [0.4]}, 2: {0: [0.2], 1: [0.5]}}) 13 | 14 | 15 | 16 | def test_corner_cases(): 17 | target = range(5) 18 | 19 | # immediate stop dict 20 | assert glom(target, Group({Val(STOP): [T]})) == {} 21 | 22 | # immediate stop list 23 | assert glom(target, Group([Val(STOP)])) == [] 24 | 25 | # dict key SKIP 26 | assert glom(target, Group({(lambda t: SKIP if t < 3 else t): T})) == {3: 3, 4: 4} 27 | 28 | # dict val SKIP 29 | assert glom(target, Group({T: lambda t: t if t % 2 else SKIP})) == {3: 3, 1: 1} 30 | 31 | # list val SKIP 32 | assert glom(target, Group([lambda t: t if t % 2 else SKIP])) == [1, 3] 33 | 34 | # embedded auto spec (lol @ 0 being 0 bit length) 35 | assert glom(target, Group({Auto(('bit_length', T())): [T]})) == {0: [0], 1: [1], 2: [2, 3], 3: [4]} 36 | 37 | # no dicts inside lists in Group mode 38 | with raises(BadSpec): 39 | assert glom(target, Group([{T: T}])) 40 | 41 | # check only supported types 42 | with raises(BadSpec): 43 | assert glom(target, Group('no string support yet')) 44 | 45 | # bucket ints by their bit length and then odd/even, limited to 3 per bucket 46 | spec = Group({T.bit_length(): {lambda t: t % 2: Limit(3)}}) 47 | res = glom(range(20), spec) 48 | assert res == {0: {0: [0]}, 49 | 1: {1: [1]}, 50 | 2: {0: [2], 1: [3]}, 51 | 3: {0: [4, 6], 1: [5, 7]}, 52 | 4: {0: [8, 10, 12], 1: [9, 11, 13]}} 53 | 54 | return 55 | 56 | 57 | def test_agg(): 58 | t = list(range(10)) 59 | assert glom(t, Group(First())) == 0 60 | assert glom(t, Group(T)) == 9 # this is basically Last 61 | 62 | assert glom(t, Group(Avg())) == sum(t) / len(t) 63 | assert glom(t, Group(Sum())) == sum(t) 
64 | 65 | assert glom([0, 1, 0], Group(Max())) == 1 66 | assert glom([1, 0, 1], Group(Min())) == 0 67 | 68 | assert repr(Group(First())) == 'Group(First())' 69 | assert repr(Avg()) == 'Avg()' 70 | assert repr(Max()) == 'Max()' 71 | assert repr(Min()) == 'Min()' 72 | assert repr(Sum()) == 'Sum()' 73 | assert repr(Count()) == 'Count()' 74 | 75 | assert glom(range(10), Group({lambda t: t % 2: Count()})) == { 76 | 0: 5, 1: 5} 77 | 78 | 79 | def test_limit(): 80 | t = list(range(10)) 81 | assert glom(t, Group(Limit(1, T))) == 0 82 | assert glom(t, Group(Limit(3, Max()))) == 2 83 | assert glom(t, Group(Limit(3, [T]))) == [0, 1, 2] 84 | 85 | assert repr(Group(Limit(3, Max()))) == 'Group(Limit(3, Max()))' 86 | 87 | with raises(BadSpec): 88 | assert glom(t, Limit(1)) # needs to be wrapped in Group for now 89 | return 90 | 91 | 92 | def test_reduce(): 93 | assert glom([[1], [2, 3]], Group(Flatten())) == [1, 2, 3] 94 | assert glom([{'a': 1}, {'b': 2}], Group(Merge())) == {'a': 1, 'b': 2} 95 | assert glom([[[1]], [[2, 3], [4]]], Group(Flatten(Flatten()))) == [1, 2, 3, 4] 96 | 97 | 98 | def test_sample(): 99 | spec = Group(Sample(5)) 100 | assert glom([1, 2, 3], spec) == [1, 2, 3] 101 | 102 | assert repr(spec) == 'Group(Sample(5))' 103 | 104 | s = glom([1, 2, 3], Group(Sample(2))) 105 | assert s in [[1, 2], [1, 3], [2, 1], [2, 3], [3, 1], [3, 2]] 106 | -------------------------------------------------------------------------------- /glom/test/test_mutation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from glom import glom, Path, T, S, Spec, Glommer, PathAssignError, PathAccessError 4 | from glom import assign, Assign, delete, Delete, PathDeleteError, Or 5 | from glom import core 6 | from glom.core import UnregisteredTarget 7 | 8 | 9 | def test_assign(): 10 | class Foo: 11 | pass 12 | 13 | assert glom({}, Assign(T['a'], 1)) == {'a': 1} 14 | assert glom({'a': {}}, Assign(T['a']['a'], 1)) == {'a': {'a': 1}} 15 | 
assert glom({'a': {}}, Assign('a.a', 1)) == {'a': {'a': 1}} 16 | assert glom(Foo(), Assign(T.a, 1)).a == 1 17 | assert glom({}, Assign('a', 1)) == {'a': 1} 18 | assert glom(Foo(), Assign('a', 1)).a == 1 19 | assert glom({'a': Foo()}, Assign('a.a', 1))['a'].a == 1 20 | def r(): 21 | r = {} 22 | r['r'] = r 23 | return r 24 | assert glom(r(), Assign('r.r.r.r.r.r.r.r.r', 1)) == {'r': 1} 25 | assert glom(r(), Assign(T['r']['r']['r']['r'], 1)) == {'r': 1} 26 | assert glom(r(), Assign(Path('r', 'r', T['r']), 1)) == {'r': 1} 27 | assert assign(r(), Path('r', 'r', T['r']), 1) == {'r': 1} 28 | with pytest.raises(TypeError, match='path argument must be'): 29 | Assign(1, 'a') 30 | with pytest.raises(ValueError, match='path must have at least one element'): 31 | Assign(T, 1) 32 | 33 | assert repr(Assign(T.a, 1)) == 'Assign(T.a, 1)' 34 | assign_spec = Assign(T.a, 1, missing=dict) 35 | assert repr(assign_spec) == "Assign(T.a, 1, missing=dict)" 36 | assert repr(assign_spec) == repr(eval(repr(assign_spec))) 37 | 38 | 39 | def test_assign_recursive(): 40 | val = {} 41 | val[1] = [val] 42 | recur_out = glom({}, Assign(T['a'], val))['a'] 43 | assert recur_out[1][0] is recur_out 44 | 45 | 46 | def test_assign_spec_val(): 47 | output = glom({'b': 'c'}, Assign('a', Spec('b'))) 48 | assert output['a'] == output['b'] == 'c' 49 | output = glom({'b': 'c'}, Assign('a', Or('d', 'b'))) 50 | assert output['a'] == output['b'] == 'c' 51 | 52 | 53 | def test_unregistered_assign(): 54 | # test with bare target registry 55 | glommer = Glommer(register_default_types=False) 56 | 57 | with pytest.raises(UnregisteredTarget, match='assign'): 58 | glommer.glom({}, Assign('a', 'b')) 59 | 60 | # test for unassignable tuple 61 | with pytest.raises(UnregisteredTarget, match='assign'): 62 | glom({'a': ()}, Assign('a.0', 'b')) 63 | 64 | 65 | def test_bad_assign_target(): 66 | class BadTarget: 67 | def __setattr__(self, name, val): 68 | raise Exception("and you trusted me?") 69 | 70 | # sanity check 71 | spec = 
Assign('a', 'b') 72 | ok_target = lambda: None 73 | glom(ok_target, spec) 74 | assert ok_target.a == 'b' 75 | 76 | with pytest.raises(PathAssignError, match='could not assign'): 77 | glom(BadTarget(), spec) 78 | 79 | with pytest.raises(PathAccessError, match='could not access'): 80 | assign({}, 'a.b.c', 'moot') 81 | return 82 | 83 | 84 | def test_sequence_assign(): 85 | target = {'alist': [0, 1, 2]} 86 | assign(target, 'alist.2', 3) 87 | assert target['alist'][2] == 3 88 | 89 | with pytest.raises(PathAssignError, match='could not assign') as exc_info: 90 | assign(target, 'alist.3', 4) 91 | 92 | # the following test is because pypy's IndexError is different than CPython's: 93 | # E - PathAssignError(IndexError('list index out of range',), Path('alist'), '3') 94 | # E + PathAssignError(IndexError('list assignment index out of range',), Path('alist'), '3') 95 | # E ? +++++++++++ 96 | 97 | exc_repr = repr(exc_info.value) 98 | assert exc_repr.startswith('PathAssignError(') 99 | assert exc_repr.endswith("'3')") 100 | return 101 | 102 | 103 | def test_invalid_assign_op_target(): 104 | target = {'afunc': lambda x: 'hi %s' % x} 105 | spec = T['afunc'](x=1) 106 | 107 | with pytest.raises(ValueError): 108 | assign(target, spec, None) 109 | return 110 | 111 | 112 | def test_assign_missing_signature(): 113 | # test signature (non-callable missing hook) 114 | with pytest.raises(TypeError, match='callable'): 115 | assign({}, 'a.b.c', 'lol', missing='invalidbcnotcallable') 116 | return 117 | 118 | 119 | def test_assign_missing_dict(): 120 | target = {} 121 | val = object() 122 | 123 | from itertools import count 124 | counter = count() 125 | def debugdict(): 126 | ret = dict() 127 | #ret['id'] = id(ret) 128 | #ret['inc'] = counter.next() 129 | return ret 130 | 131 | assign(target, 'a.b.c.d', val, missing=debugdict) 132 | 133 | assert target == {'a': {'b': {'c': {'d': val}}}} 134 | 135 | 136 | def test_assign_missing_object(): 137 | val = object() 138 | class Container: 139 | pass 
140 | 141 | target = Container() 142 | target.a = extant_a = Container() 143 | assign(target, 'a.b.c.d', val, missing=Container) 144 | 145 | assert target.a.b.c.d is val 146 | assert target.a is extant_a # make sure we didn't overwrite anything on the path 147 | 148 | 149 | def test_assign_missing_with_extant_keys(): 150 | """This test ensures that assign with missing doesn't overwrite 151 | perfectly fine extant keys that are along the path it needs to 152 | assign to. call count is also checked to make sure missing() isn't 153 | invoked too many times. 154 | 155 | """ 156 | target = {} 157 | value = object() 158 | default_struct = {'b': {'c': {}}} 159 | 160 | call_count = [0] 161 | 162 | def _get_default_struct(): 163 | call_count[0] += 1 # make sure this is only called once 164 | return default_struct 165 | 166 | assign(target, 'a.b.c', value, missing=_get_default_struct) 167 | 168 | assert target['a']['b']['c'] is value 169 | assert target['a']['b'] is default_struct['b'] 170 | assert call_count == [1] 171 | 172 | 173 | def test_assign_missing_unassignable(): 174 | """Check that the final assignment to the target object comes last, 175 | ensuring that failed assignments don't leave targets in a bad 176 | state. 177 | 178 | """ 179 | 180 | class Tarjay: 181 | init_count = 0 182 | def __init__(self): 183 | self.__class__.init_count += 1 184 | 185 | @property 186 | def unassignable(self): 187 | return 188 | 189 | value = object() 190 | target = {"preexisting": "ok"} 191 | 192 | with pytest.raises(PathAssignError): 193 | assign(target, 'tarjay.unassignable.a.b.c', value, missing=Tarjay) 194 | 195 | assert target == {'preexisting': 'ok'} 196 | 197 | # why 3? "c" gets the value of "value", while "b", "a", and 198 | # "tarjay" all succeed and are set to Tarjay instances. Then 199 | # unassignable is already present, but not possible to assign to, 200 | # raising the PathAssignError. 
201 | assert Tarjay.init_count == 3 202 | 203 | 204 | def test_s_assign(): 205 | ''' 206 | check that assign works when storing things into S 207 | ''' 208 | assert glom({}, (Assign(S['foo'], 'bar'), S['foo'])) == 'bar' 209 | 210 | 211 | def test_delete(): 212 | class Foo: 213 | def __init__(self, d=None): 214 | for k, v in d.items(): 215 | setattr(self, k, v) 216 | 217 | assert glom({'a': 1}, Delete(T['a'])) == {} 218 | assert glom({'a': {'a': 1}}, Delete(T['a']['a'])) == {'a': {}} 219 | assert glom({'a': {'a': 1}}, Delete('a.a')) == {'a': {}} 220 | assert not hasattr(glom(Foo({'a': 1}), Delete(T.a)), 'a') 221 | assert glom({'a': 1}, Delete('a')) == {} 222 | assert not hasattr(glom(Foo({'a': 1}), Delete('a')), 'a') 223 | assert not hasattr(glom({'a': Foo({'a': 1})}, Delete('a.a'))['a'], 'a') 224 | 225 | def r(): 226 | r = {} 227 | r['r'] = r 228 | return r 229 | 230 | assert glom(r(), Delete('r.r.r.r.r.r.r.r.r')) == {} 231 | assert glom(r(), Delete(T['r']['r']['r']['r'])) == {} 232 | assert glom(r(), Delete(Path('r', 'r', T['r']))) == {} 233 | assert delete(r(), Path('r', 'r', T['r'])) == {} 234 | with pytest.raises(TypeError, match='path argument must be'): 235 | Delete(1, 'a') 236 | with pytest.raises(ValueError, match='path must have at least one element'): 237 | Delete(T, 1) 238 | 239 | assert repr(Delete(T.a)) == 'Delete(T.a)' 240 | 241 | # test delete from scope 242 | assert glom(1, (S(x=T), S['x'])) == 1 243 | with pytest.raises(PathAccessError): 244 | glom(1, (S(x=T), Delete(S['x']), S['x'])) 245 | 246 | # test raising on missing parent 247 | with pytest.raises(PathAccessError): 248 | glom({}, Delete(T['a']['b'])) 249 | 250 | # test raising on missing index 251 | with pytest.raises(PathDeleteError): 252 | glom([], Delete(T[0])) 253 | target = [] 254 | assert glom(target, Delete(T[0], ignore_missing=True)) is target 255 | 256 | # test raising on missing attr 257 | with pytest.raises(PathDeleteError): 258 | glom(object(), Delete(T.does_not_exist)) 259 | 
target = object() 260 | assert glom(target, Delete(T.does_not_exist, ignore_missing=True)) is target 261 | 262 | 263 | def test_unregistered_delete(): 264 | glommer = Glommer(register_default_types=False) 265 | 266 | with pytest.raises(UnregisteredTarget, match='delete'): 267 | glommer.glom({'a': 1}, Delete('a')) 268 | 269 | with pytest.raises(UnregisteredTarget, match='delete'): 270 | glom({'a': (1,)}, Delete('a.0')) 271 | 272 | 273 | def test_bad_delete_target(): 274 | class BadTarget: 275 | def __delattr__(self, name): 276 | raise Exception("and you trusted me?") 277 | 278 | spec = Delete('a') 279 | ok_target = lambda: None 280 | ok_target.a = 1 281 | glom(ok_target, spec) 282 | assert not hasattr(ok_target, 'a') 283 | 284 | with pytest.raises(PathDeleteError, match='could not delete'): 285 | glom(BadTarget(), spec) 286 | 287 | with pytest.raises(PathDeleteError, match='could not delete'): 288 | delete({}, 'a') 289 | return 290 | 291 | 292 | def test_sequence_delete(): 293 | target = {'alist': [0, 1, 2]} 294 | delete(target, 'alist.1') 295 | assert target['alist'] == [0, 2] 296 | 297 | with pytest.raises(PathDeleteError, match='could not delete') as exc_info: 298 | delete(target, 'alist.2') 299 | 300 | exc_repr = repr(exc_info.value) 301 | assert exc_repr.startswith('PathDeleteError(') 302 | assert exc_repr.endswith("'2')") 303 | return 304 | 305 | 306 | def test_invalid_delete_op_target(): 307 | target = {'afunc': lambda x: 'hi %s' % x} 308 | spec = T['afunc'](x=1) 309 | 310 | with pytest.raises(ValueError): 311 | delete(target, spec, None) 312 | return 313 | 314 | 315 | def test_delete_ignore_missing(): 316 | assert delete({}, 'a', ignore_missing=True) == {} 317 | assert delete({}, 'a.b', ignore_missing=True) == {} 318 | 319 | 320 | def test_star_broadcast(): 321 | val = {'a': [{'b': [{'c': 1}, {'c': 2}, {'c': 3}]}]} 322 | assert glom(val, (Assign('a.*.b.*.d', 'a'), 'a.*.b.*.d')) == [['a', 'a', 'a']] 323 | glom(val, Delete('a.*.b.*.d')) 324 | assert 'c' in 
val['a'][0]['b'][0] 325 | assert 'd' not in val['a'][0]['b'][0] -------------------------------------------------------------------------------- /glom/test/test_path_and_t.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | 3 | from glom import glom, Path, S, T, A, PathAccessError, GlomError, BadSpec, Or, Assign, Delete 4 | from glom import core 5 | 6 | def test_list_path_access(): 7 | assert glom(list(range(10)), Path(1)) == 1 8 | 9 | 10 | def test_path(): 11 | _obj = object() 12 | target = {'a': {'b.b': [None, {_obj: [None, None, 'd']}]}} 13 | 14 | assert glom(target, Path('a', 'b.b', 1, _obj, -1)) == 'd' 15 | 16 | 17 | def test_empty_path_access(): 18 | target = {} 19 | 20 | assert glom(target, Path()) is target 21 | assert glom(target, (Path(), Path(), Path())) is target 22 | 23 | dup_dict = glom(target, {'target': Path(), 24 | 'target2': Path()}) 25 | dup_dict['target'] is target 26 | dup_dict['target2'] is target 27 | 28 | 29 | def test_path_t_roundtrip(): 30 | # check that T repr roundtrips 31 | assert repr(T['a'].b.c()) == "T['a'].b.c()" 32 | assert repr(T[1:]) == "T[1:]" 33 | assert repr(T[::3, 1:, 1:2, :2:3]) == "T[::3, 1:, 1:2, :2:3]" 34 | 35 | # check that Path repr roundtrips 36 | assert repr(Path('a', 1, 'b.b', -1.0)) == "Path('a', 1, 'b.b', -1.0)" 37 | 38 | # check that Path repr roundtrips when it contains Ts 39 | assert repr(Path(T['a'].b, 'c', T['d'].e)) == "Path(T['a'].b, 'c', T['d'].e)" 40 | 41 | # check that T instances containing Path access revert to repring with Path 42 | assert repr(Path(T['a'].b, 'c', T['d'].e).path_t) == "Path(T['a'].b, 'c', T['d'].e)" 43 | 44 | # check that Paths containing only T objects reduce to a T (joining the T objects) 45 | assert repr(Path(T['a'].b, T.c())) == "T['a'].b.c()" 46 | 47 | # check that multiple nested paths reduce 48 | assert repr(Path(Path(Path('a')))) == "Path('a')" 49 | 50 | # check builtin repr 51 | assert repr(T[len]) == 'T[len]'
52 | assert repr(T.func(len, sum)) == 'T.func(len, sum)' 53 | 54 | # check * and ** 55 | assert repr(T.__star__().__starstar__()) == 'T.__star__().__starstar__()' 56 | assert repr(Path('a', T.__star__().__starstar__())) == "Path('a', T.__star__().__starstar__())" 57 | 58 | 59 | def test_path_access_error_message(): 60 | 61 | # test fuzzy access 62 | with raises(GlomError) as exc_info: 63 | glom({}, 'a.b') 64 | assert ("PathAccessError: could not access 'a', part 0 of Path('a', 'b'), got error: KeyError" 65 | in exc_info.exconly()) 66 | ke = repr(KeyError('a')) # py3.7+ changed the keyerror repr 67 | assert repr(exc_info.value) == "PathAccessError(" + ke + ", Path('a', 'b'), 0)" 68 | 69 | # test multi-part Path with T, catchable as a KeyError 70 | with raises(KeyError) as exc_info: 71 | # don't actually use glom to copy your data structures please 72 | glom({'a': {'b': 'c'}}, Path('a', T.copy(), 'd')) 73 | assert ("PathAccessError: could not access 'd', part 3 of Path('a', T.copy(), 'd'), got error: KeyError" 74 | in exc_info.exconly()) 75 | ke = repr(KeyError('d')) # py3.7+ changed the keyerror repr 76 | assert repr(exc_info.value) == "PathAccessError(" + ke + ", Path('a', T.copy(), 'd'), 3)" 77 | 78 | # test AttributeError 79 | with raises(GlomError) as exc_info: 80 | glom({'a': {'b': 'c'}}, Path('a', T.b)) 81 | assert ("PathAccessError: could not access 'b', part 1 of Path('a', T.b), got error: AttributeError" 82 | in exc_info.exconly()) 83 | ae = repr(AttributeError("'dict' object has no attribute 'b'")) 84 | assert repr(exc_info.value) == "PathAccessError(" + ae + ", Path(\'a\', T.b), 1)" 85 | 86 | 87 | def test_t_picklability(): 88 | import pickle 89 | 90 | class TargetType: 91 | def __init__(self): 92 | self.attribute = lambda: None 93 | self.attribute.method = lambda: {'key': lambda x: x * 2} 94 | 95 | spec = T.attribute.method()['key'](x=5) 96 | 97 | rt_spec = pickle.loads(pickle.dumps(spec)) 98 | assert repr(spec) == repr(rt_spec) 99 | 100 | assert 
glom(TargetType(), spec) == 10 101 | 102 | s_spec = S.attribute 103 | assert repr(s_spec) == repr(pickle.loads(pickle.dumps(s_spec))) 104 | 105 | 106 | def test_t_subspec(): 107 | # tests that arg-mode is a min-mode, allowing for 108 | # other specs to be embedded inside T calls 109 | data = [ 110 | {'id': 1}, 111 | {'pk': 1}] 112 | 113 | get_ids = ( 114 | S(id_type=int), 115 | [S.id_type(Or('id', 'pk'))]) 116 | 117 | assert glom(data, get_ids) == [1, 1] 118 | 119 | data = {'a': 1, 'b': 2, 'c': 3} 120 | 121 | # test that "shallow" data structures translate as-is 122 | get_vals = ( 123 | S(seq_type=tuple), 124 | S.seq_type([T['a'], T['b'], Or('c', 'd')]) 125 | ) 126 | 127 | assert glom(data, get_vals) == (1, 2, 3) 128 | 129 | 130 | def test_a_forbidden(): 131 | with raises(BadSpec): 132 | A() # cannot assign to function call 133 | with raises(BadSpec): 134 | glom(1, A) # cannot assign without destination 135 | 136 | 137 | def test_s_magic(): 138 | assert glom(None, S.test, scope={'test': 'value'}) == 'value' 139 | 140 | with raises(PathAccessError): 141 | glom(1, S.a) # ref to 'a' which doesn't exist in scope 142 | 143 | with raises(PathAccessError): 144 | glom(1, A.b.c) 145 | 146 | return 147 | 148 | 149 | def test_path_len(): 150 | 151 | assert len(Path()) == 0 152 | assert len(Path('a', 'b', 'c')) == 3 153 | assert len(Path.from_text('1.2.3.4')) == 4 154 | 155 | assert len(Path(T)) == 0 156 | assert len(Path(T.a.b.c)) == 3 157 | assert len(Path(T.a()['b'].c.d)) == 5 158 | 159 | 160 | def test_path_getitem(): 161 | path = Path(T.a.b.c) 162 | 163 | assert path[0] == Path(T.a) 164 | assert path[1] == Path(T.b) 165 | assert path[2] == Path(T.c) 166 | assert path[-1] == Path(T.c) 167 | assert path[-2] == Path(T.b) 168 | 169 | with raises(IndexError, match='Path index out of range'): 170 | path[4] 171 | 172 | with raises(IndexError, match='Path index out of range'): 173 | path[-14] 174 | return 175 | 176 | 177 | def test_path_slices(): 178 | path = Path(T.a.b, 1, 2, 
T(test='yes')) 179 | 180 | assert path[::] == path 181 | 182 | # positive indices 183 | assert path[3:] == Path(2, T(test='yes')) 184 | assert path[1:3] == Path(T.b, 1) 185 | assert path[:3] == Path(T.a.b, 1) 186 | 187 | # positive indices backwards 188 | assert path[2:1] == Path() 189 | 190 | # negative indices forward 191 | assert path[-1:] == Path(T(test='yes')) 192 | assert path[:-2] == Path(T.a.b, 1) 193 | assert path[-3:-1] == Path(1, 2) 194 | 195 | # negative indices backwards 196 | assert path[-1:-3] == Path() 197 | 198 | # slicing and stepping 199 | assert path[1::2] == Path(T.b, 2) 200 | 201 | 202 | def test_path_values(): 203 | path = Path(T.a.b, 1, 2, T(test='yes')) 204 | 205 | assert path.values() == ('a', 'b', 1, 2, ((), {'test': 'yes'})) 206 | 207 | assert Path().values() == () 208 | 209 | 210 | def test_path_items(): 211 | path = Path(T.a, 1, 2, T(test='yes')) 212 | 213 | assert path.items() == (('.', 'a'), 214 | ('P', 1), ('P', 2), 215 | ('(', ((), {'test': 'yes'}))) 216 | 217 | assert Path().items() == () 218 | 219 | 220 | def test_path_star(): 221 | val = {'a': [1, 2, 3]} 222 | assert glom(val, 'a.*') == [1, 2, 3] 223 | val['a'] = [{'b': v} for v in val['a']] 224 | assert glom(val, 'a.*.b') == [1, 2, 3] 225 | assert glom(val, T['a'].__star__()['b']) == [1, 2, 3] 226 | assert glom(val, Path('a', T.__star__(), 'b')) == [1, 2, 3] 227 | # multi-paths eat errors 228 | assert glom(val, Path('a', T.__star__(), T.b)) == [] 229 | val = [[[1]]] 230 | assert glom(val, '**') == [val, [[1]], [1], 1] 231 | val = {'a': [{'b': [{'c': 1}, {'c': 2}, {'d': {'c': 3}}]}], 'c': 4} 232 | assert glom(val, '**.c') == [4, 1, 2, 3] 233 | assert glom(val, 'a.**.c') == [1, 2, 3] 234 | assert glom(val, T['a'].__starstar__()['c']) == [1, 2, 3] 235 | assert glom(val, 'a.*.b.*.c') == [[1, 2]] 236 | # errors 237 | class ErrDict(dict): 238 | def __getitem__(key): 1/0 239 | assert ErrDict(val).keys() # it will try to iterate 240 | assert glom(ErrDict(val), '**') == [val] 241 | 
assert glom(ErrDict(val), '*') == [] 242 | # object access 243 | class A: 244 | def __init__(self): 245 | self.a = 1 246 | self.b = {'c': 2} 247 | val = A() 248 | 249 | assert glom(val, '*') == [1, {'c': 2}] 250 | assert glom(val, '**') == [val, 1, {'c': 2}, 2] 251 | 252 | 253 | def test_star_broadcast(): 254 | val = {'a': [1, 2, 3]} 255 | assert glom(val, Path.from_text('a.*').path_t + 1) == [2, 3, 4] 256 | val = {'a': [{'b': [{'c': 1}, {'c': 2}, {'c': 3}]}]} 257 | assert glom(val, Path.from_text('**.c').path_t + 1) == [2, 3, 4] 258 | 259 | 260 | def test_star_warning(): 261 | '''check that the default behavior is as expected; this will change when * is default on''' 262 | assert core.PATH_STAR is True 263 | try: 264 | core.PATH_STAR = False 265 | assert glom({'*': 1}, '*') == 1 266 | assert Path._STAR_WARNED 267 | finally: 268 | core.PATH_STAR = True 269 | 270 | def test_path_eq(): 271 | assert Path('a', 'b') == Path('a', 'b') 272 | assert Path('a') != Path('b') 273 | 274 | assert Path() != object() 275 | 276 | 277 | def test_path_eq_t(): 278 | assert Path(T.a.b) == T.a.b 279 | assert Path(T.a.b.c) != T.a.b 280 | 281 | 282 | def test_startswith(): 283 | ref = T.a.b[1] 284 | 285 | assert Path(ref).startswith(T) 286 | assert Path(ref).startswith(T.a.b) 287 | assert Path(ref).startswith(ref) 288 | assert Path(ref).startswith(ref.c) is False 289 | 290 | assert Path('a.b.c').startswith(Path()) 291 | assert Path('a.b.c').startswith('a.b.c') 292 | 293 | with raises(TypeError): 294 | assert Path('a.b.c').startswith(None) 295 | 296 | return 297 | 298 | 299 | def test_from_t_identity(): 300 | ref = Path(T.a.b) 301 | assert ref.from_t() == ref 302 | assert ref.from_t() is ref 303 | 304 | 305 | def test_t_dict_key(): 306 | target = {'a': 'A'} 307 | assert glom(target, {T['a']: 'a'}) == {'A': 'A'} 308 | 309 | 310 | def test_t_arithmetic(): 311 | t = 2 312 | assert glom(t, T + T) == 4 313 | assert glom(t, T * T) == 4 314 | assert glom(t, T ** T) == 4 315 | assert glom(t, T / 
1) == 2 316 | assert glom(t, T % 1) == 0 317 | assert glom(t, T - 1) == 1 318 | assert glom(t, T & T) == 2 319 | assert glom(t, T | 1) == 3 320 | assert glom(t, T ^ T) == 0 321 | assert glom(2, ~T) == -3 322 | assert glom(t, -T) == -2 323 | 324 | 325 | def test_t_arithmetic_reprs(): 326 | assert repr(T + T) == "T + T" 327 | assert repr(T + (T / 2 * (T - 5) % 4)) == "T + (T / 2 * (T - 5) % 4)" 328 | assert repr(T & 7 | (T ^ 6)) == "T & 7 | (T ^ 6)" 329 | assert repr(-(~T)) == "-(~T)" 330 | 331 | 332 | def test_t_arithmetic_errors(): 333 | with raises(PathAccessError, match='zero'): 334 | glom(0, T / 0) 335 | 336 | with raises(PathAccessError, match='unsupported operand type'): 337 | glom(None, T / 2) 338 | 339 | return 340 | 341 | 342 | def test_t_dunders(): 343 | with raises(AttributeError) as exc_info: 344 | T.__name__ 345 | 346 | assert 'use T.__("name__")' in str(exc_info.value) 347 | 348 | assert glom(1, T.__('class__')) is int 349 | 350 | 351 | def test_path_cache(): 352 | assert Path.from_text('a.b.c') is Path.from_text('a.b.c') 353 | pre = Path._MAX_CACHE 354 | Path._MAX_CACHE = 0 355 | assert Path.from_text('d.e.f') is not Path.from_text('d.e.f') 356 | -------------------------------------------------------------------------------- /glom/test/test_reduction.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | import pytest 4 | from boltons.dictutils import OMD 5 | 6 | from glom import glom, T, Sum, Fold, Flatten, Coalesce, flatten, FoldError, Glommer, Merge, merge 7 | 8 | 9 | def test_sum_integers(): 10 | target = list(range(5)) 11 | 12 | assert glom(target, Sum()) == 10 13 | 14 | assert glom(target, Sum(init=lambda: 2)) == 12 15 | 16 | target = [] 17 | assert glom(target, Sum()) == 0 18 | 19 | 20 | target = [{"num": 3}, {"num": 2}, {"num": -1}] 21 | assert glom(target, Sum(['num'])) == 4 22 | 23 | target = target + [{}] # add a non-compliant dict 24 | assert glom(target, Sum([Coalesce('num', 
default=0)])) ==4 25 | 26 | assert repr(Sum()) == 'Sum()' 27 | assert repr(Sum(len, init=float)) == 'Sum(len, init=float)' 28 | 29 | 30 | def test_sum_seqs(): 31 | target = [(x,) for x in range(4)] 32 | assert glom(target, Sum(init=tuple)) == (0, 1, 2, 3) 33 | 34 | # would not work with builtin sum(), gets: 35 | # "TypeError: sum() can't sum strings [use ''.join(seq) instead]" 36 | # Works here for now. If we're ok with that error, then we can 37 | # switch to sum(). 38 | target = ['a', 'b', 'cd'] 39 | assert glom(target, Sum(init=str)) == 'abcd' 40 | 41 | target = [['a'], ['b'], ['cde'], ['']] 42 | 43 | assert glom(target, Sum(Sum(init=list), init=str)) == 'abcde' 44 | 45 | 46 | def test_fold(): 47 | target = range(1, 5) 48 | assert glom(target, Fold(T, int)) == 10 49 | assert glom(target, Fold(T, init=lambda: 2)) == 12 50 | 51 | assert glom(target, Fold(T, lambda: 1, op=lambda l, r: l * r)) == 24 52 | 53 | assert repr(Fold(T, int)) == 'Fold(T, init=int)' 54 | assert repr(Fold(T, int, op=operator.imul)).startswith('Fold(T, init=int, op=<') 55 | 56 | # signature coverage 57 | with pytest.raises(TypeError): 58 | Fold(T, list, op=None) # noncallable op 59 | 60 | with pytest.raises(TypeError): 61 | Fold(T, init=None) # noncallable init 62 | 63 | 64 | def test_fold_bad_iter(): 65 | glommer = Glommer(register_default_types=False) 66 | 67 | def bad_iter(obj): 68 | raise RuntimeError('oops') 69 | 70 | glommer.register(list, iterate=bad_iter) 71 | 72 | with pytest.raises(TypeError): 73 | target = [] 74 | glommer.glom(target, Flatten()) 75 | 76 | 77 | def test_flatten(): 78 | target = [[1], [2], [3, 4]] 79 | assert glom(target, Flatten()) == [1, 2, 3, 4] 80 | 81 | target = [(1, 2), [3]] 82 | assert glom(target, Flatten()) == [1, 2, 3] 83 | 84 | gen = glom(target, Flatten(init='lazy')) 85 | assert next(gen) == 1 86 | assert list(gen) == [2, 3] 87 | 88 | assert repr(Flatten()) == 'Flatten()' 89 | assert repr(Flatten(init='lazy')) == "Flatten(init='lazy')" 90 | assert 
repr(Flatten(init=tuple)) == "Flatten(init=tuple)" 91 | 92 | 93 | def test_flatten_func(): 94 | target = [[1], [2], [3, 4]] 95 | assert flatten(target) == [1, 2, 3, 4] 96 | 97 | two_level_target = [[x] for x in target] 98 | assert flatten(two_level_target, levels=2) == [1, 2, 3, 4] 99 | assert flatten(two_level_target, levels=0) == two_level_target 100 | 101 | unflattenable = 2 102 | 103 | with pytest.raises(FoldError): 104 | assert flatten(unflattenable) 105 | 106 | # kind of an odd use, but it works for now 107 | assert flatten(['a', 'b', 'cd'], init=str) == 'abcd' 108 | 109 | # another odd case 110 | subspec_target = {'items': {'numbers': [1, 2, 3]}} 111 | assert (flatten(subspec_target, spec='items.numbers', init=int) == 6) 112 | 113 | # basic signature tests 114 | with pytest.raises(ValueError): 115 | flatten([], levels=-1) 116 | 117 | with pytest.raises(TypeError): 118 | flatten([], nonexistentkwarg=False) 119 | 120 | return 121 | 122 | 123 | def test_merge(): 124 | 125 | target = [{'a': 'A'}, {'b': 'B'}] 126 | 127 | assert glom(target, Merge()) == {'a': 'A', 'b': 'B'} 128 | 129 | assert glom(target, Merge(op=dict.update)) == {'a': 'A', 'b': 'B'} 130 | 131 | with pytest.raises(ValueError): 132 | Merge(init=list) # has no .update() 133 | 134 | with pytest.raises(ValueError): 135 | Merge(op='update_extend') # doesn't work on base dict, the default init 136 | 137 | 138 | def test_merge_omd(): 139 | target = [{'a': 'A'}, {'a': 'aleph'}] 140 | 141 | assert glom(target, Merge(init=OMD)) == OMD({'a': 'aleph'}) 142 | assert glom(target, Merge(init=OMD, op='update_extend')) == OMD([('a', 'A'), ('a', 'aleph')]) 143 | 144 | 145 | def test_merge_func(): 146 | 147 | target = [{'a': 'A'}, {'b': 'B'}] 148 | assert merge(target) == {'a': 'A', 'b': 'B'} 149 | assert merge([]) == {} 150 | 151 | # basic signature test 152 | with pytest.raises(TypeError): 153 | merge([], nonexistentkwarg=False) 154 | 
-------------------------------------------------------------------------------- /glom/test/test_scope_vars.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from glom import glom, Path, S, A, T, Vars, Val, GlomError, M, SKIP, Let 4 | 5 | from glom.core import ROOT 6 | from glom.mutation import PathAssignError 7 | 8 | def test_s_scope_assign(): 9 | data = {'a': 1, 'b': [{'c': 2}, {'c': 3}]} 10 | output = [{'a': 1, 'c': 2}, {'a': 1, 'c': 3}] 11 | assert glom(data, (S(a=T['a']), ('b', [{'a': S['a'], 'c': 'c'}]))) == output 12 | assert glom(data, ('b', [{'a': S[ROOT][Val(T)]['a'], 'c': 'c'}])) == output 13 | 14 | with pytest.raises(TypeError): 15 | S('posarg') 16 | with pytest.raises(TypeError): 17 | S() 18 | 19 | assert glom([[1]], (S(v=Vars()), [[A.v.a]], S.v.a)) == 1 20 | assert glom(1, (S(v={}), A.v['a'], S.v['a'])) == 1 21 | with pytest.raises(GlomError): 22 | glom(1, (S(v=1), A.v.a)) 23 | 24 | class FailAssign: 25 | def __setattr__(self, name, val): 26 | raise Exception('nope') 27 | 28 | with pytest.raises(PathAssignError): 29 | glom(1, (S(v=FailAssign()), Path(A.v, 'a'))) 30 | 31 | assert repr(S(a=T.a.b)) == 'S(a=T.a.b)' 32 | 33 | spec = (S(a=T['x']), S.a) 34 | assert glom({'x': 'y'}, spec) == 'y' 35 | 36 | return 37 | 38 | 39 | def test_globals(): 40 | assert glom([[1]], ([[A.globals.a]], S.globals.a)) == 1 41 | 42 | 43 | def test_vars(): 44 | assert glom(1, A.a) == 1 # A should not change the target 45 | assert glom(1, (A.a, S.a)) == 1 46 | # check that tuple vars don't "leak" into parent tuple 47 | assert glom(1, (A.t, Val(2), A.t, S.t)) == 2 48 | assert glom(1, (A.t, (Val(2), A.t), S.t)) == 1 49 | let = S(v=Vars({'b': 2}, c=3)) 50 | assert glom(1, (let, A.v.a, S.v.a)) == 1 51 | with pytest.raises(AttributeError): 52 | glom(1, (let, S.v.a)) # check that Vars() inside a spec doesn't hold state 53 | assert glom(1, (let, Path(A, 'v', 'a'), S.v.a)) == 1 54 | assert glom(1, (let, S.v.b)) == 2 55 | 
assert glom(1, (let, S.v.c)) == 3 56 | assert repr(let) == "S(v=Vars({'b': 2}, c=3))" 57 | assert repr(Vars(a=1, b=2)) in ( 58 | "Vars(a=1, b=2)", "Vars(b=2, a=1)") 59 | assert repr(Vars(a=1, b=2).glomit(None, None)) in ( 60 | "ScopeVars({'a': 1, 'b': 2})", "Vars({'b': 2, 'a': 1})") 61 | 62 | assert repr(A.b["c"]) == "A.b['c']" 63 | 64 | 65 | def test_scoped_vars(): 66 | target = list(range(10)) + list(range(5)) 67 | 68 | scope_globals = glom(target, ([A.globals.last], S.globals)) 69 | assert scope_globals.last == 4 70 | assert dict(scope_globals) == {'last': 4} 71 | 72 | 73 | def test_max_skip(): 74 | target = list(range(10)) + list(range(5)) 75 | 76 | max_spec = (S(max=Vars(max=0)), 77 | [((M > M(S.max.max)) & A.max.max) | Val(SKIP)], 78 | S.max) 79 | result = glom(target, max_spec) 80 | assert result.max == 9 81 | 82 | 83 | def test_let(): # backwards compat 2020-07 84 | data = {'a': 1, 'b': [{'c': 2}, {'c': 3}]} 85 | output = [{'a': 1, 'c': 2}, {'a': 1, 'c': 3}] 86 | assert glom(data, (Let(a='a'), ('b', [{'a': S['a'], 'c': 'c'}]))) == output 87 | assert glom(data, ('b', [{'a': S[ROOT][Val(T)]['a'], 'c': 'c'}])) == output 88 | 89 | with pytest.raises(TypeError): 90 | Let('posarg') 91 | with pytest.raises(TypeError): 92 | Let() 93 | 94 | assert glom([[1]], (Let(v=Vars()), [[A.v.a]], S.v.a)) == 1 95 | assert glom(1, (Let(v=lambda t: {}), A.v['a'], S.v['a'])) == 1 96 | with pytest.raises(GlomError): 97 | glom(1, (Let(v=lambda t: 1), A.v.a)) 98 | 99 | class FailAssign: 100 | def __setattr__(self, name, val): 101 | raise Exception('nope') 102 | 103 | with pytest.raises(PathAssignError): 104 | glom(1, (Let(v=lambda t: FailAssign()), Path(A.v, 'a'))) 105 | 106 | assert repr(Let(a=T.a.b)) == 'Let(a=T.a.b)' 107 | -------------------------------------------------------------------------------- /glom/test/test_snippets.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from collections import deque 3 | from decimal 
import Decimal 4 | import json 5 | import os 6 | import textwrap 7 | 8 | import pytest 9 | 10 | import glom 11 | 12 | 13 | def _get_codeblock(lines, offset): 14 | if lines[offset:offset + 2] != [".. code-block:: python\n", "\n"]: 15 | return None 16 | start = offset + 2 17 | try: 18 | finish = lines.index('\n', start) 19 | except ValueError: 20 | return None 21 | return textwrap.dedent("".join(lines[start:finish])) 22 | 23 | 24 | def _find_snippets(): 25 | path = os.path.dirname(os.path.abspath(__file__)) + '/../../docs/snippets.rst' 26 | with open(path) as snippet_file: 27 | lines = list(snippet_file) 28 | snippets = [] 29 | for line_no in range(len(lines)): 30 | source = _get_codeblock(lines, line_no) 31 | if source: 32 | snippets.append((line_no, source)) 33 | return snippets 34 | 35 | 36 | try: 37 | SNIPPETS = _find_snippets() 38 | except: 39 | SNIPPETS = [] # in case running in an environment without docs 40 | 41 | SNIPPETS_GLOBALS = copy.copy(glom.__dict__) 42 | SNIPPETS_GLOBALS.update(dict( 43 | json=json, 44 | deque=deque, 45 | Decimal=Decimal, 46 | data=json.dumps({'a': ['b']}), 47 | contacts=[{'primary_email': {'email': 'a@example.com'}}, {}], 48 | glom=glom.glom)) 49 | 50 | 51 | @pytest.mark.parametrize("line,source", SNIPPETS) 52 | def test_snippet(line, source): 53 | if '>>>' in source: 54 | return # maybe doctest output checking 55 | code = compile(source, 'snippets.rst', 'exec') 56 | if 'django' in source: 57 | return # maybe in the future 58 | eval(code, SNIPPETS_GLOBALS) 59 | 60 | -------------------------------------------------------------------------------- /glom/test/test_spec.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from glom import glom, Spec, T, S 4 | 5 | 6 | def test_spec(): 7 | assert glom(5, T) == 5 # check assumption about echo behavior 8 | echo = Spec(T) 9 | assert echo.glom(5) == 5 10 | assert glom(5, echo) == 5 11 | echo2 = Spec(echo) 12 | assert echo2.glom(5) == 5 
13 | 14 | with pytest.raises(TypeError, match='expected spec to be'): 15 | glom({}, object()) 16 | return 17 | 18 | def test_scope_spec(): 19 | scope_spec = Spec(S) 20 | assert scope_spec.glom(5, scope={'cat': 1})['cat'] == 1 21 | cat_scope_spec = Spec(scope_spec, scope={'cat': 1}) 22 | assert 'cat' in repr(cat_scope_spec) 23 | assert cat_scope_spec.glom(5)['cat'] == 1 24 | 25 | # test that Spec overrides the scope for its sub-tree 26 | assert glom(5, cat_scope_spec, scope={'cat': 2})['cat'] == 1 27 | -------------------------------------------------------------------------------- /glom/test/test_streaming.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from itertools import count, dropwhile, chain 4 | 5 | from glom import Iter 6 | from glom import glom, SKIP, STOP, T, Call, Spec, Glommer, Check, SKIP 7 | 8 | 9 | RANGE_5 = list(range(5)) 10 | 11 | 12 | def test_iter(): 13 | assert list(glom(['1', '2', '3'], Iter(int))) == [1, 2, 3] 14 | cnt = count() 15 | cnt_1 = glom(cnt, Iter(lambda t: t + 1)) 16 | assert (next(cnt_1), next(cnt_1)) == (1, 2) 17 | assert next(cnt) == 2 18 | 19 | assert list(glom(['1', '2', '3'], (Iter(int), enumerate))) == [(0, 1), (1, 2), (2, 3)] 20 | 21 | assert list(glom([1, SKIP, 2], Iter())) == [1, 2] 22 | assert list(glom([1, STOP, 2], Iter())) == [1] 23 | 24 | with pytest.raises(TypeError): 25 | Iter(nonexistent_kwarg=True) 26 | 27 | 28 | def test_filter(): 29 | is_odd = lambda x: x % 2 30 | odd_spec = Iter().filter(is_odd) 31 | out = glom(RANGE_5, odd_spec) 32 | assert list(out) == [1, 3] 33 | 34 | # let's just make sure we're actually streaming just in case 35 | counter = count() 36 | out = glom(counter, odd_spec) 37 | assert next(out) == 1 38 | assert next(out) == 3 39 | assert next(counter) == 4 40 | assert next(counter) == 5 41 | assert next(out) == 7 42 | 43 | bools = [True, False, False, True, False] 44 | spec = Iter().filter().all() 45 | out = glom(bools, spec) 46 | 
assert out == [True, True] 47 | 48 | imags = [0j, 1j, 2, 2j, 3j] 49 | spec = Iter().filter(Check(T.imag.real, type=float, one_of=(0, 2), default=SKIP)).all() 50 | out = glom(imags, spec) 51 | assert out == [0j, 2j] 52 | 53 | assert repr(Iter().filter(T.a.b)) == 'Iter().filter(T.a.b)' 54 | assert repr(Iter(list).filter(sum)) == 'Iter(list).filter(sum)' 55 | 56 | 57 | def test_map(): 58 | spec = Iter().map(lambda x: x * 2) 59 | out = glom(RANGE_5, spec) 60 | assert list(out) == [0, 2, 4, 6, 8] 61 | assert repr(Iter().map(T.a.b)) == 'Iter().map(T.a.b)' 62 | 63 | 64 | def test_split_flatten(): 65 | falsey_stream = [1, None, None, 2, 3, None, 4] 66 | spec = Iter().split() 67 | out = glom(falsey_stream, spec) 68 | assert list(out) == [[1], [2, 3], [4]] 69 | 70 | spec = Iter().split().flatten() 71 | out = glom(falsey_stream, spec) 72 | assert list(out) == [1, 2, 3, 4] 73 | 74 | assert repr(Iter().split(sep=None, maxsplit=2)) == 'Iter().split(sep=None, maxsplit=2)' 75 | assert repr(Iter(T.a.b[1]).flatten()) == 'Iter(T.a.b[1]).flatten()' 76 | 77 | 78 | def test_chunked(): 79 | int_list = list(range(9)) 80 | 81 | spec = Iter().chunked(3) 82 | out = glom(int_list, spec) 83 | assert list(out) == [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 84 | 85 | spec = Iter().chunked(3).map(sum) 86 | out = glom(int_list, spec) 87 | assert list(out) == [3, 12, 21] 88 | 89 | 90 | def test_windowed(): 91 | int_list = list(range(5)) 92 | 93 | spec = Iter().windowed(3) 94 | out = glom(int_list, spec) 95 | assert list(out) == [(0, 1, 2), (1, 2, 3), (2, 3, 4)] 96 | assert repr(spec) == 'Iter().windowed(3)' 97 | 98 | spec = spec.filter(lambda x: bool(x[0] % 2)).map(sum) 99 | out = glom(int_list, spec) 100 | assert next(out) == 6 101 | 102 | out = glom(range(10), spec) 103 | assert list(out) == [6, 12, 18, 24] 104 | 105 | 106 | def test_unique(): 107 | int_list = list(range(10)) 108 | 109 | spec = Iter().unique() 110 | out = glom(int_list, spec) 111 | assert list(out) == int_list 112 | 113 | spec = 
Iter(lambda x: x % 4).unique() 114 | out = glom(int_list, spec) 115 | assert list(out) == int_list[:4] 116 | assert repr(Iter().unique(T.a)) == 'Iter().unique(T.a)' 117 | 118 | 119 | def test_slice(): 120 | cnt = count() 121 | 122 | spec = Iter().slice(3) 123 | out = glom(cnt, spec) 124 | 125 | assert list(out) == [0, 1, 2] 126 | assert next(cnt) == 3 127 | 128 | out = glom(range(10), Iter().slice(1, 5)) 129 | assert list(out) == [1, 2, 3, 4] 130 | 131 | out = glom(range(10), Iter().slice(1, 6, 2)) 132 | assert list(out) == [1, 3, 5] 133 | assert repr(Iter().slice(1, 6, 2)) == 'Iter().slice(1, 6, 2)' 134 | 135 | out = glom(range(10), Iter().limit(3)) 136 | assert list(out) == [0, 1, 2] 137 | assert repr(Iter().limit(3)) == 'Iter().limit(3)' 138 | 139 | out = glom(range(5), Iter().limit(10)) 140 | assert list(out) == [0, 1, 2, 3, 4] 141 | 142 | # test broken args 143 | with pytest.raises(TypeError): 144 | Iter().slice(1, 2, 3, 4) 145 | 146 | 147 | def test_while(): 148 | cnt = count() 149 | out = glom(cnt, Iter().takewhile(lambda x: x < 3)) 150 | assert list(out) == [0, 1, 2] 151 | assert next(cnt) == 4 152 | assert repr(Iter().takewhile(T.a) == 'Iter().takewhile(T.a)') 153 | 154 | range_iter = iter(range(7)) 155 | out = glom(range_iter, Iter().dropwhile(lambda x: x < 3 or x > 5)) 156 | assert list(out) == [3, 4, 5, 6] # 6 still here despite the x>5 above 157 | 158 | out = glom(range(10), Iter().dropwhile(lambda x: x >= 0).limit(10)) 159 | assert list(out) == [] 160 | 161 | out = glom(range(8), Iter().dropwhile((T.bit_length(), lambda x: x < 3))) 162 | assert list(out) == [4, 5, 6, 7] 163 | assert repr(Iter().dropwhile(T.a) == 'Iter().dropwhile(T.a)') 164 | 165 | 166 | def test_iter_composition(): 167 | int_list = list(range(10)) 168 | out = glom(int_list, (Iter(), Iter(), list)) 169 | assert out == int_list 170 | 171 | out = glom([int_list] * 3, Iter(Iter(lambda x: x % 4)).flatten().unique()) 172 | assert list(out) == [0, 1, 2, 3] 173 | 174 | 175 | def 
test_faulty_iterate(): 176 | glommer = Glommer() 177 | 178 | def bad_iter(obj): 179 | raise RuntimeError('oops') 180 | 181 | glommer.register(str, iterate=bad_iter) 182 | 183 | with pytest.raises(TypeError): 184 | glommer.glom('abc', (Iter(), list)) 185 | 186 | 187 | def test_first(): 188 | spec = Iter().first(T.imag) 189 | 190 | target = iter([1, 2, 3j, 4]) 191 | out = glom(target, spec) 192 | assert out == 3j 193 | assert next(target) == 4 194 | assert repr(spec) == '(Iter(), First(T.imag))' 195 | 196 | spec = Iter().first(T.imag, default=0) 197 | target = iter([1, 2, 4]) 198 | out = glom(target, spec) 199 | assert out == 0 200 | assert repr(spec) == '(Iter(), First(T.imag, default=0))' 201 | 202 | 203 | def test_all(): 204 | int_iter = iter(range(10)) 205 | 206 | out = glom(int_iter, Iter().all()) 207 | assert out == list(range(10)) 208 | assert next(int_iter, None) is None 209 | assert repr(Iter().all()) == 'Pipe(Iter(), list)' 210 | -------------------------------------------------------------------------------- /glom/test/test_target_types.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import glom 4 | from glom import Glommer, PathAccessError, UnregisteredTarget 5 | from glom.core import TargetRegistry 6 | 7 | 8 | class A: 9 | pass 10 | 11 | class B: 12 | pass 13 | 14 | class C(A): 15 | pass 16 | 17 | class D(B): 18 | pass 19 | 20 | class E(C, D, A): 21 | pass 22 | 23 | class F(E): 24 | pass 25 | 26 | 27 | def test_types_leave_one_out(): 28 | ALL_TYPES = [A, B, C, D, E, F] 29 | for cur_t in ALL_TYPES: 30 | 31 | treg = TargetRegistry(register_default_types=False) 32 | 33 | treg.register(object, get=lambda: object) 34 | for t in ALL_TYPES: 35 | if t is cur_t: 36 | continue 37 | treg.register(t, get=(lambda t: lambda: t)(t)) 38 | 39 | obj = cur_t() 40 | assert treg.get_handler('get', obj)() == obj.__class__.mro()[1] 41 | 42 | if cur_t is E: 43 | assert treg.get_handler('get', obj)() is C # sanity 
check 44 | 45 | return 46 | 47 | 48 | def test_types_bare(): 49 | glommer = Glommer(register_default_types=False) 50 | 51 | treg = glommer.scope[TargetRegistry] 52 | assert treg._get_closest_type(object(), treg._op_type_tree.get('get', {})) is None 53 | 54 | # test that bare glommers can't glom anything 55 | with pytest.raises(UnregisteredTarget) as exc_info: 56 | glommer.glom(object(), {'object_repr': '__class__.__name__'}) 57 | assert repr(exc_info.value) == "UnregisteredTarget('get', , OrderedDict(), ('__class__',))" 58 | assert str(exc_info.value).find( 59 | "glom() called without registering any types for operation 'get'." 60 | " see glom.register() or Glommer's constructor for details.") != -1 61 | 62 | with pytest.raises(UnregisteredTarget, match='without registering') as exc_info: 63 | glommer.glom([{'hi': 'hi'}], ['hi']) 64 | assert not exc_info.value.type_map 65 | 66 | glommer.register(object, get=getattr) 67 | glommer.register(dict, get=dict.__getitem__, exact=True) 68 | 69 | # check again that registering object for 'get' doesn't change the 70 | # fact that we don't have iterate support yet 71 | with pytest.raises(UnregisteredTarget) as exc_info: 72 | glommer.glom({'test': [{'hi': 'hi'}]}, ('test', ['hi'])) 73 | # feel free to update the "(at ['test'])" part to improve path display 74 | assert ( 75 | "target type 'list' not registered for 'iterate', " 76 | "expected one of registered types: (dict)" in str(exc_info.value)) 77 | return 78 | 79 | 80 | def test_invalid_register(): 81 | glommer = Glommer() 82 | with pytest.raises(TypeError): 83 | glommer.register(1) 84 | return 85 | 86 | 87 | def test_exact_register(): 88 | glommer = Glommer(register_default_types=False) 89 | 90 | class BetterList(list): 91 | pass 92 | 93 | glommer.register(BetterList, iterate=iter, exact=True) 94 | 95 | expected = [0, 2, 4] 96 | value = glommer.glom(BetterList(range(3)), [lambda x: x * 2]) 97 | assert value == expected 98 | 99 | with pytest.raises(UnregisteredTarget): 100 | 
def test_iter_set():
    """Sets can be iterated by a list spec, with default types and on a bare Glommer."""
    some_ints = set(range(5))
    some_floats = glom.glom(some_ints, [float])

    # results are sorted before comparing, so the check does not depend on
    # the order in which the set is iterated
    assert sorted(some_floats) == [0.0, 1.0, 2.0, 3.0, 4.0]

    # now without defaults
    glommer = Glommer(register_default_types=False)
    glommer.register(set, iterate=iter)
    # Go through the bare glommer itself: calling module-level glom.glom here
    # would use the default registry and never exercise this registration.
    some_floats = glommer.glom(some_ints, [float])

    assert sorted(some_floats) == [0.0, 1.0, 2.0, 3.0, 4.0]
def test_faulty_iterate():
    """An 'iterate' handler that raises is reported by glom() as a TypeError."""
    def always_raises(obj):
        raise RuntimeError('oops')

    broken = Glommer()
    broken.register(str, iterate=always_raises)

    target = {'a': 'fail'}
    spec = ('a', {'chars': [str]})

    # the [str] sub-spec forces iteration over the string found at 'a'
    with pytest.raises(TypeError):
        broken.glom(target, spec)
def test_reregister_type():
    """Registering a type a second time swaps in the newly supplied handler for that op."""
    registry = TargetRegistry()

    class NewType:
        pass

    instance = NewType()

    def run_current_handler():
        # look the handler up fresh each time so reregistration is observed
        return registry.get_handler('op', instance)(instance)

    registry.register(NewType, op=lambda obj: obj)
    assert run_current_handler() == instance

    # an equivalent handler registered again: result is unchanged
    registry.register(NewType, op=lambda obj: obj)
    assert run_current_handler() == instance

    # a different handler registered: the new one is what gets returned
    registry.register(NewType, op=lambda obj: obj.__class__.__name__)
    assert run_current_handler() == 'NewType'
Contact.objects.get(contact_id=contact.id) is contact 20 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | doctest_optionflags=NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS 3 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | attrs>=19.2.0 2 | boltons>=20.2.0 3 | coverage<=7.2.7 # can unpin when dropping py37 4 | face>=20.1.1 5 | pytest>=6.2.5 6 | tox>=3.7.0 7 | PyYAML>=6.0.1 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.7 3 | # by the following command: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | attrs==24.2.0 8 | # via -r requirements.in 9 | boltons==24.1.0 10 | # via 11 | # -r requirements.in 12 | # face 13 | cachetools==5.5.0 14 | # via tox 15 | chardet==5.2.0 16 | # via tox 17 | colorama==0.4.6 18 | # via tox 19 | coverage==7.2.7 20 | # via -r requirements.in 21 | distlib==0.3.9 22 | # via virtualenv 23 | exceptiongroup==1.2.2 24 | # via pytest 25 | face==24.0.0 26 | # via -r requirements.in 27 | filelock==3.12.2 28 | # via 29 | # tox 30 | # virtualenv 31 | importlib-metadata==6.7.0 32 | # via 33 | # attrs 34 | # pluggy 35 | # pytest 36 | # tox 37 | # virtualenv 38 | iniconfig==2.0.0 39 | # via pytest 40 | packaging==24.0 41 | # via 42 | # pyproject-api 43 | # pytest 44 | # tox 45 | platformdirs==4.0.0 46 | # via 47 | # tox 48 | # virtualenv 49 | pluggy==1.2.0 50 | # via 51 | # pytest 52 | # tox 53 | pyproject-api==1.5.3 54 | # via tox 55 | pytest==7.4.4 56 | # via -r requirements.in 57 | pyyaml==6.0.1 58 | # via -r requirements.in 59 | tomli==2.0.1 60 | # via 61 | # 
def import_path(module_name, path):
    """Load and return the module stored in the file at ``path``.

    The file is executed directly from its location under the name
    ``module_name``; this function does not insert the result into
    ``sys.modules``.

    Args:
        module_name: name to give the loaded module.
        path: filesystem path of the Python source file to execute.

    Returns:
        The executed module object.

    Raises:
        ImportError: if no loader can be determined for ``path``.
    """
    spec = importlib.util.spec_from_file_location(module_name, path)
    # spec_from_file_location returns None when it cannot choose a loader
    # for the path (e.g. an unrecognized file suffix); fail with a clear
    # error instead of an AttributeError further down.
    if spec is None or spec.loader is None:
        raise ImportError(f'cannot load module {module_name!r} from {path!r}')
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
48 | zip_safe=False, 49 | platforms='any', 50 | license_files=['LICENSE'], 51 | classifiers=[ 52 | 'Topic :: Utilities', 53 | 'Intended Audience :: Developers', 54 | 'Topic :: Software Development :: Libraries', 55 | 'Development Status :: 5 - Production/Stable', 56 | 'Programming Language :: Python :: 3.7', 57 | 'Programming Language :: Python :: 3.8', 58 | 'Programming Language :: Python :: 3.9', 59 | 'Programming Language :: Python :: 3.10', 60 | 'Programming Language :: Python :: 3.11', 61 | 'Programming Language :: Python :: 3.12', 62 | 'Programming Language :: Python :: Implementation :: CPython', 63 | 'Programming Language :: Python :: Implementation :: PyPy', 64 | 'License :: OSI Approved :: BSD License', 65 | ] 66 | ) 67 | 68 | """ 69 | A brief checklist for release: 70 | 71 | * tox 72 | * git commit (if applicable) 73 | * Bump glom/_version.py off of -dev 74 | * git commit -a -m "bump version for vx.y.z release" 75 | * write CHANGELOG 76 | * bump docs/conf.py version 77 | * git commit 78 | * rm -rf dist/* 79 | * python setup.py sdist bdist_wheel 80 | * twine upload dist/* 81 | * git tag -a vx.y.z -m "brief summary" 82 | * bump glom/_version.py onto n+1 dev 83 | * git commit 84 | * git push 85 | 86 | NB: if dropping support for a python version, bump the pyupgrade argument in tox and run syntax-upgrade tox env. 
87 | 88 | """ 89 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py37,py38,py39,py310,py311,py312,pypy3,coverage-report,packaging 3 | 4 | [testenv] 5 | changedir = .tox 6 | deps = -rrequirements.txt 7 | commands = coverage run --parallel --rcfile {toxinidir}/.tox-coveragerc -m pytest -vv --doctest-modules {envsitepackagesdir}/glom {posargs} 8 | 9 | [testenv:coverage-report] 10 | changedir = .tox 11 | deps = coverage 12 | commands = coverage combine --rcfile {toxinidir}/.tox-coveragerc 13 | coverage xml --rcfile {toxinidir}/.tox-coveragerc 14 | coverage report --rcfile {toxinidir}/.tox-coveragerc 15 | coverage html --rcfile {toxinidir}/.tox-coveragerc -d {toxinidir}/htmlcov 16 | 17 | 18 | [testenv:packaging] 19 | changedir = {toxinidir} 20 | deps = 21 | check-manifest==0.50 22 | commands = 23 | check-manifest 24 | 25 | [testenv:syntax-upgrade] 26 | changedir = {toxinidir} 27 | deps = 28 | flynt 29 | pyupgrade 30 | commands = 31 | flynt ./glom 32 | python -c "import glob; import subprocess; [subprocess.run(['pyupgrade', '--py37-plus', f]) for f in glob.glob('./glom/**/*.py', recursive=True)]" --------------------------------------------------------------------------------