├── .github
└── workflows
│ └── tests.yaml
├── .gitignore
├── .readthedocs.yaml
├── .tox-coveragerc
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── SECURITY.md
├── docs
├── Makefile
├── _static
│ ├── comet.png
│ └── comet_multi.png
├── _templates
│ └── page.html
├── api.rst
├── by_analogy.rst
├── cli.rst
├── conf.py
├── custom_spec_types.rst
├── debugging.rst
├── faq.rst
├── grouping.rst
├── index.rst
├── make.bat
├── matching.rst
├── modes.rst
├── mutation.rst
├── outreach.md
├── requirements-rtd.txt
├── snippets.rst
├── streaming.rst
└── tutorial.rst
├── glom
├── __init__.py
├── __main__.py
├── _version.py
├── cli.py
├── core.py
├── grouping.py
├── matching.py
├── mutation.py
├── reduction.py
├── streaming.py
├── test
│ ├── __init__.py
│ ├── data
│ │ ├── test_invalid.toml
│ │ ├── test_invalid.yaml
│ │ ├── test_valid.toml
│ │ └── test_valid.yaml
│ ├── perf_report.py
│ ├── test_basic.py
│ ├── test_check.py
│ ├── test_cli.py
│ ├── test_error.py
│ ├── test_fill.py
│ ├── test_grouping.py
│ ├── test_match.py
│ ├── test_mutation.py
│ ├── test_path_and_t.py
│ ├── test_reduction.py
│ ├── test_scope_vars.py
│ ├── test_snippets.py
│ ├── test_spec.py
│ ├── test_streaming.py
│ ├── test_target_types.py
│ └── test_tutorial.py
└── tutorial.py
├── pytest.ini
├── requirements.in
├── requirements.txt
├── setup.py
└── tox.ini
/.github/workflows/tests.yaml:
--------------------------------------------------------------------------------
1 | name: Tests
2 | on:
3 | push:
4 | paths-ignore:
5 | - "docs/**"
6 | - "*.md"
7 | - "*.rst"
8 | pull_request:
9 | paths-ignore:
10 | - "docs/**"
11 | - "*.md"
12 | - "*.rst"
13 | jobs:
14 | tests:
15 | name: ${{ matrix.name }}
16 | runs-on: ${{ matrix.os }}
17 | strategy:
18 | fail-fast: false
19 | matrix:
20 | include:
21 | - { name: Linux, python: "3.12", os: ubuntu-latest, tox: py312 }
22 | - { name: Windows, python: "3.12", os: windows-latest, tox: py312 }
23 | - { name: Mac, python: "3.12", os: macos-latest, tox: py312 }
24 | - { name: "3.11", python: "3.11", os: ubuntu-latest, tox: py311 }
25 | - { name: "3.10", python: "3.10", os: ubuntu-latest, tox: py310 }
26 | - { name: "3.9", python: "3.9", os: ubuntu-latest, tox: py39 }
27 | - { name: "3.8", python: "3.8", os: ubuntu-latest, tox: py38 }
28 | - { name: "3.7", python: "3.7", os: ubuntu-22.04, tox: py37 }
29 | - { name: "PyPy3", python: "pypy-3.9", os: ubuntu-latest, tox: pypy3 }
30 | steps:
31 | - uses: actions/checkout@v4
32 | - uses: actions/setup-python@v4
33 | with:
34 | python-version: ${{ matrix.python }}
35 | - name: update pip
36 | run: |
37 | pip install -U wheel
38 | pip install -U setuptools
39 | python -m pip install -U pip
40 | - name: get pip cache dir
41 | id: pip-cache
42 | run: echo "::set-output name=dir::$(pip cache dir)"
43 | - name: cache pip
44 | uses: actions/cache@v3
45 | with:
46 | path: ${{ steps.pip-cache.outputs.dir }}
47 | key: pip|${{ runner.os }}|${{ matrix.python }}|${{ hashFiles('setup.py') }}|${{ hashFiles('requirements/*.txt') }}
48 | - run: pip install tox
49 | - run: tox -e ${{ matrix.tox }},coverage-report
50 | - name: "Upload coverage to Codecov"
51 | uses: "codecov/codecov-action@v3"
52 | with:
53 | fail_ci_if_error: true
54 | files: ./.tox/coverage.xml
55 | token: ${{ secrets.CODECOV_TOKEN }}
56 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | docs/_build
2 | tmp.py
3 | htmlcov/
4 | .coverage.*
5 |
6 | *.py[cod]
7 | .pytest_cache
8 | venv*
9 |
10 |
11 | # emacs
12 | *~
13 | ._*
14 | .\#*
15 | \#*\#
16 |
17 | # C extensions
18 | *.so
19 |
20 | # Packages
21 | *.egg
22 | *.egg-info
23 | dist
24 | build
25 | eggs
26 | parts
27 | bin
28 | var
29 | sdist
30 | develop-eggs
31 | .installed.cfg
32 | lib
33 | lib64
34 |
35 | # Installer logs
36 | pip-log.txt
37 |
38 | # Unit test / coverage reports
39 | .coverage
40 | .tox
41 | nosetests.xml
42 |
43 | # Translations
44 | *.mo
45 |
46 | # Mr Developer
47 | .mr.developer.cfg
48 | .project
49 | .pydevproject
50 |
51 | # Vim
52 | *.sw[op]
53 |
54 | .cache/
55 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the version of Python and other tools you might need
9 | build:
10 | os: ubuntu-22.04
11 | tools:
12 | python: "3.10"
13 |
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 | configuration: docs/conf.py
17 |
18 | # If using Sphinx, optionally build your docs in additional formats such as PDF
19 | # formats:
20 | # - pdf
21 |
22 | # Optionally declare the Python requirements required to build your docs
23 | python:
24 | install:
25 | - requirements: docs/requirements-rtd.txt
26 | - method: pip
27 | path: .
--------------------------------------------------------------------------------
/.tox-coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source =
4 | glom
5 | ../glom
6 | omit =
7 | */flycheck_*.py
8 | */chainmap_backport.py
9 | */perf_report.py
10 |
11 | [paths]
12 | source =
13 | ../glom
14 | */lib/python*/site-packages/glom
15 | */Lib/site-packages/glom
16 | */pypy/site-packages/glom
17 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2018, Mahmoud Hashemi
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are
7 | met:
8 |
9 | * Redistributions of source code must retain the above copyright
10 | notice, this list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above
13 | copyright notice, this list of conditions and the following
14 | disclaimer in the documentation and/or other materials provided
15 | with the distribution.
16 |
17 | * The names of the contributors may not be used to endorse or
18 | promote products derived from this software without specific
19 | prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE CHANGELOG.md tox.ini requirements.txt requirements-rtd.txt .coveragerc Makefile pytest.ini .tox-coveragerc
2 | exclude TODO.md codecov.yml .readthedocs.yaml requirements.in
3 | global-exclude flycheck_*
4 |
5 | graft glom/test/data
6 | graft docs
7 | prune docs/_build
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # glom
2 |
3 | *Restructuring data, the Python way*
4 |
5 |
6 |
7 |
8 |
9 |
10 | Real applications have real data, and real data nests. Objects inside
11 | of objects inside of lists of objects.
12 |
13 | glom is a new and powerful way to handle real-world data, featuring:
14 |
15 | * Path-based access for nested data structures
16 | * Readable, meaningful error messages
17 | * Declarative data transformation, using lightweight, Pythonic specifications
18 | * Built-in data exploration and debugging features
19 |
20 | All of that and more, available as a [fully-documented][rtd],
21 | pure-Python package, tested on Python 3.7+, as well as
22 | PyPy3. Installation is as easy as:
23 |
24 | ```
25 | pip install glom
26 | ```
27 |
28 | And when you install glom, you also get [the `glom` command-line
29 | interface][cli_rtd], letting you experiment at the console, but never limiting
30 | you to shell scripts:
31 |
32 | ```
33 | Usage: glom [FLAGS] [spec [target]]
34 |
35 | Command-line interface to the glom library, providing nested data access and data
36 | restructuring with the power of Python.
37 |
38 | Flags:
39 |
40 | --help / -h show this help message and exit
41 | --target-file TARGET_FILE path to target data source (optional)
42 | --target-format TARGET_FORMAT
43 | format of the source data (json, python, toml,
44 | or yaml) (defaults to 'json')
45 | --spec-file SPEC_FILE path to glom spec definition (optional)
46 | --spec-format SPEC_FORMAT format of the glom spec definition (json, python,
47 | python-full) (defaults to 'python')
48 | --indent INDENT number of spaces to indent the result, 0 to disable
49 | pretty-printing (defaults to 2)
50 | --debug interactively debug any errors that come up
51 | --inspect interactively explore the data
52 |
53 | ```
54 |
55 | Anything you can do at the command line readily translates to Python
56 | code, so you've always got a path forward when complexity starts to
57 | ramp up.
58 |
59 |
60 | ## Examples
61 | #### Without glom
62 | ```python
63 | >>> data = {'a': {'b': {'c': 'd'}}}
64 | >>> data['a']['b']['c']
65 | 'd'
66 | >>> data2 = {'a': {'b': None}}
67 | >>> data2['a']['b']['c']
68 | Traceback (most recent call last):
69 | ...
70 | TypeError: 'NoneType' object is not subscriptable
71 | ```
72 |
73 | #### With glom
74 | ```python
75 | >>> glom(data, 'a.b.c')
76 | 'd'
77 | >>> glom(data2, 'a.b.c')
78 | Traceback (most recent call last):
79 | ...
80 | PathAccessError: could not access 'c', index 2 in path Path('a', 'b', 'c'), got error: ...
81 | ```
82 |
83 | ## Learn more
84 |
85 |
86 |
87 | If all this seems interesting, continue exploring glom below:
88 |
89 | * [glom Tutorial][tutorial]
90 | * [Full API documentation at Read the Docs][rtd]
91 | * [Original announcement blog post (2018-05-09)][glom_announce]
92 | * [Frequently Asked Questions][faq]
93 | * [PyCon 2018 Lightning Talk (2018-05-11)][pycon_talk]
94 |
95 | All of the links above are overflowing with examples, but should you
96 | find anything about the docs, or glom itself, lacking, [please submit
97 | an issue][gh_issues]!
98 |
99 | [rtd]: https://glom.readthedocs.io
100 | [cli_rtd]: https://glom.readthedocs.io/en/latest/cli.html
101 | [tutorial]: https://glom.readthedocs.io/en/latest/tutorial.html
102 | [faq]: https://glom.readthedocs.io/en/latest/faq.html
103 | [glom_announce]: https://sedimental.org/glom_restructured_data.html
104 | [gh_issues]: https://github.com/mahmoud/glom/issues/
105 | [pycon_talk]: https://www.youtube.com/watch?v=bTAFl8P2DkE&t=18m07s
106 |
107 | In the meantime, just remember: When you've got nested data, glom it! ☄️
108 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | By default we only support the latest version for all bugfixes, though we're open to discussion if there's evidence of wider deployment.
6 | [Libraries.io statistics](https://libraries.io/pypi/glom/usage) can be useful in assessing exposure, though we acknowledge there are private pools not represented by public stats.
7 |
8 | ## Reporting a Vulnerability
9 |
10 | You can use GitHub to privately report a vulnerability [here](https://github.com/mahmoud/glom/security/advisories),
11 | or if you do not have a GitHub account, contact the repository owner via the email on the About section of the website linked from their profile page.
12 | At the time of writing: https://sedimental.org/about.html
13 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = glom
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/_static/comet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mahmoud/glom/c225e2abeb234be7119911b96b4378cc9d8d6478/docs/_static/comet.png
--------------------------------------------------------------------------------
/docs/_static/comet_multi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mahmoud/glom/c225e2abeb234be7119911b96b4378cc9d8d6478/docs/_static/comet_multi.png
--------------------------------------------------------------------------------
/docs/_templates/page.html:
--------------------------------------------------------------------------------
1 | {% extends "!page.html" %}
2 | {% block menu %}
3 | {{ super() }}
4 |
5 | {% endblock %}
6 |
--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
1 | Core ``glom`` API
2 | =================
3 |
4 | .. automodule:: glom.core
5 |
6 | .. seealso::
7 |
8 | As the glom API grows, we've refactored the docs into separate
9 | domains. The core API is below. More specialized types can also be
10 | found in the following docs:
11 |
12 | .. hlist::
13 | :columns: 2
14 |
15 | * :doc:`mutation`
16 | * :doc:`streaming`
17 | * :doc:`grouping`
18 | * :doc:`matching`
19 |
20 | Longtime glom docs readers: thanks in advance for reporting/fixing
21 | any broken links you may find.
22 |
23 | .. contents:: Contents
24 | :local:
25 |
26 |
27 | .. _glom-func:
28 |
29 | The ``glom`` Function
30 | ---------------------
31 |
32 | Where it all happens. The reason for the season. The eponymous
33 | function, :func:`~glom.glom`.
34 |
35 | .. autofunction:: glom.glom
36 |
37 | Basic Specifiers
38 | ----------------
39 |
40 | Basic glom specifications consist of ``dict``, ``list``, ``tuple``,
41 | ``str``, and ``callable`` objects. However, as data calls for more
42 | complicated interactions, ``glom`` provides specialized specifier
43 | types that can be used with the basic set of Python builtins.
44 |
45 |
46 | .. autoclass:: glom.Path
47 | .. autoclass:: glom.Val
48 | .. autoclass:: glom.Spec
49 |
50 | .. _advanced-specifiers:
51 |
52 | .. seealso::
53 |
54 | Note that many of the Specifier types previously mentioned here
55 | have moved into their own docs, among them:
56 |
57 | .. hlist::
58 | :columns: 2
59 |
60 | * :doc:`mutation`
61 | * :doc:`streaming`
62 | * :doc:`grouping`
63 | * :doc:`matching`
64 |
65 | Object-Oriented Access and Method Calls with T
66 | ----------------------------------------------
67 |
68 | glom's shortest-named feature may be its most powerful.
69 |
70 | .. autodata:: glom.T
71 |
72 |
73 | Defaults with Coalesce
74 | ----------------------
75 |
76 | Data isn't always where or what you want it to be. Use these
77 | specifiers to declare away overly branchy procedural code.
78 |
79 | .. autoclass:: glom.Coalesce
80 |
81 | .. autodata:: glom.SKIP
82 | .. autodata:: glom.STOP
83 |
84 |
85 | Calling Callables with Invoke
86 | -----------------------------
87 |
88 | .. versionadded:: 19.10.0
89 |
90 | From calling functions to constructing objects, it's hardly Python if
91 | you're not invoking callables. By default, single-argument functions
92 | work great on their own in glom specs. The function gets passed the
93 | target and it just works:
94 |
95 | >>> glom(['1', '3', '5'], [int])
96 | [1, 3, 5]
97 |
98 | Zero-argument and multi-argument functions get a lot trickier,
99 | especially when more than one of those arguments comes from the
100 | target, thus the :class:`Invoke` spec.
101 |
102 | .. autoclass:: glom.Invoke
103 | :members:
104 |
105 | Alternative approach to functions: Call
106 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
107 |
108 | An earlier, more primitive approach to callables in glom was the Call
109 | specifier type.
110 |
111 | .. warning::
112 |
113 | Given the superiority of its successor, :class:`Invoke`,
114 | the :class:`Call` type may be deprecated in a future release.
115 |
116 | .. autoclass:: glom.Call
117 |
118 |
119 | Self-Referential Specs
120 | ----------------------
121 |
122 | Sometimes nested data repeats itself, either through recursive structure or
123 | just through redundancy.
124 |
125 | .. autoclass:: glom.Ref
126 |
127 | .. _scope:
128 |
129 | The ``glom`` Scope
130 | ------------------
131 |
132 | Sometimes data transformation involves more than a single target and
133 | spec. For those times, glom has a *scope* system designed to manage
134 | additional state.
135 |
136 | Basic usage
137 | ~~~~~~~~~~~
138 |
139 | On its surface, the glom scope is a dictionary of extra values that
140 | can be passed in to the top-level glom call. These values can then be
141 | addressed with the **S** object, which behaves
142 | similarly to the :data:`~glom.T` object.
143 |
144 | Here's an example case, counting the occurrences of a value in the
145 | target, using the scope:
146 |
147 | >>> count_spec = T.count(S.search)
148 | >>> glom(['a', 'c', 'a', 'b'], count_spec, scope={'search': 'a'})
149 | 2
150 |
151 | Note how **S** supports attribute-style dot-access for its keys. For
152 | keys which are not valid attribute names, key-style access is also
153 | supported.
154 |
155 | .. note::
156 |
157 | glom itself uses certain keys in the scope to manage internal
158 | state. Consider the namespace of strings, integers, builtin types,
159 | and other common Python objects open for your usage. Read
160 | :doc:`the custom spec doc <custom_spec_types>` to learn about more
161 | advanced, reserved cases.
162 |
163 | Updating the scope - ``S()`` & ``A``
164 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
165 |
166 | glom's scope isn't only set once when the top-level :func:`glom`
167 | function is called. It's dynamic and updatable.
168 |
169 | If your use case requires saving a value from one part of the target
170 | for usage elsewhere, then **S** will allow you to save values
171 | to the scope::
172 |
173 | >>> target = {'data': {'val': 9}}
174 | >>> spec = (S(value=T['data']['val']), {'val': S['value']})
175 | >>> glom(target, spec)
176 | {'val': 9}
177 |
178 | Any keyword arguments to **S** will have their values evaluated as
179 | a spec, with the result being saved to the keyword argument name in
180 | the scope.
181 |
182 | When only the target is being assigned, you can use **A** as a
183 | shortcut::
184 |
185 | >>> target = {'data': {'val': 9}}
186 | >>> spec = ('data.val', A.value, {'val': S.value})
187 | >>> glom(target, spec)
188 | {'val': 9}
189 |
190 | **A** enables a shorthand which assigns the current target to a
191 | location in the scope.
192 |
193 |
194 | Sensible saving - ``Vars`` & ``S.globals``
195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 |
197 | Of course, glom's scopes do not last forever. Much like function calls
198 | in Python, new child scopes can see and read values in parent
199 | scopes. When a child spec saves a new value to the scope, it's lost
200 | when the child spec completes.
201 |
202 | If you need values to be saved beyond a spec's local scope, the best
203 | way to do that is to create a :class:`~glom.Vars` object in a common
204 | ancestor scope. :class:`~glom.Vars` acts as a mutable namespace where
205 | child scopes can store state and have it persist beyond their local
206 | scope. Choose a location in the spec such that all involved child
207 | scopes can see and share the value.
208 |
209 | .. note::
210 |
211 | glom precreates a *global* :class:`~glom.Vars` object at
212 | ``S.globals``. Any values saved there will be accessible
213 | throughout that given :func:`~glom.glom` call::
214 |
215 | >>> last_spec = ([A.globals.last], S.globals.last)
216 | >>> glom([3, 1, 4, 1, 5], last_spec)
217 | 5
218 |
219 | While not shared across calls, most of the same care prescribed
220 | about using global state still applies.
221 |
222 | .. autoclass:: glom.Vars
223 |
224 |
225 | Core Exceptions
226 | ---------------
227 |
228 | Not all data is going to match specifications. Luckily, glom errors
229 | are designed to be as readable and actionable as possible.
230 |
231 | All glom exceptions inherit from :exc:`GlomError`, described below,
232 | along with other core exception types. For more details about handling
233 | and debugging exceptions, see ":doc:`debugging`".
234 |
235 | .. autoclass:: glom.PathAccessError
236 |
237 | .. autoclass:: glom.CoalesceError
238 |
239 | .. autoclass:: glom.UnregisteredTarget
240 |
241 | .. autoclass:: glom.BadSpec
242 |
243 | .. autoclass:: glom.GlomError
244 |
245 |
246 | .. _setup-and-registration:
247 |
248 | Setup and Registration
249 | ----------------------
250 |
251 | When it comes to targets, :func:`~glom.glom()` will operate on the
252 | vast majority of objects out there in Python-land. However, for that
253 | very special remainder, glom is readily extensible!
254 |
255 | .. autofunction:: glom.register
256 | .. autofunction:: glom.register_op
257 | .. autoclass:: glom.Glommer
258 |
--------------------------------------------------------------------------------
/docs/by_analogy.rst:
--------------------------------------------------------------------------------
1 | ``glom`` by Analogy
2 | ===================
3 |
4 | ``glom`` is pure Python, and you don't need to know anything but
5 | Python to use it effectively.
6 |
7 | Still, most everyone who encounters ``glom`` for the first time finds
8 | analogies to tools they already know. Whether SQL, list
9 | comprehensions, or HTML templates, there seems to be no end to the
10 | similarities. Many of them intentional!
11 |
12 | While ``glom`` is none of those tools, and none of those tools are ``glom``, a
13 | little comparison doesn't hurt. This document collects analogies to
14 | help guide understanding along.
15 |
16 |
17 | Similarity to list comprehensions
18 | ---------------------------------
19 |
20 | One of the key inspirations for ``glom`` was the humble list
21 | comprehension, one of my favorite Python features.
22 |
23 | List comprehensions make your code look like its output, and that goes
24 | a long way in readability. ``glom`` itself does list processing with
25 | square brackets like ``[lambda x: x % 2]``, which actually makes it
26 | more like a list comp and the old ``filter()`` function.
27 |
28 | ``glom``'s list processing differs in two ways:
29 |
30 | * Required use of a callable or other ``glom`` spec, to enable deferred processing.
31 | * Ability to return :data:`~glom.SKIP`, which can exclude items from a list.
32 |
33 |
34 | Similarity to templating (Jinja, Django, Mustache)
35 | --------------------------------------------------
36 |
37 | ``glom`` is a lot like templating engines, including modern formatters
38 | like gofmt, but with all the format affordances distilled out. glom
39 | doesn't just work on HTML, XML, JSON, or even just strings.
40 |
41 | ``glom`` works on objects, including functions, dicts, and all other
42 | primitives. In fact, it would be safe to call ``glom`` an "object
43 | templating" system.
44 |
45 | A lot of insights for ``glom`` came (and continue to come) from writing ashes_.
46 |
47 | .. _ashes: https://github.com/mahmoud/ashes
48 |
49 |
50 | Similarity to SQL and GraphQL
51 | -----------------------------
52 |
53 | In some ways, ``glom`` is a Python query language for Python
54 | objects. But thanks to its restructuring capabilities, it's much more
55 | than SQL or GraphQL.
56 |
57 | With SQL the primary abstraction is a table, or table-like
58 | resultset. With GraphQL, the analogous answer to this is, of course,
59 | the graph.
60 |
61 | glom goes further, not only offering the Python object tree as a
62 | graph, but also allowing you to change the shape of the data,
63 | restructuring it while fetching and transforming values, which GraphQL
64 | only minimally supports, and SQL barely supports at all. Table targets
65 | get you table outputs.
66 |
67 | Similarity to validation (jsonschema, schema, cerberus)
68 | --------------------------------------------------------
69 |
70 | ``glom`` is a generalized form of intake libraries `including validation`_.
71 | We definitely took `schema`_
72 | becoming successful as a sign that others shared our appetite for
73 | succinct, declarative Python datastructure manipulation.
74 |
75 | More importantly, these libraries seem to excel at structuring and
76 | parsing data, and don't solve much on the other end. Translating
77 | valid, structured objects like database models to JSON serializable
78 | objects is glom's forté.
79 |
80 | .. _schema: matching.rst
81 | .. _including validation: https://github.com/mahmoud/glom/issues/7
82 |
83 | Similarity to jq
84 | ----------------
85 |
86 | :doc:`The CLI <cli>` that ``glom`` packs is very similar in function
87 | to jq_, except it uses Python as its query language, instead of making
88 | its own. Most importantly glom gives you `a programmatic way forward`_.
89 |
90 | .. _jq: https://stedolan.github.io/jq/
91 | .. _a programmatic way forward: http://sedimental.org/glom_restructured_data.html#library-first-then-cli
92 |
93 | Similarity to XPath/XSLT
94 | ------------------------
95 |
96 | These hallowed technologies of yore, they were way ahead of the game
97 | in many ways. glom intentionally avoids their purity and verbosity,
98 | while trying to take as much inspiration as possible from their
99 | function.
100 |
101 | Others
102 | ------
103 |
104 | Beyond what's listed above, several other packages and language
105 | features exist in glom's ballpark, including:
106 |
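107 | * `Specter (for Clojure) <https://github.com/nathanmarz/specter>`_
108 | * `Lenses (for Haskell) <https://hackage.haskell.org/package/lens>`_
109 | * `Dig (for Ruby Hashmaps) <https://ruby-doc.org/core/Hash.html#method-i-dig>`_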
110 |
111 | If you know of other useful comparisons, `let us know
112 | <https://github.com/mahmoud/glom/issues>`_!
113 |
--------------------------------------------------------------------------------
/docs/cli.rst:
--------------------------------------------------------------------------------
1 | ``glom`` Command-Line Interface
2 | ===============================
3 |
4 | .. note::
5 |
6 | glom's CLI is usable and useful, but keep in mind glom is a library *first*.
7 |
8 |
9 | All the power of ``glom``, without even opening your text editor!
10 |
11 | .. code-block:: text
12 |
13 | $ glom --help
14 | Usage: /home/mahmoud/bin/glom [FLAGS] [spec [target]]
15 |
16 | Command-line interface to the glom library, providing nested data
17 | access and data restructuring with the power of Python.
18 |
19 | Flags:
20 |
21 | --help / -h show this help message and exit
22 | --target-file TARGET_FILE path to target data source (optional)
23 | --target-format TARGET_FORMAT
24 | format of the source data (json, python, toml,
25 | or yaml) (defaults to 'json')
26 | --spec-file SPEC_FILE path to glom spec definition (optional)
27 | --spec-format SPEC_FORMAT format of the glom spec definition (json, python,
28 | python-full) (defaults to 'python')
29 | --indent INDENT number of spaces to indent the result, 0 to disable
30 | pretty-printing (defaults to 2)
31 | --debug interactively debug any errors that come up
32 | --inspect interactively explore the data
33 |
34 | The ``glom`` command will also read from standard input (stdin) and
35 | process that data as the *target*.
36 |
37 | Here's an example, filtering a GitHub API example to something much
38 | more flat and readable:
39 |
40 | .. code-block:: bash
41 |
42 | $ pip install glom
43 | $ curl -s https://api.github.com/repos/mahmoud/glom/events \
44 | | glom '[{"type": "type", "date": "created_at", "user": "actor.login"}]'
45 |
46 | This yields:
47 |
48 | .. code-block:: javascript
49 |
50 | [
51 | {
52 | "date": "2018-05-09T03:39:44Z",
53 | "type": "WatchEvent",
54 | "user": "asapzacy"
55 | },
56 | {
57 | "date": "2018-05-08T22:51:46Z",
58 | "type": "WatchEvent",
59 | "user": "CameronCairns"
60 | },
61 | {
62 | "date": "2018-05-08T03:27:27Z",
63 | "type": "PushEvent",
64 | "user": "mahmoud"
65 | },
66 | {
67 | "date": "2018-05-08T03:27:27Z",
68 | "type": "PullRequestEvent",
69 | "user": "mahmoud"
70 | }
71 | ...
72 | ]
73 |
74 | By default the CLI *target* is JSON and the *spec* is a Python
75 | literal.
76 |
77 | .. note::
78 |
79 | Because the default CLI spec is a Python literal, there are no
80 | lambdas and other Python/glom constructs available. These features
81 | are gated behind the ``--spec-format python-full`` option to avoid
82 | code injection and other unwanted consequences.
83 |
84 | The ``--debug`` and ``--inspect`` flags are useful for exploring
85 | data. Note that they are not available when piping data through
86 | stdin. Save that API response to a file and use ``--target-file`` to
87 | do your interactive experimenting.
88 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Configuration file for the Sphinx documentation builder.
4 | #
5 | # This file does only contain a selection of the most common options. For a
6 | # full list see the documentation:
7 | # http://www.sphinx-doc.org/en/master/config
8 |
9 | # -- Path setup --------------------------------------------------------------
10 |
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 |
15 | import os
16 | import sys
17 | import sphinx
18 | from pprint import pprint
19 |
20 | # If extensions (or modules to document with autodoc) are in another directory,
21 | # add these directories to sys.path here. If the directory is relative to the
22 | # documentation root, use os.path.abspath to make it absolute, like shown here.
23 | CUR_PATH = os.path.dirname(os.path.abspath(__file__))
24 | PROJECT_PATH = os.path.abspath(CUR_PATH + '/../')
25 | PACKAGE_PATH = os.path.abspath(CUR_PATH + '/../glom/')
26 | sys.path.insert(0, PROJECT_PATH)
27 | sys.path.insert(0, PACKAGE_PATH)
28 |
29 | pprint(os.environ)
30 |
31 |
32 | # -- Project information -----------------------------------------------------
33 |
34 | project = u'glom'
35 | copyright = u'2024, Mahmoud Hashemi'
36 | author = u'Mahmoud Hashemi'
37 |
38 | # The short X.Y version
39 | version = u'24.11'
40 | # The full version, including alpha/beta/rc tags
41 | release = u'24.11.0'
42 |
43 |
44 | todo_add_to_theme_to_keep_menus_expanded = """
45 |
46 |
47 |
48 |
49 |
50 | """
51 |
52 |
53 | # -- General configuration ---------------------------------------------------
54 |
55 | # If your documentation needs a minimal Sphinx version, state it here.
56 | #
57 | # needs_sphinx = '1.0'
58 |
59 | # Add any Sphinx extension module names here, as strings. They can be
60 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
61 | # ones.
62 | extensions = [
63 | 'sphinx.ext.autodoc',
64 | 'sphinx.ext.intersphinx',
65 | 'sphinx.ext.ifconfig',
66 | 'sphinx.ext.viewcode',
67 | ]
68 |
69 | # Read the Docs is version 1.2 as of writing
70 | if sphinx.version_info[:2] < (1, 3):
71 | extensions.append('sphinxcontrib.napoleon')
72 | else:
73 | extensions.append('sphinx.ext.napoleon')
74 |
75 |
76 | # Add any paths that contain templates here, relative to this directory.
77 | templates_path = ['_templates']
78 |
79 | # The suffix(es) of source filenames.
80 | # You can specify multiple suffix as a list of string:
81 | #
82 | # source_suffix = ['.rst', '.md']
83 | source_suffix = '.rst'
84 |
85 | # The master toctree document.
86 | master_doc = 'index'
87 |
88 | # The language for content autogenerated by Sphinx. Refer to documentation
89 | # for a list of supported languages.
90 | #
91 | # This is also used if you do content translation via gettext catalogs.
92 | # Usually you set "language" from the command line for these cases.
93 | language = None
94 |
95 | # List of patterns, relative to source directory, that match files and
96 | # directories to ignore when looking for source files.
97 | # This pattern also affects html_static_path and html_extra_path .
98 | exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store']
99 |
100 | # The name of the Pygments (syntax highlighting) style to use.
101 | pygments_style = 'default'
102 |
103 |
104 | # -- Options for HTML output -------------------------------------------------
105 |
106 | # The theme to use for HTML and HTML Help pages. See the documentation for
107 | # a list of builtin themes.
108 | #
109 |
110 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
111 |
112 | if on_rtd:
113 | html_theme = 'sphinx_rtd_theme'
114 | else: # only import and set the theme if we're building docs locally
115 | import sphinx_rtd_theme
116 | html_theme = 'sphinx_rtd_theme'
117 | html_theme_path = ['_themes', sphinx_rtd_theme.get_html_theme_path()]
118 |
119 | html_theme_options = {
120 | 'navigation_depth': 4,
121 | 'collapse_navigation': False,
122 | }
123 |
124 | # Add any paths that contain custom static files (such as style sheets) here,
125 | # relative to this directory. They are copied after the builtin static files,
126 | # so a file named "default.css" will overwrite the builtin "default.css".
127 | html_static_path = ['_static']
128 |
129 | # Custom sidebar templates, must be a dictionary that maps document names
130 | # to template names.
131 | #
132 | # The default sidebars (for documents that don't match any pattern) are
133 | # defined by theme itself. Builtin themes are using these templates by
134 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
135 | # 'searchbox.html']``.
136 | #
137 | # html_sidebars = {}
138 |
139 |
140 | # -- Options for HTMLHelp output ---------------------------------------------
141 |
142 | # Output file base name for HTML help builder.
143 | htmlhelp_basename = 'glomdoc'
144 |
145 |
146 | # -- Options for LaTeX output ------------------------------------------------
147 |
148 | latex_elements = {
149 | # The paper size ('letterpaper' or 'a4paper').
150 | #
151 | # 'papersize': 'letterpaper',
152 |
153 | # The font size ('10pt', '11pt' or '12pt').
154 | #
155 | # 'pointsize': '10pt',
156 |
157 | # Additional stuff for the LaTeX preamble.
158 | #
159 | # 'preamble': '',
160 |
161 | # Latex figure (float) alignment
162 | #
163 | # 'figure_align': 'htbp',
164 | }
165 |
166 | # Grouping the document tree into LaTeX files. List of tuples
167 | # (source start file, target name, title,
168 | # author, documentclass [howto, manual, or own class]).
169 | latex_documents = [
170 | (master_doc, 'glom.tex', u'glom Documentation',
171 | u'Mahmoud Hashemi', 'manual'),
172 | ]
173 |
174 |
175 | # -- Options for manual page output ------------------------------------------
176 |
177 | # One entry per manual page. List of tuples
178 | # (source start file, name, description, authors, manual section).
179 | man_pages = [
180 | (master_doc, 'glom', u'glom Documentation',
181 | [author], 1)
182 | ]
183 |
184 |
185 | # -- Options for Texinfo output ----------------------------------------------
186 |
187 | # Grouping the document tree into Texinfo files. List of tuples
188 | # (source start file, target name, title, author,
189 | # dir menu entry, description, category)
190 | texinfo_documents = [
191 | (master_doc, 'glom', u'glom Documentation',
192 | author, 'glom', 'One line description of project.',
193 | 'Miscellaneous'),
194 | ]
195 |
196 |
197 | # -- Extension configuration -------------------------------------------------
198 |
199 | # -- Options for intersphinx extension ---------------------------------------
200 |
201 | # Example configuration for intersphinx: refer to the Python standard library.
202 | intersphinx_mapping = {'https://docs.python.org/': None}
203 |
--------------------------------------------------------------------------------
/docs/custom_spec_types.rst:
--------------------------------------------------------------------------------
1 | Writing a custom Specifier Type
2 | ===============================
3 |
4 | While glom comes with a lot of built-in features, no library can ever
5 | encompass all data manipulation operations.
6 |
7 | To cover every case out there, glom provides a way to extend its
8 | functionality with your own data handling hooks. This document
9 | explains glom's execution model and how to integrate with it when
10 | writing a custom Specifier Type.
11 |
12 | When to write a Specifier Type
13 | ------------------------------
14 |
15 | ``glom`` has always supported arbitrary callables, like so:
16 |
17 | .. code::
18 |
19 | glom({'nums': range(5)}, ('nums', sum))
20 | # 10
21 |
22 | With this built-in extensibility, what does a glom specifier type add?
23 |
24 | Custom specifier types are useful when you want to:
25 |
26 | 1. Perform validation at spec construction time
27 | 2. Enable users to interact with new target types and operations
28 | 3. Improve readability and reusability of your data transformations
29 | 4. Temporarily change the glom runtime behavior
30 |
31 | If you're just building a one-off spec for transforming your own data,
32 | there's no reason to reach for an extension. ``glom``'s extension API
33 | is easy, but a good old Python ``lambda`` is even easier.
34 |
35 | Building your Specifier Type
36 | ----------------------------
37 |
38 | Any object instance with a ``glomit`` method can participate in a glom
39 | call. By way of example, here is a programming cliché implemented as a
40 | glom specifier type, with comments referencing notes below.
41 |
42 | .. code::
43 |
44 | class HelloWorldSpec(object): # 1
45 | def glomit(self, target, scope): # 2
46 | print("Hello, world!")
47 | return target
48 |
49 | And now let's put it to use!
50 |
51 | .. code::
52 |
53 | from glom import glom
54 |
55 | target = {'example': 'object'}
56 |
57 | glom(target, HelloWorldSpec()) # 3
58 | # prints "Hello, world!" and returns target
59 |
60 | There are a few things to note from this example:
61 |
62 | 1. Specifier types do not need to inherit from any type. Just
63 | implement the ``glomit`` method.
64 | 2. The ``glomit`` signature takes two parameters, ``target`` and
65 | ``scope``. The ``target`` should be familiar from using
66 | :func:`~glom.glom`, and it's the ``scope`` that makes glom really
67 | tick.
68 | 3. By convention, instances are used in specs passed to
69 | :func:`~glom.glom` calls, not the types themselves.
70 |
71 | .. _glom_scope:
72 |
73 | The glom Scope
74 | --------------
75 |
76 | The :ref:`glom scope` is also used to expose runtime state to the specifier
77 | type. Let's take a look inside a scope:
78 |
79 | .. code::
80 |
81 | from glom import glom
82 | from pprint import pprint
83 |
84 | class ScopeInspectorSpec(object):
85 | def glomit(self, target, scope):
86 | pprint(dict(scope))
87 | return target
88 |
89 | glom(target, ScopeInspectorSpec())
90 |
91 | Which gives us:
92 |
93 | .. code::
94 |
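95 | {T: {'example': 'object'},
96 | <function glom at 0x...>: <function glom at 0x...>,
97 | <class 'glom.core.Path'>: [],
98 | <class 'glom.core.Spec'>: <__main__.ScopeInspectorSpec object at 0x7f208bf58690>,
99 | <class 'glom.core.Inspect'>: None,
100 | <class 'glom.core.TargetRegistry'>: <glom.core.TargetRegistry object at 0x...>}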
101 |
102 | As you can see, all glom's core workings are present, all under familiar keys:
103 |
104 | * The current *target*, accessible using :data:`~glom.T` as a scope key.
105 | * The current *spec*, accessible under :class:`~glom.Spec`.
106 | * The current *path*, accessible under :class:`~glom.Path`.
107 | * The ``TargetRegistry``, used to :ref:`register new operations and target types <setup-and-registration>`.
108 | * Even the ``glom()`` function itself, filed under :func:`~glom.glom`.
109 |
110 | To learn how to use the scope's powerful features idiomatically, let's
111 | reimplement one of glom's standard specifier types.
112 |
113 | Specifiers by example
114 | ---------------------
115 |
116 | While we've technically created a couple of extensions above, let's
117 | really dig into the features of the scope using an example.
118 |
119 | :class:`~glom.Sum` is a standard extension that ships with glom, and
120 | it works like this:
121 |
122 | .. code::
123 |
124 | from glom import glom, Sum
125 |
126 | glom([1, 2, 3], Sum())
127 | # 6
128 |
129 | The version below does not have as much error handling, but reproduces
130 | all the same basic principles. This version of ``Sum()`` code also
131 | contains comments with references to explanatory notes below.
132 |
133 | .. code::
134 |
135 | from glom import glom, Path, T
136 | from glom.core import TargetRegistry, UnregisteredTarget # 1
137 |
138 | class Sum(object):
139 | def __init__(self, subspec=T, init=int): # 2
140 | self.subspec = subspec
141 | self.init = init
142 |
143 | def glomit(self, target, scope):
144 | if self.subspec is not T:
145 | target = scope[glom](target, self.subspec, scope) # 3
146 |
147 | try:
148 | # 4
149 | iterate = scope[TargetRegistry].get_handler('iterate', target, path=scope[Path])
150 | except UnregisteredTarget as ut:
151 | # 5
152 | raise TypeError('can only %s on iterable targets, not %s type (%s)'
153 | % (self.__class__.__name__, type(target).__name__, ut))
154 |
155 | try:
156 | iterator = iterate(target)
157 | except Exception as e:
158 | raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
159 | % (target.__class__.__name__, Path(*scope[Path]), e))
160 |
161 | return self._sum(iterator)
162 |
163 | def _sum(self, iterator): # 6
164 | ret = self.init()
165 |
166 | for v in iterator:
167 | ret += v
168 |
169 | return ret
170 |
171 | Now, let's take a look at the interesting parts, referencing the comments above:
172 |
173 | 1. Specifier types often reference the :class:`TargetRegistry`,
174 | which is not part of the top-level ``glom`` API, and must be
175 | imported from ``glom.core``. More on this in #4.
176 | 2. Specifier type ``__init__`` methods may take as many or as few
177 | arguments as desired, but many glom specifier types take a first
178 | parameter of a *subspec*, meant to be fetched right before the
179 | actual specifier's operation. This helps readability of
180 | glomspecs. See :class:`~glom.Coalesce` for an example of this
181 | idiom.
182 | 3. Specifier types should not reference the
183 | :func:`~glom.glom()` function directly, instead use the
184 | :func:`~glom.glom` function as a key to the ``scope`` map to get the
185 | currently active ``glom()``. This ensures that the extension type is
186 | compatible with advanced specifier types which override the
187 | ``glom()`` function.
188 | 4. To maximize compatibility with new target types, ``glom`` allows
189 | :ref:`new types and operations to be registered
190 | <setup-and-registration>` with the ``TargetRegistry``. Specifier types
191 | should respect this by contextually fetching these standard
192 | operators as demonstrated above. At the time of writing, the
193 | primary operators used by glom itself are ``"get"``,
194 | ``"iterate"``, ``"keys"``, ``"assign"``, and ``"delete"``.
195 | 5. In the event that the current target does not support your
196 | Specifier type's desired operation, it's customary to raise a helpful
197 | error. Consider creating your own exception type and inheriting
198 | from :class:`~glom.GlomError`.
199 | 6. Specifier types may have other methods and members in addition to
200 | the primary ``glomit()`` method. This ``_sum()`` method
201 | implements most of the core of our custom specifier type.
202 |
203 | Check out the implementation of the real :class:`glom.Sum` specifier for more details.
204 |
205 | Summing up
206 | ----------
207 |
208 | ``glom`` Specifier Types are more than just add-ons; the extension
209 | architecture is how most of ``glom`` itself is implemented. Build
210 | knowing that the paradigm is as powerful as anything built-in.
211 |
212 | If you need more examples, another simple one can be found in
213 | :ref:`this snippet <lisp-style-if>`. ``glom``'s source code itself
214 | contains many specifiers more advanced than the above. Simply search
215 | the codebase for ``glomit()`` methods and you will find no shortage.
216 |
217 | Happy extending!
218 |
--------------------------------------------------------------------------------
/docs/debugging.rst:
--------------------------------------------------------------------------------
1 | Exceptions & Debugging
2 | ======================
3 |
4 | While glom works well when all goes as intended, it even shines when
5 | data doesn't match expectations. glom's error messages and exception
6 | hierarchy have been designed to maximize readability and
7 | debuggability. Read on for a listing of glom's exceptions and how to
8 | debug them.
9 |
10 | .. contents:: Contents
11 | :local:
12 |
13 | .. _exceptions:
14 |
15 | Exceptions
16 | ----------
17 |
18 | glom introduces several new exception types designed to maximize
19 | readability and debuggability. Note that all these errors derive from
20 | :exc:`GlomError`, and are only raised from :func:`glom()` calls, not
21 | from spec construction or glom type registration. Those declarative
22 | and setup operations raise :exc:`ValueError`, :exc:`TypeError`, and
23 | other standard Python exceptions as appropriate.
24 |
25 | Here is a short list of links to all public exception types in glom.
26 |
27 | .. hlist::
28 | :columns: 3
29 |
30 | * :exc:`~glom.GlomError`
31 | * :exc:`~glom.PathAccessError`
32 | * :exc:`~glom.PathAssignError`
33 | * :exc:`~glom.PathDeleteError`
34 | * :exc:`~glom.CoalesceError`
35 | * :exc:`~glom.FoldError`
36 | * :exc:`~glom.MatchError`
37 | * :exc:`~glom.TypeMatchError`
38 | * :exc:`~glom.CheckError`
39 | * :exc:`~glom.UnregisteredTarget`
40 | * :exc:`~glom.BadSpec`
41 |
42 | .. _reading-exceptions:
43 |
44 | Reading a glom Exception
45 | ------------------------
46 |
47 | glom errors are regular Python exceptions, but may look a little
48 | different from other Python errors. Because glom is a data
49 | manipulation library, glom errors include a data traceback,
50 | interleaving spec and target data.
51 |
52 | For example, let's raise an error by glomming up some data that doesn't exist:
53 |
54 | .. code-block:: default
55 | :linenos:
56 |
57 | >>> target = {'planets': [{'name': 'earth', 'moons': 1}]}
58 | >>> glom(target, ('planets', ['rings']))
59 | Traceback (most recent call last):
60 | File "<stdin>", line 1, in <module>
61 | File "/home/mahmoud/projects/glom/glom/core.py", line 1787, in glom
62 | raise err
63 | glom.core.PathAccessError: error raised while processing, details below.
64 | Target-spec trace (most recent last):
65 | - Target: {'planets': [{'name': 'earth', 'moons': 1}]}
66 | - Spec: ('planets', ['rings'])
67 | - Spec: 'planets'
68 | - Target: [{'name': 'earth', 'moons': 1}]
69 | - Spec: ['rings']
70 | - Target: {'name': 'earth', 'moons': 1}
71 | - Spec: 'rings'
72 | glom.core.PathAccessError: could not access 'rings', part 0 of Path('rings'), got error: KeyError('rings')
73 |
74 | Let's step through this output:
75 |
76 |
77 | * Line **1**: We created a planet registry, similar to the one in the :doc:`tutorial`.
78 | * Line **2-3**: We try to get a listing of ``rings`` of all the planets. Instead, we get a Python traceback.
79 | * Line **7**: We see we have a :exc:`~glom.PathAccessError`.
80 | * Line **8-9**: The "target-spec trace", our data stack, begins. It always starts with the target data as it was passed in.
81 | * Line **10**: Next is the top-level spec, as passed in: ``('planets', ['rings'])``
82 | * Line **11**: glom takes the first part of the spec from line 10, ``'planets'``, to get the next target.
83 | * Line **12**: Because the spec on line 11 updated the current target, glom outputs it. When a spec is evaluated but the target value is unchanged, the target is skipped in the trace.
84 | * Line **14-15**: We get to the last two lines, which include the culprit target and spec
85 | * Line **16**: Finally, our familiar :exc:`~glom.PathAccessError` message,
86 | with more details about the error, including the original ``KeyError('rings')``.
87 |
88 | This view of glom evaluation answers many of the questions
89 | a developer or user would ask upon encountering the error:
90 |
91 | * What was the data?
92 | * Which part of the spec failed?
93 | * What was the original error?
94 |
95 | The data trace does this by peeling away at the target and spec until
96 | it hones in on the failure. Both targets and specs in traces are
97 | truncated to terminal width to maximize readability.
98 |
99 | .. note::
100 |
101 | If for some reason you need the full Python stack instead of the
102 | glom data traceback, pass ``glom_debug=True`` to the top-level glom
103 | call.
104 |
105 | .. _branched-exceptions:
106 |
107 | Reading Branched Exceptions
108 | ---------------------------
109 |
110 | Some glom spec types, like :class:`~glom.Coalesce` and
111 | :class:`~glom.Switch`, can try multiple specs in succession. These
112 | "branching" specs can also get multiple exceptions.
113 |
114 | Initially, debugging data for these branching specs was limited. But
115 | in v20.7.0, branching error trees were introduced, exposing
116 | information about every spec and target attempted before raising the
117 | final exception.
118 |
119 | All the exception reading advice in the ":ref:`reading-exceptions`"
120 | section applies, but there's a bit of extra formatting to visualize
121 | the error tree in the target-spec trace.
122 |
123 | Let's step line by line through a :class:`~glom.Coalesce` error tree:
124 |
125 | .. code-block:: default
126 | :linenos:
127 |
128 | >>> target = {'n': 'nope', 'xxx': {'z': {'v': 0}}}
129 | >>> glom(target, Coalesce(('xxx', 'z', 'n'), 'yyy'))
130 | Traceback (most recent call last):
131 | File "tmp.py", line 9, in _make_stack
132 | glom(target, spec)
133 | File "/home/mahmoud/projects/glom/glom/core.py", line 2029, in glom
134 | raise err
135 | glom.core.CoalesceError: error raised while processing, details below.
136 | Target-spec trace (most recent last):
137 | - Target: {'n': 'nope', 'xxx': {'z': {'v': 0}}}
138 | + Spec: Coalesce(('xxx', 'z', 'n'), 'yyy')
139 | |\ Spec: ('xxx', 'z', 'n')
140 | || Spec: 'xxx'
141 | || Target: {'z': {'v': 0}}
142 | || Spec: 'z'
143 | || Target: {'v': 0}
144 | || Spec: 'n'
145 | |X glom.core.PathAccessError: could not access 'n', part 0 of Path('n'), got error: KeyError('n')
146 | |\ Spec: 'yyy'
147 | |X glom.core.PathAccessError: could not access 'yyy', part 0 of Path('yyy'), got error: KeyError('yyy')
148 | glom.core.CoalesceError: no valid values found. Tried (('xxx', 'z', 'n'), 'yyy') and got (PathAccessError, PathAccessError) (at path ['xxx', 'z'])
149 |
150 | * Line **1-10**: Standard fare for glom use and error behavior, see ":ref:`reading-exceptions`"
151 | * Line **11**: We see a "**+**" when starting a branching spec. Each level of branch adds a "**|**" on the left to help track nesting level.
152 | * Line **12**: We see a "**\\**" indicating a new branch of the root branching spec.
153 | * Line **13-17**: Traversing downward as usual until...
154 | * Line **18**: We see an "**X**" indicating our first exception, causing the failure of this branch.
155 | * Line **19**: We see a "**\\**" which starts our next branch.
156 | * Line **20**: We see an "**X**" indicating our second and last exception, causing the failure of this branch.
157 | * Line **21**: The last line is our root level exception, dedented, same as any other glom error.
158 |
159 | Apart from the formatting, error branching doesn't change any other
160 | semantics of the glom exception being raised.
161 |
162 | .. _debugging:
163 |
164 | Debugging
165 | ---------
166 |
167 | Good error messages are great when the data has a problem, but what
168 | about when a spec is incorrect?
169 |
170 | Even the most carefully-constructed specifications eventually need
171 | debugging. If the error message isn't enough to fix your glom issues,
172 | that's where **Inspect** comes in.
173 |
174 | .. autoclass:: glom.Inspect
175 |
--------------------------------------------------------------------------------
/docs/faq.rst:
--------------------------------------------------------------------------------
1 | Frequently Asked Questions
2 | ==========================
3 |
4 | Paradigm shifts always raise a question or two.
5 |
6 | .. contents:: Contents
7 | :local:
8 |
9 | What does "glom" mean?
10 | ----------------------
11 |
12 | "glom" is short for "conglomerate", which means "gather into a compact
13 | form", coming from the Latin "glom-" meaning *ball*, like *globe*.
14 |
15 | glom can be used as a noun or verb. A developer might say, "I glommed
16 | together this API response." An astronomer might say, "these gloms of
17 | space dust are forming planets and comets."
18 |
19 | Got some data you need to transform? **glom it! ☄️**
20 |
21 | Any other glom terminology worth knowing?
22 | -----------------------------------------
23 |
24 | A couple of conventional terms that help navigate around glom's
25 | semantics:
26 |
27 | * **target** - ``glom`` operates on a variety of inputs, so we simply
28 | refer to the object being accessed (i.e., the first argument to
29 | ``glom()``) as the "target"
30 | * **spec** - *(aka "glomspec")* The accompanying template used to
31 | specify the structure and sources of the output.
32 | * **output** - The value retrieved or created and returned by
33 | ``glom()``.
34 |
35 | All of these can be seen in the conventional call to :func:`~glom.glom`::
36 |
37 | output = glom(target, spec)
38 |
39 | Nothing too wild, but these standard terms really do help clarify the
40 | complex situations ``glom`` was built to handle.
41 |
42 | What is glom's public API?
43 | --------------------------
44 |
45 | Obviously, the primary glom API is the ``glom()`` function
46 | itself. Beyond this, there's other functionality at various degrees of
47 | readiness, ranging from production to alpha within the ``glom``
48 | package. We try to keep the public API as production-ready as
49 | possible. That also means, if functionality is not public, it may
50 | change or disappear without advance notice or even a CHANGELOG entry.
51 |
52 | First, if it's not in the top-level ``glom`` package, it's not part of
53 | glom's public API. Another good indicator is that if a type or object
54 | is not in these glom docs, then it's not public.
55 |
56 | If functionality in the top-level package is not documented, please
57 | file an issue or pull request so we can get that sorted out. Thanks in
58 | advance!
59 |
60 | What's a convenience function?
61 | ------------------------------
62 |
63 | The primary entrypoint for glom is the ``glom()`` function, but over the years
64 | several other single-purpose functions were added, mostly for readability.
65 |
66 | If you see a function with the same name as a specifier type, but lowercased,
67 | that's a convenience function. Take :class:`~glom.Assign` and :func:`~glom.assign`
68 | as examples:
69 |
70 | .. code-block:: python
71 |
72 | glom({}, Assign('a'), 'b')
73 | # is equivalent to
74 | assign({}, 'a', 'b')
75 |
76 | At the time of writing, other convenience functions include :func:`~glom.delete`,
77 | :func:`~glom.flatten`, and :func:`~glom.merge`. Note that when performing multiple
78 | glom operations (access, assignment, delete, etc.), it's clearer and more efficient to
79 | create a spec and execute it with the :func:`~glom.glom` top-level function.
80 |
81 | Other glom tips?
82 | ----------------
83 |
84 | Just a few (for now):
85 |
86 | * Specs don't have to live in the glom call. You can put them
87 | anywhere. Commonly-used specs work as class attributes and globals.
88 | * Using glom's declarative approach does wonders for code coverage,
89 | much like `attrs`_ which goes great with ``glom``.
90 | * Advanced tips
91 | * glom is designed to support all of Python's built-ins as targets,
92 | and is readily extensible to other types and special handling, through
93 | :func:`~glom.register()`.
94 | * If you're trying to minimize global state, consider
95 | instantiating your own :class:`~glom.Glommer` object to
96 | encapsulate any type registration changes.
97 |
98 | If you've got more tips or patterns, `send them our way`_!
99 |
100 | .. _attrs: https://github.com/python-attrs/attrs
101 | .. _send them our way: https://github.com/mahmoud/glom/issues
102 |
103 | Why not just write more Python?
104 | -------------------------------
105 |
106 | The answer is more than just DRY ("Don't Repeat Yourself").
107 |
108 | Here on the glom team, we're big fans of Python. Have been for
109 | years. In fact, Python is one of a tiny handful of languages that
110 | could support something as powerful as glom.
111 |
112 | But not all Python code is the same. We built glom to replace the kind
113 | of Python that is about as un-Pythonic as code gets: simultaneously
114 | fluffy, but also fragile. Simple transformations requiring countless
115 | lines.
116 |
117 | Before glom, the "right" way to write this transformation code was
118 | verbose. Whether trying to fetch values nested within objects that may
119 | contain attributes set to ``None``, or performing a list comprehension
120 | which may raise an exception, the *correct* code was many lines of
121 | repetitious ``try-except`` blocks with a lot of hand-written exception
122 | messages.
123 |
124 | Written any more compactly, this Python would produce failures
125 | expressed in errors too low-level to associate with the higher-level
126 | transformation.
127 |
128 | So the glom-less code was hard to change, hard to debug, or
129 | both. ``glom`` specifications are none of the above, thanks to
130 | meaningful, high-level error messages, a :class:`built-in debugging
131 | facility <glom.Inspect>`, and a compact, composable design.
132 |
133 | In short, thanks to Python, glom can provide a Pythonic solution for
134 | those times when pure Python wasn't Pythonic enough.
135 |
136 | Should I use glom or remap?
137 | ---------------------------
138 |
139 | These days, you have a lot of choices when it comes to nested data manipulation.
140 | One choice is between glom and `remap`_, the recursive ``map()``.
141 | Given that the same people wrote both utilities, we recommend:
142 |
143 | * If you know the shape of the output ahead of time, then go with glom.
144 | * If your output shape is determined by the input, then use remap.
145 |
146 | Remap performs a full traversal of a nested data structure, walking it like a tree.
147 | In contrast, glom only goes where it's told by the spec.
148 |
149 | For example, imagine an error reporting service.
150 | Users send you an arbitrary dictionary of metadata related to the error.
151 | But you have a requirement that you don't store secrets.
152 |
153 | Remap is a great way to traverse that full structure,
154 | looking for all keys containing the substring "secret",
155 | replacing the associated value with "[REDACTED]".
156 | The output shape will be the same as the input shape.
157 |
158 | At the time of writing (2023), glom isn't designed for this use case.
159 |
160 | .. _remap: https://boltons.readthedocs.io/en/latest/iterutils.html#nested
161 |
162 | How does glom work?
163 | -------------------
164 |
165 | The core conceptual engine of glom is a very simple recursive loop. It
166 | could fit on a business card. OK maybe a postcard.
167 |
168 | In fact, here it is, in literate form, modified from this `early point
169 | in glom history`_:
170 |
171 | .. code-block:: python
172 |
173 | def glom(target, spec):
174 |
175 | # if the spec is a string or a Path, perform a deep-get on the target
176 | if isinstance(spec, (basestring, Path)):
177 | return _get_path(target, spec)
178 |
179 | # if the spec is callable, call it on the target
180 | elif callable(spec):
181 | return spec(target)
182 |
183 | # if the spec is a dict, assign the result of
184 | # the glom on the right to the field key on the left
185 | elif isinstance(spec, dict):
186 | ret = {}
187 | for field, subspec in spec.items():
188 | ret[field] = glom(target, subspec)
189 | return ret
190 |
191 | # if the spec is a list, run the spec inside the list on every
192 | # element in the list and return the new list
193 | elif isinstance(spec, list):
194 | subspec = spec[0]
195 | iterator = _get_iterator(target)
196 | return [glom(t, subspec) for t in iterator]
197 |
198 | # if the spec is a tuple of specs, chain the specs by running the
199 | # first spec on the target, then running the second spec on the
200 | # result of the first, and so on.
201 | elif isinstance(spec, tuple):
202 | res = target
203 | for subspec in spec:
204 | res = glom(res, subspec)
205 | return res
206 | else:
207 | raise TypeError('expected one of the above types')
208 |
209 |
210 | .. _early point in glom history: https://github.com/mahmoud/glom/blob/186757b47af3d33901df4bf715874b5f3c781d8f/glom/__init__.py#L74-L91
211 |
212 | Does Python need a null-coalescing operator?
213 | --------------------------------------------
214 |
215 | Not technically a glom question, but it is frequently_ asked_!
216 |
217 | `Null coalescing operators`_ traverse nested objects and return null
218 | (or ``None`` for us) on the first null or non-traversable object,
219 | depending on implementation.
220 |
221 | It's basically a compact way of doing a deep :func:`getattr()` with a
222 | default set to ``None``.
223 |
224 | Suffice to say that ``glom(target, T.a.b.c, default=None)`` achieves
225 | this with ease, but I still want to revisit the question, since it's
226 | part of what got me thinking about ``glom`` in the first place.
227 |
228 | First off, working in PayPal's SOA environment, my team dealt with
229 | literally tens of thousands of service objects, with object
230 | definitions (from other teams) nested so deep as to make an
231 | 80-character line length laughable.
232 |
233 | But null coalescing wouldn't have helped, because in most of those
234 | cases ``None`` wasn't what we needed. We needed a good, automatically
235 | generated error message when a deeply-nested field wasn't accessible. Not
236 | ``NoneType has no attribute 'x'``, but not plain old ``None`` either.
237 |
238 | To solve this, I wrote my share of deep-gets before ``glom``,
239 | including the open-source `boltons.iterutils.get_path()`_. For
240 | whatever reason, it took me years of usage to realize just how often
241 | the deep-gets were coupled with the other transformations that
242 | ``glom`` enables. Now, I can never go back to a simple deep-get.
243 |
244 | Another years-in-the-making observation, from my time doing JavaScript
245 | then PHP then Django templates: all were much more lax on typing than
246 | Python. Not because of a fierce belief in weak types, though. More
247 | because when you're templating, it's inherently safer to return a
248 | blank value on lookup failures. You're so close to text formats that
249 | this default achieves a pretty desirable result. While implicitly
250 | doing this isn't my cup of tea, and ``glom`` opts for explicit
251 | :class:`~glom.Coalesce` specifiers, this connection contributed to the
252 | concept of ``glom`` as an "object templating" system.
253 |
254 |
255 |
256 |
257 | .. _frequently: https://mail.python.org/pipermail/python-ideas/2015-September/036289.html
258 | .. _asked: https://mail.python.org/pipermail/python-ideas/2016-November/043517.html
259 | .. _Null coalescing operators: https://en.wikipedia.org/wiki/Null_coalescing_operator
260 | .. _boltons.iterutils.get_path(): http://boltons.readthedocs.io/en/latest/iterutils.html#boltons.iterutils.get_path
261 |
--------------------------------------------------------------------------------
/docs/grouping.rst:
--------------------------------------------------------------------------------
1 | Reduction & Grouping
2 | ====================
3 |
4 | This document contains glom techniques for transforming a collection
5 | of data to a smaller set, otherwise known as "grouping" or
6 | "reduction".
7 |
8 | Combining iterables with Flatten and Merge
9 | ------------------------------------------
10 |
11 | .. versionadded:: 19.1.0
12 |
13 | Got lists of lists? Sets of tuples? A sequence of dicts (but only want
14 | one)? Do you find yourself reaching for Python's builtin :func:`sum`
15 | and :func:`reduce`? To handle these situations and more, glom has four
16 | specifier types and two convenience functions:
17 |
18 | .. autofunction:: glom.flatten
19 |
20 | .. autoclass:: glom.Flatten
21 |
22 | .. autofunction:: glom.merge
23 |
24 | .. autoclass:: glom.Merge
25 |
26 | .. autoclass:: glom.Sum
27 |
28 | .. autoclass:: glom.Fold
29 |
30 | Exceptions
31 | ----------
32 |
33 | .. autoclass:: glom.FoldError
34 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | glom
2 | ====
3 |
4 | *Restructuring data, the Python way.*
5 |
6 | |release| |calver| |changelog|
7 |
8 | **glom** is a new approach to working with data in Python, featuring:
9 |
10 | * :ref:`Path-based access <access-granted>` for nested structures
11 | * :ref:`Declarative data transformation <glom-func>` using lightweight, Pythonic specifications
12 | * Readable, meaningful :ref:`error messages <exceptions>`
13 | * Built-in :ref:`debugging <debugging>` features
14 | * Plus, :doc:`deep assignment <mutation>`, :doc:`streaming <streaming>`, :doc:`data validation <matching>`, and *more*!
15 |
16 | While it may sound like a lot, glom's straightforward approach becomes
17 | second-nature very quickly. Start with the :doc:`tutorial`,
18 | or `try glom in your browser now`__!
19 |
20 | .. __: https://yak.party/glompad/#spec=%22a.b.c%22%0A&target=%7B%22a%22%3A+%7B%22b%22%3A+%7B%22c%22%3A+%22d%22%7D%7D%7D%0A&v=1
21 |
22 | Installation
23 | ------------
24 |
25 | glom is pure Python, and tested on Python 3.7+, as well as
26 | PyPy3. Installation is easy::
27 |
28 | pip install glom
29 |
30 | Then you're ready to get glomming!
31 |
32 | .. code-block:: python
33 |
34 | from glom import glom
35 |
36 | target = {'a': {'b': {'c': 'd'}}}
37 | glom(target, 'a.b.c') # returns 'd'
38 |
39 | There's much, much more to glom, check out the :doc:`tutorial` and :doc:`API reference <api>`!
40 |
41 |
42 | *Just glom it! ☄️*
43 |
44 |
45 | .. |release| image:: https://img.shields.io/pypi/v/glom.svg
46 | :target: https://pypi.org/project/glom/
47 |
48 | .. |calver| image:: https://img.shields.io/badge/calver-YY.MM.MICRO-22bfda.svg
49 | :target: https://calver.org
50 |
51 | .. |changelog| image:: https://img.shields.io/badge/CHANGELOG-UPDATED-b84ad6.svg
52 | :target: https://github.com/mahmoud/glom/blob/master/CHANGELOG.md
53 |
54 |
55 | .. toctree::
56 | :maxdepth: 1
57 | :caption: Learning glom
58 |
59 | tutorial
60 | faq
61 | by_analogy
62 | snippets
63 | cli
64 |
65 | .. toctree::
66 | :maxdepth: 2
67 | :caption: API Reference
68 |
69 | api
70 | mutation
71 | streaming
72 | grouping
73 | matching
74 | debugging
75 |
76 | .. toctree::
77 | :maxdepth: 1
78 | :caption: Extending glom
79 |
80 | custom_spec_types
81 | modes
82 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=glom
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | echo.installed, then set the SPHINXBUILD environment variable to point
21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | echo.may add the Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/docs/matching.rst:
--------------------------------------------------------------------------------
1 | Matching & Validation
2 | =====================
3 |
4 | .. automodule:: glom.matching
5 |
6 | .. contents:: Contents
7 | :local:
8 |
9 | Validation with Match
10 | ~~~~~~~~~~~~~~~~~~~~~
11 |
12 | For matching whole data structures, use a :class:`~glom.Match` spec.
13 |
14 | .. autoclass:: glom.Match
15 | :members:
16 |
17 | Optional and required ``dict`` key matching
18 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
19 |
20 | Note that our four :class:`~glom.Match` rules above imply that
21 | :class:`object` is a match-anything pattern. Because
22 | ``isinstance(val, object)`` is true for all values in Python,
23 | ``object`` is a useful stopping case. For instance, if we wanted to
24 | extend an example above to allow additional keys and values in the
25 | user dict above we could add :class:`object` as a generic pass through::
26 |
27 | >>> target = [{'id': 1, 'email': 'alice@example.com', 'extra': 'val'}]
28 | >>> spec = Match([{'id': int, 'email': str, object: object}])
29 | >>> glom(target, spec) == \
30 | ... [{'id': 1, 'email': 'alice@example.com', 'extra': 'val'}]
31 | True
32 |
33 | The fact that ``{object: object}`` will match any dictionary exposes
34 | the subtlety in :class:`~glom.Match` dictionary evaluation.
35 |
36 | By default, value match keys are required, and other keys are
37 | optional. For example, ``'id'`` and ``'email'`` above are required
38 | because they are matched via ``==``. If either was not present, it
39 | would raise :class:`~glom.MatchError`. :class:`object` however is matched
40 | with :func:`isinstance()`. Since it is not a value-match comparison,
41 | it is not required.
42 |
43 | This default behavior can be modified with :class:`~glom.Required`
44 | and :class:`~glom.Optional`.
45 |
46 | .. autoclass:: glom.Optional
47 |
48 | .. autoclass:: glom.Required
49 |
50 | ``M`` Expressions
51 | ~~~~~~~~~~~~~~~~~
52 |
53 | The most concise way to express validation and guards.
54 |
55 | .. autodata:: glom.M
56 |
57 | Boolean operators and matching
58 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
59 |
60 | While ``M`` is an easy way to construct expressions, sometimes a more
61 | object-oriented approach can be more suitable.
62 |
63 | .. autoclass:: glom.Or
64 |
65 | .. autoclass:: glom.And
66 |
67 | .. autoclass:: glom.Not
68 |
69 | String matching
70 | ~~~~~~~~~~~~~~~
71 |
72 | .. autoclass:: glom.Regex
73 |
74 | Control flow with ``Switch``
75 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
76 |
77 | Match becomes even more powerful when combined with the ability to
78 | branch spec execution.
79 |
80 | .. autoclass:: glom.Switch
81 |
82 | Exceptions
83 | ~~~~~~~~~~
84 |
85 | .. autoclass:: glom.MatchError
86 |
87 | .. autoclass:: glom.TypeMatchError
88 |
89 | Validation with Check
90 | ~~~~~~~~~~~~~~~~~~~~~
91 |
92 | .. warning::
93 |
94 | Given the suite of tools introduced with :class:`~glom.Match`, the
95 | :class:`~glom.Check` specifier type may be deprecated in a future
96 | release.
97 |
98 | .. autoclass:: glom.Check
99 |
100 | .. autoclass:: glom.CheckError
101 |
--------------------------------------------------------------------------------
/docs/modes.rst:
--------------------------------------------------------------------------------
1 | ``glom`` Modes
2 | ==============
3 |
4 | .. note::
5 |
6 | Be sure to read ":doc:`custom_spec_types`" before diving into the
7 | deep details below.
8 |
9 | A glom "mode" determines how Python built-in data structures are
10 | evaluated. Think of it like a dialect for how :class:`dict`,
11 | :class:`tuple`, :class:`list`, etc., are interpreted in a spec. Modes
12 | do not change the behavior of ``T``, or many other core
13 | specifiers. Modes are one of the keys to keeping glom specs short and
14 | readable.
15 |
16 | A mode is used similarly to a spec: whatever Python data structure is
17 | passed to the mode type constructor will be evaluated under that
18 | mode. Once set, the mode remains in place until it is overridden by
19 | another mode.
20 |
21 | glom only has a few modes:
22 |
23 | 1. :class:`~glom.Auto` - The default glom behavior, used for data
24 | transformation, with the spec acting as a template.
25 | 2. :class:`~glom.Fill` - A variant of the default transformation
26 | behavior; preferring to "fill" containers instead of
27 | iterating, chaining, etc.
28 | 3. :class:`~glom.Match` - Treats the spec as a pattern, checking
29 | that the target matches.
30 |
31 | Adding a new mode is relatively rare, but when it comes up this
32 | document includes relevant details.
33 |
34 |
35 | Writing custom Modes
36 | --------------------
37 |
38 | A mode is a spec which sets ``scope[MODE]`` to a function which
39 | accepts ``target``, ``spec``, and ``scope`` and returns a result, a
40 | signature very similar to the top-level :func:`~glom.glom` method
41 | itself.
42 |
43 | For example, here is an abbreviated version of the :class:`~glom.Fill`
44 | mode:
45 |
46 |
47 | .. code-block:: python
48 |
49 | class Fill(object):
50 | def __init__(self, spec):
51 | self.spec = spec
52 |
53 | def glomit(self, target, scope):
54 | scope[MODE] = _fill
55 | return scope[glom](target, self.spec, scope)
56 |
57 | def _fill(target, spec, scope):
58 | recurse = lambda val: scope[glom](target, val, scope)
59 | if type(spec) is dict:
60 | return {recurse(key): recurse(val)
61 | for key, val in spec.items()}
62 | if type(spec) in (list, tuple, set, frozenset):
63 | result = [recurse(val) for val in spec]
64 | if type(spec) is list:
65 | return result
66 | return type(spec)(result)
67 | if callable(spec):
68 | return spec(target)
69 | return spec
70 |
71 | Like any other :doc:`Specifier Type <custom_spec_types>`, ``Fill`` has
72 | a ``glomit()`` method, and this method sets the ``MODE`` key in the
73 | :ref:`glom scope ` to our ``_fill`` function. The name
74 | itself doesn't matter, but the signature must match exactly:
75 | ``(target, spec, scope)``.
76 |
77 | As mentioned above, custom modes are relatively rare for glom. If you
78 | write one, `let us know <https://github.com/mahmoud/glom/issues>`_!
79 |
--------------------------------------------------------------------------------
/docs/mutation.rst:
--------------------------------------------------------------------------------
1 | Assignment & Mutation
2 | =====================
3 |
4 | .. automodule:: glom.mutation
5 |
6 | .. contents:: Contents
7 | :local:
8 |
9 | Assignment
10 | ----------
11 |
12 | Deeply assign within an existing structure, given a path and a value.
13 |
14 | .. autofunction:: glom.assign
15 |
16 | .. autoclass:: glom.Assign
17 |
18 | Deletion
19 | --------
20 |
21 | Delete attributes from objects and keys from containers.
22 |
23 | .. autofunction:: glom.delete
24 |
25 | .. autoclass:: glom.Delete
26 |
27 |
28 | Exceptions
29 | ----------
30 |
31 | .. autoclass:: glom.PathAssignError
32 |
33 | .. autoclass:: glom.PathDeleteError
34 |
--------------------------------------------------------------------------------
/docs/outreach.md:
--------------------------------------------------------------------------------
1 | # glom outreach
2 |
3 | ## Interesting stack overflow questions to answer
4 |
5 | Some of these may be more amenable to remapping.
6 |
7 | 1. https://stackoverflow.com/questions/1602934/check-if-a-given-key-already-exists-in-a-dictionary 2,627,182
8 | 1. https://stackoverflow.com/questions/38987/how-to-merge-two-dictionaries-in-a-single-expression 1,391,000
9 | 1. https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists 1,388,600
10 | 1. https://stackoverflow.com/questions/4984647/accessing-dict-keys-like-an-attribute 128,380
11 | 1. https://stackoverflow.com/questions/6027558/flatten-nested-python-dictionaries-compressing-keys 75,500
12 | 1. https://stackoverflow.com/questions/651794/whats-the-best-way-to-initialize-a-dict-of-dicts-in-python 66,390
13 | 1. https://stackoverflow.com/questions/2213923/removing-duplicates-from-a-list-of-lists 60,962
14 | 1. https://stackoverflow.com/questions/9285086/access-dict-key-and-return-none-if-doesnt-exist 33,140
15 | 1. https://stackoverflow.com/questions/10632839/transform-list-of-tuples-into-a-flat-list-or-a-matrix 23,128
16 | 1. https://stackoverflow.com/questions/16204076/pep8-compliant-deep-dictionary-access 2,647
17 | 1. https://stackoverflow.com/questions/21297475/set-a-value-deep-in-a-dict-dynamically 1,017
18 | 1. https://stackoverflow.com/questions/22377725/python-linq-like-methods 947
19 | 1. https://stackoverflow.com/questions/11515620/safely-access-objects-in-python-without-try-catch 368
20 | 1. https://stackoverflow.com/questions/48262059/recursively-flatten-nested-list-in-python 89
21 | 1. https://stackoverflow.com/questions/23106895/how-to-use-a-string-to-safely-access-deep-python-properties 55
22 |
23 | ## Completed answers
24 |
25 | If you're reading this, upvote!
26 |
27 | 1. https://stackoverflow.com/a/54656024/178013
28 | 1. https://stackoverflow.com/a/53354398/178013
29 | 1. https://stackoverflow.com/questions/53108624/using-python-module-glom-extract-irregular-nested-lists-into-a-flattened-list-o
30 | 1. https://stackoverflow.com/questions/54078102/get-value-of-nested-attribute-by-filtering-list-on-other-attribute-with-python-g
31 | 1. https://stackoverflow.com/questions/38220370/is-there-a-way-to-execute-jq-from-python/55017219#55017219 <- great answer!
32 |
--------------------------------------------------------------------------------
/docs/requirements-rtd.txt:
--------------------------------------------------------------------------------
1 | -r ../requirements.txt
2 | alabaster==0.7.13
3 | atomicwrites==1.4.1
4 | Babel==2.11.0
5 | certifi==2022.12.7
6 | charset-normalizer==3.0.1
7 | docutils==0.17.1
8 | idna==3.4
9 | imagesize==1.4.1
10 | Jinja2==3.1.2
11 | livereload==2.6.3
12 | MarkupSafe==2.1.1
13 | more-itertools==9.0.0
14 | Pygments==2.14.0
15 | pytz==2022.7.1
16 | requests==2.28.2
17 | six==1.16.0
18 | snowballstemmer==2.2.0
19 | Sphinx==5.3.0
20 | sphinx-autobuild==2021.3.14
21 | sphinx-rtd-theme==1.1.1
22 | sphinxcontrib-devhelp==1.0.2
23 | sphinxcontrib-htmlhelp==2.0.0
24 | sphinxcontrib-jsmath==1.0.1
25 | sphinxcontrib-qthelp==1.0.3
26 | sphinxcontrib-serializinghtml==1.1.5
27 | sphinxcontrib.applehelp==1.0.2
28 | toml==0.10.2
29 | tornado==6.2
30 | urllib3==1.26.14
31 |
--------------------------------------------------------------------------------
/docs/snippets.rst:
--------------------------------------------------------------------------------
1 | Examples & Snippets
2 | ===================
3 |
4 | glom can do a lot of things, in the right hands. This doc makes those
5 | hands yours, through sample code of useful building blocks and common
6 | glom tasks.
7 |
8 | .. contents:: Contents
9 | :local:
10 |
11 | .. note::
12 |
13 | All samples below assume ``from glom import glom, T, Call`` and any
14 | other dependencies.
15 |
16 | Reversing a Target
17 | ------------------
18 |
19 | Here are a couple ways to reverse the current target. The first uses
20 | basic Python builtins, the second uses the :data:`~glom.T` object.
21 |
22 |
23 | .. code-block:: python
24 |
25 | glom([1, 2, 3], (reversed, list))
26 | glom([1, 2, 3], T[::-1])
27 |
28 |
29 | Iteration Result as Tuple
30 | -------------------------
31 |
32 | The default glom iteration specifier returns a list, but it's easy to
33 | turn that list into a tuple. The following returns a tuple of
34 | absolute-valued integers:
35 |
36 |
37 | .. code-block:: python
38 |
39 | glom([-1, 2, -3], ([abs], tuple))
40 |
41 |
42 | Data-Driven Assignment
43 | ----------------------
44 |
45 | glom's dict specifier interprets the keys as constants. A different
46 | technique is required if the dict keys are part of the target data
47 | rather than spec.
48 |
49 |
50 | .. code-block:: python
51 |
52 | glom({1:2, 2:3}, Call(dict, args=(T.items(),)))
53 | glom({1:2, 2:3}, lambda t: dict(t.items()))
54 | glom({1:2, 2:3}, dict)
55 |
56 |
57 | Construct Instance
58 | ------------------
59 |
60 | A common use case is to construct an instance. In the most basic
61 | case, the default behavior on callable will suffice.
62 |
63 |
64 | The following converts a list of ints to a list of
65 | :class:`decimal.Decimal` objects.
66 |
67 |
68 | .. code-block:: python
69 |
70 | glom([1, 2, 3], [Decimal])
71 |
72 |
73 | If additional arguments are required, :class:`~glom.Call` or ``lambda``
74 | are good options.
75 |
76 | This converts a list to a :class:`collections.deque`,
77 | while specifying a max size of 10.
78 |
79 |
80 | .. code-block:: python
81 |
82 | glom([1, 2, 3], Call(deque, args=[T, 10]))
83 | glom([1, 2, 3], lambda t: deque(t, 10))
84 |
85 |
86 | Filtered Iteration
87 | ------------------
88 | Sometimes in addition to stepping through an iterable,
89 | you'd like to omit some of the items from the result
90 | set all together. Here are two ways
91 | to filter the odd numbers from a list.
92 |
93 |
94 | .. code-block:: python
95 |
96 | glom([1, 2, 3, 4, 5, 6], lambda t: [i for i in t if i % 2])
97 | glom([1, 2, 3, 4, 5, 6], [lambda i: i if i % 2 else SKIP])
98 |
99 |
100 | The second approach demonstrates the use of ``glom.SKIP`` to
101 | back out of an execution.
102 |
103 | This can also be combined with :class:`~glom.Coalesce` to
104 | filter items which are missing sub-attributes.
105 |
106 | Here is an example of extracting the primary email from a group
107 | of contacts, skipping where the email is empty string, None,
108 | or the attribute is missing.
109 |
110 | .. code-block:: python
111 |
112 | glom(contacts, [Coalesce('primary_email.email', skip=('', None), default=SKIP)])
113 |
114 |
115 | Preserve Type
116 | -------------
117 | The iteration specifier will walk lists and tuples. In some cases it
118 | would be convenient to preserve the target type in the result type.
119 |
120 | This glomspec iterates over a tuple or list, adding one to each
121 | element, and uses :data:`~glom.T` to return a tuple or list depending
122 | on the target input's type.
123 |
124 |
125 | .. code-block:: python
126 |
127 | glom((1, 2, 3), (
128 | {
129 | "type": type,
130 | "result": [T + 1] # arbitrary operation
131 | }, T['type'](T['result'])))
132 |
133 |
134 | This demonstrates an advanced technique -- just as a tuple
135 | can be used to process sub-specs "in series", a dict
136 | can be used to store intermediate results while processing
137 | sub-specs "in parallel" so they can then be recombined later on.
138 |
139 |
140 | Automatic Django ORM type handling
141 | ----------------------------------
142 |
143 | In day-to-day Django ORM usage, Managers_ and QuerySets_ are
144 | everywhere. They work great with glom, too, but they work even better
145 | when you don't have to call ``.all()`` all the time. Enable automatic
146 | iteration using the following :meth:`~glom.register` technique:
147 |
148 | .. code-block:: python
149 |
150 | import glom
151 | import django.db.models
152 |
153 | glom.register(django.db.models.Manager, iterate=lambda m: m.all())
154 | glom.register(django.db.models.QuerySet, iterate=lambda qs: qs.all())
155 |
156 | Call this in ``settings`` or somewhere similarly early in your
157 | application setup for the best results.
158 |
159 | .. _Managers: https://docs.djangoproject.com/en/2.0/topics/db/managers/
160 | .. _QuerySets: https://docs.djangoproject.com/en/2.0/ref/models/querysets/
161 |
162 |
163 | Filter Iterable
164 | ---------------
165 |
166 | An iteration specifier can filter items out by using
167 | :data:`~glom.SKIP` as the default of a :class:`~glom.Check` object.
168 |
169 |
170 | .. code-block:: python
171 |
172 | glom(['cat', 1, 'dog', 2], [Check(type=str, default=SKIP)])
173 | # ['cat', 'dog']
174 |
175 | You can also truncate the list at the first failing check by using
176 | :data:`~glom.STOP`.
177 |
178 | .. _lisp-style-if:
179 |
180 | Lisp-style If Extension
181 | -----------------------
182 |
183 | Any class with a glomit method will be treated as a spec by glom.
184 | As an example, here is a lisp-style If expression custom spec type:
185 |
186 | .. code-block:: python
187 |
188 | class If(object):
189 | def __init__(self, cond, if_, else_=None):
190 | self.cond, self.if_, self.else_ = cond, if_, else_
191 |
192 | def glomit(self, target, scope):
193 | g = lambda spec: scope[glom](target, spec, scope)
194 | if g(self.cond):
195 | return g(self.if_)
196 | elif self.else_:
197 | return g(self.else_)
198 | else:
199 | return None
200 |
201 | glom(1, If(bool, {'yes': T}, {'no': T}))
202 | # {'yes': 1}
203 | glom(0, If(bool, {'yes': T}, {'no': T}))
204 | # {'no': 0}
205 |
206 |
207 | Parallel Evaluation of Sub-Specs
208 | --------------------------------
209 |
210 | This is another example of a simple glom extension.
211 | Sometimes it is convenient to execute multiple glom-specs
212 | in parallel against a target, and get a sequence of their
213 | results.
214 |
215 | .. code-block:: python
216 |
217 | class Seq(object):
218 | def __init__(self, *subspecs):
219 | self.subspecs = subspecs
220 |
221 | def glomit(self, target, scope):
222 | return [scope[glom](target, spec, scope) for spec in self.subspecs]
223 |
224 | glom('1', Seq(float, int))
225 | # [1.0, 1]
226 |
227 |
228 | Without this extension, the simplest way to achieve the same result is
229 | with a dict:
230 |
231 | .. code-block:: python
232 |
233 | glom('1', ({1: float, 2: int}, T.values()))
234 |
235 |
236 | Clamp Values
237 | ------------
238 |
239 | A common numerical operation is to clamp values -- if they
240 | are above or below a certain value, assign them to that value.
241 |
242 | Using a pattern-matching glom idiom, this can be implemented
243 | simply:
244 |
245 | .. code-block:: python
246 |
247 | glom(range(10), [(M < 7) | Val(7)])
248 | # [0, 1, 2, 3, 4, 5, 6, 7, 7, 7]
249 |
250 |
251 | What if you want to drop rather than clamp out-of-range values?
252 |
253 | .. code-block:: python
254 |
255 | glom(range(10), [(M < 7) | Val(SKIP)])
256 | # [0, 1, 2, 3, 4, 5, 6]
257 |
258 |
259 | Transform Tree
260 | --------------
261 |
262 | With an arbitrary depth tree, :class:`~glom.Ref` can be used to
263 | express a recursive spec.
264 |
265 | For example, this ``etree2dicts`` spec will recursively walk an ``ElementTree``
266 | instance and transform it from nested objects to nested dicts.
267 |
268 | .. code-block:: python
269 |
270 | etree2dicts = Ref('ElementTree',
271 | {"tag": "tag", "text": "text", "attrib": "attrib", "children": (iter, [Ref('ElementTree')])})
272 |
273 |
274 | Alternatively, say we only wanted to generate tuples of tag and children:
275 |
276 | .. code-block:: python
277 |
278 | etree2tuples = Fill(Ref('ElementTree', (T.tag, Iter(Ref('ElementTree')).all())))
279 |
280 |
281 | (Note also the use of :class:`~glom.Fill` mode to easily construct a tuple.)
282 |
283 | .. code-block:: html
284 |
285 |    <html>
286 |      <head>
287 |        <title>the title</title>
288 |      </head>
289 |      <body>
290 |        <p>A paragraph</p>
291 |      </body>
292 |    </html>
293 |
294 |
295 | Will translate to the following tuples:
296 |
297 | .. code-block:: python
298 |
299 | >>> etree = ElementTree.fromstring(html_text)
300 | >>> glom(etree, etree2tuples)
301 | ('html', [('head', [('title', [])]), ('body', [('p', [])])])
302 |
303 |
304 | Fix Up Strings in Parsed JSON
305 | -----------------------------
306 |
307 | Tree-walking with :class:`~glom.Ref()` combines powerfully with
308 | pattern matching from :class:`~glom.Match()`.
309 |
310 | In this case, consider that we want to transform parsed JSON recursively,
311 | such that all unicodes are converted to native strings.
312 |
313 |
314 | .. code-block:: python
315 |
316 | glom(json.loads(data),
317 | Ref('json',
318 | Match(Switch({
319 | dict: {Ref('json'): Ref('json')},
320 | list: [Ref('json')],
321 | type(u''): Auto(str),
322 | object: T}))
323 | )
324 | )
325 |
326 |
327 | :class:`~glom.Match()` above splits the :class:`~glom.Ref()` evaluation into 4 cases:
328 |
329 | * on :class:`dict`, use :class:`~glom.Ref()` to recurse for all keys and values
330 | * on :class:`list`, use :class:`~glom.Ref()` to recurse on each item
331 | * on text objects (``type(u'')``) -- py3 :class:`str` or py2
332 | :class:`unicode` -- transform the target with :class:`str`
333 | * for all other values (``object``), pass them through
334 |
335 | As motivation for why this might come up: attributes, class names,
336 | function names, and identifiers must be the native string type for a
337 | given Python, i.e., bytestrings in Python 2 and unicode in Python 3.
338 |
339 |
340 | Store and Retrieve Current Target
341 | ---------------------------------
342 |
343 | The :data:`~glom.A` scope assignment helper makes it
344 | convenient to hold on to the current target and then reset it.
345 |
346 | The ``(A.t, ..., S.t)`` "sandwich" is a convenient idiom for these
347 | cases.
348 |
349 | For example, we could use this to update a ``dict``:
350 |
351 |
352 | .. code-block:: python
353 |
354 | glom({}, (A.t, T.update({1: 1}), S.t))
355 |
356 |
357 | Accessing Ancestry
358 | ------------------
359 |
360 | The technique above can be useful when you want to flatten an object structure by combining child,
361 | parent, and/or grandparent data. For instance:
362 |
363 | .. code-block:: python
364 |
365 | input_data = {"a": {"b": {"c": 1}}}
366 | # transform to:
367 | output_data = [{"value": 1, "grandparent": "a"}]
368 |
369 | We can do this by leveraging glom's Scopes_. Here's the spec to get the results above:
370 |
371 | .. code-block:: python
372 |
373 | (
374 | T.items(),
375 | [
376 | (
377 | A.globals.gp_item, # save the grandparent item to the global scope
378 | T[1].values(), # access the values as usual
379 | [{"value": "c", "grandparent": S.globals.gp_item[0]}], # access the grandparent item
380 | )
381 | ],
382 | Flatten(),
383 | )
384 |
385 | You can play with glom scopes `in your browser here`__.
386 |
387 | .. __: https://yak.party/glompad/#spec=%28%0A++++T.items%28%29%2C%0A++++%5B%28%0A++++++++++++A.globals.gp_item%2C%0A++++++++++++T%5B1%5D.values%28%29%2C%0A++++++++++++%5B%7B%22val%22%3A+%22c%22%2C+%22path%22%3A+S.globals.gp_item%5B0%5D%7D%5D%2C%0A++++%29%5D%2C%0A++++Flatten%28%29%2C%0A%29%0A&target=%7B%0A++%22a%22%3A+%7B%0A++++%22b%22%3A+%7B%0A++++++%22c%22%3A+1%0A++++%7D%0A++%7D%0A%7D&v=1
388 |
389 | .. _Scopes: https://glom.readthedocs.io/en/latest/api.html#updating-the-scope-s-a
390 |
391 | Note that at the time of writing, glom doesn't yet have full tree traversal, so the nesting of
392 | the spec is going to roughly match the nesting of your data. If you need this to work in an
393 | arbitrarily nested structure, we recommend `remap <https://boltons.readthedocs.io/en/latest/iterutils.html#boltons.iterutils.remap>`_,
394 | the recursive map function.
--------------------------------------------------------------------------------
/docs/streaming.rst:
--------------------------------------------------------------------------------
1 | Streaming & Iteration
2 | =====================
3 |
4 | .. versionadded:: 19.10.0
5 |
6 | .. automodule:: glom.streaming
7 |
8 | .. contents:: Contents
9 | :local:
10 |
11 | .. autoclass:: glom.Iter
12 |
13 | .. automethod:: map
14 | .. automethod:: filter
15 | .. automethod:: chunked
16 | .. automethod:: split
17 | .. automethod:: flatten
18 | .. automethod:: unique
19 | .. automethod:: limit
20 | .. automethod:: slice
21 | .. automethod:: takewhile
22 | .. automethod:: dropwhile
23 | .. automethod:: all
24 | .. automethod:: first
25 |
--------------------------------------------------------------------------------
/docs/tutorial.rst:
--------------------------------------------------------------------------------
1 | ``glom`` Tutorial
2 | =================
3 |
4 | *Learn to use glom in no time!*
5 |
6 | Basic use of glom requires only a glance, not a whole tutorial. The
7 | case studies below take a wider look at day-to-day data and object
8 | manipulation, helping you develop an eye for writing robust,
9 | declarative data transformations.
10 |
11 | Go beyond basic with 10 minutes or less, and even further if you
12 | can spare a half hour.
13 |
14 | .. contents:: Contents
15 | :local:
16 |
17 | .. automodule:: glom.tutorial
18 |
--------------------------------------------------------------------------------
/glom/__init__.py:
--------------------------------------------------------------------------------
1 | from glom.core import (glom,
2 | Fill,
3 | Auto,
4 | register,
5 | register_op,
6 | Glommer,
7 | Call,
8 | Invoke,
9 | Spec,
10 | Ref,
11 | OMIT, # backwards compat
12 | SKIP,
13 | STOP,
14 | UP,
15 | ROOT,
16 | MODE,
17 | Path,
18 | Vars,
19 | Val,
20 | Literal, # backwards compat 2020-07
21 | Let, # backwards compat 2020-07
22 | Coalesce,
23 | Inspect,
24 | Pipe,
25 | GlomError,
26 | BadSpec,
27 | PathAccessError,
28 | PathAssignError,
29 | CoalesceError,
30 | UnregisteredTarget,
31 | T, S, A)
32 |
33 | from glom.reduction import Sum, Fold, Flatten, flatten, FoldError, Merge, merge
34 | from glom.matching import (M,
35 | Or,
36 | And,
37 | Not,
38 | Match,
39 | MatchError,
40 | TypeMatchError,
41 | Regex,
42 | Optional,
43 | Required,
44 | Switch,
45 | Check,
46 | CheckError)
47 | from glom.mutation import Assign, Delete, assign, delete, PathDeleteError
48 |
49 | # there's no -ion word that really fits what "streaming" means.
50 | # generation, production, iteration, all have more relevant meanings
51 | # elsewhere. (maybe procrastination :P)
52 | from glom.streaming import Iter
53 |
54 | from glom._version import __version__
55 |
--------------------------------------------------------------------------------
/glom/__main__.py:
--------------------------------------------------------------------------------
1 | from glom.cli import console_main
2 |
3 | if __name__ == '__main__':
4 | console_main()
5 |
--------------------------------------------------------------------------------
/glom/_version.py:
--------------------------------------------------------------------------------
1 | version_info = (24, 11, 1, 'dev')
2 | __version__ = '.'.join([str(part) for part in version_info if part or part == 0])
3 |
--------------------------------------------------------------------------------
/glom/cli.py:
--------------------------------------------------------------------------------
1 | """like jq, but with the full power of python in the spec.
2 |
3 | Usage: python -m glom [FLAGS] [spec [target]]
4 |
5 | Command-line interface to the glom library, providing nested data
6 | access and data restructuring with the power of Python.
7 |
8 |
9 | Flags:
10 |
11 | --help / -h show this help message and exit
12 | --target-file TARGET_FILE path to target data source (optional)
13 | --target-format TARGET_FORMAT
14 | format of the source data (json, python, toml,
15 | or yaml) (defaults to 'json')
16 | --spec-file SPEC_FILE path to glom spec definition (optional)
17 | --spec-format SPEC_FORMAT format of the glom spec definition (json, python,
18 | python-full) (defaults to 'python')
19 | --indent INDENT number of spaces to indent the result, 0 to disable
20 | pretty-printing (defaults to 2)
21 | --debug interactively debug any errors that come up
22 | --inspect interactively explore the data
23 |
24 | try out:
25 | ```
26 | curl -s https://api.github.com/repos/mahmoud/glom/events | python -m glom '[{"type": "type", "date": "created_at", "user": "actor.login"}]'
27 | ```
28 | """
29 |
30 |
31 |
32 | import os
33 | import ast
34 | import sys
35 | import json
36 |
37 | from face import (Command,
38 | Flag,
39 | face_middleware,
40 | PosArgSpec,
41 | PosArgDisplay,
42 | CommandLineError,
43 | UsageError)
44 | from face.utils import isatty
45 | from boltons.iterutils import is_scalar
46 |
47 | import glom
48 | from glom import Path, GlomError, Inspect
49 |
50 | # TODO: --default?
51 |
def glom_cli(target, spec, indent, debug, inspect, scalar):
    """Command-line interface to the glom library, providing nested data
    access and data restructuring with the power of Python.
    """
    # Args:
    #   target: the already-loaded target data
    #   spec: the already-built glom spec
    #   indent: JSON indent width; a falsy value disables pretty-printing
    #   debug / inspect: wrap the spec in Inspect for interactive use
    #   scalar: print scalar results bare instead of as JSON
    # Returns 1 when glom raises a GlomError, otherwise None.
    if debug or inspect:
        # the interactive hooks are only enabled while stdin is still open
        can_interact = not sys.stdin.closed
        spec = Inspect(spec,
                       echo=inspect,
                       recursive=inspect,
                       breakpoint=inspect and can_interact,
                       post_mortem=debug and can_interact)

    try:
        result = glom.glom(target, spec)
    except GlomError as ge:
        print(f'{ge.__class__.__name__}: {ge}')
        return 1

    if scalar and is_scalar(result):
        # bare value, no trailing newline
        print(result, end='')
        return

    # indent of 0 (or any falsy value) means compact, single-line JSON
    print(json.dumps(result, indent=indent or None, sort_keys=True))
    return
78 |
79 |
def get_command():
    """Build the face ``Command`` that backs the glom CLI.

    The command wraps :func:`glom_cli`, accepts up to two positional
    arguments (``[spec [target]]``), and runs the ``mw_get_target``
    middleware.

    Returns:
        The configured ``Command`` instance.
    """
    posargs = PosArgSpec(str, max_count=2, display={'label': '[spec [target]]'})
    cmd = Command(glom_cli, posargs=posargs, middlewares=[mw_get_target])
    cmd.add('--target-file', str, missing=None, doc='path to target data source')
    # keep this help text in sync with the formats mw_handle_target accepts
    cmd.add('--target-format', str, missing='json',
            doc='format of the source data (json, python, toml, or yaml)')
    cmd.add('--spec-file', str, missing=None, doc='path to glom spec definition')
    cmd.add('--spec-format', str, missing='python',
            doc='format of the glom spec definition (json, python, python-full)')

    cmd.add('--indent', int, missing=2,
            doc='number of spaces to indent the result, 0 to disable pretty-printing')

    cmd.add('--scalar', parse_as=True,
            doc="if the result is a single value (not a collection), output it"
            " without quotes or whitespace, for easier usage in scripts")
    cmd.add('--debug', parse_as=True, doc='interactively debug any errors that come up')
    cmd.add('--inspect', parse_as=True, doc='interactively explore the data')
    return cmd
99 |
100 |
def main(argv):
    """Run the glom command against *argv* and return its result, or 0
    when the run returns a falsy value."""
    outcome = get_command().run(argv)
    return outcome if outcome else 0
104 |
105 |
def console_main():
    """Console entry point: run :func:`main` on ``sys.argv`` and exit with
    its status.

    When the ``GLOM_CLI_DEBUG`` environment variable is set, the argv is
    echoed first and any exception drops into a pdb post-mortem before
    being re-raised.
    """
    debug_requested = os.getenv('GLOM_CLI_DEBUG')
    if debug_requested:
        print(sys.argv)
    try:
        status = main(sys.argv) or 0
        sys.exit(status)
    except Exception:
        if debug_requested:
            import pdb
            pdb.post_mortem()
        raise
116 |
117 |
def mw_handle_target(target_text, target_format):
    """Parse target text into Python data using the named format.

    Args:
       target_text (str): The target data to load, as text
       target_format (str): One of `json`, `python`, `toml`, or `yml`/`yaml`
    Returns:
       The parsed target, or an empty dict when *target_text* is empty.
    Raises:
       UsageError: unknown format, missing parser package, or text that
          fails to parse.
    """
    if not target_text:
        return {}

    # pick the parsing callable for the requested format
    if target_format == 'json':
        parse = json.loads
    elif target_format == 'python':
        parse = ast.literal_eval
    elif target_format in ('yaml', 'yml'):
        try:
            import yaml
            parse = yaml.safe_load
        except ImportError:  # pragma: no cover
            raise UsageError('No YAML package found. To process yaml files, run: pip install PyYAML')  # pragma: no cover
    elif target_format == 'toml':
        no_toml = UsageError('No TOML package found. To process toml files, upgrade to Python 3.11 or run: pip install tomli')
        try:
            import tomllib
            parse = tomllib.loads
        except ImportError:
            # fall back to the tomli backport when stdlib tomllib is absent
            try:
                import tomli
                parse = tomli.loads
            except ImportError:  # pragma: no cover
                raise no_toml  # pragma: no cover
    else:
        raise UsageError('expected target-format to be one of python, json, toml, or yaml')

    try:
        return parse(target_text)
    except Exception as e:
        raise UsageError('could not load target data, got: %s: %s'
                         % (e.__class__.__name__, e))
165 |
166 |
@face_middleware(provides=['spec', 'target'])
def mw_get_target(next_, posargs_, target_file, target_format, spec_file, spec_format):
    """Middleware that resolves the ``spec`` and ``target`` for the command.

    The spec text comes from the first positional argument or from
    ``--spec-file`` (not both) and is interpreted according to
    *spec_format*. The target text comes from the second positional
    argument, ``--target-file``, or stdin (when ``-`` is given, or when no
    target is given and stdin is not a tty), and is parsed by
    :func:`mw_handle_target`.

    Raises:
        UsageError: conflicting sources, unreadable files, or an unknown
            spec format.
    """
    spec_text, target_text = None, None
    if len(posargs_) == 2:
        spec_text, target_text = posargs_
    elif len(posargs_) == 1:
        spec_text, target_text = posargs_[0], None

    if spec_text and spec_file:
        raise UsageError('expected spec file or spec argument, not both')
    elif spec_file:
        try:
            with open(spec_file) as f:
                spec_text = f.read()
        except OSError as ose:
            raise UsageError(f'could not read spec file {spec_file!r}, got: {ose}')

    if not spec_text:
        spec = Path()
    elif spec_format == 'python':
        if spec_text[0] not in ('"', "'", "[", "{", "("):
            # intention: handle trivial path access, assume string
            spec_text = repr(spec_text)
        spec = ast.literal_eval(spec_text)
    elif spec_format == 'json':
        spec = json.loads(spec_text)
    elif spec_format == 'python-full':
        spec = _eval_python_full_spec(spec_text)
    else:
        raise UsageError('expected spec-format to be one of json, python, or python-full')

    if target_text and target_file:
        raise UsageError('expected target file or target argument, not both')
    elif target_text == '-' or target_file == '-':
        target_text = sys.stdin.read()
    elif target_file:
        try:
            # context manager so the handle is closed even if read() fails
            with open(target_file) as f:
                target_text = f.read()
        except OSError as ose:
            raise UsageError(f'could not read target file {target_file!r}, got: {ose}')
    elif not target_text and not isatty(sys.stdin):
        target_text = sys.stdin.read()

    target = mw_handle_target(target_text, target_format)

    return next_(spec=spec, target=target)
213 |
214 |
def _from_glom_import_star():
    """Return a copy of the ``glom`` package namespace, minus a few module
    dunders and any values that are themselves modules."""
    module_type = type(glom)
    hidden = ('__builtins__', '__name__', '__doc__', '__package__')
    return {key: value
            for key, value in glom.__dict__.items()
            if key not in hidden and not (type(value) == module_type)}
223 |
224 |
def _eval_python_full_spec(py_text):
    """Evaluate *py_text* as a Python expression with the glom namespace in
    scope and return the resulting spec.

    The text is executed as code (see :func:`_compile_code`), so it must
    come from a trusted source.
    """
    result_name = '__cli_glom_spec__'
    assignment = f'{result_name} = {py_text}'
    return _compile_code(assignment, name=result_name, env=_from_glom_import_star())
231 |
232 |
233 | def _compile_code(code_str, name, env=None, verbose=False):
234 | code = compile(code_str, '', 'single')
235 | if verbose:
236 | print(code_str)
237 | if env is None:
238 | env = {}
239 | exec(code, env)
240 |
241 | return env[name]
242 |
--------------------------------------------------------------------------------
/glom/grouping.py:
--------------------------------------------------------------------------------
1 | """
2 | Group mode
3 | """
4 |
5 | import random
6 |
7 | from boltons.typeutils import make_sentinel
8 |
9 | from .core import glom, MODE, SKIP, STOP, TargetRegistry, Path, T, BadSpec, _MISSING
10 |
11 |
12 | ACC_TREE = make_sentinel('ACC_TREE')
13 | ACC_TREE.__doc__ = """
14 | tree of accumulators for aggregation;
15 | structure roughly corresponds to the result,
16 | but is not 1:1; instead the main purpose is to ensure
17 | data is kept until the Group() finishes executing
18 | """
19 |
20 | CUR_AGG = make_sentinel('CUR_AGG')
21 | CUR_AGG.__doc__ = """
22 | the spec which is currently performing aggregation --
23 | useful for specs that want to work in either "aggregate"
24 | mode, or "spec" mode depending on if they are in Group mode
25 | or not; this sentinel in the Scope allows a spec to decide
26 | if it is "closest" to the Group and so should behave
27 | like an aggregate, or if it is further away and so should
28 | have normal spec behavior.
29 | """
30 |
31 |
def target_iter(target, scope):
    """Return an iterator over *target* using the 'iterate' handler
    registered in the scope's TargetRegistry.

    Raises:
        TypeError: when calling the handler on *target* raises.
    """
    make_iterator = scope[TargetRegistry].get_handler('iterate', target, path=scope[Path])

    try:
        return make_iterator(target)
    except Exception as e:
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (target.__class__.__name__, Path(*scope[Path]), e))
41 |
42 |
class Group:
    """supports nesting grouping operations --
    think of a glom-style recursive boltons.iterutils.bucketize

    the "branches" of a Group spec are dicts;
    the leaves are lists, or an Aggregation object
    an Aggregation object is any object that defines the
    method agg(target, accumulator)

    For example, here each parity key ends up holding the most
    recent even or odd value::

       >>> glom(range(10), Group({T % 2: T}))
       {0: 8, 1: 9}

    And here we create a `"bucketized"
    <https://boltons.readthedocs.io/en/latest/iterutils.html#boltons.iterutils.bucketize>`_
    map of even and odd numbers::

       >>> glom(range(10), Group({T % 2: [T]}))
       {0: [0, 2, 4, 6, 8], 1: [1, 3, 5, 7, 9]}

    target is the current target, accumulator is a dict
    maintained by Group mode

    unlike Iter(), Group() converts an iterable target
    into a single result; Iter() converts an iterable
    target into an iterable result

    """
    def __init__(self, spec):
        self.spec = spec

    def glomit(self, target, scope):
        # switch the scope into Group mode with fresh aggregation state
        scope[MODE] = GROUP
        scope[CUR_AGG] = None  # reset aggregation tripwire for sub-specs
        scope[ACC_TREE] = {}

        # handle the basecase where the spec stops immediately
        # TODO: something smarter
        spec_type = type(self.spec)
        result = spec_type() if spec_type in (dict, list) else None

        for item in target_iter(target, scope):
            previous = result
            result = scope[glom](item, self.spec, scope)
            if result is STOP:
                # STOP ends iteration early; report the last real result
                return previous
        return result

    def __repr__(self):
        class_name = self.__class__.__name__
        return f'{class_name}({self.spec!r})'
96 |
97 |
def GROUP(target, spec, scope):
    """
    Group mode dispatcher; also sentinel for current mode = group

    Dispatches on the spec: objects with a callable ``agg`` attribute are
    called as ``spec.agg(target, tree)``, other callables as
    ``spec(target)``, and dict / list specs accumulate into a container
    stored in the scope's ACC_TREE. Any other spec type raises BadSpec.

    Returns the accumulator (or the aggregator / callable result), or STOP
    when no more values are wanted.
    """
    # evaluate a sub-spec against the same target in the same scope
    recurse = lambda spec: scope[glom](target, spec, scope)
    tree = scope[ACC_TREE]  # current accumulator support structure
    if callable(getattr(spec, "agg", None)):
        return spec.agg(target, tree)
    elif callable(spec):
        return spec(target)
    _spec_type = type(spec)
    if _spec_type not in (dict, list):
        raise BadSpec("Group mode expected dict, list, callable, or"
                      " aggregator, not: %r" % (spec,))
    # the accumulator for this spec is stored in the tree under id(spec)
    _spec_id = id(spec)
    try:
        acc = tree[_spec_id]  # current accumulator
    except KeyError:
        acc = tree[_spec_id] = _spec_type()
    if _spec_type is dict:
        # done stays True only if no key/value pair asked for more values
        done = True
        for keyspec, valspec in spec.items():
            # keyspecs already marked STOP in the tree are not re-evaluated
            if tree.get(keyspec, None) is STOP:
                continue
            key = recurse(keyspec)
            if key is SKIP:
                done = False  # SKIP means we still want more vals
                continue
            if key is STOP:
                tree[keyspec] = STOP
                continue
            if key not in acc:
                # TODO: guard against key == id(spec)
                tree[key] = {}
            # the value spec accumulates into the sub-tree for this key
            scope[ACC_TREE] = tree[key]
            result = recurse(valspec)
            if result is STOP:
                tree[keyspec] = STOP
                continue
            done = False  # SKIP or returning a value means we still want more vals
            if result is not SKIP:
                acc[key] = result
        if done:
            return STOP
        return acc
    elif _spec_type is list:
        for valspec in spec:
            if type(valspec) is dict:
                # doesn't make sense due to arity mismatch. did you mean [Auto({...})] ?
                raise BadSpec('dicts within lists are not'
                              ' allowed while in Group mode: %r' % spec)
            result = recurse(valspec)
            if result is STOP:
                return STOP
            if result is not SKIP:
                acc.append(result)
        return acc
    raise ValueError(f"{_spec_type} not a valid spec type for Group mode")  # pragma: no cover
156 |
157 |
class First:
    """
    keeps the first value it is given and signals STOP afterwards

    >>> glom([1, 2, 3], Group(First()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        if self in tree:
            # already returned a value once; nothing more is wanted
            return STOP
        tree[self] = STOP
        return target

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name
175 |
176 |
class Avg:
    """
    running numerical average of all values seen;
    raises exception on non-numeric value

    >>> glom([1, 2, 3], Group(Avg()))
    2.0
    """
    __slots__ = ()

    def agg(self, target, tree):
        if self not in tree:
            # state is [running sum, count]
            tree[self] = [0.0, 0]
        state = tree[self]
        state[0] += target
        state[1] += 1
        total, count = state
        return total / count

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name
199 |
200 |
class Max:
    """
    keeps the largest value seen so far;
    raises exception on values that are not comparable

    >>> glom([1, 2, 3], Group(Max()))
    3
    """
    __slots__ = ()

    def agg(self, target, tree):
        try:
            best = tree[self]
        except KeyError:
            # first value seen becomes the current maximum
            tree[self] = target
        else:
            if target > best:
                tree[self] = target
        return tree[self]

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name
218 |
219 |
class Min:
    """
    keeps the smallest value seen so far;
    raises exception on values that are not comparable

    >>> glom([1, 2, 3], Group(Min()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        try:
            best = tree[self]
        except KeyError:
            # first value seen becomes the current minimum
            tree[self] = target
        else:
            if target < best:
                tree[self] = target
        return tree[self]

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name
237 |
238 |
class Sample:
    """takes a random sample of the values

    >>> glom([1, 2, 3], Group(Sample(2)))  # doctest: +SKIP
    [1, 3]
    >>> glom(range(5000), Group(Sample(2)))  # doctest: +SKIP
    [272, 2901]

    The advantage of this over :func:`random.sample` is that this can
    take an arbitrarily-sized, potentially-very-long streaming input
    and returns a fixed-size output. Note that this does not stream
    results out, so your streaming input must have finite length.
    """
    __slots__ = ('size',)

    def __init__(self, size):
        self.size = size

    def agg(self, target, tree):
        # simple reservoir sampling scheme
        # https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
        try:
            state = tree[self]
        except KeyError:
            # state is [number of values seen, reservoir list]
            state = tree[self] = [0, []]
        seen_before, reservoir = state
        if len(reservoir) >= self.size:
            # reservoir is full: maybe overwrite a random slot
            slot = random.randint(0, seen_before)
            if slot < self.size:
                reservoir[slot] = target
        else:
            reservoir.append(target)
        state[0] += 1
        return reservoir

    def __repr__(self):
        return f'{self.__class__.__name__}({self.size!r})'
274 |
275 |
276 |
class Limit:
    """
    Limits the number of values passed to sub-accumulator

    >>> glom([1, 2, 3], Group(Limit(2)))
    [1, 2]

    To override the default untransformed list output, set the subspec kwarg:

    >>> glom(range(10), Group(Limit(3, subspec={(lambda x: x % 2): [T]})))
    {0: [0, 2], 1: [1]}

    You can even nest Limits in other ``Group`` specs:

    >>> glom(range(10), Group(Limit(5, {(lambda x: x % 2): Limit(2)})))
    {0: [0, 2], 1: [1, 3]}

    """
    __slots__ = ('n', 'subspec')

    def __init__(self, n, subspec=_MISSING):
        # default sub-accumulator collects the targets into a list
        self.n = n
        self.subspec = [T] if subspec is _MISSING else subspec

    def glomit(self, target, scope):
        if scope[MODE] is not GROUP:
            raise BadSpec("Limit() only valid in Group mode")
        tree = scope[ACC_TREE]  # current accumulator support structure
        if self not in tree:
            # state is [number of values seen, sub-tree for the subspec]
            tree[self] = [0, {}]
        state = tree[self]
        scope[ACC_TREE] = state[1]
        state[0] += 1
        if state[0] > self.n:
            return STOP
        return scope[glom](target, self.subspec, scope)

    def __repr__(self):
        return f'{self.__class__.__name__}({self.n!r}, {self.subspec!r})'
317 |
--------------------------------------------------------------------------------
/glom/mutation.py:
--------------------------------------------------------------------------------
1 | """By default, glom aims to safely return a transformed copy of your
2 | data. But sometimes you really need to transform an existing object.
3 |
4 | When you already have a large or complex bit of nested data that you
5 | are sure you want to modify in-place, glom has you covered, with the
6 | :func:`~glom.assign` function, and the :func:`~glom.Assign` specifier
7 | type.
8 |
9 | .. warning::
10 |
11 | glom's deep assignment is powerful, and incorrect use can result in
12 | unintended assignments to global state, including class and module
13 | attributes, as well as function defaults.
14 |
15 | Be careful when writing assignment specs, and especially careful when
16 | any part of the spec is data-driven or provided by an end user.
17 |
18 | """
19 | import operator
20 | from pprint import pprint
21 |
22 | from .core import Path, T, S, Spec, glom, UnregisteredTarget, GlomError, PathAccessError, UP
23 | from .core import TType, register_op, TargetRegistry, bbrepr, PathAssignError, arg_val, _assign_op
24 |
25 |
26 | try:
27 | basestring
28 | except NameError:
29 | basestring = str
30 |
31 |
32 | if getattr(__builtins__, '__dict__', None) is not None:
33 | # pypy's __builtins__ is a module, as is CPython's REPL, but at
34 | # normal execution time it's a dict?
35 | __builtins__ = __builtins__.__dict__
36 |
37 |
class PathDeleteError(PathAssignError):
    """This :exc:`GlomError` subtype is raised when a deletion fails,
    stemming from a :func:`~glom.delete` call or other
    :class:`~glom.Delete` usage.

    One example would be deleting an out-of-range position in a list::

      >>> delete(["short", "list"], Path(5))
      Traceback (most recent call last):
      ...
      PathDeleteError: could not delete 5 on object at Path(), got error: IndexError(...

    Other deletion failures could be due to deleting a read-only
    ``@property`` or exception being raised inside a ``__delattr__()``.

    """
    def get_message(self):
        details = (self.dest_name, self.path, self.exc)
        return 'could not delete %r on object at %r, got error: %r' % details
57 |
58 |
59 | def _apply_for_each(func, path, val):
60 | layers = path.path_t.__stars__()
61 | if layers:
62 | for i in range(layers - 1):
63 | val = sum(val, []) # flatten out the extra layers
64 | for inner in val:
65 | func(inner)
66 | else:
67 | func(val)
68 |
69 |
class Assign:
    """*New in glom 18.3.0*

    The ``Assign`` specifier type enables glom to modify the target,
    performing a "deep-set" to mirror glom's original deep-get use
    case.

    ``Assign`` can be used to perform spot modifications of large data
    structures when making a copy is not desired::

      # deep assignment into a nested dictionary
      >>> target = {'a': {}}
      >>> spec = Assign('a.b', 'value')
      >>> _ = glom(target, spec)
      >>> pprint(target)
      {'a': {'b': 'value'}}

    The value to be assigned can also be a :class:`~glom.Spec`, which
    is useful for copying values around within the data structure::

      # copying one nested value to another
      >>> _ = glom(target, Assign('a.c', Spec('a.b')))
      >>> pprint(target)
      {'a': {'b': 'value', 'c': 'value'}}

    Another handy use of Assign is to deep-apply a function::

      # sort a deep nested list
      >>> target={'a':{'b':[3,1,2]}}
      >>> _ = glom(target, Assign('a.b', Spec(('a.b',sorted))))
      >>> pprint(target)
      {'a': {'b': [1, 2, 3]}}

    Like many other specifier types, ``Assign``'s destination path can be
    a :data:`~glom.T` expression, for maximum control::

      # changing the error message of an exception in an error list
      >>> err = ValueError('initial message')
      >>> target = {'errors': [err]}
      >>> _ = glom(target, Assign(T['errors'][0].args, ('new message',)))
      >>> str(err)
      'new message'

    ``Assign`` has built-in support for assigning to attributes of
    objects, keys of mappings (like dicts), and indexes of sequences
    (like lists). Additional types can be registered through
    :func:`~glom.register()` using the ``"assign"`` operation name.

    Attempting to assign to an immutable structure, like a
    :class:`tuple`, will result in a
    :class:`~glom.PathAssignError`. Attempting to assign to a path
    that doesn't exist will raise a :class:`~glom.PathAccessError`.

    To automatically backfill missing structures, you can pass a
    callable to the *missing* argument. This callable will be called
    for each path segment along the assignment which is not
    present.

       >>> target = {}
       >>> assign(target, 'a.b.c', 'hi', missing=dict)
       {'a': {'b': {'c': 'hi'}}}

    """
    def __init__(self, path, val, missing=None):
        # TODO: an option like require_preexisting or something to
        # ensure that a value is mutated, not just added. Current
        # workaround is to do a Check().
        # normalize the destination into a Path
        if isinstance(path, basestring):
            path = Path.from_text(path)
        elif type(path) is TType:
            path = Path(path)
        elif not isinstance(path, Path):
            raise TypeError('path argument must be a .-delimited string, Path, T, or S')

        # the final (op, arg) pair is the assignment itself; everything
        # before it is the path to the container being assigned into
        try:
            self.op, self.arg = path.items()[-1]
        except IndexError:
            raise ValueError('path must have at least one element')
        self._orig_path = path
        self.path = path[:-1]

        if self.op not in '[.P':
            # maybe if we add null-coalescing this should do something?
            raise ValueError('last part of path must be setattr or setitem')
        self.val = val

        if missing is not None:
            if not callable(missing):
                raise TypeError(f'expected missing to be callable, not {missing!r}')
        self.missing = missing

    def glomit(self, target, scope):
        val = arg_val(target, self.val, scope)

        op, arg, path = self.op, self.arg, self.path
        # paths rooted at S are resolved against scope[UP] instead of the target
        if self.path.startswith(S):
            dest_target = scope[UP]
            dest_path = self.path.from_t()
        else:
            dest_target = target
            dest_path = self.path
        try:
            dest = scope[glom](dest_target, dest_path, scope)
        except PathAccessError as pae:
            if not self.missing:
                raise

            # backfill: build the absent remainder of the path on a fresh
            # missing() object, then assign that object at the segment
            # where access failed
            remaining_path = self._orig_path[pae.part_idx + 1:]
            val = scope[glom](self.missing(), Assign(remaining_path, val, missing=self.missing), scope)

            op, arg = self._orig_path.items()[pae.part_idx]
            path = self._orig_path[:pae.part_idx]
            dest = scope[glom](dest_target, path, scope)

        # TODO: forward-detect immutable dest?
        _apply = lambda dest: _assign_op(
            dest=dest, op=op, arg=arg, val=val, path=path, scope=scope)
        _apply_for_each(_apply, path, dest)

        # the (mutated) target is returned, not the assigned value
        return target

    def __repr__(self):
        cn = self.__class__.__name__
        if self.missing is None:
            return f'{cn}({self._orig_path!r}, {self.val!r})'
        return f'{cn}({self._orig_path!r}, {self.val!r}, missing={bbrepr(self.missing)})'
196 |
197 |
def assign(obj, path, val, missing=None):
    """*New in glom 18.3.0*

    The ``assign()`` function provides convenient "deep set"
    functionality, modifying nested data structures in-place::

      >>> target = {'a': [{'b': 'c'}, {'d': None}]}
      >>> _ = assign(target, 'a.1.d', 'e')  # let's give 'd' a value of 'e'
      >>> pprint(target)
      {'a': [{'b': 'c'}, {'d': 'e'}]}

    Missing structures can also be automatically created with the
    *missing* parameter. For more information and examples, see the
    :class:`~glom.Assign` specifier type, which this function wraps.
    """
    assignment_spec = Assign(path, val, missing=missing)
    return glom(obj, assignment_spec)
214 |
215 |
216 | _ALL_BUILTIN_TYPES = [v for v in __builtins__.values() if isinstance(v, type)]
217 | _BUILTIN_BASE_TYPES = [v for v in _ALL_BUILTIN_TYPES
218 | if not issubclass(v, tuple([t for t in _ALL_BUILTIN_TYPES
219 | if t not in (v, type, object)]))]
220 | _UNASSIGNABLE_BASE_TYPES = tuple(set(_BUILTIN_BASE_TYPES)
221 | - {dict, list, BaseException, object, type})
222 |
223 |
224 | def _set_sequence_item(target, idx, val):
225 | target[int(idx)] = val
226 |
227 |
def _assign_autodiscover(type_obj):
    """Choose the assign handler for *type_obj*.

    Returns False for subclasses of the unassignable builtin base types,
    ``setattr`` for types without a callable ``__setitem__``,
    ``_set_sequence_item`` for types that also have a callable ``index``,
    and ``operator.setitem`` otherwise.
    """
    # TODO: issubclass or "in"?
    if issubclass(type_obj, _UNASSIGNABLE_BASE_TYPES):
        return False

    if not callable(getattr(type_obj, '__setitem__', None)):
        return setattr

    if callable(getattr(type_obj, 'index', None)):
        return _set_sequence_item
    return operator.setitem
239 |
240 |
241 | register_op('assign', auto_func=_assign_autodiscover, exact=False)
242 |
243 |
class Delete:
    """
    In addition to glom's core "deep-get" and ``Assign``'s "deep-set",
    the ``Delete`` specifier type performs a "deep-del", which can
    remove items from larger data structures by key, attribute, and
    index.

    >>> target = {'dict': {'x': [5, 6, 7]}}
    >>> glom(target, Delete('dict.x.1'))
    {'dict': {'x': [5, 7]}}
    >>> glom(target, Delete('dict.x'))
    {'dict': {}}

    If a target path is missing, a :exc:`PathDeleteError` will be
    raised. To ignore missing targets, use the ``ignore_missing``
    flag:

    >>> glom(target, Delete('does_not_exist', ignore_missing=True))
    {'dict': {}}

    ``Delete`` has built-in support for deleting attributes of
    objects, keys of dicts, and indexes of sequences
    (like lists). Additional types can be registered through
    :func:`~glom.register()` using the ``"delete"`` operation name.

    .. versionadded:: 20.5.0
    """
    def __init__(self, path, ignore_missing=False):
        # normalize the destination into a Path
        if isinstance(path, basestring):
            path = Path.from_text(path)
        elif type(path) is TType:
            path = Path(path)
        elif not isinstance(path, Path):
            raise TypeError('path argument must be a .-delimited string, Path, T, or S')

        # the final (op, arg) pair is the deletion itself; everything
        # before it is the path to the container being deleted from
        try:
            self.op, self.arg = path.items()[-1]
        except IndexError:
            raise ValueError('path must have at least one element')
        self._orig_path = path
        self.path = path[:-1]

        if self.op not in '[.P':
            raise ValueError('last part of path must be an attribute or index')

        self.ignore_missing = ignore_missing

    def _del_one(self, dest, op, arg, scope):
        """Delete *arg* from *dest* using the operation named by *op*.

        Failures are swallowed when ``ignore_missing`` is set and
        re-raised as :exc:`PathDeleteError` otherwise.
        """
        if op == '[':
            try:
                del dest[arg]
            except (IndexError, KeyError) as e:
                # a missing mapping key raises KeyError, not IndexError;
                # both mean "not there" and must honor ignore_missing
                if not self.ignore_missing:
                    raise PathDeleteError(e, self.path, arg)
        elif op == '.':
            try:
                delattr(dest, arg)
            except AttributeError as e:
                if not self.ignore_missing:
                    raise PathDeleteError(e, self.path, arg)
        elif op == 'P':
            _delete = scope[TargetRegistry].get_handler('delete', dest)
            try:
                _delete(dest, arg)
            except Exception as e:
                if not self.ignore_missing:
                    raise PathDeleteError(e, self.path, arg)

    def glomit(self, target, scope):
        op, arg, path = self.op, self.arg, self.path
        # paths rooted at S are resolved against scope[UP] instead of the target
        if self.path.startswith(S):
            dest_target = scope[UP]
            dest_path = self.path.from_t()
        else:
            dest_target = target
            dest_path = self.path
        try:
            dest = scope[glom](dest_target, dest_path, scope)
        except PathAccessError:
            # the container itself is missing: nothing to delete
            if not self.ignore_missing:
                raise
        else:
            _apply_for_each(lambda dest: self._del_one(dest, op, arg, scope), path, dest)

        # the (mutated) target is returned
        return target

    def __repr__(self):
        cn = self.__class__.__name__
        if self.ignore_missing:
            # show the non-default flag, as Assign.__repr__ does for missing
            return f'{cn}({self._orig_path!r}, ignore_missing={self.ignore_missing!r})'
        return f'{cn}({self._orig_path!r})'
333 |
334 |
def delete(obj, path, ignore_missing=False):
    """
    The ``delete()`` function provides "deep del" functionality,
    modifying nested data structures in-place::

      >>> target = {'a': [{'b': 'c'}, {'d': None}]}
      >>> delete(target, 'a.0.b')
      {'a': [{}, {'d': None}]}

    Attempting to delete missing keys, attributes, and indexes will
    raise a :exc:`PathDeleteError`. To ignore these errors, use the
    *ignore_missing* argument::

      >>> delete(target, 'does_not_exist', ignore_missing=True)
      {'a': [{}, {'d': None}]}

    For more information and examples, see the :class:`~glom.Delete`
    specifier type, which this convenience function wraps.

    .. versionadded:: 20.5.0
    """
    deletion_spec = Delete(path, ignore_missing=ignore_missing)
    return glom(obj, deletion_spec)
357 |
358 |
359 | def _del_sequence_item(target, idx):
360 | del target[int(idx)]
361 |
362 |
def _delete_autodiscover(type_obj):
    """Choose the delete handler for *type_obj*.

    Returns False for subclasses of the unassignable builtin base types,
    ``delattr`` for types without a callable ``__delitem__``,
    ``_del_sequence_item`` for types that also have a callable ``index``,
    and ``operator.delitem`` otherwise.
    """
    if issubclass(type_obj, _UNASSIGNABLE_BASE_TYPES):
        return False

    if not callable(getattr(type_obj, '__delitem__', None)):
        return delattr

    if callable(getattr(type_obj, 'index', None)):
        return _del_sequence_item
    return operator.delitem
372 |
373 |
374 | register_op('delete', auto_func=_delete_autodiscover, exact=False)
375 |
--------------------------------------------------------------------------------
/glom/reduction.py:
--------------------------------------------------------------------------------
1 | import operator
2 | import itertools
3 | from pprint import pprint
4 |
5 | from boltons.typeutils import make_sentinel
6 |
7 | from .core import T, glom, GlomError, format_invocation, bbrepr, UnregisteredTarget, MODE
8 | from .grouping import GROUP, target_iter, ACC_TREE, CUR_AGG
9 |
10 | _MISSING = make_sentinel('_MISSING')
11 |
12 |
13 | try:
14 | basestring
15 | except NameError:
16 | basestring = str
17 |
18 |
class FoldError(GlomError):
    """Raised when ``Fold()`` is applied to a target that cannot be
    iterated; may gain other uses in the future."""
23 |
24 |
class Fold:
    """The `Fold` specifier type is glom's building block for reducing
    iterables in data, implementing the classic `fold
    <https://en.wikipedia.org/wiki/Fold_(higher-order_function)>`_
    from functional programming, similar to Python's built-in
    :func:`reduce`.

    Args:
       subspec: A spec representing the target to fold, which must be
          an iterable, or otherwise registered to 'iterate' (with
          :func:`~glom.register`).
       init (callable): A function or type which will be invoked to
          initialize the accumulator value.
       op (callable): A function to call on the accumulator value and
          every value, the result of which will become the new
          accumulator value. Defaults to :func:`operator.iadd`.

    Usage is as follows:

       >>> target = [set([1, 2]), set([3]), set([2, 4])]
       >>> result = glom(target, Fold(T, init=frozenset, op=frozenset.union))
       >>> result == frozenset([1, 2, 3, 4])
       True

    Note the required ``spec`` and ``init`` arguments. ``op`` is
    optional, but here must be used because the :class:`set` and
    :class:`frozenset` types do not work with addition.

    While :class:`~glom.Fold` is powerful, :class:`~glom.Flatten` and
    :class:`~glom.Sum` are subtypes with more convenient defaults for
    day-to-day use.
    """
    def __init__(self, subspec, init, op=operator.iadd):
        self.subspec = subspec
        self.init = init
        self.op = op
        class_name = self.__class__.__name__
        # op is validated before init
        if not callable(op):
            raise TypeError('expected callable for %s op param, not: %r' %
                            (class_name, op))
        if not callable(init):
            raise TypeError('expected callable for %s init param, not: %r' %
                            (class_name, init))

    def glomit(self, target, scope):
        # in Group mode, the first Fold reached claims the aggregator role
        aggregating = scope[MODE] is GROUP and scope.get(CUR_AGG) is None
        if aggregating:
            scope[CUR_AGG] = self

        if self.subspec is not T:
            target = scope[glom](target, self.subspec, scope)

        if aggregating:
            return self._agg(target, scope[ACC_TREE])
        try:
            return self._fold(target_iter(target, scope))
        except UnregisteredTarget as ut:
            raise FoldError('can only %s on iterable targets, not %s type (%s)'
                            % (self.__class__.__name__, type(target).__name__, ut))

    def _fold(self, iterator):
        # start from a fresh accumulator and combine every value into it
        accumulated = self.init()
        combine = self.op
        for value in iterator:
            accumulated = combine(accumulated, value)
        return accumulated

    def _agg(self, target, tree):
        # one accumulator per Fold instance, stored in the tree
        if self not in tree:
            tree[self] = self.init()
        updated = self.op(tree[self], target)
        tree[self] = updated
        return updated

    def __repr__(self):
        class_name = self.__class__.__name__
        kwargs = {'init': self.init}
        # only show op when it differs from the default
        if self.op is not operator.iadd:
            kwargs['op'] = self.op
        return format_invocation(class_name, (self.subspec,), kwargs, repr=bbrepr)
105 |
106 |
class Sum(Fold):
    """`Sum` adds up integers and other numeric values found in the
    target, in the spirit of the :func:`sum()` builtin.

       >>> glom(range(5), Sum())
       10

    As with the other fold specs, *init* must be a callable, so a
    custom start value has to be wrapped in one::

       >>> glom(range(5), Sum(init=lambda: 5.0))
       15.0

    For "summing" lists and other iterables, use :class:`Flatten`.
    For anything else, use the general :class:`Fold` specifier type.

    """
    def __init__(self, subspec=T, init=int):
        super().__init__(subspec, init, op=operator.iadd)

    def __repr__(self):
        # Arguments still at their defaults are omitted from the repr.
        positional = (self.subspec,) if self.subspec is not T else ()
        keywords = {} if self.init is int else {'init': self.init}
        return format_invocation(
            self.__class__.__name__, positional, keywords, repr=bbrepr)
133 |
134 |
class Count(Fold):
    """
    Counts the values that occurred, ignoring what the values are.

    >>> glom([1, 2, 3], Count())
    3
    """
    __slots__ = ()

    def __init__(self):
        # The accumulator starts at int() == 0 and goes up by one per value;
        # the value itself is never inspected.
        super().__init__(subspec=T, init=int, op=lambda cur, val: cur + 1)

    def __repr__(self):
        return '%s()' % (self.__class__.__name__,)
150 |
151 |
class Flatten(Fold):
    """`Flatten` combines iterables. With the default settings, an
    iterable of iterables becomes one list holding the items of all
    the inner iterables.

       >>> target = [[1], [2, 3]]
       >>> glom(target, Flatten())
       [1, 2, 3]

    Passing ``"lazy"`` as *init* makes the spec return a generator
    rather than a list, which avoids building extra lists and other
    collections in intermediate processing steps.
    """
    def __init__(self, subspec=T, init=list):
        self.lazy = bool(init == 'lazy')
        if self.lazy:
            # The base class still needs a callable init.
            init = list
        super().__init__(subspec, init, op=operator.iadd)

    def _fold(self, iterator):
        if not self.lazy:
            return super()._fold(iterator)
        return itertools.chain.from_iterable(iterator)

    def __repr__(self):
        positional = (self.subspec,) if self.subspec is not T else ()
        if self.lazy:
            keywords = {'init': 'lazy'}
        elif self.init is list:
            keywords = {}
        else:
            keywords = {'init': self.init}
        return format_invocation(
            self.__class__.__name__, positional, keywords, repr=bbrepr)
187 |
188 |
def flatten(target, **kwargs):
    """At its most basic, ``flatten()`` turns an iterable of iterables
    into a single list. But it has a few arguments which give it more
    power:

    Args:

       init (callable): A function or type which gives the initial
          value of the return. The value must support addition. Common
          values might be :class:`list` (the default), :class:`tuple`,
          or even :class:`int`. You can also pass ``init="lazy"`` to
          get a generator.
       levels (int): A non-negative integer representing the number of
          nested levels to flatten. Defaults to 1. With ``0``, nothing
          is flattened and the value located by *spec* is returned.
       spec: The glomspec to fetch before flattening. This defaults to
          the root level of the object.

    Raises:
       TypeError: if unknown keyword arguments are passed.
       ValueError: if *levels* is negative.

    Usage is straightforward.

      >>> target = [[1, 2], [3], [4]]
      >>> flatten(target)
      [1, 2, 3, 4]

    Because integers themselves support addition, we actually have two
    levels of flattening possible, to get back a single integer sum:

      >>> flatten(target, init=int, levels=2)
      10

    However, flattening a non-iterable like an integer will raise an
    exception:

      >>> target = 10
      >>> flatten(target)
      Traceback (most recent call last):
      ...
      FoldError: can only Flatten on iterable targets, not int type (...)

    By default, ``flatten()`` will add a mix of iterables together,
    making it a more-robust alternative to the built-in
    ``sum(list_of_lists, list())`` trick most experienced Python
    programmers are familiar with using:

      >>> list_of_iterables = [range(2), [2, 3], (4, 5)]
      >>> sum(list_of_iterables, [])
      Traceback (most recent call last):
      ...
      TypeError: can only concatenate list (not "tuple") to list

    Whereas flatten() handles this just fine:

      >>> flatten(list_of_iterables)
      [0, 1, 2, 3, 4, 5]

    The ``flatten()`` function is a convenient wrapper around the
    :class:`Flatten` specifier type. For embedding in larger specs,
    and more involved flattening, see :class:`Flatten` and its base,
    :class:`Fold`.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', list)
    levels = kwargs.pop('levels', 1)
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))

    if levels < 0:
        raise ValueError('expected levels >= 0, not %r' % levels)
    if levels == 0:
        # Nothing to flatten. The default spec returns the target untouched,
        # as before; an explicit spec is still applied rather than dropped.
        return target if subspec is T else glom(target, subspec)

    # All levels but the last are flattened lazily so that no intermediate
    # lists are built; only the final level materializes with *init*.
    spec = (subspec,)
    spec += (Flatten(init="lazy"),) * (levels - 1)
    spec += (Flatten(init=init),)

    return glom(target, spec)
264 |
265 |
class Merge(Fold):
    """By default, Merge turns an iterable of mappings into a single,
    merged :class:`dict`, leveraging the behavior of the
    :meth:`~dict.update` method. The start state can be customized
    with *init*, as well as the update operation, with *op*.

    Args:
       subspec: The location of the iterable of mappings. Defaults to ``T``.
       init (callable): A type or callable which returns a base
          instance into which all other values will be merged.
       op (callable): A callable, which takes two arguments, and
          performs a merge of the second into the first. Can also be
          the string name of a method to fetch on the instance created
          from *init*. Defaults to ``"update"``.

    Raises:
       ValueError: if *op* is not callable, or names a method that the
          type returned by *init* does not provide.

    .. note::

       Besides the differing defaults, the primary difference between
       :class:`Merge` and other :class:`Fold` subtypes is that its
       *op* argument is assumed to be a two-argument function which
       has no return value and modifies the left parameter
       in-place. Because the initial state is a new object created with
       the *init* parameter, none of the target values are modified.

    """
    def __init__(self, subspec=T, init=dict, op=None):
        if op is None:
            op = 'update'
        # Keep what the caller asked for: the lookup below replaces ``op``
        # with None on failure, which would make the error message useless.
        requested_op = op
        if isinstance(op, basestring):
            # Resolve the method on the type, so that it is called unbound
            # as op(accumulator, value).
            test_init = init()
            op = getattr(type(test_init), op, None)
        if not callable(op):
            raise ValueError('expected callable "op" arg or an "init" with an .update()'
                             ' method not %r and %r' % (requested_op, init))
        super().__init__(subspec=subspec, init=init, op=op)

    def _fold(self, iterator):
        # the difference here is that ret is mutated in-place, the
        # variable not being reassigned, as in base Fold.
        ret, op = self.init(), self.op

        for v in iterator:
            op(ret, v)

        return ret

    def _agg(self, target, tree):
        # Same in-place convention as _fold: the return value of op is
        # ignored and the stored accumulator is mutated.
        if self not in tree:
            acc = tree[self] = self.init()
        else:
            acc = tree[self]
        self.op(acc, target)
        return acc
320 |
321 |
def merge(target, **kwargs):
    """With default arguments, ``merge()`` combines an iterable of
    mappings into one new :class:`dict`, relying on the behavior of
    the :meth:`~dict.update` method. The mappings passed in are left
    unmodified.

      >>> target = [{'a': 'alpha'}, {'b': 'B'}, {'a': 'A'}]
      >>> res = merge(target)
      >>> pprint(res)
      {'a': 'A', 'b': 'B'}

    Args:
       target: The list of dicts, or some other iterable of mappings.

    The *init* keyword argument customizes the start state and the
    *op* keyword argument customizes the update operation. Both are
    described in more detail on the :class:`Merge` spec.

    """
    subspec, init, op = (kwargs.pop('spec', T),
                         kwargs.pop('init', dict),
                         kwargs.pop('op', None))
    if kwargs:
        # anything left over was not a recognized option
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    return glom(target, Merge(subspec, init, op))
349 |
--------------------------------------------------------------------------------
/glom/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mahmoud/glom/c225e2abeb234be7119911b96b4378cc9d8d6478/glom/test/__init__.py
--------------------------------------------------------------------------------
/glom/test/data/test_invalid.toml:
--------------------------------------------------------------------------------
1 | # invalid
2 | toml = {
3 |
--------------------------------------------------------------------------------
/glom/test/data/test_invalid.yaml:
--------------------------------------------------------------------------------
1 | - Invalid
2 | Yaml:
--------------------------------------------------------------------------------
/glom/test/data/test_valid.toml:
--------------------------------------------------------------------------------
1 | Hello = ["World"]
2 |
--------------------------------------------------------------------------------
/glom/test/data/test_valid.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | Hello:
3 | - World
4 |
--------------------------------------------------------------------------------
/glom/test/perf_report.py:
--------------------------------------------------------------------------------
1 | """
2 | slow gloms that came up organically, used as performance metrics
3 | """
4 | import time
5 | import gc
6 |
7 | import attr
8 |
9 | from glom import glom, T
10 |
11 |
12 |
# Spec written with plain string keys; 'id' is additionally passed through str.
STR_SPEC = [{
    'id': ('id', str),
    'name': 'short_name',
    'external_id': 'external_id',
    'created_date': 'created_date',
}]


# Same output layout as STR_SPEC, written with T attribute access instead of
# string keys.
T_SPEC = [{
    'id': (T.id, str),
    'name': T.short_name,
    'external_id': T.external_id,
    'created_date': T.created_date,
}]
27 |
28 |
def func(data):
    """Hand-written equivalent of the specs above: build one dict per
    object in *data*, with ``id`` converted to ``str``."""
    rows = []
    for obj in data:
        rows.append({
            'id': str(obj.id),
            'name': obj.short_name,
            'external_id': obj.external_id,
            'created_date': obj.created_date,
        })
    return rows
36 |
37 |
def setup_list_of_dict(num=100):
    """
    Build *num* simple attrs objects to process.

    List-of-objects processing, to prepare internal objects for JSON
    serialization, is a common use case.
    """
    field_names = ['id', 'short_name', 'external_id', 'created_date']
    Obj = attr.make_class('Obj', field_names)

    data = []
    for i in range(num):
        suffix = str(i)
        data.append(Obj(i, 'name' + suffix, 'external' + suffix, 'now'))

    return data
50 |
51 |
def run(spec, data):
    """Glom *data* with *spec* once and print the average time spent per
    object, in microseconds.

    Uses ``time.perf_counter()``, a monotonic high-resolution clock, so
    the measurement is not disturbed by wall-clock adjustments. This
    matches the perf counter already used by ``ratio()``.
    """
    start = time.perf_counter()
    glom(data, spec)
    end = time.perf_counter()
    print(f"{(end - start) / len(data) * 1e6} us per object")
57 |
58 |
def ratio(spec, func, data):
    """Return how many times longer ``glom(data, spec)`` takes than
    ``func(data)``.

    Both are timed back to back over ten rounds. For each of them the two
    fastest and the two slowest rounds are dropped, and the ratio of the
    remaining totals is returned.
    """
    glom_times, py_times = [], []
    for _ in range(10):
        started = time.perf_counter_ns()
        glom(data, spec)
        switched = time.perf_counter_ns()
        func(data)
        finished = time.perf_counter_ns()
        glom_times.append(switched - started)
        py_times.append(finished - switched)

    glom_total = sum(sorted(glom_times)[2:-2])
    py_total = sum(sorted(py_times)[2:-2])

    return float(glom_total) / py_total
75 |
76 |
if __name__ == "__main__":
    import cProfile
    # Time the string-key spec twice against one large data set ...
    data = setup_list_of_dict(100000)
    run(STR_SPEC, data)
    run(STR_SPEC, data)
    # ... then print the glom-vs-plain-Python ratio twice, each on a fresh,
    # smaller data set.
    print(ratio(STR_SPEC, func, setup_list_of_dict(1000)))
    print(ratio(STR_SPEC, func, setup_list_of_dict(1000)))
84 |
85 |
86 | # suggest using scalene to profile with:
87 | # $ scalene glom/test/perf_report.py --profile-all --reduced-profile --cpu-only --outfile SCALENE-CPU.txt
88 |
--------------------------------------------------------------------------------
/glom/test/test_check.py:
--------------------------------------------------------------------------------
1 | from pytest import raises
2 |
3 | from glom import glom, Check, CheckError, Coalesce, SKIP, STOP, T
4 |
# Compatibility shim: where the name ``unicode`` is not defined, alias it to
# ``str`` so the type checks below can use a single name.
try:
    unicode
except NameError:
    unicode = str
9 |
10 |
def test_check_basic():
    """Exercise Check with SKIP/STOP defaults, its repr, type and instance
    checks, and a table of targets that must raise CheckError."""
    assert glom([0, SKIP], [T]) == [0]  # sanity check SKIP

    target = [{'id': 0}, {'id': 1}, {'id': 2}]

    # check that skipping non-passing values works
    assert glom(target, ([Coalesce(Check('id', equal_to=0), default=SKIP)], T[0])) == {'id': 0}
    assert glom(target, ([Check('id', equal_to=0, default=SKIP)], T[0])) == {'id': 0}

    # check that stopping iteration on non-passing values works
    assert glom(target, [Check('id', equal_to=0, default=STOP)]) == [{'id': 0}]

    # check that stopping chain execution on non-passing values works
    spec = (Check(validate=lambda x: len(x) > 0, default=STOP), T[0])
    assert glom('hello', spec) == 'h'
    assert glom('', spec) == ''  # would fail with IndexError if STOP didn't work

    assert repr(Check()) == 'Check()'
    assert repr(Check(T.a)) == 'Check(T.a)'
    assert repr(Check(equal_to=1)) == 'Check(equal_to=1)'
    assert repr(Check(instance_of=dict)) == 'Check(instance_of=dict)'
    assert repr(Check(T(len), validate=sum)) == 'Check(T(len), validate=sum)'

    target = [1, 'a']
    assert glom(target, [Check(type=unicode, default=SKIP)]) == ['a']
    assert glom(target, [Check(type=(unicode, int))]) == [1, 'a']
    assert glom(target, [Check(instance_of=unicode, default=SKIP)]) == ['a']
    assert glom(target, [Check(instance_of=(unicode, int))]) == [1, 'a']

    target = ['1']
    assert glom(target, [Check(validate=(int, float))])
    assert glom(target, [Check()])  # bare check does a truthy check

    # Each entry is (target, spec) or (target, spec, expected message fragment).
    failing_checks = [({'a': {'b': 1}}, {'a': ('a', 'b', Check(type=str))},
                       '''target at path ['a', 'b'] failed check, got error: "expected type to be 'str', found type 'int'"'''),
                      ({'a': {'b': 1}}, {'a': ('a', Check('b', type=str))},
                       '''target at path ['a'] failed check, subtarget at 'b' got error: "expected type to be 'str', found type 'int'"'''),
                      (1, Check(type=(unicode, bool))),
                      (1, Check(instance_of=unicode)),
                      (1, Check(instance_of=(unicode, bool))),
                      (1, Check(equal_to=0)),
                      (1, Check(one_of=(0,))),
                      (1, Check(one_of=(0, 2))),
                      ('-3.14', Check(validate=int)),
                      ('', Check(validate=lambda x: False)),]

    for fc in failing_checks:
        if len(fc) == 2:
            target, check = fc
            msg = None
        else:
            target, check, msg = fc

        with raises(CheckError) as exc_info:
            glom(target, check)

        # the message is only verified for entries that supply one
        if msg is not None:
            assert str(exc_info.value).find(msg) != -1
        assert repr(exc_info.value)
70 |
71 |
def test_check_multi():
    """A Check that fails two conditions at once reports both errors."""
    both_fail = Check(instance_of=float, validate=lambda x: x > 3.14)
    with raises(CheckError) as caught:
        glom(1, both_fail)

    message = str(caught.value)
    assert "2 errors" in message
78 |
79 |
80 |
def test_check_signature():
    """Invalid Check() arguments are rejected when the spec is constructed."""
    rejected = [
        (ValueError, {'instance_of': ()}),
        (ValueError, {'type': ()}),
        (TypeError, {'fake_kwarg': True}),
        (ValueError, {'one_of': 1}),
        (ValueError, {'one_of': ()}),
        (TypeError, {'one_of': (1, 2), 'equal_to': 3}),
        (ValueError, {'validate': 'bad, not callable, value'}),
    ]
    for expected_error, check_kwargs in rejected:
        with raises(expected_error):
            Check(**check_kwargs)
99 |
--------------------------------------------------------------------------------
/glom/test/test_cli.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 |
4 | import pytest
5 | from face import CommandChecker, CommandLineError
6 |
7 | from glom import cli
8 |
9 |
10 | BASIC_TARGET = '{"a": {"b": "c"}}'
11 | BASIC_SPEC = '{"a": "a.b"}'
12 | BASIC_OUT = '{"a": "c"}\n'
13 |
@pytest.fixture
def cc():
    """CommandChecker wrapped around the glom command, with stderr mixed
    into stdout."""
    # TODO: don't mix stderr
    return CommandChecker(cli.get_command(), mix_stderr=True)
19 |
@pytest.fixture
def basic_spec_path(tmp_path):
    """Write BASIC_SPEC to a file under *tmp_path* and return its path as
    a string."""
    spec_path = f'{tmp_path}/basic_spec.txt'
    with open(spec_path, 'w') as spec_file:
        spec_file.write(BASIC_SPEC)
    return spec_path
26 |
@pytest.fixture
def basic_target_path(tmp_path):
    """Write BASIC_TARGET to a file under *tmp_path* and return its path
    as a string."""
    target_path = f'{tmp_path}/basic_target.txt'
    with open(target_path, 'w') as target_file:
        target_file.write(BASIC_TARGET)
    return target_path
33 |
34 |
def test_cli_blank(cc):
    """Running the command without arguments prints an empty JSON object."""
    result = cc.run(['glom'])
    assert result.stdout == '{}\n'
38 |
39 |
def test_cli_spec_target_argv_basic(cc):
    """Spec and target passed as positional arguments, first with the
    default spec format and then with json, too."""
    prefix = ['glom', '--indent', '0']
    for format_flags in ([], ['--spec-format', 'json']):
        res = cc.run(prefix + format_flags + [BASIC_SPEC, BASIC_TARGET])
        assert res.stdout == BASIC_OUT
47 |
48 |
def test_cli_spec_argv_target_stdin_basic(cc):
    """Spec on the command line, target on stdin: implicitly, with a '-'
    positional, and with '--target-file -'."""
    invocations = [
        ['glom', '--indent', '0', BASIC_SPEC],
        ['glom', '--indent', '0', BASIC_SPEC, '-'],
        ['glom', '--indent', '0', '--target-file', '-', BASIC_SPEC],
    ]
    for argv in invocations:
        res = cc.run(argv, input=BASIC_TARGET)
        assert res.stdout == BASIC_OUT
61 |
62 |
def test_cli_scalar(cc):
    """A string result is printed as quoted JSON by default, and bare with
    no trailing newline under --scalar."""
    nested_target = '{"a": {"b": {"c": "d"}}}'

    as_json = cc.run(['glom', 'a.b.c', nested_target])
    assert as_json.stdout == '"d"\n'

    as_scalar = cc.run(['glom', '--scalar', 'a.b.c', nested_target])
    assert as_scalar.stdout == 'd'
69 |
70 |
def test_cli_spec_target_files_basic(cc, basic_spec_path, basic_target_path):
    """Spec and target both read from files."""
    argv = ['glom', '--indent', '0',
            '--target-file', basic_target_path,
            '--spec-file', basic_spec_path]
    res = cc.run(argv)
    assert res.stdout == BASIC_OUT
75 |
76 |
def test_usage_errors(cc, basic_spec_path, basic_target_path):
    """Each bad invocation must fail through ``cc.fail_1`` and print a
    recognizable message (read from stdout, since the checker mixes
    stderr in)."""
    # bad target json
    res = cc.fail_1(['glom', BASIC_SPEC, '{' + BASIC_TARGET])
    assert 'could not load target data' in res.stdout  # TODO: stderr

    # bad target yaml
    res = cc.fail_1(['glom', '--target-format', 'yaml', BASIC_SPEC, '{' + BASIC_TARGET])
    assert 'could not load target data' in res.stdout  # TODO: stderr

    # bad target toml
    res = cc.fail_1(['glom', '--target-format', 'toml', BASIC_SPEC, '{' + BASIC_TARGET])
    assert 'could not load target data' in res.stdout  # TODO: stderr

    # TODO: bad target python?

    # bad target format TODO: fail_2
    res = cc.fail_1(['glom', '--target-format', 'lol', BASIC_SPEC, BASIC_TARGET])
    assert 'target-format to be one of' in res.stdout  # TODO: stderr

    # bad spec format TODO: fail_2
    res = cc.fail_1(['glom', '--spec-format', 'lol', BASIC_SPEC, BASIC_TARGET])
    assert 'spec-format to be one of' in res.stdout  # TODO: stderr

    # test conflicting spec file and spec posarg
    res = cc.fail_1(['glom', '--spec-file', basic_spec_path, BASIC_SPEC, BASIC_TARGET])
    assert 'spec' in res.stdout
    assert 'not both' in res.stdout  # TODO: stderr

    # test conflicting target file and target posarg
    res = cc.fail_1(['glom', '--target-file', basic_target_path, BASIC_SPEC, BASIC_TARGET])
    assert 'target' in res.stdout
    assert 'not both' in res.stdout  # TODO: stderr


    # nonexistent spec file (the fixture path with 'abra' appended)
    # TODO: if spec-file is present, maybe single posarg should become target?
    res = cc.fail_1(['glom', '--spec-file', basic_spec_path + 'abra', '--target-file', basic_target_path])
    assert 'could not read spec file' in res.stdout  # TODO: stderr

    # nonexistent target file
    res = cc.fail_1(['glom', '--spec-file', basic_spec_path, '--target-file', basic_target_path + 'abra'])
    assert 'could not read target file' in res.stdout  # TODO: stderr
117 |
118 |
def test_main_basic():
    """cli.main returns 1 when the spec cannot be resolved and 0 when it
    can."""
    failing_argv = ['__', 'a.b.fail', '{"a": {"b": "c"}}']
    assert cli.main(failing_argv) == 1

    passing_argv = ['__', 'a.b.c', '{"a": {"b": {"c": "d"}}}']
    assert cli.main(passing_argv) == 0
125 |
126 |
def test_main_yaml_target():
    """A valid YAML target file loads and exits 0; an invalid one raises
    CommandLineError carrying the YAML parser's message."""
    cwd = os.path.dirname(os.path.abspath(__file__))
    # Handles the filepath if running tox
    if '.tox' in cwd:
        cwd = os.path.join(cwd.split('.tox')[0] + '/glom/test/')
    path = os.path.join(cwd, 'data/test_valid.yaml')
    argv = ['__', '--target-file', path, '--target-format', 'yml', 'Hello']
    assert cli.main(argv) == 0

    path = os.path.join(cwd, 'data/test_invalid.yaml')
    argv = ['__', '--target-file', path, '--target-format', 'yml', 'Hello']
    # Makes sure correct improper yaml exception is raised
    with pytest.raises(CommandLineError) as excinfo:
        cli.main(argv)
    # The parser names the token it was waiting for in angle brackets; without
    # the "<block end>" part this substring cannot match the message.
    assert 'expected <block end>, but found' in str(excinfo.value)
142 |
143 |
def test_main_toml_target():
    """A valid TOML target file loads and exits 0; an invalid one raises
    CommandLineError carrying the TOML parser's message."""
    here = os.path.dirname(os.path.abspath(__file__))
    # Handles the filepath if running tox
    if '.tox' in here:
        here = os.path.join(here.split('.tox')[0] + '/glom/test/')

    valid_path = os.path.join(here, 'data/test_valid.toml')
    valid_argv = ['__', '--target-file', valid_path, '--target-format', 'toml', 'Hello']
    assert cli.main(valid_argv) == 0

    invalid_path = os.path.join(here, 'data/test_invalid.toml')
    invalid_argv = ['__', '--target-file', invalid_path, '--target-format', 'toml', 'Hello']
    # Makes sure correct improper toml exception is raised
    with pytest.raises(CommandLineError) as excinfo:
        cli.main(invalid_argv)
    assert 'Invalid initial character for a key part' in str(excinfo.value)
159 |
160 |
def test_main_python_full_spec_python_target():
    """'python-full' specs run against a Python-literal target exit 0."""
    target_literal = '{1: 2, 2: 3, 3: 4}'
    for spec_source in ('T[T[3].bit_length()]', '(T.values(), [T])'):
        argv = ['__', '--target-format', 'python', '--spec-format', 'python-full',
                spec_source, target_literal]
        assert cli.main(argv) == 0
167 |
168 |
def test_main(tmp_path):
    """Run the ``glom`` executable in a subprocess from a temporary
    directory and check what it prints."""
    # TODO: pytest-cov knows how to make coverage work across
    # subprocess boundaries...
    # Set the working directory for the child only. Calling os.chdir() here
    # would change it for the whole test process and never restore it,
    # leaking into every test that runs afterwards.
    res = subprocess.check_output(['glom', 'a', '{"a": 3}'], cwd=str(tmp_path))
    assert res.decode('utf8') in ('3\n', '3\r\n')  # unix or windows line end okay
175 |
--------------------------------------------------------------------------------
/glom/test/test_fill.py:
--------------------------------------------------------------------------------
1 | from glom import Auto, Fill, T, glom
2 |
def test():
    """Fill mode: containers in the spec are templates to be filled in,
    callables are invoked on the target, and other values pass through."""
    assert glom('abc', Fill((T[0], {T[1]: T[2]}))) == ('a', {'b': 'c'})
    assert glom('123', Fill({T[0], frozenset([T[1], T[2]])})) == {'1', frozenset(['2', '3'])}
    assert glom('xyz', Fill([T[0], T[1], T[2]]))
    assert glom('abc', Fill(lambda t: t.upper())) == 'ABC'
    assert glom('a', Fill(1)) == 1
    assert Fill((T, T, T)).fill(1) == (1, 1, 1)

    # Auto() inside a Fill spec: 'data.-2' is resolved as a path there
    target = {'data': [0, 2, 4]}
    assert glom(target, Fill((T['data'][-1], Auto('data.-2')))) == (4, 2)

    assert repr(Auto()) == 'Auto()'
    assert repr(Auto(T)) == 'Auto(T)'

    assert repr(Fill()) == 'Fill()'
    assert repr(Fill(T)) == 'Fill(T)'

    assert repr(Fill(len)) == 'Fill(len)'
21 |
--------------------------------------------------------------------------------
/glom/test/test_grouping.py:
--------------------------------------------------------------------------------
1 | from pytest import raises
2 |
3 | from glom import glom, T, SKIP, STOP, Auto, BadSpec, Val
4 | from glom.grouping import Group, First, Avg, Max, Min, Sample, Limit
5 |
6 | from glom.reduction import Merge, Flatten, Sum, Count
7 |
8 |
def test_bucketing():
    """Group with dict specs buckets values by key, including nested
    buckets."""
    by_parity = glom(range(4), Group({T % 2: [T]}))
    assert by_parity == {0: [0, 2], 1: [1, 3]}

    nested = glom(range(6), Group({T % 3: {T % 2: [T / 10.0]}}))
    expected_nested = {0: {0: [0.0], 1: [0.3]},
                       1: {1: [0.1], 0: [0.4]},
                       2: {0: [0.2], 1: [0.5]}}
    assert nested == expected_nested
13 |
14 |
15 |
def test_corner_cases():
    """STOP/SKIP handling, embedded Auto specs, unsupported spec shapes and
    Limit inside nested buckets, all in Group mode."""
    target = range(5)

    # immediate stop dict
    assert glom(target, Group({Val(STOP): [T]})) == {}

    # immediate stop list
    assert glom(target, Group([Val(STOP)])) == []

    # dict key SKIP
    assert glom(target, Group({(lambda t: SKIP if t < 3 else t): T})) == {3: 3, 4: 4}

    # dict val SKIP
    assert glom(target, Group({T: lambda t: t if t % 2 else SKIP})) == {3: 3, 1: 1}

    # list val SKIP
    assert glom(target, Group([lambda t: t if t % 2 else SKIP])) == [1, 3]

    # embedded auto spec (lol @ 0 being 0 bit length)
    assert glom(target, Group({Auto(('bit_length', T())): [T]})) == {0: [0], 1: [1], 2: [2, 3], 3: [4]}

    # no dicts inside lists in Group mode
    with raises(BadSpec):
        assert glom(target, Group([{T: T}]))

    # check only supported types
    with raises(BadSpec):
        assert glom(target, Group('no string support yet'))

    # bucket ints by their bit length and then odd/even, limited to 3 per bucket
    spec = Group({T.bit_length(): {lambda t: t % 2: Limit(3)}})
    res = glom(range(20), spec)
    assert res == {0: {0: [0]},
                   1: {1: [1]},
                   2: {0: [2], 1: [3]},
                   3: {0: [4, 6], 1: [5, 7]},
                   4: {0: [8, 10, 12], 1: [9, 11, 13]}}

    return
55 |
56 |
def test_agg():
    """Aggregator specs inside Group: results, reprs, and Count used as a
    per-bucket aggregate."""
    t = list(range(10))
    assert glom(t, Group(First())) == 0
    assert glom(t, Group(T)) == 9  # this is basically Last

    assert glom(t, Group(Avg())) == sum(t) / len(t)
    assert glom(t, Group(Sum())) == sum(t)

    assert glom([0, 1, 0], Group(Max())) == 1
    assert glom([1, 0, 1], Group(Min())) == 0

    assert repr(Group(First())) == 'Group(First())'
    assert repr(Avg()) == 'Avg()'
    assert repr(Max()) == 'Max()'
    assert repr(Min()) == 'Min()'
    assert repr(Sum()) == 'Sum()'
    assert repr(Count()) == 'Count()'

    # Count tallies the members of each bucket
    assert glom(range(10), Group({lambda t: t % 2: Count()})) == {
        0: 5, 1: 5}
77 |
78 |
def test_limit():
    """Limit caps how many items reach its subspec, and is rejected outside
    of Group."""
    numbers = list(range(10))
    assert glom(numbers, Group(Limit(1, T))) == 0
    assert glom(numbers, Group(Limit(3, Max()))) == 2
    assert glom(numbers, Group(Limit(3, [T]))) == [0, 1, 2]

    limited_max = Group(Limit(3, Max()))
    assert repr(limited_max) == 'Group(Limit(3, Max()))'

    with raises(BadSpec):
        assert glom(numbers, Limit(1))  # needs to be wrapped in Group for now
90 |
91 |
def test_reduce():
    """Flatten and Merge work as aggregators inside Group, nested Flatten
    included."""
    flat = glom([[1], [2, 3]], Group(Flatten()))
    assert flat == [1, 2, 3]

    merged = glom([{'a': 1}, {'b': 2}], Group(Merge()))
    assert merged == {'a': 1, 'b': 2}

    doubly_flat = glom([[[1]], [[2, 3], [4]]], Group(Flatten(Flatten())))
    assert doubly_flat == [1, 2, 3, 4]
96 |
97 |
def test_sample():
    """Sample returns every item when asked for more than there are, and
    otherwise a selection of the requested size."""
    sample_five = Group(Sample(5))
    assert glom([1, 2, 3], sample_five) == [1, 2, 3]

    assert repr(sample_five) == 'Group(Sample(5))'

    picked = glom([1, 2, 3], Group(Sample(2)))
    acceptable = [[1, 2], [1, 3], [2, 1], [2, 3], [3, 1], [3, 2]]
    assert picked in acceptable
106 |
--------------------------------------------------------------------------------
/glom/test/test_mutation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from glom import glom, Path, T, S, Spec, Glommer, PathAssignError, PathAccessError
4 | from glom import assign, Assign, delete, Delete, PathDeleteError, Or
5 | from glom import core
6 | from glom.core import UnregisteredTarget
7 |
8 |
def test_assign():
    """Assign through T expressions, string paths and Path objects, on
    dicts, plain objects and self-referential dicts; plus constructor
    validation and repr round-tripping."""
    class Foo:
        pass

    assert glom({}, Assign(T['a'], 1)) == {'a': 1}
    assert glom({'a': {}}, Assign(T['a']['a'], 1)) == {'a': {'a': 1}}
    assert glom({'a': {}}, Assign('a.a', 1)) == {'a': {'a': 1}}
    assert glom(Foo(), Assign(T.a, 1)).a == 1
    assert glom({}, Assign('a', 1)) == {'a': 1}
    assert glom(Foo(), Assign('a', 1)).a == 1
    assert glom({'a': Foo()}, Assign('a.a', 1))['a'].a == 1
    # r() builds a dict whose 'r' key refers back to the dict itself
    def r():
        r = {}
        r['r'] = r
        return r
    assert glom(r(), Assign('r.r.r.r.r.r.r.r.r', 1)) == {'r': 1}
    assert glom(r(), Assign(T['r']['r']['r']['r'], 1)) == {'r': 1}
    assert glom(r(), Assign(Path('r', 'r', T['r']), 1)) == {'r': 1}
    assert assign(r(), Path('r', 'r', T['r']), 1) == {'r': 1}
    with pytest.raises(TypeError, match='path argument must be'):
        Assign(1, 'a')
    with pytest.raises(ValueError, match='path must have at least one element'):
        Assign(T, 1)

    assert repr(Assign(T.a, 1)) == 'Assign(T.a, 1)'
    assign_spec = Assign(T.a, 1, missing=dict)
    assert repr(assign_spec) == "Assign(T.a, 1, missing=dict)"
    # the repr must evaluate back to a spec with the same repr
    assert repr(assign_spec) == repr(eval(repr(assign_spec)))
37 |
38 |
def test_assign_recursive():
    """A self-referential value comes out of Assign still referring to
    itself."""
    cyclic = {}
    cyclic[1] = [cyclic]
    assigned = glom({}, Assign(T['a'], cyclic))['a']
    assert assigned[1][0] is assigned
44 |
45 |
def test_assign_spec_val():
    """A Spec or Or passed as the value is evaluated against the target
    before being assigned."""
    for value_spec in (Spec('b'), Or('d', 'b')):
        output = glom({'b': 'c'}, Assign('a', value_spec))
        assert output['a'] == output['b'] == 'c'
51 |
52 |
def test_unregistered_assign():
    """Assigning on a target with no registered 'assign' operation raises
    UnregisteredTarget."""
    # test with bare target registry
    bare_glommer = Glommer(register_default_types=False)
    with pytest.raises(UnregisteredTarget, match='assign'):
        bare_glommer.glom({}, Assign('a', 'b'))

    # test for unassignable tuple
    tuple_holder = {'a': ()}
    with pytest.raises(UnregisteredTarget, match='assign'):
        glom(tuple_holder, Assign('a.0', 'b'))
63 |
64 |
def test_bad_assign_target():
    """An exception raised by the target while assigning becomes
    PathAssignError; a missing intermediate key is PathAccessError."""
    class BadTarget:
        def __setattr__(self, name, val):
            raise Exception("and you trusted me?")

    spec = Assign('a', 'b')

    # sanity check
    def ok_target():
        return None
    glom(ok_target, spec)
    assert ok_target.a == 'b'

    with pytest.raises(PathAssignError, match='could not assign'):
        glom(BadTarget(), spec)

    with pytest.raises(PathAccessError, match='could not access'):
        assign({}, 'a.b.c', 'moot')
82 |
83 |
def test_sequence_assign():
    """Assigning to an existing list index works; an out-of-range index
    raises PathAssignError."""
    target = {'alist': [0, 1, 2]}
    assign(target, 'alist.2', 3)
    assert target['alist'][2] == 3

    with pytest.raises(PathAssignError, match='could not assign') as exc_info:
        assign(target, 'alist.3', 4)

    # the following test is because pypy's IndexError is different than CPython's:
    # E         - PathAssignError(IndexError('list index out of range',), Path('alist'), '3')
    # E         + PathAssignError(IndexError('list assignment index out of range',), Path('alist'), '3')
    # E         ?                                  +++++++++++

    # so only the ends of the repr are compared
    exc_repr = repr(exc_info.value)
    assert exc_repr.startswith('PathAssignError(')
    assert exc_repr.endswith("'3')")
    return
101 |
102 |
def test_invalid_assign_op_target():
    """assign() rejects a T path whose last operation is a function call."""
    call_path = T['afunc'](x=1)
    data = {'afunc': lambda x: 'hi %s' % x}

    with pytest.raises(ValueError):
        assign(data, call_path, None)
110 |
111 |
def test_assign_missing_signature():
    """A non-callable ``missing`` argument is rejected with a TypeError."""
    not_callable = 'invalidbcnotcallable'
    with pytest.raises(TypeError, match='callable'):
        assign({}, 'a.b.c', 'lol', missing=not_callable)
117 |
118 |
def test_assign_missing_dict():
    """assign() with a ``missing`` factory builds every absent level of the path."""
    target = {}
    val = object()

    def debugdict():
        # Named factory (rather than passing ``dict`` directly) so extra
        # bookkeeping can be added here when debugging.
        return dict()

    assign(target, 'a.b.c.d', val, missing=debugdict)

    assert target == {'a': {'b': {'c': {'d': val}}}}
134 |
135 |
def test_assign_missing_object():
    """``missing`` can build attribute containers, and extant attributes are reused."""
    class Container:
        pass

    sentinel = object()
    root = Container()
    extant_a = Container()
    root.a = extant_a

    assign(root, 'a.b.c.d', sentinel, missing=Container)

    assert root.a.b.c.d is sentinel
    # the pre-existing object on the path must not have been replaced
    assert root.a is extant_a
147 |
148 |
def test_assign_missing_with_extant_keys():
    """Ensure assign with missing keeps usable extant keys on the path.

    The structure returned by missing() already contains part of the
    path; those keys must be reused rather than overwritten. The number
    of missing() invocations is checked as well, to make sure it is not
    called more often than necessary.

    """
    default_struct = {'b': {'c': {}}}
    calls = []

    def _get_default_struct():
        calls.append(1)  # recorded so the call count can be checked below
        return default_struct

    target = {}
    value = object()
    assign(target, 'a.b.c', value, missing=_get_default_struct)

    assert target['a']['b']['c'] is value
    assert target['a']['b'] is default_struct['b']
    assert len(calls) == 1
171 |
172 |
def test_assign_missing_unassignable():
    """Check that the assignment onto the original target happens last.

    When an assignment along the path fails, the target passed in by
    the caller must be left exactly as it was.

    """

    class Tarjay:
        init_count = 0

        def __init__(self):
            type(self).init_count += 1

        @property
        def unassignable(self):
            return

    original = {"preexisting": "ok"}
    payload = object()

    with pytest.raises(PathAssignError):
        assign(original, 'tarjay.unassignable.a.b.c', payload, missing=Tarjay)

    assert original == {'preexisting': 'ok'}

    # why 3? "c" gets the value of "value", while "b", "a", and
    # "tarjay" all succeed and are set to Tarjay instances. Then
    # unassignable is already present, but not possible to assign to,
    # raising the PathAssignError.
    assert Tarjay.init_count == 3
202 |
203 |
def test_s_assign():
    """
    check that a value stored into S via Assign can be read back from S
    """
    store_then_read = (Assign(S['foo'], 'bar'), S['foo'])
    assert glom({}, store_then_read) == 'bar'
209 |
210 |
def test_delete():
    """Exercise Delete/delete on dicts, objects, cyclic dicts, scope and missing paths."""
    class Foo:
        def __init__(self, d=None):
            for attr_name, attr_val in d.items():
                setattr(self, attr_name, attr_val)

    # keys and attributes, addressed both with T and with string paths
    assert glom({'a': 1}, Delete(T['a'])) == {}
    assert glom({'a': {'a': 1}}, Delete(T['a']['a'])) == {'a': {}}
    assert glom({'a': {'a': 1}}, Delete('a.a')) == {'a': {}}
    assert not hasattr(glom(Foo({'a': 1}), Delete(T.a)), 'a')
    assert glom({'a': 1}, Delete('a')) == {}
    assert not hasattr(glom(Foo({'a': 1}), Delete('a')), 'a')
    assert not hasattr(glom({'a': Foo({'a': 1})}, Delete('a.a'))['a'], 'a')

    def cyclic():
        # a dict whose only key points back at the dict itself
        loop = {}
        loop['r'] = loop
        return loop

    for path_spec in ('r.r.r.r.r.r.r.r.r',
                      T['r']['r']['r']['r'],
                      Path('r', 'r', T['r'])):
        assert glom(cyclic(), Delete(path_spec)) == {}
    assert delete(cyclic(), Path('r', 'r', T['r'])) == {}

    # constructor argument validation
    with pytest.raises(TypeError, match='path argument must be'):
        Delete(1, 'a')
    with pytest.raises(ValueError, match='path must have at least one element'):
        Delete(T, 1)

    assert repr(Delete(T.a)) == 'Delete(T.a)'

    # deleting from the scope
    assert glom(1, (S(x=T), S['x'])) == 1
    with pytest.raises(PathAccessError):
        glom(1, (S(x=T), Delete(S['x']), S['x']))

    # a missing parent raises
    with pytest.raises(PathAccessError):
        glom({}, Delete(T['a']['b']))

    # a missing index raises unless ignore_missing is set
    with pytest.raises(PathDeleteError):
        glom([], Delete(T[0]))
    empty_list = []
    assert glom(empty_list, Delete(T[0], ignore_missing=True)) is empty_list

    # a missing attribute raises unless ignore_missing is set
    with pytest.raises(PathDeleteError):
        glom(object(), Delete(T.does_not_exist))
    bare_obj = object()
    assert glom(bare_obj, Delete(T.does_not_exist, ignore_missing=True)) is bare_obj
261 |
262 |
def test_unregistered_delete():
    """Delete raises UnregisteredTarget (mentioning 'delete') for unsupported targets."""
    # a Glommer built without the default type registrations
    bare_glommer = Glommer(register_default_types=False)
    with pytest.raises(UnregisteredTarget, match='delete'):
        bare_glommer.glom({'a': 1}, Delete('a'))

    # with the default registrations, a tuple element still cannot be deleted
    tuple_holder = {'a': (1,)}
    with pytest.raises(UnregisteredTarget, match='delete'):
        glom(tuple_holder, Delete('a.0'))
271 |
272 |
def test_bad_delete_target():
    """Failures while deleting are reported as PathDeleteError."""
    class BadTarget:
        def __delattr__(self, name):
            raise Exception("and you trusted me?")

    def plain_target():
        # a function object is used only because it accepts attributes
        return None

    delete_spec = Delete('a')

    # the spec works on a well-behaved target
    plain_target.a = 1
    glom(plain_target, delete_spec)
    assert not hasattr(plain_target, 'a')

    # an exception from __delattr__ is wrapped
    with pytest.raises(PathDeleteError, match='could not delete'):
        glom(BadTarget(), delete_spec)

    # so is deleting a key that is not there
    with pytest.raises(PathDeleteError, match='could not delete'):
        delete({}, 'a')
290 |
291 |
def test_sequence_delete():
    """Delete a list element by index; an out-of-range index raises PathDeleteError."""
    data = {'alist': [0, 1, 2]}
    delete(data, 'alist.1')
    assert data['alist'] == [0, 2]

    with pytest.raises(PathDeleteError, match='could not delete') as caught:
        delete(data, 'alist.2')

    # only the start and end of the repr are pinned down
    error_repr = repr(caught.value)
    assert error_repr.startswith('PathDeleteError(')
    assert error_repr.endswith("'2')")
304 |
305 |
def test_invalid_delete_op_target():
    """delete() called with a function-call T path raises ValueError."""
    call_path = T['afunc'](x=1)
    data = {'afunc': lambda x: 'hi %s' % x}

    with pytest.raises(ValueError):
        delete(data, call_path, None)
313 |
314 |
def test_delete_ignore_missing():
    """With ignore_missing=True, deleting absent paths returns the target unchanged."""
    for absent_path in ('a', 'a.b'):
        assert delete({}, absent_path, ignore_missing=True) == {}
318 |
319 |
def test_star_broadcast():
    """Assign and Delete apply to every element matched by ``*`` path segments."""
    star_path = 'a.*.b.*.d'
    val = {'a': [{'b': [{'c': 1}, {'c': 2}, {'c': 3}]}]}

    # assign 'a' under every matched dict, then read it back via the same path
    assigned = glom(val, (Assign(star_path, 'a'), star_path))
    assert assigned == [['a', 'a', 'a']]

    # deleting through the same path removes 'd' and leaves 'c' alone
    glom(val, Delete(star_path))
    first_leaf = val['a'][0]['b'][0]
    assert 'c' in first_leaf
    assert 'd' not in first_leaf
--------------------------------------------------------------------------------
/glom/test/test_path_and_t.py:
--------------------------------------------------------------------------------
1 | from pytest import raises
2 |
3 | from glom import glom, Path, S, T, A, PathAccessError, GlomError, BadSpec, Or, Assign, Delete
4 | from glom import core
5 |
def test_list_path_access():
    """An integer Path segment indexes into a list target."""
    digits = list(range(10))
    assert glom(digits, Path(1)) == 1
8 |
9 |
def test_path():
    """Path segments are used verbatim: dotted strings, ints and arbitrary objects."""
    key_obj = object()
    nested = {'a': {'b.b': [None, {key_obj: [None, None, 'd']}]}}
    full_path = Path('a', 'b.b', 1, key_obj, -1)

    assert glom(nested, full_path) == 'd'
15 |
16 |
def test_empty_path_access():
    """An empty Path() evaluates to the target object itself."""
    target = {}

    assert glom(target, Path()) is target
    assert glom(target, (Path(), Path(), Path())) is target

    dup_dict = glom(target, {'target': Path(),
                             'target2': Path()})
    # These identity checks were bare expressions before and so verified
    # nothing; they are now real assertions.
    assert dup_dict['target'] is target
    assert dup_dict['target2'] is target
27 |
28 |
def test_path_t_roundtrip():
    """The repr of T and Path objects matches the expression that built them."""
    cases = [
        # T reprs roundtrip
        (T['a'].b.c(), "T['a'].b.c()"),
        (T[1:], "T[1:]"),
        (T[::3, 1:, 1:2, :2:3], "T[::3, 1:, 1:2, :2:3]"),
        # Path reprs roundtrip
        (Path('a', 1, 'b.b', -1.0), "Path('a', 1, 'b.b', -1.0)"),
        # ... including when the Path contains Ts
        (Path(T['a'].b, 'c', T['d'].e), "Path(T['a'].b, 'c', T['d'].e)"),
        # T instances containing Path access revert to repring with Path
        (Path(T['a'].b, 'c', T['d'].e).path_t, "Path(T['a'].b, 'c', T['d'].e)"),
        # Paths containing only T objects reduce to a T (joining the T objects)
        (Path(T['a'].b, T.c()), "T['a'].b.c()"),
        # multiple nested paths reduce
        (Path(Path(Path('a'))), "Path('a')"),
        # builtins repr by name
        (T[len], 'T[len]'),
        (T.func(len, sum), 'T.func(len, sum)'),
        # * and **
        (T.__star__().__starstar__(), 'T.__star__().__starstar__()'),
        (Path('a', T.__star__().__starstar__()), "Path('a', T.__star__().__starstar__())"),
    ]
    for built, expected_repr in cases:
        assert repr(built) == expected_repr
57 |
58 |
def test_path_access_error_message():
    """Pin down the message and repr of PathAccessError for several failure kinds."""

    # string path with a missing key
    with raises(GlomError) as caught:
        glom({}, 'a.b')
    summary = caught.exconly()
    assert ("PathAccessError: could not access 'a', part 0 of Path('a', 'b'), got error: KeyError"
            in summary)
    key_err_repr = repr(KeyError('a'))  # py3.7+ changed the keyerror repr
    assert repr(caught.value) == "PathAccessError(" + key_err_repr + ", Path('a', 'b'), 0)"

    # multi-part Path with T, catchable as a KeyError
    with raises(KeyError) as caught:
        # don't actually use glom to copy your data structures please
        glom({'a': {'b': 'c'}}, Path('a', T.copy(), 'd'))
    summary = caught.exconly()
    assert ("PathAccessError: could not access 'd', part 3 of Path('a', T.copy(), 'd'), got error: KeyError"
            in summary)
    key_err_repr = repr(KeyError('d'))  # py3.7+ changed the keyerror repr
    assert repr(caught.value) == "PathAccessError(" + key_err_repr + ", Path('a', T.copy(), 'd'), 3)"

    # attribute access on a dict fails with an AttributeError inside
    with raises(GlomError) as caught:
        glom({'a': {'b': 'c'}}, Path('a', T.b))
    summary = caught.exconly()
    assert ("PathAccessError: could not access 'b', part 1 of Path('a', T.b), got error: AttributeError"
            in summary)
    attr_err_repr = repr(AttributeError("'dict' object has no attribute 'b'"))
    assert repr(caught.value) == "PathAccessError(" + attr_err_repr + ", Path(\'a\', T.b), 1)"
85 |
86 |
def test_t_picklability():
    """T and S specs survive a pickle round trip with an unchanged repr."""
    import pickle

    def roundtrip(obj):
        return pickle.loads(pickle.dumps(obj))

    class TargetType:
        def __init__(self):
            self.attribute = lambda: None
            self.attribute.method = lambda: {'key': lambda x: x * 2}

    t_spec = T.attribute.method()['key'](x=5)
    restored = roundtrip(t_spec)
    assert repr(t_spec) == repr(restored)

    # the original (un-pickled) spec is the one evaluated here
    assert glom(TargetType(), t_spec) == 10

    scope_spec = S.attribute
    assert repr(scope_spec) == repr(roundtrip(scope_spec))
104 |
105 |
def test_t_subspec():
    """Specs passed as arguments to a T/S call are evaluated before the call."""
    # tests that arg-mode is a min-mode, allowing for
    # other specs to be embedded inside T calls
    records = [
        {'id': 1},
        {'pk': 1}]
    get_ids = (
        S(id_type=int),
        [S.id_type(Or('id', 'pk'))])
    assert glom(records, get_ids) == [1, 1]

    # test that "shallow" data structures translate as-is
    mapping = {'a': 1, 'b': 2, 'c': 3}
    get_vals = (
        S(seq_type=tuple),
        S.seq_type([T['a'], T['b'], Or('c', 'd')])
    )
    assert glom(mapping, get_vals) == (1, 2, 3)
128 |
129 |
def test_a_forbidden():
    """Unsupported uses of A raise BadSpec."""
    # cannot assign to function call
    with raises(BadSpec):
        A()
    # cannot assign without destination
    with raises(BadSpec):
        glom(1, A)
135 |
136 |
def test_s_magic():
    """S reads names from the scope; unknown names raise PathAccessError."""
    initial_scope = {'test': 'value'}
    assert glom(None, S.test, scope=initial_scope) == 'value'

    # ref to 'a' which doesn't exist in scope
    with raises(PathAccessError):
        glom(1, S.a)

    with raises(PathAccessError):
        glom(1, A.b.c)
147 |
148 |
def test_path_len():
    """len(Path) counts the path's segments for string-, text- and T-built paths."""
    cases = [
        (Path(), 0),
        (Path('a', 'b', 'c'), 3),
        (Path.from_text('1.2.3.4'), 4),
        (Path(T), 0),
        (Path(T.a.b.c), 3),
        (Path(T.a()['b'].c.d), 5),
    ]
    for path, expected_len in cases:
        assert len(path) == expected_len
158 |
159 |
def test_path_getitem():
    """Integer indexing of a Path yields single-segment Paths and checks bounds."""
    path = Path(T.a.b.c)

    in_range = [
        (0, Path(T.a)),
        (1, Path(T.b)),
        (2, Path(T.c)),
        (-1, Path(T.c)),
        (-2, Path(T.b)),
    ]
    for index, expected in in_range:
        assert path[index] == expected

    for bad_index in (4, -14):
        with raises(IndexError, match='Path index out of range'):
            path[bad_index]
175 |
176 |
def test_path_slices():
    """Slicing a Path returns a Path of the selected segments."""
    call_seg = T(test='yes')
    path = Path(T.a.b, 1, 2, call_seg)

    # a full slice compares equal to the path
    assert path[::] == path

    # positive indices
    assert path[3:] == Path(2, T(test='yes'))
    assert path[1:3] == Path(T.b, 1)
    assert path[:3] == Path(T.a.b, 1)

    # positive indices backwards
    assert path[2:1] == Path()

    # negative indices forward
    assert path[-1:] == Path(T(test='yes'))
    assert path[:-2] == Path(T.a.b, 1)
    assert path[-3:-1] == Path(1, 2)

    # negative indices backwards
    assert path[-1:-3] == Path()

    # slicing and stepping
    assert path[1::2] == Path(T.b, 2)
200 |
201 |
def test_path_values():
    """Path.values() returns the argument of each segment as a tuple."""
    mixed = Path(T.a.b, 1, 2, T(test='yes'))
    expected = ('a', 'b', 1, 2, ((), {'test': 'yes'}))
    assert mixed.values() == expected

    # an empty path has no values
    assert Path().values() == ()
208 |
209 |
def test_path_items():
    """Path.items() returns (operation, argument) pairs, one per segment."""
    mixed = Path(T.a, 1, 2, T(test='yes'))
    expected = (
        ('.', 'a'),
        ('P', 1),
        ('P', 2),
        ('(', ((), {'test': 'yes'})),
    )
    assert mixed.items() == expected

    # an empty path has no items
    assert Path().items() == ()
218 |
219 |
def test_path_star():
    """Exercise ``*`` and ``**`` path segments on lists, dicts and plain objects."""
    flat = {'a': [1, 2, 3]}
    assert glom(flat, 'a.*') == [1, 2, 3]

    wrapped = {'a': [{'b': item} for item in flat['a']]}
    assert glom(wrapped, 'a.*.b') == [1, 2, 3]
    assert glom(wrapped, T['a'].__star__()['b']) == [1, 2, 3]
    assert glom(wrapped, Path('a', T.__star__(), 'b')) == [1, 2, 3]
    # multi-paths eat errors
    assert glom(wrapped, Path('a', T.__star__(), T.b)) == []

    nested_list = [[[1]]]
    assert glom(nested_list, '**') == [nested_list, [[1]], [1], 1]

    tree = {'a': [{'b': [{'c': 1}, {'c': 2}, {'d': {'c': 3}}]}], 'c': 4}
    assert glom(tree, '**.c') == [4, 1, 2, 3]
    assert glom(tree, 'a.**.c') == [1, 2, 3]
    assert glom(tree, T['a'].__starstar__()['c']) == [1, 2, 3]
    assert glom(tree, 'a.*.b.*.c') == [[1, 2]]

    # errors
    class ErrDict(dict):
        # NOTE(review): declared without ``self``, so item access raises
        # TypeError rather than reaching the ``1/0``; an exception is
        # raised either way -- confirm whether that is intended.
        def __getitem__(key): 1/0
    assert ErrDict(tree).keys()  # it will try to iterate
    assert glom(ErrDict(tree), '**') == [tree]
    assert glom(ErrDict(tree), '*') == []

    # object access
    class Holder:
        def __init__(self):
            self.a = 1
            self.b = {'c': 2}
    obj = Holder()

    assert glom(obj, '*') == [1, {'c': 2}]
    assert glom(obj, '**') == [obj, 1, {'c': 2}, 2]
251 |
252 |
def test_star_broadcast():
    """Arithmetic applied after a star path is applied to each matched element."""
    plus_one_each = Path.from_text('a.*').path_t + 1
    assert glom({'a': [1, 2, 3]}, plus_one_each) == [2, 3, 4]

    plus_one_deep = Path.from_text('**.c').path_t + 1
    nested = {'a': [{'b': [{'c': 1}, {'c': 2}, {'c': 3}]}]}
    assert glom(nested, plus_one_deep) == [2, 3, 4]
258 |
259 |
def test_star_warning():
    '''check the behavior when core.PATH_STAR is switched off; this will change when * is default on'''
    assert core.PATH_STAR is True
    try:
        core.PATH_STAR = False
        # with star handling off, '*' is looked up as a plain key
        literal_lookup = glom({'*': 1}, '*')
        assert literal_lookup == 1
        assert Path._STAR_WARNED
    finally:
        # always restore the module-level flag for the other tests
        core.PATH_STAR = True
269 |
def test_path_eq():
    """Paths compare equal by segments and unequal to unrelated objects."""
    left, right = Path('a', 'b'), Path('a', 'b')
    assert left == right
    assert Path('a') != Path('b')

    # comparison with a non-Path object
    assert Path() != object()
275 |
276 |
def test_path_eq_t():
    """A Path can be compared directly against a T expression."""
    short_t = T.a.b
    assert Path(T.a.b) == short_t
    assert Path(T.a.b.c) != short_t
280 |
281 |
def test_startswith():
    """Path.startswith accepts T, Path and text prefixes and rejects other types."""
    ref = T.a.b[1]
    ref_path = Path(ref)

    # T prefixes of increasing length, up to the full path
    for prefix in (T, T.a.b, ref):
        assert ref_path.startswith(prefix)
    # a longer path is not a prefix
    assert Path(ref).startswith(ref.c) is False

    assert Path('a.b.c').startswith(Path())
    assert Path('a.b.c').startswith('a.b.c')

    with raises(TypeError):
        assert Path('a.b.c').startswith(None)
297 |
298 |
def test_from_t_identity():
    """Calling from_t() on a Path instance returns that same instance."""
    original = Path(T.a.b)
    converted = original.from_t()
    assert converted == original
    assert original.from_t() is original
303 |
304 |
def test_t_dict_key():
    """A T expression used as a dict-spec key is evaluated against the target."""
    source = {'a': 'A'}
    keyed_spec = {T['a']: 'a'}
    assert glom(source, keyed_spec) == {'A': 'A'}
308 |
309 |
def test_t_arithmetic():
    """Arithmetic and bitwise operators on T are applied to the target (here 2)."""
    two = 2
    cases = [
        (T + T, 4),
        (T * T, 4),
        (T ** T, 4),
        (T / 1, 2),
        (T % 1, 0),
        (T - 1, 1),
        (T & T, 2),
        (T | 1, 3),
        (T ^ T, 0),
        (~T, -3),
        (-T, -2),
    ]
    for expr, expected in cases:
        assert glom(two, expr) == expected
323 |
324 |
def test_t_arithmetic_reprs():
    """Reprs of arithmetic T expressions reproduce operators and grouping."""
    cases = [
        (T + T, "T + T"),
        (T + (T / 2 * (T - 5) % 4), "T + (T / 2 * (T - 5) % 4)"),
        (T & 7 | (T ^ 6), "T & 7 | (T ^ 6)"),
        (-(~T), "-(~T)"),
    ]
    for expr, expected_repr in cases:
        assert repr(expr) == expected_repr
330 |
331 |
def test_t_arithmetic_errors():
    """Errors raised by T arithmetic are reported as PathAccessError."""
    failing_cases = [
        (0, T / 0, 'zero'),
        (None, T / 2, 'unsupported operand type'),
    ]
    for target, expr, message in failing_cases:
        with raises(PathAccessError, match=message):
            glom(target, expr)
340 |
341 |
def test_t_dunders():
    """Direct dunder access on T is refused; T.__('name__') is the suggested spelling."""
    with raises(AttributeError) as caught:
        T.__name__
    message = str(caught.value)
    assert 'use T.__("name__")' in message

    # the escape hatch reaches the real dunder attribute of the target
    assert glom(1, T.__('class__')) is int
349 |
350 |
def test_path_cache():
    """Path.from_text returns a cached object unless the cache limit is zero."""
    assert Path.from_text('a.b.c') is Path.from_text('a.b.c')
    pre = Path._MAX_CACHE
    Path._MAX_CACHE = 0
    try:
        assert Path.from_text('d.e.f') is not Path.from_text('d.e.f')
    finally:
        # Restore the class-level limit so the zero setting does not leak
        # into tests that run afterwards.
        Path._MAX_CACHE = pre
356 |
--------------------------------------------------------------------------------
/glom/test/test_reduction.py:
--------------------------------------------------------------------------------
1 | import operator
2 |
3 | import pytest
4 | from boltons.dictutils import OMD
5 |
6 | from glom import glom, T, Sum, Fold, Flatten, Coalesce, flatten, FoldError, Glommer, Merge, merge
7 |
8 |
def test_sum_integers():
    """Sum adds integers, honours ``init`` and accepts a subspec."""
    numbers = list(range(5))
    assert glom(numbers, Sum()) == 10
    assert glom(numbers, Sum(init=lambda: 2)) == 12

    # an empty target gives 0
    assert glom([], Sum()) == 0

    records = [{"num": 3}, {"num": 2}, {"num": -1}]
    assert glom(records, Sum(['num'])) == 4

    # add a non-compliant dict and fall back to 0 for it
    records_with_gap = records + [{}]
    assert glom(records_with_gap, Sum([Coalesce('num', default=0)])) == 4

    assert repr(Sum()) == 'Sum()'
    assert repr(Sum(len, init=float)) == 'Sum(len, init=float)'
28 |
29 |
def test_sum_seqs():
    """Sum concatenates sequences when ``init`` builds a sequence type."""
    singletons = [(x,) for x in range(4)]
    assert glom(singletons, Sum(init=tuple)) == (0, 1, 2, 3)

    # would not work with builtin sum(), gets:
    # "TypeError: sum() can't sum strings [use ''.join(seq) instead]"
    # Works here for now. If we're ok with that error, then we can
    # switch to sum().
    words = ['a', 'b', 'cd']
    assert glom(words, Sum(init=str)) == 'abcd'

    # nested Sum: each inner item goes through Sum(init=list) first
    nested_words = [['a'], ['b'], ['cde'], ['']]
    nested_spec = Sum(Sum(init=list), init=str)
    assert glom(nested_words, nested_spec) == 'abcde'
44 |
45 |
def test_fold():
    """Fold reduces an iterable with ``init`` and ``op``, and validates both."""
    one_to_four = range(1, 5)
    assert glom(one_to_four, Fold(T, int)) == 10
    assert glom(one_to_four, Fold(T, init=lambda: 2)) == 12

    # custom op: multiply instead of add
    product_spec = Fold(T, lambda: 1, op=lambda l, r: l * r)
    assert glom(one_to_four, product_spec) == 24

    assert repr(Fold(T, int)) == 'Fold(T, init=int)'
    imul_repr = repr(Fold(T, int, op=operator.imul))
    assert imul_repr.startswith('Fold(T, init=int, op=<')

    # signature coverage
    with pytest.raises(TypeError):
        Fold(T, list, op=None)  # noncallable op

    with pytest.raises(TypeError):
        Fold(T, init=None)  # noncallable init
62 |
63 |
def test_fold_bad_iter():
    """A registered ``iterate`` handler that raises makes Flatten fail with TypeError."""
    def bad_iter(obj):
        raise RuntimeError('oops')

    bare_glommer = Glommer(register_default_types=False)
    bare_glommer.register(list, iterate=bad_iter)

    with pytest.raises(TypeError):
        empty = []
        bare_glommer.glom(empty, Flatten())
75 |
76 |
def test_flatten():
    """Flatten joins one level of nesting; init='lazy' yields an iterator."""
    lists_only = [[1], [2], [3, 4]]
    assert glom(lists_only, Flatten()) == [1, 2, 3, 4]

    mixed_seqs = [(1, 2), [3]]
    assert glom(mixed_seqs, Flatten()) == [1, 2, 3]

    # lazy mode: consume one item, then the rest
    lazy_items = glom(mixed_seqs, Flatten(init='lazy'))
    assert next(lazy_items) == 1
    assert list(lazy_items) == [2, 3]

    expected_reprs = [
        (Flatten(), 'Flatten()'),
        (Flatten(init='lazy'), "Flatten(init='lazy')"),
        (Flatten(init=tuple), "Flatten(init=tuple)"),
    ]
    for spec, expected in expected_reprs:
        assert repr(spec) == expected
91 |
92 |
def test_flatten_func():
    """The flatten() convenience function: levels, init, spec and bad arguments."""
    one_level = [[1], [2], [3, 4]]
    assert flatten(one_level) == [1, 2, 3, 4]

    two_level = [[item] for item in one_level]
    assert flatten(two_level, levels=2) == [1, 2, 3, 4]
    assert flatten(two_level, levels=0) == two_level

    # a non-iterable target cannot be flattened
    not_iterable = 2
    with pytest.raises(FoldError):
        assert flatten(not_iterable)

    # kind of an odd use, but it works for now
    assert flatten(['a', 'b', 'cd'], init=str) == 'abcd'

    # another odd case
    subspec_target = {'items': {'numbers': [1, 2, 3]}}
    total = flatten(subspec_target, spec='items.numbers', init=int)
    assert total == 6

    # basic signature tests
    with pytest.raises(ValueError):
        flatten([], levels=-1)

    with pytest.raises(TypeError):
        flatten([], nonexistentkwarg=False)
121 |
122 |
def test_merge():
    """Merge combines dicts and validates its ``init``/``op`` combination."""
    parts = [{'a': 'A'}, {'b': 'B'}]
    combined = {'a': 'A', 'b': 'B'}

    assert glom(parts, Merge()) == combined
    assert glom(parts, Merge(op=dict.update)) == combined

    with pytest.raises(ValueError):
        Merge(init=list)  # has no .update()

    with pytest.raises(ValueError):
        Merge(op='update_extend')  # doesn't work on base dict, the default init
136 |
137 |
def test_merge_omd():
    """Merge into a boltons OMD, with the default op and with 'update_extend'."""
    same_key_parts = [{'a': 'A'}, {'a': 'aleph'}]

    default_merge = glom(same_key_parts, Merge(init=OMD))
    assert default_merge == OMD({'a': 'aleph'})

    extended_merge = glom(same_key_parts, Merge(init=OMD, op='update_extend'))
    assert extended_merge == OMD([('a', 'A'), ('a', 'aleph')])
143 |
144 |
def test_merge_func():
    """The merge() convenience function merges dicts and rejects unknown kwargs."""
    parts = [{'a': 'A'}, {'b': 'B'}]
    assert merge(parts) == {'a': 'A', 'b': 'B'}
    assert merge([]) == {}

    # basic signature test
    with pytest.raises(TypeError):
        merge([], nonexistentkwarg=False)
154 |
--------------------------------------------------------------------------------
/glom/test/test_scope_vars.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from glom import glom, Path, S, A, T, Vars, Val, GlomError, M, SKIP, Let
4 |
5 | from glom.core import ROOT
6 | from glom.mutation import PathAssignError
7 |
def test_s_scope_assign():
    """S(...) stores values in the scope; A assigns into scope variables."""
    source = {'a': 1, 'b': [{'c': 2}, {'c': 3}]}
    expected = [{'a': 1, 'c': 2}, {'a': 1, 'c': 3}]
    assert glom(source, (S(a=T['a']), ('b', [{'a': S['a'], 'c': 'c'}]))) == expected
    assert glom(source, ('b', [{'a': S[ROOT][Val(T)]['a'], 'c': 'c'}])) == expected

    # S() needs keyword arguments only, and at least one of them
    with pytest.raises(TypeError):
        S('posarg')
    with pytest.raises(TypeError):
        S()

    assert glom([[1]], (S(v=Vars()), [[A.v.a]], S.v.a)) == 1
    assert glom(1, (S(v={}), A.v['a'], S.v['a'])) == 1
    with pytest.raises(GlomError):
        glom(1, (S(v=1), A.v.a))

    class FailAssign:
        def __setattr__(self, name, val):
            raise Exception('nope')

    with pytest.raises(PathAssignError):
        glom(1, (S(v=FailAssign()), Path(A.v, 'a')))

    assert repr(S(a=T.a.b)) == 'S(a=T.a.b)'

    store_then_read = (S(a=T['x']), S.a)
    assert glom({'x': 'y'}, store_then_read) == 'y'
37 |
38 |
def test_globals():
    """A value assigned to A.globals inside nested lists is readable via S.globals."""
    set_then_read = ([[A.globals.a]], S.globals.a)
    assert glom([[1]], set_then_read) == 1
41 |
42 |
def test_vars():
    """Behaviour of A, S and Vars: assignment, tuple scoping, state and reprs."""
    assert glom(1, A.a) == 1  # A should not change the target
    assert glom(1, (A.a, S.a)) == 1
    # check that tuple vars don't "leak" into parent tuple
    assert glom(1, (A.t, Val(2), A.t, S.t)) == 2
    assert glom(1, (A.t, (Val(2), A.t), S.t)) == 1

    vars_spec = S(v=Vars({'b': 2}, c=3))
    assert glom(1, (vars_spec, A.v.a, S.v.a)) == 1
    # check that Vars() inside a spec doesn't hold state
    with pytest.raises(AttributeError):
        glom(1, (vars_spec, S.v.a))
    assert glom(1, (vars_spec, Path(A, 'v', 'a'), S.v.a)) == 1
    assert glom(1, (vars_spec, S.v.b)) == 2
    assert glom(1, (vars_spec, S.v.c)) == 3
    assert repr(vars_spec) == "S(v=Vars({'b': 2}, c=3))"

    # either keyword order is accepted in the reprs below
    vars_repr = repr(Vars(a=1, b=2))
    assert vars_repr in (
        "Vars(a=1, b=2)", "Vars(b=2, a=1)")
    scope_vars_repr = repr(Vars(a=1, b=2).glomit(None, None))
    assert scope_vars_repr in (
        "ScopeVars({'a': 1, 'b': 2})", "Vars({'b': 2, 'a': 1})")

    assert repr(A.b["c"]) == "A.b['c']"
63 |
64 |
def test_scoped_vars():
    """A.globals assignments made while iterating are visible in S.globals afterwards."""
    numbers = list(range(10)) + list(range(5))
    remember_last = ([A.globals.last], S.globals)

    scope_globals = glom(numbers, remember_last)

    # the last element of the target is 4
    assert scope_globals.last == 4
    assert dict(scope_globals) == {'last': 4}
71 |
72 |
def test_max_skip():
    """Track a running maximum in a scope variable, skipping smaller items."""
    numbers = list(range(10)) + list(range(5))

    init_max = S(max=Vars(max=0))
    # store the item when it beats the stored max, otherwise SKIP it
    update_max = [((M > M(S.max.max)) & A.max.max) | Val(SKIP)]
    max_spec = (init_max, update_max, S.max)

    tracked = glom(numbers, max_spec)
    assert tracked.max == 9
81 |
82 |
def test_let():
    """Let keeps working as before (backwards compat 2020-07)."""
    source = {'a': 1, 'b': [{'c': 2}, {'c': 3}]}
    expected = [{'a': 1, 'c': 2}, {'a': 1, 'c': 3}]
    assert glom(source, (Let(a='a'), ('b', [{'a': S['a'], 'c': 'c'}]))) == expected
    assert glom(source, ('b', [{'a': S[ROOT][Val(T)]['a'], 'c': 'c'}])) == expected

    # Let() needs keyword arguments only, and at least one of them
    with pytest.raises(TypeError):
        Let('posarg')
    with pytest.raises(TypeError):
        Let()

    assert glom([[1]], (Let(v=Vars()), [[A.v.a]], S.v.a)) == 1
    assert glom(1, (Let(v=lambda t: {}), A.v['a'], S.v['a'])) == 1
    with pytest.raises(GlomError):
        glom(1, (Let(v=lambda t: 1), A.v.a))

    class FailAssign:
        def __setattr__(self, name, val):
            raise Exception('nope')

    with pytest.raises(PathAssignError):
        glom(1, (Let(v=lambda t: FailAssign()), Path(A.v, 'a')))

    assert repr(Let(a=T.a.b)) == 'Let(a=T.a.b)'
107 |
--------------------------------------------------------------------------------
/glom/test/test_snippets.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from collections import deque
3 | from decimal import Decimal
4 | import json
5 | import os
6 | import textwrap
7 |
8 | import pytest
9 |
10 | import glom
11 |
12 |
13 | def _get_codeblock(lines, offset):
14 | if lines[offset:offset + 2] != [".. code-block:: python\n", "\n"]:
15 | return None
16 | start = offset + 2
17 | try:
18 | finish = lines.index('\n', start)
19 | except ValueError:
20 | return None
21 | return textwrap.dedent("".join(lines[start:finish]))
22 |
23 |
def _find_snippets():
    """Collect ``(line_no, source)`` pairs for the code blocks in docs/snippets.rst.

    The file is located relative to this module. Every line offset is
    tried with ``_get_codeblock``; offsets that give no (or empty) source
    are left out.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    path = here + '/../../docs/snippets.rst'
    with open(path) as snippet_file:
        lines = snippet_file.readlines()
    candidates = ((line_no, _get_codeblock(lines, line_no))
                  for line_no in range(len(lines)))
    return [(line_no, source) for line_no, source in candidates if source]
34 |
35 |
try:
    SNIPPETS = _find_snippets()
except Exception:
    # Best effort: fall back to no snippets, e.g. when running in an
    # environment without docs. ``Exception`` rather than a bare
    # ``except`` so KeyboardInterrupt/SystemExit still propagate.
    SNIPPETS = []

# Namespace the snippets are executed in: a shallow copy of the glom
# package namespace plus the extra names set below.
SNIPPETS_GLOBALS = copy.copy(glom.__dict__)
SNIPPETS_GLOBALS.update(dict(
    json=json,
    deque=deque,
    Decimal=Decimal,
    data=json.dumps({'a': ['b']}),
    contacts=[{'primary_email': {'email': 'a@example.com'}}, {}],
    glom=glom.glom))
49 |
50 |
@pytest.mark.parametrize("line,source", SNIPPETS)
def test_snippet(line, source):
    """Compile one documentation snippet and, unless it mentions django, run it."""
    if '>>>' in source:
        return  # maybe doctest output checking
    # compiled before the django check, so those snippets are still syntax-checked
    compiled = compile(source, 'snippets.rst', 'exec')
    if 'django' not in source:
        eval(compiled, SNIPPETS_GLOBALS)
    # django snippets are not executed: maybe in the future
59 |
60 |
--------------------------------------------------------------------------------
/glom/test/test_spec.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from glom import glom, Spec, T, S
4 |
5 |
def test_spec():
    """Spec wraps a spec, can be nested, and can be passed to glom()."""
    five = 5
    assert glom(five, T) == 5  # check assumption about echo behavior

    echo = Spec(T)
    assert echo.glom(five) == 5
    assert glom(five, echo) == 5

    # a Spec wrapping another Spec
    nested_echo = Spec(echo)
    assert nested_echo.glom(five) == 5

    # an arbitrary object is not accepted as a spec
    with pytest.raises(TypeError, match='expected spec to be'):
        glom({}, object())
17 |
def test_scope_spec():
    """A Spec can carry its own scope, which wins over the caller's scope."""
    bare = Spec(S)
    assert bare.glom(5, scope={'cat': 1})['cat'] == 1

    with_cat = Spec(bare, scope={'cat': 1})
    assert 'cat' in repr(with_cat)
    assert with_cat.glom(5)['cat'] == 1

    # test that Spec overrides the scope for its sub-tree
    overridden = glom(5, with_cat, scope={'cat': 2})
    assert overridden['cat'] == 1
27 |
--------------------------------------------------------------------------------
/glom/test/test_streaming.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from itertools import count, dropwhile, chain
4 |
5 | from glom import Iter
6 | from glom import glom, SKIP, STOP, T, Call, Spec, Glommer, Check, SKIP
7 |
8 |
# shared list target for the streaming tests: the integers 0 through 4
RANGE_5 = [*range(5)]
10 |
11 |
def test_iter():
    """Iter applies its subspec per item, lazily, and honours SKIP and STOP."""
    digit_strings = ['1', '2', '3']
    assert list(glom(digit_strings, Iter(int))) == [1, 2, 3]

    # laziness: only the consumed items are pulled from the source
    source = count()
    shifted = glom(source, Iter(lambda t: t + 1))
    assert (next(shifted), next(shifted)) == (1, 2)
    assert next(source) == 2

    numbered = glom(digit_strings, (Iter(int), enumerate))
    assert list(numbered) == [(0, 1), (1, 2), (2, 3)]

    assert list(glom([1, SKIP, 2], Iter())) == [1, 2]
    assert list(glom([1, STOP, 2], Iter())) == [1]

    with pytest.raises(TypeError):
        Iter(nonexistent_kwarg=True)
26 |
27 |
def test_filter():
    """Iter().filter() with a callable, with no argument, and with a Check spec."""
    def is_odd(x):
        return x % 2

    odd_spec = Iter().filter(is_odd)
    odds = glom(RANGE_5, odd_spec)
    assert list(odds) == [1, 3]

    # let's just make sure we're actually streaming just in case
    counter = count()
    odds = glom(counter, odd_spec)
    assert next(odds) == 1
    assert next(odds) == 3
    assert next(counter) == 4
    assert next(counter) == 5
    assert next(odds) == 7

    # filter() without arguments
    bools = [True, False, False, True, False]
    truthy_only = glom(bools, Iter().filter().all())
    assert truthy_only == [True, True]

    # filter() with a Check that SKIPs non-matching items
    imags = [0j, 1j, 2, 2j, 3j]
    check_spec = Iter().filter(Check(T.imag.real, type=float, one_of=(0, 2), default=SKIP)).all()
    kept = glom(imags, check_spec)
    assert kept == [0j, 2j]

    assert repr(Iter().filter(T.a.b)) == 'Iter().filter(T.a.b)'
    assert repr(Iter(list).filter(sum)) == 'Iter(list).filter(sum)'
55 |
56 |
def test_map():
    """Iter().map() applies a callable to each item."""
    doubling = Iter().map(lambda x: x * 2)
    doubled = glom(RANGE_5, doubling)
    assert list(doubled) == [0, 2, 4, 6, 8]

    assert repr(Iter().map(T.a.b)) == 'Iter().map(T.a.b)'
62 |
63 |
def test_split_flatten():
    """Iter().split() groups runs between None items; flatten() joins them again."""
    falsey_stream = [1, None, None, 2, 3, None, 4]

    groups = glom(falsey_stream, Iter().split())
    assert list(groups) == [[1], [2, 3], [4]]

    rejoined = glom(falsey_stream, Iter().split().flatten())
    assert list(rejoined) == [1, 2, 3, 4]

    split_repr = repr(Iter().split(sep=None, maxsplit=2))
    assert split_repr == 'Iter().split(sep=None, maxsplit=2)'
    flatten_repr = repr(Iter(T.a.b[1]).flatten())
    assert flatten_repr == 'Iter(T.a.b[1]).flatten()'
76 |
77 |
def test_chunked():
    """``chunked(3)`` yields consecutive lists of three items, which can then be mapped."""
    numbers = list(range(9))

    triples = glom(numbers, Iter().chunked(3))
    assert list(triples) == [[0, 1, 2], [3, 4, 5], [6, 7, 8]]

    triple_sums = glom(numbers, Iter().chunked(3).map(sum))
    assert list(triple_sums) == [3, 12, 21]
88 |
89 |
def test_windowed():
    """``windowed(3)`` yields overlapping 3-tuples and composes with filter/map."""
    numbers = list(range(5))

    window_spec = Iter().windowed(3)
    assert list(glom(numbers, window_spec)) == [(0, 1, 2), (1, 2, 3), (2, 3, 4)]
    assert repr(window_spec) == 'Iter().windowed(3)'

    # keep only windows whose first element is odd, then sum each window
    odd_start_sums = window_spec.filter(lambda window: bool(window[0] % 2)).map(sum)
    assert next(glom(numbers, odd_start_sums)) == 6

    assert list(glom(range(10), odd_start_sums)) == [6, 12, 18, 24]
104 |
105 |
def test_unique():
    """``unique()`` leaves distinct input untouched and drops repeated values."""
    numbers = list(range(10))

    assert list(glom(numbers, Iter().unique())) == numbers

    # mapping through x % 4 first leaves only four distinct values
    mod_four_unique = Iter(lambda value: value % 4).unique()
    assert list(glom(numbers, mod_four_unique)) == numbers[:4]

    assert repr(Iter().unique(T.a)) == 'Iter().unique(T.a)'
117 |
118 |
def test_slice():
    """``slice()`` and ``limit()`` trim a stream; ``slice()`` rejects a fourth argument."""
    counter = count()
    first_three = glom(counter, Iter().slice(3))
    assert list(first_three) == [0, 1, 2]
    # the next value left on the shared counter is 3
    assert next(counter) == 3

    digits = range(10)
    assert list(glom(digits, Iter().slice(1, 5))) == [1, 2, 3, 4]

    assert list(glom(digits, Iter().slice(1, 6, 2))) == [1, 3, 5]
    assert repr(Iter().slice(1, 6, 2)) == 'Iter().slice(1, 6, 2)'

    assert list(glom(digits, Iter().limit(3))) == [0, 1, 2]
    assert repr(Iter().limit(3)) == 'Iter().limit(3)'

    # a limit larger than the input just returns the whole input
    assert list(glom(range(5), Iter().limit(10))) == [0, 1, 2, 3, 4]

    # test broken args
    with pytest.raises(TypeError):
        Iter().slice(1, 2, 3, 4)
145 |
146 |
def test_while():
    """Check ``Iter().takewhile()`` / ``Iter().dropwhile()`` results and their reprs.

    The two repr assertions previously had the closing parenthesis in the
    wrong place (``repr(spec == '...')``), which asserts the repr of a bool
    and therefore can never fail. They now compare ``repr(spec)`` against
    the expected text.
    """
    cnt = count()
    out = glom(cnt, Iter().takewhile(lambda x: x < 3))
    assert list(out) == [0, 1, 2]
    # 3 was pulled from the counter to find the first failing item, so 4 is next
    assert next(cnt) == 4
    assert repr(Iter().takewhile(T.a)) == 'Iter().takewhile(T.a)'

    range_iter = iter(range(7))
    out = glom(range_iter, Iter().dropwhile(lambda x: x < 3 or x > 5))
    assert list(out) == [3, 4, 5, 6]  # 6 still here despite the x>5 above

    # a predicate that is true for every item drops the whole input
    out = glom(range(10), Iter().dropwhile(lambda x: x >= 0).limit(10))
    assert list(out) == []

    # the condition may itself be a spec (here a tuple chaining T and a callable)
    out = glom(range(8), Iter().dropwhile((T.bit_length(), lambda x: x < 3)))
    assert list(out) == [4, 5, 6, 7]
    assert repr(Iter().dropwhile(T.a)) == 'Iter().dropwhile(T.a)'
164 |
165 |
def test_iter_composition():
    """``Iter`` specs can be chained in a tuple spec and nested inside one another."""
    numbers = list(range(10))

    chained = (Iter(), Iter(), list)
    assert glom(numbers, chained) == numbers

    nested = Iter(Iter(lambda value: value % 4)).flatten().unique()
    assert list(glom([numbers] * 3, nested)) == [0, 1, 2, 3]
173 |
174 |
def test_faulty_iterate():
    """An ``iterate`` handler that raises RuntimeError is expected to surface as TypeError."""
    def bad_iter(obj):
        raise RuntimeError('oops')

    glommer = Glommer()
    glommer.register(str, iterate=bad_iter)

    streaming_spec = (Iter(), list)
    with pytest.raises(TypeError):
        glommer.glom('abc', streaming_spec)
185 |
186 |
def test_first():
    """``Iter().first()`` returns the first matching item, or the default when none matches."""
    first_imag = Iter().first(T.imag)
    values = iter([1, 2, 3j, 4])
    assert glom(values, first_imag) == 3j
    # the item after the match is still available on the source iterator
    assert next(values) == 4
    assert repr(first_imag) == '(Iter(), First(T.imag))'

    first_imag_or_zero = Iter().first(T.imag, default=0)
    values = iter([1, 2, 4])
    assert glom(values, first_imag_or_zero) == 0
    assert repr(first_imag_or_zero) == '(Iter(), First(T.imag, default=0))'
201 |
202 |
def test_all():
    """``Iter().all()`` consumes the whole source into a list."""
    digits = iter(range(10))

    collected = glom(digits, Iter().all())
    assert collected == list(range(10))
    # nothing is left on the source afterwards
    assert next(digits, None) is None
    assert repr(Iter().all()) == 'Pipe(Iter(), list)'
210 |
--------------------------------------------------------------------------------
/glom/test/test_target_types.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import glom
4 | from glom import Glommer, PathAccessError, UnregisteredTarget
5 | from glom.core import TargetRegistry
6 |
7 |
# Fixture class hierarchy for the registry tests in this module.
# Two independent roots:
class A:
    pass


class B:
    pass


# One single-inheritance child per root:
class C(A):
    pass


class D(B):
    pass


# Multiple inheritance joining both branches (A is also listed explicitly):
class E(C, D, A):
    pass


# A plain subclass of the multiple-inheritance class:
class F(E):
    pass
25 |
26 |
def test_types_leave_one_out():
    """For each fixture type, register every *other* type and check which handler is chosen.

    With its own type left unregistered, an instance is expected to resolve
    to the handler of the next class in its MRO.
    """
    all_types = [A, B, C, D, E, F]

    def returning(value):
        # build a zero-argument handler that reports which type it was registered for
        return lambda: value

    for omitted in all_types:
        registry = TargetRegistry(register_default_types=False)
        registry.register(object, get=lambda: object)

        for candidate in all_types:
            if candidate is not omitted:
                registry.register(candidate, get=returning(candidate))

        instance = omitted()
        assert registry.get_handler('get', instance)() == type(instance).mro()[1]

        if omitted is E:
            assert registry.get_handler('get', instance)() is C  # sanity check
46 |
47 |
def test_types_bare():
    """A Glommer built with ``register_default_types=False`` cannot glom until types are registered.

    Checks the ``UnregisteredTarget`` error raised for 'get' and 'iterate',
    then registers 'get' handlers only and checks that 'iterate' is still
    reported as unregistered.
    """
    glommer = Glommer(register_default_types=False)

    treg = glommer.scope[TargetRegistry]
    assert treg._get_closest_type(object(), treg._op_type_tree.get('get', {})) is None

    # test that bare glommers can't glom anything
    with pytest.raises(UnregisteredTarget) as exc_info:
        glommer.glom(object(), {'object_repr': '__class__.__name__'})
    # NOTE(review): the angle-bracketed type segment of this expected repr was
    # missing from the file (it read "'get', , OrderedDict()"), which cannot
    # match any repr. "<type 'object'>" is a reconstruction -- confirm against
    # UnregisteredTarget.__repr__.
    assert repr(exc_info.value) == "UnregisteredTarget('get', <type 'object'>, OrderedDict(), ('__class__',))"
    assert (
        "glom() called without registering any types for operation 'get'."
        " see glom.register() or Glommer's constructor for details."
        in str(exc_info.value))

    with pytest.raises(UnregisteredTarget, match='without registering') as exc_info:
        glommer.glom([{'hi': 'hi'}], ['hi'])
    assert not exc_info.value.type_map

    glommer.register(object, get=getattr)
    glommer.register(dict, get=dict.__getitem__, exact=True)

    # check again that registering object for 'get' doesn't change the
    # fact that we don't have iterate support yet
    with pytest.raises(UnregisteredTarget) as exc_info:
        glommer.glom({'test': [{'hi': 'hi'}]}, ('test', ['hi']))
    # feel free to update the "(at ['test'])" part to improve path display
    assert (
        "target type 'list' not registered for 'iterate', "
        "expected one of registered types: (dict)" in str(exc_info.value))
78 |
79 |
def test_invalid_register():
    """Registering something that is not a type (here the int ``1``) raises TypeError."""
    glommer = Glommer()
    not_a_type = 1
    with pytest.raises(TypeError):
        glommer.register(not_a_type)
85 |
86 |
def test_exact_register():
    """An ``exact=True`` registration on a ``list`` subclass does not make plain lists iterable."""
    class BetterList(list):
        pass

    glommer = Glommer(register_default_types=False)
    glommer.register(BetterList, iterate=iter, exact=True)

    doubled = glommer.glom(BetterList(range(3)), [lambda x: x * 2])
    assert doubled == [0, 2, 4]

    # the base class itself was never registered
    with pytest.raises(UnregisteredTarget):
        glommer.glom(list(range(3)), ['unused'])
103 |
104 |
def test_duck_register():
    """A class defining ``__iter__`` can be registered without naming an explicit handler."""
    class LilRanger:
        def __init__(self):
            self.lil_list = list(range(5))

        def __iter__(self):
            return iter(self.lil_list)

    expected_floats = [0.0, 1.0, 2.0, 3.0, 4.0]
    ranger = LilRanger()

    bare_glommer = Glommer(register_default_types=False)

    # before registration the bare glommer rejects the target
    with pytest.raises(UnregisteredTarget):
        bare_glommer.glom(ranger, [float])

    # register() is given only the type, no handler keyword
    bare_glommer.register(LilRanger)
    assert bare_glommer.glom(ranger, [float]) == expected_floats

    default_glommer = Glommer()  # now with just defaults
    assert default_glommer.glom(ranger, [float]) == expected_floats
129 |
130 |
def test_bypass_getitem():
    """The string path 'count' fails on a list, but a callable spec can still call ``.count``."""
    target = [0, 1, 2] * 3

    with pytest.raises(PathAccessError):
        glom.glom(target, 'count')

    def count_ones(list_obj):
        return list_obj.count(1)

    assert glom.glom(target, count_ones) == 3
140 |
141 |
def test_iter_set():
    """Sets can be iterated by default, and by a bare Glommer once ``set`` is registered.

    The second half previously built a bare ``Glommer`` but then called the
    module-level ``glom.glom``, so the explicit registration was never
    exercised; it now goes through ``glommer.glom``.
    """
    some_ints = set(range(5))
    some_floats = glom.glom(some_ints, [float])

    # sorted() because set iteration order is not part of what is being tested
    assert sorted(some_floats) == [0.0, 1.0, 2.0, 3.0, 4.0]

    # now without defaults
    glommer = Glommer(register_default_types=False)
    glommer.register(set, iterate=iter)
    some_floats = glommer.glom(some_ints, [float])

    assert sorted(some_floats) == [0.0, 1.0, 2.0, 3.0, 4.0]
154 |
155 |
def test_iter_str():
    """Strings are rejected as iteration targets by default, but this can be overridden."""
    # check that strings are not iterable by default, one of the most
    # common sources of bugs
    name = 'kurt'

    with pytest.raises(UnregisteredTarget):
        glom.glom(name, {'name': [name]})

    # also check that someone can override this
    permissive = Glommer()
    permissive.register(str, iterate=iter)
    chars_result = permissive.glom(name, {'name_chars_for_some_reason': [str]})
    assert len(chars_result['name_chars_for_some_reason']) == 4

    # the better way, for any dissenter reading this
    expected = {'name_chars': ['k', 'u', 'r', 't']}
    assert glom.glom(name, {'name_chars': list}) == expected

    # and for the really passionate: how about making strings
    # non-iterable and just giving them a .chars() method that returns
    # a list of single-character strings.
178 |
179 |
def test_default_scope_register():
    """Smoke test: the module-level ``glom.register`` exists and accepts a type."""
    # just hit it to make sure it exists, it behaves exactly like Glommer.register
    glom.register(type, exact=False)
183 |
184 |
def test_faulty_iterate():
    """A raising ``iterate`` handler reached through a nested spec is expected to give TypeError."""
    def bad_iter(obj):
        raise RuntimeError('oops')

    glommer = Glommer()
    glommer.register(str, iterate=bad_iter)

    target = {'a': 'fail'}
    nested_spec = ('a', {'chars': [str]})
    with pytest.raises(TypeError):
        glommer.glom(target, nested_spec)
195 |
196 |
def test_faulty_op_registration():
    """``TargetRegistry.register_op`` / ``register`` reject bad names and bad autodiscovery functions.

    The registry is stateful, so the steps below run in a fixed order.
    """
    # autodiscovery callables used by the steps below
    def _autodiscover_raise(type_obj):
        raise Exception('noperino')

    def _autodiscover_faulty_return(type_obj):
        return 'hideeho'

    def _autodiscover_sneaky(type_obj):
        # works with default registrations, but fails later on sets and frozensets
        if type_obj is set:
            return 'this should have been False or a callable, but was intentionally a string'
        if type_obj is frozenset:
            raise ValueError('this should have been False or a callable, but was intentionally a ValueError')
        return False

    registry = TargetRegistry()

    # op name must be text; autodiscovery function must be callable
    with pytest.raises(TypeError, match="text name, not:"):
        registry.register_op(None, len)
    with pytest.raises(TypeError, match="callable, not:"):
        registry.register_op('fake_op', object())

    class NewType:
        pass

    # an autodiscovery function that raises is reported as TypeError ...
    with pytest.raises(TypeError, match="noperino"):
        registry.register_op('fake_op', _autodiscover_raise)

    # ... and the failed op is not recorded
    assert 'fake_op' not in registry._op_auto_map

    # check op with no autodiscovery
    registry.register_op('lol', exact=True)
    lol_type_map = registry.get_type_map('lol')
    assert all(handler is False for handler in lol_type_map.values())

    # check op reregistration, this time not exact
    assert not registry._op_type_tree.get('lol')
    registry.register_op('lol', exact=False)
    assert registry._op_type_tree.get('lol')

    # an autodiscovery function returning a non-False, non-callable value
    with pytest.raises(TypeError, match="hideeho"):
        registry.register_op('fake_op', _autodiscover_faulty_return)

    registry.register_op('sneak', _autodiscover_sneaky)

    with pytest.raises(TypeError, match="intentionally a string"):
        registry.register(set)
    with pytest.raises(TypeError, match="intentionally a ValueError"):
        registry.register(frozenset)
249 |
250 |
def test_reregister_type():
    """Registering a type again for the same op replaces the handler that is returned."""
    class NewType:
        pass

    registry = TargetRegistry()
    instance = NewType()

    registry.register(NewType, op=lambda obj: obj)
    assert registry.get_handler('op', instance)(instance) == instance

    # assert no change in reregistering same
    registry.register(NewType, op=lambda obj: obj)
    assert registry.get_handler('op', instance)(instance) == instance

    # assert change in reregistering new
    registry.register(NewType, op=lambda obj: obj.__class__.__name__)
    assert registry.get_handler('op', instance)(instance) == 'NewType'
273 |
--------------------------------------------------------------------------------
/glom/test/test_tutorial.py:
--------------------------------------------------------------------------------
1 | from glom import glom, tutorial
2 | from glom.tutorial import Contact, Email
3 |
def test_tutorial_data():
    """The tutorial ships with at least 4 and fewer than 10 contacts."""
    contact_count = len(tutorial.CONTACTS)
    assert contact_count >= 4
    assert contact_count < 10
6 |
7 |
def test_tutorial():
    """A dotted-path glom matches manual indexing, and a saved Contact can be fetched by id."""
    nested = {'a': {'b': {'c': 'd'}}}
    by_hand = nested['a']['b']['c']
    assert glom(nested, 'a.b.c') == by_hand

    julian = Contact('Julian', emails=[Email('julian@sunnyvaletrailerpark.info')])
    julian.save()
    # the lookup is expected to return the very same object, not a copy
    assert Contact.objects.get(contact_id=julian.id) is julian
20 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | doctest_optionflags=NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS
3 |
--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
1 | attrs>=19.2.0
2 | boltons>=20.2.0
3 | coverage<=7.2.7 # can unpin when dropping py37
4 | face>=20.1.1
5 | pytest>=6.2.5
6 | tox>=3.7.0
7 | PyYAML>=6.0.1
8 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile with Python 3.7
3 | # by the following command:
4 | #
5 | # pip-compile requirements.in
6 | #
7 | attrs==24.2.0
8 | # via -r requirements.in
9 | boltons==24.1.0
10 | # via
11 | # -r requirements.in
12 | # face
13 | cachetools==5.5.0
14 | # via tox
15 | chardet==5.2.0
16 | # via tox
17 | colorama==0.4.6
18 | # via tox
19 | coverage==7.2.7
20 | # via -r requirements.in
21 | distlib==0.3.9
22 | # via virtualenv
23 | exceptiongroup==1.2.2
24 | # via pytest
25 | face==24.0.0
26 | # via -r requirements.in
27 | filelock==3.12.2
28 | # via
29 | # tox
30 | # virtualenv
31 | importlib-metadata==6.7.0
32 | # via
33 | # attrs
34 | # pluggy
35 | # pytest
36 | # tox
37 | # virtualenv
38 | iniconfig==2.0.0
39 | # via pytest
40 | packaging==24.0
41 | # via
42 | # pyproject-api
43 | # pytest
44 | # tox
45 | platformdirs==4.0.0
46 | # via
47 | # tox
48 | # virtualenv
49 | pluggy==1.2.0
50 | # via
51 | # pytest
52 | # tox
53 | pyproject-api==1.5.3
54 | # via tox
55 | pytest==7.4.4
56 | # via -r requirements.in
57 | pyyaml==6.0.1
58 | # via -r requirements.in
59 | tomli==2.0.1
60 | # via
61 | # pyproject-api
62 | # pytest
63 | # tox
64 | tox==4.8.0
65 | # via -r requirements.in
66 | typing-extensions==4.7.1
67 | # via
68 | # importlib-metadata
69 | # platformdirs
70 | # tox
71 | virtualenv==20.26.6
72 | # via tox
73 | zipp==3.15.0
74 | # via importlib-metadata
75 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import importlib.util
2 | import os
3 |
4 | from setuptools import setup
5 |
6 | __author__ = 'Mahmoud Hashemi and Kurt Rose'
7 | __contact__ = 'mahmoud@hatnote.com'
8 | __url__ = 'https://github.com/mahmoud/glom'
9 |
10 |
def import_path(module_name, path):
    """Load the Python source file at *path* as a module named *module_name*.

    The module is executed and returned; it is not inserted into
    ``sys.modules``.

    Args:
        module_name: name to give the loaded module.
        path: filesystem path of the file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: if no loader can be determined for *path* (for example
            when it lacks a recognized Python suffix). Previously this
            surfaced as an AttributeError on ``None``.
    """
    spec = importlib.util.spec_from_file_location(module_name, path)
    # spec_from_file_location returns None when it cannot pick a loader
    if spec is None or spec.loader is None:
        raise ImportError(f'cannot load module {module_name!r} from {path!r}')
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
16 |
17 |
# Directory containing this setup.py; used so that file lookups below do not
# depend on the current working directory.
CUR_PATH = os.path.abspath(os.path.dirname(__file__))

# Read the version from glom/_version.py without importing the glom package.
_version_mod_path = os.path.join(CUR_PATH, 'glom', '_version.py')
_version_mod = import_path('_version', _version_mod_path)
__version__ = _version_mod.__version__


# README.md is resolved relative to this file, like _version.py above.
with open(os.path.join(CUR_PATH, 'README.md'), encoding='utf8') as read_me:
    long_description = read_me.read()

setup(name='glom',
      version=__version__,
      description="A declarative object transformer and formatter, for conglomerating nested data.",
      long_description=long_description,
      long_description_content_type='text/markdown',
      author=__author__,
      author_email=__contact__,
      url=__url__,
      project_urls={
          'Documentation': 'https://glom.readthedocs.io/en/latest/',
      },
      packages=['glom', 'glom.test'],
      install_requires=['boltons>=19.3.0', 'attrs', 'face>=20.1.1'],
      extras_require={
          'toml': ['tomli; python_version<"3.11"'],
          'yaml': ['PyYAML'],
      },
      entry_points={'console_scripts': ['glom = glom.cli:console_main']},
      include_package_data=True,
      zip_safe=False,
      platforms='any',
      license_files=['LICENSE'],
      classifiers=[
          'Topic :: Utilities',
          'Intended Audience :: Developers',
          'Topic :: Software Development :: Libraries',
          'Development Status :: 5 - Production/Stable',
          'Programming Language :: Python :: 3.7',
          'Programming Language :: Python :: 3.8',
          'Programming Language :: Python :: 3.9',
          'Programming Language :: Python :: 3.10',
          'Programming Language :: Python :: 3.11',
          'Programming Language :: Python :: 3.12',
          'Programming Language :: Python :: Implementation :: CPython',
          'Programming Language :: Python :: Implementation :: PyPy',
          'License :: OSI Approved :: BSD License',
      ]
      )
67 |
68 | """
69 | A brief checklist for release:
70 |
71 | * tox
72 | * git commit (if applicable)
73 | * Bump glom/_version.py off of -dev
74 | * git commit -a -m "bump version for vx.y.z release"
75 | * write CHANGELOG
76 | * bump docs/conf.py version
77 | * git commit
78 | * rm -rf dist/*
79 | * python setup.py sdist bdist_wheel
80 | * twine upload dist/*
81 | * git tag -a vx.y.z -m "brief summary"
82 | * bump glom/_version.py onto n+1 dev
83 | * git commit
84 | * git push
85 |
86 | NB: if dropping support for a python version, bump the pyupgrade argument in tox and run syntax-upgrade tox env.
87 |
88 | """
89 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py37,py38,py39,py310,py311,py312,pypy3,coverage-report,packaging
3 |
4 | [testenv]
5 | changedir = .tox
6 | deps = -rrequirements.txt
7 | commands = coverage run --parallel --rcfile {toxinidir}/.tox-coveragerc -m pytest -vv --doctest-modules {envsitepackagesdir}/glom {posargs}
8 |
9 | [testenv:coverage-report]
10 | changedir = .tox
11 | deps = coverage
12 | commands = coverage combine --rcfile {toxinidir}/.tox-coveragerc
13 | coverage xml --rcfile {toxinidir}/.tox-coveragerc
14 | coverage report --rcfile {toxinidir}/.tox-coveragerc
15 | coverage html --rcfile {toxinidir}/.tox-coveragerc -d {toxinidir}/htmlcov
16 |
17 |
18 | [testenv:packaging]
19 | changedir = {toxinidir}
20 | deps =
21 | check-manifest==0.50
22 | commands =
23 | check-manifest
24 |
25 | [testenv:syntax-upgrade]
26 | changedir = {toxinidir}
27 | deps =
28 | flynt
29 | pyupgrade
30 | commands =
31 | flynt ./glom
32 | python -c "import glob; import subprocess; [subprocess.run(['pyupgrade', '--py37-plus', f]) for f in glob.glob('./glom/**/*.py', recursive=True)]"
--------------------------------------------------------------------------------