"]
53 |
54 | ds = dt.ds
55 |
56 | sections = [
57 | children_section(dt.children),
58 | dim_section(ds),
59 | coord_section(ds.coords),
60 | datavar_section(ds.data_vars),
61 | attr_section(ds.attrs),
62 | ]
63 |
64 | return _obj_repr(ds, header_components, sections)
65 |
66 |
67 | def _wrap_repr(r: str, end: bool = False) -> str:
68 | """
69 | Wrap HTML representation with a tee to the left of it.
70 |
71 | Enclosing HTML tag is a
 with :code:`display: inline-grid` style.
72 |
73 | Turns:
74 | [ title ]
75 | | details |
76 | |_____________|
77 |
78 | into (A):
79 | |─ [ title ]
80 | | | details |
81 | | |_____________|
82 |
83 | or (B):
84 | └─ [ title ]
85 | | details |
86 | |_____________|
87 |
88 | Parameters
89 | ----------
90 | r: str
91 | HTML representation to wrap.
92 | end: bool
93 | Specify if the line on the left should continue or end.
94 |
95 | Default is False.
96 |
97 | Returns
98 | -------
99 | str
100 | Wrapped HTML representation.
101 |
102 | Tee color is set to the variable :code:`--xr-border-color`.
103 | """
104 | # height of the tee's vertical line: full height while it continues (A), one text line when it ends (B)
105 | end = bool(end)
106 | height = "100%" if end is False else "1.2em"
107 | return "".join(
108 | [
109 | "
",
110 | "
",
117 | "
",
118 | "
",
126 | "
",
127 | "
",
130 | "
",
131 | r,
132 | "
" "
",
133 | "
",
134 | ]
135 | )
136 |
137 |
def datatree_repr(dt: Any) -> str:
    """Return the full HTML representation for a DataTree, titled with its type name."""
    return node_repr(f"datatree.{type(dt).__name__}", dt)
--------------------------------------------------------------------------------
/datatree/iterators.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from collections import abc
3 | from typing import Callable, Iterator, List, Optional
4 |
5 | from .treenode import Tree
6 |
7 | """These iterators are copied from anytree.iterators, with minor modifications."""
8 |
9 |
class AbstractIter(abc.Iterator):
    """Base class for tree iterators; iterates over the tree rooted at ``node``.

    Copied from anytree.iterators, with minor modifications.
    """

    def __init__(
        self,
        node: Tree,
        filter_: Optional[Callable] = None,
        stop: Optional[Callable] = None,
        maxlevel: Optional[int] = None,
    ):
        """
        Iterate over tree starting at `node`.
        Base class for all iterators.
        Keyword Args:
            filter_: function called with every `node` as argument, `node` is returned if `True`.
            stop: stop iteration at `node` if `stop` function returns `True` for `node`.
            maxlevel (int): maximum descending in the node hierarchy.
        """
        self.node = node
        self.filter_ = filter_
        self.stop = stop
        self.maxlevel = maxlevel
        # Underlying generator; created lazily on the first call to __next__.
        self.__iter = None

    def __init(self):
        # Build the generator driving the iteration, substituting permissive
        # defaults when no filter_/stop callable was supplied.
        filter_ = self.filter_ or (lambda node: True)
        stop = self.stop or (lambda node: False)
        if AbstractIter._abort_at_level(1, self.maxlevel):
            top_children = []
        else:
            top_children = AbstractIter._get_children([self.node], stop)
        return self._iter(top_children, filter_, stop, self.maxlevel)

    def __iter__(self) -> Iterator[Tree]:
        return self

    def __next__(self) -> Iterator[Tree]:
        if self.__iter is None:
            self.__iter = self.__init()
        return next(self.__iter)  # type: ignore[call-overload]

    @staticmethod
    @abstractmethod
    def _iter(children: List[Tree], filter_, stop, maxlevel) -> Iterator[Tree]: ...

    @staticmethod
    def _abort_at_level(level, maxlevel):
        # True once `level` exceeds the configured maximum depth (None = unlimited).
        return maxlevel is not None and level > maxlevel

    @staticmethod
    def _get_children(children: List[Tree], stop) -> List[Tree]:
        # Drop children for which the stop predicate fires, pruning their subtrees.
        return [child for child in children if not stop(child)]
72 |
73 |
class PreOrderIter(AbstractIter):
    """
    Iterate over tree applying pre-order strategy starting at `node`.

    Each node is yielded before any of its descendants; subtrees rooted at
    nodes matching `stop` are skipped entirely, and descent halts at `maxlevel`.
    """

    @staticmethod
    def _iter(children, filter_, stop, maxlevel):
        for node in children:
            if stop(node):
                continue
            if filter_(node):
                yield node
            # Recurse one level deeper unless the depth budget is exhausted.
            if AbstractIter._abort_at_level(2, maxlevel):
                continue
            remaining = maxlevel - 1 if maxlevel else None
            yield from PreOrderIter._iter(
                list(node.children.values()), filter_, stop, remaining
            )
94 |
95 |
class LevelOrderIter(AbstractIter):
    """
    Iterate over tree applying level-order strategy starting at `node`.

    All nodes of one depth are yielded before any node of the next depth.
    """

    @staticmethod
    def _iter(children, filter_, stop, maxlevel):
        current = children
        depth = 1
        while current:
            upcoming = []
            for node in current:
                if filter_(node):
                    yield node
                # Collect the next level, pruning subtrees at `stop` nodes.
                upcoming += AbstractIter._get_children(
                    list(node.children.values()), stop
                )
            depth += 1
            if AbstractIter._abort_at_level(depth, maxlevel):
                return
            current = upcoming
116 |
--------------------------------------------------------------------------------
/datatree/testing.py:
--------------------------------------------------------------------------------
1 | from xarray.testing.assertions import ensure_warnings
2 |
3 | from .datatree import DataTree
4 | from .formatting import diff_tree_repr
5 |
6 |
@ensure_warnings
def assert_isomorphic(a: DataTree, b: DataTree, from_root: bool = False):
    """
    Two DataTrees are considered isomorphic if every node has the same number of children.

    Nothing about the data in each node is checked.

    Isomorphism is a necessary condition for two trees to be used in a nodewise binary operation,
    such as tree1 + tree2.

    By default this function does not check any part of the tree above the given node.
    Therefore this function can be used as default to check that two subtrees are isomorphic.

    Parameters
    ----------
    a : DataTree
        The first object to compare.
    b : DataTree
        The second object to compare.
    from_root : bool, optional, default is False
        Whether or not to first traverse to the root of the trees before checking for isomorphism.
        If a & b have no parents then this has no effect.

    Raises
    ------
    TypeError
        If ``a`` is not a DataTree.

    See Also
    --------
    DataTree.isomorphic
    assert_equal
    assert_identical
    """
    __tracebackhide__ = True
    assert isinstance(a, type(b))

    # Guard clause: only DataTree objects can be compared here.
    if not isinstance(a, DataTree):
        raise TypeError(f"{type(a)} not of type DataTree")

    if from_root:
        a = a.root
        b = b.root

    assert a.isomorphic(b, from_root=from_root), diff_tree_repr(a, b, "isomorphic")
47 |
48 |
@ensure_warnings
def assert_equal(a: DataTree, b: DataTree, from_root: bool = True):
    """
    Two DataTrees are equal if they have isomorphic node structures, with matching node names,
    and if they have matching variables and coordinates, all of which are equal.

    By default this method will check the whole tree above the given node.

    Parameters
    ----------
    a : DataTree
        The first object to compare.
    b : DataTree
        The second object to compare.
    from_root : bool, optional, default is True
        Whether or not to first traverse to the root of the trees before checking for isomorphism.
        If a & b have no parents then this has no effect.

    See Also
    --------
    DataTree.equals
    assert_isomorphic
    assert_identical
    """
    __tracebackhide__ = True
    assert isinstance(a, type(b))

    # Guard clause: only DataTree objects can be compared here.
    if not isinstance(a, DataTree):
        raise TypeError(f"{type(a)} not of type DataTree")

    if from_root:
        a = a.root
        b = b.root

    assert a.equals(b, from_root=from_root), diff_tree_repr(a, b, "equals")
84 |
85 |
@ensure_warnings
def assert_identical(a: DataTree, b: DataTree, from_root: bool = True):
    """
    Like assert_equal, but will also check all dataset attributes and the attributes on
    all variables and coordinates.

    By default this method will check the whole tree above the given node.

    Parameters
    ----------
    a : DataTree
        The first object to compare.
    b : DataTree
        The second object to compare.
    from_root : bool, optional, default is True
        Whether or not to first traverse to the root of the trees before checking for isomorphism.
        If a & b have no parents then this has no effect.

    Raises
    ------
    TypeError
        If ``a`` is not a DataTree.

    See Also
    --------
    DataTree.identical
    assert_isomorphic
    assert_equal
    """
    __tracebackhide__ = True
    assert isinstance(a, type(b))

    # Guard clause: only DataTree objects can be compared here.
    if not isinstance(a, DataTree):
        raise TypeError(f"{type(a)} not of type DataTree")

    if from_root:
        a = a.root
        b = b.root

    assert a.identical(b, from_root=from_root), diff_tree_repr(a, b, "identical")
121 |
--------------------------------------------------------------------------------
/datatree/tests/test_formatting.py:
--------------------------------------------------------------------------------
1 | from textwrap import dedent
2 |
3 | from xarray import Dataset
4 |
5 | from datatree import DataTree
6 | from datatree.formatting import diff_tree_repr
7 |
8 |
9 | class TestRepr:  # text (str/repr) output of DataTree nodes
10 | def test_print_empty_node(self):
11 | dt = DataTree(name="root")
12 | printout = dt.__str__()
13 | assert printout == "DataTree('root', parent=None)"  # header line only for an empty node
14 |
15 | def test_print_empty_node_with_attrs(self):
16 | dat = Dataset(attrs={"note": "has attrs"})
17 | dt = DataTree(name="root", data=dat)
18 | printout = dt.__str__()
19 | assert printout == dedent(
20 | """\
21 | DataTree('root', parent=None)
22 | Dimensions: ()
23 | Data variables:
24 | *empty*
25 | Attributes:
26 | note: has attrs"""
27 | )
28 |
29 | def test_print_node_with_data(self):
30 | dat = Dataset({"a": [0, 2]})
31 | dt = DataTree(name="root", data=dat)
32 | printout = dt.__str__()
33 | expected = [
34 | "DataTree('root', parent=None)",
35 | "Dimensions",
36 | "Coordinates",
37 | "a",
38 | "Data variables",
39 | "*empty*",
40 | ]
41 | for expected_line, printed_line in zip(expected, printout.splitlines()):  # fragments must appear line-by-line, in order
42 | assert expected_line in printed_line
43 |
44 | def test_nested_node(self):
45 | dat = Dataset({"a": [0, 2]})
46 | root = DataTree(name="root")
47 | DataTree(name="results", data=dat, parent=root)
48 | printout = root.__str__()
49 | assert printout.splitlines()[2].startswith(" ")  # child content is indented under the root
50 |
51 | def test_print_datatree(self, simple_datatree):
52 | dt = simple_datatree
53 | print(dt)  # smoke test only: printing must not raise
54 |
55 | # TODO work out how to test something complex like this
56 |
57 | def test_repr_of_node_with_data(self):
58 | dat = Dataset({"a": [0, 2]})
59 | dt = DataTree(name="root", data=dat)
60 | assert "Coordinates" in repr(dt)
61 |
62 |
63 | class TestDiffFormatting:  # textual diff output produced by diff_tree_repr
64 | def test_diff_structure(self):
65 | dt_1 = DataTree.from_dict({"a": None, "a/b": None, "a/c": None})
66 | dt_2 = DataTree.from_dict({"d": None, "d/e": None})
67 |
68 | expected = dedent(
69 | """\
70 | Left and right DataTree objects are not isomorphic
71 |
72 | Number of children on node '/a' of the left object: 2
73 | Number of children on node '/d' of the right object: 1"""
74 | )
75 | actual = diff_tree_repr(dt_1, dt_2, "isomorphic")
76 | assert actual == expected
77 |
78 | def test_diff_node_names(self):
79 | dt_1 = DataTree.from_dict({"a": None})
80 | dt_2 = DataTree.from_dict({"b": None})
81 |
82 | expected = dedent(
83 | """\
84 | Left and right DataTree objects are not identical
85 |
86 | Node '/a' in the left object has name 'a'
87 | Node '/b' in the right object has name 'b'"""
88 | )
89 | actual = diff_tree_repr(dt_1, dt_2, "identical")
90 | assert actual == expected
91 |
92 | def test_diff_node_data(self):
93 | import numpy as np  # local import: only this test needs numpy
94 |
95 | # casting to int64 explicitly ensures that int64s are created on all architectures
96 | ds1 = Dataset({"u": np.int64(0), "v": np.int64(1)})
97 | ds3 = Dataset({"w": np.int64(5)})
98 | dt_1 = DataTree.from_dict({"a": ds1, "a/b": ds3})
99 | ds2 = Dataset({"u": np.int64(0)})
100 | ds4 = Dataset({"w": np.int64(6)})
101 | dt_2 = DataTree.from_dict({"a": ds2, "a/b": ds4})
102 |
103 | expected = dedent(
104 | """\
105 | Left and right DataTree objects are not equal
106 |
107 |
108 | Data in nodes at position '/a' do not match:
109 |
110 | Data variables only on the left object:
111 | v int64 8B 1
112 |
113 | Data in nodes at position '/a/b' do not match:
114 |
115 | Differing data variables:
116 | L w int64 8B 5
117 | R w int64 8B 6"""
118 | )
119 | actual = diff_tree_repr(dt_1, dt_2, "equals")
120 | assert actual == expected
121 |
--------------------------------------------------------------------------------
/datatree/common.py:
--------------------------------------------------------------------------------
1 | """
2 | This file and class only exists because it was easier to copy the code for AttrAccessMixin from xarray.core.common
3 | with some slight modifications than it was to change the behaviour of an inherited xarray internal here.
4 |
5 | The modifications are marked with # TODO comments.
6 | """
7 |
8 | import warnings
9 | from contextlib import suppress
10 | from typing import Any, Hashable, Iterable, List, Mapping
11 |
12 |
13 | class TreeAttrAccessMixin:
14 | """Mixin class that allows getting keys with attribute access"""
15 |
16 | __slots__ = ()
17 |
18 | def __init_subclass__(cls, **kwargs):
19 | """Verify that all subclasses explicitly define ``__slots__``. If they don't,
20 | raise error in the core xarray module and a FutureWarning in third-party
21 | extensions.
22 | """
23 | if not hasattr(object.__new__(cls), "__dict__"):  # subclass is fully slotted
24 | pass
25 | # TODO reinstate this once integrated upstream
26 | # elif cls.__module__.startswith("datatree."):
27 | # raise AttributeError(f"{cls.__name__} must explicitly define __slots__")
28 | # else:
29 | # cls.__setattr__ = cls._setattr_dict
30 | # warnings.warn(
31 | # f"xarray subclass {cls.__name__} should explicitly define __slots__",
32 | # FutureWarning,
33 | # stacklevel=2,
34 | # )
35 | super().__init_subclass__(**kwargs)
36 |
37 | @property
38 | def _attr_sources(self) -> Iterable[Mapping[Hashable, Any]]:
39 | """Places to look-up items for attribute-style access"""
40 | yield from ()  # no sources by default; subclasses override this generator property
41 |
42 | @property
43 | def _item_sources(self) -> Iterable[Mapping[Hashable, Any]]:
44 | """Places to look-up items for key-autocompletion"""
45 | yield from ()  # no sources by default; subclasses override this generator property
46 |
47 | def __getattr__(self, name: str) -> Any:
48 | if name not in {"__dict__", "__setstate__"}:
49 | # this avoids an infinite loop when pickle looks for the
50 | # __setstate__ attribute before the xarray object is initialized
51 | for source in self._attr_sources:
52 | with suppress(KeyError):
53 | return source[name]  # first source containing `name` wins
54 | raise AttributeError(
55 | f"{type(self).__name__!r} object has no attribute {name!r}"
56 | )
57 |
58 | # This complicated two-method design boosts overall performance of simple operations
59 | # - particularly DataArray methods that perform a _to_temp_dataset() round-trip - by
60 | # a whopping 8% compared to a single method that checks hasattr(self, "__dict__") at
61 | # runtime before every single assignment. All of this is just temporary until the
62 | # FutureWarning can be changed into a hard crash.
63 | def _setattr_dict(self, name: str, value: Any) -> None:
64 | """Deprecated third party subclass (see ``__init_subclass__`` above)"""
65 | object.__setattr__(self, name, value)
66 | if name in self.__dict__:
67 | # Custom, non-slotted attr, or improperly assigned variable?
68 | warnings.warn(
69 | f"Setting attribute {name!r} on a {type(self).__name__!r} object. Explicitly define __slots__ "
70 | "to suppress this warning for legitimate custom attributes and "
71 | "raise an error when attempting variables assignments.",
72 | FutureWarning,
73 | stacklevel=2,
74 | )
75 |
76 | def __setattr__(self, name: str, value: Any) -> None:
77 | """Objects with ``__slots__`` raise AttributeError if you try setting an
78 | undeclared attribute. This is desirable, but the error message could use some
79 | improvement.
80 | """
81 | try:
82 | object.__setattr__(self, name, value)  # may raise AttributeError for undeclared slots
83 | except AttributeError as e:
84 | # Don't accidentally shadow custom AttributeErrors, e.g.
85 | # DataArray.dims.setter
86 | if str(e) != "{!r} object has no attribute {!r}".format(
87 | type(self).__name__, name
88 | ):
89 | raise
90 | raise AttributeError(
91 | f"cannot set attribute {name!r} on a {type(self).__name__!r} object. Use __setitem__ style"
92 | "assignment (e.g., `ds['name'] = ...`) instead of assigning variables."
93 | ) from e
94 |
95 | def __dir__(self) -> List[str]:
96 | """Provide method name lookup and completion. Only provide 'public'
97 | methods.
98 | """
99 | extra_attrs = {
100 | item
101 | for source in self._attr_sources
102 | for item in source
103 | if isinstance(item, str)
104 | }
105 | return sorted(set(dir(type(self))) | extra_attrs)  # class attrs plus attribute-accessible item names
106 |
--------------------------------------------------------------------------------
/docs/source/contributing.rst:
--------------------------------------------------------------------------------
1 | ========================
2 | Contributing to Datatree
3 | ========================
4 |
5 | Contributions are highly welcomed and appreciated. Every little help counts,
6 | so do not hesitate!
7 |
8 | .. contents:: Contribution links
9 | :depth: 2
10 |
11 | .. _submitfeedback:
12 |
13 | Feature requests and feedback
14 | -----------------------------
15 |
16 | Do you like Datatree? Share some love on Twitter or in your blog posts!
17 |
18 | We'd also like to hear about your propositions and suggestions. Feel free to
19 | `submit them as issues <https://github.com/xarray-contrib/datatree/issues>`_ and:
20 |
21 | * Explain in detail how they should work.
22 | * Keep the scope as narrow as possible. This will make it easier to implement.
23 |
24 | .. _reportbugs:
25 |
26 | Report bugs
27 | -----------
28 |
29 | Report bugs for Datatree in the `issue tracker <https://github.com/xarray-contrib/datatree/issues>`_.
30 |
31 | If you are reporting a bug, please include:
32 |
33 | * Your operating system name and version.
34 | * Any details about your local setup that might be helpful in troubleshooting,
35 | specifically the Python interpreter version, installed libraries, and Datatree
36 | version.
37 | * Detailed steps to reproduce the bug.
38 |
39 | If you can write a demonstration test that currently fails but should pass
40 | (xfail), that is a very useful commit to make as well, even if you cannot
41 | fix the bug itself.
42 |
43 | .. _fixbugs:
44 |
45 | Fix bugs
46 | --------
47 |
48 | Look through the `GitHub issues for bugs <https://github.com/xarray-contrib/datatree/issues>`_.
49 |
50 | Talk to developers to find out how you can fix specific bugs.
51 |
52 | Write documentation
53 | -------------------
54 |
55 | Datatree could always use more documentation. What exactly is needed?
56 |
57 | * More complementary documentation. Have you perhaps found something unclear?
58 | * Docstrings. There can never be too many of them.
59 | * Blog posts, articles and such -- they're all very appreciated.
60 |
61 | You can also edit documentation files directly in the GitHub web interface,
62 | without using a local copy. This can be convenient for small fixes.
63 |
64 | To build the documentation locally, you first need to install the following
65 | tools:
66 |
67 | - `Sphinx <https://www.sphinx-doc.org/>`__
68 | - `sphinx_rtd_theme <https://sphinx-rtd-theme.readthedocs.io/>`__
69 | - `sphinx-autosummary-accessors <https://sphinx-autosummary-accessors.readthedocs.io/>`__
70 |
71 | You can then build the documentation with the following commands::
72 |
73 | $ cd docs
74 | $ make html
75 |
76 | The built documentation should be available in the ``docs/_build/`` folder.
77 |
78 | .. _`pull requests`:
79 | .. _pull-requests:
80 |
81 | Preparing Pull Requests
82 | -----------------------
83 |
84 | #. Fork the
85 | `Datatree GitHub repository <https://github.com/xarray-contrib/datatree>`__. It's
86 | fine to use ``Datatree`` as your fork repository name because it will live
87 | under your user.
88 |
89 | #. Clone your fork locally using `git <https://git-scm.com/>`_ and create a branch::
90 |
91 | $ git clone git@github.com:{YOUR_GITHUB_USERNAME}/Datatree.git
92 | $ cd Datatree
93 |
94 | # now, to fix a bug or add feature create your own branch off "master":
95 |
96 | $ git checkout -b your-bugfix-feature-branch-name master
97 |
98 | #. Install `pre-commit <https://pre-commit.com/>`_ and its hook on the Datatree repo::
99 |
100 | $ pip install --user pre-commit
101 | $ pre-commit install
102 |
103 | Afterwards ``pre-commit`` will run whenever you commit.
104 |
105 | https://pre-commit.com/ is a framework for managing and maintaining multi-language pre-commit hooks
106 | to ensure code-style and code formatting is consistent.
107 |
108 | #. Install dependencies into a new conda environment::
109 |
110 | $ conda env update -f ci/environment.yml
111 |
112 | #. Run all the tests
113 |
114 | Now running tests is as simple as issuing this command::
115 |
116 | $ conda activate datatree-dev
117 | $ pytest --junitxml=test-reports/junit.xml --cov=./ --verbose
118 |
119 | This command will run tests via the "pytest" tool.
120 |
121 | #. You can now edit your local working copy and run the tests again as necessary. Please follow PEP-8 for naming.
122 |
123 | When committing, ``pre-commit`` will re-format the files if necessary.
124 |
125 | #. Commit and push once your tests pass and you are happy with your change(s)::
126 |
127 | $ git commit -a -m ""
128 | $ git push -u
129 |
130 | #. Finally, submit a pull request through the GitHub website using this data::
131 |
132 | head-fork: YOUR_GITHUB_USERNAME/Datatree
133 | compare: your-branch-name
134 |
135 | base-fork: TomNicholas/datatree
136 | base: master
137 |
--------------------------------------------------------------------------------
/datatree/tests/test_io.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import zarr.errors
3 |
4 | from datatree.io import open_datatree
5 | from datatree.testing import assert_equal
6 | from datatree.tests import requires_h5netcdf, requires_netCDF4, requires_zarr
7 |
8 |
class TestIO:
    """Round-trip and encoding tests for netCDF / Zarr serialization of DataTrees."""

    @requires_netCDF4
    def test_to_netcdf(self, tmpdir, simple_datatree):
        # str() works around a pathlib bug in xarray
        filepath = str(tmpdir / "test.nc")
        dt = simple_datatree
        dt.to_netcdf(filepath, engine="netcdf4")

        roundtrip = open_datatree(filepath)
        assert_equal(dt, roundtrip)

    @requires_netCDF4
    def test_netcdf_encoding(self, tmpdir, simple_datatree):
        # str() works around a pathlib bug in xarray
        filepath = str(tmpdir / "test.nc")
        dt = simple_datatree

        # request zlib compression for every variable in the /set2 group
        comp = dict(zlib=True, complevel=9)
        enc = {"/set2": {var: comp for var in dt["/set2"].ds.data_vars}}

        dt.to_netcdf(filepath, encoding=enc, engine="netcdf4")
        roundtrip = open_datatree(filepath)

        assert roundtrip["/set2/a"].encoding["zlib"] == comp["zlib"]
        assert roundtrip["/set2/a"].encoding["complevel"] == comp["complevel"]

        # encoding for a path that is not a group in the tree must be rejected
        enc["/not/a/group"] = {"foo": "bar"}
        with pytest.raises(ValueError, match="unexpected encoding group.*"):
            dt.to_netcdf(filepath, encoding=enc, engine="netcdf4")

    @requires_h5netcdf
    def test_to_h5netcdf(self, tmpdir, simple_datatree):
        # str() works around a pathlib bug in xarray
        filepath = str(tmpdir / "test.nc")
        dt = simple_datatree
        dt.to_netcdf(filepath, engine="h5netcdf")

        roundtrip = open_datatree(filepath)
        assert_equal(dt, roundtrip)

    @requires_zarr
    def test_to_zarr(self, tmpdir, simple_datatree):
        # str() works around a pathlib bug in xarray
        filepath = str(tmpdir / "test.zarr")
        dt = simple_datatree
        dt.to_zarr(filepath)

        roundtrip = open_datatree(filepath, engine="zarr")
        assert_equal(dt, roundtrip)

    @requires_zarr
    def test_zarr_encoding(self, tmpdir, simple_datatree):
        import zarr

        # str() works around a pathlib bug in xarray
        filepath = str(tmpdir / "test.zarr")
        dt = simple_datatree

        # request Blosc compression for every variable in the /set2 group
        comp = {"compressor": zarr.Blosc(cname="zstd", clevel=3, shuffle=2)}
        enc = {"/set2": {var: comp for var in dt["/set2"].ds.data_vars}}
        dt.to_zarr(filepath, encoding=enc)
        roundtrip = open_datatree(filepath, engine="zarr")

        print(roundtrip["/set2/a"].encoding)
        assert roundtrip["/set2/a"].encoding["compressor"] == comp["compressor"]

        # encoding for a path that is not a group in the tree must be rejected
        enc["/not/a/group"] = {"foo": "bar"}
        with pytest.raises(ValueError, match="unexpected encoding group.*"):
            dt.to_zarr(filepath, encoding=enc, engine="zarr")

    @requires_zarr
    def test_to_zarr_zip_store(self, tmpdir, simple_datatree):
        from zarr.storage import ZipStore

        # str() works around a pathlib bug in xarray
        filepath = str(tmpdir / "test.zarr.zip")
        dt = simple_datatree
        store = ZipStore(filepath)
        dt.to_zarr(store)

        roundtrip = open_datatree(store, engine="zarr")
        assert_equal(dt, roundtrip)

    @requires_zarr
    def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree):
        filepath = tmpdir / "test.zarr"
        zmetadata = filepath / ".zmetadata"
        s1zmetadata = filepath / "set1" / ".zmetadata"
        filepath = str(filepath)  # str() works around a pathlib bug in xarray
        dt = simple_datatree
        dt.to_zarr(filepath, consolidated=False)
        # no consolidated-metadata files should have been written
        assert not zmetadata.exists()
        assert not s1zmetadata.exists()

        with pytest.warns(RuntimeWarning, match="consolidated"):
            roundtrip = open_datatree(filepath, engine="zarr")
        assert_equal(dt, roundtrip)

    @requires_zarr
    def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree):
        simple_datatree.to_zarr(tmpdir)

        # with default settings, to_zarr should not overwrite an existing dir
        with pytest.raises(zarr.errors.ContainsGroupError):
            simple_datatree.to_zarr(tmpdir)
--------------------------------------------------------------------------------
/datatree/tests/test_formatting_html.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import xarray as xr
3 |
4 | from datatree import DataTree, formatting_html
5 |
6 |
7 | @pytest.fixture(scope="module", params=["some html", "some other html"])
8 | def repr(request):
9 | return request.param
10 |
11 |
12 | class Test_summarize_children:
13 | """
14 | Unit tests for summarize_children.
15 | """
16 |
17 | func = staticmethod(formatting_html.summarize_children)  # function under test
18 |
19 | @pytest.fixture(scope="class")
20 | def childfree_tree_factory(self):
21 | """
22 | Fixture for a child-free DataTree factory.
23 | """
24 | from random import randint
25 |
26 | def _childfree_tree_factory():
27 | return DataTree(
28 | data=xr.Dataset({"z": ("y", [randint(1, 100) for _ in range(3)])})
29 | )
30 |
31 | return _childfree_tree_factory
32 |
33 | @pytest.fixture(scope="class")
34 | def childfree_tree(self, childfree_tree_factory):
35 | """
36 | Fixture for a child-free DataTree.
37 | """
38 | return childfree_tree_factory()
39 |
40 | @pytest.fixture(scope="function")
41 | def mock_node_repr(self, monkeypatch):
42 | """
43 | Apply mocking for node_repr.
44 | """
45 |
46 | def mock(group_title, dt):
47 | """
48 | Mock with a simple result
49 | """
50 | return group_title + " " + str(id(dt))
51 |
52 | monkeypatch.setattr(formatting_html, "node_repr", mock)  # undone automatically after each test
53 |
54 | @pytest.fixture(scope="function")
55 | def mock_wrap_repr(self, monkeypatch):
56 | """
57 | Apply mocking for _wrap_repr.
58 | """
59 |
60 | def mock(r, *, end, **kwargs):
61 | """
62 | Mock by appending "end" or "not end".
63 | """
64 | return r + " " + ("end" if end else "not end") + "//"
65 |
66 | monkeypatch.setattr(formatting_html, "_wrap_repr", mock)  # undone automatically after each test
67 |
68 | def test_empty_mapping(self):
69 | """
70 | Test with an empty mapping of children.
71 | """
72 | children = {}
73 | assert self.func(children) == (
74 | "
" "
"
75 | )
76 |
77 | def test_one_child(self, childfree_tree, mock_wrap_repr, mock_node_repr):
78 | """
79 | Test with one child.
80 |
81 | Uses a mock of _wrap_repr and node_repr to essentially mock
82 | the inline lambda function "lines_callback".
83 | """
84 | # Create mapping of children
85 | children = {"a": childfree_tree}
86 |
87 | # Expect first line to be produced from the first child, and
88 | # wrapped as the last child
89 | first_line = f"a {id(children['a'])} end//"
90 |
91 | assert self.func(children) == (
92 | "
"
93 | f"{first_line}"
94 | "
"
95 | )
96 |
97 | def test_two_children(self, childfree_tree_factory, mock_wrap_repr, mock_node_repr):
98 | """
99 | Test with two level deep children.
100 |
101 | Uses a mock of _wrap_repr and node_repr to essentially mock
102 | the inline lambda function "lines_callback".
103 | """
104 |
105 | # Create mapping of children
106 | children = {"a": childfree_tree_factory(), "b": childfree_tree_factory()}
107 |
108 | # Expect first line to be produced from the first child, and
109 | # wrapped as _not_ the last child
110 | first_line = f"a {id(children['a'])} not end//"
111 |
112 | # Expect second line to be produced from the second child, and
113 | # wrapped as the last child
114 | second_line = f"b {id(children['b'])} end//"
115 |
116 | assert self.func(children) == (
117 | "
"
121 | )
122 |
123 |
124 | class Test__wrap_repr:
125 | """
126 | Unit tests for _wrap_repr.
127 | """
128 |
129 | func = staticmethod(formatting_html._wrap_repr)  # function under test
130 |
131 | def test_end(self, repr):
132 | """
133 | Test with end=True.
134 | """
135 | r = self.func(repr, end=True)
136 | assert r == (
137 | "
"
138 | "
"
145 | "
"
146 | "
"
154 | "
"
155 | "
"
158 | "
"
159 | f"{repr}"
160 | "
"
161 | "
"
162 | "
"
163 | )
164 |
165 | def test_not_end(self, repr):
166 | """
167 | Test with end=False.
168 | """
169 | r = self.func(repr, end=False)
170 | assert r == (
171 | "
"
172 | "
"
179 | "
"
180 | "
"
188 | "
"
189 | "
"
192 | "
"
193 | f"{repr}"
194 | "
"
195 | "
"
196 | "
"
197 | )
198 |
198 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # datatree
2 |
3 | | CI | [![GitHub Workflow Status][github-ci-badge]][github-ci-link] [![Code Coverage Status][codecov-badge]][codecov-link] [![pre-commit.ci status][pre-commit.ci-badge]][pre-commit.ci-link] |
4 | | :---------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
5 | | **Docs** | [![Documentation Status][rtd-badge]][rtd-link] |
6 | | **Package** | [![Conda][conda-badge]][conda-link] [![PyPI][pypi-badge]][pypi-link] |
7 | | **License** | [![License][license-badge]][repo-link] |
8 |
9 |
10 | **Datatree is a prototype implementation of a tree-like hierarchical data structure for xarray.**
11 |
12 | Datatree was born after the xarray team recognised a [need for a new hierarchical data structure](https://github.com/pydata/xarray/issues/4118),
13 | that was more flexible than a single `xarray.Dataset` object.
14 | The initial motivation was to represent netCDF files / Zarr stores with multiple nested groups in a single in-memory object,
15 | but `datatree.DataTree` objects have many other uses.
16 |
17 | # :rotating_light: :bell: :warning: NO LONGER MAINTAINED :warning: :bell: :rotating_light:
18 |
19 | **This repository has been archived and the code is no longer maintained!**
20 |
21 | Datatree has been [merged upstream into `pydata/xarray`](https://github.com/pydata/xarray/issues/8572), and released as of xarray version `2024.10.0`.
22 |
23 | There will be no further bugfixes or feature additions to this repository.
24 |
25 | Users of this repository should migrate to using `xarray.DataTree` instead, following the [Migration Guide](https://github.com/pydata/xarray/discussions/9680).
26 |
27 | The information below is all outdated, and is left only for historical interest.
28 |
29 | ### Installation
30 | You can install datatree via pip:
31 | ```shell
32 | pip install xarray-datatree
33 | ```
34 |
35 | or via conda-forge
36 | ```shell
37 | conda install -c conda-forge xarray-datatree
38 | ```
39 |
40 | ### Why Datatree?
41 |
42 | You might want to use datatree for:
43 |
44 | - Organising many related datasets, e.g. results of the same experiment with different parameters, or simulations of the same system using different models,
45 | - Analysing similar data at multiple resolutions simultaneously, such as when doing a convergence study,
- Comparing heterogeneous but related data, such as experimental and theoretical data,
47 | - I/O with nested data formats such as netCDF / Zarr groups.
48 |
49 | [**Talk slides on Datatree from AMS-python 2023**](https://speakerdeck.com/tomnicholas/xarray-datatree-hierarchical-data-structures-for-multi-model-science)
50 |
51 | ### Features
52 |
The approach used here is based on benbovy's [`DatasetNode` example](https://gist.github.com/benbovy/92e7c76220af1aaa4b3a0b65374e233a) - the basic idea is that each tree node wraps up to a single `xarray.Dataset`. The differences are that this effort:
54 | - Uses a node structure inspired by [anytree](https://github.com/xarray-contrib/datatree/issues/7) for the tree,
55 | - Implements path-like getting and setting,
56 | - Has functions for mapping user-supplied functions over every node in the tree,
57 | - Automatically dispatches *some* of `xarray.Dataset`'s API over every node in the tree (such as `.isel`),
58 | - Has a bunch of tests,
59 | - Has a printable representation that currently looks like this:
60 |
61 |
62 | ### Get Started
63 |
64 | You can create a `DataTree` object in 3 ways:
65 | 1) Load from a netCDF file (or Zarr store) that has groups via `open_datatree()`.
66 | 2) Using the init method of `DataTree`, which creates an individual node.
You can then specify the nodes' relationships to one another, either by setting `.parent` and `.children` attributes,
68 | or through `__get/setitem__` access, e.g. `dt['path/to/node'] = DataTree()`.
69 | 3) Create a tree from a dictionary of paths to datasets using `DataTree.from_dict()`.
70 |
71 | ### Development Roadmap
72 |
73 | Datatree currently lives in a separate repository to the main xarray package.
74 | This allows the datatree developers to make changes to it, experiment, and improve it faster.
75 |
Eventually we plan to fully integrate datatree upstream into xarray's main codebase, at which point the [github.com/xarray-contrib/datatree](https://github.com/xarray-contrib/datatree) repository will be archived.
77 | This should not cause much disruption to code that depends on datatree - you will likely only have to change the import line (i.e. from ``from datatree import DataTree`` to ``from xarray import DataTree``).
78 |
79 | However, until this full integration occurs, datatree's API should not be considered to have the same [level of stability as xarray's](https://docs.xarray.dev/en/stable/contributing.html#backwards-compatibility).
80 |
81 | ### User Feedback
82 |
83 | We really really really want to hear your opinions on datatree!
84 | At this point in development, user feedback is critical to help us create something that will suit everyone's needs.
85 | Please raise any thoughts, issues, suggestions or bugs, no matter how small or large, on the [github issue tracker](https://github.com/xarray-contrib/datatree/issues).
86 |
87 |
88 | [github-ci-badge]: https://img.shields.io/github/actions/workflow/status/xarray-contrib/datatree/main.yaml?branch=main&label=CI&logo=github
89 | [github-ci-link]: https://github.com/xarray-contrib/datatree/actions?query=workflow%3ACI
90 | [codecov-badge]: https://img.shields.io/codecov/c/github/xarray-contrib/datatree.svg?logo=codecov
91 | [codecov-link]: https://codecov.io/gh/xarray-contrib/datatree
92 | [rtd-badge]: https://img.shields.io/readthedocs/xarray-datatree/latest.svg
93 | [rtd-link]: https://xarray-datatree.readthedocs.io/en/latest/?badge=latest
94 | [pypi-badge]: https://img.shields.io/pypi/v/xarray-datatree?logo=pypi
95 | [pypi-link]: https://pypi.org/project/xarray-datatree
96 | [conda-badge]: https://img.shields.io/conda/vn/conda-forge/xarray-datatree?logo=anaconda
97 | [conda-link]: https://anaconda.org/conda-forge/xarray-datatree
98 | [license-badge]: https://img.shields.io/github/license/xarray-contrib/datatree
99 | [repo-link]: https://github.com/xarray-contrib/datatree
100 | [pre-commit.ci-badge]: https://results.pre-commit.ci/badge/github/xarray-contrib/datatree/main.svg
101 | [pre-commit.ci-link]: https://results.pre-commit.ci/latest/github/xarray-contrib/datatree/main
102 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

REM Command file for Sphinx documentation

REM Fall back to the "sphinx-build" executable on PATH when SPHINXBUILD is unset.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
if NOT "%PAPER%" == "" (
	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)

REM No target supplied: show the help text.
if "%1" == "" goto help

if "%1" == "help" (
	:help
	echo.Please use `make ^<target^>` where ^<target^> is one of
	echo.  html       to make standalone HTML files
	echo.  dirhtml    to make HTML files named index.html in directories
	echo.  singlehtml to make a single large HTML file
	echo.  pickle     to make pickle files
	echo.  json       to make JSON files
	echo.  htmlhelp   to make HTML files and a HTML help project
	echo.  qthelp     to make HTML files and a qthelp project
	echo.  devhelp    to make HTML files and a Devhelp project
	echo.  epub       to make an epub
	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
	echo.  text       to make text files
	echo.  man        to make manual pages
	echo.  texinfo    to make Texinfo files
	echo.  gettext    to make PO message catalogs
	echo.  changes    to make an overview over all changed/added/deprecated items
	echo.  xml        to make Docutils-native XML files
	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
	echo.  linkcheck  to check all external links for integrity
	echo.  doctest    to run all doctests embedded in the documentation if enabled
	goto end
)

REM Remove everything previously built.
if "%1" == "clean" (
	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
	del /q /s %BUILDDIR%\*
	goto end
)


REM Verify sphinx-build is runnable before dispatching to a builder
REM (errorlevel 9009 means the command was not found).
%SPHINXBUILD% 2> nul
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM One block per builder target; each invokes sphinx-build with the matching -b flag.
if "%1" == "html" (
	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
	goto end
)

if "%1" == "dirhtml" (
	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
	goto end
)

if "%1" == "singlehtml" (
	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
	goto end
)

if "%1" == "pickle" (
	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the pickle files.
	goto end
)

if "%1" == "json" (
	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can process the JSON files.
	goto end
)

if "%1" == "htmlhelp" (
	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
	goto end
)

if "%1" == "qthelp" (
	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\complexity.qhcp
	echo.To view the help file:
	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\complexity.ghc
	goto end
)

if "%1" == "devhelp" (
	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished.
	goto end
)

if "%1" == "epub" (
	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The epub file is in %BUILDDIR%/epub.
	goto end
)

if "%1" == "latex" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "latexpdf" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	cd %BUILDDIR%/latex
	make all-pdf
	cd %BUILDDIR%/..
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "latexpdfja" (
	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
	cd %BUILDDIR%/latex
	make all-pdf-ja
	cd %BUILDDIR%/..
	echo.
	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
	goto end
)

if "%1" == "text" (
	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The text files are in %BUILDDIR%/text.
	goto end
)

if "%1" == "man" (
	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The manual pages are in %BUILDDIR%/man.
	goto end
)

if "%1" == "texinfo" (
	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
	goto end
)

if "%1" == "gettext" (
	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
	goto end
)

if "%1" == "changes" (
	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
	if errorlevel 1 exit /b 1
	echo.
	echo.The overview file is in %BUILDDIR%/changes.
	goto end
)

if "%1" == "linkcheck" (
	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
	if errorlevel 1 exit /b 1
	echo.
	echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
	goto end
)

if "%1" == "doctest" (
	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
	if errorlevel 1 exit /b 1
	echo.
	echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
	goto end
)

if "%1" == "xml" (
	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The XML files are in %BUILDDIR%/xml.
	goto end
)

if "%1" == "pseudoxml" (
	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
	if errorlevel 1 exit /b 1
	echo.
	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
	goto end
)

:end
243 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

.PHONY: help clean html rtdhtml dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

# Print the list of available targets.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  rtdhtml    Build html using same settings used on ReadtheDocs"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	rm -rf $(BUILDDIR)/*

# Each build target below delegates to sphinx-build with the matching builder (-b) flag.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

rtdhtml:
	$(SPHINXBUILD) -T -j auto -E -W --keep-going -b html -d $(BUILDDIR)/doctrees -D language=en . $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/complexity.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/complexity.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/complexity"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/complexity"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	"(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	"(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	"or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	"results in $(BUILDDIR)/doctest/output.txt."

xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
184 |
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
1 | .. currentmodule:: datatree
2 |
3 | #############
4 | API reference
5 | #############
6 |
7 | DataTree
8 | ========
9 |
10 | Creating a DataTree
11 | -------------------
12 |
13 | Methods of creating a datatree.
14 |
15 | .. autosummary::
16 | :toctree: generated/
17 |
18 | DataTree
19 | DataTree.from_dict
20 |
21 | Tree Attributes
22 | ---------------
23 |
24 | Attributes relating to the recursive tree-like structure of a ``DataTree``.
25 |
26 | .. autosummary::
27 | :toctree: generated/
28 |
29 | DataTree.parent
30 | DataTree.children
31 | DataTree.name
32 | DataTree.path
33 | DataTree.root
34 | DataTree.is_root
35 | DataTree.is_leaf
36 | DataTree.leaves
37 | DataTree.level
38 | DataTree.depth
39 | DataTree.width
40 | DataTree.subtree
41 | DataTree.descendants
42 | DataTree.siblings
43 | DataTree.lineage
44 | DataTree.parents
45 | DataTree.ancestors
46 | DataTree.groups
47 |
48 | Data Contents
49 | -------------
50 |
51 | Interface to the data objects (optionally) stored inside a single ``DataTree`` node.
52 | This interface echoes that of ``xarray.Dataset``.
53 |
54 | .. autosummary::
55 | :toctree: generated/
56 |
57 | DataTree.dims
58 | DataTree.sizes
59 | DataTree.data_vars
60 | DataTree.coords
61 | DataTree.attrs
62 | DataTree.encoding
63 | DataTree.indexes
64 | DataTree.nbytes
65 | DataTree.ds
66 | DataTree.to_dataset
67 | DataTree.has_data
68 | DataTree.has_attrs
69 | DataTree.is_empty
70 | DataTree.is_hollow
71 |
72 | Dictionary Interface
73 | --------------------
74 |
``DataTree`` objects also have a dict-like interface mapping keys to either ``xarray.DataArray`` objects or to child ``DataTree`` nodes.
76 |
77 | .. autosummary::
78 | :toctree: generated/
79 |
80 | DataTree.__getitem__
81 | DataTree.__setitem__
82 | DataTree.__delitem__
83 | DataTree.update
84 | DataTree.get
85 | DataTree.items
86 | DataTree.keys
87 | DataTree.values
88 |
89 | Tree Manipulation
90 | -----------------
91 |
92 | For manipulating, traversing, navigating, or mapping over the tree structure.
93 |
94 | .. autosummary::
95 | :toctree: generated/
96 |
97 | DataTree.orphan
98 | DataTree.same_tree
99 | DataTree.relative_to
100 | DataTree.iter_lineage
101 | DataTree.find_common_ancestor
102 | DataTree.map_over_subtree
103 | map_over_subtree
104 | DataTree.pipe
105 | DataTree.match
106 | DataTree.filter
107 |
108 | Pathlib-like Interface
109 | ----------------------
110 |
111 | ``DataTree`` objects deliberately echo some of the API of `pathlib.PurePath`.
112 |
113 | .. autosummary::
114 | :toctree: generated/
115 |
116 | DataTree.name
117 | DataTree.parent
118 | DataTree.parents
119 | DataTree.relative_to
120 |
121 | Missing:
122 |
123 | ..
124 |
125 | ``DataTree.glob``
126 | ``DataTree.joinpath``
127 | ``DataTree.with_name``
128 | ``DataTree.walk``
129 | ``DataTree.rename``
130 | ``DataTree.replace``
131 |
132 | DataTree Contents
133 | -----------------
134 |
135 | Manipulate the contents of all nodes in a tree simultaneously.
136 |
137 | .. autosummary::
138 | :toctree: generated/
139 |
140 | DataTree.copy
141 | DataTree.assign_coords
142 | DataTree.merge
143 | DataTree.rename
144 | DataTree.rename_vars
145 | DataTree.rename_dims
146 | DataTree.swap_dims
147 | DataTree.expand_dims
148 | DataTree.drop_vars
149 | DataTree.drop_dims
150 | DataTree.set_coords
151 | DataTree.reset_coords
152 |
153 | DataTree Node Contents
154 | ----------------------
155 |
156 | Manipulate the contents of a single DataTree node.
157 |
158 | .. autosummary::
159 | :toctree: generated/
160 |
161 | DataTree.assign
162 | DataTree.drop_nodes
163 |
164 | Comparisons
165 | ===========
166 |
167 | Compare one ``DataTree`` object to another.
168 |
169 | .. autosummary::
170 | :toctree: generated/
171 |
172 | DataTree.isomorphic
173 | DataTree.equals
174 | DataTree.identical
175 |
176 | Indexing
177 | ========
178 |
179 | Index into all nodes in the subtree simultaneously.
180 |
181 | .. autosummary::
182 | :toctree: generated/
183 |
184 | DataTree.isel
185 | DataTree.sel
186 | DataTree.drop_sel
187 | DataTree.drop_isel
188 | DataTree.head
189 | DataTree.tail
190 | DataTree.thin
191 | DataTree.squeeze
192 | DataTree.interp
193 | DataTree.interp_like
194 | DataTree.reindex
195 | DataTree.reindex_like
196 | DataTree.set_index
197 | DataTree.reset_index
198 | DataTree.reorder_levels
199 | DataTree.query
200 |
201 | ..
202 |
203 | Missing:
204 | ``DataTree.loc``
205 |
206 |
207 | Missing Value Handling
208 | ======================
209 |
210 | .. autosummary::
211 | :toctree: generated/
212 |
213 | DataTree.isnull
214 | DataTree.notnull
215 | DataTree.combine_first
216 | DataTree.dropna
217 | DataTree.fillna
218 | DataTree.ffill
219 | DataTree.bfill
220 | DataTree.interpolate_na
221 | DataTree.where
222 | DataTree.isin
223 |
224 | Computation
225 | ===========
226 |
227 | Apply a computation to the data in all nodes in the subtree simultaneously.
228 |
229 | .. autosummary::
230 | :toctree: generated/
231 |
232 | DataTree.map
233 | DataTree.reduce
234 | DataTree.diff
235 | DataTree.quantile
236 | DataTree.differentiate
237 | DataTree.integrate
238 | DataTree.map_blocks
239 | DataTree.polyfit
240 | DataTree.curvefit
241 |
242 | Aggregation
243 | ===========
244 |
245 | Aggregate data in all nodes in the subtree simultaneously.
246 |
247 | .. autosummary::
248 | :toctree: generated/
249 |
250 | DataTree.all
251 | DataTree.any
252 | DataTree.argmax
253 | DataTree.argmin
254 | DataTree.idxmax
255 | DataTree.idxmin
256 | DataTree.max
257 | DataTree.min
258 | DataTree.mean
259 | DataTree.median
260 | DataTree.prod
261 | DataTree.sum
262 | DataTree.std
263 | DataTree.var
264 | DataTree.cumsum
265 | DataTree.cumprod
266 |
267 | ndarray methods
268 | ===============
269 |
270 | Methods copied from :py:class:`numpy.ndarray` objects, here applying to the data in all nodes in the subtree.
271 |
272 | .. autosummary::
273 | :toctree: generated/
274 |
275 | DataTree.argsort
276 | DataTree.astype
277 | DataTree.clip
278 | DataTree.conj
279 | DataTree.conjugate
280 | DataTree.round
281 | DataTree.rank
282 |
283 | Reshaping and reorganising
284 | ==========================
285 |
286 | Reshape or reorganise the data in all nodes in the subtree.
287 |
288 | .. autosummary::
289 | :toctree: generated/
290 |
291 | DataTree.transpose
292 | DataTree.stack
293 | DataTree.unstack
294 | DataTree.shift
295 | DataTree.roll
296 | DataTree.pad
297 | DataTree.sortby
298 | DataTree.broadcast_like
299 |
300 | Plotting
301 | ========
302 |
303 | I/O
304 | ===
305 |
306 | Open a datatree from an on-disk store or serialize the tree.
307 |
308 | .. autosummary::
309 | :toctree: generated/
310 |
311 | open_datatree
312 | DataTree.to_dict
313 | DataTree.to_netcdf
314 | DataTree.to_zarr
315 |
316 | ..
317 |
318 | Missing:
319 | ``open_mfdatatree``
320 |
321 | Tutorial
322 | ========
323 |
324 | Testing
325 | =======
326 |
327 | Test that two DataTree objects are similar.
328 |
329 | .. autosummary::
330 | :toctree: generated/
331 |
332 | testing.assert_isomorphic
333 | testing.assert_equal
334 | testing.assert_identical
335 |
336 | Exceptions
337 | ==========
338 |
339 | Exceptions raised when manipulating trees.
340 |
341 | .. autosummary::
342 | :toctree: generated/
343 |
344 | TreeIsomorphismError
345 | InvalidTreeError
346 | NotFoundInTreeError
347 |
348 | Advanced API
349 | ============
350 |
351 | Relatively advanced API for users or developers looking to understand the internals, or extend functionality.
352 |
353 | .. autosummary::
354 | :toctree: generated/
355 |
356 | DataTree.variables
357 | register_datatree_accessor
358 |
359 | ..
360 |
361 | Missing:
362 | ``DataTree.set_close``
363 |
--------------------------------------------------------------------------------
/datatree/ops.py:
--------------------------------------------------------------------------------
1 | import textwrap
2 |
3 | from xarray import Dataset
4 |
5 | from .mapping import map_over_subtree
6 |
7 | """
8 | Module which specifies the subset of xarray.Dataset's API which we wish to copy onto DataTree.
9 |
10 | Structured to mirror the way xarray defines Dataset's various operations internally, but does not actually import from
11 | xarray's internals directly, only the public-facing xarray.Dataset class.
12 | """
13 |
14 |
# Paragraph appended to the docstring of each wrapped Dataset method, telling
# users that the copied method acts on every node of the subtree rather than
# on a single Dataset.
_MAPPED_DOCSTRING_ADDENDUM = textwrap.fill(
    "This method was copied from xarray.Dataset, but has been altered to "
    "call the method on the Datasets stored in every node of the subtree. "
    "See the `map_over_subtree` function for more details.",
    width=117,
)

# TODO equals, broadcast_equals etc.
# TODO do dask-related private methods need to be exposed?
# Dask-related xarray.Dataset method names to copy onto DataTree,
# each wrapped so it is mapped over the whole subtree.
_DATASET_DASK_METHODS_TO_MAP = [
    "load",
    "compute",
    "persist",
    "unify_chunks",
    "chunk",
    "map_blocks",
]
# General xarray.Dataset method names to copy onto DataTree (indexing,
# renaming, reshaping, filling, reductions-by-name, fitting, etc.).
_DATASET_METHODS_TO_MAP = [
    "as_numpy",
    "set_coords",
    "reset_coords",
    "info",
    "isel",
    "sel",
    "head",
    "tail",
    "thin",
    "broadcast_like",
    "reindex_like",
    "reindex",
    "interp",
    "interp_like",
    "rename",
    "rename_dims",
    "rename_vars",
    "swap_dims",
    "expand_dims",
    "set_index",
    "reset_index",
    "reorder_levels",
    "stack",
    "unstack",
    "merge",
    "drop_vars",
    "drop_sel",
    "drop_isel",
    "drop_dims",
    "transpose",
    "dropna",
    "fillna",
    "interpolate_na",
    "ffill",
    "bfill",
    "combine_first",
    "reduce",
    "map",
    "diff",
    "shift",
    "roll",
    "sortby",
    "quantile",
    "rank",
    "differentiate",
    "integrate",
    "cumulative_integrate",
    "filter_by_attrs",
    "polyfit",
    "pad",
    "idxmin",
    "idxmax",
    "argmin",
    "argmax",
    "query",
    "curvefit",
]
# Union of the dask-specific and general Dataset method names.
_ALL_DATASET_METHODS_TO_MAP = _DATASET_DASK_METHODS_TO_MAP + _DATASET_METHODS_TO_MAP

# Method names grouped separately from the Dataset list above — presumably
# those defined on xarray's DataWithCoords base class (TODO confirm against
# the xarray version in use).
_DATA_WITH_COORDS_METHODS_TO_MAP = [
    "squeeze",
    "clip",
    "assign_coords",
    "where",
    "close",
    "isnull",
    "notnull",
    "isin",
    "astype",
]

# Boolean reductions.
REDUCE_METHODS = ["all", "any"]
# Reductions that skip NaN values.
NAN_REDUCE_METHODS = [
    "max",
    "min",
    "mean",
    "prod",
    "sum",
    "std",
    "var",
    "median",
]
# Cumulative (scan) reductions that skip NaN values.
NAN_CUM_METHODS = ["cumsum", "cumprod"]
# Operator-overload dunders (binary, reflected, in-place, unary) plus a few
# numpy-style methods, to be dispatched over every node of the subtree.
_TYPED_DATASET_OPS_TO_MAP = [
    "__add__",
    "__sub__",
    "__mul__",
    "__pow__",
    "__truediv__",
    "__floordiv__",
    "__mod__",
    "__and__",
    "__xor__",
    "__or__",
    "__lt__",
    "__le__",
    "__gt__",
    "__ge__",
    "__eq__",
    "__ne__",
    "__radd__",
    "__rsub__",
    "__rmul__",
    "__rpow__",
    "__rtruediv__",
    "__rfloordiv__",
    "__rmod__",
    "__rand__",
    "__rxor__",
    "__ror__",
    "__iadd__",
    "__isub__",
    "__imul__",
    "__ipow__",
    "__itruediv__",
    "__ifloordiv__",
    "__imod__",
    "__iand__",
    "__ixor__",
    "__ior__",
    "__neg__",
    "__pos__",
    "__abs__",
    "__invert__",
    "round",
    "argsort",
    "conj",
    "conjugate",
]
# TODO NUM_BINARY_OPS apparently aren't defined on DatasetArithmetic, and don't appear to be injected anywhere...
# Everything arithmetic-flavoured: reductions, cumulative ops, operator
# dunders, and the numpy ufunc protocol hook.
_ARITHMETIC_METHODS_TO_MAP = (
    REDUCE_METHODS
    + NAN_REDUCE_METHODS
    + NAN_CUM_METHODS
    + _TYPED_DATASET_OPS_TO_MAP
    + ["__array_ufunc__"]
)
170 |
171 |
172 | def _wrap_then_attach_to_cls(
173 | target_cls_dict, source_cls, methods_to_set, wrap_func=None
174 | ):
175 | """
176 | Attach given methods on a class, and optionally wrap each method first. (i.e. with map_over_subtree)
177 |
178 | Result is like having written this in the classes' definition:
179 | ```
180 | @wrap_func
181 | def method_name(self, *args, **kwargs):
182 | return self.method(*args, **kwargs)
183 | ```
184 |
185 | Every method attached here needs to have a return value of Dataset or DataArray in order to construct a new tree.
186 |
187 | Parameters
188 | ----------
189 | target_cls_dict : MappingProxy
190 | The __dict__ attribute of the class which we want the methods to be added to. (The __dict__ attribute can also
191 | be accessed by calling vars() from within that classes' definition.) This will be updated by this function.
192 | source_cls : class
193 | Class object from which we want to copy methods (and optionally wrap them). Should be the actual class object
194 | (or instance), not just the __dict__.
195 | methods_to_set : Iterable[Tuple[str, callable]]
196 | The method names and definitions supplied as a list of (method_name_string, method) pairs.
197 | This format matches the output of inspect.getmembers().
198 | wrap_func : callable, optional
199 | Function to decorate each method with. Must have the same return type as the method.
200 | """
201 | for method_name in methods_to_set:
202 | orig_method = getattr(source_cls, method_name)
203 | wrapped_method = (
204 | wrap_func(orig_method) if wrap_func is not None else orig_method
205 | )
206 | target_cls_dict[method_name] = wrapped_method
207 |
208 | if wrap_func is map_over_subtree:
209 | # Add a paragraph to the method's docstring explaining how it's been mapped
210 | orig_method_docstring = orig_method.__doc__
211 | # if orig_method_docstring is not None:
212 | # if "\n" in orig_method_docstring:
213 | # new_method_docstring = orig_method_docstring.replace(
214 | # "\n", _MAPPED_DOCSTRING_ADDENDUM, 1
215 | # )
216 | # else:
217 | # new_method_docstring = (
218 | # orig_method_docstring + f"\n\n{_MAPPED_DOCSTRING_ADDENDUM}"
219 | # )
220 | setattr(target_cls_dict[method_name], "__doc__", orig_method_docstring)
221 |
222 |
223 | class MappedDatasetMethodsMixin:
224 | """
225 | Mixin to add methods defined specifically on the Dataset class such as .query(), but wrapped to map over all nodes
226 | in the subtree.
227 | """
228 |
229 | _wrap_then_attach_to_cls(
230 | target_cls_dict=vars(),
231 | source_cls=Dataset,
232 | methods_to_set=_ALL_DATASET_METHODS_TO_MAP,
233 | wrap_func=map_over_subtree,
234 | )
235 |
236 |
237 | class MappedDataWithCoords:
238 | """
239 | Mixin to add coordinate-aware Dataset methods such as .where(), but wrapped to map over all nodes in the subtree.
240 | """
241 |
242 | # TODO add mapped versions of groupby, weighted, rolling, rolling_exp, coarsen, resample
243 | _wrap_then_attach_to_cls(
244 | target_cls_dict=vars(),
245 | source_cls=Dataset,
246 | methods_to_set=_DATA_WITH_COORDS_METHODS_TO_MAP,
247 | wrap_func=map_over_subtree,
248 | )
249 |
250 |
251 | class DataTreeArithmeticMixin:
252 | """
253 | Mixin to add Dataset arithmetic operations such as __add__, reduction methods such as .mean(), and enable numpy
254 | ufuncs such as np.sin(), but wrapped to map over all nodes in the subtree.
255 | """
256 |
257 | _wrap_then_attach_to_cls(
258 | target_cls_dict=vars(),
259 | source_cls=Dataset,
260 | methods_to_set=_ARITHMETIC_METHODS_TO_MAP,
261 | wrap_func=map_over_subtree,
262 | )
263 |
--------------------------------------------------------------------------------
/datatree/io.py:
--------------------------------------------------------------------------------
1 | from xarray import Dataset, open_dataset
2 |
3 | from .datatree import DataTree, NodePath
4 |
5 |
6 | def _iter_zarr_groups(root, parent="/"):
7 | parent = NodePath(parent)
8 | for path, group in root.groups():
9 | gpath = parent / path
10 | yield str(gpath)
11 | yield from _iter_zarr_groups(group, parent=gpath)
12 |
13 |
14 | def _iter_nc_groups(root, parent="/"):
15 | parent = NodePath(parent)
16 | for path, group in root.groups.items():
17 | gpath = parent / path
18 | yield str(gpath)
19 | yield from _iter_nc_groups(group, parent=gpath)
20 |
21 |
22 | def _get_nc_dataset_class(engine):
23 | if engine == "netcdf4":
24 | from netCDF4 import Dataset # type: ignore
25 | elif engine == "h5netcdf":
26 | from h5netcdf.legacyapi import Dataset # type: ignore
27 | elif engine is None:
28 | try:
29 | from netCDF4 import Dataset
30 | except ImportError:
31 | from h5netcdf.legacyapi import Dataset # type: ignore
32 | else:
33 | raise ValueError(f"unsupported engine: {engine}")
34 | return Dataset
35 |
36 |
37 | def open_datatree(filename_or_obj, engine=None, **kwargs) -> DataTree:
38 | """
39 | Open and decode a dataset from a file or file-like object, creating one Tree node for each group in the file.
40 |
41 | Parameters
42 | ----------
43 | filename_or_obj : str, Path, file-like, or DataStore
44 | Strings and Path objects are interpreted as a path to a netCDF file or Zarr store.
45 | engine : str, optional
46 | Xarray backend engine to us. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`.
47 | kwargs :
48 | Additional keyword arguments passed to ``xarray.open_dataset`` for each group.
49 |
50 | Returns
51 | -------
52 | DataTree
53 | """
54 |
55 | if engine == "zarr":
56 | return _open_datatree_zarr(filename_or_obj, **kwargs)
57 | elif engine in [None, "netcdf4", "h5netcdf"]:
58 | return _open_datatree_netcdf(filename_or_obj, engine=engine, **kwargs)
59 | else:
60 | raise ValueError("Unsupported engine")
61 |
62 |
63 | def _open_datatree_netcdf(filename: str, **kwargs) -> DataTree:
64 | ncDataset = _get_nc_dataset_class(kwargs.get("engine", None))
65 |
66 | ds = open_dataset(filename, **kwargs)
67 | tree_root = DataTree.from_dict({"/": ds})
68 | with ncDataset(filename, mode="r") as ncds:
69 | for path in _iter_nc_groups(ncds):
70 | subgroup_ds = open_dataset(filename, group=path, **kwargs)
71 |
72 | # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again
73 | node_name = NodePath(path).name
74 | new_node: DataTree = DataTree(name=node_name, data=subgroup_ds)
75 | tree_root._set_item(
76 | path,
77 | new_node,
78 | allow_overwrite=False,
79 | new_nodes_along_path=True,
80 | )
81 | return tree_root
82 |
83 |
84 | def _open_datatree_zarr(store, **kwargs) -> DataTree:
85 | import zarr # type: ignore
86 |
87 | zds = zarr.open_group(store, mode="r")
88 | ds = open_dataset(store, engine="zarr", **kwargs)
89 | tree_root = DataTree.from_dict({"/": ds})
90 | for path in _iter_zarr_groups(zds):
91 | try:
92 | subgroup_ds = open_dataset(store, engine="zarr", group=path, **kwargs)
93 | except zarr.errors.PathNotFoundError:
94 | subgroup_ds = Dataset()
95 |
96 | # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again
97 | node_name = NodePath(path).name
98 | new_node: DataTree = DataTree(name=node_name, data=subgroup_ds)
99 | tree_root._set_item(
100 | path,
101 | new_node,
102 | allow_overwrite=False,
103 | new_nodes_along_path=True,
104 | )
105 | return tree_root
106 |
107 |
108 | def _create_empty_netcdf_group(filename, group, mode, engine):
109 | ncDataset = _get_nc_dataset_class(engine)
110 |
111 | with ncDataset(filename, mode=mode) as rootgrp:
112 | rootgrp.createGroup(group)
113 |
114 |
115 | def _datatree_to_netcdf(
116 | dt: DataTree,
117 | filepath,
118 | mode: str = "w",
119 | encoding=None,
120 | unlimited_dims=None,
121 | **kwargs,
122 | ):
123 | if kwargs.get("format", None) not in [None, "NETCDF4"]:
124 | raise ValueError("to_netcdf only supports the NETCDF4 format")
125 |
126 | engine = kwargs.get("engine", None)
127 | if engine not in [None, "netcdf4", "h5netcdf"]:
128 | raise ValueError("to_netcdf only supports the netcdf4 and h5netcdf engines")
129 |
130 | if kwargs.get("group", None) is not None:
131 | raise NotImplementedError(
132 | "specifying a root group for the tree has not been implemented"
133 | )
134 |
135 | if not kwargs.get("compute", True):
136 | raise NotImplementedError("compute=False has not been implemented yet")
137 |
138 | if encoding is None:
139 | encoding = {}
140 |
141 | # In the future, we may want to expand this check to insure all the provided encoding
142 | # options are valid. For now, this simply checks that all provided encoding keys are
143 | # groups in the datatree.
144 | if set(encoding) - set(dt.groups):
145 | raise ValueError(
146 | f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}"
147 | )
148 |
149 | if unlimited_dims is None:
150 | unlimited_dims = {}
151 |
152 | for node in dt.subtree:
153 | ds = node.ds
154 | group_path = node.path
155 | if ds is None:
156 | _create_empty_netcdf_group(filepath, group_path, mode, engine)
157 | else:
158 | ds.to_netcdf(
159 | filepath,
160 | group=group_path,
161 | mode=mode,
162 | encoding=encoding.get(node.path),
163 | unlimited_dims=unlimited_dims.get(node.path),
164 | **kwargs,
165 | )
166 | mode = "r+"
167 |
168 |
169 | def _create_empty_zarr_group(store, group, mode):
170 | import zarr # type: ignore
171 |
172 | root = zarr.open_group(store, mode=mode)
173 | root.create_group(group, overwrite=True)
174 |
175 |
176 | def _datatree_to_zarr(
177 | dt: DataTree,
178 | store,
179 | mode: str = "w-",
180 | encoding=None,
181 | consolidated: bool = True,
182 | **kwargs,
183 | ):
184 | from zarr.convenience import consolidate_metadata # type: ignore
185 |
186 | if kwargs.get("group", None) is not None:
187 | raise NotImplementedError(
188 | "specifying a root group for the tree has not been implemented"
189 | )
190 |
191 | if not kwargs.get("compute", True):
192 | raise NotImplementedError("compute=False has not been implemented yet")
193 |
194 | if encoding is None:
195 | encoding = {}
196 |
197 | # In the future, we may want to expand this check to insure all the provided encoding
198 | # options are valid. For now, this simply checks that all provided encoding keys are
199 | # groups in the datatree.
200 | if set(encoding) - set(dt.groups):
201 | raise ValueError(
202 | f"unexpected encoding group name(s) provided: {set(encoding) - set(dt.groups)}"
203 | )
204 |
205 | for node in dt.subtree:
206 | ds = node.ds
207 | group_path = node.path
208 | if ds is None:
209 | _create_empty_zarr_group(store, group_path, mode)
210 | else:
211 | ds.to_zarr(
212 | store,
213 | group=group_path,
214 | mode=mode,
215 | encoding=encoding.get(node.path),
216 | consolidated=False,
217 | **kwargs,
218 | )
219 | if "w" in mode:
220 | mode = "a"
221 |
222 | if consolidated:
223 | consolidate_metadata(store)
224 |
--------------------------------------------------------------------------------
/docs/source/data-structures.rst:
--------------------------------------------------------------------------------
1 | .. currentmodule:: datatree
2 |
3 | .. _data structures:
4 |
5 | Data Structures
6 | ===============
7 |
8 | .. ipython:: python
9 | :suppress:
10 |
11 | import numpy as np
12 | import pandas as pd
13 | import xarray as xr
14 | import datatree
15 |
16 | np.random.seed(123456)
17 | np.set_printoptions(threshold=10)
18 |
19 | %xmode minimal
20 |
21 | .. note::
22 |
23 | This page builds on the information given in xarray's main page on
24 | `data structures `_, so it is suggested that you
25 | are familiar with those first.
26 |
27 | DataTree
28 | --------
29 |
30 | :py:class:`DataTree` is xarray's highest-level data structure, able to organise heterogeneous data which
31 | could not be stored inside a single :py:class:`Dataset` object. This includes representing the recursive structure of multiple
32 | `groups`_ within a netCDF file or `Zarr Store`_.
33 |
34 | .. _groups: https://www.unidata.ucar.edu/software/netcdf/workshops/2011/groups-types/GroupsIntro.html
35 | .. _Zarr Store: https://zarr.readthedocs.io/en/stable/tutorial.html#groups
36 |
37 | Each ``DataTree`` object (or "node") contains the same data that a single ``xarray.Dataset`` would (i.e. ``DataArray`` objects
38 | stored under hashable keys), and so has the same key properties:
39 |
40 | - ``dims``: a dictionary mapping of dimension names to lengths, for the variables in this node,
41 | - ``data_vars``: a dict-like container of DataArrays corresponding to variables in this node,
42 | - ``coords``: another dict-like container of DataArrays, corresponding to coordinate variables in this node,
- ``attrs``: dict to hold arbitrary metadata relevant to data in this node.
44 |
45 | A single ``DataTree`` object acts much like a single ``Dataset`` object, and has a similar set of dict-like methods
46 | defined upon it. However, ``DataTree``'s can also contain other ``DataTree`` objects, so they can be thought of as nested dict-like
47 | containers of both ``xarray.DataArray``'s and ``DataTree``'s.
48 |
49 | A single datatree object is known as a "node", and its position relative to other nodes is defined by two more key
50 | properties:
51 |
- ``children``: An ordered dictionary mapping from names to other ``DataTree`` objects, known as its "child nodes".
- ``parent``: The single ``DataTree`` object whose children this datatree is a member of, known as its "parent node".
54 |
55 | Each child automatically knows about its parent node, and a node without a parent is known as a "root" node
56 | (represented by the ``parent`` attribute pointing to ``None``).
57 | Nodes can have multiple children, but as each child node has at most one parent, there can only ever be one root node in a given tree.
58 |
59 | The overall structure is technically a `connected acyclic undirected rooted graph`, otherwise known as a
60 | `"Tree" `_.
61 |
62 | .. note::
63 |
64 | Technically a ``DataTree`` with more than one child node forms an `"Ordered Tree" `_,
65 | because the children are stored in an Ordered Dictionary. However, this distinction only really matters for a few
66 | edge cases involving operations on multiple trees simultaneously, and can safely be ignored by most users.
67 |
68 |
69 | ``DataTree`` objects can also optionally have a ``name`` as well as ``attrs``, just like a ``DataArray``.
70 | Again these are not normally used unless explicitly accessed by the user.
71 |
72 |
73 | .. _creating a datatree:
74 |
75 | Creating a DataTree
76 | ~~~~~~~~~~~~~~~~~~~
77 |
78 | One way to create a ``DataTree`` from scratch is to create each node individually,
79 | specifying the nodes' relationship to one another as you create each one.
80 |
81 | The ``DataTree`` constructor takes:
82 |
83 | - ``data``: The data that will be stored in this node, represented by a single ``xarray.Dataset``, or a named ``xarray.DataArray``.
84 | - ``parent``: The parent node (if there is one), given as a ``DataTree`` object.
85 | - ``children``: The various child nodes (if there are any), given as a mapping from string keys to ``DataTree`` objects.
86 | - ``name``: A string to use as the name of this node.
87 |
88 | Let's make a single datatree node with some example data in it:
89 |
90 | .. ipython:: python
91 |
92 | from datatree import DataTree
93 |
94 | ds1 = xr.Dataset({"foo": "orange"})
95 | dt = DataTree(name="root", data=ds1) # create root node
96 |
97 | dt
98 |
99 | At this point our node is also the root node, as every tree has a root node.
100 |
101 | We can add a second node to this tree either by referring to the first node in the constructor of the second:
102 |
103 | .. ipython:: python
104 |
105 | ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
106 | # add a child by referring to the parent node
107 | node2 = DataTree(name="a", parent=dt, data=ds2)
108 |
109 | or by dynamically updating the attributes of one node to refer to another:
110 |
111 | .. ipython:: python
112 |
113 | # add a second child by first creating a new node ...
114 | ds3 = xr.Dataset({"zed": np.NaN})
115 | node3 = DataTree(name="b", data=ds3)
116 | # ... then updating its .parent property
117 | node3.parent = dt
118 |
119 | Our tree now has three nodes within it:
120 |
121 | .. ipython:: python
122 |
123 | dt
124 |
125 | It is at tree construction time that consistency checks are enforced. For instance, if we try to create a `cycle` the constructor will raise an error:
126 |
127 | .. ipython:: python
128 | :okexcept:
129 |
130 | dt.parent = node3
131 |
132 | Alternatively you can also create a ``DataTree`` object from
133 |
134 | - An ``xarray.Dataset`` using ``Dataset.to_node()`` (not yet implemented),
135 | - A dictionary mapping directory-like paths to either ``DataTree`` nodes or data, using :py:meth:`DataTree.from_dict()`,
136 | - A netCDF or Zarr file on disk with :py:func:`open_datatree()`. See :ref:`reading and writing files `.
137 |
138 |
139 | DataTree Contents
140 | ~~~~~~~~~~~~~~~~~
141 |
142 | Like ``xarray.Dataset``, ``DataTree`` implements the python mapping interface, but with values given by either ``xarray.DataArray`` objects or other ``DataTree`` objects.
143 |
144 | .. ipython:: python
145 |
146 | dt["a"]
147 | dt["foo"]
148 |
149 | Iterating over keys will iterate over both the names of variables and child nodes.
150 |
151 | We can also access all the data in a single node through a dataset-like view
152 |
153 | .. ipython:: python
154 |
155 | dt["a"].ds
156 |
157 | This demonstrates the fact that the data in any one node is equivalent to the contents of a single ``xarray.Dataset`` object.
158 | The ``DataTree.ds`` property returns an immutable view, but we can instead extract the node's data contents as a new (and mutable)
159 | ``xarray.Dataset`` object via :py:meth:`DataTree.to_dataset()`:
160 |
161 | .. ipython:: python
162 |
163 | dt["a"].to_dataset()
164 |
165 | Like with ``Dataset``, you can access the data and coordinate variables of a node separately via the ``data_vars`` and ``coords`` attributes:
166 |
167 | .. ipython:: python
168 |
169 | dt["a"].data_vars
170 | dt["a"].coords
171 |
172 |
173 | Dictionary-like methods
174 | ~~~~~~~~~~~~~~~~~~~~~~~
175 |
176 | We can update a datatree in-place using Python's standard dictionary syntax, similar to how we can for Dataset objects.
177 | For example, to create this example datatree from scratch, we could have written:
178 |
179 | # TODO update this example using ``.coords`` and ``.data_vars`` as setters,
180 |
181 | .. ipython:: python
182 |
183 | dt = DataTree(name="root")
184 | dt["foo"] = "orange"
185 | dt["a"] = DataTree(data=xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])}))
186 | dt["a/b/zed"] = np.NaN
187 | dt
188 |
189 | To change the variables in a node of a ``DataTree``, you can use all the standard dictionary
190 | methods, including ``values``, ``items``, ``__delitem__``, ``get`` and
191 | :py:meth:`DataTree.update`.
192 | Note that assigning a ``DataArray`` object to a ``DataTree`` variable using ``__setitem__`` or ``update`` will
193 | :ref:`automatically align ` the array(s) to the original node's indexes.
194 |
If you copy a ``DataTree`` using the :py:func:`copy` function or the :py:meth:`DataTree.copy` method it will copy the subtree,
meaning that node and all the children below it, but no parents above it.
197 | Like for ``Dataset``, this copy is shallow by default, but you can copy all the underlying data arrays by calling ``dt.copy(deep=True)``.
198 |
--------------------------------------------------------------------------------
/datatree/render.py:
--------------------------------------------------------------------------------
1 | """
2 | String Tree Rendering. Copied from anytree.
3 | """
4 |
5 | import collections
6 | from typing import TYPE_CHECKING
7 |
8 | if TYPE_CHECKING:
9 | from .datatree import DataTree
10 |
11 | Row = collections.namedtuple("Row", ("pre", "fill", "node"))
12 |
13 |
14 | class AbstractStyle(object):
15 | def __init__(self, vertical, cont, end):
16 | """
17 | Tree Render Style.
18 | Args:
19 | vertical: Sign for vertical line.
20 | cont: Chars for a continued branch.
21 | end: Chars for the last branch.
22 | """
23 | super(AbstractStyle, self).__init__()
24 | self.vertical = vertical
25 | self.cont = cont
26 | self.end = end
27 | assert (
28 | len(cont) == len(vertical) == len(end)
29 | ), f"'{vertical}', '{cont}' and '{end}' need to have equal length"
30 |
31 | @property
32 | def empty(self):
33 | """Empty string as placeholder."""
34 | return " " * len(self.end)
35 |
36 | def __repr__(self):
37 | return f"{self.__class__.__name__}()"
38 |
39 |
40 | class ContStyle(AbstractStyle):
41 | def __init__(self):
42 | """
43 | Continued style, without gaps.
44 |
45 | >>> from anytree import Node, RenderTree
46 | >>> root = Node("root")
47 | >>> s0 = Node("sub0", parent=root)
48 | >>> s0b = Node("sub0B", parent=s0)
49 | >>> s0a = Node("sub0A", parent=s0)
50 | >>> s1 = Node("sub1", parent=root)
51 | >>> print(RenderTree(root, style=ContStyle()))
52 |
53 | Node('/root')
54 | ├── Node('/root/sub0')
55 | │ ├── Node('/root/sub0/sub0B')
56 | │ └── Node('/root/sub0/sub0A')
57 | └── Node('/root/sub1')
58 | """
59 | super(ContStyle, self).__init__(
60 | "\u2502 ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 "
61 | )
62 |
63 |
64 | class RenderTree(object):
65 | def __init__(
66 | self, node: "DataTree", style=ContStyle(), childiter=list, maxlevel=None
67 | ):
68 | """
69 | Render tree starting at `node`.
70 | Keyword Args:
71 | style (AbstractStyle): Render Style.
72 | childiter: Child iterator.
73 | maxlevel: Limit rendering to this depth.
74 | :any:`RenderTree` is an iterator, returning a tuple with 3 items:
75 | `pre`
76 | tree prefix.
77 | `fill`
78 | filling for multiline entries.
79 | `node`
80 | :any:`NodeMixin` object.
81 | It is up to the user to assemble these parts to a whole.
82 | >>> from anytree import Node, RenderTree
83 | >>> root = Node("root", lines=["c0fe", "c0de"])
84 | >>> s0 = Node("sub0", parent=root, lines=["ha", "ba"])
85 | >>> s0b = Node("sub0B", parent=s0, lines=["1", "2", "3"])
86 | >>> s0a = Node("sub0A", parent=s0, lines=["a", "b"])
87 | >>> s1 = Node("sub1", parent=root, lines=["Z"])
88 | Simple one line:
89 | >>> for pre, _, node in RenderTree(root):
90 | ... print("%s%s" % (pre, node.name))
91 | ...
92 | root
93 | ├── sub0
94 | │ ├── sub0B
95 | │ └── sub0A
96 | └── sub1
97 | Multiline:
98 | >>> for pre, fill, node in RenderTree(root):
99 | ... print("%s%s" % (pre, node.lines[0]))
100 | ... for line in node.lines[1:]:
101 | ... print("%s%s" % (fill, line))
102 | ...
103 | c0fe
104 | c0de
105 | ├── ha
106 | │ ba
107 | │ ├── 1
108 | │ │ 2
109 | │ │ 3
110 | │ └── a
111 | │ b
112 | └── Z
113 | `maxlevel` limits the depth of the tree:
114 | >>> print(RenderTree(root, maxlevel=2))
115 | Node('/root', lines=['c0fe', 'c0de'])
116 | ├── Node('/root/sub0', lines=['ha', 'ba'])
117 | └── Node('/root/sub1', lines=['Z'])
118 | The `childiter` is responsible for iterating over child nodes at the
119 | same level. An reversed order can be achived by using `reversed`.
120 | >>> for row in RenderTree(root, childiter=reversed):
121 | ... print("%s%s" % (row.pre, row.node.name))
122 | ...
123 | root
124 | ├── sub1
125 | └── sub0
126 | ├── sub0A
127 | └── sub0B
128 | Or writing your own sort function:
129 | >>> def mysort(items):
130 | ... return sorted(items, key=lambda item: item.name)
131 | ...
132 | >>> for row in RenderTree(root, childiter=mysort):
133 | ... print("%s%s" % (row.pre, row.node.name))
134 | ...
135 | root
136 | ├── sub0
137 | │ ├── sub0A
138 | │ └── sub0B
139 | └── sub1
140 | :any:`by_attr` simplifies attribute rendering and supports multiline:
141 | >>> print(RenderTree(root).by_attr())
142 | root
143 | ├── sub0
144 | │ ├── sub0B
145 | │ └── sub0A
146 | └── sub1
147 | >>> print(RenderTree(root).by_attr("lines"))
148 | c0fe
149 | c0de
150 | ├── ha
151 | │ ba
152 | │ ├── 1
153 | │ │ 2
154 | │ │ 3
155 | │ └── a
156 | │ b
157 | └── Z
158 | And can be a function:
159 | >>> print(RenderTree(root).by_attr(lambda n: " ".join(n.lines)))
160 | c0fe c0de
161 | ├── ha ba
162 | │ ├── 1 2 3
163 | │ └── a b
164 | └── Z
165 | """
166 | if not isinstance(style, AbstractStyle):
167 | style = style()
168 | self.node = node
169 | self.style = style
170 | self.childiter = childiter
171 | self.maxlevel = maxlevel
172 |
173 | def __iter__(self):
174 | return self.__next(self.node, tuple())
175 |
176 | def __next(self, node, continues, level=0):
177 | yield RenderTree.__item(node, continues, self.style)
178 | children = node.children.values()
179 | level += 1
180 | if children and (self.maxlevel is None or level < self.maxlevel):
181 | children = self.childiter(children)
182 | for child, is_last in _is_last(children):
183 | for grandchild in self.__next(
184 | child, continues + (not is_last,), level=level
185 | ):
186 | yield grandchild
187 |
188 | @staticmethod
189 | def __item(node, continues, style):
190 | if not continues:
191 | return Row("", "", node)
192 | else:
193 | items = [style.vertical if cont else style.empty for cont in continues]
194 | indent = "".join(items[:-1])
195 | branch = style.cont if continues[-1] else style.end
196 | pre = indent + branch
197 | fill = "".join(items)
198 | return Row(pre, fill, node)
199 |
200 | def __str__(self):
201 | lines = ["%s%r" % (pre, node) for pre, _, node in self]
202 | return "\n".join(lines)
203 |
204 | def __repr__(self):
205 | classname = self.__class__.__name__
206 | args = [
207 | repr(self.node),
208 | "style=%s" % repr(self.style),
209 | "childiter=%s" % repr(self.childiter),
210 | ]
211 | return "%s(%s)" % (classname, ", ".join(args))
212 |
213 | def by_attr(self, attrname="name"):
214 | """
215 | Return rendered tree with node attribute `attrname`.
216 | >>> from anytree import AnyNode, RenderTree
217 | >>> root = AnyNode(id="root")
218 | >>> s0 = AnyNode(id="sub0", parent=root)
219 | >>> s0b = AnyNode(id="sub0B", parent=s0, foo=4, bar=109)
220 | >>> s0a = AnyNode(id="sub0A", parent=s0)
221 | >>> s1 = AnyNode(id="sub1", parent=root)
222 | >>> s1a = AnyNode(id="sub1A", parent=s1)
223 | >>> s1b = AnyNode(id="sub1B", parent=s1, bar=8)
224 | >>> s1c = AnyNode(id="sub1C", parent=s1)
225 | >>> s1ca = AnyNode(id="sub1Ca", parent=s1c)
226 | >>> print(RenderTree(root).by_attr("id"))
227 | root
228 | ├── sub0
229 | │ ├── sub0B
230 | │ └── sub0A
231 | └── sub1
232 | ├── sub1A
233 | ├── sub1B
234 | └── sub1C
235 | └── sub1Ca
236 | """
237 |
238 | def get():
239 | for pre, fill, node in self:
240 | attr = (
241 | attrname(node)
242 | if callable(attrname)
243 | else getattr(node, attrname, "")
244 | )
245 | if isinstance(attr, (list, tuple)):
246 | lines = attr
247 | else:
248 | lines = str(attr).split("\n")
249 | yield "%s%s" % (pre, lines[0])
250 | for line in lines[1:]:
251 | yield "%s%s" % (fill, line)
252 |
253 | return "\n".join(get())
254 |
255 |
256 | def _is_last(iterable):
257 | iter_ = iter(iterable)
258 | try:
259 | nextitem = next(iter_)
260 | except StopIteration:
261 | pass
262 | else:
263 | item = nextitem
264 | while True:
265 | try:
266 | nextitem = next(iter_)
267 | yield item, False
268 | except StopIteration:
269 | yield nextitem, True
270 | break
271 | item = nextitem
272 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright (c) 2022 onwards, datatree developers
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/datatree/tests/test_treenode.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from datatree.iterators import LevelOrderIter, PreOrderIter
4 | from datatree.treenode import InvalidTreeError, NamedNode, NodePath, TreeNode
5 |
6 |
class TestFamilyTree:
    """Tests for basic parent/child wiring on the bare TreeNode class."""

    def test_lonely(self):
        # A freshly-created node has no parent and no children.
        root = TreeNode()
        assert root.parent is None
        assert root.children == {}

    def test_parenting(self):
        john = TreeNode()
        mary = TreeNode()
        mary._set_parent(john, "Mary")

        assert mary.parent == john
        assert john.children["Mary"] is mary

    def test_no_time_traveller_loops(self):
        # Attaching a node to itself or to one of its own descendants must
        # be rejected, otherwise the tree would contain a cycle.
        john = TreeNode()

        with pytest.raises(InvalidTreeError, match="cannot be a parent of itself"):
            john._set_parent(john, "John")

        with pytest.raises(InvalidTreeError, match="cannot be a parent of itself"):
            john.children = {"John": john}

        mary = TreeNode()
        rose = TreeNode()
        mary._set_parent(john, "Mary")
        rose._set_parent(mary, "Rose")

        with pytest.raises(InvalidTreeError, match="is already a descendant"):
            john._set_parent(rose, "John")

        with pytest.raises(InvalidTreeError, match="is already a descendant"):
            rose.children = {"John": john}

    def test_parent_swap(self):
        john = TreeNode()
        mary = TreeNode()
        mary._set_parent(john, "Mary")

        steve = TreeNode()
        mary._set_parent(steve, "Mary")

        # Re-parenting moves the child: the old parent no longer holds it.
        assert mary.parent == steve
        assert steve.children["Mary"] is mary
        assert "Mary" not in john.children

    def test_multi_child_family(self):
        mary = TreeNode()
        kate = TreeNode()
        john = TreeNode(children={"Mary": mary, "Kate": kate})
        assert john.children["Mary"] is mary
        assert john.children["Kate"] is kate
        assert mary.parent is john
        assert kate.parent is john

    def test_disown_child(self):
        mary = TreeNode()
        john = TreeNode(children={"Mary": mary})
        mary.orphan()
        assert mary.parent is None
        assert "Mary" not in john.children

    def test_doppelganger_child(self):
        kate = TreeNode()
        john = TreeNode()

        # Children must be TreeNode instances.
        with pytest.raises(TypeError):
            john.children = {"Kate": 666}

        # The same node object cannot appear under two different keys.
        with pytest.raises(InvalidTreeError, match="Cannot add same node"):
            john.children = {"Kate": kate, "Evil_Kate": kate}

        # Setting a parent under an existing key replaces the previous child.
        john = TreeNode(children={"Kate": kate})
        evil_kate = TreeNode()
        evil_kate._set_parent(john, "Kate")
        assert john.children["Kate"] is evil_kate

    def test_sibling_relationships(self):
        mary = TreeNode()
        kate = TreeNode()
        ashley = TreeNode()
        TreeNode(children={"Mary": mary, "Kate": kate, "Ashley": ashley})
        # siblings maps the other children of the parent, excluding the node itself
        assert kate.siblings["Mary"] is mary
        assert kate.siblings["Ashley"] is ashley
        assert "Kate" not in kate.siblings

    def test_ancestors(self):
        tony = TreeNode()
        michael = TreeNode(children={"Tony": tony})
        vito = TreeNode(children={"Michael": michael})
        assert tony.root is vito
        # parents is nearest-first; ancestors is root-first and includes the node itself
        assert tony.parents == (michael, vito)
        assert tony.ancestors == (vito, michael, tony)
100 |
101 |
class TestGetNodes:
    """Tests for TreeNode._get_item: path-based lookup of other nodes."""

    def test_get_child(self):
        steven = TreeNode()
        sue = TreeNode(children={"Steven": steven})
        mary = TreeNode(children={"Sue": sue})
        john = TreeNode(children={"Mary": mary})

        # get child
        assert john._get_item("Mary") is mary
        assert mary._get_item("Sue") is sue

        # no child exists
        with pytest.raises(KeyError):
            john._get_item("Kate")

        # get grandchild
        assert john._get_item("Mary/Sue") is sue

        # get great-grandchild
        assert john._get_item("Mary/Sue/Steven") is steven

        # get from middle of tree
        assert mary._get_item("Sue/Steven") is steven

    def test_get_upwards(self):
        sue = TreeNode()
        kate = TreeNode()
        mary = TreeNode(children={"Sue": sue, "Kate": kate})
        john = TreeNode(children={"Mary": mary})

        # ".." segments walk towards the root
        assert sue._get_item("../") is mary
        assert sue._get_item("../../") is john

        # relative path through the parent to a sibling
        assert sue._get_item("../Kate") is kate

    def test_get_from_root(self):
        sue = TreeNode()
        mary = TreeNode(children={"Sue": sue})
        john = TreeNode(children={"Mary": mary})  # noqa

        # a leading "/" resolves the path from the tree's root
        assert sue._get_item("/Mary") is mary
144 |
145 |
class TestSetNodes:
    """Tests for TreeNode._set_item: attaching nodes by key or by path."""

    def test_set_child_node(self):
        john = TreeNode()
        mary = TreeNode()
        john._set_item("Mary", mary)

        assert john.children["Mary"] is mary
        assert isinstance(mary, TreeNode)
        assert mary.children == {}
        assert mary.parent is john

    def test_child_already_exists(self):
        mary = TreeNode()
        john = TreeNode(children={"Mary": mary})
        mary_2 = TreeNode()
        # refusing to overwrite an existing child raises KeyError
        with pytest.raises(KeyError):
            john._set_item("Mary", mary_2, allow_overwrite=False)

    def test_set_grandchild(self):
        rose = TreeNode()
        mary = TreeNode()
        john = TreeNode()

        john._set_item("Mary", mary)
        john._set_item("Mary/Rose", rose)

        assert john.children["Mary"] is mary
        assert isinstance(mary, TreeNode)
        assert "Rose" in mary.children
        assert rose.parent is mary

    def test_create_intermediate_child(self):
        john = TreeNode()
        rose = TreeNode()

        # test intermediate children not allowed
        with pytest.raises(KeyError, match="Could not reach"):
            john._set_item(path="Mary/Rose", item=rose, new_nodes_along_path=False)

        # test intermediate children allowed
        john._set_item("Mary/Rose", rose, new_nodes_along_path=True)
        assert "Mary" in john.children
        mary = john.children["Mary"]
        assert isinstance(mary, TreeNode)
        assert mary.children == {"Rose": rose}
        # (this assertion was accidentally duplicated in the original)
        assert rose.parent == mary

    def test_overwrite_child(self):
        john = TreeNode()
        mary = TreeNode()
        john._set_item("Mary", mary)

        # test overwriting not allowed
        marys_evil_twin = TreeNode()
        with pytest.raises(KeyError, match="Already a node object"):
            john._set_item("Mary", marys_evil_twin, allow_overwrite=False)
        assert john.children["Mary"] is mary
        assert marys_evil_twin.parent is None

        # test overwriting allowed
        marys_evil_twin = TreeNode()
        john._set_item("Mary", marys_evil_twin, allow_overwrite=True)
        assert john.children["Mary"] is marys_evil_twin
        assert marys_evil_twin.parent is john
211 |
212 |
class TestPruning:
    """Tests for removing children from a tree."""

    def test_del_child(self):
        parent = TreeNode()
        child = TreeNode()
        parent._set_item("Mary", child)

        # Deleting detaches the child completely from its parent.
        del parent["Mary"]
        assert "Mary" not in parent.children
        assert child.parent is None

        # A second delete of the same key is an error.
        with pytest.raises(KeyError):
            del parent["Mary"]
225 |
226 |
def create_test_tree():
    """Build a small tree of NamedNodes and return (root, one deep leaf).

    Structure (root first, children left-to-right)::

        a
        ├── b
        │   ├── d
        │   └── e
        │       ├── f
        │       └── g
        └── c
            └── h
                └── i
    """
    root = NamedNode(name="a")
    b, c, d, e, f, g, h, i = (NamedNode() for _ in range(8))

    root.children = {"b": b, "c": c}
    b.children = {"d": d, "e": e}
    e.children = {"f": f, "g": g}
    c.children = {"h": h}
    h.children = {"i": i}

    return root, f
245 |
246 |
class TestIterators:
    """Tests for the tree traversal iterators."""

    def test_preorderiter(self):
        # Depth-first: each node is visited before any of its children's siblings.
        root, _ = create_test_tree()
        names = [node.name for node in PreOrderIter(root)]
        assert names == ["a", "b", "d", "e", "f", "g", "c", "h", "i"]

    def test_levelorderiter(self):
        # Breadth-first: the root, then each level left-to-right.
        root, _ = create_test_tree()
        names = [node.name for node in LevelOrderIter(root)]
        assert names == ["a", "b", "c", "d", "e", "h", "f", "g", "i"]
279 |
280 |
class TestAncestry:
    """Tests for traversal properties: parents, lineage, subtree, leaves, levels."""

    def test_parents(self):
        _, leaf = create_test_tree()
        # nearest ancestor first, root last
        expected = ["e", "b", "a"]
        assert [node.name for node in leaf.parents] == expected

    def test_lineage(self):
        _, leaf = create_test_tree()
        # like parents, but starting with the node itself
        expected = ["f", "e", "b", "a"]
        assert [node.name for node in leaf.lineage] == expected

    def test_ancestors(self):
        _, leaf = create_test_tree()
        ancestors = leaf.ancestors
        # root first, ending with the node itself
        expected = ["a", "b", "e", "f"]
        for node, expected_name in zip(ancestors, expected):
            assert node.name == expected_name

    def test_subtree(self):
        root, _ = create_test_tree()
        subtree = root.subtree
        # pre-order traversal, including the root itself
        expected = [
            "a",
            "b",
            "d",
            "e",
            "f",
            "g",
            "c",
            "h",
            "i",
        ]
        for node, expected_name in zip(subtree, expected):
            assert node.name == expected_name

    def test_descendants(self):
        root, _ = create_test_tree()
        descendants = root.descendants
        # pre-order traversal, excluding the root itself
        expected = [
            "b",
            "d",
            "e",
            "f",
            "g",
            "c",
            "h",
            "i",
        ]
        for node, expected_name in zip(descendants, expected):
            assert node.name == expected_name

    def test_leaves(self):
        tree, _ = create_test_tree()
        leaves = tree.leaves
        expected = [
            "d",
            "f",
            "g",
            "i",
        ]
        for node, expected_name in zip(leaves, expected):
            assert node.name == expected_name

    def test_levels(self):
        a, f = create_test_tree()

        # level: distance of the node below the root
        assert a.level == 0
        assert f.level == 3

        # depth: maximum level found anywhere in the tree
        assert a.depth == 3
        assert f.depth == 3

        # width: apparently the number of nodes sharing the node's level
        # (f shares level 3 with g and i) — TODO confirm against treenode docs
        assert a.width == 1
        assert f.width == 3
355 |
356 |
class TestRenderTree:
    """Tests for the text rendering of a node tree."""

    def test_render_nodetree(self):
        sam = NamedNode()
        ben = NamedNode()
        mary = NamedNode(children={"Sam": sam, "Ben": ben})
        kate = NamedNode()
        john = NamedNode(children={"Mary": mary, "Kate": kate})

        # Each line of the rendered tree should contain the repr of one node,
        # in pre-order.
        expected_nodes = [
            "NamedNode()",
            "NamedNode('Mary')",
            "NamedNode('Sam')",
            "NamedNode('Ben')",
            "NamedNode('Kate')",
        ]
        rendered_lines = str(john).splitlines()
        for expected_node, line in zip(expected_nodes, rendered_lines):
            assert expected_node in line
375 |
376 |
def test_nodepath():
    """NodePath exposes pathlib-style root/stem attributes."""
    mary_path = NodePath("/Mary")
    assert mary_path.root == "/"
    assert mary_path.stem == "Mary"
381 |
--------------------------------------------------------------------------------
/datatree/tests/test_mapping.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | import xarray as xr
4 |
5 | from datatree.datatree import DataTree
6 | from datatree.mapping import TreeIsomorphismError, check_isomorphic, map_over_subtree
7 | from datatree.testing import assert_equal
8 |
9 | empty = xr.Dataset()
10 |
11 |
class TestCheckTreesIsomorphic:
    """Tests for check_isomorphic and the error messages it raises."""

    def test_not_a_tree(self):
        with pytest.raises(TypeError, match="not a tree"):
            check_isomorphic("s", 1)

    def test_different_widths(self):
        dt1 = DataTree.from_dict(d={"a": empty})
        dt2 = DataTree.from_dict(d={"b": empty, "c": empty})
        expected_err_str = (
            "Number of children on node '/' of the left object: 1\n"
            "Number of children on node '/' of the right object: 2"
        )
        with pytest.raises(TreeIsomorphismError, match=expected_err_str):
            check_isomorphic(dt1, dt2)

    def test_different_heights(self):
        dt1 = DataTree.from_dict({"a": empty})
        dt2 = DataTree.from_dict({"b": empty, "b/c": empty})
        expected_err_str = (
            "Number of children on node '/a' of the left object: 0\n"
            "Number of children on node '/b' of the right object: 1"
        )
        with pytest.raises(TreeIsomorphismError, match=expected_err_str):
            check_isomorphic(dt1, dt2)

    def test_names_different(self):
        dt1 = DataTree.from_dict({"a": xr.Dataset()})
        dt2 = DataTree.from_dict({"b": empty})
        expected_err_str = (
            "Node '/a' in the left object has name 'a'\n"
            "Node '/b' in the right object has name 'b'"
        )
        with pytest.raises(TreeIsomorphismError, match=expected_err_str):
            check_isomorphic(dt1, dt2, require_names_equal=True)

    def test_isomorphic_names_equal(self):
        dt1 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty})
        dt2 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty})
        check_isomorphic(dt1, dt2, require_names_equal=True)

    def test_isomorphic_ordering(self):
        # the trees differ only in the order of b's children and still pass
        dt1 = DataTree.from_dict({"a": empty, "b": empty, "b/d": empty, "b/c": empty})
        dt2 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty})
        check_isomorphic(dt1, dt2, require_names_equal=False)

    def test_isomorphic_names_not_equal(self):
        dt1 = DataTree.from_dict({"a": empty, "b": empty, "b/c": empty, "b/d": empty})
        dt2 = DataTree.from_dict({"A": empty, "B": empty, "B/C": empty, "B/D": empty})
        check_isomorphic(dt1, dt2)

    def test_not_isomorphic_complex_tree(self, create_test_datatree):
        dt1 = create_test_datatree()
        dt2 = create_test_datatree()
        dt2["set1/set2/extra"] = DataTree(name="extra")
        # the error message should point at the path of the offending node
        with pytest.raises(TreeIsomorphismError, match="/set1/set2"):
            check_isomorphic(dt1, dt2)

    def test_checking_from_root(self, create_test_datatree):
        dt1 = create_test_datatree()
        dt2 = create_test_datatree()
        real_root = DataTree(name="real root")
        dt2.name = "not_real_root"
        dt2.parent = real_root
        # with check_from_root=True the extra parent above dt2 is detected
        with pytest.raises(TreeIsomorphismError):
            check_isomorphic(dt1, dt2, check_from_root=True)
77 |
78 |
class TestMapOverSubTree:
    """Tests for the map_over_subtree decorator and the DataTree method form."""

    def test_no_trees_passed(self):
        @map_over_subtree
        def times_ten(ds):
            return 10.0 * ds

        with pytest.raises(TypeError, match="Must pass at least one tree"):
            times_ten("dt")

    def test_not_isomorphic(self, create_test_datatree):
        dt1 = create_test_datatree()
        dt2 = create_test_datatree()
        # adding an extra node breaks isomorphism between the two trees
        dt2["set1/set2/extra"] = DataTree(name="extra")

        @map_over_subtree
        def times_ten(ds1, ds2):
            return ds1 * ds2

        with pytest.raises(TreeIsomorphismError):
            times_ten(dt1, dt2)

    def test_no_trees_returned(self, create_test_datatree):
        dt1 = create_test_datatree()
        dt2 = create_test_datatree()

        @map_over_subtree
        def bad_func(ds1, ds2):
            return None

        with pytest.raises(TypeError, match="return value of None"):
            bad_func(dt1, dt2)

    def test_single_dt_arg(self, create_test_datatree):
        dt = create_test_datatree()

        @map_over_subtree
        def times_ten(ds):
            return 10.0 * ds

        expected = create_test_datatree(modify=lambda ds: 10.0 * ds)
        result_tree = times_ten(dt)
        assert_equal(result_tree, expected)

    def test_single_dt_arg_plus_args_and_kwargs(self, create_test_datatree):
        dt = create_test_datatree()

        @map_over_subtree
        def multiply_then_add(ds, times, add=0.0):
            return (times * ds) + add

        expected = create_test_datatree(modify=lambda ds: (10.0 * ds) + 2.0)
        result_tree = multiply_then_add(dt, 10.0, add=2.0)
        assert_equal(result_tree, expected)

    def test_multiple_dt_args(self, create_test_datatree):
        dt1 = create_test_datatree()
        dt2 = create_test_datatree()

        @map_over_subtree
        def add(ds1, ds2):
            return ds1 + ds2

        expected = create_test_datatree(modify=lambda ds: 2.0 * ds)
        result = add(dt1, dt2)
        assert_equal(result, expected)

    def test_dt_as_kwarg(self, create_test_datatree):
        dt1 = create_test_datatree()
        dt2 = create_test_datatree()

        @map_over_subtree
        def add(ds1, value=0.0):
            return ds1 + value

        # a tree passed as a keyword argument is also mapped over
        expected = create_test_datatree(modify=lambda ds: 2.0 * ds)
        result = add(dt1, value=dt2)
        assert_equal(result, expected)

    def test_return_multiple_dts(self, create_test_datatree):
        dt = create_test_datatree()

        @map_over_subtree
        def minmax(ds):
            return ds.min(), ds.max()

        # a tuple of results becomes a tuple of trees
        dt_min, dt_max = minmax(dt)
        expected_min = create_test_datatree(modify=lambda ds: ds.min())
        assert_equal(dt_min, expected_min)
        expected_max = create_test_datatree(modify=lambda ds: ds.max())
        assert_equal(dt_max, expected_max)

    def test_return_wrong_type(self, simple_datatree):
        dt1 = simple_datatree

        @map_over_subtree
        def bad_func(ds1):
            return "string"

        with pytest.raises(TypeError, match="not Dataset or DataArray"):
            bad_func(dt1)

    def test_return_tuple_of_wrong_types(self, simple_datatree):
        dt1 = simple_datatree

        @map_over_subtree
        def bad_func(ds1):
            return xr.Dataset(), "string"

        with pytest.raises(TypeError, match="not Dataset or DataArray"):
            bad_func(dt1)

    @pytest.mark.xfail
    def test_return_inconsistent_number_of_results(self, simple_datatree):
        dt1 = simple_datatree

        @map_over_subtree
        def bad_func(ds):
            # Datasets in simple_datatree have different numbers of dims
            # TODO need to instead return different numbers of Dataset objects for this test to catch the intended error
            return tuple(ds.dims)

        with pytest.raises(TypeError, match="instead returns"):
            bad_func(dt1)

    def test_wrong_number_of_arguments_for_func(self, simple_datatree):
        dt = simple_datatree

        @map_over_subtree
        def times_ten(ds):
            return 10.0 * ds

        with pytest.raises(
            TypeError, match="takes 1 positional argument but 2 were given"
        ):
            times_ten(dt, dt)

    def test_map_single_dataset_against_whole_tree(self, create_test_datatree):
        dt = create_test_datatree()

        @map_over_subtree
        def nodewise_merge(node_ds, fixed_ds):
            return xr.merge([node_ds, fixed_ds])

        # the plain Dataset argument is reused unchanged for every node
        other_ds = xr.Dataset({"z": ("z", [0])})
        expected = create_test_datatree(modify=lambda ds: xr.merge([ds, other_ds]))
        result_tree = nodewise_merge(dt, other_ds)
        assert_equal(result_tree, expected)

    @pytest.mark.xfail
    def test_trees_with_different_node_names(self):
        # TODO test this after I've got good tests for renaming nodes
        raise NotImplementedError

    def test_dt_method(self, create_test_datatree):
        dt = create_test_datatree()

        def multiply_then_add(ds, times, add=0.0):
            return times * ds + add

        expected = create_test_datatree(modify=lambda ds: (10.0 * ds) + 2.0)
        result_tree = dt.map_over_subtree(multiply_then_add, 10.0, add=2.0)
        assert_equal(result_tree, expected)

    def test_discard_ancestry(self, create_test_datatree):
        # Check for datatree GH issue #48
        dt = create_test_datatree()
        subtree = dt["set1"]

        @map_over_subtree
        def times_ten(ds):
            return 10.0 * ds

        expected = create_test_datatree(modify=lambda ds: 10.0 * ds)["set1"]
        result_tree = times_ten(subtree)
        assert_equal(result_tree, expected, from_root=False)

    def test_skip_empty_nodes_with_attrs(self, create_test_datatree):
        # inspired by xarray-datatree GH262
        dt = create_test_datatree()
        dt["set1/set2"].attrs["foo"] = "bar"

        def check_for_data(ds):
            # fails if run on a node that has no data
            assert len(ds.variables) != 0
            return ds

        dt.map_over_subtree(check_for_data)

    def test_keep_attrs_on_empty_nodes(self, create_test_datatree):
        # GH278
        dt = create_test_datatree()
        dt["set1/set2"].attrs["foo"] = "bar"

        def empty_func(ds):
            return ds

        result = dt.map_over_subtree(empty_func)
        assert result["set1/set2"].attrs == dt["set1/set2"].attrs

    @pytest.mark.xfail(
        reason="probably some bug in pytests handling of exception notes"
    )
    def test_error_contains_path_of_offending_node(self, create_test_datatree):
        dt = create_test_datatree()
        dt["set1"]["bad_var"] = 0
        print(dt)

        def fail_on_specific_node(ds):
            if "bad_var" in ds:
                raise ValueError("Failed because 'bar_var' present in dataset")

        with pytest.raises(
            ValueError, match="Raised whilst mapping function over node /set1"
        ):
            dt.map_over_subtree(fail_on_specific_node)
294 |
295 |
class TestMutableOperations:
    """Tests for how mapped functions interact with object construction and mutation."""

    def test_construct_using_type(self):
        # from datatree GH issue #188
        # xarray's .weighted is unusual because it uses type() to create a Dataset/DataArray

        a = xr.DataArray(
            np.random.rand(3, 4, 10),
            dims=["x", "y", "time"],
            coords={"area": (["x", "y"], np.random.rand(3, 4))},
        ).to_dataset(name="data")
        b = xr.DataArray(
            np.random.rand(2, 6, 14),
            dims=["x", "y", "time"],
            coords={"area": (["x", "y"], np.random.rand(2, 6))},
        ).to_dataset(name="data")
        dt = DataTree.from_dict({"a": a, "b": b})

        def weighted_mean(ds):
            return ds.weighted(ds.area).mean(["x", "y"])

        # should not raise despite .weighted's use of type()
        dt.map_over_subtree(weighted_mean)

    def test_alter_inplace_forbidden(self):
        simpsons = DataTree.from_dict(
            d={
                "/": xr.Dataset({"age": 83}),
                "/Herbert": xr.Dataset({"age": 40}),
                "/Homer": xr.Dataset({"age": 39}),
                "/Homer/Bart": xr.Dataset({"age": 10}),
                "/Homer/Lisa": xr.Dataset({"age": 8}),
                "/Homer/Maggie": xr.Dataset({"age": 1}),
            },
            name="Abe",
        )

        def fast_forward(ds: xr.Dataset, years: float) -> xr.Dataset:
            """Add some years to the age, but by altering the given dataset"""
            ds["age"] = ds["age"] + years
            return ds

        # mutating the node datasets in place is expected to fail
        with pytest.raises(AttributeError):
            simpsons.map_over_subtree(fast_forward, years=10)
338 |
339 |
@pytest.mark.xfail
class TestMapOverSubTreeInplace:
    def test_map_over_subtree_inplace(self):
        # Placeholder: an in-place variant of map_over_subtree is not implemented yet.
        raise NotImplementedError
344 |
--------------------------------------------------------------------------------
/datatree/mapping.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import functools
4 | import sys
5 | from itertools import repeat
6 | from textwrap import dedent
7 | from typing import TYPE_CHECKING, Callable, Tuple
8 |
9 | from xarray import DataArray, Dataset
10 |
11 | from .iterators import LevelOrderIter
12 | from .treenode import NodePath, TreeNode
13 |
14 | if TYPE_CHECKING:
15 | from .datatree import DataTree
16 |
17 |
class TreeIsomorphismError(ValueError):
    """Raised when two tree objects do not share the same node structure."""
22 |
23 |
def check_isomorphic(
    a: DataTree,
    b: DataTree,
    require_names_equal: bool = False,
    check_from_root: bool = True,
):
    """
    Check that two trees have the same structure, raising an error if not.

    Does not compare the actual data in the nodes.

    By default this function only checks that subtrees are isomorphic, not the entire tree above (if it exists).
    Can instead optionally check the entire trees starting from the root, which will ensure that the
    full structure of both trees (including any parents above the given nodes) matches.

    Can optionally check if corresponding nodes should have the same name.

    Parameters
    ----------
    a : DataTree
    b : DataTree
    require_names_equal : Bool
        Whether or not to also check that each node has the same name as its counterpart.
    check_from_root : Bool
        Whether or not to first traverse to the root of the trees before checking for isomorphism.
        If a & b have no parents then this has no effect.

    Raises
    ------
    TypeError
        If either a or b are not tree objects.
    TreeIsomorphismError
        If a and b are tree objects, but are not isomorphic to one another.
        Also optionally raised if their structure is isomorphic, but the names of any two
        respective nodes are not equal.
    """

    if not isinstance(a, TreeNode):
        raise TypeError(f"Argument `a` is not a tree, it is of type {type(a)}")
    if not isinstance(b, TreeNode):
        raise TypeError(f"Argument `b` is not a tree, it is of type {type(b)}")

    if check_from_root:
        # compare whole trees, not just the subtrees rooted at a and b
        a = a.root
        b = b.root

    diff = diff_treestructure(a, b, require_names_equal=require_names_equal)

    if diff:
        raise TreeIsomorphismError("DataTree objects are not isomorphic:\n" + diff)
73 |
74 |
def diff_treestructure(a: DataTree, b: DataTree, require_names_equal: bool) -> str:
    """
    Return a human-readable summary of why two trees are not isomorphic,
    or an empty string if they are.
    """

    # Walk both trees breadth-first ("level-order") in lockstep. This implicitly
    # assumes the trees are ordered, which holds as long as children are stored
    # in an ordered container (tuple/list/dict) rather than a set.
    for node_a, node_b in zip(LevelOrderIter(a), LevelOrderIter(b)):
        path_a, path_b = node_a.path, node_b.path

        if require_names_equal and node_a.name != node_b.name:
            return dedent(
                f"""\
                Node '{path_a}' in the left object has name '{node_a.name}'
                Node '{path_b}' in the right object has name '{node_b.name}'"""
            )

        if len(node_a.children) != len(node_b.children):
            return dedent(
                f"""\
                Number of children on node '{path_a}' of the left object: {len(node_a.children)}
                Number of children on node '{path_b}' of the right object: {len(node_b.children)}"""
            )

    return ""
105 |
106 |
def map_over_subtree(func: Callable) -> Callable:
    """
    Decorator which turns a function which acts on (and returns) Datasets into one which acts on and returns DataTrees.

    Applies a function to every dataset in one or more subtrees, returning new trees which store the results.

    The function will be applied to any data-containing dataset stored in any of the nodes in the trees. The returned
    trees will have the same structure as the supplied trees.

    `func` needs to return a Dataset, DataArray, or None in order to be able to rebuild the subtrees after
    mapping, as each result will be assigned to its respective node of a new tree via `DataTree.__setitem__`. Any
    returned value that is one of these types will be stacked into a separate tree before returning all of them.

    The trees passed to the resulting function must all be isomorphic to one another. Their nodes need not be named
    similarly, but all the output trees will have nodes named in the same way as the first tree passed.

    Parameters
    ----------
    func : callable
        Function to apply to datasets with signature:

        `func(*args, **kwargs) -> Union[Dataset, Iterable[Dataset]]`.

        (i.e. func must accept at least one Dataset and return at least one Dataset.)
        Function will not be applied to any nodes without datasets.
    *args : tuple, optional
        Positional arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
        via .ds .
    **kwargs : Any
        Keyword arguments passed on to `func`. If DataTrees any data-containing nodes will be converted to Datasets
        via .ds .

    Returns
    -------
    mapped : callable
        Wrapped function which returns one or more tree(s) created from results of applying ``func`` to the dataset at
        each node.

    See also
    --------
    DataTree.map_over_subtree
    DataTree.map_over_subtree_inplace
    DataTree.subtree
    """

    # TODO examples in the docstring

    # TODO inspect function to work out immediately if the wrong number of arguments were passed for it?

    @functools.wraps(func)
    def _map_over_subtree(*args, **kwargs) -> DataTree | Tuple[DataTree, ...]:
        """Internal function which maps func over every node in tree, returning a tree of the results."""
        from .datatree import DataTree

        # Any DataTree passed positionally or by keyword counts as a tree input.
        all_tree_inputs = [a for a in args if isinstance(a, DataTree)] + [
            a for a in kwargs.values() if isinstance(a, DataTree)
        ]

        if len(all_tree_inputs) > 0:
            first_tree, *other_trees = all_tree_inputs
        else:
            raise TypeError("Must pass at least one tree object")

        for other_tree in other_trees:
            # isomorphism is transitive so this is enough to guarantee all trees are mutually isomorphic
            check_isomorphic(
                first_tree, other_tree, require_names_equal=False, check_from_root=False
            )

        # Walk all trees simultaneously, applying func to all nodes that lie in same position in different trees
        # We don't know which arguments are DataTrees so we zip all arguments together as iterables
        # Store tuples of results in a dict because we don't yet know how many trees we need to rebuild to return
        out_data_objects = {}
        # Non-tree arguments are repeated indefinitely so they pair with every node.
        args_as_tree_length_iterables = [
            a.subtree if isinstance(a, DataTree) else repeat(a) for a in args
        ]
        n_args = len(args_as_tree_length_iterables)
        kwargs_as_tree_length_iterables = {
            k: v.subtree if isinstance(v, DataTree) else repeat(v)
            for k, v in kwargs.items()
        }
        for node_of_first_tree, *all_node_args in zip(
            first_tree.subtree,
            *args_as_tree_length_iterables,
            *list(kwargs_as_tree_length_iterables.values()),
        ):
            # In all_node_args, the first n_args entries came from positional
            # arguments and the remainder from keyword arguments (in dict order).
            node_args_as_datasetviews = [
                a.ds if isinstance(a, DataTree) else a for a in all_node_args[:n_args]
            ]
            node_kwargs_as_datasetviews = dict(
                zip(
                    [k for k in kwargs_as_tree_length_iterables.keys()],
                    [
                        v.ds if isinstance(v, DataTree) else v
                        for v in all_node_args[n_args:]
                    ],
                )
            )
            # Wrap func so that any failure also reports the path of the node
            # being processed when it was raised.
            func_with_error_context = _handle_errors_with_path_context(
                node_of_first_tree.path
            )(func)

            if node_of_first_tree.has_data:
                # call func on the data in this particular set of corresponding nodes
                results = func_with_error_context(
                    *node_args_as_datasetviews, **node_kwargs_as_datasetviews
                )
            elif node_of_first_tree.has_attrs:
                # propagate attrs
                results = node_of_first_tree.ds
            else:
                # nothing to propagate so use fastpath to create empty node in new tree
                results = None

            # TODO implement mapping over multiple trees in-place using if conditions from here on?
            out_data_objects[node_of_first_tree.path] = results

        # Find out how many return values we received
        num_return_values = _check_all_return_values(out_data_objects)

        # Reconstruct 1+ subtrees from the dict of results, by filling in all nodes of all result trees
        original_root_path = first_tree.path
        result_trees = []
        for i in range(num_return_values):
            out_tree_contents = {}
            for n in first_tree.subtree:
                p = n.path
                if p in out_data_objects.keys():
                    if isinstance(out_data_objects[p], tuple):
                        output_node_data = out_data_objects[p][i]
                    else:
                        output_node_data = out_data_objects[p]
                else:
                    output_node_data = None

                # Discard parentage so that new trees don't include parents of input nodes
                relative_path = str(NodePath(p).relative_to(original_root_path))
                relative_path = "/" if relative_path == "." else relative_path
                out_tree_contents[relative_path] = output_node_data

            new_tree = DataTree.from_dict(
                out_tree_contents,
                name=first_tree.name,
            )
            result_trees.append(new_tree)

        # If only one result then don't wrap it in a tuple
        if len(result_trees) == 1:
            return result_trees[0]
        else:
            return tuple(result_trees)

    return _map_over_subtree
260 |
261 |
262 | def _handle_errors_with_path_context(path):
263 | """Wraps given function so that if it fails it also raises path to node on which it failed."""
264 |
265 | def decorator(func):
266 | def wrapper(*args, **kwargs):
267 | try:
268 | return func(*args, **kwargs)
269 | except Exception as e:
270 | if sys.version_info >= (3, 11):
271 | # Add the context information to the error message
272 | e.add_note(
273 | f"Raised whilst mapping function over node with path {path}"
274 | )
275 | raise
276 |
277 | return wrapper
278 |
279 | return decorator
280 |
281 |
def add_note(err: BaseException, msg: str) -> None:
    """Attach *msg* as a note to *err*, emulating add_note on old pythons."""
    # TODO: remove once python 3.10 can be dropped
    if sys.version_info >= (3, 11):
        err.add_note(msg)
    else:
        existing = getattr(err, "__notes__", [])
        err.__notes__ = [*existing, msg]  # type: ignore[attr-defined]
288 |
289 |
def _check_single_set_return_values(path_to_node, obj):
    """Check types returned from single evaluation of func, and return number of return values received from func."""
    # A bare Dataset/DataArray counts as a single return value.
    if isinstance(obj, (Dataset, DataArray)):
        return 1

    # A tuple counts as multiple return values, each of which must be valid.
    if isinstance(obj, tuple):
        for item in obj:
            if isinstance(item, (Dataset, DataArray)):
                continue
            raise TypeError(
                f"One of the results of calling func on datasets on the nodes at position {path_to_node} is "
                f"of type {type(item)}, not Dataset or DataArray."
            )
        return len(obj)

    raise TypeError(
        f"The result of calling func on the node at position {path_to_node} is of type {type(obj)}, not "
        f"Dataset or DataArray, nor a tuple of such types."
    )
307 |
308 |
309 | def _check_all_return_values(returned_objects):
310 | """Walk through all values returned by mapping func over subtrees, raising on any invalid or inconsistent types."""
311 |
312 | if all(r is None for r in returned_objects.values()):
313 | raise TypeError(
314 | "Called supplied function on all nodes but found a return value of None for"
315 | "all of them."
316 | )
317 |
318 | result_data_objects = [
319 | (path_to_node, r)
320 | for path_to_node, r in returned_objects.items()
321 | if r is not None
322 | ]
323 |
324 | if len(result_data_objects) == 1:
325 | # Only one node in the tree: no need to check consistency of results between nodes
326 | path_to_node, result = result_data_objects[0]
327 | num_return_values = _check_single_set_return_values(path_to_node, result)
328 | else:
329 | prev_path, _ = result_data_objects[0]
330 | prev_num_return_values, num_return_values = None, None
331 | for path_to_node, obj in result_data_objects[1:]:
332 | num_return_values = _check_single_set_return_values(path_to_node, obj)
333 |
334 | if (
335 | num_return_values != prev_num_return_values
336 | and prev_num_return_values is not None
337 | ):
338 | raise TypeError(
339 | f"Calling func on the nodes at position {path_to_node} returns {num_return_values} separate return "
340 | f"values, whereas calling func on the nodes at position {prev_path} instead returns "
341 | f"{prev_num_return_values} separate return values."
342 | )
343 |
344 | prev_path, prev_num_return_values = path_to_node, num_return_values
345 |
346 | return num_return_values
347 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # flake8: noqa
3 | # Ignoring F401: imported but unused
4 |
5 | # complexity documentation build configuration file, created by
6 | # sphinx-quickstart on Tue Jul 9 22:26:36 2013.
7 | #
8 | # This file is execfile()d with the current directory set to its containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | import inspect
17 | import os
18 | import sys
19 |
20 | import sphinx_autosummary_accessors
21 |
22 | import datatree
23 |
24 | # If extensions (or modules to document with autodoc) are in another directory,
25 | # add these directories to sys.path here. If the directory is relative to the
26 | # documentation root, use os.path.abspath to make it absolute, like shown here.
27 | # sys.path.insert(0, os.path.abspath('.'))
28 |
29 | cwd = os.getcwd()
30 | parent = os.path.dirname(cwd)
31 | sys.path.insert(0, parent)
32 |
33 | # -- Deprecation Warning banner -----------------------------------------------------
34 |
35 | rst_prolog = """.. attention::
36 | This repository has been archived. Please use xarray.DataTree instead.
37 | """
38 |
39 | # -- General configuration -----------------------------------------------------
40 |
41 | # If your documentation needs a minimal Sphinx version, state it here.
42 | # needs_sphinx = '1.0'
43 |
44 | # Add any Sphinx extension module names here, as strings. They can be extensions
45 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
46 | extensions = [
47 | "sphinx.ext.autodoc",
48 | "sphinx.ext.viewcode",
49 | "sphinx.ext.linkcode",
50 | "sphinx.ext.autosummary",
51 | "sphinx.ext.intersphinx",
52 | "sphinx.ext.extlinks",
53 | "sphinx.ext.napoleon",
54 | "sphinx_copybutton",
55 | "sphinxext.opengraph",
56 | "sphinx_autosummary_accessors",
57 | "IPython.sphinxext.ipython_console_highlighting",
58 | "IPython.sphinxext.ipython_directive",
59 | "nbsphinx",
60 | "sphinxcontrib.srclinks",
61 | ]
62 |
63 | extlinks = {
64 | "issue": ("https://github.com/xarray-contrib/datatree/issues/%s", "GH#%s"),
65 | "pull": ("https://github.com/xarray-contrib/datatree/pull/%s", "GH#%s"),
66 | }
67 | # Add any paths that contain templates here, relative to this directory.
68 | templates_path = ["_templates", sphinx_autosummary_accessors.templates_path]
69 |
70 | # Generate the API documentation when building
71 | autosummary_generate = True
72 |
73 |
74 | # Napoleon configurations
75 |
76 | napoleon_google_docstring = False
77 | napoleon_numpy_docstring = True
78 | napoleon_use_param = False
79 | napoleon_use_rtype = False
80 | napoleon_preprocess_types = True
# Map docstring type names to cross-reference targets for napoleon.
# Fixed: the "Path" alias had a doubled tilde ("~~pathlib.Path"), which would
# render literally instead of abbreviating the link text.
napoleon_type_aliases = {
    # general terms
    "sequence": ":term:`sequence`",
    "iterable": ":term:`iterable`",
    "callable": ":py:func:`callable`",
    "dict_like": ":term:`dict-like `",
    "dict-like": ":term:`dict-like `",
    "path-like": ":term:`path-like `",
    "mapping": ":term:`mapping`",
    "file-like": ":term:`file-like `",
    # special terms
    # "same type as caller": "*same type as caller*",  # does not work, yet
    # "same type as values": "*same type as values*",  # does not work, yet
    # stdlib type aliases
    "MutableMapping": "~collections.abc.MutableMapping",
    "sys.stdout": ":obj:`sys.stdout`",
    "timedelta": "~datetime.timedelta",
    "string": ":class:`string `",
    # numpy terms
    "array_like": ":term:`array_like`",
    "array-like": ":term:`array-like `",
    "scalar": ":term:`scalar`",
    "array": ":term:`array`",
    "hashable": ":term:`hashable `",
    # matplotlib terms
    "color-like": ":py:func:`color-like `",
    "matplotlib colormap name": ":doc:`matplotlib colormap name `",
    "matplotlib axes object": ":py:class:`matplotlib axes object `",
    "colormap": ":py:class:`colormap `",
    # objects without namespace: xarray
    "DataArray": "~xarray.DataArray",
    "Dataset": "~xarray.Dataset",
    "Variable": "~xarray.Variable",
    "DatasetGroupBy": "~xarray.core.groupby.DatasetGroupBy",
    "DataArrayGroupBy": "~xarray.core.groupby.DataArrayGroupBy",
    # objects without namespace: numpy
    "ndarray": "~numpy.ndarray",
    "MaskedArray": "~numpy.ma.MaskedArray",
    "dtype": "~numpy.dtype",
    "ComplexWarning": "~numpy.ComplexWarning",
    # objects without namespace: pandas
    "Index": "~pandas.Index",
    "MultiIndex": "~pandas.MultiIndex",
    "CategoricalIndex": "~pandas.CategoricalIndex",
    "TimedeltaIndex": "~pandas.TimedeltaIndex",
    "DatetimeIndex": "~pandas.DatetimeIndex",
    "Series": "~pandas.Series",
    "DataFrame": "~pandas.DataFrame",
    "Categorical": "~pandas.Categorical",
    "Path": "~pathlib.Path",
    # objects with abbreviated namespace (from pandas)
    "pd.Index": "~pandas.Index",
    "pd.NaT": "~pandas.NaT",
}
135 |
136 | # The suffix of source filenames.
137 | source_suffix = ".rst"
138 |
139 | # The encoding of source files.
140 | # source_encoding = 'utf-8-sig'
141 |
142 | # The master toctree document.
143 | master_doc = "index"
144 |
145 | # General information about the project.
146 | project = "Datatree"
147 | copyright = "2021 onwards, Tom Nicholas and its Contributors"
148 | author = "Tom Nicholas"
149 |
150 | html_show_sourcelink = True
151 | srclink_project = "https://github.com/xarray-contrib/datatree"
152 | srclink_branch = "main"
153 | srclink_src_path = "docs/source"
154 |
155 | # The version info for the project you're documenting, acts as replacement for
156 | # |version| and |release|, also used in various other places throughout the
157 | # built documents.
158 | #
159 | # The short X.Y version.
160 | version = datatree.__version__
161 | # The full version, including alpha/beta/rc tags.
162 | release = datatree.__version__
163 |
164 | # The language for content autogenerated by Sphinx. Refer to documentation
165 | # for a list of supported languages.
166 | # language = None
167 |
168 | # There are two options for replacing |today|: either, you set today to some
169 | # non-false value, then it is used:
170 | # today = ''
171 | # Else, today_fmt is used as the format for a strftime call.
172 | # today_fmt = '%B %d, %Y'
173 |
174 | # List of patterns, relative to source directory, that match files and
175 | # directories to ignore when looking for source files.
176 | exclude_patterns = ["_build"]
177 |
178 | # The reST default role (used for this markup: `text`) to use for all documents.
179 | # default_role = None
180 |
181 | # If true, '()' will be appended to :func: etc. cross-reference text.
182 | # add_function_parentheses = True
183 |
184 | # If true, the current module name will be prepended to all description
185 | # unit titles (such as .. function::).
186 | # add_module_names = True
187 |
188 | # If true, sectionauthor and moduleauthor directives will be shown in the
189 | # output. They are ignored by default.
190 | # show_authors = False
191 |
192 | # The name of the Pygments (syntax highlighting) style to use.
193 | pygments_style = "sphinx"
194 |
195 | # A list of ignored prefixes for module index sorting.
196 | # modindex_common_prefix = []
197 |
198 | # If true, keep warnings as "system message" paragraphs in the built documents.
199 | # keep_warnings = False
200 |
201 |
202 | # -- Intersphinx links ---------------------------------------------------------
203 |
204 | intersphinx_mapping = {
205 | "python": ("https://docs.python.org/3.8/", None),
206 | "numpy": ("https://numpy.org/doc/stable", None),
207 | "xarray": ("https://xarray.pydata.org/en/stable/", None),
208 | }
209 |
210 | # -- Options for HTML output ---------------------------------------------------
211 |
212 | # The theme to use for HTML and HTML Help pages. See the documentation for
213 | # a list of builtin themes.
214 | html_theme = "sphinx_book_theme"
215 |
216 | # Theme options are theme-specific and customize the look and feel of a theme
217 | # further. For a list of options available for each theme, see the
218 | # documentation.
219 | html_theme_options = {
220 | "repository_url": "https://github.com/xarray-contrib/datatree",
221 | "repository_branch": "main",
222 | "path_to_docs": "docs/source",
223 | "use_repository_button": True,
224 | "use_issues_button": True,
225 | "use_edit_page_button": True,
226 | }
227 |
228 | # Add any paths that contain custom themes here, relative to this directory.
229 | # html_theme_path = []
230 |
231 | # The name for this set of Sphinx documents. If None, it defaults to
232 | # " v documentation".
233 | # html_title = None
234 |
235 | # A shorter title for the navigation bar. Default is the same as html_title.
236 | # html_short_title = None
237 |
238 | # The name of an image file (relative to this directory) to place at the top
239 | # of the sidebar.
240 | # html_logo = None
241 |
242 | # The name of an image file (within the static path) to use as favicon of the
243 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
244 | # pixels large.
245 | # html_favicon = None
246 |
247 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
248 | # using the given strftime format.
249 | # html_last_updated_fmt = '%b %d, %Y'
250 |
251 | # If true, SmartyPants will be used to convert quotes and dashes to
252 | # typographically correct entities.
253 | # html_use_smartypants = True
254 |
255 | # Custom sidebar templates, maps document names to template names.
256 | # html_sidebars = {}
257 |
258 | # Additional templates that should be rendered to pages, maps page names to
259 | # template names.
260 | # html_additional_pages = {}
261 |
262 | # If false, no module index is generated.
263 | # html_domain_indices = True
264 |
265 | # If false, no index is generated.
266 | # html_use_index = True
267 |
268 | # If true, the index is split into individual pages for each letter.
269 | # html_split_index = False
270 |
271 | # If true, links to the reST sources are added to the pages.
272 | # html_show_sourcelink = True
273 |
274 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
275 | # html_show_sphinx = True
276 |
277 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
278 | # html_show_copyright = True
279 |
280 | # If true, an OpenSearch description file will be output, and all pages will
281 | # contain a tag referring to it. The value of this option must be the
282 | # base URL from which the finished HTML is served.
283 | # html_use_opensearch = ''
284 |
285 | # This is the file name suffix for HTML files (e.g. ".xhtml").
286 | # html_file_suffix = None
287 |
288 | # Output file base name for HTML help builder.
289 | htmlhelp_basename = "datatree_doc"
290 |
291 |
292 | # -- Options for LaTeX output --------------------------------------------------
293 |
294 | latex_elements = {
295 | # The paper size ('letterpaper' or 'a4paper').
296 | # 'papersize': 'letterpaper',
297 | # The font size ('10pt', '11pt' or '12pt').
298 | # 'pointsize': '10pt',
299 | # Additional stuff for the LaTeX preamble.
300 | # 'preamble': '',
301 | }
302 |
303 | # Grouping the document tree into LaTeX files. List of tuples
304 | # (source start file, target name, title, author, documentclass [howto/manual]).
305 | latex_documents = [
306 | ("index", "datatree.tex", "Datatree Documentation", author, "manual")
307 | ]
308 |
309 | # The name of an image file (relative to this directory) to place at the top of
310 | # the title page.
311 | # latex_logo = None
312 |
313 | # For "manual" documents, if this is true, then toplevel headings are parts,
314 | # not chapters.
315 | # latex_use_parts = False
316 |
317 | # If true, show page references after internal links.
318 | # latex_show_pagerefs = False
319 |
320 | # If true, show URL addresses after external links.
321 | # latex_show_urls = False
322 |
323 | # Documents to append as an appendix to all manuals.
324 | # latex_appendices = []
325 |
326 | # If false, no module index is generated.
327 | # latex_domain_indices = True
328 |
329 |
330 | # -- Options for manual page output --------------------------------------------
331 |
332 | # One entry per manual page. List of tuples
333 | # (source start file, name, description, authors, manual section).
334 | man_pages = [("index", "datatree", "Datatree Documentation", [author], 1)]
335 |
336 | # If true, show URL addresses after external links.
337 | # man_show_urls = False
338 |
339 |
340 | # -- Options for Texinfo output ------------------------------------------------
341 |
342 | # Grouping the document tree into Texinfo files. List of tuples
343 | # (source start file, target name, title, author,
344 | # dir menu entry, description, category)
345 | texinfo_documents = [
346 | (
347 | "index",
348 | "datatree",
349 | "Datatree Documentation",
350 | author,
351 | "datatree",
352 | "Tree-like hierarchical data structure for xarray.",
353 | "Miscellaneous",
354 | )
355 | ]
356 |
357 | # Documents to append as an appendix to all manuals.
358 | # texinfo_appendices = []
359 |
360 | # If false, no module index is generated.
361 | # texinfo_domain_indices = True
362 |
363 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
364 | # texinfo_show_urls = 'footnote'
365 |
366 | # If true, do not generate a @detailmenu in the "Top" node's menu.
367 | # texinfo_no_detailmenu = False
368 |
369 |
370 | # based on numpy doc/source/conf.py
def linkcode_resolve(domain, info):
    """
    Determine the URL corresponding to Python object
    """
    # Only python objects can be linked to the datatree sources.
    if domain != "py":
        return None

    module_name = info["module"]
    qualname = info["fullname"]

    module = sys.modules.get(module_name)
    if module is None:
        return None

    # Resolve the dotted name attribute-by-attribute on the imported module.
    obj = module
    for attr in qualname.split("."):
        try:
            obj = getattr(obj, attr)
        except AttributeError:
            return None

    # Unwrap decorators so we point at the real source file.
    try:
        source_file = inspect.getsourcefile(inspect.unwrap(obj))
    except TypeError:
        source_file = None
    if not source_file:
        return None

    try:
        source, start_line = inspect.getsourcelines(obj)
    except OSError:
        start_line = None

    # Build a "#Lx-Ly" fragment covering the object's source lines, if known.
    if start_line:
        linespec = f"#L{start_line}-L{start_line + len(source) - 1}"
    else:
        linespec = ""

    rel_path = os.path.relpath(source_file, start=os.path.dirname(datatree.__file__))

    # Dev versions ("+" in version) link to main; releases link to their tag.
    if "+" in datatree.__version__:
        return f"https://github.com/xarray-contrib/datatree/blob/main/datatree/{rel_path}{linespec}"
    else:
        return (
            f"https://github.com/xarray-contrib/datatree/blob/"
            f"v{datatree.__version__}/datatree/{rel_path}{linespec}"
        )
418 |
--------------------------------------------------------------------------------
/docs/source/whats-new.rst:
--------------------------------------------------------------------------------
1 | .. currentmodule:: datatree
2 |
3 | What's New
4 | ==========
5 |
6 | .. ipython:: python
7 | :suppress:
8 |
9 | import numpy as np
10 | import pandas as pd
11 | import xarray as xray
12 | import xarray
13 | import xarray as xr
14 | import datatree
15 |
16 | np.random.seed(123456)
17 |
18 | .. _whats-new.v0.0.14:
19 |
20 | v0.0.14 (unreleased)
21 | --------------------
22 |
23 | New Features
24 | ~~~~~~~~~~~~
25 |
26 | Breaking changes
27 | ~~~~~~~~~~~~~~~~
28 |
29 | - Renamed `DataTree.lineage` to `DataTree.parents` to match `pathlib` vocabulary
30 | (:issue:`283`, :pull:`286`)
31 | - Minimum required version of xarray is now 2023.12.0, i.e. the latest version.
32 | This is required to prevent recent changes to xarray's internals from breaking datatree.
33 | (:issue:`293`, :pull:`294`)
34 | By `Tom Nicholas `_.
35 | - Change default write mode of :py:meth:`DataTree.to_zarr` to ``'w-'`` to match ``xarray``
36 | default and prevent accidental directory overwrites. (:issue:`274`, :pull:`275`)
37 | By `Sam Levang `_.
38 |
39 | Deprecations
40 | ~~~~~~~~~~~~
41 |
42 | - Renamed `DataTree.lineage` to `DataTree.parents` to match `pathlib` vocabulary
43 | (:issue:`283`, :pull:`286`). `lineage` is now deprecated and use of `parents` is encouraged.
44 | By `Etienne Schalk `_.
45 |
46 | Bug fixes
47 | ~~~~~~~~~
48 | - Keep attributes on nodes containing no data in :py:func:`map_over_subtree`. (:issue:`278`, :pull:`279`)
49 | By `Sam Levang `_.
50 |
51 | Documentation
52 | ~~~~~~~~~~~~~
53 | - Use ``napoleon`` instead of ``numpydoc`` to align with xarray documentation
54 | (:issue:`284`, :pull:`298`).
55 | By `Etienne Schalk `_.
56 |
57 | Internal Changes
58 | ~~~~~~~~~~~~~~~~
59 |
60 | .. _whats-new.v0.0.13:
61 |
62 | v0.0.13 (27/10/2023)
63 | --------------------
64 |
65 | New Features
66 | ~~~~~~~~~~~~
67 |
68 | - New :py:meth:`DataTree.match` method for glob-like pattern matching of node paths. (:pull:`267`)
69 | By `Tom Nicholas `_.
70 | - New :py:meth:`DataTree.is_hollow` property for checking if data is only contained at the leaf nodes. (:pull:`272`)
71 | By `Tom Nicholas `_.
72 | - Indicate which node caused the problem if error encountered while applying user function using :py:func:`map_over_subtree`
73 | (:issue:`190`, :pull:`264`). Only works when using python 3.11 or later.
74 | By `Tom Nicholas `_.
75 |
76 | Breaking changes
77 | ~~~~~~~~~~~~~~~~
78 |
79 | - Nodes containing only attributes but no data are now ignored by :py:func:`map_over_subtree` (:issue:`262`, :pull:`263`)
80 | By `Tom Nicholas `_.
81 | - Disallow altering of given dataset inside function called by :py:func:`map_over_subtree` (:pull:`269`, reverts part of :pull:`194`).
82 | By `Tom Nicholas `_.
83 |
84 | Bug fixes
85 | ~~~~~~~~~
86 |
87 | - Fix unittests on i386. (:pull:`249`)
88 | By `Antonio Valentino `_.
89 | - Ensure nodepath class is compatible with python 3.12 (:pull:`260`)
90 | By `Max Grover `_.
91 |
92 | Documentation
93 | ~~~~~~~~~~~~~
94 |
95 | - Added new sections to page on ``Working with Hierarchical Data`` (:pull:`180`)
96 | By `Tom Nicholas `_.
97 |
98 | Internal Changes
99 | ~~~~~~~~~~~~~~~~
100 |
101 | * No longer use the deprecated `distutils` package.
102 |
103 | .. _whats-new.v0.0.12:
104 |
105 | v0.0.12 (03/07/2023)
106 | --------------------
107 |
108 | New Features
109 | ~~~~~~~~~~~~
110 |
111 | - Added a :py:func:`DataTree.level`, :py:func:`DataTree.depth`, and :py:func:`DataTree.width` property (:pull:`208`).
112 | By `Tom Nicholas `_.
113 | - Allow dot-style (or "attribute-like") access to child nodes and variables, with ipython autocomplete. (:issue:`189`, :pull:`98`)
114 | By `Tom Nicholas `_.
115 |
116 | Breaking changes
117 | ~~~~~~~~~~~~~~~~
118 |
119 | Deprecations
120 | ~~~~~~~~~~~~
121 |
122 | - Dropped support for python 3.8 (:issue:`212`, :pull:`214`)
123 | By `Tom Nicholas `_.
124 |
125 | Bug fixes
126 | ~~~~~~~~~
127 |
128 | - Allow for altering of given dataset inside function called by :py:func:`map_over_subtree` (:issue:`188`, :pull:`194`).
129 | By `Tom Nicholas `_.
130 | - copy subtrees without creating ancestor nodes (:pull:`201`)
131 | By `Justus Magin `_.
132 |
133 | Documentation
134 | ~~~~~~~~~~~~~
135 |
136 | Internal Changes
137 | ~~~~~~~~~~~~~~~~
138 |
139 | .. _whats-new.v0.0.11:
140 |
141 | v0.0.11 (01/09/2023)
142 | --------------------
143 |
144 | Big update with entirely new pages in the docs,
145 | new methods (``.drop_nodes``, ``.filter``, ``.leaves``, ``.descendants``), and bug fixes!
146 |
147 | New Features
148 | ~~~~~~~~~~~~
149 |
150 | - Added a :py:meth:`DataTree.drop_nodes` method (:issue:`161`, :pull:`175`).
151 | By `Tom Nicholas `_.
152 | - New, more specific exception types for tree-related errors (:pull:`169`).
153 | By `Tom Nicholas `_.
154 | - Added a new :py:meth:`DataTree.descendants` property (:pull:`170`).
155 | By `Tom Nicholas `_.
156 | - Added a :py:meth:`DataTree.leaves` property (:pull:`177`).
157 | By `Tom Nicholas `_.
158 | - Added a :py:meth:`DataTree.filter` method (:pull:`184`).
159 | By `Tom Nicholas `_.
160 |
161 | Breaking changes
162 | ~~~~~~~~~~~~~~~~
163 |
164 | - :py:meth:`DataTree.copy` copy method now only copies the subtree, not the parent nodes (:pull:`171`).
165 | By `Tom Nicholas `_.
166 | - Grafting a subtree onto another tree now leaves name of original subtree object unchanged (:issue:`116`, :pull:`172`, :pull:`178`).
167 | By `Tom Nicholas `_.
168 | - Changed the :py:meth:`DataTree.assign` method to just work on the local node (:pull:`181`).
169 | By `Tom Nicholas `_.
170 |
171 | Deprecations
172 | ~~~~~~~~~~~~
173 |
174 | Bug fixes
175 | ~~~~~~~~~
176 |
177 | - Fix bug with :py:meth:`DataTree.relative_to` method (:issue:`133`, :pull:`160`).
178 | By `Tom Nicholas `_.
179 | - Fix links to API docs in all documentation (:pull:`183`).
180 | By `Tom Nicholas `_.
181 |
182 | Documentation
183 | ~~~~~~~~~~~~~
184 |
185 | - Changed docs theme to match xarray's main documentation. (:pull:`173`)
186 | By `Tom Nicholas `_.
187 | - Added ``Terminology`` page. (:pull:`174`)
188 | By `Tom Nicholas `_.
189 | - Added page on ``Working with Hierarchical Data`` (:pull:`179`)
190 | By `Tom Nicholas `_.
191 | - Added context content to ``Index`` page (:pull:`182`)
192 | By `Tom Nicholas `_.
193 | - Updated the README (:pull:`187`)
194 | By `Tom Nicholas `_.
195 |
196 | Internal Changes
197 | ~~~~~~~~~~~~~~~~
198 |
199 |
200 | .. _whats-new.v0.0.10:
201 |
202 | v0.0.10 (12/07/2022)
203 | --------------------
204 |
205 | Adds accessors and a `.pipe()` method.
206 |
207 | New Features
208 | ~~~~~~~~~~~~
209 |
210 | - Add the ability to register accessors on ``DataTree`` objects, by using ``register_datatree_accessor``. (:pull:`144`)
211 | By `Tom Nicholas `_.
212 | - Allow method chaining with a new :py:meth:`DataTree.pipe` method (:issue:`151`, :pull:`156`).
213 | By `Justus Magin `_.
214 |
215 | Breaking changes
216 | ~~~~~~~~~~~~~~~~
217 |
218 | Deprecations
219 | ~~~~~~~~~~~~
220 |
221 | Bug fixes
222 | ~~~~~~~~~
223 |
224 | - Allow ``Datatree`` objects as values in :py:meth:`DataTree.from_dict` (:pull:`159`).
225 | By `Justus Magin `_.
226 |
227 | Documentation
228 | ~~~~~~~~~~~~~
229 |
230 | - Added ``Reading and Writing Files`` page. (:pull:`158`)
231 | By `Tom Nicholas `_.
232 |
233 | Internal Changes
234 | ~~~~~~~~~~~~~~~~
235 |
236 | - Avoid reading from the same file twice with fsspec (:pull:`130`)
237 | By `William Roberts `_.
238 |
239 |
240 | .. _whats-new.v0.0.9:
241 |
242 | v0.0.9 (07/14/2022)
243 | -------------------
244 |
245 | New Features
246 | ~~~~~~~~~~~~
247 |
248 | Breaking changes
249 | ~~~~~~~~~~~~~~~~
250 |
251 | Deprecations
252 | ~~~~~~~~~~~~
253 |
254 | Bug fixes
255 | ~~~~~~~~~
256 |
257 | Documentation
258 | ~~~~~~~~~~~~~
259 | - Switch docs theme (:pull:`123`).
260 |   By `Julius Busecke `_.
261 |
262 | Internal Changes
263 | ~~~~~~~~~~~~~~~~
264 |
265 |
266 | .. _whats-new.v0.0.7:
267 |
268 | v0.0.7 (07/11/2022)
269 | -------------------
270 |
271 | New Features
272 | ~~~~~~~~~~~~
273 |
274 | - Improve the HTML repr by adding tree-style lines connecting groups and sub-groups (:pull:`109`).
275 | By `Benjamin Woods `_.
276 |
277 | Breaking changes
278 | ~~~~~~~~~~~~~~~~
279 |
280 | - The ``DataTree.ds`` attribute now returns a view onto an immutable Dataset-like object, instead of an actual instance
281 |   of ``xarray.Dataset``. This may break existing ``isinstance`` checks or ``assert`` comparisons. (:pull:`99`)
282 | By `Tom Nicholas `_.
283 |
284 | Deprecations
285 | ~~~~~~~~~~~~
286 |
287 | Bug fixes
288 | ~~~~~~~~~
289 |
290 | - Modifying the contents of a ``DataTree`` object via the ``DataTree.ds`` attribute is now forbidden, which prevents
291 | any possibility of the contents of a ``DataTree`` object and its ``.ds`` attribute diverging. (:issue:`38`, :pull:`99`)
292 | By `Tom Nicholas `_.
293 | - Fixed a bug so that names of children now always match keys under which parents store them (:pull:`99`).
294 | By `Tom Nicholas `_.
295 |
296 | Documentation
297 | ~~~~~~~~~~~~~
298 |
299 | - Added ``Data Structures`` page describing the internal structure of a ``DataTree`` object, and its relation to
300 | ``xarray.Dataset`` objects. (:pull:`103`)
301 | By `Tom Nicholas `_.
302 | - API page updated with all the methods that are copied from ``xarray.Dataset``. (:pull:`41`)
303 | By `Tom Nicholas `_.
304 |
305 | Internal Changes
306 | ~~~~~~~~~~~~~~~~
307 |
308 | - Refactored ``DataTree`` class to store a set of ``xarray.Variable`` objects instead of a single ``xarray.Dataset``.
309 | This approach means that the ``DataTree`` class now effectively copies and extends the internal structure of
310 | ``xarray.Dataset``. (:pull:`41`)
311 | By `Tom Nicholas `_.
312 | - Refactored to use intermediate ``NamedNode`` class, separating implementation of methods requiring a ``name``
313 | attribute from those not requiring it.
314 | By `Tom Nicholas `_.
315 | - Made ``testing.test_datatree.create_test_datatree`` into a pytest fixture (:pull:`107`).
316 | By `Benjamin Woods `_.
317 |
318 |
319 |
320 | .. _whats-new.v0.0.6:
321 |
322 | v0.0.6 (06/03/2022)
323 | -------------------
324 |
325 | Various small bug fixes, in preparation for more significant changes in the next version.
326 |
327 | Bug fixes
328 | ~~~~~~~~~
329 |
330 | - Fixed bug with checking that assigning parent or new children did not create a loop in the tree (:pull:`105`)
331 | By `Tom Nicholas `_.
332 | - Do not call ``__exit__`` on Zarr store when opening (:pull:`90`)
333 | By `Matt McCormick `_.
334 | - Fix netCDF encoding for compression (:pull:`95`)
335 | By `Joe Hamman `_.
336 | - Added validity checking for node names (:pull:`106`)
337 | By `Tom Nicholas `_.
338 |
339 | .. _whats-new.v0.0.5:
340 |
341 | v0.0.5 (05/05/2022)
342 | -------------------
343 |
344 | - Major refactor of internals, moving from the ``DataTree.children`` attribute being a ``Tuple[DataTree]`` to being a
345 | ``OrderedDict[str, DataTree]``. This was necessary in order to integrate better with xarray's dictionary-like API,
346 | solve several issues, simplify the code internally, remove dependencies, and enable new features. (:pull:`76`)
347 | By `Tom Nicholas `_.
348 |
349 | New Features
350 | ~~~~~~~~~~~~
351 |
352 | - Syntax for accessing nodes now supports file-like paths, including parent nodes via ``"../"``, relative paths, the
353 | root node via ``"/"``, and the current node via ``"."``. (Internally it actually uses ``pathlib`` now.)
354 | By `Tom Nicholas `_.
355 | - New path-like API methods, such as ``.relative_to``, ``.find_common_ancestor``, and ``.same_tree``.
356 | - Some new dictionary-like methods, such as ``DataTree.get`` and ``DataTree.update``. (:pull:`76`)
357 | By `Tom Nicholas `_.
358 | - New HTML repr, which will automatically display in a jupyter notebook. (:pull:`78`)
359 | By `Tom Nicholas `_.
360 | - New delitem method so you can delete nodes. (:pull:`88`)
361 | By `Tom Nicholas `_.
362 | - New ``to_dict`` method. (:pull:`82`)
363 | By `Tom Nicholas `_.
364 |
365 | Breaking changes
366 | ~~~~~~~~~~~~~~~~
367 |
368 | - Node names are now optional, which means that the root of the tree can be unnamed. This has knock-on effects for
369 | a lot of the API.
370 | - The ``__init__`` signature for ``DataTree`` has changed, so that ``name`` is now an optional kwarg.
371 | - Files will now be loaded as a slightly different tree, because the root group no longer needs to be given a default
372 | name.
373 | - Removed tag-like access to nodes.
374 | - Removes the option to delete all data in a node by assigning None to the node (in favour of deleting data by replacing
375 | the node's ``.ds`` attribute with an empty Dataset), or to create a new empty node in the same way (in favour of
376 | assigning an empty DataTree object instead).
377 | - Removes the ability to create a new node by assigning a ``Dataset`` object to ``DataTree.__setitem__``.
378 | - Several other minor API changes such as ``.pathstr`` -> ``.path``, and ``from_dict``'s dictionary argument now being
379 | required. (:pull:`76`)
380 | By `Tom Nicholas `_.
381 |
382 | Deprecations
383 | ~~~~~~~~~~~~
384 |
385 | - No longer depends on the anytree library (:pull:`76`)
386 | By `Tom Nicholas `_.
387 |
388 | Bug fixes
389 | ~~~~~~~~~
390 |
391 | - Fixed indentation issue with the string repr (:pull:`86`)
392 | By `Tom Nicholas `_.
393 |
394 | Documentation
395 | ~~~~~~~~~~~~~
396 |
397 | - Quick-overview page updated to match change in path syntax (:pull:`76`)
398 | By `Tom Nicholas `_.
399 |
400 | Internal Changes
401 | ~~~~~~~~~~~~~~~~
402 |
403 | - Basically every file was changed in some way to accommodate (:pull:`76`).
404 | - No longer need the utility functions for string manipulation that were defined in ``utils.py``.
405 | - A considerable amount of code copied over from the internals of anytree (e.g. in ``render.py`` and ``iterators.py``).
406 | The Apache license for anytree has now been bundled with datatree. (:pull:`76`).
407 | By `Tom Nicholas `_.
408 |
409 | .. _whats-new.v0.0.4:
410 |
411 | v0.0.4 (31/03/2022)
412 | -------------------
413 |
414 | - Ensure you get the pretty tree-like string representation by default in ipython (:pull:`73`).
415 | By `Tom Nicholas `_.
416 | - Now available on conda-forge (as xarray-datatree)! (:pull:`71`)
417 | By `Anderson Banihirwe `_.
418 | - Allow for python 3.8 (:pull:`70`).
419 | By `Don Setiawan `_.
420 |
421 | .. _whats-new.v0.0.3:
422 |
423 | v0.0.3 (30/03/2022)
424 | -------------------
425 |
426 | - First released version available on pypi (as xarray-datatree)!
427 |
--------------------------------------------------------------------------------