├── .github └── workflows │ └── fair-software.yml ├── .gitignore ├── CHANGELOG.md ├── CITATION.cff ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── environment.yml ├── example ├── data.json ├── external.json ├── external_group.json └── external_var.json ├── pyproject.toml ├── setup.cfg ├── src └── json2netcdf │ ├── __init__.py │ ├── __main__.py │ └── json2netcdf.py └── tests └── test_json2netcdf.py /.github/workflows/fair-software.yml: -------------------------------------------------------------------------------- 1 | name: fair-software 2 | on: push 3 | 4 | jobs: 5 | verify: 6 | name: "fair-software" 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: fair-software/howfairis-github-action@main 10 | name: Measure compliance with fair-software.eu recommendations 11 | env: 12 | PYCHARM_HOSTED: "Trick colorama into displaying colored output" 13 | with: 14 | MY_REPO_URL: "https://github.com/${{ github.repository }}" 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Test files 2 | test.json 3 | 4 | # Generated data files 5 | *.nc 6 | 7 | # IDE / editor files 8 | *.sublime-project 9 | *.sublime-workspace 10 | .idea/ 11 | .ipynb_checkpoints/ 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | .pytest_cache/ 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Logging 47 | *.log 48 | 49 | # Environments 50 | .env 51 | .venv 52 | env/ 53 | venv/ 54 | ENV/ 55 | env.bak/ 56 | venv.bak/ 57 | 58 | # Misc 59 | scratch/ -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | Notable changes to json2netcdf will be documented here. 3 | 4 | ## [Unreleased] 5 | 6 | ## [1.1.2] - 2022-03-27 7 | 8 | ### Fixed 9 | 10 | - Issue with dependencies not being specified in `setup.cfg`. 11 | 12 | ## [1.1.0] - 2022-03-27 13 | 14 | This release introduces a Python package alongside the previous command line interface, allowing easy use of json2netcdf from within Python programs. A number of additions have been made to the repository to enable this. The command line interface remains the same as previously. 15 | 16 | ## [1.0] - 2020-11-23 17 | 18 | First major release. 19 | 20 | [Unreleased]: https://github.com/samharrison7/json2netcdf/tree/develop 21 | [1.1.2]: https://github.com/samharrison7/json2netcdf/tree/1.1.2 22 | [1.1.0]: https://github.com/samharrison7/json2netcdf/tree/1.1.0 23 | [1.0]: https://github.com/samharrison7/json2netcdf/tree/1.0 -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | title: json2netcdf 3 | message: 'If you use this model, please cite it.' 4 | type: software 5 | abstract: >- 6 | A Python package that converts well-formatted JSON 7 | into NetCDF4 files. 
8 | authors: 9 | - given-names: Sam 10 | family-names: Harrison 11 | email: sharrison@ceh.ac.uk 12 | affiliation: UK Centre for Ecology & Hydrology 13 | orcid: 'https://orcid.org/0000-0001-8491-4720' 14 | license: BSD-3-Clause 15 | url: https://github.com/samharrison7/json2netcdf.git -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution Guide 2 | 3 | Thanks for considering contributing to json2netcdf. We very much welcome your input. 4 | 5 | ## Bug reports 6 | 7 | Whilst bug reports are very welcome, we strongly encourage pull requests to accompany these. Reports should be made via the issue tracker and should include enough information to be fully reproducible. This includes information on the system and compiler you are using, as well as detailed information about what you were doing that caused the bug. 8 | 9 | ## New and updated features 10 | 11 | We welcome new and updated features via pull requests. Only completed features that do not require any further coding to become functional will be accepted. If you have an idea for a feature but don't have the time to code it, then consider raising a feature request issue. If you have some code towards a feature but won't have time to finish it, then raise an issue highlighting this, and if we think the feature is valuable enough, we may open a new feature branch for you to submit your code to. 12 | 13 | ## Which branch? 14 | 15 | Please send pull requests for all contributions (bug reports and features) to the `develop` branch. 16 | 17 | ## Security vulnerabilities 18 | 19 | If you discover a security vulnerability, please send an email to Sam Harrison at [samharrison.xg@gmail.com](mailto:samharrison.xg@gmail.com). Please do not submit an issue with the vulnerability. 20 | 21 | ## Coding style 22 | 23 | Please use pep8 and comment your code well. Remember that code is (probably) read more often than it is written. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Sam Harrison 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # json2netcdf
2 | 
3 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4286216.svg)](https://doi.org/10.5281/zenodo.4286216)
4 | [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8B-yellow)](https://fair-software.eu)
5 | [![PyPI version](https://badge.fury.io/py/json2netcdf.svg)](https://badge.fury.io/py/json2netcdf)
6 | 
7 | json2netcdf is a Python package to convert JSON data into NetCDF4 data. The motivation? A quick and easy way to write NetCDF input files without having to hand-craft a script to do so. JSON files are simple, easy to understand and write, and, crucially, follow a hierarchical format.
8 | 
9 | Features:
10 | - Programmatic and command line interfaces.
11 | - Converts well-formatted JSON files and Python dictionaries to NetCDF files.
12 | - NetCDF files can be physical or in-memory (diskless).
13 | - Nested JSON files can be specified.
14 | - Internally uses the Python `netCDF4` package and returns `Dataset` objects.
15 | - Groups, attributes, dimensions, variables and multiple datatypes are supported.
16 | 
17 | ## Getting started
18 | 
19 | You can use pip to install json2netcdf as a programmatic and command line interface:
20 | 
21 | ```bash
22 | $ pip install json2netcdf
23 | ```
24 | 
25 | A Conda environment file is also provided with the required libraries for developing or extending the package:
26 | 
27 | ```bash
28 | $ conda env create -f environment.yml
29 | $ conda activate json2netcdf
30 | ```
31 | 
32 | ## Usage
33 | 
34 | The package has one main function, `convert`, which does the file/data conversion. See below for the required formatting of JSON files.
35 | 
36 | ```python
37 | >>> import json2netcdf
38 | >>> json2netcdf.convert(from_json={'my_var': 42}, diskless=True)
39 | <class 'netCDF4._netCDF4.Dataset'>
40 | root group (NETCDF4 data model, file format HDF5):
41 |     dimensions(sizes):
42 |     variables(dimensions): int64 my_var()
43 |     groups:
44 | ```
45 | 
46 | `from_json` can be a Python dictionary or the path to a JSON file. Set `diskless` to `True` for an in-memory NetCDF dataset to be returned (default is `False`). `to_netcdf` can be used to specify the location of the output NetCDF file (defaults to `data.nc`). The returned dataset can be used as a context manager, and if it isn't, the user is responsible for closing it (`nc_file.close()`). Using the [example/data.json](https://github.com/samharrison7/json2netcdf/blob/develop/example/data.json) file:
47 | 
48 | ```python
49 | >>> with json2netcdf.convert(from_json='example/data.json', to_netcdf='data.nc') as nc_file:
50 | ...     nc_file['var_group']['spatial_var']
51 | ...
52 | <class 'netCDF4._netCDF4.Variable'>
53 | int64 spatial_var(x, y)
54 | path = /var_group
55 | unlimited dimensions:
56 | current shape = (2, 2)
57 | filling on, default _FillValue of -9223372036854775806 used
58 | ```
59 | 
60 | For more information on using the returned NetCDF `Dataset` object, see the [netCDF4 library documentation](https://unidata.github.io/netcdf4-python/).
61 | 
62 | ### Command line interface
63 | 
64 | There is a command line interface which acts as a wrapper around `json2netcdf.convert`. It requires you to specify paths to the input JSON file and output NetCDF file:
65 | 
66 | ```
67 | usage: json2netcdf [-h] [-v] input output
68 | 
69 | Convert JSON to NetCDF files.
70 | 
71 | positional arguments:
72 |   input          path to the input JSON file
73 |   output         path to store the output NetCDF file to
74 | 
75 | optional arguments:
76 |   -h, --help     show this help message and exit
77 |   -v, --verbose  make terminal output more verbose
78 | ```
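For example, a typical invocation converting the bundled example file into `data.nc` from the repository root might look like this:

```bash
$ json2netcdf example/data.json data.nc
```

Pass `-v` if you want more verbose terminal output while the file is converted.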
79 | 
80 | ## JSON input format
81 | 
82 | Your JSON data must be well formatted, following the conventions described below. Take a look at the example JSON file at [example/data.json](https://github.com/samharrison7/json2netcdf/blob/develop/example/data.json) for an idea of how to format your JSON file. In this example, we are trying to create a NetCDF file with the following data structure:
83 | 
84 | ```
85 | var_group (group)
86 |     spatial_var = [[1, 2], [3, 4]]
87 |     spatiotemporal_var = [[[1.1, 1.2], [1.3, 1.4]], [[1.5, 1.6], [1.7, 1.8]]]
88 | scalar_var = 42.0
89 | ```
90 | 
91 | ### Dimensions
92 | 
93 | The file will have one group, `var_group`, with two variables: `spatial_var` is a 2D array and `spatiotemporal_var` is a 3D array. There is also a scalar variable, `scalar_var`, which belongs to the root group. As this is a NetCDF file, we need to specify dimensions for the variables, so let's say that `spatial_var` has `(x,y)` dimensions, and `spatiotemporal_var` has `(x,y,t)` dimensions. In this example, each of these has a size of 2. To define this in the JSON file, we create a `dimensions` object:
94 | 
95 | ```json
96 | {
97 |     "dimensions" : {
98 |         "x" : 2, "y" : 2, "t": 2
99 |     }
100 | }
101 | ```
102 | 
103 | ### Groups and variables
104 | 
105 | We can now create an object for the `var_group` and place these variables in it. The square bracket notation is used to tell the script what dimensions your variables have. We will also create `scalar_var` in the root group, which doesn't have any dimensions associated with it:
106 | 
107 | ```json
108 | {
109 |     "dimensions" : {
110 |         "x" : 2, "y" : 2, "t": 2
111 |     },
112 |     "var_group" : {
113 |         "spatial_var[x,y]" : [[1, 2], [3, 4]],
114 |         "spatiotemporal_var[x,y,t]" : [[[1.1, 1.2], [1.3, 1.4]], [[1.5, 1.6], [1.7, 1.8]]]
115 |     },
116 |     "scalar_var": 42.0
117 | }
118 | ```
119 | 
120 | Here, the dimensions are available from the root group (i.e. to all groups in the NetCDF file's hierarchy). If you want to add dimensions specifically for certain groups, you can include a `dimensions` object within that group.
121 | 
122 | ### Datatype
123 | 
124 | The datatype of each variable is deduced automatically. In this example, `spatial_var` will have a datatype of `int64`, and the other variables will have a datatype of `double`. Internally, NumPy is responsible for deducing the variable type and, at present, there is no way to specify what datatype your variable should be ([pull requests are welcome!](https://github.com/samharrison7/json2netcdf/blob/develop/CONTRIBUTING.md)).
125 | 
126 | ### Attributes
127 | 
128 | Attributes can be added to the NetCDF file by creating an `attributes` object in the group you wish to add the attributes to. For example, to add attributes to the root group:
129 | 
130 | ```json
131 | {
132 |     "dimensions" : {
133 |         "x" : 2, "y" : 2, "t": 2
134 |     },
135 |     "attributes" : {
136 |         "description" : "Example data file",
137 |         "author" : "Sam Harrison"
138 |     },
139 |     "var_group" : {
140 |         "spatial_var[x,y]" : [[1, 2], [3, 4]],
141 |         "spatiotemporal_var[x,y,t]" : [[[1.1, 1.2], [1.3, 1.4]], [[1.5, 1.6], [1.7, 1.8]]]
142 |     },
143 |     "scalar_var": 42.0
144 | }
145 | ```
146 | 
147 | Attributes cannot yet be added to variables. [Pull requests are welcome!](https://github.com/samharrison7/json2netcdf/blob/develop/CONTRIBUTING.md).
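Once converted, group attributes are exposed as attributes on the corresponding `Dataset` (or group) object, so the attributes defined above can be read straight back off the returned dataset. For example, mirroring the package's own test suite:

```python
>>> with json2netcdf.convert(from_json='example/data.json', diskless=True) as nc_file:
...     nc_file.author
...
'Sam Harrison'
```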
148 | 
149 | ### Multiple input files
150 | 
151 | The main input file specified when running the script can contain references to other JSON files within its data structure, so that large datasets can be split across multiple files. The path to the external file must be prefixed with `file::`, and the contents of that file are imported in place of that path. Therefore, either variables or entire groups can be imported. An example is given at [example/external.json](https://github.com/samharrison7/json2netcdf/blob/develop/example/external.json):
152 | 
153 | ```json
154 | {
155 |     "dimensions" : {
156 |         "x" : 10,
157 |         "y" : 5
158 |     },
159 |     "external_group" : "file::external_group.json",
160 |     "external_var[x,y]" : "file::external_var.json"
161 | }
162 | ```
163 | 
164 | Bear in mind that, if importing an array variable, the dimensions of the array must be present in the parent file. Imported files can themselves include file imports.
165 | 
166 | ## Dict to NetCDF, YAML to NetCDF, TOML to NetCDF...
167 | 
168 | Whilst this library is primarily for JSON to NetCDF4 conversion, you will notice that it is really just a Python dictionary to NetCDF4 converter with the ability to also open JSON files. This means it can be used to convert other data formats without too much trouble. For example, say we have the following YAML:
169 | 
170 | ```yaml
171 | dimensions:
172 |     x: 4
173 | var_group:
174 |     my_var[x]: [1, 2, 3, 4]
175 | ```
176 | 
177 | We can use the [PyYAML library](https://pyyaml.org/wiki/PyYAMLDocumentation) to load this as a dict, before converting it to NetCDF:
178 | 
179 | ```python
180 | >>> import yaml
181 | >>> import json2netcdf
182 | >>> data = yaml.safe_load("""
183 | ... dimensions:
184 | ...     x: 4
185 | ... var_group:
186 | ...     my_var[x]: [1, 2, 3, 4]
187 | ... """)
188 | >>> json2netcdf.convert(data, diskless=True)
189 | <class 'netCDF4._netCDF4.Dataset'>
190 | root group (NETCDF4 data model, file format HDF5):
191 |     dimensions(sizes): x(4)
192 |     variables(dimensions):
193 |     groups: var_group
194 | ```
195 | 
196 | ## Limitations
197 | 
198 | This script is a simple way to create NetCDF files from JSON data, and doesn't support the full feature set that NetCDF offers. A few specific limitations are:
199 | - There is no way to specify attributes for variables, only groups.
200 | - Datatypes are deduced automatically and cannot be set explicitly (though see the sketch below for a partial workaround via the programmatic interface).
201 | - Only NetCDF4 files can be created.
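On that last point about datatypes: one partial workaround, when using the programmatic interface, is to pass NumPy arrays rather than plain lists in the input dictionary. The converter derives each variable's type from the NumPy representation of its data, so an array's existing dtype is carried through to the NetCDF variable. This is not an officially documented feature of the package, so treat the snippet below as a minimal sketch rather than a guaranteed API:

```python
import numpy as np
import json2netcdf

data = {
    'dimensions': {'x': 4},
    # A float32 array keeps its dtype when the NetCDF variable is created
    'my_var[x]': np.array([1, 2, 3, 4], dtype=np.float32),
}
with json2netcdf.convert(from_json=data, diskless=True) as nc_file:
    print(nc_file['my_var'].dtype)  # float32
```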
-------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: json2netcdf 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python 6 | - pytest 7 | - numpy 8 | - netCDF4 9 | - build 10 | - twine -------------------------------------------------------------------------------- /example/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions" : { 3 | "x" : 2, 4 | "y" : 2, 5 | "t" : 2 6 | }, 7 | "attributes" : { 8 | "description" : "Example data file", 9 | "author" : "Sam Harrison" 10 | }, 11 | "var_group" : { 12 | "spatial_var[x,y]" : [[1, 2], [3, 4]], 13 | "spatiotemporal_var[x,y,t]" : [[[1.1, 1.2], [1.3, 1.4]], [[1.5, 1.6], [1.7, 1.8]]] 14 | }, 15 | "scalar_var" : 42.0 16 | } 17 | -------------------------------------------------------------------------------- /example/external.json: -------------------------------------------------------------------------------- 1 | { 2 | "dimensions" : { 3 | "x" : 10, 4 | "y" : 5 5 | }, 6 | "external_group" : "file::external_group.json", 7 | "external_var[x,y]" : "file::external_var.json" 8 | } 9 | -------------------------------------------------------------------------------- /example/external_group.json: -------------------------------------------------------------------------------- 1 | { 2 | "string_var" : "hello" 3 | } -------------------------------------------------------------------------------- /example/external_var.json: -------------------------------------------------------------------------------- 1 | [[0.40651776, 0.90161085, 0.01940297, 0.35544921, 0.18450805], 2 | [0.04103999, 0.52146902, 0.22269805, 0.10768315, 0.37110202], 3 | [0.41918195, 0.27591318, 0.33183685, 0.56783628, 0.44206814], 4 | [0.81542372, 0.34241464, 0.07808869, 0.39533565, 0.24629834], 5 | [0.38712621, 0.11625967, 0.61545108, 0.56651808, 0.38636763], 6 | [0.6949791 , 0.00315513, 0.31391201, 0.3685991 , 0.07028275], 7 | [0.99120702, 0.97580757, 0.68916023, 0.81315521, 0.34397308], 8 | [0.17172243, 0.83031933, 0.49601777, 0.88932462, 0.03468772], 9 | [0.32318425, 0.71910279, 0.18715064, 0.30357312, 0.97281634], 10 | [0.33693462, 0.25939414, 0.70024207, 0.21321536, 0.11704582]] -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=42"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = json2netcdf 3 | version = 1.1.2 4 | author = Sam Harrison 5 | author_email = samharrison.xg@gmail.com 6 | description = Package to convert JSON to NetCDF data 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/samharrison7/json2netcdf 10 | project_urls = 11 | Bug Tracker = https://github.com/samharrison7/json2netcdf/issues 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | License :: OSI Approved :: BSD License 15 | Operating System :: OS Independent 16 | 17 | [options] 18 | package_dir = 19 | = src 20 | packages = find: 21 | python_requires = >=3.6 22 | install_requires = 23 | numpy 24 | netCDF4 25 | 26 | [options.packages.find] 27 | where = src 28 | 29 | 
[options.entry_points] 30 | console_scripts = 31 | json2netcdf = json2netcdf.__main__:main -------------------------------------------------------------------------------- /src/json2netcdf/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | json2netcdf 3 | =========== 4 | 5 | Python package for converting JSON data to NetCDF data. 6 | 7 | Functions 8 | --------- 9 | convert 10 | Converts JSON data, as a file or Python dictionary, to NetCDF data, 11 | either in-memory (diskless) or to a physical file. 12 | """ 13 | 14 | from .json2netcdf import convert 15 | -------------------------------------------------------------------------------- /src/json2netcdf/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Command line interface to the JSON to NetCDF4 converter""" 3 | import argparse 4 | import json2netcdf 5 | 6 | 7 | def main(): 8 | # Parse the command line arguments 9 | parser = argparse.ArgumentParser(description='Convert JSON to NetCDF files.') 10 | parser.add_argument('input', help='path to the input JSON file') 11 | parser.add_argument('output', help='path to store the output NetCDF file to') 12 | parser.add_argument('-v', '--verbose', action='store_true', help='make terminal output more verbose') 13 | args = parser.parse_args() 14 | # Do the conversion 15 | nc_file = json2netcdf.convert(args.input, args.output, log_level=1 if not args.verbose else 2) 16 | nc_file.close() 17 | 18 | 19 | if __name__ == '__main__': 20 | main() -------------------------------------------------------------------------------- /src/json2netcdf/json2netcdf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | from copy import deepcopy 5 | import numpy as np 6 | from netCDF4 import Dataset 7 | 8 | 9 | def convert(from_json, to_netcdf='data.nc', diskless=False, log_level=0): 10 | """ 11 | Converts JSON data, as a file or Python dictionary, to NetCDF data, 12 | either in-memory (diskless) or to a physical file. 13 | 14 | Parameters 15 | ---------- 16 | from_json : str or dict 17 | Either the path to the JSON file to convert, or a dictionary of 18 | data to convert. 19 | to_netcdf : str, default 'data.nc' 20 | Path to create the output NetCDF file. Ignored if `diskless=True`. 21 | diskless : bool, default False 22 | If `True`, create the NetCDF dataset in-memory only. 23 | log_level : {0, 1, 2} 24 | Controls the verbosity of terminal output, with 0 being no output 25 | and 2 being the most output. 26 | 27 | Returns 28 | ------- 29 | netCDF4._netCDF4.Dataset 30 | NetCDF4 Dataset object containing the converted data. 31 | """ 32 | 33 | # Is "from" a file path, in which case open it 34 | if type(from_json) is not dict: 35 | with open(from_json) as json_file: 36 | json_data = json.loads(json_file.read()) 37 | base_dir = os.path.dirname(from_json) 38 | else: 39 | json_data = from_json 40 | base_dir = None 41 | # Create the NetCDF file 42 | nc_data = Dataset(to_netcdf, 'w', diskless=diskless) 43 | # Do the conversion 44 | _, _, nc_data = __parse(json_data, nc_data, log_level=log_level, base_dir=base_dir) 45 | # Return the dataset 46 | return nc_data 47 | 48 | 49 | def __parse(json_group, nc_data, hierarchy=[], root=True, log_level=0, base_dir=None, 50 | data_to_fill=[], variables_to_fill=[]): 51 | """Parse the data and turn into NetCDF file. 
Parse will only ever be called for groups;
52 |     variables within that group are created by the parse call for that group.
53 |     The parse function is called recursively."""
54 |     # Local names reference the same object, so appending to hierarchy without copying it first
55 |     # alters everything that refers to it, i.e. sibling groups would end up as children of their siblings
56 |     hierarchy = deepcopy(hierarchy)
57 |     data_to_fill = deepcopy(data_to_fill)
58 |     variables_to_fill = deepcopy(variables_to_fill)
59 |     # Get the NC group we're currently in
60 |     current_group = nc_data['/' + '/'.join(hierarchy)] if not root else nc_data
61 |     # Get the dimensions first, because if they're not first in the json_group, then parsing a var
62 |     # that uses them will fail
63 |     if 'dimensions' in json_group:
64 |         for dim_name, size in json_group['dimensions'].items():
65 |             # Dimension will be the specified size if it's a positive integer, else unlimited
66 |             current_group.createDimension(dim_name, (size if (isinstance(size, int) and size > 0) else None))
67 |     # Loop through this group's items and check if they're attributes, dimensions or data/groups
68 |     for name, data in json_group.items():
69 |         # If this item is a set of attributes, create them
70 |         if name == 'attributes':
71 |             for att_name, value in data.items():
72 |                 setattr(current_group, att_name, value)
73 |         # If this item is the dimensions, ignore it (as we already created dimensions above)
74 |         elif name == 'dimensions':
75 |             pass
76 |         # If this item is a group
77 |         elif isinstance(data, dict):
78 |             # Create this group
79 |             _ = nc_data.createGroup('/' + '/'.join(hierarchy + [name]))
80 |             # If the verbose option was specified, print that we're creating this group
81 |             if (log_level > 1) and len(hierarchy) < 2:
82 |                 print(f'Creating group {name}')
83 |             data_to_fill, variables_to_fill, _ = __parse(data, nc_data, hierarchy + [name], root=False,
84 |                                                          log_level=log_level, base_dir=base_dir,
85 |                                                          data_to_fill=data_to_fill,
86 |                                                          variables_to_fill=variables_to_fill)
87 |         # Otherwise, it must be data or an external file
88 |         else:
89 |             # Is this variable referencing an external file?
90 |             if isinstance(data, str) and data[:6] == "file::":
91 |                 file_path = data[6:]
92 |                 with open(os.path.join(base_dir, file_path)) as external_data:
93 |                     external_data = json.loads(external_data.read())
94 |                 # If the external data is a group
95 |                 if isinstance(external_data, dict):
96 |                     _ = nc_data.createGroup('/' + '/'.join(hierarchy + [name]))
97 |                     data_to_fill, variables_to_fill, _ = __parse(external_data, nc_data, hierarchy + [name],
98 |                                                                  root=False, log_level=log_level, base_dir=base_dir,
99 |                                                                  data_to_fill=data_to_fill,
100 |                                                                  variables_to_fill=variables_to_fill)
101 |                 # Otherwise, it must be a variable
102 |                 else:
103 |                     data_to_fill, variables_to_fill = __parse_var(name, external_data, nc_data, hierarchy, log_level,
104 |                                                                   data_to_fill=data_to_fill,
105 |                                                                   variables_to_fill=variables_to_fill)
106 |             # Otherwise, it must be data (not from an external file)
107 |             else:
108 |                 data_to_fill, variables_to_fill = __parse_var(name, data, nc_data, hierarchy, log_level,
109 |                                                               data_to_fill=data_to_fill,
110 |                                                               variables_to_fill=variables_to_fill)
111 | 
112 |     # If this is the root group and we have got this far, we must be done creating variables/groups
113 |     # and can eventually fill them. This step is left until last to speed things up.
114 |     if root:
115 |         if log_level > 0:
116 |             print(f'Filling variables ({len(data_to_fill)}) with data')
117 |         for i, data in enumerate(data_to_fill):
118 |             try:
119 |                 variables_to_fill[i][:] = data
120 |             except (IndexError, ValueError) as err:
121 |                 print(f'{err}. Variable: {variables_to_fill[i].group().path}/{variables_to_fill[i].name}')
122 | 
123 |     # Finally, return the NetCDF dataset
124 |     return data_to_fill, variables_to_fill, nc_data
125 | 
126 | 
127 | def __parse_var(name, data, nc_data, hierarchy, log_level, data_to_fill, variables_to_fill):
128 |     """Parse a variable item, given its name, data and hierarchy"""
129 |     # Get the dimensions from the name, which are between square brackets
130 |     dimensions = re.findall(r'\[(.*?)\]', name)
131 |     if len(dimensions) > 0:
132 |         dimensions = dimensions[0].split(',')
133 |     # Then retrieve just the name, without the dimensions (square brackets)
134 |     parsed_name = name.split('[')[0]
135 |     # Convert to a NumPy array to get a dtype object
136 |     np_data = np.array(data)
137 |     # Append to the list of data so that we can use it later to fill the
138 |     # variable we're about to create
139 |     data_to_fill.append(np_data)
140 |     # Try and create the variable
141 |     try:
142 |         nc_var = nc_data.createVariable(
143 |             '/' + '/'.join(hierarchy + [parsed_name]),
144 |             np_data.dtype,
145 |             tuple(dimensions)
146 |         )
147 |         if (log_level > 1) and len(hierarchy) < 2:
148 |             print(f'Creating variable {parsed_name}')
149 |     except TypeError as err:
150 |         print(f'{err}. Variable: /{"/".join(hierarchy + [parsed_name])}')
151 |         # Variable creation failed, so remove its data from the fill list and skip it
152 |         data_to_fill.pop()
153 |         return data_to_fill, variables_to_fill
154 | 
155 |     # Add the newly created variable to the list of variables to fill later
156 |     variables_to_fill.append(nc_var)
157 | 
158 |     return data_to_fill, variables_to_fill
159 | 
--------------------------------------------------------------------------------
/tests/test_json2netcdf.py:
--------------------------------------------------------------------------------
1 | """
2 | Tests for the json2netcdf package. Run `pytest` in the root directory.
3 | You must install the package before running these tests. This can be done
4 | in an editable state (i.e. so edits to the source code will automatically
5 | be included in the installed package) by running `pip install -e .` in the
6 | project root directory.
7 | """ 8 | import os 9 | import numpy as np 10 | import json2netcdf 11 | 12 | root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 13 | 14 | def test_var(): 15 | with json2netcdf.convert(from_json=os.path.join(root_dir, 'example/data.json'), 16 | diskless=True) as nc_file: 17 | assert nc_file['var_group']['spatial_var'][:].shape == (2,2) 18 | assert nc_file['var_group']['spatial_var'][:].sum() == 10 19 | assert nc_file['var_group']['spatiotemporal_var'].dtype == 'float64' 20 | 21 | def test_attr(): 22 | with json2netcdf.convert(from_json=os.path.join(root_dir, 'example/data.json'), 23 | diskless=True) as nc_file: 24 | assert nc_file.author == 'Sam Harrison' 25 | 26 | def test_external(): 27 | with json2netcdf.convert(from_json=os.path.join(root_dir, 'example/external.json'), 28 | diskless=True) as nc_file: 29 | assert nc_file['external_group']['string_var'][:] == 'hello' 30 | assert nc_file['external_var'][:].sum() == 20.9365634 31 | 32 | def test_file_creation(tmp_path): 33 | nc_path = os.path.join(tmp_path, 'test.nc') 34 | nc_file = json2netcdf.convert(from_json={}, to_netcdf=nc_path) 35 | assert os.path.isfile(nc_path) 36 | nc_file.close() 37 | 38 | def test_dict_to_nc(): 39 | data = { 40 | 'dimensions': { 41 | 'x': 10 42 | }, 43 | 'var[x]': np.full((10,), 42) 44 | } 45 | with json2netcdf.convert(from_json=data, diskless=True) as nc_file: 46 | assert np.array_equal(nc_file['var'][:], np.full((10,), 42)) 47 | --------------------------------------------------------------------------------