"
108 | ]
109 | },
110 | "execution_count": 3,
111 | "metadata": {},
112 | "output_type": "execute_result"
113 | }
114 | ],
115 | "source": [
116 | "hindcast = cml.load_dataset(\n",
117 | " \"s2s-ai-challenge-training-input\", origin=\"ecmwf\", parameter=\"tp\", format=\"zarr\"\n",
118 | ").to_xarray()\n",
119 | "\n",
120 | "hindcast.coords"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | "# forecast"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 4,
133 | "metadata": {
134 | "ExecuteTime": {
135 | "start_time": "2021-03-08T13:24:36.784Z"
136 | },
137 | "scrolled": true
138 | },
139 | "outputs": [
140 | {
141 | "name": "stdout",
142 | "output_type": "stream",
143 | "text": [
144 | "By downloading data from this dataset, you agree to the terms and conditions defined at https://apps.ecmwf.int/datasets/data/s2s/licence/. If you do not agree with such terms, do not download the data. \n"
145 | ]
146 | },
147 | {
148 | "data": {
149 | "text/html": [
150 | "<xarray.Dataset>\n",
151 | "Dimensions: (forecast_time: 53, latitude: 121, lead_time: 47, longitude: 240, realization: 51)\n",
152 | "Coordinates:\n",
153 | " * forecast_time (forecast_time) datetime64[ns] 2020-01-02 ... 2020-12-31\n",
154 | " * latitude (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
155 | " * lead_time (lead_time) timedelta64[ns] 0 days 1 days ... 45 days 46 days\n",
156 | " * longitude (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
157 | " * realization (realization) int64 0 1 2 3 4 5 6 7 ... 44 45 46 47 48 49 50\n",
158 | " valid_time (forecast_time, lead_time) datetime64[ns] dask.array<chunksize=(53, 47), meta=np.ndarray>\n",
159 | "Data variables:\n",
160 | " tp (realization, forecast_time, lead_time, latitude, longitude) float32 dask.array<chunksize=(6, 2, 47, 121, 240), meta=np.ndarray>\n",
161 | "Attributes:\n",
162 | " Conventions: CF-1.7\n",
163 | " GRIB_centre: ecmf\n",
164 | " GRIB_centreDescription: European Centre for Medium-Range Weather Forecasts\n",
165 | " GRIB_edition: 2\n",
166 | " GRIB_subCentre: 0\n",
167 | " history: 2021-05-10T15:46:13 GRIB to CDM+CF via cfgrib-0....\n",
168 | " institution: European Centre for Medium-Range Weather Forecasts
"
169 | ],
170 | "text/plain": [
171 | "\n",
172 | "Dimensions: (forecast_time: 53, latitude: 121, lead_time: 47, longitude: 240, realization: 51)\n",
173 | "Coordinates:\n",
174 | " * forecast_time (forecast_time) datetime64[ns] 2020-01-02 ... 2020-12-31\n",
175 | " * latitude (latitude) float64 90.0 88.5 87.0 85.5 ... -87.0 -88.5 -90.0\n",
176 | " * lead_time (lead_time) timedelta64[ns] 0 days 1 days ... 45 days 46 days\n",
177 | " * longitude (longitude) float64 0.0 1.5 3.0 4.5 ... 355.5 357.0 358.5\n",
178 | " * realization (realization) int64 0 1 2 3 4 5 6 7 ... 44 45 46 47 48 49 50\n",
179 | " valid_time (forecast_time, lead_time) datetime64[ns] dask.array\n",
180 | "Data variables:\n",
181 | " tp (realization, forecast_time, lead_time, latitude, longitude) float32 dask.array\n",
182 | "Attributes:\n",
183 | " Conventions: CF-1.7\n",
184 | " GRIB_centre: ecmf\n",
185 | " GRIB_centreDescription: European Centre for Medium-Range Weather Forecasts\n",
186 | " GRIB_edition: 2\n",
187 | " GRIB_subCentre: 0\n",
188 | " history: 2021-05-10T15:46:13 GRIB to CDM+CF via cfgrib-0....\n",
189 | " institution: European Centre for Medium-Range Weather Forecasts"
190 | ]
191 | },
192 | "execution_count": 4,
193 | "metadata": {},
194 | "output_type": "execute_result"
195 | }
196 | ],
197 | "source": [
198 | "forecast = cml.load_dataset(\"s2s-ai-challenge-test-input\", origin=\"ecmwf\", parameter=[\"tp\"], format=\"zarr\").to_xarray()\n",
199 | "\n",
200 | "forecast"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {},
207 | "outputs": [],
208 | "source": []
209 | }
210 | ],
211 | "metadata": {
212 | "kernelspec": {
213 | "display_name": "Python 3",
214 | "language": "python",
215 | "name": "python3"
216 | },
217 | "language_info": {
218 | "codemirror_mode": {
219 | "name": "ipython",
220 | "version": 3
221 | },
222 | "file_extension": ".py",
223 | "mimetype": "text/x-python",
224 | "name": "python",
225 | "nbconvert_exporter": "python",
226 | "pygments_lexer": "ipython3",
227 | "version": "3.7.10"
228 | },
229 | "toc": {
230 | "base_numbering": 1,
231 | "nav_menu": {},
232 | "number_sections": true,
233 | "sideBar": true,
234 | "skip_h1_title": false,
235 | "title_cell": "Table of Contents",
236 | "title_sidebar": "Contents",
237 | "toc_cell": false,
238 | "toc_position": {
239 | "height": "calc(100% - 180px)",
240 | "left": "10px",
241 | "top": "150px",
242 | "width": "378.4px"
243 | },
244 | "toc_section_display": true,
245 | "toc_window_display": true
246 | }
247 | },
248 | "nbformat": 4,
249 | "nbformat_minor": 4
250 | }
251 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 120
3 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | scipy
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | climetlab
2 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # (C) Copyright 2020 ECMWF.
3 | #
4 | # This software is licensed under the terms of the Apache Licence Version 2.0
5 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6 | # In applying this licence, ECMWF does not waive the privileges and immunities
7 | # granted to it by virtue of its status as an intergovernmental organisation
8 | # nor does it submit to any jurisdiction.
9 | #
10 |
11 |
12 | import io
13 | import os
14 |
15 | import setuptools
16 |
17 |
def read(fname):
    """Return the text content of ``fname``, decoded as UTF-8.

    ``fname`` is resolved relative to the directory containing this file.
    """
    file_path = os.path.join(os.path.dirname(__file__), fname)
    # Use a context manager so the handle is closed deterministically
    # instead of being left open until garbage collection.
    with io.open(file_path, encoding="utf-8") as f:
        return f.read()
21 |
22 |
23 | package_name = "climetlab-s2s-ai-challenge"
24 |
# Read the version from the ``__version__`` assignment in the package's
# __init__.py, without importing the package itself.
init_py = os.path.join(package_name.replace("-", "_"), "__init__.py")
version_lines = [text for text in read(init_py).split("\n") if text.startswith("__version__")]
# The last matching line wins; [1:-1] drops the surrounding quote characters.
version = version_lines[-1].split("=")[-1].strip()[1:-1] if version_lines else None
assert version
31 |
32 |
33 | extras_require = {"zarr": ["zarr", "s3fs"]}
34 |
35 | setuptools.setup(
36 | name=package_name,
37 | version=version,
38 | description="Climetlab external dataset plugin for the S2S AI competition organised by ECMWF",
39 | long_description=read("README.md"),
40 | long_description_content_type="text/markdown",
41 | author="European Centre for Medium-Range Weather Forecasts (ECMWF)",
42 | author_email="software.support@ecmwf.int",
43 | license="Apache License Version 2.0",
44 | url="https://github.com/ecmwf-lab/climetlab-s2s-ai-challenge",
45 | packages=setuptools.find_packages(),
46 | include_package_data=True,
47 | install_requires=["climetlab>=0.9.3"],
48 | extras_require=extras_require,
49 | zip_safe=True,
50 | entry_points={
51 | "climetlab.datasets": [
52 | "s2s-ai-challenge-observations = climetlab_s2s_ai_challenge.observations:RawObservations",
53 | # Domain style
54 | "s2s-ai-challenge-hindcast-input = climetlab_s2s_ai_challenge.fields:TrainingInput",
55 | "s2s-ai-challenge-forecast-input = climetlab_s2s_ai_challenge.fields:TestInput",
56 | "s2s-ai-challenge-hindcast-like-observations = climetlab_s2s_ai_challenge.observations:HindcastLikeObservations",
57 | "s2s-ai-challenge-forecast-like-observations = climetlab_s2s_ai_challenge.observations:ForecastLikeObservations",
58 | "s2s-ai-challenge-hindcast-benchmark = climetlab_s2s_ai_challenge.benchmark:HindcastBenchmark",
59 | "s2s-ai-challenge-forecast-benchmark = climetlab_s2s_ai_challenge.benchmark:ForecastBenchmark",
60 | # ML style
61 | "s2s-ai-challenge-training-input = climetlab_s2s_ai_challenge.fields:TrainingInput",
62 | "s2s-ai-challenge-test-input = climetlab_s2s_ai_challenge.fields:TestInput",
63 | "s2s-ai-challenge-training-output-reference = climetlab_s2s_ai_challenge.observations:TrainingOutputReference", # noqa: E501
64 | "s2s-ai-challenge-training-output-benchmark = climetlab_s2s_ai_challenge.benchmark:TrainingOutputBenchmark",
65 | "s2s-ai-challenge-test-output-reference = climetlab_s2s_ai_challenge.observations:TestOutputReference",
66 | "s2s-ai-challenge-test-output-benchmark = climetlab_s2s_ai_challenge.benchmark:TestOutputBenchmark",
67 | ]
68 | },
69 | keywords="meteorology",
70 | classifiers=[
71 | "Development Status :: 3 - Alpha",
72 | "Intended Audience :: Developers",
73 | "License :: OSI Approved :: Apache Software License",
74 | "Programming Language :: Python :: 3",
75 | "Programming Language :: Python :: 3.6",
76 | "Programming Language :: Python :: 3.7",
77 | "Programming Language :: Python :: 3.8",
78 | "Programming Language :: Python :: Implementation :: CPython",
79 | "Programming Language :: Python :: Implementation :: PyPy",
80 | "Operating System :: OS Independent",
81 | ],
82 | )
83 |
--------------------------------------------------------------------------------
/tests/test_availability.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # (C) Copyright 2020 ECMWF.
4 | #
5 | # This software is licensed under the terms of the Apache Licence Version 2.0
6 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7 | # In applying this licence, ECMWF does not waive the privileges and immunities
8 | # granted to it by virtue of its status as an intergovernmental organisation
9 | # nor does it submit to any jurisdiction.
10 | #
11 |
12 | import os
13 |
14 | import climetlab as cml
15 | import pytest
16 |
17 | is_test = os.environ.get("TEST_FAST", False)
18 |
19 |
20 | def get_dataset(origin, param):
21 | return cml.load_dataset(
22 | "s2s-ai-challenge-test-input",
23 | dev=is_test,
24 | origin=origin,
25 | date="20200102",
26 | parameter=param,
27 | format="netcdf",
28 | )
29 |
30 |
31 | @pytest.mark.parametrize(
32 | "args",
33 | [
34 | ["ecmwf", "t2m"],
35 | ["eccc", "t2m"],
36 | ],
37 | )
38 | def test_availabilty_1(args):
39 | print(get_dataset(origin=args[0], param=args[1]).to_xarray())
40 |
41 |
42 | @pytest.mark.parametrize(
43 | "args",
44 | [
45 | ["eccc", "st100"],
46 | ["ncep", "rsn"],
47 | ],
48 | )
49 | def test_availability_2(args):
50 | with pytest.raises(ValueError):
51 | print(get_dataset(origin=args[0], param=args[1]).to_xarray())
52 |
53 |
54 | def test_availability_3():
55 | cml.load_dataset(
56 | "s2s-ai-challenge-training-input", date=[20100107], origin="ncep", parameter="tp", format="netcdf"
57 | ).to_xarray()
58 |
59 |
60 | if __name__ == "__main__":
61 | from climetlab.testing import main
62 |
63 | main(__file__)
64 |
--------------------------------------------------------------------------------
/tests/test_benchmarks.py:
--------------------------------------------------------------------------------
1 | import climetlab as cml
2 |
3 | PARAMS = ["t2m", "tp"]
4 |
5 |
6 | def test_benchmark_1():
7 | ds = cml.load_dataset("s2s-ai-challenge-test-output-benchmark", parameter=PARAMS)
8 | print(ds.to_xarray())
9 |
10 |
11 | def test_benchmark_2():
12 | for p in PARAMS:
13 | ds = cml.load_dataset(
14 | "s2s-ai-challenge-test-output-benchmark",
15 | parameter=p,
16 | )
17 | print(ds.to_xarray())
18 |
--------------------------------------------------------------------------------
/tests/test_cfconventions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # (C) Copyright 2020 ECMWF.
4 | #
5 | # This software is licensed under the terms of the Apache Licence Version 2.0
6 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7 | # In applying this licence, ECMWF does not waive the privileges and immunities
8 | # granted to it by virtue of its status as an intergovernmental organisation
9 | # nor does it submit to any jurisdiction.
10 | #
11 |
12 | import os
13 |
14 | import climetlab as cml
15 | import pytest
16 |
17 | is_test = os.environ.get("TEST_FAST", False)
18 |
19 |
20 | def get_dataset(format, param):
21 | return cml.load_dataset(
22 | "s2s-ai-challenge-test-input",
23 | dev=is_test,
24 | origin="ecmwf",
25 | date="20200102",
26 | parameter=param,
27 | format=format,
28 | )
29 |
30 |
@pytest.mark.skipif(os.environ.get("TEST_FAST") is not None, reason="siconc/ci not in dev dataset")
@pytest.mark.parametrize("param", ["2t", "ci", "t2m", ["t2m", "ci"]])
def test_read_grib_to_xarray(param):
    """Load ``param`` as GRIB and as netCDF and convert both to xarray.

    Skipped whenever the TEST_FAST environment variable is set.
    """
    dsgrib = get_dataset("grib", param)
    dsgrib = dsgrib.to_xarray()
    dsnetcdf = get_dataset("netcdf", param).to_xarray()
    print(dsgrib)
    print(dsnetcdf)
    # NOTE(review): this compares dsgrib.attrs with itself, so it can never
    # fail. Presumably the intent was to compare against dsnetcdf.attrs;
    # confirm the two attribute sets really match before tightening it.
    assert dsgrib.attrs == dsgrib.attrs
40 |
41 |
@pytest.mark.parametrize("param", ["2t", "t2m"])
def test_read_grib_to_xarray_2(param):
    """Load ``param`` as GRIB and as netCDF, convert both to xarray and print them."""
    dsgrib = get_dataset("grib", param)
    dsgrib = dsgrib.to_xarray()
    dsnetcdf = get_dataset("netcdf", param).to_xarray()
    print(dsgrib)
    print(dsnetcdf)
    # NOTE(review): self-comparison, always true. Presumably meant to be
    # checked against dsnetcdf.attrs -- confirm before changing.
    assert dsgrib.attrs == dsgrib.attrs
50 |
51 |
52 | if __name__ == "__main__":
53 | from climetlab.testing import main
54 |
55 | main(__file__)
56 |
--------------------------------------------------------------------------------
/tests/test_info.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # (C) Copyright 2020 ECMWF.
4 | #
5 | # This software is licensed under the terms of the Apache Licence Version 2.0
6 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7 | # In applying this licence, ECMWF does not waive the privileges and immunities
8 | # granted to it by virtue of its status as an intergovernmental organisation
9 | # nor does it submit to any jurisdiction.
10 | #
11 | import pandas as pd
12 |
13 | from climetlab_s2s_ai_challenge.info import Info
14 |
15 |
16 | def test_info():
17 | for n in (
18 | "ncep-hindcast-only",
19 | "test-input-dev",
20 | "training-input",
21 | "test-input",
22 | "training-input-dev",
23 | ):
24 | info = Info(n)
25 | print(info)
26 |
27 |
28 | def test_get_param_list():
29 | lst = Info("training-input").get_param_list(origin="ncep", fctype="hindcast")
30 | assert len(lst) == 19
31 | assert lst[0] == "t2m"
32 | assert lst[1] == "siconc"
33 | assert lst[-1] == "v"
34 |
35 | lst = Info("training-input").get_param_list(origin="ecmwf")
36 | assert len(lst) == 20
37 |
38 | lst = Info("training-input-dev").get_param_list(origin="ecmwf")
39 | assert len(lst) == 5
40 |
41 |
42 | def test_get_all_dates():
43 | lst = Info("training-input")._get_config("alldates", origin="ncep")
44 | assert len(lst) == 51
45 | assert lst[0] == pd.Timestamp("2010-01-07 00:00:00")
46 | assert lst[1] == pd.Timestamp("2010-01-14 00:00:00")
47 | assert lst[-1] == pd.Timestamp("2010-12-23 00:00:00")
48 |
49 | lst = Info("training-input-dev")._get_config("alldates", origin="ncep")
50 | assert len(lst) == 6
51 |
52 |
53 | if __name__ == "__main__":
54 | # test_read_2t_ecmwf_grib_cf_convention()
55 | test_info()
56 |
--------------------------------------------------------------------------------
/tests/test_long_observations.py:
--------------------------------------------------------------------------------
1 | import climetlab as cml
2 |
3 | # import pytest
4 |
5 |
6 | def test_observations_merged():
7 | cmlds = cml.load_dataset(
8 | "s2s-ai-challenge-observations",
9 | parameter=["pr", "t2m"],
10 | )
11 | ds = cmlds.to_xarray()
12 | print(ds)
13 |
14 |
15 | def test_observations():
16 | for p in ["pr", "t2m"]:
17 | cmlds = cml.load_dataset(
18 | "s2s-ai-challenge-observations",
19 | parameter=p,
20 | )
21 | ds = cmlds.to_xarray()
22 | print(ds)
23 |
24 |
def test_observations_720x360():
    """Load each observation parameter on the 720x360 grid and print it.

    Both parameters are converted and printed; previously the "pr" result
    was overwritten by the "t2m" one before being shown.
    """
    for p in ["pr", "t2m"]:
        cmlds = cml.load_dataset("s2s-ai-challenge-observations", parameter=p, grid="720x360")
        ds = cmlds.to_xarray()
        print(ds)
31 |
32 |
33 | # @pytest.mark.skipif(True, reason="Disabled because it needs a lot of memory")
34 | def test_observations_720x360_merged_1():
35 | cmlds = cml.load_dataset("s2s-ai-challenge-observations", parameter=["pr", "t2m"], grid="720x360")
36 | ds = cmlds.to_xarray()
37 | print(ds)
38 |
39 |
40 | # @pytest.mark.skipif(True, reason="Disabled because it needs a lot of memory")
41 | def test_observations_720x360_merged_2():
42 | cmlds = cml.load_dataset("s2s-ai-challenge-observations", grid="720x360")
43 | ds = cmlds.to_xarray()
44 | print(ds)
45 |
--------------------------------------------------------------------------------
/tests/test_merge.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import climetlab as cml
4 | import pytest
5 | import xarray as xr
6 |
7 | is_test = os.environ.get("TEST_FAST", False)
8 |
9 |
10 | def short_print(ds):
11 | print(dict(ds.dims), list(ds.keys()))
12 |
13 |
14 | @pytest.mark.parametrize("format1", ["grib", "netcdf"])
15 | @pytest.mark.parametrize("format2", ["grib", "netcdf"])
16 | def test_merge_2020_01_02_and_2020_01_09(format1, format2):
17 | merge_multiple_dates(["20200102", "20200109"], format1, format2)
18 |
19 |
20 | def test_merge_2020_01_02():
21 | merge("20200102")
22 |
23 |
24 | # not uploaded yet
25 | # def test_merge_2020_12_31():
26 | # merge("20201231")
27 |
28 |
29 | def merge(date):
30 | dslist = []
31 | ds = cml.load_dataset(
32 | "s2s-ai-challenge-forecast-input",
33 | dev=is_test,
34 | origin="cwao",
35 | date=date,
36 | parameter="2t",
37 | format="grib",
38 | )
39 | dslist.append(ds.to_xarray())
40 | ds = cml.load_dataset(
41 | "s2s-ai-challenge-forecast-input",
42 | dev=is_test,
43 | origin="cwao",
44 | date=date,
45 | parameter="tp",
46 | format="grib",
47 | )
48 | dslist.append(ds.to_xarray())
49 |
50 | for ds in dslist:
51 | short_print(ds)
52 |
53 | ds = xr.merge(dslist)
54 | print("-- Merged into --")
55 | short_print(ds)
56 |
57 | # failing on test data.
58 | # assert dslist[0].lead_time.values[0] == dslist[1].lead_time.values[0]
59 | # assert dslist[0].lead_time.values[-1] == dslist[1].lead_time.values[-1]
60 |
61 |
62 | def merge_multiple_dates(dates, format1, format2):
63 | dslist = []
64 | for date in dates:
65 | ds = cml.load_dataset(
66 | "s2s-ai-challenge-forecast-input",
67 | dev=is_test,
68 | origin="cwao",
69 | date=date,
70 | parameter="2t",
71 | format=format1,
72 | )
73 | dslist.append(ds.to_xarray())
74 | for ds in dslist:
75 | short_print(ds)
76 | print(ds)
77 |
78 | ds = xr.merge(dslist)
79 | print("-- Merged into --")
80 | short_print(ds)
81 |
82 | ds2 = cml.load_dataset(
83 | "s2s-ai-challenge-forecast-input",
84 | dev=is_test,
85 | origin="cwao",
86 | date=dates,
87 | parameter="2t",
88 | format=format2,
89 | )
90 | ds2 = ds2.to_xarray()
91 | print("-- direct merge --")
92 | short_print(ds2)
93 | print(ds2)
94 |
95 |
96 | def test_get_obs_merge_concat():
97 | cmlds = cml.load_dataset(
98 | "s2s-ai-challenge-test-output-reference",
99 | date=20200312,
100 | parameter=["t2m", "tp"],
101 | )
102 | ds = cmlds.to_xarray()
103 | print(ds)
104 |
105 |
if __name__ == "__main__":
    # merge_multiple_dates() takes the two formats as required positional
    # arguments; calling it with only the dates raises TypeError.
    merge_multiple_dates(["20200102", "20200109"], "grib", "netcdf")
    merge("20200102")
    merge("20201231")
110 |
--------------------------------------------------------------------------------
/tests/test_notebooks.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # (C) Copyright 2020 ECMWF.
4 | #
5 | # This software is licensed under the terms of the Apache Licence Version 2.0
6 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7 | # In applying this licence, ECMWF does not waive the privileges and immunities
8 | # granted to it by virtue of its status as an intergovernmental organisation
9 | # nor does it submit to any jurisdiction.
10 | #
11 |
12 | import os
13 | import re
14 |
15 | import nbformat
16 | import pytest
17 | from nbconvert.preprocessors import ExecutePreprocessor
18 |
19 | # See https://www.blog.pythonlibrary.org/2018/10/16/testing-jupyter-notebooks/
20 |
21 |
22 | EXAMPLES = os.path.join(os.path.dirname(os.path.dirname(__file__)), "notebooks")
23 |
24 | SKIP = (
25 | "demo_zarr_experimental.ipynb",
26 | "demo_zarr.ipynb",
27 | "demo_forecast_benchmark.ipynb",
28 | )
29 |
30 |
def notebooks_list():
    """Return the sorted file names in EXAMPLES that qualify as demo notebooks."""

    def is_demo_notebook(name):
        # Keep only "demo_*.ipynb" files, excluding copies and untitled notebooks.
        return (
            name.startswith("demo_")
            and re.match(r"[^_].*\.ipynb$", name) is not None
            and "Copy" not in name
            and not name.startswith("Untitled")
        )

    return sorted(filter(is_demo_notebook, os.listdir(EXAMPLES)))
45 |
46 |
@pytest.mark.parametrize("path", notebooks_list())
def test_notebook(path):
    """Execute the notebook ``path`` (a file name inside EXAMPLES) from top to bottom.

    Notebooks listed in SKIP are reported as skipped.
    """
    print(path)

    if path in SKIP:
        pytest.skip("Notebook marked as 'skip'")

    # Notebook files are UTF-8 JSON; do not depend on the platform's default encoding.
    with open(os.path.join(EXAMPLES, path), encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=4)

    # Allow up to one hour per cell; run with the notebooks directory as working directory.
    proc = ExecutePreprocessor(timeout=60 * 60, kernel_name="python3")
    proc.preprocess(nb, {"metadata": {"path": EXAMPLES}})
59 |
60 |
if __name__ == "__main__":
    # test_notebook() needs a ``path`` argument (supplied by pytest's
    # parametrization), so it cannot be called bare; drive it explicitly.
    for path in notebooks_list():
        if path in SKIP:
            # pytest.skip() raises an exception that nothing catches outside a pytest run.
            print(path, "(skipped)")
            continue
        test_notebook(path)
66 |
--------------------------------------------------------------------------------
/tests/test_observations.py:
--------------------------------------------------------------------------------
1 | import climetlab as cml
2 | import numpy as np
3 | import xarray as xr
4 |
5 |
6 | def test_test_get_rain_obs():
7 | cmlds = cml.load_dataset(
8 | "s2s-ai-challenge-test-output-reference",
9 | date=20200312,
10 | parameter="tp",
11 | )
12 | ds = cmlds.to_xarray()
13 | print(ds)
14 |
15 |
16 | def test_test_get_rain_obs_2():
17 | cmlds = cml.load_dataset(
18 | "s2s-ai-challenge-training-output-reference",
19 | date=[20200102, 20200312],
20 | parameter="tp",
21 | )
22 | ds = cmlds.to_xarray()
23 | print(ds)
24 |
25 |
26 | def test_train_get_rain_obs():
27 | cmlds = cml.load_dataset(
28 | "s2s-ai-challenge-training-output-reference",
29 | date=20200312,
30 | parameter="tp",
31 | )
32 | ds = cmlds.to_xarray()
33 | print(ds)
34 |
35 |
36 | def test_test_get_t2m_obs():
37 | cmlds = cml.load_dataset(
38 | "s2s-ai-challenge-test-output-reference",
39 | date=20200312,
40 | parameter="t2m",
41 | )
42 | ds = cmlds.to_xarray()
43 | print(ds)
44 |
45 |
46 | def test_test_get_t2m_obs_2():
47 | cmlds = cml.load_dataset(
48 | "s2s-ai-challenge-test-output-reference",
49 | date=20200312,
50 | parameter="t2m",
51 | )
52 | ds = cmlds.to_xarray()
53 | print(ds)
54 |
55 |
56 | def test_test_get_t2m_obs_3():
57 | cmlds = cml.load_dataset(
58 | "s2s-ai-challenge-test-output-reference",
59 | date="2020-03-12",
60 | parameter="t2m",
61 | )
62 | ds = cmlds.to_xarray()
63 | print(ds)
64 |
65 |
66 | def test_train_get_t2m_obs():
67 | cmlds = cml.load_dataset(
68 | "s2s-ai-challenge-training-output-reference",
69 | date=20200312,
70 | parameter="t2m",
71 | )
72 | ds = cmlds.to_xarray()
73 | print(ds)
74 |
75 |
76 | def test_get_obs():
77 | cmlds = cml.load_dataset(
78 | "s2s-ai-challenge-test-output-reference",
79 | date=20200312,
80 | parameter="t2m",
81 | )
82 | ds = cmlds.to_xarray()
83 | print(ds)
84 |
85 |
86 | def test_forecast_like_observations_script():
87 | """Create synthetic observations object with time dim
88 | and forecast object with lead_time and forecast_time,
89 | to create observation with forecast_time and lead_time
90 | while accumulating pr to tp.
91 | """
92 | import pandas as pd
93 |
94 | from climetlab_s2s_ai_challenge.extra import (
95 | create_lead_time_and_forecast_time_from_time,
96 | create_valid_time_from_forecast_time_and_lead_time,
97 | forecast_like_observations,
98 | )
99 |
100 | # create obs with time dimension
101 | n_time = 100
102 | time = np.arange(n_time)
103 | time_coord = pd.date_range(start="2000", freq="1D", periods=n_time)
104 | ds_time = xr.DataArray(time, dims="time", coords={"time": time_coord})
105 |
106 | # create valid_time
107 | i_time = 10
108 | init_coord = pd.date_range(start="2000", freq="W-THU", periods=i_time)
109 | inits = xr.DataArray(np.arange(i_time), dims="forecast_time", coords={"forecast_time": init_coord})
110 | leads = [pd.Timedelta(f"{d} d") for d in range(10)]
111 | valid_times = create_valid_time_from_forecast_time_and_lead_time(inits.forecast_time, leads)
112 | assert "lead_time" in valid_times.dims
113 | assert "forecast_time" in valid_times.dims
114 |
115 | # create a forecast with 10 forecast_time and 10 lead_time and add valid_time
116 | forecast = xr.DataArray(
117 | ds_time.values.reshape(10, 10),
118 | dims=["forecast_time", "lead_time"],
119 | coords={"forecast_time": valid_times.forecast_time, "lead_time": valid_times.lead_time},
120 | )
121 | forecast = forecast.assign_coords(valid_time=valid_times)
122 |
123 | # add dimensions lead_time and forecast_time from dim time
124 | ds_lead_init = create_lead_time_and_forecast_time_from_time(forecast, ds_time)
125 |
126 | for d in ["lead_time", "forecast_time"]:
127 | assert d in ds_lead_init.dims
128 |
129 | # promote to dataset
130 | forecast = forecast.to_dataset(name="pr")
131 | forecast["t2m"] = forecast["pr"]
132 | ds_time = ds_time.to_dataset(name="pr")
133 | ds_time["t2m"] = ds_time["pr"]
134 |
135 | # testing forecast_like_observations
136 | obs_lead_init = forecast_like_observations(forecast, ds_time)
137 | assert "tp" in obs_lead_init.data_vars
138 | assert "pr" not in obs_lead_init.data_vars
139 | assert not obs_lead_init["tp"].identical(obs_lead_init["t2m"])
140 | assert obs_lead_init["tp"].attrs["standard_name"] == "precipitation_amount"
141 |
--------------------------------------------------------------------------------
/tests/test_read.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # (C) Copyright 2020 ECMWF.
4 | #
5 | # This software is licensed under the terms of the Apache Licence Version 2.0
6 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7 | # In applying this licence, ECMWF does not waive the privileges and immunities
8 | # granted to it by virtue of its status as an intergovernmental organisation
9 | # nor does it submit to any jurisdiction.
10 | #
11 |
12 | import os
13 |
14 | import climetlab as cml
15 | import pytest
16 |
17 | is_test = os.environ.get("TEST_FAST", False)
18 |
19 |
def _generic_test_read(
    parameter,
    origin,
    format,
    date="20200102",
    fctype="forecast",
    datasetname="s2s-ai-challenge-forecast-input",
    dev=is_test,
):
    """Load a dataset through climetlab, convert it to xarray and print it.

    All arguments are forwarded to ``cml.load_dataset``. ``dev`` defaults to
    the module-level ``is_test`` flag (derived from the TEST_FAST environment
    variable) and is now actually passed on; it was previously accepted but
    silently ignored.
    """
    ds = cml.load_dataset(
        datasetname,
        dev=dev,
        origin=origin,
        date=date,
        parameter=parameter,
        format=format,
        fctype=fctype,
    )
    xds = ds.to_xarray()
    print(xds)
39 |
40 |
41 | def test_read_tp_ecmwf_grib__():
42 | _generic_test_read(parameter="tp", origin="ecmwf", format="grib")
43 |
44 |
def test_read_domain_name():
    """Read ``tp`` from ECMWF as GRIB using the domain-style dataset name."""
    # The stray trailing comma that wrapped this call in a throwaway tuple is removed.
    _generic_test_read(
        parameter="tp", origin="ecmwf", format="grib", datasetname="s2s-ai-challenge-forecast-input", dev=is_test
    )
49 |
50 |
def test_read_ml_name():
    """Read ``tp`` from ECMWF as GRIB using the ML-style dataset name."""
    # The stray trailing comma that wrapped this call in a throwaway tuple is removed.
    _generic_test_read(
        parameter="tp", origin="ecmwf", format="grib", datasetname="s2s-ai-challenge-test-input", dev=is_test
    )
55 |
56 |
57 | def test_read_tp_ecmwf_netcdf():
58 | _generic_test_read(parameter="tp", origin="ecmwf", format="netcdf")
59 |
60 |
61 | def test_read_tp_cwao_grib__():
62 | _generic_test_read(parameter="tp", origin="cwao", format="grib")
63 |
64 |
65 | def test_read_tp_cwao_netcdf():
66 | _generic_test_read(parameter="tp", origin="cwao", format="netcdf")
67 |
68 |
69 | def test_read_tp_kwbc_grib__():
70 | _generic_test_read(parameter="tp", origin="kwbc", format="grib")
71 |
72 |
73 | def test_read_tp_kwbc_netcdf():
74 | _generic_test_read(parameter="tp", origin="kwbc", format="netcdf")
75 |
76 |
77 | def test_read_2t_ecmwf_grib_mars_convention():
78 | _generic_test_read(parameter="2t", origin="ecmwf", format="grib")
79 |
80 |
81 | def test_read_2t_ecmwf_grib_cf_convention():
82 | _generic_test_read(parameter="t2m", origin="ecmwf", format="grib")
83 |
84 |
85 | def test_read_2dates_cwao():
86 | _generic_test_read(parameter="t2m", origin="cwao", format="grib", date=["20200102", "20200109"])
87 |
88 |
89 | def test_read_2dates_kwbc():
90 | _generic_test_read(parameter="t2m", origin="kwbc", format="grib", date=["20200102", "20200109"])
91 |
92 |
93 | def test_read_hindcast_grib():
94 | _generic_test_read(parameter="t2m", origin="ecmwf", format="grib")
95 |
96 |
97 | def test_read_hindcast_netcdf():
98 | _generic_test_read(parameter="t2m", origin="ecmwf", format="netcdf")
99 |
100 |
@pytest.mark.skipif(os.environ.get("TEST_FAST") is not None, reason="TEST_FAST is set")
def test_read_hindcast_netcdf_2():
    """Read ``rsn`` from ECMWF as netCDF; skipped when TEST_FAST is set."""
    _generic_test_read(parameter="rsn", origin="ecmwf", format="netcdf")
104 |
105 |
@pytest.mark.skipif(os.environ.get("TEST_FAST") is not None, reason="TEST_FAST is set")
def test_read_2dates_cwao_2():
    """Read ``t2m`` from cwao as GRIB for two dates; skipped when TEST_FAST is set."""
    _generic_test_read(parameter="t2m", origin="cwao", format="grib", date=["20200102", "20201231"])
109 |
110 |
@pytest.mark.skipif(os.environ.get("TEST_FAST") is not None, reason="TEST_FAST is set")
def test_read_2dates_kwbc_2():
    """Read ``t2m`` from kwbc as GRIB for two dates; skipped when TEST_FAST is set."""
    _generic_test_read(parameter="t2m", origin="kwbc", format="grib", date=["20200102", "20201231"])
114 |
--------------------------------------------------------------------------------
/tests/test_read_zarr.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # (C) Copyright 2020 ECMWF.
4 | #
5 | # This software is licensed under the terms of the Apache Licence Version 2.0
6 | # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
7 | # In applying this licence, ECMWF does not waive the privileges and immunities
8 | # granted to it by virtue of its status as an intergovernmental organisation
9 | # nor does it submit to any jurisdiction.
10 | #
11 |
12 | import climetlab as cml
13 |
14 |
15 | def test_read_zarr():
16 | return # TODO re-enable test when data is uploaded
17 | for parameter in ["2t"] + ["t2m"]:
18 | for fctype in ["forecast"]: # ["forecast", "hindcast"]:
19 | for origin in ["ecmwf"]: # ["cwao", "ecmwf", "kwbc"]:
20 | ds = cml.load_dataset(
21 | "s2s-ai-challenge-test-input",
22 | origin=origin,
23 | fctype=fctype,
24 | format="zarr",
25 | parameter=parameter,
26 | )
27 | xds = ds.to_xarray()
28 | print(xds)
29 |
30 |
31 | if __name__ == "__main__":
32 | test_read_zarr()
33 |
--------------------------------------------------------------------------------
/tools/.gitignore:
--------------------------------------------------------------------------------
1 | *.json
2 |
--------------------------------------------------------------------------------
/tools/availability.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from climetlab.utils.availability import Availability
4 |
5 | a = Availability("availability.json")
6 |
7 | for p in a.iterate():
8 | print(p)
9 |
10 |
11 | print()
12 | print(a.tree())
13 |
--------------------------------------------------------------------------------
/tools/list.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import json
4 | import time
5 | from itertools import product
6 |
7 | import pandas as pd
8 | import requests
9 |
# Probe the object store with HEAD requests for every combination of
# origin / dataset / forecast type / parameter / date, and record the
# combinations whose GRIB file exists (HTTP 200) in "availability.json".

VERSION = "0.1.43"

URL = "https://object-store.os-api.cci1.ecmwf.int"
DATA = "s2s-ai-challenge/data"

# Template of the object URL that is probed for each combination.
PATTERN = "{URL}/{DATA}/{dataset}-{fctype}-{origin}/{VERSION}/grib/{parameter}-{date}.grib"

DATASET = (
    "training-set",
    "reference-set",
)
ORIGIN = (
    "ecmf",
    "kwbc",
    "cwao",
)

FCTYPE = (
    "forecast",
    "hindcast",
)

PARAMETER = ("2t", "tp")

# Every calendar day of 2020, formatted as YYYYMMDD.
DATES = [d.strftime("%Y%m%d") for d in pd.date_range(start="2020-01-01", end="2020-12-31")]

OUTPUT = "availability.json"
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection blocks forever
RETRY_DELAY = 10  # seconds to wait before retrying after a network error


def _head_with_retry(session, url):
    """Send a HEAD request for ``url``, retrying forever on network errors.

    Only ``requests.RequestException`` (connection errors, timeouts, ...) is
    retried; any other exception is a programming error and is left to
    propagate instead of being retried endlessly.

    Returns the ``requests.Response`` of the first request that completes.
    """
    while True:
        try:
            return session.head(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(e)
            time.sleep(RETRY_DELAY)


def _save(entries, path=OUTPUT):
    """Write ``entries`` to ``path`` as indented, key-sorted JSON plus a newline."""
    with open(path, "wt") as f:
        print(json.dumps(entries, indent=4, sort_keys=True), file=f)


avail = []

# A single session re-uses the connection to the object store across the
# several thousand requests issued below.
with requests.Session() as session:
    for origin, dataset, fctype, parameter, date in product(ORIGIN, DATASET, FCTYPE, PARAMETER, DATES):
        url = PATTERN.format(
            URL=URL,
            DATA=DATA,
            VERSION=VERSION,
            dataset=dataset,
            fctype=fctype,
            origin=origin,
            parameter=parameter,
            date=date,
        )
        print(url)
        r = _head_with_retry(session, url)
        if r.status_code == 200:
            avail.append(
                dict(
                    origin=origin,
                    dataset=dataset,
                    fctype=fctype,
                    parameter=parameter,
                    date=date,
                )
            )
            # Checkpoint after every hit so an interrupted scan keeps its results.
            _save(avail)

# Final write: guarantees the file exists and is complete even with no hits.
_save(avail)
60 |
--------------------------------------------------------------------------------
/tools/observations/conda-packages.txt:
--------------------------------------------------------------------------------
1 | cf_xarray>=0.6.0
2 | esmf>=8.1.0
3 |
4 |
--------------------------------------------------------------------------------
/tools/observations/download_from_source.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Download one NetCDF file per year (1979..2021) for the tmin, tmax and rain
# variables from iridl.ldeo.columbia.edu into OUTDIR, one sub-directory per
# variable (tmin/, tmax/, rain/), each file named data.<year>.nc.
#
# Usage: download_from_source.sh [OUTDIR]    (OUTDIR defaults to /s2s-obs/)
#
# Downloads are best-effort: a failed year is reported on stderr and the
# script carries on with the next one.

OUTDIR=${1:-/s2s-obs/}

# Stop here if OUTDIR is unusable, rather than downloading into whatever
# directory the script happened to be started from.
cd "$OUTDIR" || { echo "cannot change directory to $OUTDIR" >&2; exit 1; }

CPC_URL=http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC

# download_years NAME BASE_URL [TIME_PREFIX]
#   Fetch BASE_URL restricted to 1 Jan .. 31 Dec of each year into
#   NAME/data.<year>.nc. TIME_PREFIX is an already URL-encoded string placed
#   in front of both range bounds (empty by default).
download_years() {
    local name=$1 base=$2 time_prefix=${3:-}
    local year

    mkdir -p "$name"
    for year in {1979..2021}; do
        wget "${base}/T/%28${time_prefix}1%20Jan%20${year}%29%28${time_prefix}31%20Dec%20${year}%29RANGEEDGES/data.nc" -O "$name/data.$year.nc" \
            || echo "warning: download failed for $name $year" >&2
    done
}

for t in tmin tmax; do
    download_years "$t" "${CPC_URL}/.temperature/.daily/.${t}"
done

# The rain request carries an explicit 0000 time on both range bounds.
download_years rain "${CPC_URL}/.UNIFIED_PRCP/.GAUGE_BASED/.GLOBAL/.v1p0/.extREALTIME/.rain" "0000%20"

echo "data downloaded"
31 |
--------------------------------------------------------------------------------
/tools/observations/makefile:
--------------------------------------------------------------------------------
# Download, build, upload and publish the observation-based datasets.
#
# OUTDIR is the working directory: download_from_source.sh is run on it, and
# build_dataset_observations.py uses it both as --input and as --outdir.
# The upload target then copies files found under it to the S3 bucket.
OUTDIR=/s2s-obs/tmp/


# TODO : turn this into an ecflow suite. If needed.
# To run this do :
#      make build
#      make publish

# These targets are commands, not files: always run them, even if a file or
# directory with the same name exists next to this makefile.
.PHONY: download build upload publish nuke

download: ${OUTDIR}/tmax ${OUTDIR}/tmin ${OUTDIR}/rain

# Each rule runs the same download script; a rule is skipped when its
# directory already exists.
${OUTDIR}/tmax:
	mkdir -p ${OUTDIR} && ./download_from_source.sh ${OUTDIR}
${OUTDIR}/tmin:
	mkdir -p ${OUTDIR} && ./download_from_source.sh ${OUTDIR}
${OUTDIR}/rain:
	mkdir -p ${OUTDIR} && ./download_from_source.sh ${OUTDIR}

# Run the build script twice: once with --temperature, then with --rain.
build: ${OUTDIR}/tmax ${OUTDIR}/tmin ${OUTDIR}/rain
	./build_dataset_observations.py --outdir ${OUTDIR} --input ${OUTDIR} --temperature && \
	./build_dataset_observations.py --outdir ${OUTDIR} --input ${OUTDIR} --rain

# Copy every file matching <dir>/*/* under OUTDIR to the same relative path in
# the bucket.
upload:
	cd ${OUTDIR} && \
	for i in training-output-reference/*/*; do echo $$i; s3cmd put $$i s3://s2s-ai-challenge/data/$$i; done && \
	for i in test-output-reference/*/*; do echo $$i; s3cmd put $$i s3://s2s-ai-challenge/data/$$i; done && \
	for i in observations/*/*; do echo $$i; s3cmd put $$i s3://s2s-ai-challenge/data/$$i; done

# Upload, then make the three uploaded prefixes publicly readable.
publish: upload
	s3cmd setacl s3://s2s-ai-challenge/data/test-output-reference --recursive --acl-public && \
	s3cmd setacl s3://s2s-ai-challenge/data/test-output-reference/ --recursive --acl-public && \
	s3cmd setacl s3://s2s-ai-challenge/data/training-output-reference --recursive --acl-public && \
	s3cmd setacl s3://s2s-ai-challenge/data/training-output-reference/ --recursive --acl-public && \
	s3cmd setacl s3://s2s-ai-challenge/data/observations --recursive --acl-public && \
	s3cmd setacl s3://s2s-ai-challenge/data/observations/ --recursive --acl-public

# Delete the uploaded training/test output references from the bucket.
# The observations/ prefix is not removed by this target.
nuke:
	s3cmd rm s3://s2s-ai-challenge/data/training-output-reference --recursive
	s3cmd rm s3://s2s-ai-challenge/data/test-output-reference --recursive
39 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [isort]
2 | profile=black
3 |
4 | [flake8]
5 | max-line-length = 125
6 | max-complexity = 10
7 |
--------------------------------------------------------------------------------