├── .gitignore ├── LICENSE.md ├── README.md ├── checksums.txt ├── download.ipynb ├── download_merra2.ipynb ├── generate_metadata.py ├── main.ipynb ├── opendap_download ├── .gitignore ├── __init__.py ├── authentication.yaml └── multi_processing_download.py ├── processing.ipynb └── requirements.yml /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | __pycache__/ 3 | downloads/ 4 | datapackage.json 5 | 20*-*-*/ 6 | cached_dataframe.csv 7 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Stefan Pfenninger, ETH Zurich 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open Power System Data: Weather Data 2 | 3 | See the [main Jupyter notebook](main.ipynb) for further details. 4 | 5 | ## License 6 | 7 | This repository is published under the [MIT License](LICENSE.md). 8 | -------------------------------------------------------------------------------- /checksums.txt: -------------------------------------------------------------------------------- 1 | weather_data.csv,b06e72cb32f06f6f57da7ab891c6be70fb201777617433c12c08fbd2ac64062b 2 | weather_data_multiindex.csv,6092915ca6793bffb9d0474723e6a293f5862bc479f0f7e02716478f6c537e37 3 | weather_data.sqlite,e127eb828afb63e670dd667fd0ee0e25a14de612b685f7b624bfa8889827bc8b 4 | -------------------------------------------------------------------------------- /download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
\n", 8 | " Weather Data: Renewables.ninja download notebook\n", 9 | " \n", 15 | "
This Notebook is part of the Weather Data Package of Open Power System Data.\n", 16 | "
" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import os\n", 26 | "import shutil\n", 27 | "import zipfile\n", 28 | "\n", 29 | "import pandas as pd\n", 30 | "import requests" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "dir_shapefiles = './downloads/shapefiles'\n", 40 | "dir_countries = './downloads/countries'\n", 41 | "dir_nuts = './downloads/nuts'" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "---" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "def save_to_disk(url, path):\n", 58 | " response = requests.get(url, stream=True)\n", 59 | " \n", 60 | " with open(path, 'wb') as f:\n", 61 | " shutil.copyfileobj(response.raw, f)\n", 62 | "\n", 63 | " del response" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "for d in [dir_shapefiles, dir_countries, dir_nuts]:\n", 73 | " os.makedirs(d, exist_ok=True)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "base_url = 'https://www.renewables.ninja/country_downloads/'\n", 83 | "country_url_template = '{country}/ninja_weather_country_{country}_merra-2_population_weighted.csv'\n", 84 | "countries = [\n", 85 | " 'BE', 'BG', 'CH', 'CZ', 'DK', 'DE', 'EE', 'IE', 'ES', 'FR',\n", 86 | " 'AT', 'HR', 'IT', 'LV', 'LT', 'LU', 'NO', 'HU', 'NL',\n", 87 | " 'PL', 'PT', 'RO', 'SI', 'SK', 'FI', 'SE', 'GB', 'GR',\n", 88 | "]\n", 89 | "\n", 90 | "country_urls = [base_url + country_url_template.format(country=i) for i in countries]\n", 91 | "\n", 92 | "for u in country_urls:\n", 93 | " save_to_disk(u, os.path.join(dir_countries, u.split('/')[-1]))\n", 94 | "\n", 95 | " \n", 96 | "# FIXME: wind data not yet in the publicly available files on Renewables.ninja - to be added soon\n", 97 | " \n", 98 | "# FIXME: NUTS-2 weather not yet in the publicly available files on Renewables.ninja - to be added soon\n", 99 | "\n", 100 | "# nuts_url_template = '{country}/ninja_weather_{variable}_country_{country}_merra-2_nuts-2_population_weighted.csv'" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# NUTS geodata\n", 110 | "# http://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/administrative-units-statistical-units/nuts\n", 111 | "\n", 112 | "save_to_disk(\n", 113 | " 'http://ec.europa.eu/eurostat/cache/GISCO/distribution/v2/nuts/download/ref-nuts-2016-20m.shp.zip',\n", 114 | " os.path.join(dir_shapefiles, 'ref-nuts-2016-20m.shp.zip')\n", 115 | ")\n", 116 | "\n", 117 | "with zipfile.ZipFile(os.path.join(dir_shapefiles, 'ref-nuts-2016-20m.shp.zip'), 'r') as f:\n", 118 | " f.extractall(dir_shapefiles)" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "Python 3", 125 | "language": "python", 126 | "name": "python3" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.6.6" 139 | } 140 | 
}, 141 | "nbformat": 4, 142 | "nbformat_minor": 2 143 | } 144 | -------------------------------------------------------------------------------- /download_merra2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
\n", 8 | " Weather Data: Example script to download arbitrary MERRA-2 data\n", 9 | " \n", 15 | "
This Notebook is part of the Weather Data Package of Open Power System Data.\n", 16 | "
" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "toc": "true" 23 | }, 24 | "source": [ 25 | "# Table of Contents\n", 26 | "

1  Introductory Notes
1.1  How to use the script:
2  Script Setup
3  Download raw data
3.1  Input
3.1.1  Timeframe
3.1.2  Geography coordinates
3.2  Subsetting data
4  Downloading data
4.1  Get wind data
4.2  Get roughness data
4.3  Get radiation data
4.4  Get temperature data
4.5  Get air density data
4.6  Get air pressure data
4.7  Get lat and lon dimensions
4.8  Check the precision of the downloaded data
5  Setting up the DataFrame
5.1  Converting the timeformat to ISO 8601
5.2  Converting wind vectors to wind speed
5.3  Setting up data Frame for roughness, radiation, temperature and air parameters
5.4  Combining the data Frames
6  Structure the dataframe, add and remove columns
6.1  Calculating the displacement height
6.2  Adding needed and removing not needed columns
6.3  Renaming and sorting columns
6.4  First look at the final data Frame
6.4.1  structure and format
6.4.2  resulting dataframe
7  Saving data
7.1  Save as CSV
7.2  Save as SQLite
7.3  Create metadata
7.4  Generating checksums
" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "---\n", 34 | "\n", 35 | "\n", 36 | "# Introductory Notes" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "Previously, OPSD chose to supply only a **documented methodological script** for weather data, given that hosting global meteorological datasets like MERRA-2 would not be feasible due to their size (variety of variables, very long timespan, huge geographical coverage etc.).\n", 44 | "\n", 45 | "We now also make country-aggregated weather data available directly obtained from the [Renewables.ninja project](https://www.renewables.ninja). However, we still provide the previous download script for reference if you want to download arbitrary MERRA-2 weather data directly.\n", 46 | "\n", 47 | "This script contains code that allows the download, subset and processing of [MERRA-2](http://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/) datasets (provided by NASA Goddard Space Flight Center) and export them as CSV. The method describes one way to automatically obtain the desired weather data from the MERRA-2 database and simplifies resp. unifies alternative manual data obtaining methods in a single script." 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "cell_style": "center" 54 | }, 55 | "source": [ 56 | "## How to use the script:" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "To download MERRA-2 data, you have to **register at NASA earth data portal:**\n", 64 | "1. Register an account at [https://urs.earthdata.nasa.gov/](https://urs.earthdata.nasa.gov/)\n", 65 | "2. Go to \"My Applications\" -> \"Approve More Applications\" and add _NASA GESDISC DATA ARCHIVE_ (scroll down list)\n", 66 | "3. 
Input your username and password when requested by the script\n", 67 | "\n", 68 | "_Hints:_\n", 69 | "* _Be aware that by registering you are \"consenting to complete monitoring with no expectation of privacy\"..._\n", 70 | "* _It seems that the routine sometimes has problems with usernames that include upper-case letters - avoid them if you can._" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "---\n", 78 | "# Script Setup" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "ExecuteTime": { 86 | "end_time": "2017-07-05T19:08:34.066581", 87 | "start_time": "2017-07-05T19:08:31.685713" 88 | } 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "import pandas as pd\n", 93 | "import xarray as xr\n", 94 | "import numpy as np\n", 95 | "import requests\n", 96 | "import logging\n", 97 | "import yaml\n", 98 | "import json\n", 99 | "import os\n", 100 | "import hashlib\n", 101 | "import sqlalchemy\n", 102 | "\n", 103 | "from datetime import datetime\n", 104 | "from calendar import monthrange\n", 105 | "from opendap_download.multi_processing_download import DownloadManager\n", 106 | "import math\n", 107 | "from functools import partial\n", 108 | "import re\n", 109 | "import getpass\n", 110 | "from datetime import timedelta\n", 111 | "import dateutil.parser\n", 112 | "\n", 113 | "# Set up a log\n", 114 | "logging.basicConfig(level=logging.INFO)\n", 115 | "log = logging.getLogger('notebook')" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "---\n", 123 | "# Download raw data" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "## Input" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "This part takes the user-defined input parameters and creates a URL that can download the desired MERRA-2 data via the OPeNDAP interface (see the documentation notebook for information on OPeNDAP)." 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "### Timeframe" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "Definition of the desired timespan for which data is needed (complete years only)." 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "ExecuteTime": { 159 | "end_time": "2017-07-05T19:08:40.156966", 160 | "start_time": "2017-07-05T19:08:40.147943" 161 | }, 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# User input of timespan\n", 167 | "download_year = 2016\n", 168 | "\n", 169 | "# Create the start date 2016-01-01\n", 170 | "download_start_date = str(download_year) + '-01-01'" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "### Geography coordinates" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Definition of the desired coordinates. 
The user has to input two corner coordinates \n", 185 | "of a rectangular area (format WGS84, decimal system).\n", 186 | "* Southwest coordinate: lat_1, lon_1\n", 187 | "* Northeast coordinate: lat_2, lon_2\n", 188 | "\n", 189 | "The area/coordinates will be converted from lat/lon to the MERRA-2 grid coordinates.\n", 190 | "Since the resolution of the MERRA-2 grid is 0.5 x 0.625°, the given exact coordinates will \n", 191 | "be matched as closely as possible." 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "ExecuteTime": { 199 | "end_time": "2017-07-05T19:08:49.864174", 200 | "start_time": "2017-07-05T19:08:49.747867" 201 | } 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "# User input of coordinates\n", 206 | "# ------\n", 207 | "# Example: Germany (lat/lon)\n", 208 | "# Northeastern point: 55.05917°N, 15.04361°E\n", 209 | "# Southwestern point: 47.27083°N, 5.86694°E\n", 210 | "\n", 211 | "# It is important to make the southwestern coordinate lat_1 and lon_1 since\n", 212 | "# the MERRA-2 portal requires it!\n", 213 | "# Southwestern coordinate\n", 214 | "# lat_1, lon_1 = 47.27083, 5.86694 Germany\n", 215 | "# Northeastern coordinate \n", 216 | "# lat_2, lon_2 = 55.05917, 15.04361 Germany\n", 217 | "\n", 218 | "# Southwestern coordinate\n", 219 | "lat_1, lon_1 = 47.27083, 5.86694\n", 220 | "# Northeastern coordinate\n", 221 | "lat_2, lon_2 = 55.05917, 15.04361\n", 222 | "\n", 223 | "def translate_lat_to_geos5_native(latitude):\n", 224 | "    \"\"\"\n", 225 | "    The source for this formula is in the MERRA2 \n", 226 | "    Variable Details - File specifications for GEOS pdf file.\n", 227 | "    The Grid in the documentation has points from 1 to 361 and 1 to 576.\n", 228 | "    The MERRA-2 Portal uses 0 to 360 and 0 to 575.\n", 229 | "    latitude: float Needs +/- instead of N/S\n", 230 | "    \"\"\"\n", 231 | "    return ((latitude + 90) / 0.5)\n", 232 | "\n", 233 | "def translate_lon_to_geos5_native(longitude):\n", 234 | "    \"\"\"See function above\"\"\"\n", 235 | "    return ((longitude + 180) / 0.625)\n", 236 | "\n", 237 | "def find_closest_coordinate(calc_coord, coord_array):\n", 238 | "    \"\"\"\n", 239 | "    Since the resolution of the grid is 0.5 x 0.625, the 'real world'\n", 240 | "    coordinates will not be matched 100% correctly. This function matches \n", 241 | "    the coordinates as closely as possible. \n", 242 | "    \"\"\"\n", 243 | "    # np.argmin() finds the smallest value in an array and returns its\n", 244 | "    # index. np.abs() returns the absolute value of each item of an array.\n", 245 | "    # To summarize, the function finds the difference closest to 0 and returns \n", 246 | "    # its index. 
\n", 247 | " index = np.abs(coord_array-calc_coord).argmin()\n", 248 | " return coord_array[index]\n", 249 | "\n", 250 | "# The arrays contain the coordinates of the grid used by the API.\n", 251 | "# The values are from 0 to 360 and 0 to 575\n", 252 | "lat_coords = np.arange(0, 361, dtype=int)\n", 253 | "lon_coords = np.arange(0, 576, dtype=int)\n", 254 | "\n", 255 | "# Translate the coordinates that define your area to grid coordinates.\n", 256 | "lat_coord_1 = translate_lat_to_geos5_native(lat_1)\n", 257 | "lon_coord_1 = translate_lon_to_geos5_native(lon_1)\n", 258 | "lat_coord_2 = translate_lat_to_geos5_native(lat_2)\n", 259 | "lon_coord_2 = translate_lon_to_geos5_native(lon_2)\n", 260 | "\n", 261 | "\n", 262 | "# Find the closest coordinate in the grid.\n", 263 | "lat_co_1_closest = find_closest_coordinate(lat_coord_1, lat_coords)\n", 264 | "lon_co_1_closest = find_closest_coordinate(lon_coord_1, lon_coords)\n", 265 | "lat_co_2_closest = find_closest_coordinate(lat_coord_2, lat_coords)\n", 266 | "lon_co_2_closest = find_closest_coordinate(lon_coord_2, lon_coords)\n", 267 | "\n", 268 | "# Check the precision of the grid coordinates. These coordinates are not lat/lon. \n", 269 | "# They are coordinates on the MERRA-2 grid. \n", 270 | "log.info('Calculated coordinates for point 1: ' + str((lat_coord_1, lon_coord_1)))\n", 271 | "log.info('Closest coordinates for point 1: ' + str((lat_co_1_closest, lon_co_1_closest)))\n", 272 | "log.info('Calculated coordinates for point 2: ' + str((lat_coord_2, lon_coord_2)))\n", 273 | "log.info('Closest coordinates for point 2: ' + str((lat_co_2_closest, lon_co_2_closest)))" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": { 279 | "collapsed": true 280 | }, 281 | "source": [ 282 | "## Subsetting data" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": { 288 | "collapsed": true 289 | }, 290 | "source": [ 291 | "Combining parameter choices above/translation according to OPenDAP guidelines into URL-appendix" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "ExecuteTime": { 299 | "end_time": "2017-07-05T19:08:56.233786", 300 | "start_time": "2017-07-05T19:08:56.111463" 301 | } 302 | }, 303 | "outputs": [], 304 | "source": [ 305 | "def translate_year_to_file_number(year):\n", 306 | " \"\"\"\n", 307 | " The file names consist of a number and a meta data string. \n", 308 | " The number changes over the years. 
1980 until 1991 it is 100, \n", 309 | "    1992 until 2000 it is 200, 2001 until 2010 it is 300 \n", 310 | "    and from 2011 until now it is 400.\n", 311 | "    \"\"\"\n", 312 | "    file_number = ''\n", 313 | "    \n", 314 | "    if year >= 1980 and year < 1992:\n", 315 | "        file_number = '100'\n", 316 | "    elif year >= 1992 and year < 2001:\n", 317 | "        file_number = '200'\n", 318 | "    elif year >= 2001 and year < 2011:\n", 319 | "        file_number = '300'\n", 320 | "    elif year >= 2011:\n", 321 | "        file_number = '400'\n", 322 | "    else:\n", 323 | "        raise Exception('The specified year is out of range.')\n", 324 | "    \n", 325 | "    return file_number\n", 326 | "    \n", 327 | "\n", 328 | "\n", 329 | "def generate_url_params(parameter, time_para, lat_para, lon_para):\n", 330 | "    \"\"\"Creates a string containing all the parameters in query form\"\"\"\n", 331 | "    parameter = map(lambda x: x + time_para, parameter)\n", 332 | "    parameter = map(lambda x: x + lat_para, parameter)\n", 333 | "    parameter = map(lambda x: x + lon_para, parameter)\n", 334 | "    \n", 335 | "    return ','.join(parameter)\n", 336 | "    \n", 337 | "    \n", 338 | "\n", 339 | "def generate_download_links(download_years, base_url, dataset_name, url_params):\n", 340 | "    \"\"\"\n", 341 | "    Generates the links for the download. \n", 342 | "    download_years: The years you want to download, as a list. \n", 343 | "    dataset_name: The name of the data set. For example tavg1_2d_slv_Nx\n", 344 | "    \"\"\"\n", 345 | "    urls = []\n", 346 | "    for y in download_years: \n", 347 | "        # build the file_number for the current year\n", 348 | "        y_str = str(y)\n", 349 | "        file_num = translate_year_to_file_number(y)\n", 350 | "        for m in range(1,13):\n", 351 | "            # build the month string: for the months 1 - 9 it starts with a leading 0. \n", 352 | "            # zfill solves that problem\n", 353 | "            m_str = str(m).zfill(2)\n", 354 | "            # monthrange returns the first weekday and the number of days in a \n", 355 | "            # month. Also works for leap years.\n", 356 | "            _, nr_of_days = monthrange(y, m)\n", 357 | "            for d in range(1,nr_of_days+1):\n", 358 | "                d_str = str(d).zfill(2)\n", 359 | "                # Create the file name string\n", 360 | "                file_name = 'MERRA2_{num}.{name}.{y}{m}{d}.nc4'.format(\n", 361 | "                    num=file_num, name=dataset_name, \n", 362 | "                    y=y_str, m=m_str, d=d_str)\n", 363 | "                # Create the query\n", 364 | "                query = '{base}{y}/{m}/{name}.nc4?{params}'.format(\n", 365 | "                    base=base_url, y=y_str, m=m_str, \n", 366 | "                    name=file_name, params=url_params)\n", 367 | "                urls.append(query)\n", 368 | "    return urls\n", 369 | "\n", 370 | "requested_params = ['U2M', 'U10M', 'U50M', 'V2M', 'V10M', 'V50M', 'DISPH']\n", 371 | "requested_time = '[0:1:23]'\n", 372 | "# Creates a string that looks like [start:1:end]. start and end are the lat or\n", 373 | "# lon coordinates that define your area.\n", 374 | "requested_lat = '[{lat_1}:1:{lat_2}]'.format(\n", 375 | "    lat_1=lat_co_1_closest, lat_2=lat_co_2_closest)\n", 376 | "requested_lon = '[{lon_1}:1:{lon_2}]'.format(\n", 377 | "    lon_1=lon_co_1_closest, lon_2=lon_co_2_closest)\n", 378 | "\n", 379 | "\n", 380 | "\n", 381 | "parameter = generate_url_params(requested_params, requested_time,\n", 382 | "                                requested_lat, requested_lon)\n", 383 | "\n", 384 | "BASE_URL = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/'\n", 385 | "generated_URL = generate_download_links([download_year], BASE_URL, \n", 386 | "                                        'tavg1_2d_slv_Nx', \n", 387 | "                                        parameter)\n", 388 | "    \n", 389 | "# See what a query to the MERRA-2 portal looks like. 
\n", 390 | "log.info(generated_URL[0])" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "---\n", 398 | "# Downloading data" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "This part subsequently downloads the subsetted raw data from the MERRA-2-datasets. \n", 406 | "The download process is outsourced from the notebook, because it is a standard and repetitive process. If you are interested in the the code, see the [opendap_download module](opendap_download/).\n", 407 | "\n", 408 | "_Note: Each of the following steps to download the data will take a few minutes, depending on the size of geographical area and amount of data (the total download routine e.G. for Germany takes roughly 70 minutes)._" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "## Get wind data" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "Parameters from the dataset [tavg1_2d_slv_Nx (M2T1NXSLV)](http://goldsmr4.sci.gsfc.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/contents.html)" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": { 429 | "ExecuteTime": { 430 | "end_time": "2017-07-05T19:19:19.577626", 431 | "start_time": "2017-07-05T19:09:11.208420" 432 | } 433 | }, 434 | "outputs": [], 435 | "source": [ 436 | "# Download data (one file per day and dataset) with links to local directory.\n", 437 | "# Username and password for MERRA-2 (NASA earthdata portal)\n", 438 | "username = input('Username: ')\n", 439 | "password = getpass.getpass('Password:')\n", 440 | "\n", 441 | "# The DownloadManager is able to download files. If you have a fast internet \n", 442 | "# connection, setting this to 2 is enough. If you have slow wifi, try setting\n", 443 | "# it to 4 or 5. If you download too fast, the data portal might ban you for a \n", 444 | "# day. \n", 445 | "NUMBER_OF_CONNECTIONS = 5\n", 446 | "\n", 447 | "# The DownloadManager class is defined in the opendap_download module. \n", 448 | "download_manager = DownloadManager()\n", 449 | "download_manager.set_username_and_password(username, password)\n", 450 | "download_manager.download_path = 'download_wind'\n", 451 | "download_manager.download_urls = generated_URL\n", 452 | "\n", 453 | "# If you want to see the download progress, check the download folder you \n", 454 | "# specified\n", 455 | "%time download_manager.start_download(NUMBER_OF_CONNECTIONS)\n", 456 | "\n", 457 | "# Download time approx. 20+ min." 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "## Get roughness data" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "Parameters from the dataset [tavg1_2d_rad_Nx (M2T1NXRAD)](https://goldsmr4.sci.gsfc.nasa.gov/opendap/MERRA2/M2T1NXRAD.5.12.4/contents.html)" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": { 478 | "ExecuteTime": { 479 | "end_time": "2017-07-05T19:25:32.163166", 480 | "start_time": "2017-07-05T19:19:19.577626" 481 | } 482 | }, 483 | "outputs": [], 484 | "source": [ 485 | "# Roughness data is in a different data set. The parameter is called Z0M. 
\n", 486 | "roughness_para = generate_url_params(['Z0M'], requested_time, \n", 487 | " requested_lat, requested_lon)\n", 488 | "\n", 489 | "ROUGHNESS_BASE_URL = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXFLX.5.12.4/'\n", 490 | "\n", 491 | "roughness_links = generate_download_links([download_year], ROUGHNESS_BASE_URL,\n", 492 | " 'tavg1_2d_flx_Nx', roughness_para)\n", 493 | "\n", 494 | "download_manager.download_path = 'download_roughness'\n", 495 | "download_manager.download_urls = roughness_links\n", 496 | "\n", 497 | "# If you want to see the download progress, check the download folder you \n", 498 | "# specified.\n", 499 | "%time download_manager.start_download(NUMBER_OF_CONNECTIONS)\n", 500 | "\n", 501 | "# Download time approx. 12+ min." 502 | ] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "metadata": {}, 507 | "source": [ 508 | "## Get radiation data" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": {}, 514 | "source": [ 515 | "Parameters from the dataset [tavg1_2d_flx_Nx (M2T1NXFLX)](http://goldsmr4.sci.gsfc.nasa.gov/opendap/MERRA2/M2T1NXFLX.5.12.4/contents.html)" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": null, 521 | "metadata": { 522 | "ExecuteTime": { 523 | "end_time": "2017-07-05T19:30:15.480181", 524 | "start_time": "2017-07-05T19:25:32.170186" 525 | } 526 | }, 527 | "outputs": [], 528 | "source": [ 529 | "# Parameters SWGDN and SWTDN\n", 530 | "radiation_para = generate_url_params(['SWGDN', 'SWTDN'], requested_time, \n", 531 | " requested_lat, requested_lon)\n", 532 | "RADIATION_BASE_URL = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXRAD.5.12.4/'\n", 533 | "radiation_links = generate_download_links([download_year], RADIATION_BASE_URL, \n", 534 | " 'tavg1_2d_rad_Nx', radiation_para)\n", 535 | "\n", 536 | "download_manager.download_path = 'download_radiation'\n", 537 | "download_manager.download_urls = radiation_links\n", 538 | "\n", 539 | "%time download_manager.start_download(NUMBER_OF_CONNECTIONS)\n", 540 | "\n", 541 | "# Download time approx. 8+ min." 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": {}, 547 | "source": [ 548 | "## Get temperature data" 549 | ] 550 | }, 551 | { 552 | "cell_type": "markdown", 553 | "metadata": {}, 554 | "source": [ 555 | "Parameters from the dataset [tavg1_2d_slv_Nx (M2T1NXSLV)](http://goldsmr4.sci.gsfc.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/contents.html)" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": null, 561 | "metadata": { 562 | "ExecuteTime": { 563 | "end_time": "2017-07-05T19:37:42.740556", 564 | "start_time": "2017-07-05T19:30:15.486197" 565 | } 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "# Parameter T2M (i.e. the temperature 2 meters above displacement height)\n", 570 | "temperature_para = generate_url_params(['T2M'], requested_time, \n", 571 | " requested_lat, requested_lon)\n", 572 | "TEMPERATURE_BASE_URL = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/'\n", 573 | "temperature_links = generate_download_links([download_year], TEMPERATURE_BASE_URL, \n", 574 | " 'tavg1_2d_slv_Nx', temperature_para)\n", 575 | "\n", 576 | "download_manager.download_path = 'download_temperature'\n", 577 | "download_manager.download_urls = temperature_links\n", 578 | "\n", 579 | "%time download_manager.start_download(NUMBER_OF_CONNECTIONS)\n", 580 | "\n", 581 | "# Download time approx. 13+ min." 
582 | ] 583 | }, 584 | { 585 | "cell_type": "markdown", 586 | "metadata": {}, 587 | "source": [ 588 | "## Get air density data" 589 | ] 590 | }, 591 | { 592 | "cell_type": "markdown", 593 | "metadata": {}, 594 | "source": [ 595 | "Parameters from the dataset [tavg1_2d_flx_Nx (M2T1NXFLX)](http://goldsmr4.sci.gsfc.nasa.gov/opendap/MERRA2/M2T1NXFLX.5.12.4/contents.html)" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": null, 601 | "metadata": { 602 | "ExecuteTime": { 603 | "end_time": "2017-07-05T19:45:02.911206", 604 | "start_time": "2017-07-05T19:37:42.749574" 605 | } 606 | }, 607 | "outputs": [], 608 | "source": [ 609 | "# Parameter RHOA\n", 610 | "density_para = generate_url_params(['RHOA'], requested_time, \n", 611 | " requested_lat, requested_lon)\n", 612 | "DENSITY_BASE_URL = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXFLX.5.12.4/'\n", 613 | "density_links = generate_download_links([download_year], DENSITY_BASE_URL, \n", 614 | " 'tavg1_2d_flx_Nx', density_para)\n", 615 | "\n", 616 | "download_manager.download_path = 'download_density'\n", 617 | "download_manager.download_urls = density_links\n", 618 | "\n", 619 | "%time download_manager.start_download(NUMBER_OF_CONNECTIONS)\n", 620 | "\n", 621 | "# Download time approx. 13+ min." 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "metadata": {}, 627 | "source": [ 628 | "## Get air pressure data" 629 | ] 630 | }, 631 | { 632 | "cell_type": "markdown", 633 | "metadata": {}, 634 | "source": [ 635 | "Parameters from the dataset [tavg1_2d_slv_Nx (M2T1NXSLV)](http://goldsmr4.sci.gsfc.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/contents.html)" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": null, 641 | "metadata": { 642 | "ExecuteTime": { 643 | "end_time": "2017-07-05T19:52:19.832091", 644 | "start_time": "2017-07-05T19:45:02.911206" 645 | } 646 | }, 647 | "outputs": [], 648 | "source": [ 649 | "# Parameters PS\n", 650 | "pressure_para = generate_url_params(['PS'], requested_time, \n", 651 | " requested_lat, requested_lon)\n", 652 | "PRESSURE_BASE_URL = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/'\n", 653 | "pressure_links = generate_download_links([download_year], PRESSURE_BASE_URL, \n", 654 | " 'tavg1_2d_slv_Nx', pressure_para)\n", 655 | "\n", 656 | "download_manager.download_path = 'download_pressure'\n", 657 | "download_manager.download_urls = pressure_links\n", 658 | "\n", 659 | "%time download_manager.start_download(NUMBER_OF_CONNECTIONS)\n", 660 | "\n", 661 | "# Download time approx. 15+ min." 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": {}, 667 | "source": [ 668 | "## Get lat and lon dimensions" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "metadata": {}, 674 | "source": [ 675 | "For now, the dataset only has MERRA-2 grid coordinates. To translate the points\n", 676 | "back to \"real world\" coordinates, the data portal offers a dimension scale file." 677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": null, 682 | "metadata": { 683 | "ExecuteTime": { 684 | "end_time": "2017-07-05T19:56:49.202588", 685 | "start_time": "2017-07-05T19:56:45.345723" 686 | } 687 | }, 688 | "outputs": [], 689 | "source": [ 690 | "# The dimensions map the MERRA2 grid coordinates to lat/lon. 
The coordinates \n", 691 | "# to request are 0:360, whereas the coordinates in the documentation are 1:361\n", 692 | "requested_lat_dim = '[{lat_1}:1:{lat_2}]'.format(\n", 693 | "    lat_1=lat_co_1_closest, lat_2=lat_co_2_closest)\n", 694 | "requested_lon_dim = '[{lon_1}:1:{lon_2}]'.format(\n", 695 | "    lon_1=lon_co_1_closest, lon_2=lon_co_2_closest)\n", 696 | "\n", 697 | "lat_lon_dimension_para = 'lat' + requested_lat_dim + ',lon' + requested_lon_dim\n", 698 | "\n", 699 | "# Creating the download URL.\n", 700 | "dimension_url = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/2014/01/MERRA2_400.tavg1_2d_slv_Nx.20140101.nc4.nc4?'\n", 701 | "dimension_url = dimension_url + lat_lon_dimension_para\n", 702 | "download_manager.download_path = 'dimension_scale'\n", 703 | "download_manager.download_urls = [dimension_url]\n", 704 | "\n", 705 | "# Since the dimension data is only one file, we only need one connection. \n", 706 | "%time download_manager.start_download(1)\n" 707 | ] 708 | }, 709 | { 710 | "cell_type": "markdown", 711 | "metadata": {}, 712 | "source": [ 713 | "## Check the precision of the downloaded data" 714 | ] 715 | }, 716 | { 717 | "cell_type": "markdown", 718 | "metadata": {}, 719 | "source": [ 720 | "Due to the back-and-forth conversion from \"real world\" coordinates to MERRA-2 grid points,\n", 721 | "this part helps you check whether the conversion was precise enough." 722 | ] 723 | }, 724 | { 725 | "cell_type": "code", 726 | "execution_count": null, 727 | "metadata": { 728 | "ExecuteTime": { 729 | "end_time": "2017-07-05T19:56:50.141524", 730 | "start_time": "2017-07-05T19:56:49.941997" 731 | } 732 | }, 733 | "outputs": [], 734 | "source": [ 735 | "file_path = os.path.join('dimension_scale', DownloadManager.get_filename(\n", 736 | "    dimension_url))\n", 737 | "\n", 738 | "with xr.open_dataset(file_path) as ds_dim:\n", 739 | "    df_dim = ds_dim.to_dataframe()\n", 740 | "    # Read the coordinate arrays while the dataset is still open.\n", 741 | "    lat_array = ds_dim['lat'].data.tolist()\n", 742 | "    lon_array = ds_dim['lon'].data.tolist()\n", 743 | "\n", 744 | "# The log output helps you evaluate the precision of the received data.\n", 745 | "log.info('Requested lat: ' + str((lat_1, lat_2)))\n", 746 | "log.info('Received lat: ' + str(lat_array))\n", 747 | "log.info('Requested lon: ' + str((lon_1, lon_2)))\n", 748 | "log.info('Received lon: ' + str(lon_array))" 749 | ] 750 | }, 751 | { 752 | "cell_type": "markdown", 753 | "metadata": {}, 754 | "source": [ 755 | "---\n", 756 | "# Setting up the DataFrame" 757 | ] 758 | }, 759 | { 760 | "cell_type": "markdown", 761 | "metadata": {}, 762 | "source": [ 763 | "This part sets up a DataFrame and reads the raw data into it: first the wind data, then the remaining datasets." 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": null, 769 | "metadata": { 770 | "ExecuteTime": { 771 | "end_time": "2017-07-05T19:57:11.379096", 772 | "start_time": "2017-07-05T19:57:01.242313" 773 | } 774 | }, 775 | "outputs": [], 776 | "source": [ 777 | "def extract_date(data_set):\n", 778 | "    \"\"\"\n", 779 | "    Extracts the date from the filename before merging the datasets. \n", 780 | "    \"\"\"\n", 781 | "    try:\n", 782 | "        # The attribute name changed during the development of this script\n", 783 | "        # from HDF5_GLOBAL.Filename to Filename. 
\n", 784 | " if 'HDF5_GLOBAL.Filename' in data_set.attrs:\n", 785 | " f_name = data_set.attrs['HDF5_GLOBAL.Filename']\n", 786 | " elif 'Filename' in data_set.attrs:\n", 787 | " f_name = data_set.attrs['Filename']\n", 788 | " else: \n", 789 | " raise AttributeError('The attribute name has changed again!')\n", 790 | " \n", 791 | " # find a match between \".\" and \".nc4\" that does not have \".\" .\n", 792 | " exp = r'(?<=\\.)[^\\.]*(?=\\.nc4)'\n", 793 | " res = re.search(exp, f_name).group(0)\n", 794 | " # Extract the date. \n", 795 | " y, m, d = res[0:4], res[4:6], res[6:8]\n", 796 | " date_str = ('%s-%s-%s' % (y, m, d))\n", 797 | " data_set = data_set.assign(date=date_str)\n", 798 | " return data_set\n", 799 | "\n", 800 | " except KeyError:\n", 801 | " # The last dataset is the one all the other sets will be merged into. \n", 802 | " # Therefore, no date can be extracted.\n", 803 | " return data_set\n", 804 | " \n", 805 | "\n", 806 | "file_path = os.path.join('download_wind', '*.nc4')\n", 807 | "\n", 808 | "try:\n", 809 | " with xr.open_mfdataset(file_path, concat_dim='date',\n", 810 | " preprocess=extract_date) as ds_wind:\n", 811 | " print(ds_wind)\n", 812 | " df_wind = ds_wind.to_dataframe()\n", 813 | " \n", 814 | "except xr.MergeError as e:\n", 815 | " print(e)" 816 | ] 817 | }, 818 | { 819 | "cell_type": "code", 820 | "execution_count": null, 821 | "metadata": { 822 | "ExecuteTime": { 823 | "end_time": "2017-07-05T19:57:11.576594", 824 | "start_time": "2017-07-05T19:57:11.383085" 825 | } 826 | }, 827 | "outputs": [], 828 | "source": [ 829 | "df_wind.reset_index(inplace=True)" 830 | ] 831 | }, 832 | { 833 | "cell_type": "code", 834 | "execution_count": null, 835 | "metadata": { 836 | "ExecuteTime": { 837 | "end_time": "2017-07-05T19:58:54.443971", 838 | "start_time": "2017-07-05T19:57:14.121877" 839 | } 840 | }, 841 | "outputs": [], 842 | "source": [ 843 | "start_date = datetime.strptime(download_start_date, '%Y-%m-%d')\n", 844 | "\n", 845 | "def calculate_datetime(d_frame):\n", 846 | " \"\"\"\n", 847 | " Calculates the accumulated hour based on the date.\n", 848 | " \"\"\"\n", 849 | " cur_date = datetime.strptime(d_frame['date'], '%Y-%m-%d')\n", 850 | " hour = int(d_frame['time'])\n", 851 | " delta = abs(cur_date - start_date).days\n", 852 | " date_time_value = (delta * 24) + (hour)\n", 853 | " return date_time_value\n", 854 | "\n", 855 | "\n", 856 | "df_wind['date_time_hours'] = df_wind.apply(calculate_datetime, axis=1)\n", 857 | "df_wind" 858 | ] 859 | }, 860 | { 861 | "cell_type": "markdown", 862 | "metadata": {}, 863 | "source": [ 864 | "## Converting the timeformat to ISO 8601" 865 | ] 866 | }, 867 | { 868 | "cell_type": "code", 869 | "execution_count": null, 870 | "metadata": { 871 | "ExecuteTime": { 872 | "end_time": "2017-07-05T20:02:23.077220", 873 | "start_time": "2017-07-05T19:58:54.445977" 874 | } 875 | }, 876 | "outputs": [], 877 | "source": [ 878 | "def converting_timeformat_to_ISO8601(row):\n", 879 | " \"\"\"Generates datetime according to ISO 8601 (UTC)\"\"\"\n", 880 | " date = dateutil.parser.parse(row['date'])\n", 881 | " hour = int(row['time'])\n", 882 | " # timedelta from the datetime module enables the programmer \n", 883 | " # to add time to a date. 
\n", 884 | " date_time = date + timedelta(hours = hour)\n", 885 | " return str(date_time.isoformat()) + 'Z' # MERRA2 datasets have UTC time zone.\n", 886 | "df_wind['date_utc'] = df_wind.apply(converting_timeformat_to_ISO8601, axis=1)\n", 887 | "\n", 888 | "df_wind['date_utc']\n", 889 | "\n", 890 | "# execution time approx. 3+ min" 891 | ] 892 | }, 893 | { 894 | "cell_type": "markdown", 895 | "metadata": {}, 896 | "source": [ 897 | "## Converting wind vectors to wind speed" 898 | ] 899 | }, 900 | { 901 | "cell_type": "markdown", 902 | "metadata": {}, 903 | "source": [ 904 | "This part uses the given wind vectors in the MERRA-2 original data to calculate a wind speed (vector addition)." 905 | ] 906 | }, 907 | { 908 | "cell_type": "code", 909 | "execution_count": null, 910 | "metadata": { 911 | "ExecuteTime": { 912 | "end_time": "2017-07-05T20:06:12.987618", 913 | "start_time": "2017-07-05T20:02:23.079229" 914 | } 915 | }, 916 | "outputs": [], 917 | "source": [ 918 | "def calculate_windspeed(d_frame, idx_u, idx_v):\n", 919 | " \"\"\"\n", 920 | " Calculates the windspeed. The returned unit is m/s\n", 921 | " \"\"\"\n", 922 | " um = float(d_frame[idx_u])\n", 923 | " vm = float(d_frame[idx_v])\n", 924 | " speed = math.sqrt((um ** 2) + (vm ** 2))\n", 925 | " return round(speed, 2)\n", 926 | "\n", 927 | "# partial is used to create a function with pre-set arguments. \n", 928 | "calc_windspeed_2m = partial(calculate_windspeed, idx_u='U2M', idx_v='V2M')\n", 929 | "calc_windspeed_10m = partial(calculate_windspeed, idx_u='U10M', idx_v='V10M')\n", 930 | "calc_windspeed_50m = partial(calculate_windspeed, idx_u='U50M', idx_v='V50M')\n", 931 | "\n", 932 | "df_wind['v_2m'] = df_wind.apply(calc_windspeed_2m, axis=1)\n", 933 | "df_wind['v_10m']= df_wind.apply(calc_windspeed_10m, axis=1)\n", 934 | "df_wind['v_50m'] = df_wind.apply(calc_windspeed_50m, axis=1)\n", 935 | "df_wind\n", 936 | "\n", 937 | "# execution time approx. 
3 min" 938 | ] 939 | }, 940 | { 941 | "cell_type": "markdown", 942 | "metadata": {}, 943 | "source": [ 944 | "## Setting up data Frame for roughness, radiation, temperature and air parameters" 945 | ] 946 | }, 947 | { 948 | "cell_type": "code", 949 | "execution_count": null, 950 | "metadata": { 951 | "ExecuteTime": { 952 | "end_time": "2017-07-05T20:07:49.977393", 953 | "start_time": "2017-07-05T20:07:45.887270" 954 | }, 955 | "scrolled": true 956 | }, 957 | "outputs": [], 958 | "source": [ 959 | "file_path = os.path.join('download_roughness', '*.nc4')\n", 960 | "with xr.open_mfdataset(file_path, concat_dim='date', \n", 961 | " preprocess=extract_date) as ds_rough:\n", 962 | " df_rough = ds_rough.to_dataframe()\n", 963 | "\n", 964 | "df_rough.reset_index(inplace=True)" 965 | ] 966 | }, 967 | { 968 | "cell_type": "code", 969 | "execution_count": null, 970 | "metadata": { 971 | "ExecuteTime": { 972 | "end_time": "2017-07-05T20:07:55.300506", 973 | "start_time": "2017-07-05T20:07:49.978395" 974 | } 975 | }, 976 | "outputs": [], 977 | "source": [ 978 | "file_path = os.path.join('download_radiation', '*.nc4')\n", 979 | "try:\n", 980 | " with xr.open_mfdataset(file_path, concat_dim='date',\n", 981 | " preprocess=extract_date) as ds_rad:\n", 982 | " print(ds_rad)\n", 983 | " df_rad = ds_rad.to_dataframe()\n", 984 | "\n", 985 | "except xr.MergeError as e:\n", 986 | " print(e)\n", 987 | "df_rad.reset_index(inplace=True)" 988 | ] 989 | }, 990 | { 991 | "cell_type": "code", 992 | "execution_count": null, 993 | "metadata": { 994 | "ExecuteTime": { 995 | "end_time": "2017-07-05T20:07:59.487112", 996 | "start_time": "2017-07-05T20:07:55.301510" 997 | } 998 | }, 999 | "outputs": [], 1000 | "source": [ 1001 | "file_path = os.path.join('download_temperature', '*.nc4')\n", 1002 | "try:\n", 1003 | " with xr.open_mfdataset(file_path, concat_dim='date',\n", 1004 | " preprocess=extract_date) as ds_temp:\n", 1005 | " print(ds_temp)\n", 1006 | " df_temp = ds_temp.to_dataframe()\n", 1007 | "\n", 1008 | "except xr.MergeError as e:\n", 1009 | " print(e)\n", 1010 | "df_temp.reset_index(inplace=True)" 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "code", 1015 | "execution_count": null, 1016 | "metadata": { 1017 | "ExecuteTime": { 1018 | "end_time": "2017-07-05T20:08:03.661984", 1019 | "start_time": "2017-07-05T20:07:59.488116" 1020 | } 1021 | }, 1022 | "outputs": [], 1023 | "source": [ 1024 | "file_path = os.path.join('download_density', '*.nc4')\n", 1025 | "try:\n", 1026 | " with xr.open_mfdataset(file_path, concat_dim='date',\n", 1027 | " preprocess=extract_date) as ds_dens:\n", 1028 | " print(ds_dens)\n", 1029 | " df_dens = ds_dens.to_dataframe()\n", 1030 | "\n", 1031 | "except xr.MergeError as e:\n", 1032 | " print(e)\n", 1033 | "df_dens.reset_index(inplace=True)" 1034 | ] 1035 | }, 1036 | { 1037 | "cell_type": "code", 1038 | "execution_count": null, 1039 | "metadata": { 1040 | "ExecuteTime": { 1041 | "end_time": "2017-07-05T20:08:08.196582", 1042 | "start_time": "2017-07-05T20:08:03.662961" 1043 | } 1044 | }, 1045 | "outputs": [], 1046 | "source": [ 1047 | "file_path = os.path.join('download_pressure', '*.nc4')\n", 1048 | "try:\n", 1049 | " with xr.open_mfdataset(file_path, concat_dim='date',\n", 1050 | " preprocess=extract_date) as ds_pres:\n", 1051 | " print(ds_pres)\n", 1052 | " df_pres = ds_pres.to_dataframe()\n", 1053 | "\n", 1054 | "except xr.MergeError as e:\n", 1055 | " print(e)\n", 1056 | "df_pres.reset_index(inplace=True)" 1057 | ] 1058 | }, 1059 | { 1060 | "cell_type": "markdown", 1061 | "metadata": 
{ 1062 | "collapsed": true 1063 | }, 1064 | "source": [ 1065 | "## Combining the data Frames" 1066 | ] 1067 | }, 1068 | { 1069 | "cell_type": "code", 1070 | "execution_count": null, 1071 | "metadata": { 1072 | "ExecuteTime": { 1073 | "end_time": "2017-07-05T20:08:14.554442", 1074 | "start_time": "2017-07-05T20:08:08.198587" 1075 | } 1076 | }, 1077 | "outputs": [], 1078 | "source": [ 1079 | "df = pd.merge(df_wind, df_rough, on=['date', 'lat', 'lon', 'time'])\n", 1080 | "df = pd.merge(df, df_rad, on=['date', 'lat', 'lon', 'time'])\n", 1081 | "df = pd.merge(df, df_temp, on=['date', 'lat', 'lon', 'time'])\n", 1082 | "df = pd.merge(df, df_dens, on=['date', 'lat', 'lon', 'time'])\n", 1083 | "df = pd.merge(df, df_pres, on=['date', 'lat', 'lon', 'time'])\n", 1084 | "df.info()" 1085 | ] 1086 | }, 1087 | { 1088 | "cell_type": "markdown", 1089 | "metadata": {}, 1090 | "source": [ 1091 | "---\n", 1092 | "# Structure the dataframe, add and remove columns" 1093 | ] 1094 | }, 1095 | { 1096 | "cell_type": "markdown", 1097 | "metadata": {}, 1098 | "source": [ 1099 | "## Calculating the displacement height" 1100 | ] 1101 | }, 1102 | { 1103 | "cell_type": "markdown", 1104 | "metadata": {}, 1105 | "source": [ 1106 | "The so-called \"displacement height\" is the height\n", 1107 | "> _\"[...] at which zero wind speed is achieved as a result of flow obstacles such as trees or buildings. It is generally approximated as 2/3 of the average height of the obstacles. For example, if estimating winds over a forest canopy of height h = 30 m, the zero-plane displacement would be d = 20 m.\"_ ([Source](https://en.wikipedia.org/wiki/Log_wind_profile#Definition))" 1108 | ] 1109 | }, 1110 | { 1111 | "cell_type": "code", 1112 | "execution_count": null, 1113 | "metadata": { 1114 | "ExecuteTime": { 1115 | "end_time": "2017-07-05T20:23:10.533218", 1116 | "start_time": "2017-07-05T20:21:45.412045" 1117 | } 1118 | }, 1119 | "outputs": [], 1120 | "source": [ 1121 | "# Calculate height for h1 (displacement height +2m) and h2 (displacement height\n", 1122 | "# +10m).\n", 1123 | "df['h1'] = df.apply((lambda x:int(x['DISPH']) + 2), axis=1)\n", 1124 | "df['h2'] = df.apply((lambda x:int(x['DISPH']) + 10), axis=1)" 1125 | ] 1126 | }, 1127 | { 1128 | "cell_type": "markdown", 1129 | "metadata": {}, 1130 | "source": [ 1131 | "## Adding needed and removing not needed columns" 1132 | ] 1133 | }, 1134 | { 1135 | "cell_type": "code", 1136 | "execution_count": null, 1137 | "metadata": { 1138 | "ExecuteTime": { 1139 | "end_time": "2017-07-05T20:23:13.000898", 1140 | "start_time": "2017-07-05T20:23:10.534193" 1141 | } 1142 | }, 1143 | "outputs": [], 1144 | "source": [ 1145 | "df.drop('DISPH', axis=1, inplace=True)\n", 1146 | "df.drop(['time', 'date'], axis=1, inplace=True)\n", 1147 | "df.drop(['U2M', 'U10M', 'U50M', 'V2M', 'V10M', 'V50M'], axis=1, inplace=True)\n", 1148 | "\n", 1149 | "df['lat'] = df['lat'].apply(lambda x: lat_array[int(x)])\n", 1150 | "df['lon'] = df['lon'].apply(lambda x: lon_array[int(x)])" 1151 | ] 1152 | }, 1153 | { 1154 | "cell_type": "markdown", 1155 | "metadata": {}, 1156 | "source": [ 1157 | "## Renaming and sorting columns" 1158 | ] 1159 | }, 1160 | { 1161 | "cell_type": "code", 1162 | "execution_count": null, 1163 | "metadata": { 1164 | "ExecuteTime": { 1165 | "end_time": "2017-07-05T20:23:13.422342", 1166 | "start_time": "2017-07-05T20:23:13.001904" 1167 | } 1168 | }, 1169 | "outputs": [], 1170 | "source": [ 1171 | "rename_map = {'date_time_hours': 'cumulated hours', \n", 1172 | " 'date_utc': 'timestamp',\n", 1173 | " 
'v_2m': 'v1', \n", 1174 | " 'v_10m': 'v2', \n", 1175 | " 'Z0M': 'z0',\n", 1176 | " 'T2M': 'T',\n", 1177 | " 'RHOA': 'rho',\n", 1178 | " 'PS': 'p'\n", 1179 | " }\n", 1180 | "\n", 1181 | "df.rename(columns=rename_map, inplace=True)" 1182 | ] 1183 | }, 1184 | { 1185 | "cell_type": "code", 1186 | "execution_count": null, 1187 | "metadata": { 1188 | "ExecuteTime": { 1189 | "end_time": "2017-07-05T20:23:13.568401", 1190 | "start_time": "2017-07-05T20:23:13.425351" 1191 | } 1192 | }, 1193 | "outputs": [], 1194 | "source": [ 1195 | "# Change order of the columns\n", 1196 | "columns = ['timestamp', 'cumulated hours', 'lat', 'lon',\n", 1197 | " 'v1', 'v2', 'v_50m',\n", 1198 | " 'h1', 'h2', 'z0', 'SWTDN', 'SWGDN', 'T', 'rho', 'p']\n", 1199 | "df = df[columns]" 1200 | ] 1201 | }, 1202 | { 1203 | "cell_type": "markdown", 1204 | "metadata": {}, 1205 | "source": [ 1206 | "## First look at the final data Frame" 1207 | ] 1208 | }, 1209 | { 1210 | "cell_type": "code", 1211 | "execution_count": null, 1212 | "metadata": { 1213 | "ExecuteTime": { 1214 | "end_time": "2017-07-05T20:23:13.592465", 1215 | "start_time": "2017-07-05T20:23:13.571409" 1216 | } 1217 | }, 1218 | "outputs": [], 1219 | "source": [ 1220 | "df.info()" 1221 | ] 1222 | }, 1223 | { 1224 | "cell_type": "code", 1225 | "execution_count": null, 1226 | "metadata": { 1227 | "ExecuteTime": { 1228 | "end_time": "2017-07-05T20:23:13.742866", 1229 | "start_time": "2017-07-05T20:23:13.594471" 1230 | } 1231 | }, 1232 | "outputs": [], 1233 | "source": [ 1234 | "df" 1235 | ] 1236 | }, 1237 | { 1238 | "cell_type": "markdown", 1239 | "metadata": {}, 1240 | "source": [ 1241 | "## Save as CSV" 1242 | ] 1243 | }, 1244 | { 1245 | "cell_type": "code", 1246 | "execution_count": null, 1247 | "metadata": { 1248 | "ExecuteTime": { 1249 | "end_time": "2017-07-05T20:26:04.782036", 1250 | "start_time": "2017-07-05T20:25:19.746858" 1251 | } 1252 | }, 1253 | "outputs": [], 1254 | "source": [ 1255 | "df.to_csv('weather_data_GER_2016.csv', index=False)" 1256 | ] 1257 | } 1258 | ], 1259 | "metadata": { 1260 | "anaconda-cloud": {}, 1261 | "kernelspec": { 1262 | "display_name": "Python 3", 1263 | "language": "python", 1264 | "name": "python3" 1265 | }, 1266 | "language_info": { 1267 | "codemirror_mode": { 1268 | "name": "ipython", 1269 | "version": 3 1270 | }, 1271 | "file_extension": ".py", 1272 | "mimetype": "text/x-python", 1273 | "name": "python", 1274 | "nbconvert_exporter": "python", 1275 | "pygments_lexer": "ipython3", 1276 | "version": "3.6.4" 1277 | }, 1278 | "nav_menu": {}, 1279 | "toc": { 1280 | "navigate_menu": true, 1281 | "number_sections": true, 1282 | "sideBar": true, 1283 | "threshold": 6, 1284 | "toc_cell": true, 1285 | "toc_section_display": "block", 1286 | "toc_window_display": false 1287 | } 1288 | }, 1289 | "nbformat": 4, 1290 | "nbformat_minor": 1 1291 | } 1292 | -------------------------------------------------------------------------------- /generate_metadata.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import yaml 4 | import hashlib 5 | 6 | 7 | country_map = { 8 | 'BE': 'Belgium', 9 | 'BG': 'Bulgaria', 10 | 'CZ': 'Czech Republic', 11 | 'DK': 'Denmark', 12 | 'DE': 'Germany', 13 | 'EE': 'Estonia', 14 | 'IE': 'Ireland', 15 | 'GR': 'Greece', 16 | 'ES': 'Spain', 17 | 'FR': 'France', 18 | 'IT': 'Italy', 19 | 'CY': 'Cyprus', 20 | 'LV': 'Latvia', 21 | 'LT': 'Lithuania', 22 | 'LU': 'Luxembourg', 23 | 'HU': 'Hungary', 24 | 'MT': 'Malta', 25 | 'NL': 'Netherlands', 26 | 'AT': 'Austria', 27 | 
'PL': 'Poland', 28 | 'PT': 'Portugal', 29 | 'RO': 'Romania', 30 | 'SI': 'Slovenia', 31 | 'SK': 'Slovakia', 32 | 'FI': 'Finland', 33 | 'SE': 'Sweden', 34 | 'GB': 'Great Britain', 35 | 'CH': 'Switzerland', 36 | 'GR': 'Greece', 37 | 'NO': 'Norway', 38 | 'ME': 'Montenegro', 39 | 'MD': 'Moldova', 40 | 'RS': 'Serbia', 41 | 'HR': 'Croatia', 42 | 'AL': 'Albania', 43 | 'MK': 'Macedonia', 44 | 'BA': 'Bosnia and Herzegovina', 45 | } 46 | 47 | 48 | metadata_head = ''' 49 | profile: tabular-data-package 50 | name: opsd_weather_data 51 | title: Weather Data 52 | id: 'https://doi.org/10.25832/weather_data/{version}' 53 | description: Hourly geographically aggregated weather data for Europe 54 | longDescription: "This data package contains weather data relevant for power system modeling, at hourly resolution, for Europe, aggregated by Renewables.ninja from the NASA MERRA-2 reanalysis. It covers the European countries using a population-weighted mean across all MERRA-2 grid cells within the given country. It also covers Germany's NUTS-2 zones." 55 | homepage: 'https://data.open-power-system-data.org/weather_data/{version}' 56 | documentation: 'https://github.com/Open-Power-System-Data/weather_data/blob/{version}/main.ipynb' 57 | version: '{version}' 58 | created: '{version}' 59 | lastChanges: '{changes}' 60 | # license: 61 | # name: 62 | # path: 63 | # title: 64 | keywords: 65 | - Open Power System Data 66 | - time series 67 | - power systems 68 | - weather 69 | - MERRA-2 70 | - Renewables.ninja 71 | geographicalScope: Europe 72 | temporalScope: 73 | start: "1980-01-01" 74 | end: "2016-12-31" 75 | contributors: 76 | - web: https://www.pfenninger.org/ 77 | name: Stefan Pfenninger 78 | email: stefan.pfenninger@usys.ethz.ch 79 | organization: ETH Zürich 80 | role: author 81 | - web: https://www.imperial.ac.uk/people/i.staffell 82 | name: Iain Staffell 83 | email: i.staffell@imperial.ac.uk 84 | organization: Imperial College London 85 | role: author 86 | sources: 87 | - name: NASA 88 | web: https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/ 89 | - name: Renewables.ninja 90 | web: https://www.renewables.ninja/#/country 91 | resources: 92 | ''' 93 | 94 | metadata_resource_singleindex_csv = ''' 95 | profile: tabular-data-resource 96 | name: opsd_weather_data 97 | title: Weather Data 98 | description: Geographically aggregated weather data 99 | path: weather_data.csv 100 | format: csv 101 | mediatype: text/csv 102 | encoding: UTF8 103 | bytes: {bytes} 104 | hash: {hash} 105 | dialect: 106 | csvddfVersion: 1.0 107 | delimiter: "," 108 | lineTerminator: "\\n" 109 | header: true 110 | _alternativeFormats: 111 | - path: weather_data.csv 112 | stacking: Singleindex 113 | format: csv 114 | - path: weather_data_multiindex.csv 115 | stacking: Multiindex 116 | format: csv 117 | # - path: weather_data.xlsx 118 | # stacking: Multiindex 119 | # format: xlsx 120 | schema: 121 | primaryKey: utc_timestamp 122 | missingValues: "" 123 | fields: 124 | - name: utc_timestamp 125 | description: Start of time period in Coordinated Universal Time 126 | type: datetime 127 | format: "fmt:%Y-%m-%dT%H%M%SZ" 128 | opsdContentfilter: true 129 | ''' 130 | 131 | metadata_resource_xlsx = ''' 132 | name: opsd_weather_data 133 | title: Weather Data (Excel file) 134 | description: Geographically aggregated weather data (Excel file) 135 | path: weather_data.xlsx 136 | format: xlsx 137 | mediatype: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet 138 | bytes: {bytes} 139 | hash: {hash} 140 | ''' 141 | 142 | 143 | def get_field(column): 
144 | """``column`` is a tuple of the form (geography, variable)""" 145 | # HACK: units hardcoded here 146 | units = { 147 | 'temperature': 'degrees C', 148 | 'windspeed_10m': 'm/s', 149 | 'radiation_direct_horizontal': 'W/m2', 150 | 'radiation_diffuse_horizontal': 'W/m2', 151 | } 152 | 153 | geography, variable = column 154 | 155 | country = geography[0:2] 156 | 157 | if len(geography) == 2: 158 | resolution = 'Country' 159 | else: 160 | resolution = 'NUTS-2' 161 | 162 | unit = units[variable] 163 | 164 | field_template = ''' 165 | name: {geography}_{variable} 166 | description: {variable} weather variable for {geography} in {unit} 167 | type: number (float) 168 | opsdProperties: 169 | Variable: {variable} 170 | Country: {country} 171 | Resolution: {resolution} 172 | '''.format( 173 | geography=geography, 174 | variable=variable, 175 | country=country, 176 | resolution=resolution, 177 | unit=unit 178 | ) 179 | 180 | return yaml.load(field_template) 181 | 182 | 183 | def get_resource_data(template, file_path): 184 | filesize_bytes = os.path.getsize(file_path) 185 | with open(file_path, 'rb') as f: 186 | file_md5_hash = hashlib.md5(f.read()).hexdigest() 187 | 188 | return yaml.load( 189 | template.format(bytes=filesize_bytes, hash=file_md5_hash) 190 | ) 191 | 192 | 193 | def generate_json(df, version, changes): 194 | ''' 195 | Creates a datapackage.json file that complies with the Frictionless 196 | data JSON Table Schema from the information in the column MultiIndex. 197 | 198 | Parameters 199 | ---------- 200 | df: pandas.DataFrame 201 | version: str 202 | Version tag of the Data Package 203 | changes : str 204 | Desription of the changes from the last version to this one. 205 | 206 | Returns 207 | ------- 208 | None 209 | 210 | ''' 211 | md_head = yaml.load( 212 | metadata_head.format(version=version, changes=changes) 213 | ) 214 | 215 | md_resource_singleindex_csv = get_resource_data( 216 | metadata_resource_singleindex_csv, 217 | os.path.join(version, 'weather_data.csv') 218 | ) 219 | 220 | # md_resource_xlsx = get_resource_data( 221 | # metadata_resource_xlsx, 222 | # os.path.join(version, 'weather_data.xlsx') 223 | # ) 224 | 225 | fields = [get_field(col) for col in df.columns] 226 | 227 | metadata = md_head 228 | metadata['resources'] = [md_resource_singleindex_csv] # , md_resource_xlsx] 229 | metadata['resources'][0]['schema']['fields'] += fields 230 | 231 | out_path = os.path.join(version, 'datapackage.json') 232 | os.makedirs(version, exist_ok=True) 233 | 234 | datapackage_json = json.dumps(metadata, indent=4, separators=(',', ': ')) 235 | with open(out_path, 'w') as f: 236 | f.write(datapackage_json) 237 | -------------------------------------------------------------------------------- /main.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
\n", 8 | " Weather Data: Main Notebook\n", 9 | " \n", 15 | "
This Notebook is part of the Weather Data Package of Open Power System Data.\n", 16 | "
" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "toc": true 23 | }, 24 | "source": [ 25 | "

Table of Contents

\n", 26 | "
" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# 1. About Open Power System Data" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "This notebook is part of the project [Open Power System Data](http://open-power-system-data.org). Open Power System Data develops a platform for free and open data for electricity system modeling. We collect, check, process, document, and provide data that are publicly available but currently inconvenient to use. \n", 41 | "More info on Open Power System Data:\n", 42 | "- [Information on the project on our website](http://open-power-system-data.org)\n", 43 | "- [Data and metadata on our data platform](http://data.open-power-system-data.org)\n", 44 | "- [Data processing scripts on our GitHub page](https://github.com/Open-Power-System-Data)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "# 2. About Jupyter Notebooks and GitHub" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "This file is a [Jupyter Notebook](http://jupyter.org/). A Jupyter Notebook is a file that combines executable programming code with visualizations and comments in markdown format, allowing for an intuitive documentation of the code. We use Jupyter Notebooks for combined coding and documentation. We use Python 3 as programming language. All Notebooks are stored on [GitHub](https://github.com/), a platform for software development, and are publicly available. More information on our IT-concept can be found [here](http://open-power-system-data.org/it). See also our [step-by-step manual](http://open-power-system-data.org/step-by-step) how to use the dataplatform." 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "# 3. About this datapackage" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "We provide data in different chunks, or [data packages](http://frictionlessdata.io/data-packages/).\n", 73 | "\n", 74 | "The one you are looking at right now, [Weather Data](http://data.open-power-system-data.org/weather_data/), contains geographically aggregated weather variables relevant for power system modelling. The main focus of this data package is Germany and neighboring European countries." 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "# 4. Data sources" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "The data source for the pre-computed country-aggregated weather datasets is [Renewables.ninja](https://www.renewables.ninja), which in turn is based on weather data from the [NASA MERRA-2 reanalysis](https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/)." 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "# 5. License" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "This notebook as well as all other documents in this repository is published under the [MIT License](LICENSE.md)." 
103 | ] 104 | } 105 | ], 106 | "metadata": { 107 | "anaconda-cloud": {}, 108 | "kernelspec": { 109 | "display_name": "Python 3", 110 | "language": "python", 111 | "name": "python3" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 3 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython3", 123 | "version": "3.6.6" 124 | }, 125 | "toc": { 126 | "nav_menu": { 127 | "height": "120px", 128 | "width": "252px" 129 | }, 130 | "number_sections": false, 131 | "sideBar": true, 132 | "skip_h1_title": false, 133 | "toc_cell": true, 134 | "toc_position": {}, 135 | "toc_section_display": "block", 136 | "toc_window_display": true 137 | }, 138 | "varInspector": { 139 | "cols": { 140 | "lenName": 16, 141 | "lenType": 16, 142 | "lenVar": 40 143 | }, 144 | "kernels_config": { 145 | "python": { 146 | "delete_cmd_postfix": "", 147 | "delete_cmd_prefix": "del ", 148 | "library": "var_list.py", 149 | "varRefreshCmd": "print(var_dic_list())" 150 | }, 151 | "r": { 152 | "delete_cmd_postfix": ") ", 153 | "delete_cmd_prefix": "rm(", 154 | "library": "var_list.r", 155 | "varRefreshCmd": "cat(var_dic_list()) " 156 | } 157 | }, 158 | "types_to_exclude": [ 159 | "module", 160 | "function", 161 | "builtin_function_or_method", 162 | "instance", 163 | "_Feature" 164 | ], 165 | "window_display": false 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 1 170 | } 171 | -------------------------------------------------------------------------------- /opendap_download/.gitignore: -------------------------------------------------------------------------------- 1 | *.yaml 2 | -------------------------------------------------------------------------------- /opendap_download/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Jan' 2 | -------------------------------------------------------------------------------- /opendap_download/authentication.yaml: -------------------------------------------------------------------------------- 1 | username: "testUser" 2 | password: "testPassword" 3 | -------------------------------------------------------------------------------- /opendap_download/multi_processing_download.py: -------------------------------------------------------------------------------- 1 | __author__ = "Jan Urbansky" 2 | 3 | # TODO: Change and describe structure of the links that have to be provided. 4 | # TODO: Proper readme with examples. 
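# --- Editorial note (not part of the original module): pending the README
# example promised in the TODO above, a minimal usage sketch, assuming a valid
# NASA Earthdata account and a list of MERRA-2 OPeNDAP subset URLs of the form
# shown in the __main__ block at the bottom of this file:
#
#     from opendap_download.multi_processing_download import DownloadManager
#
#     dl = DownloadManager(username='earthdata_user', password='earthdata_pass',
#                          links=merra_urls, download_path='downloads/merra2')
#     dl.start_download(nr_of_threads=4)
#
# 'merra_urls' and the credentials are placeholders; DownloadManager and
# start_download() are defined below.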
5 | 6 | from multiprocessing.dummy import Pool as Threadpool 7 | import requests 8 | import logging 9 | import yaml 10 | import os 11 | import urllib.response 12 | from http import cookiejar 13 | import urllib.error 14 | import urllib.request 15 | import re 16 | 17 | log = logging.getLogger('opendap_download') 18 | 19 | 20 | 21 | 22 | class DownloadManager(object): 23 |     __AUTHENTICATION_URL = 'https://urs.earthdata.nasa.gov/oauth/authorize' 24 |     __username = '' 25 |     __password = '' 26 |     __download_urls = [] 27 |     __download_path = '' 28 |     _authenticated_session = None 29 | 30 |     def __init__(self, username='', password='', links=None, download_path='download'): 31 |         self.set_username_and_password(username, password) 32 |         self.download_urls = links 33 |         self.download_path = download_path 34 | 35 |         if logging.getLogger().getEffectiveLevel() == logging.INFO: 36 |             logging.getLogger("requests").setLevel(logging.CRITICAL) 37 |             logging.getLogger("urllib3").setLevel(logging.CRITICAL) 38 | 39 |         log.debug('Init DownloadManager') 40 | 41 |     @property 42 |     def download_urls(self): 43 |         return self.__download_urls 44 | 45 |     @download_urls.setter 46 |     def download_urls(self, links): 47 |         """ 48 |         Setter for the links to download. The links have to be an array containing the URLs. The module will 49 |         figure out the filename from the URL and save it to the folder provided with download_path() 50 |         :param links: The links to download 51 |         :type links: List[str] 52 |         """ 53 |         # TODO: Check if links have the right structure? Read filename from links? 54 |         # Check if all links are formed properly 55 |         if links is None: 56 |             self.__download_urls = [] 57 |         else: 58 |             for item in links: 59 |                 try: 60 |                     self.get_filename(item) 61 |                 except AttributeError: 62 |                     raise ValueError('The URL does not seem to have the right structure: ' + str(item)) 63 |             self.__download_urls = links 64 | 65 |     @property 66 |     def download_path(self): 67 |         return self.__download_path 68 | 69 |     @download_path.setter 70 |     def download_path(self, file_path): 71 |         self.__download_path = file_path 72 | 73 |     def set_username_and_password(self, username, password): 74 |         self.__username = username 75 |         self.__password = password 76 | 77 |     def read_credentials_from_yaml(self, file_path_to_yaml): 78 |         with open(file_path_to_yaml, 'r') as f: 79 |             credentials = yaml.safe_load(f) 80 |             log.debug('Credentials: ' + str(credentials)) 81 |             self.set_username_and_password(credentials['username'], credentials['password']) 82 | 83 |     def _mp_download_wrapper(self, url_item): 84 |         """ 85 |         Wrapper for parallel download. The function name cannot start with __ due to visibility issues. 86 |         :param url_item: 87 |         :type url_item: 88 |         :return: 89 |         :rtype: 90 |         """ 91 |         query = url_item 92 |         file_path = os.path.join(self.download_path, self.get_filename(query)) 93 |         self.__download_and_save_file(query, file_path) 94 | 95 |     def start_download(self, nr_of_threads=4): 96 |         if self._authenticated_session is None: 97 |             self._authenticated_session = self.__create_authenticated_session() 98 |         # Create the download folder. 99 |         os.makedirs(self.download_path, exist_ok=True) 100 |         # p = multiprocessing.Pool(nr_of_processes) 101 |         p = Threadpool(nr_of_threads) 102 |         p.map(self._mp_download_wrapper, self.download_urls) 103 |         p.close() 104 |         p.join() 105 | 106 |     @staticmethod 107 |     def get_filename(url): 108 |         """ 109 |         Extracts the filename from the URL. 
This method can also be used to check 110 |         if the links have the correct structure. 111 |         :param url: The MERRA URL 112 |         :type url: str 113 |         :return: The filename 114 |         :rtype: str 115 |         """ 116 |         # Extract everything between a leading / and .nc4 . The problem with using this without any 117 |         # other classification is that the URLs have multiple / in their structure. The expression [^/]* matches 118 |         # everything but /. Combined with the outer expressions, this only matches the part between the last / and .nc4 119 |         reg_exp = r'(?<=/)[^/]*(?=\.nc4?)'  # the dot is escaped so it only matches a literal "." 120 |         file_name = re.search(reg_exp, url).group(0) 121 |         return file_name 122 | 123 |     def __download_and_save_file(self, url, file_path): 124 |         r = self._authenticated_session.get(url, stream=True) 125 |         with open(file_path, 'wb') as f: 126 |             for chunk in r.iter_content(chunk_size=1024): 127 |                 if chunk: 128 |                     f.write(chunk) 129 |         return r.status_code 130 | 131 |     def __create_authenticated_session(self): 132 |         s = requests.Session() 133 |         s.headers = { 134 |             'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36'} 135 |         s.auth = (self.__username, self.__password) 136 |         s.cookies = self.__authorize_cookies_with_urllib() 137 | 138 |         if logging.getLogger().getEffectiveLevel() == logging.DEBUG: 139 |             r = s.get(self.download_urls[0]) 140 |             log.debug('Authentication Status') 141 |             log.debug(r.status_code) 142 |             log.debug(r.headers) 143 |             log.debug(r.cookies) 144 | 145 |             log.debug('Session Data') 146 |             log.debug(s.cookies) 147 |             log.debug(s.headers) 148 |         return s 149 | 150 |     def __authorize_cookies_with_urllib(self): 151 |         username = self.__username 152 |         password = self.__password 153 |         top_level_url = "https://urs.earthdata.nasa.gov" 154 | 155 |         # create an authorization handler 156 |         p = urllib.request.HTTPPasswordMgrWithDefaultRealm() 157 |         p.add_password(None, top_level_url, username, password) 158 | 159 |         auth_handler = urllib.request.HTTPBasicAuthHandler(p) 160 |         auth_cookie_jar = cookiejar.CookieJar() 161 |         cookie_jar = urllib.request.HTTPCookieProcessor(auth_cookie_jar) 162 |         opener = urllib.request.build_opener(auth_handler, cookie_jar) 163 | 164 |         urllib.request.install_opener(opener) 165 | 166 |         try: 167 |             # The MERRA portal moved the authentication to the download level. Before this change you had to 168 |             # provide username and password on the overview page. For example: 169 |             # goldsmr4.sci.gsfc.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/ 170 |             # authentication_url = 'https://goldsmr4.sci.gsfc.nasa.gov/opendap/MERRA2/M2T1NXSLV.5.12.4/1980/01/MERRA2_100.tavg1_2d_slv_Nx.19800101.nc4.ascii?U2M[0:1:1][0:1:1][0:1:1]' 171 |             # Changes: 172 |             # Authenticate with the first URL in the links. 173 |             # Request the website and initialize the BasicAuth. 
This will populate the auth_cookie_jar. 174 |             authentication_url = self.download_urls[0] 175 |             result = opener.open(authentication_url) 176 |             log.debug(list(auth_cookie_jar)) 177 |             log.debug(list(auth_cookie_jar)[0]) 178 |             log.debug(list(auth_cookie_jar)[1]) 179 | 180 |         except urllib.error.HTTPError: 181 |             raise ValueError('Username and/or password are not correct!') 182 |         except IOError as e: 183 |             log.warning(e) 184 |             raise IOError 185 |         except IndexError as e: 186 |             log.warning(e) 187 |             raise IndexError('download_urls is not set') 188 | 189 |         return auth_cookie_jar 190 | 191 | 192 | if __name__ == '__main__': 193 |     link = [ 194 |         'http://goldsmr4.sci.gsfc.nasa.gov:80/opendap/MERRA2/M2T1NXSLV.5.12.4/2014/01/MERRA2_400.tavg1_2d_slv_Nx.20140101.nc4.nc4?U2M[0:1:5][358:1:360][573:1:575],U10M[0:1:5][358:1:360][573:1:575],U50M[0:1:5][358:1:360][573:1:575],V2M[0:1:5][358:1:360][573:1:575],V10M[0:1:5][358:1:360][573:1:575],V50M[0:1:5][358:1:360][573:1:575]'] 195 | 196 |     logging.basicConfig(level=logging.DEBUG, handlers=[logging.StreamHandler()]) 197 |     dl = DownloadManager() 198 |     dl.download_path = 'download123' 199 |     dl.read_credentials_from_yaml(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'authentication.yaml')) 200 |     dl.download_urls = link 201 |     dl.start_download() 202 | 203 | -------------------------------------------------------------------------------- /processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "nbpresent": { 7 | "id": "fdd7071a-0bcf-48c5-9ed7-772ab69e59df" 8 | } 9 | }, 10 | "source": [ 11 | "<div
\n", 12 | " Weather Data: Renewables.ninja processing notebook\n", 13 | " \n", 19 | "
This Notebook is part of the Weather Data Package of Open Power System Data.\n", 20 | "
" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "%load_ext autoreload\n", 30 | "%autoreload 2\n", 31 | "\n", 32 | "import glob\n", 33 | "import os\n", 34 | "import sqlite3\n", 35 | "import hashlib\n", 36 | "import shutil\n", 37 | "\n", 38 | "import pandas as pd\n", 39 | "import geopandas as gp\n", 40 | "import gsee\n", 41 | "import tqdm\n", 42 | "from joblib import Parallel, delayed\n", 43 | "\n", 44 | "import generate_metadata" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "version = '2019-04-09'\n", 54 | "changes = 'All European countries'" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "dir_shapefiles = os.path.join('downloads', 'shapefiles')\n", 64 | "dir_countries = os.path.join('downloads', 'countries')\n", 65 | "dir_nuts = os.path.join('downloads', 'nuts')" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "# Read and process data" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "if os.path.exists('cached_dataframe.csv'):\n", 82 | " df_cached = pd.read_csv('cached_dataframe.csv', index_col=0, header=[0, 1], parse_dates=True)\n", 83 | "else:\n", 84 | " df_cached = df_cached = pd.DataFrame({}) # Empty dataframe, to permit 'x in df_cached' tests" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "scrolled": false 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "dataframes = {}\n", 96 | "parse_kwargs = dict(skiprows=2, index_col=0, parse_dates=True)\n", 97 | "\n", 98 | "# Files of form `ninja_weather_country_AT_merra-2_population_weighted.csv`\n", 99 | "for f in glob.glob(os.path.join(dir_countries, '*.csv')):\n", 100 | "\n", 101 | " country_code = f.split('_')[3]\n", 102 | " \n", 103 | " if country_code not in df_cached:\n", 104 | " df = pd.read_csv(f, **parse_kwargs)\n", 105 | " dataframes[country_code] = df\n", 106 | " \n", 107 | "# Files of form `ninja_weather_irradiance_surface_country_DE_merra-2_nuts-2_population_weighted.csv`\n", 108 | "for f in glob.glob(os.path.join(dir_nuts, '*.csv')):\n", 109 | "\n", 110 | " country_code = f.split('country_')[1][0:2]\n", 111 | " variable = f.split('weather_')[1].split('_country')[0]\n", 112 | "\n", 113 | " df = pd.read_csv(f, **parse_kwargs)\n", 114 | " df = df.rename(columns={country_code + '_TOTAL': country_code})\n", 115 | "\n", 116 | " for c in df.columns:\n", 117 | " \n", 118 | " if c not in df_cached:\n", 119 | " \n", 120 | " if c not in dataframes:\n", 121 | " dataframes[c] = pd.DataFrame({variable: df[c]})\n", 122 | " else:\n", 123 | " dataframes[c].loc[:, variable] = df[c]\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "# Estimate direct and diffuse radiation using the BRL model as implented in GSEE.\n", 133 | "# https://github.com/renewables-ninja/gsee\n", 134 | "\n", 135 | "# Warning: this code takes a while to execute (easily 1-2mins CPU time per location).\n", 136 | "\n", 137 | "nuts_centroids = gp.GeoDataFrame.from_file(os.path.join(dir_shapefiles, 'NUTS_LB_2016_4326.shp'))\n", 138 | "nuts_centroids.set_index('NUTS_ID', inplace=True)\n", 139 | "\n", 140 | "# 
Map GB to UK, GR to EL\n", 141 | "nuts_centroids.loc['GB', : ] = nuts_centroids.loc['UK', :]\n", 142 | "nuts_centroids.loc['GR', : ] = nuts_centroids.loc['EL', :]\n", 143 | "\n", 144 | "data = {k: {\n", 145 | " 'clearness': dataframes[k]['irradiance_surface'] / dataframes[k]['irradiance_toa'],\n", 146 | " 'centroid': list(nuts_centroids.loc[k, 'geometry'].coords)[0][::-1],\n", 147 | " } for k in dataframes.keys()}\n", 148 | "\n", 149 | "\n", 150 | "def process_item(item):\n", 151 | " return gsee.brl_model.run(hourly_clearness=item['clearness'], coords=item['centroid'])\n", 152 | "\n", 153 | "\n", 154 | "result = Parallel(n_jobs=-1)(delayed(process_item)(item) for item in tqdm.tqdm(data.values()))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [ 163 | "for i, k in enumerate(data.keys()):\n", 164 | " df = dataframes[k]\n", 165 | " diffuse_fraction = result[i]\n", 166 | " df['radiation_direct_horizontal'] = ((1 - diffuse_fraction) * df['irradiance_surface']).fillna(0)\n", 167 | " df['radiation_diffuse_horizontal'] = (diffuse_fraction * df['irradiance_surface']).fillna(0)\n", 168 | " dataframes[k] = df" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "variables = ['windspeed_10m', 'temperature', 'radiation_direct_horizontal', 'radiation_diffuse_horizontal']\n", 178 | "\n", 179 | "for k in dataframes.keys():\n", 180 | " dataframes[k] = dataframes[k].loc[:, [v for v in variables if v in dataframes[k].columns]]\n", 181 | "\n", 182 | "if len(dataframes) > 0:\n", 183 | " \n", 184 | " complete_data = pd.concat(dataframes, axis=1, join='inner')\n", 185 | "\n", 186 | " df = pd.concat([complete_data, df_cached], axis=1)\n", 187 | "\n", 188 | " df.columns = pd.MultiIndex.from_tuples(\n", 189 | " [(i[0], i[1]) for i in df.columns],\n", 190 | " names=['geography', 'variable']\n", 191 | " )\n", 192 | "\n", 193 | " df.index.name = 'utc_timestamp'\n", 194 | "\n", 195 | " df.to_csv('cached_dataframe.csv')\n", 196 | " \n", 197 | "else:\n", 198 | " \n", 199 | " df = df_cached" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": { 205 | "nbpresent": { 206 | "id": "fa919796-a7f6-4556-aeed-181ddc6028ac" 207 | } 208 | }, 209 | "source": [ 210 | "# Write data to disk" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "os.makedirs(version, exist_ok=True)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": { 225 | "nbpresent": { 226 | "id": "47c1efa2-d93f-4d13-81d7-8f64dadeff3f" 227 | } 228 | }, 229 | "source": [ 230 | "## Reshape data" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "nbpresent": { 237 | "id": "a753ac43-a0f4-44bc-a89d-1ccaaf48289a" 238 | } 239 | }, 240 | "source": [ 241 | "Data are provided in two different \"shapes\": \n", 242 | "- SingleIndex (easy to read for humans, compatible with datapackage standard, small file size)\n", 243 | " - File formats: CSV, SQLite\n", 244 | "- MultiIndex (easy to read into GAMS, not compatible with datapackage standard, small file size)\n", 245 | " - File formats: CSV, Excel" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "metadata": { 252 | "nbpresent": { 253 | "id": "769225c6-31f5-4db8-8d91-32a3f983489c" 254 | }, 255 | "scrolled": true 256 | }, 257 | 
"outputs": [], 258 | "source": [ 259 | "df_multiindex = df\n", 260 | "\n", 261 | "df_singleindex = df.copy()\n", 262 | "\n", 263 | "df_singleindex.columns = [\n", 264 | " '_'.join([level for level in list(col)])\n", 265 | " for col in df.columns.values\n", 266 | "]\n", 267 | "\n", 268 | "df_stacked = df.copy()\n", 269 | "df_stacked = df_stacked.transpose().stack(dropna=True).to_frame(name='data')" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "datetime_format = '%Y-%m-%dT%H:%M:%SZ'" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": { 284 | "nbpresent": { 285 | "id": "84f1822e-3aa6-42c4-a424-5dc5ab6fa56f" 286 | } 287 | }, 288 | "source": [ 289 | "## Write to SQLite database" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "nbpresent": { 297 | "id": "fd35212c-ec5c-4fcf-9897-4608742d1bf8" 298 | }, 299 | "scrolled": false 300 | }, 301 | "outputs": [], 302 | "source": [ 303 | "# SQLite is required for the filtering function on the OPSD website\n", 304 | "\n", 305 | "df_sqlite = df_singleindex.copy()\n", 306 | "df_sqlite.index = df_sqlite.index.strftime(datetime_format)\n", 307 | "filepath = os.path.join(version, 'weather_data.sqlite')\n", 308 | "\n", 309 | "if os.path.exists(filepath):\n", 310 | " os.unlink(filepath)\n", 311 | "\n", 312 | "df_sqlite.to_sql(\n", 313 | " 'weather_data',\n", 314 | " sqlite3.connect(filepath),\n", 315 | " if_exists='replace',\n", 316 | " index_label='utc_timestamp'\n", 317 | ")" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": { 323 | "nbpresent": { 324 | "id": "454ee5f5-e8f1-4088-94e9-e846f48ee75b" 325 | } 326 | }, 327 | "source": [ 328 | "## Write to CSV" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": { 335 | "nbpresent": { 336 | "id": "89449c49-608d-488d-8bc8-077c64bc26c7" 337 | }, 338 | "scrolled": false 339 | }, 340 | "outputs": [], 341 | "source": [ 342 | "csv_kwargs = dict(\n", 343 | " float_format='%.4f',\n", 344 | " date_format=datetime_format\n", 345 | ")\n", 346 | "\n", 347 | "df_singleindex.to_csv(\n", 348 | " os.path.join(version, 'weather_data.csv'),\n", 349 | " **csv_kwargs\n", 350 | ")\n", 351 | "\n", 352 | "df_multiindex.to_csv(\n", 353 | " os.path.join(version, 'weather_data_multiindex.csv'),\n", 354 | " **csv_kwargs\n", 355 | ")" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "## Write metadata" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "metadata": {}, 369 | "outputs": [], 370 | "source": [ 371 | "# See generate_metadata.py for details\n", 372 | "generate_metadata.generate_json(df_multiindex, version, changes)" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "## Write checksums.txt" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "We publish SHA checksums for the output files on GitHub to allow verifying their integrity on the OPSD server." 
387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "def get_sha_hash(path, blocksize=65536):\n", 396 | " sha_hasher = hashlib.sha256()\n", 397 | " with open(path, 'rb') as f:\n", 398 | " buffer = f.read(blocksize)\n", 399 | " while len(buffer) > 0:\n", 400 | " sha_hasher.update(buffer)\n", 401 | " buffer = f.read(blocksize)\n", 402 | " return sha_hasher.hexdigest()\n", 403 | "\n", 404 | "\n", 405 | "checksum_file_path = os.path.join(version, 'checksums.txt')\n", 406 | "files = glob.glob(os.path.join(version, 'weather_data*'))\n", 407 | "\n", 408 | "# Create checksums.txt in the version directory\n", 409 | "with open(checksum_file_path, 'w') as f:\n", 410 | " for this_file in files:\n", 411 | " file_hash = get_sha_hash(this_file)\n", 412 | " f.write('{},{}\\n'.format(os.path.basename(this_file), file_hash))\n", 413 | "\n", 414 | "# Copy the file to root directory from where it will be pushed to GitHub\n", 415 | "shutil.copyfile(checksum_file_path, 'checksums.txt')" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": {}, 422 | "outputs": [], 423 | "source": [] 424 | } 425 | ], 426 | "metadata": { 427 | "anaconda-cloud": {}, 428 | "celltoolbar": "Initialisation Cell", 429 | "kernelspec": { 430 | "display_name": "Python 3", 431 | "language": "python", 432 | "name": "python3" 433 | }, 434 | "language_info": { 435 | "codemirror_mode": { 436 | "name": "ipython", 437 | "version": 3 438 | }, 439 | "file_extension": ".py", 440 | "mimetype": "text/x-python", 441 | "name": "python", 442 | "nbconvert_exporter": "python", 443 | "pygments_lexer": "ipython3", 444 | "version": "3.6.6" 445 | }, 446 | "nbpresent": { 447 | "slides": { 448 | "f6b300bf-88b5-4dea-951e-c926a9ea8287": { 449 | "id": "f6b300bf-88b5-4dea-951e-c926a9ea8287", 450 | "prev": "f96dd4bc-93a6-4014-b85f-a43061cf5688", 451 | "regions": { 452 | "dc486e18-7547-4610-99c0-55dfb5553f62": { 453 | "attrs": { 454 | "height": 1, 455 | "width": 1, 456 | "x": 0, 457 | "y": 0 458 | }, 459 | "content": { 460 | "cell": "c0035fc6-ff1d-44d8-a3fd-b4c08f53be71", 461 | "part": "source" 462 | }, 463 | "id": "dc486e18-7547-4610-99c0-55dfb5553f62" 464 | } 465 | } 466 | }, 467 | "f96dd4bc-93a6-4014-b85f-a43061cf5688": { 468 | "id": "f96dd4bc-93a6-4014-b85f-a43061cf5688", 469 | "prev": null, 470 | "regions": { 471 | "657c3ad3-2fcf-4c8e-a527-de3d0a46fa4e": { 472 | "attrs": { 473 | "height": 1, 474 | "width": 1, 475 | "x": 0, 476 | "y": 0 477 | }, 478 | "content": { 479 | "cell": "1562965a-7d74-4c1c-8251-4d82847f294a", 480 | "part": "source" 481 | }, 482 | "id": "657c3ad3-2fcf-4c8e-a527-de3d0a46fa4e" 483 | } 484 | } 485 | } 486 | }, 487 | "themes": {} 488 | }, 489 | "notify_time": "10", 490 | "toc": { 491 | "colors": { 492 | "hover_highlight": "#DAA520", 493 | "navigate_num": "#000000", 494 | "navigate_text": "#333333", 495 | "running_highlight": "#FF0000", 496 | "selected_highlight": "#FFD700", 497 | "sidebar_border": "#EEEEEE", 498 | "wrapper_background": "#FFFFFF" 499 | }, 500 | "moveMenuLeft": true, 501 | "nav_menu": { 502 | "height": "156px", 503 | "width": "252px" 504 | }, 505 | "navigate_menu": true, 506 | "number_sections": true, 507 | "sideBar": true, 508 | "threshold": 4, 509 | "toc_cell": false, 510 | "toc_section_display": "block", 511 | "toc_window_display": false, 512 | "widenNotebook": false 513 | } 514 | }, 515 | "nbformat": 4, 516 | "nbformat_minor": 1 517 | } 518 | 
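Editorial aside: the checksum file written by the cell above contains one `filename,sha256` pair per line. A minimal verification sketch for data users (not part of the repository; it mirrors the notebook's `get_sha_hash` function and assumes the file to check sits in the same directory as `checksums.txt`):

```python
# Minimal sketch: verify a downloaded output file against checksums.txt.
# Assumes lines of the form "<filename>,<sha256>" as written by processing.ipynb.
import hashlib


def get_sha_hash(path, blocksize=65536):
    sha_hasher = hashlib.sha256()
    with open(path, 'rb') as f:
        buffer = f.read(blocksize)
        while len(buffer) > 0:
            sha_hasher.update(buffer)
            buffer = f.read(blocksize)
    return sha_hasher.hexdigest()


def verify(filename, checksum_file='checksums.txt'):
    # Build a {filename: expected_hash} lookup from checksums.txt
    with open(checksum_file) as f:
        expected = dict(line.strip().split(',') for line in f if line.strip())
    return get_sha_hash(filename) == expected[filename]


print(verify('weather_data.csv'))  # True if the file is intact
```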
-------------------------------------------------------------------------------- /requirements.yml: -------------------------------------------------------------------------------- 1 | name: opsd_weather_data 2 | 3 | channels: 4 | - conda-forge 5 | - anaconda 6 | 7 | dependencies: 8 | - python=3.6 9 | - requests 10 | - pandas=0.23.4 11 | - geopandas 12 | - jupyter 13 | - openpyxl=2.5.5 # pandas: excel i/o; v2.5.6 produces error 14 | - ruamel.yaml<=0.15 15 | - xlrd # pandas: excel i/o 16 | - tqdm 17 | - joblib 18 | - pyyaml 19 | - xlsxwriter 20 | - pip: 21 | - gsee==0.2.0 22 | --------------------------------------------------------------------------------
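Closing editorial note for data users: the singleindex CSV produced by processing.ipynb uses `utc_timestamp` as its primary key and `<geography>_<variable>` column names. A minimal reading sketch (an addition for illustration; `DE_temperature` is an example column name implied by the schema, not a verified header):

```python
# Minimal sketch: read the published singleindex CSV with pandas.
import pandas as pd

df = pd.read_csv(
    'weather_data.csv',
    index_col='utc_timestamp',
    parse_dates=['utc_timestamp'],
)

# Example: daily mean temperature for Germany, using the
# '<geography>_<variable>' column naming convention.
de_temp_daily = df['DE_temperature'].resample('D').mean()
print(de_temp_daily.head())
```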