├── .devcontainer ├── devcontainer.json ├── docker-compose.yaml └── requirements.txt ├── .gitignore ├── .hsds ├── config │ ├── override.yml │ └── passwd.txt └── data │ └── hsdstest │ └── home │ ├── .domain.json │ ├── nrel_user │ └── .domain.json │ └── test_user1 │ └── .domain.json ├── LICENSE ├── README.md ├── benchmark ├── read_selection.py └── results.txt ├── bin ├── docs │ ├── cube.graffle │ └── cube.png ├── functions.py ├── nsrdb random sampling.ipynb ├── pv_SAM_config.json └── wind_SAM_config.json ├── datasets ├── NSRDB.md ├── US_Wave.md ├── WINDToolkit.md └── wtk-us.md ├── notebooks ├── 01_WTK_introduction.ipynb ├── 02_WTK_Domains_introduction.ipynb ├── 03_NSRDB_introduction.ipynb ├── 04_WTK-NSRDB_comparison.ipynb ├── 05_NSRDB_multi_year_means.ipynb ├── 06_data_export.ipynb ├── 07_Data_Slicing.ipynb ├── 08_NREL-rex.ipynb ├── 09_NREL-reV.ipynb └── 10_NCDB_introduction.ipynb └── scripts ├── box.py ├── point_statistics.py └── shape.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "NREL HSDS Examples", 3 | "hostRequirements": { 4 | "cpus": 4 5 | }, 6 | "dockerComposeFile": "docker-compose.yaml", 7 | "updateContentCommand": "python3 -m pip install -r .devcontainer/requirements.txt", 8 | "postCreateCommand": "", 9 | "service": "app", 10 | "workspaceFolder": "/workspace", 11 | "forwardPorts": [5101], 12 | "portsAttributes": { 13 | "5101": {"label": "HSDS port", "onAutoForward": "silent"} 14 | }, 15 | "features": { 16 | "ghcr.io/devcontainers/features/docker-outside-of-docker": {} 17 | }, 18 | "customizations": { 19 | "codespaces": { 20 | "openFiles": [] 21 | }, 22 | "vscode": { 23 | "extensions": [ 24 | "ms-python.python", 25 | "ms-toolsai.jupyter" 26 | ] 27 | } 28 | } 29 | } -------------------------------------------------------------------------------- /.devcontainer/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | app: 3 | image: "mcr.microsoft.com/devcontainers/universal:2" 4 | environment: 5 | - HS_ENDPOINT=http://localhost:5101 6 | - HS_USERNAME=nrel_user 7 | - HS_PASSWORD=test 8 | volumes: 9 | - ..:/workspace:cached 10 | 11 | # Overrides default command so things don't shut down after the process ends. 12 | command: sleep infinity 13 | 14 | # Runs app on the same network as the SN container, allows "forwardPorts" in devcontainer.json function. 
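    # HSDS itself runs as the containers defined below: one head node for
    # coordination, four data nodes (dn1-dn4) that fetch S3 chunks in
    # parallel, and one service node (sn) that exposes the REST API on port 5101.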
15 | network_mode: service:sn 16 | head: 17 | image: hdfgroup/hsds:master 18 | restart: on-failure 19 | mem_limit: 512m 20 | environment: 21 | - TARGET_SN_COUNT=1 22 | - TARGET_DN_COUNT=4 23 | - NODE_TYPE=head_node 24 | ports: 25 | - 5100 26 | volumes: 27 | - ../.hsds/config/:/config/ 28 | dn1: 29 | image: hdfgroup/hsds:master 30 | restart: on-failure 31 | mem_limit: 1g 32 | environment: 33 | - NODE_TYPE=dn 34 | ports: 35 | - 6101 36 | depends_on: 37 | - head 38 | volumes: 39 | - ../.hsds/config/:/config/ 40 | links: 41 | - head 42 | dn2: 43 | image: hdfgroup/hsds:master 44 | restart: on-failure 45 | mem_limit: 1g 46 | environment: 47 | - NODE_TYPE=dn 48 | ports: 49 | - 6102 50 | depends_on: 51 | - head 52 | volumes: 53 | - ../.hsds/config/:/config/ 54 | links: 55 | - head 56 | dn3: 57 | image: hdfgroup/hsds:master 58 | restart: on-failure 59 | mem_limit: 1g 60 | environment: 61 | - NODE_TYPE=dn 62 | ports: 63 | - 6103 64 | depends_on: 65 | - head 66 | volumes: 67 | - ../.hsds/config/:/config/ 68 | links: 69 | - head 70 | dn4: 71 | image: hdfgroup/hsds:master 72 | restart: on-failure 73 | mem_limit: 1g 74 | environment: 75 | - NODE_TYPE=dn 76 | ports: 77 | - 6104 78 | depends_on: 79 | - head 80 | volumes: 81 | - ../.hsds/config/:/config/ 82 | links: 83 | - head 84 | sn: 85 | image: hdfgroup/hsds:master 86 | restart: on-failure 87 | mem_limit: 1g 88 | environment: 89 | - SN_PORT=5101 90 | - NODE_TYPE=sn 91 | ports: 92 | - 5101:5101 93 | depends_on: 94 | - head 95 | volumes: 96 | - ../.hsds/config/:/config/ 97 | links: 98 | - head 99 | -------------------------------------------------------------------------------- /.devcontainer/requirements.txt: -------------------------------------------------------------------------------- 1 | asttokens==2.4.1 2 | certifi==2024.8.30 3 | charset-normalizer==3.3.2 4 | comm==0.2.2 5 | contourpy==1.3.0 6 | cycler==0.12.1 7 | debugpy==1.8.5 8 | decorator==5.1.1 9 | executing==2.1.0 10 | fonttools==4.53.1 11 | h5py==3.11.0 12 | h5pyd @ git+https://github.com/hdfgroup/h5pyd@41ff1901b310a9bb15e712a9065561e1d4c775c6 13 | idna==3.8 14 | ipykernel==6.29.5 15 | ipython==8.27.0 16 | jedi==0.19.1 17 | jupyter_client==8.6.2 18 | jupyter_core==5.7.2 19 | kiwisolver==1.4.7 20 | matplotlib==3.9.2 21 | matplotlib-inline==0.1.7 22 | nest-asyncio==1.6.0 23 | numpy==1.26.4 24 | packaging==24.1 25 | pandas==2.2.2 26 | parso==0.8.4 27 | pexpect==4.9.0 28 | pillow==10.4.0 29 | platformdirs==4.3.2 30 | prompt_toolkit==3.0.47 31 | psutil==6.0.0 32 | ptyprocess==0.7.0 33 | pure_eval==0.2.3 34 | Pygments==2.18.0 35 | PyJWT==2.9.0 36 | pyparsing==3.1.4 37 | pyproj==3.6.1 38 | python-dateutil==2.9.0.post0 39 | pytz==2024.1 40 | pyzmq==26.2.0 41 | requests==2.32.3 42 | requests-unixsocket==0.3.0 43 | seaborn==0.13.2 44 | six==1.16.0 45 | stack-data==0.6.3 46 | tornado>=6.5.0 47 | traitlets==5.14.3 48 | typing_extensions==4.12.2 49 | tzdata==2024.1 50 | urllib3==2.2.2 51 | wcwidth==0.2.13 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | *.csv 3 | .DS_Store 4 | *.pyc 5 | __pycache__ 6 | 7 | .vscode -------------------------------------------------------------------------------- /.hsds/config/override.yml: -------------------------------------------------------------------------------- 1 | log_level: ERROR 2 | bucket_name: nrel-pds-hsds 3 | server_name: "HSDS for Github codespaces" 4 | aws_region: us-west-2 # (original was us-east-1) 
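# The two settings below point HSDS at the public us-west-2 S3 endpoint and
# let it read the nrel-pds-hsds bucket anonymously (unsigned requests).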
aws_s3_gateway: http://s3.us-west-2.amazonaws.com/ # (original was null)
aws_s3_no_sign_request: True # (original was false)

--------------------------------------------------------------------------------
/.hsds/config/passwd.txt:
--------------------------------------------------------------------------------

# HSDS password file template
#
#
# This file contains a list of usernames/passwords that will be used to authenticate
# requests to HSDS.
# If using HTTP Basic Auth, copy this file to "passwd.txt" in the same directory before deploying HSDS.
# Otherwise, if using Azure Active Directory or Kerberos, don't copy this file - usernames will be
# authenticated using those identity providers.
# For production use, replace the "test" passwords below with secret passwords and add
# any new accounts desired.
admin:admin
test_user1:test
test_user2:test
nrel_user:test

--------------------------------------------------------------------------------
/.hsds/data/hsdstest/home/.domain.json:
--------------------------------------------------------------------------------

{"owner": "admin", "acls": {"admin": {"create": true, "read": true, "update": true, "delete": true, "readACL": true, "updateACL": true}, "default": {"create": false, "read": true, "update": false, "delete": false, "readACL": false, "updateACL": false}}, "created": 1708897646.0599918, "lastModified": 1708897646.0599918}

--------------------------------------------------------------------------------
/.hsds/data/hsdstest/home/nrel_user/.domain.json:
--------------------------------------------------------------------------------

{"owner": "nrel_user", "acls": {"nrel_user": {"create": true, "read": true, "update": true, "delete": true, "readACL": true, "updateACL": true}, "default": {"create": false, "read": true, "update": false, "delete": false, "readACL": false, "updateACL": false}}, "created": 1711992550.3733413, "lastModified": 1711992550.3733413}

--------------------------------------------------------------------------------
/.hsds/data/hsdstest/home/test_user1/.domain.json:
--------------------------------------------------------------------------------

{"owner": "test_user1", "acls": {"test_user1": {"create": true, "read": true, "update": true, "delete": true, "readACL": true, "updateACL": true}, "default": {"create": false, "read": true, "update": false, "delete": false, "readACL": false, "updateACL": false}}, "created": 1711992550.3733413, "lastModified": 1711992550.3733413}

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

Copyright 2017 Alliance for Sustainable Energy, LLC

NOTICE: This software was developed at least in part by Alliance for Sustainable Energy, LLC (“Alliance”) under Contract
No. DE-AC36-08GO28308 with the U.S. Department of Energy and the U.S. Government retains for itself and others acting on
its behalf a nonexclusive, paid-up, irrevocable worldwide license in the software to reproduce, prepare derivative works,
distribute copies to the public, perform publicly and display publicly, and to permit others to do so.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
conditions are met:

1.
Redistributions of source code must retain the above copyright notice, the above government rights notice, this list of
conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, the above government rights notice, this list of
conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. The entire corresponding source code of any redistribution, with or without modification, by a research entity,
including but not limited to any contracting manager/operator of a United States National Laboratory, any
institution of higher learning, and any non-profit organization, must be made publicly available under this license
for as long as the redistribution is made available by the research entity.

4. The name of the copyright holder, contributors, the United States Government, the United States Department of Energy,
or any of their employees may not be used to endorse or promote products derived from this software without specific
prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER, CONTRIBUTORS, UNITED STATES GOVERNMENT OR UNITED STATES DEPARTMENT OF
ENERGY, NOR ANY OF THEIR EMPLOYEES, BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/nrel/hsds-examples)


# NREL Highly Scalable Data Service (HSDS) Examples

NREL makes several of DOE's high-value datasets publicly available on AWS's Simple Storage Service (S3). While users are free to
download or directly read these files, their large sizes (typically tens of TBs per file) can make this impractical. To provide an
efficient way to access large HDF5 files in the cloud, The HDF Group (https://www.hdfgroup.org) has developed a REST-based data service
known as HSDS (Highly Scalable Data Service). HSDS provides high-performance access to these files, using parallel processing to
obtain the highest possible bandwidth from S3. You can read more about the HSDS service here: https://www.hdfgroup.org/solutions/highly-scalable-data-service-hsds/.


This repository provides examples and convenience functions for accessing the datasets below using HSDS:
- [Wind Integration National Dataset (WIND) Toolkit](https://www.nrel.gov/grid/wind-toolkit.html)
- [National Solar Radiation Database (NSRDB)](https://nsrdb.nrel.gov/)


## How to Use

There are several options for using HSDS to access NREL datasets. The most common approaches are:

1. Use GitHub Codespaces
2. Access the NREL developer API
3. Set up a local HSDS server
4. Set up a local HSDS server on EC2 (AWS)

Options 1 and 2 are the easiest ways to get started. Option 4 requires an AWS account and will incur AWS charges for every hour the
EC2 instance is running, but it may be best suited for users planning long-running, heavy access to NREL datasets.

Setup instructions for each approach follow.

### Use GitHub Codespaces

GitHub Codespaces are a convenient way to run your development environment in the cloud. This repository includes the necessary
configuration files to run as a codespace; no additional software is required. When the codespace is launched, HSDS runs
in the background as part of the codespace, and since the codespace runs in the cloud, access to the NREL data on S3 is quite fast.
To learn more about Codespaces, see: https://docs.github.com/en/codespaces/overview.

GitHub provides 120 free core hours (equivalent to running this repository continuously for 30 hours) per account per month.
For details on pricing, see: https://docs.github.com/en/billing/managing-billing-for-github-codespaces/about-billing-for-github-codespaces.

To run this repository as a codespace:

1. Open: https://github.com/codespaces (or just click on the codespace banner at the top of this page)
2. If you are not signed in to GitHub, sign in
3. Click the "New codespace" button
4. On the next page select the "NREL/hsds-examples" repository
5. Select "US West" as the region
6. Click the "Create Codespace" button

It will take a few minutes until your codespace is ready to use. Once ready, you can run any of the notebooks in this repository.

When running the first cell in a notebook, you will be prompted to select a "kernel". To do so, choose "Python Environments" and
then "Python 3.10.13".

Once your work is complete, you can delete the codespace by going to https://github.com/codespaces, selecting your codespace, and clicking the "delete" button.

### Access the NREL developer API

The second method is to use the NREL developer API, a web API that connects to an HSDS instance hosted by NREL.
Since it is a shared resource, performance may be impacted by the number of other users accessing the API at any given time.


To get started, [download Anaconda](https://anaconda.org/anaconda/python) or another distribution of Python, then install the h5pyd library:

```
pip install --user h5pyd
```

Next you'll need to configure HSDS:

```
hsconfigure
```

and enter at the prompt:

```
hs_endpoint = https://developer.nrel.gov/api/hsds
hs_username =
hs_password =
hs_api_key = 3K3JQbjZmWctY0xmIfSYvYgtIcM3CN0cb1Y2w9bf
```

**IMPORTANT: The example API key here is for demonstration and is rate-limited per IP. To get your own API key, visit https://developer.nrel.gov/signup/**

You can also add the above contents to a configuration file at ~/.hscfg.

To confirm the configuration works, try a short read against one of the NREL domains. A minimal sketch (the `/nrel/wtk-us.h5` domain and its `windspeed_100m` dataset are documented in [datasets/wtk-us.md](datasets/wtk-us.md)):
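```
import h5pyd

# Uses the endpoint, credentials, and API key entered above.
with h5pyd.File("/nrel/wtk-us.h5", "r") as f:
    dset = f["windspeed_100m"]     # hourly 100 m wind speed, (time, y, x)
    print(dset.shape, dset.dtype)  # metadata only -- nothing read yet
    print(dset[0, 500, 500])       # fetches a single value over the REST API
```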
Finally, you can use Jupyter Notebook to view the example notebooks:

```
cd notebooks
jupyter notebook
```

### Set Up a Local HSDS Server

Setting up an HSDS server on your local machine can be done by following [these instructions](https://nrel.github.io/rex/misc/examples.hsds.html#setting-up-a-local-hsds-server) from the rex documentation.


### Running a Local HSDS Server on EC2 (AWS)

You can stand up an HSDS server on an EC2 instance to get better throughput than the NREL developer API. Generally you should follow [these instructions](https://github.com/HDFGroup/hsds/blob/master/docs/docker_install_aws.md) from the HSDS documentation. Here are a few tips and tricks to get everything connected to the NREL bucket:

To install Docker and Docker Compose on an EC2 instance (if not already installed):

1. `sudo amazon-linux-extras install docker`
2. `sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose`
3. `sudo chmod +x /usr/local/bin/docker-compose`
4. `sudo groupadd docker`
5. `sudo usermod -aG docker $USER`
6. `newgrp docker`
7. `sudo service docker start`
8. `docker run hello-world`

Your ~/.hscfg file should look like this:
```
# local hsds server
hs_endpoint = http://localhost:5101
hs_username = admin
hs_password = admin
hs_api_key = None
hs_bucket = nrel-pds-hsds
```

The following environment variables must be set:
```
export AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
export AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
export BUCKET_NAME=${YOUR_S3_BUCKET_NAME_HERE}
export AWS_REGION=us-west-2
export AWS_S3_GATEWAY=http://s3.us-west-2.amazonaws.com/
export HSDS_ENDPOINT=http://localhost:5101
export LOG_LEVEL=INFO
```

With the server running, the h5pyd command-line tools give a quick health check (a sketch; `hsinfo` and `hsls` are installed alongside h5pyd):
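```
hsinfo          # should report the server endpoint, state, and version
hsls /nrel/     # should list the NREL domains (bucket taken from hs_bucket)
```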
A few miscellaneous tips:

1. You can list the available docker images with `docker images`
2. You can delete the docker HSDS image with `docker rmi $IMAGE_ID` (useful for resetting the docker image)
3. If you have AWS permissions issues, try using a non-root IAM user with the corresponding AWS credentials as environment variables


## Datasets

- [/nrel/wtk_us.h5](datasets/wtk-us.md)
- [/nrel/wtk/](datasets/WINDToolkit.md)
- [/nrel/wtk-5min/](datasets/WINDToolkit.md)
- [/nrel/nsrdb/](datasets/NSRDB.md)
- [/nrel/US_wave/](datasets/US_Wave.md)

## Credit

This software is currently maintained by Reid Olson (reid.olson@nrel.gov). It was initially developed by Caleb Phillips (caleb.phillips@nrel.gov), John Readey (jreadey@hdfgroup.org), Jordan Perr-Sauer (Jordan.Perr-Sauer@nrel.gov), and Michael Rossol to support the NREL Research Data Initiative and WIND Toolkit at the National Renewable Energy Laboratory in Golden, Colorado, USA.

## License

Copyright (c) 2017, National Renewable Energy Laboratory (NREL)
All rights reserved. See LICENSE for additional information.

--------------------------------------------------------------------------------
/benchmark/read_selection.py:
--------------------------------------------------------------------------------

import h5pyd
import h5py
import os
import random
import sys
import time

mb = 1024*1024

def get_indices(extent, count):
    # draw `count` distinct random indices in [0, extent), sorted ascending
    indices = set()
    while len(indices) < count:
        index = random.randint(0, extent-1)
        indices.add(index)
    indices = list(indices)
    indices.sort()
    return indices

def get_page_size(filename):
    # if the filename has the format: [name]_pNm.h5 return N,
    # otherwise return None
    page_size = None
    if filename.endswith("m.h5"):
        n = filename.rfind("_p")
        if n > 0:
            s = filename[n+2:-4]
            page_size = int(s)
    return page_size


#
# main
#

usage = f"Usage: {sys.argv[0]} [filepath] [h5path] [page_buf_size_mb]"

if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"):
    print(usage)
    print("filepath prefix:")
    print("    hdf5:// -- use HSDS")
    print("    s3://   -- use s3fs")
    print("    http:// -- use ros3")
    print("else - use local posix with HDF5 lib")
    print("")
    print(f"example: python {sys.argv[0]} hdf5://nrel/nsrdb/conus/nsrdb_conus_2020.h5")
    print(f"example: python {sys.argv[0]} s3://nrel-pds-nsrdb/conus/nsrdb_conus_pv_2020.h5")
    # s3://nrel-pds-nsrdb/v3/nsrdb_2020.h5
    sys.exit(1)

filepath = sys.argv[1]
if filepath.startswith("hdf5://"):
    driver = "hsds"
elif filepath.startswith("s3://"):
    driver = "s3fs"
elif filepath.startswith("http://"):
    driver = "ros3"
else:
    driver = "posix"

if len(sys.argv) > 2:
    h5path = sys.argv[2]
else:
    h5path = "wind_speed"  # default

if len(sys.argv) > 3:
    page_buf_size = int(sys.argv[3])
    print(f"page_buf_size: {page_buf_size} mb")
else:
    page_buf_size = None

page_size = get_page_size(filepath)
if page_size:
    print(f"page_size: {page_size} mb")
    if page_buf_size:
        if page_buf_size < page_size:
            sys.exit("page_buf_size must be >= page_size")
    else:
        page_buf_size = page_size
    page_buf_size *= mb  # convert to bytes

if driver == "s3fs":

    try:
        import s3fs
    except ImportError:
        sys.exit("run 'pip install s3fs' to use the s3fs option")
    if not filepath.startswith("s3://"):
        sys.exit("expected filepath to start with 's3://'")
    # s3fs enables h5py to "see" S3 files as read-only posix files
    s3 = s3fs.S3FileSystem(anon=True)
    print(f"opening HDF5 file at: {filepath} with s3fs")
    f = h5py.File(s3.open(filepath, "rb"), page_buf_size=page_buf_size)
elif driver == "ros3":
    print(f"opening HDF5 file at: {filepath} with ros3")
    f = h5py.File(filepath, driver="ros3", page_buf_size=page_buf_size)
elif driver == "hsds":
    print(f"opening HSDS domain: {filepath}")
    if "BUCKET_NAME" in os.environ:
        bucket = os.environ["BUCKET_NAME"]
    else:
        bucket = None
    f = h5pyd.File(filepath, bucket=bucket)
else:
    print(f"opening HDF5 file at: {filepath} with hdf5 lib")
    f = h5py.File(filepath, page_buf_size=page_buf_size)


if h5path not in f:
    sys.exit(f"{h5path} dataset not found")

dset = f[h5path]
print(f"{h5path}: {dset}")
print(f"chunks: {dset.chunks}")
print(f"compression: 
{dset.compression}") 115 | if isinstance(dset.id.id, str): 116 | # hsds 117 | layout = dset.id.dcpl_json["layout"] 118 | s3_uri = layout["file_uri"] 119 | print(f"links to: {s3_uri}") 120 | 121 | # read x-y slices 122 | 123 | indices = get_indices(dset.shape[1], 10) 124 | 125 | total_time = 0.0 126 | for index in indices: 127 | t = time.time() 128 | arr = dset[:, index] 129 | elapsed = time.time() - t 130 | print(f"dset[:, {index:8n}] min: {arr.min():8.2f} max: {arr.max():8.2f} mean: {arr.mean():8.2f} ({elapsed:.2f} s)") 131 | total_time += elapsed 132 | f.close() 133 | avg_elapsed = total_time / len(indices) 134 | print(f"avg time: {avg_elapsed:.2f} s") 135 | -------------------------------------------------------------------------------- /benchmark/results.txt: -------------------------------------------------------------------------------- 1 | $ docker run --rm -v /data:/data -v /home/ec2-user/hsds-examples/benchmark:/benchmark -t hdfgroup/hdf5lib:1.14.4 python /benchmark/read_selection.py /data/nsrdb_2020.h5 wind_speed 2 | opening HDF5 file at: /data/nsrdb_2020.h5 with hdf5 lib 3 | wind_speed: 4 | chunks: (2688, 372) 5 | compression: None 6 | dset[:, 390500] min: 0.00 max: 20.00 mean: 5.86 (0.10 s) 7 | dset[:, 425567] min: 0.00 max: 20.00 mean: 3.33 (0.09 s) 8 | dset[:, 440598] min: 0.00 max: 24.00 mean: 4.47 (0.10 s) 9 | dset[:, 566187] min: 1.00 max: 70.00 mean: 17.49 (0.10 s) 10 | dset[:, 1116094] min: 1.00 max: 51.00 mean: 16.03 (0.10 s) 11 | dset[:, 1161378] min: 1.00 max: 173.00 mean: 44.52 (0.09 s) 12 | dset[:, 1387219] min: 1.00 max: 58.00 mean: 18.93 (0.09 s) 13 | dset[:, 1457715] min: 1.00 max: 90.00 mean: 24.64 (0.10 s) 14 | dset[:, 1536592] min: 3.00 max: 152.00 mean: 46.37 (0.09 s) 15 | dset[:, 1654158] min: 1.00 max: 60.00 mean: 22.82 (0.09 s) 16 | avg time: 0.09 s 17 | 18 | $ docker run --rm -v /data:/data -v /home/ec2-user/hsds-examples/benchmark:/benchmark -t hdfgroup/hdf5lib:1.14.4 python /benchmark/read_selection.py /data/nsrdb_2020_p2m.h5 wind_speed 2 19 | page_buf_size: 2 mb 20 | page_size: 2 mb 21 | opening HDF5 file at: /data/nsrdb_2020_p2m.h5 with hdf5 lib 22 | wind_speed: 23 | chunks: (2688, 372) 24 | compression: None 25 | dset[:, 34283] min: 1.00 max: 49.00 mean: 14.55 (0.07 s) 26 | dset[:, 302115] min: 0.00 max: 16.00 mean: 3.35 (0.06 s) 27 | dset[:, 314893] min: 1.00 max: 116.00 mean: 33.71 (0.06 s) 28 | dset[:, 1005115] min: 0.00 max: 13.00 mean: 1.58 (0.06 s) 29 | dset[:, 1164086] min: 1.00 max: 19.00 mean: 5.31 (0.07 s) 30 | dset[:, 1315072] min: 0.00 max: 144.00 mean: 41.91 (0.06 s) 31 | dset[:, 1553391] min: 0.00 max: 21.00 mean: 5.57 (0.06 s) 32 | dset[:, 1592028] min: 0.00 max: 9.00 mean: 0.66 (0.07 s) 33 | dset[:, 1788744] min: 0.00 max: 6.00 mean: 0.50 (0.07 s) 34 | dset[:, 1862369] min: 0.00 max: 38.00 mean: 9.73 (0.06 s) 35 | avg time: 0.06 s 36 | 37 | 38 | $ docker run --rm -v /home/ec2-user/hsds-examples/benchmark:/benchmark -v /home/ec2-user/.hscfg:/root/.hscfg -e BUCKET_NAME=nrel-pds-hsds -t hdfgroup/hdf5lib:1.14.4 python /benchmark/read_selection.py hdf5://nrel/nsrdb/v3/nsrdb_2020.h5 wind_speed 39 | opening HSDS domain: hdf5://nrel/nsrdb/v3/nsrdb_2020.h5 40 | wind_speed: 41 | chunks: (2688, 372) 42 | compression: None 43 | links to: s3://nrel-pds-nsrdb/v3/nsrdb_2020.h5 44 | dset[:, 11881] min: 5.00 max: 90.00 mean: 58.08 (0.70 s) 45 | dset[:, 207440] min: 1.00 max: 37.00 mean: 9.62 (0.56 s) 46 | dset[:, 282982] min: 2.00 max: 127.00 mean: 38.97 (0.51 s) 47 | dset[:, 337526] min: 0.00 max: 30.00 mean: 8.75 (0.57 s) 48 | dset[:, 383100] min: 
0.00 max: 22.00 mean: 5.94 (0.42 s) 49 | dset[:, 440290] min: 0.00 max: 24.00 mean: 3.14 (0.40 s) 50 | dset[:, 674262] min: 2.00 max: 108.00 mean: 37.58 (0.39 s) 51 | dset[:, 897595] min: 3.00 max: 85.00 mean: 23.70 (0.41 s) 52 | dset[:, 1279980] min: 2.00 max: 136.00 mean: 35.70 (0.40 s) 53 | dset[:, 1525056] min: 0.00 max: 9.00 mean: 2.62 (0.58 s) 54 | avg time: 0.49 s 55 | 56 | 57 | $ docker run --rm -v /data:/data -v /home/ec2-user/hsds-examples/benchmark:/benchmark -t hdfgroup/hdf5lib:1.14.4 python /benchmark/read_selection.py s3://hdf5.sample/data/NREL/nsrdb_2020.h5 wind_speed 58 | opening HDF5 file at: s3://hdf5.sample/data/NREL/nsrdb_2020.h5 with s3fs 59 | wind_speed: 60 | chunks: (2688, 372) 61 | compression: None 62 | dset[:, 355578] min: 2.00 max: 196.00 mean: 51.52 (4.34 s) 63 | dset[:, 543335] min: 4.00 max: 121.00 mean: 33.77 (4.49 s) 64 | dset[:, 623756] min: 1.00 max: 22.00 mean: 6.18 (3.58 s) 65 | dset[:, 663761] min: 1.00 max: 164.00 mean: 47.36 (2.74 s) 66 | dset[:, 712406] min: 1.00 max: 95.00 mean: 28.53 (2.52 s) 67 | dset[:, 844512] min: 0.00 max: 11.00 mean: 2.00 (2.21 s) 68 | dset[:, 861570] min: 0.00 max: 19.00 mean: 5.50 (3.37 s) 69 | dset[:, 1247405] min: 0.00 max: 23.00 mean: 3.10 (5.33 s) 70 | dset[:, 1579490] min: 0.00 max: 7.00 mean: 0.66 (3.71 s) 71 | dset[:, 1764230] min: 2.00 max: 107.00 mean: 34.74 (3.08 s) 72 | avg time: 3.54 s 73 | $ docker run --rm -v /home/ec2-user/hsds-examples/benchmark:/benchmark -t hdfgroup/hdf5lib:1.14.4 python /benchmark/read_selection.py s3://hdf5.sample/data/NREL/nsrdb_2020_p2m.h5 wind_speed 2 74 | page_buf_size: 2 mb 75 | page_size: 2 mb 76 | opening HDF5 file at: s3://hdf5.sample/data/NREL/nsrdb_2020_p2m.h5 with s3fs 77 | wind_speed: 78 | chunks: (2688, 372) 79 | compression: None 80 | dset[:, 265443] min: 0.00 max: 15.00 mean: 3.85 (2.66 s) 81 | dset[:, 268346] min: 1.00 max: 126.00 mean: 37.15 (1.61 s) 82 | dset[:, 663331] min: 1.00 max: 116.00 mean: 37.27 (2.15 s) 83 | dset[:, 769828] min: 1.00 max: 100.00 mean: 28.19 (2.57 s) 84 | dset[:, 1030129] min: 1.00 max: 68.00 mean: 24.29 (2.28 s) 85 | dset[:, 1121682] min: 0.00 max: 20.00 mean: 4.81 (2.08 s) 86 | dset[:, 1635351] min: 0.00 max: 13.00 mean: 6.56 (2.45 s) 87 | dset[:, 1796265] min: 0.00 max: 9.00 mean: 1.09 (2.87 s) 88 | dset[:, 1874698] min: 2.00 max: 83.00 mean: 21.72 (2.44 s) 89 | dset[:, 1928255] min: 1.00 max: 65.00 mean: 21.04 (2.47 s) 90 | avg time: 2.36 s 91 | 92 | $ docker run --rm -v /home/ec2-user/hsds-examples/benchmark:/benchmark -t hdfgroup/hdf5lib:1.14.4 python /benchmark/read_selection.py s3://hdf5.sample/data/NREL/nsrdb_2020_p2m.h5 wind_speed 4 93 | page_buf_size: 4 mb 94 | page_size: 2 mb 95 | opening HDF5 file at: s3://hdf5.sample/data/NREL/nsrdb_2020_p2m.h5 with s3fs 96 | wind_speed: 97 | chunks: (2688, 372) 98 | compression: None 99 | dset[:, 25889] min: 0.00 max: 119.00 mean: 29.03 (2.52 s) 100 | dset[:, 94782] min: 1.00 max: 94.00 mean: 28.02 (1.96 s) 101 | dset[:, 504024] min: 2.00 max: 110.00 mean: 33.05 (2.00 s) 102 | dset[:, 611498] min: 1.00 max: 89.00 mean: 27.64 (2.47 s) 103 | dset[:, 636536] min: 0.00 max: 68.00 mean: 21.73 (1.72 s) 104 | dset[:, 699075] min: 0.00 max: 42.00 mean: 14.78 (2.25 s) 105 | dset[:, 1176375] min: 0.00 max: 35.00 mean: 14.18 (1.97 s) 106 | dset[:, 1202756] min: 0.00 max: 33.00 mean: 4.68 (1.82 s) 107 | dset[:, 1355434] min: 0.00 max: 11.00 mean: 4.02 (2.14 s) 108 | dset[:, 1588393] min: 1.00 max: 38.00 mean: 9.90 (2.38 s) 109 | avg time: 2.12 s 110 | 111 | $ docker run --rm -v 
/home/ec2-user/hsds-examples/benchmark:/benchmark -t hdfgroup/hdf5lib:1.14.4 python /benchmark/read_selection.py s3://hdf5.sample/data/NREL/nsrdb_2020_p2m.h5 wind_speed 8 112 | page_buf_size: 8 mb 113 | page_size: 2 mb 114 | opening HDF5 file at: s3://hdf5.sample/data/NREL/nsrdb_2020_p2m.h5 with s3fs 115 | wind_speed: 116 | chunks: (2688, 372) 117 | compression: None 118 | dset[:, 24402] min: 0.00 max: 12.00 mean: 3.71 (1.59 s) 119 | dset[:, 203319] min: 2.00 max: 83.00 mean: 19.12 (2.30 s) 120 | dset[:, 503040] min: 0.00 max: 132.00 mean: 36.14 (1.64 s) 121 | dset[:, 602327] min: 1.00 max: 53.00 mean: 14.02 (2.31 s) 122 | dset[:, 942391] min: 1.00 max: 79.00 mean: 24.37 (1.78 s) 123 | dset[:, 943043] min: 0.00 max: 12.00 mean: 1.51 (1.48 s) 124 | dset[:, 1228299] min: 2.00 max: 119.00 mean: 37.76 (1.78 s) 125 | dset[:, 1336902] min: 0.00 max: 20.00 mean: 3.31 (1.44 s) 126 | dset[:, 1474634] min: 1.00 max: 131.00 mean: 27.27 (2.21 s) 127 | dset[:, 1833278] min: 0.00 max: 7.00 mean: 1.48 (2.63 s) 128 | avg time: 1.92 s 129 | 130 | $ docker run --rm -v /home/ec2-user/hsds-examples/benchmark:/benchmark -t hdfgroup/hdf5lib:1.14.4 python /benchmark/read_selection.py s3://hdf5.sample/data/NREL/nsrdb_2020_p2m.h5 wind_speed 16 131 | page_buf_size: 16 mb 132 | page_size: 2 mb 133 | opening HDF5 file at: s3://hdf5.sample/data/NREL/nsrdb_2020_p2m.h5 with s3fs 134 | wind_speed: 135 | chunks: (2688, 372) 136 | compression: None 137 | dset[:, 237824] min: 0.00 max: 12.00 mean: 3.54 (1.78 s) 138 | dset[:, 300269] min: 1.00 max: 129.00 mean: 36.53 (1.52 s) 139 | dset[:, 587059] min: 1.00 max: 98.00 mean: 37.04 (2.29 s) 140 | dset[:, 663852] min: 1.00 max: 105.00 mean: 32.10 (2.27 s) 141 | dset[:, 954870] min: 2.00 max: 108.00 mean: 31.16 (1.62 s) 142 | dset[:, 1392460] min: 0.00 max: 7.00 mean: 0.83 (2.34 s) 143 | dset[:, 1443504] min: 2.00 max: 72.00 mean: 29.42 (2.07 s) 144 | dset[:, 1469200] min: 0.00 max: 20.00 mean: 5.05 (1.70 s) 145 | dset[:, 1669532] min: 0.00 max: 9.00 mean: 0.79 (1.71 s) 146 | dset[:, 1973050] min: 1.00 max: 33.00 mean: 12.09 (2.01 s) 147 | avg time: 1.93 s 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /bin/docs/cube.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NREL/hsds-examples/f652cbec919b952c184a4d68a2232ad5d756ecd9/bin/docs/cube.graffle -------------------------------------------------------------------------------- /bin/docs/cube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NREL/hsds-examples/f652cbec919b952c184a4d68a2232ad5d756ecd9/bin/docs/cube.png -------------------------------------------------------------------------------- /bin/functions.py: -------------------------------------------------------------------------------- 1 | """ 2 | HSDS data extraction functions 3 | """ 4 | import dateutil 5 | import h5pyd 6 | import matplotlib as mpl 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import os 10 | import pandas as pd 11 | from pyproj import Proj 12 | from scipy.spatial import cKDTree 13 | import seaborn as sns 14 | 15 | mpl.rcParams['font.sans-serif'] = 'DejaVu Sans' 16 | mpl.rcParams['pdf.fonttype'] = 42 17 | mpl.rc('xtick', labelsize=14) 18 | mpl.rc('ytick', labelsize=14) 19 | mpl.rc('font', size=16) 20 | sns.set_style("white") 21 | sns.set_style("ticks") 22 | 23 | 24 | def WTK_idx(wtk, lat_lon): 25 | """ 26 | Function to find the nearest 
x/y WTK indices for a given lat/lon using 27 | Proj4 projection library 28 | 29 | Parameters 30 | ---------- 31 | wtk : 'h5pyd.File' 32 | h5pyd File instance for the WTK 33 | lat_lon : tuple | list 34 | (lat, lon) coordinates of interest 35 | 36 | Results 37 | ------- 38 | ij : 'tuple' 39 | x/y coordinate in the database of the closest pixel to coordinate of 40 | interest 41 | """ 42 | dset_coords = wtk['coordinates'] 43 | projstring = """+proj=lcc +lat_1=30 +lat_2=60 44 | +lat_0=38.47240422490422 +lon_0=-96.0 45 | +x_0=0 +y_0=0 +ellps=sphere 46 | +units=m +no_defs """ 47 | projectLcc = Proj(projstring) 48 | # Grab origin directly from database 49 | origin_ll = reversed(dset_coords[0][0]) 50 | origin = projectLcc(*origin_ll) 51 | 52 | lon_lat = reversed(lat_lon) 53 | coords = projectLcc(*lon_lat) 54 | delta = np.subtract(coords, origin) 55 | ij = [int(round(x / 2000)) for x in delta] 56 | return tuple(reversed(ij)) 57 | 58 | 59 | def NSRDB_idx(nsrdb, lat_lon): 60 | """ 61 | Function to find the NSRDB site index for a given lat/lon using a KDTree 62 | 63 | Parameters 64 | ---------- 65 | nsrdb : 'h5pyd.File' 66 | h5pyd File instance for the NSRDB 67 | lat_lon : tuple | list 68 | (lat, lon) coordinates of interest 69 | 70 | Results 71 | ------- 72 | ij : 'tuple' 73 | x/y coordinate in the database of the closest pixel to coordinate of 74 | interest 75 | """ 76 | dset_coords = nsrdb['coordinates'][...] 77 | tree = cKDTree(dset_coords) 78 | _, pos = tree.query(np.array(lat_lon)) 79 | return pos 80 | 81 | 82 | def datetimeIndex(f): 83 | """ 84 | Function returns a dataframe containing the time-dimension 85 | index and parsed timestamps 86 | 87 | Parameters 88 | ---------- 89 | f : 'h5pyd.File' 90 | h5pyd File instance for the wtk_us.h5 91 | 92 | Results 93 | ------- 94 | dt : 'pd.DataFrame' 95 | DataFrame containing parsed 'datetime' stamps 96 | """ 97 | dt = f["datetime"] 98 | dt = pd.DataFrame({"datetime": dt[:]}, index=range(0, dt.shape[0])) 99 | dt['datetime'] = dt['datetime'].apply(dateutil.parser.parse) 100 | return dt 101 | 102 | 103 | class HSDS: 104 | """ 105 | HSDS Resource handler class 106 | """ 107 | def __init__(self, hsds_path, preload=False): 108 | """ 109 | Parameters 110 | ---------- 111 | hsds_path : h5pyd.File instance 112 | """ 113 | self._h5d = h5pyd.File(hsds_path, mode='r') 114 | if preload: 115 | self.preload() 116 | else: 117 | self._time_index = None 118 | self._meta = None 119 | self._tree = None 120 | 121 | @property 122 | def time_index(self): 123 | """ 124 | Returns 125 | ------- 126 | _time_index : pd.Datetime_Index 127 | Datetime index vector for given HSDS file 128 | """ 129 | if self._time_index is None: 130 | time_index = self._h5d['time_index'][...].astype(str) 131 | self._time_index = pd.to_datetime(time_index) 132 | 133 | return self._time_index 134 | 135 | @property 136 | def meta(self): 137 | """ 138 | Returns 139 | ------- 140 | _meta : pd.DataFrame 141 | Site meta data for give HSDS file 142 | """ 143 | if self._meta is None: 144 | self._meta = pd.DataFrame(self._h5d['meta'][...]) 145 | 146 | return self._meta 147 | 148 | @property 149 | def tree(self): 150 | """ 151 | Returns 152 | ------- 153 | _tree : cKDTree 154 | KDTree on site coordinates (latitude, longitude) 155 | """ 156 | if self._tree is None: 157 | site_coords = self._h5d['coordinates'][...] 
158 | self._tree = cKDTree(site_coords) 159 | 160 | return self._tree 161 | 162 | def preload(self): 163 | """ 164 | Preload time_index, meta, and tree 165 | """ 166 | time_index = self._h5d['time_index'][...].astype(str) 167 | self._time_index = pd.to_datetime(time_index) 168 | 169 | site_coords = self._h5d['coordinates'][...] 170 | self._tree = cKDTree(site_coords) 171 | 172 | self._meta = pd.DataFrame(self._h5d['meta'][...]) 173 | 174 | def _nearest_site(self, coords): 175 | """ 176 | Find nearest site to coordinate (lat, lon) of interest 177 | 178 | Parameters 179 | ---------- 180 | coords : tuple 181 | (lat, lon) coordinates of interest 182 | 183 | Returns 184 | ------- 185 | site_idx : int 186 | Site index in the datasets 187 | """ 188 | lat_lon = np.array(coords) 189 | _, site_idx = self.tree.query(lat_lon) 190 | return site_idx 191 | 192 | def _nearest_timestep(self, timestep): 193 | """ 194 | Find the nearest timestep to timestep of interest 195 | 196 | Parameters 197 | ---------- 198 | timestep : datetime 199 | Datetime step of interest 200 | 201 | Returns 202 | ------- 203 | time_idx : int 204 | Time index in the datasets 205 | """ 206 | delta = np.abs(self.time_index - timestep) 207 | time_idx = delta.argmin() 208 | 209 | return time_idx 210 | 211 | def _get_region_idx(self, value, column='state'): 212 | """ 213 | Find sites associated with given region 214 | 215 | Parameters 216 | ---------- 217 | value : str 218 | Regional value filter to 219 | column : str 220 | Column in the meta data to filter on 221 | 222 | Returns 223 | ------- 224 | region_idx : list 225 | Indices of all sites corresponding to region of interest 226 | """ 227 | if column in self.meta: 228 | col_data = self.meta[column].str.decode('utf-8') 229 | region_idx = self.meta.index[col_data == value].values 230 | else: 231 | raise ValueError('{} is not a valid column in meta' 232 | .format(column)) 233 | 234 | return region_idx 235 | 236 | def _get_conus_idx(self): 237 | """ 238 | Find sites associated with CONUS 239 | 240 | Returns 241 | ------- 242 | conus_idx : list 243 | Indices of all sites in CONUS 244 | """ 245 | country_data = self.meta['country'].str.decode('utf-8') 246 | us_idx = country_data == 'United States' 247 | state_data = self.meta.loc[us_idx, 'state'].str.decode('utf-8') 248 | conus_idx = state_data.isin(['Alaska', 'Hawaii', 'AK', 'HI', 'None']) 249 | conus_idx = state_data.index[~conus_idx].values 250 | 251 | return conus_idx 252 | 253 | def get_timeseries(self, variable, coords, local=True): 254 | """ 255 | Extract time-series data for the given variable at the given 256 | coordinates 257 | 258 | Parameters 259 | ---------- 260 | variable : str 261 | Variable to extract time-series for 262 | coords : tuple 263 | (lat, lon) coordinates of interest 264 | local : bool 265 | Shift time-series to local time 266 | 267 | Returns 268 | ------- 269 | ts : pd.DataFrame 270 | Time-series DataFrame 271 | """ 272 | site_idx = self._nearest_site(coords) 273 | time_index = self.time_index.copy() 274 | if local: 275 | utc_dt = self.meta.iloc[site_idx]['timezone'] 276 | utc_dt = pd.Timedelta('{}h'.format(utc_dt)) 277 | time_index += utc_dt 278 | 279 | ds = self._h5d[variable] 280 | ts = ds[:, site_idx] / ds.attrs.get('scale_factor', 1) 281 | ts = pd.DataFrame({variable: ts, 'Datetime': time_index, 282 | 'Date': time_index.date, 'Month': time_index.month, 283 | 'Day': time_index.day, 'Hour': time_index.hour}) 284 | 285 | return ts 286 | 287 | @staticmethod 288 | def create_boxplots(df, variable, dpi=100, 
figsize=(12, 4)): 289 | """ 290 | Create monthly and diurnal box plots 291 | """ 292 | fig = plt.figure(figsize=figsize, dpi=dpi) 293 | ax1 = fig.add_subplot(121) 294 | ax2 = fig.add_subplot(122) 295 | 296 | sns.boxplot(x="Month", y=variable, data=df, ax=ax1) 297 | ax1.set_xlabel('Month', fontsize=16) 298 | ax1.set_ylabel(variable, fontsize=16) 299 | sns.boxplot(x="Hour", y=variable, data=df, ax=ax2) 300 | ax2.set_xlabel('Hour', fontsize=16) 301 | ax2.set_ylabel(variable, fontsize=16) 302 | sns.despine(offset=10, trim=False) 303 | 304 | fig.tight_layout() 305 | plt.show() 306 | 307 | def get_timestep(self, variable, timestep): 308 | """ 309 | Extract a days worth of data for the given day for CONUS 310 | 311 | Parameters 312 | ---------- 313 | variable : str 314 | Variable to extract time-series for 315 | timestep : str 316 | Datetimestep to extract 317 | local : bool 318 | Shift time-series to local time 319 | 320 | Returns 321 | ------- 322 | day : pd.DataFrame 323 | 324 | """ 325 | conus_idx = self._get_conus_idx() 326 | time_idx = self._nearest_timestep(pd.to_datetime(timestep)) 327 | meta = self.meta.iloc[conus_idx] 328 | lon = meta['longitude'].values 329 | lat = meta['latitude'].values 330 | ds = self._h5d[variable] 331 | sf = ds.attrs.get('scale_factor', 1) 332 | data = self._h5d[variable][time_idx][conus_idx] / sf 333 | 334 | df = pd.DataFrame({'longitude': lon, 'latitude': lat, variable: data}) 335 | 336 | return df 337 | 338 | @staticmethod 339 | def create_scatter(df, variable, cbar_label=None, title=None, 340 | cmap='Rainbow', dpi=100, figsize=(8, 4)): 341 | """ 342 | Create scatter plot from lon, lat, and data and save to f_out 343 | 344 | Parameters 345 | ---------- 346 | df : pd.DataFrame 347 | DataFrame containing data to plot 348 | cbar_label : str 349 | Colorbar label 350 | title : str 351 | Title to plot 352 | cmap : str 353 | Colormap to use 354 | dpi : int 355 | plot resolution 356 | figsize : tuple 357 | Figure size 358 | """ 359 | fig = plt.figure(figsize=figsize, dpi=dpi) 360 | if title is not None: 361 | fig.suptitle(title, fontsize=16) 362 | 363 | ax = fig.add_subplot(111) 364 | lon = df['longitude'].values 365 | lat = df['latitude'].values 366 | data = df[variable].values 367 | if cbar_label is None: 368 | cbar_label = variable 369 | vmax = np.max(data) 370 | 371 | sc = ax.scatter(lon, lat, c=data, cmap=cmap, vmin=0, vmax=vmax) 372 | cbar = plt.colorbar(sc) 373 | cbar.ax.set_ylabel(cbar_label, rotation=90) 374 | ax.axis('off') 375 | fig.tight_layout() 376 | plt.show() 377 | 378 | def get_day(self, variable, date, local=True): 379 | """ 380 | Extract a days worth of data for the given day for CONUS 381 | 382 | Parameters 383 | ---------- 384 | variable : str 385 | Variable to extract time-series for 386 | date : str 387 | Date to extract a days worth of data for 388 | local : bool 389 | Shift time-series to local time 390 | 391 | Returns 392 | ------- 393 | day : pd.DataFrame 394 | 395 | """ 396 | conus_idx = self._get_conus_idx() 397 | time_index = self.time_index 398 | if local: 399 | utc_dt = self.meta.iloc[conus_idx]['timezone'].mean() 400 | utc_dt = pd.Timedelta('{}h'.format(utc_dt)) 401 | time_index += utc_dt 402 | 403 | date = pd.to_datetime(date).date() 404 | time_idx = np.where(time_index.date == date)[0] 405 | time_slice = slice(time_idx[0], time_idx[-1] + 1) 406 | 407 | day_df = pd.DataFrame(self._h5d[variable][time_slice][:, conus_idx], 408 | index=time_idx, columns=conus_idx) 409 | 410 | return day_df 411 | 412 | @staticmethod 413 | def 
create_map(lon, lat, data, cbar_label, f_out=None, vmax=None,
                   title=None, cmap='Rainbow', dpi=100, figsize=(8, 4)):
        """
        Create scatter plot from lon, lat, and data and save to f_out

        Parameters
        ----------
        lon : ndarray
            Longitude vector
        lat : ndarray
            Latitude vector
        data : ndarray
            Data vector to plot
        cbar_label : str
            Colorbar label
        f_out : str
            File to save plot to
        vmax : float
            Max value for colormap
        title : str
            Title to plot
        cmap : str
            Colormap to use
        dpi : int
            plot resolution
        figsize : tuple
            Figure size
        """
        fig = plt.figure(figsize=figsize, dpi=dpi)
        if title is not None:
            fig.suptitle(title, fontsize=16)

        ax = fig.add_subplot(111)
        if vmax is None:
            vmax = np.max(data)

        sc = ax.scatter(lon, lat, c=data, cmap=cmap, s=10,
                        vmin=0, vmax=vmax)
        cbar = plt.colorbar(sc)
        cbar.ax.set_ylabel(cbar_label, rotation=90)
        ax.axis('off')
        fig.tight_layout()
        if f_out is not None:
            plt.savefig(f_out, dpi=dpi, transparent=True,
                        bbox_inches='tight')
        else:
            plt.show()

    @staticmethod
    def create_gif(fig_dir, file_prefix):
        """
        Create a gif from all figures in fig_dir whose names start with
        file_prefix (not yet implemented)
        """

    def create_nsrdb_gif(self, date, variable='dni'):
        """
        Extract, plot, and create gif for given NSRDB date and variable

        Parameters
        ----------
        date : str
            Date to extract
        variable : str
            Variable to extract
        """
        day_df = self.get_day(variable, date)
        label = '{} W/m^2'.format(variable)
        vmax = np.max(day_df.values)
        meta = self.meta.iloc[day_df.columns]
        lon = meta['longitude'].values
        lat = meta['latitude'].values
        fig_dir = '../bin/gifs'
        if not os.path.exists(fig_dir):
            os.makedirs(fig_dir)

        # one frame per timestep in the day
        for i in range(len(day_df)):
            data = day_df.iloc[i]
            f_out = os.path.join(fig_dir, 'nsrdb_{:03d}.png'.format(i))
            self.create_map(lon, lat, data, label, f_out, vmax=vmax,
                            cmap='YlOrRd')

--------------------------------------------------------------------------------
/bin/nsrdb random sampling.ipynb:
--------------------------------------------------------------------------------

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "cwd = os.getcwd()\n",
    "\n",
    "def get_gid(meta):\n",
    "    \"\"\"\n",
    "    Extract a random gid from the nsrdb meta data, sampling randomly from the available states and counties\n",
    "    \n",
    "    Parameters\n",
    "    ----------\n",
    "    meta : 'pandas.DataFrame'\n",
    "        DataFrame of meta data from which to randomly sample pixels\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    gid : 'int'\n",
    "        Selected gid\n",
    "    \"\"\"\n",
    "    if len(meta['state'].unique()) > 1:\n",
    "        state = np.random.choice(meta['state'].unique(), 1)[0]\n",
    "        meta = meta.loc[meta['state'] == state]\n",
    "\n",
    "    if len(meta['county'].unique()) > 1:\n",
    "        county = np.random.choice(meta['county'].unique(), 1)[0]\n",
    "        meta = meta.loc[meta['county'] == county]\n",
    "    \n",
    "    gid = np.random.choice(meta['gid'].values, 1)[0]\n",
    "    return gid\n",
    "\n",
    "\n",
    "def sample_nsrdb(meta, samples):\n",
    "    \"\"\"\n",
    "    
Randomly sample from nsrdb meta data\n", 46 | " Samples are selected from available countries, states, or counties\n", 47 | " \n", 48 | " Parameters\n", 49 | " ----------\n", 50 | " meta : 'pandas.DataFrame'\n", 51 | " DataFrame of meta data from which to randomly samples pixels\n", 52 | " samples : 'int'\n", 53 | " Number of samples to select\n", 54 | "\n", 55 | " Returns\n", 56 | " -------\n", 57 | " 'pandas.DataFrame'\n", 58 | " Meta data for selected pixels\n", 59 | " \"\"\"\n", 60 | " gids = []\n", 61 | " if len(meta['country'].unique()) > 1:\n", 62 | " countries = np.random.choice(meta['country'].unique(), samples)\n", 63 | " for country in countries:\n", 64 | " country_meta = meta.loc[meta['country'] == country]\n", 65 | " gids.append(get_gid(country_meta)) \n", 66 | " elif len(meta['state'].unique()) > 1:\n", 67 | " states = np.random.choice(meta['state'].unique(), samples)\n", 68 | " for state in states:\n", 69 | " state_meta = meta.loc[meta['state'] == state]\n", 70 | " gids.append(get_gid(state_meta))\n", 71 | " elif len(meta['county'].unique()) > 1:\n", 72 | " counties = np.random.choice(meta['county'].unique(), samples)\n", 73 | " for county in counties:\n", 74 | " county_meta = meta.loc[meta['county'] == county]\n", 75 | " gids.append(get_gid(county_meta))\n", 76 | " else:\n", 77 | " gids = np.random.choice(meta['gid'], samples)\n", 78 | " \n", 79 | " return meta.loc[gids]" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# Randomly sample from nsrdb" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 117, 92 | "metadata": { 93 | "collapsed": true 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "path = os.path.join(cwd, 'nsrdb_meta.csv')\n", 98 | "meta = pd.read_csv(path)\n", 99 | "meta['gid'] = np.arange(len(meta))" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 118, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/html": [ 110 | "
\n", 111 | "\n", 124 | "\n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | "
latitudelongitudeelevationtimezonecountrystatecountyurbanpopulationlandcovergid
157257218.09-63.021.00-4St-MartinMAF-00 (St. Martin aggregationNoneNone38162101572572
173438046.97-56.341.00-3St. Pierre and MiquelonSPM-00 (St. Pierre and MiqueloNoneNone102101734380
134919618.37-70.38457.24-4Dominican Rep.PeraviaBaníNone411401349196
122921922.37-74.064.72-5BahamasBHS-00 (Bahamas aggregation)NoneNone2401229219
131859019.37-71.30478.85-4Dominican Rep.Santiago RodríguezSan Ignacio de SabanetaNone610301318590
\n", 214 | "
" 215 | ], 216 | "text/plain": [ 217 | " latitude longitude elevation timezone country \\\n", 218 | "1572572 18.09 -63.02 1.00 -4 St-Martin \n", 219 | "1734380 46.97 -56.34 1.00 -3 St. Pierre and Miquelon \n", 220 | "1349196 18.37 -70.38 457.24 -4 Dominican Rep. \n", 221 | "1229219 22.37 -74.06 4.72 -5 Bahamas \n", 222 | "1318590 19.37 -71.30 478.85 -4 Dominican Rep. \n", 223 | "\n", 224 | " state county urban \\\n", 225 | "1572572 MAF-00 (St. Martin aggregation None None \n", 226 | "1734380 SPM-00 (St. Pierre and Miquelo None None \n", 227 | "1349196 Peravia Baní None \n", 228 | "1229219 BHS-00 (Bahamas aggregation) None None \n", 229 | "1318590 Santiago Rodríguez San Ignacio de Sabaneta None \n", 230 | "\n", 231 | " population landcover gid \n", 232 | "1572572 3816 210 1572572 \n", 233 | "1734380 10 210 1734380 \n", 234 | "1349196 411 40 1349196 \n", 235 | "1229219 2 40 1229219 \n", 236 | "1318590 610 30 1318590 " 237 | ] 238 | }, 239 | "execution_count": 118, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "countries = sample_nsrdb(meta, 5)\n", 246 | "countries" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 119, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/html": [ 257 | "
\n", 258 | "\n", 271 | "\n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | "
latitudelongitudeelevationtimezonecountrystatecountyurbanpopulationlandcovergid
112960638.93-77.1075.75-5United StatesDistrict of ColumbiaDistrict of ColumbiaWashington, D.C.20118701129606
93083237.65-85.66261.04-5United StatesKentuckyLarueNone3750930832
76256647.37-93.38414.44-6United StatesMinnesotaItascaNone11850762566
9762448.05-124.38209.80-8United StatesWashingtonClallamNone4010097624
28331647.05-113.341443.00-7United StatesMontanaMissoulaNone0150283316
\n", 361 | "
" 362 | ], 363 | "text/plain": [ 364 | " latitude longitude elevation timezone country \\\n", 365 | "1129606 38.93 -77.10 75.75 -5 United States \n", 366 | "930832 37.65 -85.66 261.04 -5 United States \n", 367 | "762566 47.37 -93.38 414.44 -6 United States \n", 368 | "97624 48.05 -124.38 209.80 -8 United States \n", 369 | "283316 47.05 -113.34 1443.00 -7 United States \n", 370 | "\n", 371 | " state county urban \\\n", 372 | "1129606 District of Columbia District of Columbia Washington, D.C. \n", 373 | "930832 Kentucky Larue None \n", 374 | "762566 Minnesota Itasca None \n", 375 | "97624 Washington Clallam None \n", 376 | "283316 Montana Missoula None \n", 377 | "\n", 378 | " population landcover gid \n", 379 | "1129606 20118 70 1129606 \n", 380 | "930832 37 50 930832 \n", 381 | "762566 118 50 762566 \n", 382 | "97624 40 100 97624 \n", 383 | "283316 0 150 283316 " 384 | ] 385 | }, 386 | "execution_count": 119, 387 | "metadata": {}, 388 | "output_type": "execute_result" 389 | } 390 | ], 391 | "source": [ 392 | "US = meta.loc[meta['country'] == 'United States']\n", 393 | "states = sample_nsrdb(US, 5)\n", 394 | "states" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 114, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/html": [ 405 | "
\n", 406 | "\n", 419 | "\n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | "
latitudelongitudeelevationtimezonecountrystatecountyurbanpopulationlandcovergid
40828238.13-107.622663.30-7United StatesColoradoOurayNone070408282
46715537.89-105.142678.44-7United StatesColoradoHuerfanoNone170467155
54232438.49-102.261245.44-7United StatesColoradoKiowaNone0130542324
52516540.81-102.901147.80-7United StatesColoradoLoganNone414525165
41011040.01-107.542404.08-7United StatesColoradoRio BlancoNone670410110
\n", 509 | "
" 510 | ], 511 | "text/plain": [ 512 | " latitude longitude elevation timezone country state \\\n", 513 | "408282 38.13 -107.62 2663.30 -7 United States Colorado \n", 514 | "467155 37.89 -105.14 2678.44 -7 United States Colorado \n", 515 | "542324 38.49 -102.26 1245.44 -7 United States Colorado \n", 516 | "525165 40.81 -102.90 1147.80 -7 United States Colorado \n", 517 | "410110 40.01 -107.54 2404.08 -7 United States Colorado \n", 518 | "\n", 519 | " county urban population landcover gid \n", 520 | "408282 Ouray None 0 70 408282 \n", 521 | "467155 Huerfano None 1 70 467155 \n", 522 | "542324 Kiowa None 0 130 542324 \n", 523 | "525165 Logan None 4 14 525165 \n", 524 | "410110 Rio Blanco None 6 70 410110 " 525 | ] 526 | }, 527 | "execution_count": 114, 528 | "metadata": {}, 529 | "output_type": "execute_result" 530 | } 531 | ], 532 | "source": [ 533 | "CO = conus.loc[US['state'] == 'Colorado']\n", 534 | "counties = sample_nsrdb(CO, 5)\n", 535 | "counties" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 115, 541 | "metadata": {}, 542 | "outputs": [ 543 | { 544 | "data": { 545 | "text/html": [ 546 | "
\n", 547 | "\n", 560 | "\n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | "
latitudelongitudeelevationtimezonecountrystatecountyurbanpopulationlandcovergid
46910739.61-105.061685.28-7United StatesColoradoDenverDenver1508970469107
46964839.65-105.061672.64-7United StatesColoradoDenverDenver2207670469648
48093439.85-104.621618.84-7United StatesColoradoDenverNone230480934
47270639.77-104.941596.64-7United StatesColoradoDenverDenver27744190472706
46910739.61-105.061685.28-7United StatesColoradoDenverDenver1508970469107
\n", 650 | "
" 651 | ], 652 | "text/plain": [ 653 | " latitude longitude elevation timezone country state \\\n", 654 | "469107 39.61 -105.06 1685.28 -7 United States Colorado \n", 655 | "469648 39.65 -105.06 1672.64 -7 United States Colorado \n", 656 | "480934 39.85 -104.62 1618.84 -7 United States Colorado \n", 657 | "472706 39.77 -104.94 1596.64 -7 United States Colorado \n", 658 | "469107 39.61 -105.06 1685.28 -7 United States Colorado \n", 659 | "\n", 660 | " county urban population landcover gid \n", 661 | "469107 Denver Denver 15089 70 469107 \n", 662 | "469648 Denver Denver 22076 70 469648 \n", 663 | "480934 Denver None 2 30 480934 \n", 664 | "472706 Denver Denver 27744 190 472706 \n", 665 | "469107 Denver Denver 15089 70 469107 " 666 | ] 667 | }, 668 | "execution_count": 115, 669 | "metadata": {}, 670 | "output_type": "execute_result" 671 | } 672 | ], 673 | "source": [ 674 | "Denver = CO.loc[CO['county'] == 'Denver']\n", 675 | "pixels = sample_nsrdb(Denver, 5)\n", 676 | "pixels" 677 | ] 678 | } 679 | ], 680 | "metadata": { 681 | "kernelspec": { 682 | "display_name": "Python [default]", 683 | "language": "python", 684 | "name": "python3" 685 | }, 686 | "language_info": { 687 | "codemirror_mode": { 688 | "name": "ipython", 689 | "version": 3 690 | }, 691 | "file_extension": ".py", 692 | "mimetype": "text/x-python", 693 | "name": "python", 694 | "nbconvert_exporter": "python", 695 | "pygments_lexer": "ipython3", 696 | "version": "3.6.2" 697 | } 698 | }, 699 | "nbformat": 4, 700 | "nbformat_minor": 2 701 | } 702 | -------------------------------------------------------------------------------- /bin/pv_SAM_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "adjust:constant": 0, 3 | "array_type": 2, 4 | "azimuth": 180, 5 | "capital_cost": 39767200, 6 | "dc_ac_ratio": 1.3, 7 | "fixed_charge_rate": 0.096, 8 | "fixed_operating_cost": 260000, 9 | "gcr": 0.4, 10 | "inv_eff": 96, 11 | "losses": 14.07566, 12 | "module_type": 0, 13 | "system_capacity": 20000, 14 | "tilt": 0, 15 | "variable_operating_cost": 0 16 | } 17 | -------------------------------------------------------------------------------- /bin/wind_SAM_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "adjust:constant": 0, 3 | "system_capacity": 1620, 4 | "turb_generic_loss": 16.7, 5 | "wind_farm_wake_model": 0, 6 | "wind_farm_xCoordinates": [ 7 | 0 8 | ], 9 | "wind_farm_yCoordinates": [ 10 | 0 11 | ], 12 | "wind_resource_model_choice": 0, 13 | "wind_resource_shear": 0.140000001, 14 | "wind_resource_turbulence_coeff": 0.1, 15 | "wind_turbine_hub_ht": 80, 16 | "wind_turbine_powercurve_powerout": [ 17 | 0, 18 | 0, 19 | 0, 20 | 0, 21 | 0, 22 | 0, 23 | 0, 24 | 0, 25 | 0, 26 | 0, 27 | 0, 28 | 0, 29 | 1, 30 | 9, 31 | 16, 32 | 49, 33 | 81, 34 | 122, 35 | 163, 36 | 211, 37 | 259, 38 | 319, 39 | 378, 40 | 441, 41 | 504, 42 | 574, 43 | 643, 44 | 726, 45 | 808, 46 | 896, 47 | 984, 48 | 1072, 49 | 1159, 50 | 1236, 51 | 1312, 52 | 1369, 53 | 1426, 54 | 1473, 55 | 1519, 56 | 1545, 57 | 1571, 58 | 1583, 59 | 1594, 60 | 1602, 61 | 1609, 62 | 1614, 63 | 1619, 64 | 1620, 65 | 1620, 66 | 1620, 67 | 1620, 68 | 1620, 69 | 1620, 70 | 1620, 71 | 1620, 72 | 1620, 73 | 1620, 74 | 1620, 75 | 1620, 76 | 1620, 77 | 1620, 78 | 1620, 79 | 1620, 80 | 1620, 81 | 1620, 82 | 1620, 83 | 1620, 84 | 1620, 85 | 1620, 86 | 1620, 87 | 1620, 88 | 1620, 89 | 1620, 90 | 1620, 91 | 1620, 92 | 1620, 93 | 1620, 94 | 1620, 95 | 1620, 96 | 1620, 97 | 1620, 98 | 1620, 99 | 1620, 100 | 1620, 
101 | 1620, 102 | 1620, 103 | 1620, 104 | 1620, 105 | 1620, 106 | 1620, 107 | 1620, 108 | 1620, 109 | 1620, 110 | 1620, 111 | 1620, 112 | 1620, 113 | 1620, 114 | 1620, 115 | 1620, 116 | 1620, 117 | 1620, 118 | 0 119 | ], 120 | "wind_turbine_powercurve_windspeeds": [ 121 | 0, 122 | 0.25, 123 | 0.5, 124 | 0.75, 125 | 1, 126 | 1.25, 127 | 1.5, 128 | 1.75, 129 | 2, 130 | 2.25, 131 | 2.5, 132 | 2.75, 133 | 3, 134 | 3.25, 135 | 3.5, 136 | 3.75, 137 | 4, 138 | 4.25, 139 | 4.5, 140 | 4.75, 141 | 5, 142 | 5.25, 143 | 5.5, 144 | 5.75, 145 | 6, 146 | 6.25, 147 | 6.5, 148 | 6.75, 149 | 7, 150 | 7.25, 151 | 7.5, 152 | 7.75, 153 | 8, 154 | 8.25, 155 | 8.5, 156 | 8.75, 157 | 9, 158 | 9.25, 159 | 9.5, 160 | 9.75, 161 | 10, 162 | 10.25, 163 | 10.5, 164 | 10.75, 165 | 11, 166 | 11.25, 167 | 11.5, 168 | 11.75, 169 | 12, 170 | 12.25, 171 | 12.5, 172 | 12.75, 173 | 13, 174 | 13.25, 175 | 13.5, 176 | 13.75, 177 | 14, 178 | 14.25, 179 | 14.5, 180 | 14.75, 181 | 15, 182 | 15.25, 183 | 15.5, 184 | 15.75, 185 | 16, 186 | 16.25, 187 | 16.5, 188 | 16.75, 189 | 17, 190 | 17.25, 191 | 17.5, 192 | 17.75, 193 | 18, 194 | 18.25, 195 | 18.5, 196 | 18.75, 197 | 19, 198 | 19.25, 199 | 19.5, 200 | 19.75, 201 | 20, 202 | 20.25, 203 | 20.5, 204 | 20.75, 205 | 21, 206 | 21.25, 207 | 21.5, 208 | 21.75, 209 | 22, 210 | 22.25, 211 | 22.5, 212 | 22.75, 213 | 23, 214 | 23.25, 215 | 23.5, 216 | 23.75, 217 | 24, 218 | 24.25, 219 | 24.5, 220 | 24.75, 221 | 25, 222 | 25.25 223 | ], 224 | "wind_turbine_rotor_diameter": 77 225 | } 226 | -------------------------------------------------------------------------------- /datasets/NSRDB.md: -------------------------------------------------------------------------------- 1 | # Solar Resource Data: National Solar Radiation Database (NSRDB) 2 | 3 | - /nrel/nsrdb/ 4 | - conus/ 5 | - nsrdb_conus_2018.h5 6 | - ... 7 | - nsrdb_conus_2022.h5 8 | - current/ 9 | - nsrdb_1998.h5 10 | - ... 11 | - nsrdb_2022.h5 12 | -nsrdb_tdy-2021.h5 13 | -nsrdb_tdy-2022.h5 14 | -nsrdb_tgy-2021.h5 15 | -nsrdb_tgy-2022.h5 16 | -nsrdb_tmy-2021.h5 17 | -nsrdb_tmy-2022.h5 18 | - full_disc/ 19 | - nsrdb_full_disc_2018.h5 20 | - ... 21 | - nsrdb_full_disc_2022.h5 22 | - india/ 23 | - india_spectral_tmy.h5 24 | - nsrdb_india_2000.h5 25 | - ... 26 | - nsrdb_india_2014.h5 27 | - nsrdb_india_tmy.h5 28 | - himawari/ 29 | - himawari7_2011.h5 30 | - ... 31 | - himawari7_2015.h5 32 | - himawari8_2015.h5 33 | - ... 34 | - himawari8_2020.h5 35 | - himawari_tdy-2020.h5 36 | - himawari_tgy-2020.h5 37 | - himawari_tmy-2020.h5 38 | - himawari7/ 39 | - himawari7_2011.h5 40 | - ... 41 | - himawari7_2020.h5 42 | - himawari8/ 43 | - himawari8_2015.h5 44 | - ... 45 | - himawari8_2020.h5 46 | - meteosat/ 47 | - meteosat_2017.h5 48 | - meteosat_2018.h5 49 | - meteosat_2019.h5 50 | - philippines/ 51 | - philippines_2017.h5 52 | - puerto_rico/ 53 | - nsrdb_puerto_rico_1998.h5 54 | - ... 55 | - nsrdb_puerto_rico_2017.h5 56 | - vietnam/ 57 | - vietnam_2016.h5 58 | - v3/ 59 | - nsrdb_1998.h5 60 | - ... 61 | - nsrdb_2020.h5 62 | - tdy/ 63 | - nsrdb_tdy-2016.h5 64 | - ... 65 | - nsrdb_tdy-2020.h5 66 | - tgy/ 67 | - nsrdb_tgy-2016.h5 68 | - ... 69 | - nsrdb_tgy-2020.h5 70 | - tmy/ 71 | - nsrdb_tmy-2016.h5 72 | - ... 73 | - nsrdb_tmy-2020.h5 74 | 75 | ## NSRDB 76 | 77 | The National Solar Radiation Database (NSRDB) is a serially complete collection 78 | of meteorological and solar irradiance data sets for the United States and a 79 | growing list of international locations for 1998-2017. The NSRDB provides 80 | foundational information to support U.S. 
Department of Energy programs, research, and the general public.

The NSRDB provides time-series resource data at 30-minute resolution, averaged
over surface cells of 0.038 degrees in both latitude and longitude, or
nominally 4 km in size. The solar radiation values represent the resource
available to solar energy systems. The data was created using cloud properties
generated by the AVHRR Pathfinder Atmospheres-Extended (PATMOS-x) algorithms
developed by the University of Wisconsin. The Fast All-sky Radiation Model for
Solar applications (FARMS), in conjunction with the cloud properties and with
aerosol optical depth (AOD) and precipitable water vapor (PWV) from ancillary
sources, is used to estimate solar irradiance (GHI, DNI, and DHI). The Global
Horizontal Irradiance (GHI) is computed for clear skies using the REST2 model.
For cloud scenes identified by the cloud mask, FARMS is used to compute GHI.
The Direct Normal Irradiance (DNI) for cloud scenes is then computed using the
DISC model. The PATMOS-x model uses half-hourly radiance images in visible and
infrared channels from the GOES series of geostationary weather satellites.
Ancillary variables needed to run REST2 and FARMS (e.g., aerosol optical
depth, precipitable water vapor, and albedo) are derived from the Modern-Era
Retrospective analysis for Research and Applications (MERRA-2) dataset.
Temperature and wind speed data are also derived from MERRA-2 and provided for
use in SAM to compute PV generation.

The following variables are provided by the NSRDB:
- Irradiance:
    - Global Horizontal (ghi)
    - Direct Normal (dni)
    - Diffuse (dhi)
- Clear-sky Irradiance
- Cloud Type
- Dew Point
- Temperature
- Surface Albedo
- Pressure
- Relative Humidity
- Solar Zenith Angle
- Precipitable Water
- Wind Direction
- Wind Speed
- Fill Flag
- Angstrom wavelength exponent (alpha)
- Aerosol optical depth (aod)
- Aerosol asymmetry parameter (asymmetry)
- Cloud optical depth (cld_opd_dcomp)
- Cloud effective radius (cld_ref_dcomp)
- Cloud pressure (cloud_press_acha)
- Reduced ozone vertical pathlength (ozone)
- Aerosol single-scatter albedo (ssa)

## Data Format

The data is provided in high-density data files (.h5) separated by year. The
variables mentioned above are provided as 2-dimensional time-series arrays
with dimensions (time x location). The temporal axis is defined by the
`time_index` dataset, while the positional axis is defined by the `meta`
dataset. For storage efficiency each variable has been scaled and stored as an
integer. The scale factor is provided in the `psm_scale_factor` attribute. The
units for the variable data are also provided as an attribute (`psm_units`).
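Before extracting data it can help to see which datasets a file exposes and how they are scaled; a minimal `h5pyd` sketch using the dataset and attribute conventions described above (the file path and `dni` dataset are just examples):

```python
import h5pyd

# Open a yearly NSRDB file and inspect its contents
with h5pyd.File('/nrel/nsrdb/v3/nsrdb_2010.h5', mode='r') as f:
    print(list(f))                         # available datasets, e.g. 'dni', 'ghi', 'meta'
    dni = f['dni']
    print(dni.shape)                       # (time x location)
    print(dni.attrs['psm_units'])          # units of the unscaled values
    print(dni.attrs['psm_scale_factor'])   # divide the stored integers by this
```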
## Python Examples

Example scripts to extract solar resource data using Python are provided below.

The easiest way to access and extract data is via the Resource eXtraction tool
[`rex`](https://github.com/nrel/rex):

```python
from rex import NSRDBX

nsrdb_file = '/nrel/nsrdb/v3/nsrdb_2010.h5'
with NSRDBX(nsrdb_file, hsds=True) as f:
    meta = f.meta
    time_index = f.time_index
    dni = f['dni']
```

`rex` also allows easy extraction of the nearest site to a desired (lat, lon)
location:

```python
from rex import NSRDBX

nsrdb_file = '/nrel/nsrdb/v3/nsrdb_2010.h5'
nrel = (39.741931, -105.169891)
with NSRDBX(nsrdb_file, hsds=True) as f:
    nrel_dni = f.get_lat_lon_df('dni', nrel)
```

or to extract all sites in a given region:

```python
from rex import NSRDBX

nsrdb_file = '/nrel/nsrdb/v3/nsrdb_2010.h5'
state = 'Colorado'
with NSRDBX(nsrdb_file, hsds=True) as f:
    co_dni = f.get_region_df('dni', state, region_col='state')
```

Lastly, `rex` can be used to extract all variables needed to run SAM at a given
location:

```python
from rex import NSRDBX

nsrdb_file = '/nrel/nsrdb/v3/nsrdb_2010.h5'
nrel = (39.741931, -105.169891)
with NSRDBX(nsrdb_file, hsds=True) as f:
    nrel_sam_vars = f.get_SAM_df(nrel)
```

If you would rather access the NSRDB data directly using h5pyd:

```python
# Extract the average direct normal irradiance (dni)
import h5pyd
import pandas as pd

# Open .h5 file
with h5pyd.File('/nrel/nsrdb/v3/nsrdb_2010.h5', mode='r') as f:
    # Extract meta data and convert from records array to DataFrame
    meta = pd.DataFrame(f['meta'][...])
    # dni dataset
    dni = f['dni']
    # Extract scale factor
    scale_factor = dni.attrs['psm_scale_factor']
    # Extract, average, and un-scale dni
    mean_dni = dni[...].mean(axis=0) / scale_factor

# Add mean DNI to meta data
meta['Average DNI'] = mean_dni
```

```python
# Extract time-series data for a single site
import h5pyd
import pandas as pd

# Open .h5 file
with h5pyd.File('/nrel/nsrdb/v3/nsrdb_2010.h5', mode='r') as f:
    # Extract time_index and convert to datetime
    # NOTE: time_index is saved as byte-strings and must be decoded
    time_index = pd.to_datetime(f['time_index'][...].astype(str))
    # Initialize DataFrame to store time-series data
    time_series = pd.DataFrame(index=time_index)
    # Extract variables needed to compute generation from SAM:
    for var in ['dni', 'dhi', 'air_temperature', 'wind_speed']:
        # Get dataset
        ds = f[var]
        # Extract scale factor
        scale_factor = ds.attrs['psm_scale_factor']
        # Extract site 100 and add to DataFrame
        time_series[var] = ds[:, 100] / scale_factor
```

## References

For more information about the NSRDB please see the
[website](https://nsrdb.nrel.gov/).
Users of the NSRDB should cite:
- [Sengupta, M., Y. Xie, A. Lopez, A. Habte, G. Maclaurin, and J. Shelby. 2018. "The National Solar Radiation Data Base (NSRDB)."
Renewable and Sustainable Energy Reviews 89 (June): 51-60.](https://www.sciencedirect.com/science/article/pii/S136403211830087X?via%3Dihub)
--------------------------------------------------------------------------------
/datasets/US_Wave.md:
--------------------------------------------------------------------------------
# DOE Water Power Technology Office's (WPTO) US Wave dataset

- /nrel/US_wave/
  - Alaska/
    - Alaska_wave_1979.h5
    - ...
    - Alaska_wave_2010.h5
  - Atlantic/
    - Atlantic_wave_1979.h5
    - ...
    - Atlantic_wave_2010.h5
  - Hawaii/
    - Hawaii_wave_1979.h5
    - ...
    - Hawaii_wave_2010.h5
  - West_Coast/
    - West_Coast_wave_1979.h5
    - ...
    - West_Coast_wave_2010.h5
  - maine/
    - North_Atlantic_2000-01.h5
    - ...
    - North_Atlantic_2000-12.h5
    - North_Atlantic_2001-01.h5
    - ...
    - North_Atlantic_2001-12.h5
  - virtual_buoy/
    - Alaska/
    - Hawaii/
    - West_Coast/
      - West_Coast_virtual_buoy_1979.h5
      - ...
      - West_Coast_virtual_buoy_2010.h5

## Description

The development of this dataset was funded by the U.S. Department of Energy,
Office of Energy Efficiency & Renewable Energy, Water Power Technologies Office
to improve our understanding of the U.S. wave energy resource and to provide
critical information for wave energy project development and wave energy
converter design.

This is the highest-resolution publicly available long-term wave hindcast
dataset that – when complete – will cover the entire U.S. Exclusive Economic
Zone (EEZ). The data can be used to investigate the historical record of wave
statistics at any U.S. site. As such, the dataset could also be of value to any
entity with marine operations inside the U.S. EEZ.

A technical summary of the dataset is as follows:

- 32-year wave hindcast (1979-2010), 3-hour temporal resolution
- Unstructured grid spatial resolution ranges from 200 meters in shallow water to ~10 km in deep water (700,000 grid points in West Coast dataset)
- Spatial coverage: the regions listed in the directory tree above (see the availability timeline below)

The following variables are included in the dataset:

- Mean Wave Direction: Direction normal to the wave crests
- Significant Wave Height: Calculated as the zeroth spectral moment (i.e., H_m0)
- Mean Absolute Period: Calculated as a ratio of spectral moments (m_0/m_1)
- Peak Period: The period associated with the maximum value of the wave energy spectrum
- Mean Zero-Crossing Period: Calculated as a ratio of spectral moments (sqrt(m_0/m_2))
- Energy Period: Calculated as a ratio of spectral moments (m_-1/m_0)
- Directionality Coefficient: Fraction of total wave energy travelling in the direction of maximum wave power
- Maximum Energy Direction: The direction from which the most wave energy is travelling
- Omni-Directional Wave Power: Total wave energy flux from all directions
- Spectral Width: Spectral width characterizes the relative spreading of energy in the wave spectrum

The dataset is being extended to cover the remaining portions of the U.S. EEZ.
The timeline for extending the dataset is as follows:

- West Coast United States: Available
- East Coast United States: Available
- Alaskan Coast: TBD
- Hawaiian Islands: Available
- Gulf of Mexico, Puerto Rico, and U.S. Virgin Islands: TBD
- U.S. Pacific Island Territories: TBD

## Model

The multi-scale, unstructured-grid modeling approach using WaveWatch III and
SWAN enabled long-term (decades) high-resolution hindcasts in a large regional
domain. In particular, the dataset was generated from the unstructured-grid
SWAN model output that was driven by a WaveWatch III model with global-regional
nested grids. The unstructured-grid SWAN model simulations were performed with
a spatial resolution as fine as 200 meters in shallow waters. The dataset has a
3-hour timestep spanning 32 years from 1979 through 2010. The project team
intends to extend this to 2020 (i.e., 1979-2020), pending DOE support to do so.

The models were extensively validated not only for the most common wave
parameters, but also for six IEC resource parameters and 2D spectra, against
high-quality spectral data derived from publicly available buoys. Additional
details on definitions of the variables found in the dataset, the SWAN and
WaveWatch III model configurations, and model validation are available in a
technical report and peer-reviewed publications (Wu et al. 2020, Yang et al.
2020, Yang et al. 2018). This study was funded by the U.S. Department of
Energy, Office of Energy Efficiency & Renewable Energy, Water Power
Technologies Office under Contract DE-AC05-76RL01830 to Pacific Northwest
National Laboratory (PNNL).

## Data Format

The data is provided in high-density data files (.h5) separated by year. The
variables mentioned above are provided as 2-dimensional time-series arrays with
dimensions (time x location). The temporal axis is defined by the `time_index`
dataset, while the positional axis is defined by the `coordinate` dataset. The
units for the variable data are also provided as an attribute (`units`). The
SWAN and IEC variable names are also provided under the attributes
(`SWAN_name`) and (`IEC_name`), respectively.

## Python Examples

Example scripts to extract wave resource data using Python are provided below.

The easiest way to access and extract data is via the Resource eXtraction tool
[`rex`](https://github.com/nrel/rex).

To use `rex` with [`HSDS`](https://github.com/NREL/hsds-examples) you will need
to install `h5pyd`:

```
pip install h5pyd
```

Next you'll need to configure HSDS:

```
hsconfigure
```

and enter at the prompt:

```
hs_endpoint = https://developer.nrel.gov/api/hsds
hs_username =
hs_password =
hs_api_key = 3K3JQbjZmWctY0xmIfSYvYgtIcM3CN0cb1Y2w9bf
```

**IMPORTANT: The example API key here is for demonstration and is rate-limited
per IP. To get your own API key, visit https://developer.nrel.gov/signup/**

You can also add the above contents to a configuration file at `~/.hscfg`
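Once configured, a minimal sketch for sanity-checking the connection before running the examples below, assuming the `h5pyd.Folder` class (the programmatic counterpart of the `hsls` command-line tool that ships with h5pyd):

```python
import h5pyd

# List the top-level NREL domains to confirm the endpoint and credentials work
with h5pyd.Folder('/nrel/') as folder:
    for name in folder:
        print(name)  # e.g. 'US_wave', 'nsrdb', 'wtk'
```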
```python
from rex import WaveX

wave_file = '/nrel/US_wave/West_Coast/West_Coast_wave_2010.h5'
with WaveX(wave_file, hsds=True) as f:
    meta = f.meta
    time_index = f.time_index
    swh = f['significant_wave_height']
```

`rex` also allows easy extraction of the nearest site to a desired (lat, lon)
location:

```python
from rex import WaveX

wave_file = '/nrel/US_wave/West_Coast/West_Coast_wave_2010.h5'
lat_lon = (34.399408, -119.841181)
with WaveX(wave_file, hsds=True) as f:
    lat_lon_swh = f.get_lat_lon_df('significant_wave_height', lat_lon)
```

or to extract all sites in a given region:

```python
from rex import WaveX

wave_file = '/nrel/US_wave/West_Coast/West_Coast_wave_2010.h5'
jurisdiction = 'California'
with WaveX(wave_file, hsds=True) as f:
    ca_swh = f.get_region_df('significant_wave_height', jurisdiction,
                             region_col='jurisdiction')
```

If you would rather access the US Wave data directly using h5pyd:

```python
# Extract the average wave height
import h5pyd
import pandas as pd

# Open .h5 file
with h5pyd.File('/nrel/US_wave/West_Coast/West_Coast_wave_2010.h5', mode='r') as f:
    # Extract meta data and convert from records array to DataFrame
    meta = pd.DataFrame(f['meta'][...])
    # Significant Wave Height
    swh = f['significant_wave_height']
    # Extract scale factor
    scale_factor = swh.attrs['scale_factor']
    # Extract, average, and unscale wave height
    mean_swh = swh[...].mean(axis=0) / scale_factor

# Add mean wave height to meta data
meta['Average Wave Height'] = mean_swh
```

```python
# Extract time-series data for a single site
import h5pyd
import pandas as pd

# Open .h5 file
with h5pyd.File('/nrel/US_wave/West_Coast/West_Coast_wave_2010.h5', mode='r') as f:
    # Extract time_index and convert to datetime
    # NOTE: time_index is saved as byte-strings and must be decoded
    time_index = pd.to_datetime(f['time_index'][...].astype(str))
    # Initialize DataFrame to store time-series data
    time_series = pd.DataFrame(index=time_index)
    # Extract wave height, direction, and period
    for var in ['significant_wave_height', 'mean_wave_direction',
                'mean_absolute_period']:
        # Get dataset
        ds = f[var]
        # Extract scale factor
        scale_factor = ds.attrs['scale_factor']
        # Extract site 100 and add to DataFrame
        time_series[var] = ds[:, 100] / scale_factor
```

## References

Please cite the most relevant publication below when referencing this dataset:

1) [Wu, Wei-Cheng, et al. "Development and validation of a high-resolution regional wave hindcast model for US West Coast wave resource characterization." Renewable Energy 152 (2020): 736-753.](https://www.osti.gov/biblio/1599105)
2) [Yang, Z., G. García-Medina, W. Wu, and T. Wang, 2020. Characteristics and variability of the Nearshore Wave Resource on the U.S. West Coast. Energy.](https://doi.org/10.1016/j.energy.2020.117818)
3) [Yang, Zhaoqing, et al. High-Resolution Regional Wave Hindcast for the US West Coast. No. PNNL-28107.
Pacific Northwest National Lab. (PNNL), Richland, WA (United States), 2018.](https://doi.org/10.2172/1573061)

## Disclaimer and Attribution

The National Renewable Energy Laboratory ("NREL") is operated for the U.S.
Department of Energy ("DOE") by the Alliance for Sustainable Energy, LLC
("Alliance"). Pacific Northwest National Laboratory (PNNL) is managed and
operated by Battelle Memorial Institute ("Battelle") for DOE. As such the
following rules apply:

This data arose from work performed under funding provided by the United
States Government. Access to or use of this data ("Data") denotes consent with
the fact that this data is provided "AS IS," "WHERE IS" AND SPECIFICALLY FREE
FROM ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND, INCLUDING BUT NOT LIMITED TO
ANY IMPLIED WARRANTIES SUCH AS MERCHANTABILITY AND/OR FITNESS FOR ANY
PARTICULAR PURPOSE. Furthermore, NEITHER THE UNITED STATES GOVERNMENT NOR ANY
OF ITS ASSOCIATED ENTITIES OR CONTRACTORS INCLUDING BUT NOT LIMITED TO THE
DOE/PNNL/NREL/BATTELLE/ALLIANCE ASSUME ANY LEGAL LIABILITY OR RESPONSIBILITY
FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF THE DATA, OR REPRESENT THAT
ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. NO ENDORSEMENT OF THE DATA
OR ANY REPRESENTATIONS MADE IN CONNECTION WITH THE DATA IS PROVIDED. IN NO
EVENT SHALL ANY PARTY BE LIABLE FOR ANY DAMAGES, INCLUDING BUT NOT LIMITED TO
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES ARISING FROM THE PROVISION OF THIS
DATA; TO THE EXTENT PERMITTED BY LAW USER AGREES TO INDEMNIFY
DOE/PNNL/NREL/BATTELLE/ALLIANCE AND ITS SUBSIDIARIES, AFFILIATES, OFFICERS,
AGENTS, AND EMPLOYEES AGAINST ANY CLAIM OR DEMAND RELATED TO USER'S USE OF THE
DATA, INCLUDING ANY REASONABLE ATTORNEYS' FEES INCURRED.

The user is granted the right, without any fee or cost, to use or copy the
Data, provided that this entire notice appears in all copies of the Data. In
the event that user engages in any scientific or technical publication
utilizing this data, user agrees to credit DOE/PNNL/NREL/BATTELLE/ALLIANCE in
any such publication consistent with respective professional practice.
--------------------------------------------------------------------------------
/datasets/WINDToolkit.md:
--------------------------------------------------------------------------------
# Wind Resource Data: Wind Integration National Dataset (WIND) Toolkit

- /nrel/wtk/
  - NOW-WAKES_Mid_Atlantic/
    - Call_Areas_100TKE/
      - v1.0.0/
        - 2019-09.h5
        - 2019-10.h5
        - 2020-07.h5
        - 2020-08.h5
    - Lease_Areas_0TKE/
      - v1.0.0/
        - 2019-09.h5
        - ...
        - 2019-12.h5
        - 2020-01.h5
        - ...
        - 2020-08.h5
    - Lease_Areas_100TKE/
      - v1.0.0/
        - 2019-09.h5
        - ...
        - 2019-12.h5
        - 2020-01.h5
        - ...
        - 2020-08.h5
    - No_Wind_Farms/
      - v1.0.0/
        - 2019-09.h5
        - ...
        - 2019-12.h5
        - 2020-01.h5
        - ...
        - 2020-08.h5
    - One_Plant_0TKE/
      - v1.0.0/
        - 2019-09.h5
        - ...
        - 2019-12.h5
        - 2020-01.h5
        - ...
        - 2020-08.h5
    - One_Plant_100TKE/
      - v1.0.0/
        - 2019-09.h5
        - ...
        - 2019-12.h5
        - 2020-01.h5
        - ...
        - 2020-08.h5
  - alaska/
    - v1.0.0/
      - 5min/
        - alaska_2018.h5
        - ...
        - alaska_2020.h5
      - alaska_2018.h5
      - ...
59 | - alaska_2020.h5 60 | - bangladesh/ 61 | - wtk_bangladesh_2014.h5 62 | - ... 63 | - wtk_bangladesh_2017.h5 64 | - canada-5min/ # 5min resolution 65 | - wtk_canada_2007.h5 66 | - ... 67 | - wtk_canada_2014.h5 68 | - canada/ # hourly resolution 69 | - wtk_canada_2007.h5 70 | - ... 71 | - wtk_canada_2014.h5 72 | - canada_bc/ # Bias corrected hourly resolution 73 | - wtk_canada_2007.h5 74 | - ... 75 | - wtk_canada_2014.h5 76 | - central_asia/ 77 | - wtk_central_asia_2015.h5 78 | - conus-5min/ # 5min resolution 79 | - v2.0.0/ 80 | - conus_2018.h5 81 | - ... 82 | - conus_2020.h5 83 | - wtk_conus_2007.h5 84 | - ... 85 | - wtk_conus_2014.h5 86 | - conus/ # hourly resolution 87 | - wtk_conus_2007.h5 88 | - ... 89 | - wtk_conus_2014.h5 90 | - wtk_conus_2019.h5 91 | - wtk_conus_2020.h5 92 | - great_lakes-5min/ 93 | - Great_Lakes_2000.h5 94 | - ... 95 | - Great_Lakes_2020.h5 96 | - great_lakes/ 97 | - Great_Lakes_2000.h5 98 | - ... 99 | - Great_Lakes_2020.h5 100 | - gulf_of_mexico/ 101 | - gulf_2000.h5 102 | - ... 103 | - gulf_2020.h5 104 | - yearly_hr/ 105 | - gulf_2000_hr.h5 106 | - ... 107 | - gulf_2020_hr.h5 108 | - hawaii-5min/ # 5min resolution 109 | - Hawaii_2000.h5 110 | - ... 111 | - Hawaii_2019.h5 112 | - hawaii/ # hourly resolution 113 | - Hawaii_2000.h5 114 | - ... 115 | - Hawaii_2019.h5 116 | - india/ 117 | - wtk_india_2014.h5 118 | - kazakhstan/ 119 | - wtk_kazakhstan_11km_2015.h5 120 | - wtk_kazakhstan_4km_2015.h5 121 | - led/ 122 | - conus-hr/ 123 | - conus_2017.h5 124 | - conus_2018.h5 125 | - maine/ 126 | - North_Atlantic_2000-01.h5 127 | - ... 128 | - North_Atlantic_2000-12.h5 129 | - ... 130 | - North_Atlantic_2020-01.h5 131 | - ... 132 | - North_Atlantic_2020-12.h5 133 | - North_Atlantic_2000.h5 134 | - ... 135 | - North_Atlantic_2020.h5 136 | - yearly_hr/ 137 | - North_Atlantic_2000_hr.h5 138 | - ... 139 | - North_Atlantic_2020_hr.h5 140 | - mexico-5min/ # 5min resolution 141 | - wtk_mexico_2007.h5 142 | - ... 143 | - wtk_mexico_2014.h5 144 | - mexico/ # hourly resolution 145 | - wtk_mexico_2007.h5 146 | - ... 147 | - wtk_mexico_2014.h5 148 | - mid_atlantic-5min/ # 5min resolution 149 | - Mid_Atlantic_2000.h5 150 | - ... 151 | - Mid_Atlantic_2020.h5 152 | - mid_atlantic/ # hourly resolution 153 | - Mid_Atlantic_2000.h5 154 | - ... 155 | - Mid_Atlantic_2020.h5 156 | - north_america/ 157 | - v1.0.0/ 158 | - north_america_2001.h5 159 | - ... 160 | - north_america_2020.h5 161 | - now23_california/ 162 | - v1.0.0/ 163 | - 5min/ 164 | - now23_ca_5min_2000.h5 165 | - ... 166 | - now23_ca_5min_2022.h5 167 | - 60min/ 168 | - now23_ca_60min_2000.h5 169 | - ... 170 | - now23_ca_60min_2022.h5 171 | - nw_pacific-5min/ # 5min resolution 172 | - NW_Pacific_2000.h5 173 | - ... 174 | - NW_Pacific_2019.h5 175 | - nw_pacific/ # hourly resolution 176 | - NW_Pacific_2000.h5 177 | - ... 178 | - NW_Pacific_2019.h5 179 | - offshore_ca-5min/ # 5min resolution 180 | - Offshore_CA_2000.h5 181 | - ... 182 | - Offshore_CA_2019.h5 183 | - offshore_ca/ # hourly resolution 184 | - Offshore_CA_2000.h5 185 | - ... 186 | - Offshore_CA_2019.h5 187 | - philippines/ 188 | - wtk_philippines_2017.h5 189 | - pr100/ 190 | - 5min/ 191 | - puerto_rico_wind_5min_2001.h5 192 | - ... 193 | - puerto_rico_wind_5min_2020.h5 194 | - hourly/ 195 | - puerto_rico_wind_hourly_2001.h5 196 | - ... 197 | - puerto_rico_wind_hourly_2020.h5 198 | - seasiawind/ 199 | - seasiawind_2017.h5 200 | - ... 201 | - seasiawind_2021.h5 202 | - seasiawind_v2/ 203 | - seasiawind_2007_v2.h5 204 | - ... 
- seasiawind_2021_v2.h5
  - seasiawind_v3/
    - seasiawind_2007_v3.h5
    - ...
    - seasiawind_2021_v3.h5
  - south_atlantic/
    - monthly/
      - v1.0.0/
        - satlantic_2000-01.h5
        - ...
        - satlantic_2000-12.h5
        - ...
        - satlantic_2020-01.h5
        - ...
        - satlantic_2020-12.h5
    - yearly_5min/
      - v1.0.0/
        - satlantic_2000_hr.h5
        - ...
        - satlantic_2020_hr.h5
    - yearly_hr/
      - v1.0.0/
        - satlantic_2000_hr.h5
        - ...
        - satlantic_2020_hr.h5
  - sup3rwind/
    - ukraine/
      - 5min/
        - sup3rwind_ukraine_2000.h5
        - ...
        - sup3rwind_ukraine_2023.h5
      - 60min/
        - sup3rwind_ukraine_2000.h5
        - ...
        - sup3rwind_ukraine_2023.h5
  - vietnam/
    - wtk_vietnam_2016.h5
    - ...
    - wtk_vietnam_2018.h5

## Model

Wind resource data for North America was produced using the [Weather Research and Forecasting Model (WRF)](https://www.mmm.ucar.edu/weather-research-and-forecasting-model).
The WRF model was initialized with the European Centre for Medium-Range Weather
Forecasts Interim Reanalysis (ERA-Interim) data set with an initial grid spacing
of 54 km. Three internal nested domains were used to refine the spatial
resolution to 18, 6, and finally 2 km. The WRF model was run for years 2007
to 2014. While outputs were extracted from WRF at 5-minute time-steps, due to
storage limitations instantaneous hourly time-steps are provided for all
variables, while full 5-minute resolution data is provided for wind speed and
wind direction only.

The following variables were extracted from the WRF model data:
- Wind Speed at 10, 40, 60, 80, 100, 120, 140, 160, 200 m
- Wind Direction at 10, 40, 60, 80, 100, 120, 140, 160, 200 m
- Temperature at 2, 10, 40, 60, 80, 100, 120, 140, 160, 200 m
- Pressure at 0, 100, 200 m
- Surface Precipitation Rate
- Surface Relative Humidity
- Inverse Monin Obukhov Length

## Domains

The wind resource was produced using three distinct WRF domains shown below. The
CONUS domain for 2007-2013 was run by 3Tier, while 2014 as well as all years of
the Canada and Mexico domains were run under NARIS. The data is provided in
three sets of files:

- CONUS: Extracted exclusively from the CONUS domain
- Canada: Combined data from the Canada and CONUS domains
- Mexico: Combined data from the Mexico and CONUS domains

## Data Format

The data is provided in high-density data files (.h5) separated by year. The
variables mentioned above are provided as 2-dimensional time-series arrays with
dimensions (time x location). The temporal axis is defined by the `time_index`
dataset, while the positional axis is defined by the `meta` dataset. For
storage efficiency each variable has been scaled and stored as an integer. The
scale factor is provided in the `scale_factor` attribute. The units for the
variable data are also provided as an attribute (`units`).
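`rex.WindX` (shown in the examples below) interpolates to unlisted hub heights for you. If you are accessing the files directly with `h5pyd`, a power-law shear interpolation between the two bracketing heights is a common approximation; a minimal sketch, not part of the toolkit itself (the 90 m target height and site index 100 are arbitrary examples):

```python
import h5pyd
import numpy as np

# Estimate 90 m wind speed at one site from the 80 m and 100 m datasets
with h5pyd.File('/nrel/wtk/conus/wtk_conus_2010.h5', mode='r') as f:
    v80 = f['windspeed_80m'][:, 100] / f['windspeed_80m'].attrs['scale_factor']
    v100 = f['windspeed_100m'][:, 100] / f['windspeed_100m'].attrs['scale_factor']

# Power law v(z) = v(80) * (z / 80)**alpha, with alpha fit from the two
# bracketing heights; clip speeds to avoid log(0) during calm timesteps
v80_safe = np.clip(v80, 0.1, None)
v100_safe = np.clip(v100, 0.1, None)
alpha = np.log(v100_safe / v80_safe) / np.log(100 / 80)
v90 = v80 * (90 / 80) ** alpha
```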
## Python Examples

Example scripts to extract wind resource data using Python are provided below.

The easiest way to access and extract data is via the Resource eXtraction tool
[`rex`](https://github.com/nrel/rex):

```python
from rex import WindX

wtk_file = '/nrel/wtk/conus/wtk_conus_2010.h5'
with WindX(wtk_file, hsds=True) as f:
    meta = f.meta
    time_index = f.time_index
    wspd_100m = f['windspeed_100m']
```

Note: `WindX` will automatically interpolate to the desired hub-height:

```python
from rex import WindX

wtk_file = '/nrel/wtk/conus/wtk_conus_2010.h5'
with WindX(wtk_file, hsds=True) as f:
    print(f.datasets)  # note: 90m is not a valid dataset
    wspd_90m = f['windspeed_90m']
```

`rex` also allows easy extraction of the nearest site to a desired (lat, lon)
location:

```python
from rex import WindX

wtk_file = '/nrel/wtk/conus/wtk_conus_2010.h5'
nwtc = (39.913561, -105.222422)
with WindX(wtk_file, hsds=True) as f:
    nwtc_wspd = f.get_lat_lon_df('windspeed_100m', nwtc)
```

or to extract all sites in a given region:

```python
from rex import WindX

wtk_file = '/nrel/wtk/conus/wtk_conus_2010.h5'
state = 'Colorado'
with WindX(wtk_file, hsds=True) as f:
    co_wspd = f.get_region_df('windspeed_100m', state, region_col='state')
```

Lastly, `rex` can be used to extract all variables needed to run SAM at a given
location:

```python
from rex import WindX

wtk_file = '/nrel/wtk/conus/wtk_conus_2010.h5'
nwtc = (39.913561, -105.222422)
with WindX(wtk_file, hsds=True) as f:
    nwtc_sam_vars = f.get_SAM_df(nwtc)
```

If you would rather access the WIND Toolkit data directly using h5pyd:

```python
# Extract the average 100m wind speed
import h5pyd
import pandas as pd

# Open .h5 file
with h5pyd.File('/nrel/wtk/conus/wtk_conus_2010.h5', mode='r') as f:
    # Extract meta data and convert from records array to DataFrame
    meta = pd.DataFrame(f['meta'][...])
    # 100m windspeed dataset
    wspd = f['windspeed_100m']
    # Extract scale factor
    scale_factor = wspd.attrs['scale_factor']
    # Extract, average, and unscale windspeed
    mean_wspd_100m = wspd[...].mean(axis=0) / scale_factor

# Add mean windspeed to meta data
meta['Average 100m Wind Speed'] = mean_wspd_100m
```

```python
# Extract time-series data for a single site
import h5pyd
import pandas as pd

# Open .h5 file
with h5pyd.File('/nrel/wtk/conus/wtk_conus_2010.h5', mode='r') as f:
    # Extract time_index and convert to datetime
    # NOTE: time_index is saved as byte-strings and must be decoded
    time_index = pd.to_datetime(f['time_index'][...].astype(str))
    # Initialize DataFrame to store time-series data
    time_series = pd.DataFrame(index=time_index)
    # Extract 100m wind speed, wind direction, temperature, and pressure
    for var in ['windspeed_100m', 'winddirection_100m',
                'temperature_100m', 'pressure_100m']:
        # Get dataset
        ds = f[var]
        # Extract scale factor
        scale_factor = ds.attrs['scale_factor']
        # Extract site 100 and add to DataFrame
        time_series[var] = ds[:, 100] / scale_factor
```

## References

For more information about the WIND Toolkit please see the
[website](https://www.nrel.gov/grid/wind-toolkit.html).
Users of the WIND Toolkit should use the following citations:
- [Draxl, C., B.M. Hodge, A. Clifton, and J. McCaa. 2015. Overview and Meteorological Validation of the Wind Integration National Dataset Toolkit (Technical Report, NREL/TP-5000-61740). Golden, CO: National Renewable Energy Laboratory.](https://www.nrel.gov/docs/fy15osti/61740.pdf)
- [Draxl, C., B.M. Hodge, A. Clifton, and J. McCaa. 2015. "The Wind Integration National Dataset (WIND) Toolkit." Applied Energy 151: 355-366.](https://www.sciencedirect.com/science/article/pii/S0306261915004237?via%3Dihub)
- [Lieberman-Cribbin, W., C. Draxl, and A. Clifton. 2014. Guide to Using the WIND Toolkit Validation Code (Technical Report, NREL/TP-5000-62595). Golden, CO: National Renewable Energy Laboratory.](https://www.nrel.gov/docs/fy15osti/62595.pdf)
- [King, J., A. Clifton, and B.M. Hodge. 2014. Validation of Power Output for the WIND Toolkit (Technical Report, NREL/TP-5D00-61714). Golden, CO: National Renewable Energy Laboratory.](https://www.nrel.gov/docs/fy14osti/61714.pdf)
--------------------------------------------------------------------------------
/datasets/wtk-us.md:
--------------------------------------------------------------------------------
# Wind Integration National Dataset (WIND Toolkit) Gridded Data Cube

/nrel/wtk-us.h5

## Data Layout

The data has three dimensions: latitudinal index, longitudinal index, and temporal index, and is arranged in a uniform matrix:

![](https://github.com/NREL/hsds-examples/blob/master/bin/docs/cube.png?raw=true)

The coordinates are thus defined:

* t = number of hours since 12AM on the 1st of January, 2007 UTC. Up to hour 61368, which is 7 years' worth of data (see the worked example after this list).
* y = index of lambert conic coordinates.
* x = index of lambert conic coordinates.
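For instance, a minimal sketch of computing the `t` index for a given timestamp (midnight UTC on 1 January 2010 is an arbitrary example; the `datetime` dataset described below can be used to double-check the result):

```python
from datetime import datetime, timezone

origin = datetime(2007, 1, 1, tzinfo=timezone.utc)
target = datetime(2010, 1, 1, tzinfo=timezone.utc)

# Whole hours since 12AM on the 1st of January, 2007 UTC
t = int((target - origin).total_seconds() // 3600)
print(t)  # 26304 (3 years of hours, including the 2008 leap day)
```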
*Note: All data are instantaneous in time.*

At any point there exist 37 variables, or datasets:

Datasets (t, y, x)

* DIF
* DNI
* GHI
* inversemoninobukhovlength_2m
* precipitationrate_0m
* pressure_0m
* pressure_100m
* pressure_200m
* relativehumidity_2m
* temperature_100m
* temperature_10m
* temperature_120m
* temperature_140m
* temperature_160m
* temperature_200m
* temperature_2m
* temperature_40m
* temperature_60m
* temperature_80m
* winddirection_100m
* winddirection_10m
* winddirection_120m
* winddirection_140m
* winddirection_160m
* winddirection_200m
* winddirection_40m
* winddirection_60m
* winddirection_80m
* windspeed_100m
* windspeed_10m
* windspeed_120m
* windspeed_140m
* windspeed_160m
* windspeed_200m
* windspeed_40m
* windspeed_60m
* windspeed_80m

There are two special datasets for indexing and time slicing:

* coordinates (y,x) - lat/lon coordinates for every point on the x/y grid (original projection is a modified Lambert Conic)
* datetime (t) - YYYYMMDDHHMMSS datetimestamp for every time in the time dimension

## Units

* Pressure: Pa
* Temperature: K
* Direction: degree
* Speed: m s-1
* GHI: W m-2
* inversemoninobukhovlength_2m: m-1

## Data Access

Use the `h5pyd.File` function to open a connection to the server.

```
f = h5pyd.File("/nrel/wtk-us.h5", 'r')
```

Most datasets can be accessed with the following pattern:

```
f[dataset][t,y,x]
```

The indices support numpy-style indexing, including slices. For example:

```
f = h5pyd.File("/nrel/wtk-us.h5", 'r')
one_value = f["windspeed_100m"][42,42,42]
timeseries = f["windspeed_100m"][:,42,42]
wspd_map = f["windspeed_100m"][42,:,:]
```

Downsampling can also be accomplished easily by using a numpy-style skip parameter:

```
downsampled_map = f["windspeed_100m"][42,::16,::16] # every 16th point
downsampled_timeseries = f["windspeed_100m"][::24,42,42] # daily (every 24 hours)
```

Special datasets may not have three dimensions.

```
# retrieve the latitude and longitude of y=0, x=0.
coordinate = f["coordinates"][0,0]

# retrieve the datetime string for t=0.
datetime = f["datetime"][0]
```
--------------------------------------------------------------------------------
/notebooks/05_NSRDB_multi_year_means.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NREL - NSRDB Multi-year Means HSDS Example"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook demonstrates computing multi-year means from National Renewable Energy Laboratory (NREL) National Solar Radiation Database (NSRDB) data.
The data is provided from Amazon Web Services using the HDF Group's Highly Scalable Data Service (HSDS).\n",
    "\n",
    "Please consult the README file for setup instructions prior to running this notebook.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import h5pyd\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy.spatial import cKDTree\n",
    "import time\n",
    "\n",
    "# Plotting settings\n",
    "mpl.rcParams['font.sans-serif'] = 'DejaVu Sans'\n",
    "mpl.rcParams['pdf.fonttype'] = 42\n",
    "mpl.rc('xtick', labelsize=16) \n",
    "mpl.rc('ytick', labelsize=16)\n",
    "mpl.rc('font', size=16)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## NSRDB on AWS (via HSDS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
\n", 59 | "\n", 72 | "\n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | "
latitudelongitudeelevationtimezonecountrystatecountyurbanpopulationlandcover
0-19.99-175.2599950.013b'None'b'None'b'None'b'None'-9999210
1-19.99-175.2200010.013b'None'b'None'b'None'b'None'-9999210
2-19.99-175.1799930.013b'None'b'None'b'None'b'None'-9999210
3-19.99-175.1399990.013b'None'b'None'b'None'b'None'-9999210
4-19.99-175.1000060.013b'None'b'None'b'None'b'None'-9999210
\n", 156 | "
" 157 | ], 158 | "text/plain": [ 159 | " latitude longitude elevation timezone country state county \\\n", 160 | "0 -19.99 -175.259995 0.0 13 b'None' b'None' b'None' \n", 161 | "1 -19.99 -175.220001 0.0 13 b'None' b'None' b'None' \n", 162 | "2 -19.99 -175.179993 0.0 13 b'None' b'None' b'None' \n", 163 | "3 -19.99 -175.139999 0.0 13 b'None' b'None' b'None' \n", 164 | "4 -19.99 -175.100006 0.0 13 b'None' b'None' b'None' \n", 165 | "\n", 166 | " urban population landcover \n", 167 | "0 b'None' -9999 210 \n", 168 | "1 b'None' -9999 210 \n", 169 | "2 b'None' -9999 210 \n", 170 | "3 b'None' -9999 210 \n", 171 | "4 b'None' -9999 210 " 172 | ] 173 | }, 174 | "execution_count": 2, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "file_path = '/nrel/nsrdb/v3/nsrdb_2017.h5'\n", 181 | "with h5pyd.File(file_path, mode='r') as f:\n", 182 | " meta = pd.DataFrame(f['meta'][...])\n", 183 | " \n", 184 | "meta.head()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "## Compute Multi-year GHI means for California" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 3, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "Number of NSRDB pixels in CA = 26010\n", 204 | "Download size per year = 911.3904 MB\n" 205 | ] 206 | } 207 | ], 208 | "source": [ 209 | "ca_meta = meta.loc[meta['state'] == b'California']\n", 210 | "ca_pos = ca_meta.index.values.copy()\n", 211 | "ca_slice = slice(ca_pos[0], ca_pos[-1] + 1)\n", 212 | "ca_pos -= ca_pos[0]\n", 213 | "down_size = 17520 * len(ca_pos) * 2 * 10**-6\n", 214 | "ca_meta.head()\n", 215 | "print('Number of NSRDB pixels in CA = {}'.format(len(ca_meta)))\n", 216 | "print('Download size per year = {:.4f} MB'.format(down_size))" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 6, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "name": "stderr", 226 | "output_type": "stream", 227 | "text": [ 228 | "WARNING:urllib3.connectionpool:Retrying (Retry(total=9, connect=9, read=10, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')': /?getdnids=1&getobjs=T&include_attrs=T&CreateOrder=0&domain=%2Fnrel%2Fnsrdb%2Fv3%2Fnsrdb_1998.h5\n", 229 | "WARNING:urllib3.connectionpool:Retrying (Retry(total=8, connect=8, read=10, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')': /?getdnids=1&getobjs=T&include_attrs=T&CreateOrder=0&domain=%2Fnrel%2Fnsrdb%2Fv3%2Fnsrdb_1998.h5\n", 230 | "WARNING:urllib3.connectionpool:Retrying (Retry(total=7, connect=7, read=10, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')': /?getdnids=1&getobjs=T&include_attrs=T&CreateOrder=0&domain=%2Fnrel%2Fnsrdb%2Fv3%2Fnsrdb_1998.h5\n", 231 | "WARNING:urllib3.connectionpool:Retrying (Retry(total=6, connect=6, read=10, redirect=None, status=None)) after connection broken by 'NewConnectionError(': Failed to establish a new connection: [Errno 111] Connection refused')': /?getdnids=1&getobjs=T&include_attrs=T&CreateOrder=0&domain=%2Fnrel%2Fnsrdb%2Fv3%2Fnsrdb_1998.h5\n", 232 | "WARNING:urllib3.connectionpool:Retrying (Retry(total=5, connect=5, read=10, redirect=None, status=None)) after connection 
broken by 'NewConnectionError: Failed to establish a new connection: [Errno 111] Connection refused'\n",
[Remaining urllib3 retry warnings and the full ConnectionRefusedError/KeyboardInterrupt traceback omitted: no HSDS server was reachable at the configured endpoint, so h5pyd kept retrying until the cell was interrupted.]
connection: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m e\n\u001b[1;32m 188\u001b[0m )\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m conn\n", 259 | "\u001b[0;31mNewConnectionError\u001b[0m: : Failed to establish a new connection: [Errno 111] Connection refused", 260 | "\nDuring handling of the above exception, another exception occurred:\n", 261 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 262 | "Cell \u001b[0;32mIn[6], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m ty \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 7\u001b[0m file_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/nrel/nsrdb/v3/nsrdb_\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m.h5\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mformat(year)\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mh5pyd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mFile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 9\u001b[0m ghi \u001b[38;5;241m=\u001b[39m f[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mghi\u001b[39m\u001b[38;5;124m'\u001b[39m][:, ca_slice]\n\u001b[1;32m 11\u001b[0m ca_df[col] \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mmean(ghi[:, ca_pos], axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n", 263 | "File \u001b[0;32m/usr/local/python/3.10.13/lib/python3.10/site-packages/h5pyd/_hl/files.py:313\u001b[0m, in \u001b[0;36mFile.__init__\u001b[0;34m(self, domain, mode, endpoint, username, password, bucket, api_key, use_session, use_cache, swmr, libver, logger, owner, linked_domain, track_order, retries, timeout, **kwds)\u001b[0m\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 312\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 313\u001b[0m rsp \u001b[38;5;241m=\u001b[39m \u001b[43mhttp_conn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mGET\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 314\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m:\n", 264 | "File \u001b[0;32m/usr/local/python/3.10.13/lib/python3.10/site-packages/h5pyd/_hl/httpconn.py:467\u001b[0m, in \u001b[0;36mHttpConn.GET\u001b[0;34m(self, req, format, params, headers, use_cache)\u001b[0m\n\u001b[1;32m 464\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 465\u001b[0m stream \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 467\u001b[0m rsp \u001b[38;5;241m=\u001b[39m \u001b[43ms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_endpoint\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 469\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 470\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 471\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 472\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 473\u001b[0m \u001b[43m \u001b[49m\u001b[43mverify\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mverifyCert\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 474\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 475\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlog\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus: \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(rsp\u001b[38;5;241m.\u001b[39mstatus_code))\n\u001b[1;32m 476\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_hsds:\n", 265 | "File \u001b[0;32m~/.local/lib/python3.10/site-packages/requests/sessions.py:602\u001b[0m, in \u001b[0;36mSession.get\u001b[0;34m(self, url, **kwargs)\u001b[0m\n\u001b[1;32m 594\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a GET request. Returns :class:`Response` object.\u001b[39;00m\n\u001b[1;32m 595\u001b[0m \n\u001b[1;32m 596\u001b[0m \u001b[38;5;124;03m:param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;124;03m:param \\*\\*kwargs: Optional arguments that ``request`` takes.\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;124;03m:rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 599\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 601\u001b[0m kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m--> 602\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGET\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", 266 | "File \u001b[0;32m~/.local/lib/python3.10/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 
587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n", 267 | "File \u001b[0;32m~/.local/lib/python3.10/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n", 268 | "File \u001b[0;32m~/.local/lib/python3.10/site-packages/requests/adapters.py:667\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 664\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 666\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 667\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 668\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 669\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 670\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 671\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 672\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 673\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 674\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 675\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 
676\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 677\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 678\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 679\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 681\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 682\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n", 269 | "File \u001b[0;32m/usr/local/python/3.10.13/lib/python3.10/site-packages/urllib3/connectionpool.py:829\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 824\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m conn:\n\u001b[1;32m 825\u001b[0m \u001b[38;5;66;03m# Try again\u001b[39;00m\n\u001b[1;32m 826\u001b[0m log\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 827\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRetrying (\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m) after connection broken by \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, retries, err, url\n\u001b[1;32m 828\u001b[0m )\n\u001b[0;32m--> 829\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 830\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 831\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 832\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 833\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 834\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 835\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 836\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 837\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 838\u001b[0m \u001b[43m \u001b[49m\u001b[43mpool_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpool_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 839\u001b[0m \u001b[43m \u001b[49m\u001b[43mrelease_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrelease_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 840\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 841\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mbody_pos\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody_pos\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 842\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\n\u001b[1;32m 843\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 845\u001b[0m \u001b[38;5;66;03m# Handle redirect?\u001b[39;00m\n\u001b[1;32m 846\u001b[0m redirect_location \u001b[38;5;241m=\u001b[39m redirect \u001b[38;5;129;01mand\u001b[39;00m response\u001b[38;5;241m.\u001b[39mget_redirect_location()\n", 270 | "File \u001b[0;32m/usr/local/python/3.10.13/lib/python3.10/site-packages/urllib3/connectionpool.py:829\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 824\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m conn:\n\u001b[1;32m 825\u001b[0m \u001b[38;5;66;03m# Try again\u001b[39;00m\n\u001b[1;32m 826\u001b[0m log\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 827\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRetrying (\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m) after connection broken by \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, retries, err, url\n\u001b[1;32m 828\u001b[0m )\n\u001b[0;32m--> 829\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 830\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 831\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 832\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 833\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 834\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 835\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 836\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 837\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 838\u001b[0m \u001b[43m \u001b[49m\u001b[43mpool_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpool_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 839\u001b[0m \u001b[43m \u001b[49m\u001b[43mrelease_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrelease_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 840\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 841\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody_pos\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody_pos\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 842\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\n\u001b[1;32m 843\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 845\u001b[0m \u001b[38;5;66;03m# Handle redirect?\u001b[39;00m\n\u001b[1;32m 
846\u001b[0m redirect_location \u001b[38;5;241m=\u001b[39m redirect \u001b[38;5;129;01mand\u001b[39;00m response\u001b[38;5;241m.\u001b[39mget_redirect_location()\n", 271 | " \u001b[0;31m[... skipping similar frames: HTTPConnectionPool.urlopen at line 829 (3 times)]\u001b[0m\n", 272 | "File \u001b[0;32m/usr/local/python/3.10.13/lib/python3.10/site-packages/urllib3/connectionpool.py:829\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 824\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m conn:\n\u001b[1;32m 825\u001b[0m \u001b[38;5;66;03m# Try again\u001b[39;00m\n\u001b[1;32m 826\u001b[0m log\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 827\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRetrying (\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m) after connection broken by \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, retries, err, url\n\u001b[1;32m 828\u001b[0m )\n\u001b[0;32m--> 829\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 830\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 831\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 832\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 833\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 834\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 835\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 836\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 837\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 838\u001b[0m \u001b[43m \u001b[49m\u001b[43mpool_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpool_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 839\u001b[0m \u001b[43m \u001b[49m\u001b[43mrelease_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrelease_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 840\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 841\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody_pos\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody_pos\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 842\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\n\u001b[1;32m 843\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 845\u001b[0m \u001b[38;5;66;03m# Handle redirect?\u001b[39;00m\n\u001b[1;32m 846\u001b[0m redirect_location \u001b[38;5;241m=\u001b[39m redirect \u001b[38;5;129;01mand\u001b[39;00m response\u001b[38;5;241m.\u001b[39mget_redirect_location()\n", 273 | "File \u001b[0;32m/usr/local/python/3.10.13/lib/python3.10/site-packages/urllib3/connectionpool.py:804\u001b[0m, in 
\u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 799\u001b[0m e \u001b[38;5;241m=\u001b[39m ProtocolError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection aborted.\u001b[39m\u001b[38;5;124m\"\u001b[39m, e)\n\u001b[1;32m 801\u001b[0m retries \u001b[38;5;241m=\u001b[39m retries\u001b[38;5;241m.\u001b[39mincrement(\n\u001b[1;32m 802\u001b[0m method, url, error\u001b[38;5;241m=\u001b[39me, _pool\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m, _stacktrace\u001b[38;5;241m=\u001b[39msys\u001b[38;5;241m.\u001b[39mexc_info()[\u001b[38;5;241m2\u001b[39m]\n\u001b[1;32m 803\u001b[0m )\n\u001b[0;32m--> 804\u001b[0m \u001b[43mretries\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 806\u001b[0m \u001b[38;5;66;03m# Keep track of the error for the retry warning.\u001b[39;00m\n\u001b[1;32m 807\u001b[0m err \u001b[38;5;241m=\u001b[39m e\n", 274 | "File \u001b[0;32m/usr/local/python/3.10.13/lib/python3.10/site-packages/urllib3/util/retry.py:434\u001b[0m, in \u001b[0;36mRetry.sleep\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 431\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m slept:\n\u001b[1;32m 432\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m--> 434\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sleep_backoff\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", 275 | "File \u001b[0;32m/usr/local/python/3.10.13/lib/python3.10/site-packages/urllib3/util/retry.py:418\u001b[0m, in \u001b[0;36mRetry._sleep_backoff\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m backoff \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 417\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m--> 418\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbackoff\u001b[49m\u001b[43m)\u001b[49m\n", 276 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "ca_df = ca_meta[['latitude', 'longitude']].copy()\n", 282 | "ext_time = {}\n", 283 | "ts = time.time()\n", 284 | "for year in range(1998, 2018):\n", 285 | " col = '{}_means'.format(year)\n", 286 | " ty = time.time()\n", 287 | " file_path = '/nrel/nsrdb/v3/nsrdb_{}.h5'.format(year)\n", 288 | " with h5pyd.File(file_path, mode='r') as f:\n", 289 | " ghi = f['ghi'][:, ca_slice]\n", 290 | " \n", 291 | " ca_df[col] = np.mean(ghi[:, ca_pos], axis=0)\n", 292 | " tt = time.time() - ty\n", 293 | " ext_time[str(year)] = tt\n", 294 | " print('Means for {} computed in {:.4f} minutes'.format(year, tt / 60))\n", 295 | " \n", 296 | "tt = time.time() - ts\n", 297 | "ext_time['total'] = tt\n", 298 | "print('Time to extract all means = {:.4f} hours'.format(tt / 3600))" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": {}, 305 | "outputs": [], 306 | "source": [ 307 | "ext_df = pd.DataFrame(ext_time, index=['seconds']).T\n", 308 | "ext_df['minutes'] = ext_df['seconds'] / 60\n", 309 | "ext_df['hours'] = ext_df['seconds'] / 3600\n", 310 | "ax = ext_df.iloc[:-1]['minutes'].plot.bar()\n", 311 | "ax.set_xlabel('Year')\n", 312 | "ax.set_ylabel('Compute Time (min)')\n", 313 | "plt.show()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | 
"execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "means_cols = [col for col in ca_df.columns\n", 323 | " if 'means' in col]\n", 324 | "ca_df['MY means'] = ca_df[means_cols].mean(axis=1)\n", 325 | "ca_df['CV'] = ca_df[means_cols].std(axis=1) / ca_df['MY means']\n", 326 | "ca_df.head()" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "metadata": {}, 333 | "outputs": [], 334 | "source": [ 335 | "ax = ca_df.plot.scatter(x='longitude', y='latitude', c='MY means',\n", 336 | " colormap='YlOrRd',\n", 337 | " title='Multi-year GHI Means')\n", 338 | "plt.show()" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "ca_df.plot.scatter(x='longitude', y='latitude', c='CV',\n", 348 | " colormap='BuPu',\n", 349 | " title='CV of annual GHI means')\n", 350 | "plt.show()" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [] 359 | } 360 | ], 361 | "metadata": { 362 | "kernelspec": { 363 | "display_name": "Python 3", 364 | "language": "python", 365 | "name": "python3" 366 | }, 367 | "language_info": { 368 | "codemirror_mode": { 369 | "name": "ipython", 370 | "version": 3 371 | }, 372 | "file_extension": ".py", 373 | "mimetype": "text/x-python", 374 | "name": "python", 375 | "nbconvert_exporter": "python", 376 | "pygments_lexer": "ipython3", 377 | "version": "3.10.13" 378 | } 379 | }, 380 | "nbformat": 4, 381 | "nbformat_minor": 2 382 | } 383 | -------------------------------------------------------------------------------- /scripts/box.py: -------------------------------------------------------------------------------- 1 | # Jordan Perr-Sauer 2 | 3 | import h5pyd, h5py 4 | import numpy as np 5 | from pyproj import Proj 6 | import sys 7 | 8 | ######### CONFIGURATION ######### 9 | 10 | 11 | output="output_file.hdf5" 12 | 13 | # Rectangle in lat/lon 14 | sw = [36.96744946416934, -109.05029296875] 15 | ne = [41.02964338716638, -102.0849609375] 16 | 17 | # Time coordinates (hours since 0:0:0 on January 1st, 2007) 18 | tmin = 0 19 | tmax = 24 20 | 21 | # Full resolution would be 1,1,1 22 | latskip = 1 23 | lonskip = 1 24 | tskip = 1 25 | 26 | # Which data sets do you want included in the download? 
27 | datasets = ['windspeed_100m', 'winddirection_100m']
28 | 
29 | 
30 | ######### END CONFIGURATION #########
31 | 
32 | projstring = """+proj=lcc +lat_1=30 +lat_2=60
33 |                 +lat_0=38.47240422490422 +lon_0=-96.0
34 |                 +x_0=0 +y_0=0 +ellps=sphere
35 |                 +units=m +no_defs """
36 | projectLcc = Proj(projstring)
37 | 
38 | f = h5pyd.File("/nrel/wtk-us.h5", 'r')
39 | 
40 | dset_coords = f['coordinates']
41 | origin_ll = reversed(dset_coords[0][0])  # Grab origin directly from database
42 | origin = projectLcc(*origin_ll)
43 | 
44 | def indicesForCoord(lat, lon):
45 |     coords = (lon, lat)  # pyproj expects (lon, lat) order
46 |     coords = projectLcc(*coords)
47 |     delta = np.subtract(coords, origin)
48 |     ij = [int(round(x/2000)) for x in delta]  # the wtk-us grid spacing is 2,000 m
49 |     return tuple(reversed(ij))
50 | 
51 | def line(lat1, lon1, lat2, lon2):
52 |     z = zip(np.linspace(lat1, lat2, 5000), np.linspace(lon1, lon2, 5000))
53 |     if sys.version_info >= (3, 0):
54 |         z = list(z)
55 |     return z
56 | 
57 | def bounding_ij(sw, ne):
58 |     bb = line(sw[0], ne[1], ne[0], ne[1])  # east edge
59 |     bb.extend(line(sw[0], sw[1], ne[0], sw[1]))  # west edge
60 |     bb.extend(line(sw[0], sw[1], sw[0], ne[1]))  # south edge
61 |     bb.extend(line(ne[0], sw[1], ne[0], ne[1]))  # north edge
62 |     lcbb = map(lambda c: indicesForCoord(*c), bb)
63 |     if sys.version_info >= (3, 0):
64 |         lcbb = list(lcbb)
65 |     lcbb_i = [x[0] for x in lcbb]
66 |     lcbb_j = [x[1] for x in lcbb]
67 |     ne = (max(lcbb_i), max(lcbb_j))
68 |     sw = (min(lcbb_i), min(lcbb_j))
69 |     return (ne, sw)
70 | 
71 | bd = bounding_ij(sw, ne)
72 | 
73 | 
74 | # Download data and save to local file
75 | 
76 | lf = h5py.File(output, "w")
77 | 
78 | for d in datasets:
79 |     ds = f[d][tmin:tmax:tskip, bd[1][0]:bd[0][0]:latskip, bd[1][1]:bd[0][1]:lonskip]
80 |     lf[d] = ds
81 | 
82 | lf["coordinates"] = f["coordinates"][bd[1][0]:bd[0][0]:latskip, bd[1][1]:bd[0][1]:lonskip]
83 | lf.flush()
84 | lf.close()
85 | 
--------------------------------------------------------------------------------
/scripts/point_statistics.py:
--------------------------------------------------------------------------------
1 | # Jordan Perr-Sauer
2 | 
3 | import h5pyd
4 | import numpy as np
5 | from pyproj import Proj
6 | import sys
7 | 
8 | ######### CONFIGURATION #########
9 | 
10 | lat = 36.96744946416934
11 | lon = -109.05029296875
12 | hub_height = "100"
13 | 
14 | #################################
15 | 
16 | f = h5pyd.File("/nrel/wtk-us.h5", 'r')
17 | 
18 | projstring = """+proj=lcc +lat_1=30 +lat_2=60
19 |                 +lat_0=38.47240422490422 +lon_0=-96.0
20 |                 +x_0=0 +y_0=0 +ellps=sphere
21 |                 +units=m +no_defs"""
22 | 
23 | projectLcc = Proj(projstring)
24 | 
25 | _origin_ll = reversed(f['coordinates'][0][0])  # Grab origin directly from database
26 | origin = projectLcc(*_origin_ll)
27 | 
28 | def ijForCoord(coords):
29 |     coords = reversed(coords)  # (lat, lon) -> (lon, lat) for pyproj
30 |     coords = projectLcc(*coords)
31 |     delta = np.subtract(coords, origin)
32 |     ij = [int(round(x/2000)) for x in delta]  # the wtk-us grid spacing is 2,000 m
33 |     return tuple(reversed(ij))
34 | 
35 | i, j = ijForCoord((lat, lon))
36 | 
37 | coord = f["coordinates"][i][j]  # lat/lon of the nearest grid point
38 | speed = f["windspeed_{0}m".format(hub_height)][0:24,i,j]  # first 24 hours
39 | direc = f["winddirection_{0}m".format(hub_height)][0:24,i,j]
40 | 
41 | ### Write raw data
42 | 
43 | stack = np.column_stack((speed, direc))
44 | np.savetxt('rawdata.out', stack, delimiter=",", fmt="%10.5f")
45 | 
46 | ### Write summary statistics (rows: mean, std, min, max; columns: speed, direction)
47 | 
48 | stats = np.column_stack((
49 |     [speed.mean(), speed.std(), speed.min(), speed.max()],
50 |     [direc.mean(), direc.std(), direc.min(), direc.max()],  # arithmetic (not circular) direction statistics
51 | ))
52 | np.savetxt('statistics.out', stats, delimiter=",", fmt="%10.5f")
53 | 
--------------------------------------------------------------------------------
/scripts/shape.py:
--------------------------------------------------------------------------------
1 | # Jordan Perr-Sauer
2 | # December, 2017
3 | #
shape.py: download WTK data from the HSDS server for the points that fall within a GeoJSON shape
4 | 
5 | GEOJSON_FILE = "../data/wtk/W0463_1kmBuffer.json"
6 | 
7 | DESTINATION = "./output"
8 | 
9 | DATASETS = ["windspeed_10m", "windspeed_40m", "windspeed_60m", "windspeed_80m", "windspeed_100m", "windspeed_120m",
10 |             "winddirection_10m", "winddirection_40m", "winddirection_60m", "winddirection_80m", "winddirection_100m", "winddirection_120m",
11 |             "temperature_2m", "temperature_10m", "temperature_100m", "temperature_120m",
12 |             "pressure_0m", "pressure_100m",
13 |             "relativehumidity_2m",
14 |             "inversemoninobukhovlength_2m"]
15 | 
16 | SKIP = 1  # stride length in x and y
17 | 
18 | tmin = 0  # hours since 12AM January 1st, 2007
19 | tmax = 5  # hours since 12AM January 1st, 2007
20 | 
21 | tskip = 1  # stride length in time
22 | 
23 | #################
24 | 
25 | import os
26 | import h5pyd
27 | import geopandas as gpd
28 | import numpy as np
29 | from pyproj import Proj
30 | from tqdm import tqdm
31 | import matplotlib
32 | matplotlib.use("Agg")
33 | 
34 | os.makedirs(DESTINATION, exist_ok=True)  # the CSV writes below fail if the output directory is missing
35 | 
36 | proj4_string = """+proj=lcc +lat_1=30 +lat_2=60
37 |                 +lat_0=38.47240422490422 +lon_0=-96.0
38 |                 +x_0=0 +y_0=0 +ellps=sphere
39 |                 +units=m +no_defs """
40 | projectLcc = Proj(proj4_string)
41 | 
42 | 
43 | # Download origin data from server
44 | 
45 | f = h5pyd.File("/nrel/wtk-us.h5", 'r')
46 | dset_coords = f['coordinates']
47 | origin_ll = reversed(dset_coords[0][0])  # Grab origin directly from database
48 | origin = projectLcc(*origin_ll)
49 | 
50 | # Read polygon from GeoJSON and find IJ bounds
51 | 
52 | polygon = gpd.read_file(GEOJSON_FILE)
53 | polygon.crs = {'init': 'epsg:4326'}
54 | polygon_lcc = polygon.to_crs(proj4_string)
55 | bounds = polygon_lcc.scale(2, 2).bounds.iloc[0]  # scale 2x about the centroid to pad the bounding box
56 | 
57 | ll = (bounds.minx, bounds.miny)
58 | ur = (bounds.maxx, bounds.maxy)
59 | 
60 | delta = np.subtract(ll, origin)
61 | ll_ij = [int(round(x/2000)) for x in delta]  # the wtk-us grid spacing is 2,000 m
62 | 
63 | delta = np.subtract(ur, origin)
64 | ur_ij = [int(round(x/2000)) for x in delta]
65 | 
66 | # Download data from server and write to CSV file
67 | 
68 | coords = dset_coords[ll_ij[1]:ur_ij[1]:SKIP, ll_ij[0]:ur_ij[0]:SKIP]
69 | coordsToString = np.vectorize(lambda c: "(%f %f)" % (c[0], c[1]))
70 | np.savetxt("%s/coords.csv" % (DESTINATION), coordsToString(coords), fmt="%s", delimiter=",")
71 | 
72 | for d in tqdm(DATASETS, desc="Downloading Datasets"):
73 |     data = f[d][tmin:tmax:tskip, ll_ij[1]:ur_ij[1]:SKIP, ll_ij[0]:ur_ij[0]:SKIP]
74 |     for t in range(data.shape[0]):
75 |         np.savetxt("%s/%s_t%d.csv" % (DESTINATION, d, t), data[t], fmt='%.18f', delimiter=",")
76 | 
77 | ax = polygon.plot()
78 | x, y = zip(*coords.flatten())
79 | ax.scatter(y, x, c='g')  # coordinates are stored (lat, lon), so lon goes on the x axis
80 | fig = ax.get_figure()
81 | fig.savefig("%s/plot.png" % (DESTINATION))
--------------------------------------------------------------------------------