├── .github └── workflows │ ├── codeql-analysis.yml │ └── python-package.yml ├── .gitignore ├── CHANGELOG.md ├── CITATION.cff ├── NOAA_tests.ipynb ├── README.md ├── data ├── 1696868.csv ├── Kotzebue.csv ├── weather_station_bismarck.csv ├── weather_station_kotzebue.csv ├── weather_station_orlando.csv └── weather_station_sanfrancisco.csv ├── examples ├── example_daily_series.py ├── example_daily_series_winter.py └── example_monthly_series.py ├── figures ├── daily_series_Kotzebue_1992.png ├── daily_series_Kotzebue_2017-2018_winter.png ├── monthly_series_precipitation_12mthsTrMn_Kotzebue.png ├── monthly_series_precipitation_12mthsTrMn_Kotzebue_anomaly.png ├── monthly_series_temperature_12mthsTrMn_Kotzebue.png └── monthly_series_temperature_12mthsTrMn_Kotzebue_anomaly.png ├── noaaplotter ├── __init__.py ├── noaaplotter.py ├── scripts │ ├── __pycache__ │ │ ├── download_data.cpython-310.pyc │ │ └── plot_daily.cpython-310.pyc │ ├── download_data.py │ ├── download_data_ERA5.py │ ├── download_data_SST.py │ ├── plot_daily.py │ └── plot_monthly.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── dataset.cpython-310.pyc │ ├── download_utils.cpython-310.pyc │ ├── plot_utils.cpython-310.pyc │ └── utils.cpython-310.pyc │ ├── dataset.py │ ├── download_utils.py │ ├── plot_utils.py │ └── utils.py └── pyproject.toml /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '19 1 * * 0' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'python' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v2 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v1 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v1 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 
56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v1 68 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "master" ] 9 | pull_request: 10 | branches: [ "master" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.8", "3.9", "3.10"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 pytest 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | - name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38 | 
39 | #- name: Test with pytest
40 | #  run: |
41 | #    pytest
42 | 
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | .idea
2 | noaaplotter/__pycache__
3 | Session.log
4 | 
-------------------------------------------------------------------------------- /CHANGELOG.md: --------------------------------------------------------------------------------
1 | ## [0.5.4] - 2025-01-05
2 | ### Changed
3 | * fixed streamlit crash
4 | * added pyproject.toml for install
5 | * accounted for NaN values in monthly aggregates
6 | 
7 | ## [0.5.1] - 2023-02-18
8 | ### Changed
9 | * created download_utils
10 | * some code restructuring to support noaaplotter_streamlit (https://github.com/initze/noaaplotter_streamlit)
11 | 
12 | ## [0.5.0] - 2023-02-03
13 | ### Changed
14 | * fixed NOAA API v2 bug that dropped January and February data
15 | * some code fixes and cleanup
16 | 
17 | ## [0.4.1] - 2023-01-16
18 | ### Added
19 | * basic support for SST
20 | 
21 | ## [0.4.0] - 2022-11-30
22 | ### Changed
23 | * moved scripts to a subdirectory and enabled automatic package install
24 | 
25 | ## [0.3.0] - 2022-06-30
26 | ### Added
27 | * automated ERA5 download script through Google Earth Engine
28 | ### Changed
29 | * code cleanup and minor changes
30 | 
31 | ## [0.2.0] - 2021-09-19
32 | ### Added
33 | * automated NOAA API download script
34 | * "No Data" visual for the daily data plot
35 | ### Changed
36 | * moved legend out of the plot area for daily plots
37 | * some code cleanup
38 | * minor bugfixes
39 | 
40 | ## [0.1.8] - 2020-12-14
41 | ### Changed
42 | - fixed truncated rolling mean at the beginning of monthly plots
43 | - fixed crash for end dates after the end of data availability
44 | 
45 | ## [0.1.7] - 2020-12-09
46 | ### Changed
47 | - fixed crash of plot_monthly
48 | - simplified environment.yml
49 | - minor style fixes
50 | 
51 | ### No changelog for earlier versions
-------------------------------------------------------------------------------- /CITATION.cff: --------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | authors: 4 | - family-names: "Nitze" 5 | given-names: "Ingmar" 6 | orcid: "https://orcid.org/0000-0002-1165-6852" 7 | title: "noaaplotter" 8 | version: 0.5.1 9 | doi: 10.5281/zenodo.7753462 10 | date-released: 2023-03-20 11 | url: "https://github.com/initze/noaaplotter" 12 | -------------------------------------------------------------------------------- /NOAA_tests.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Tests for downloading data from NOAA " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### TODO\n", 15 | "* autoloop over dates" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 93, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "# Imports\n", 25 | "#needed to make web requests\n", 26 | "import requests\n", 27 | "#store the data we get as a dataframe\n", 28 | "import pandas as pd\n", 29 | "#convert the response as a strcuctured json\n", 30 | "import json\n", 31 | "#mathematical operations on lists\n", 32 | "import numpy as np\n", 33 | "#parse the datetimes we get from NOAA\n", 34 | "from datetime import datetime, timedelta\n", 35 | "\n", 36 | "from joblib import delayed, Parallel\n", 37 | "\n", 38 | "import csv\n", 39 | "import tqdm\n", 40 | "from noaaplotter.utils import dl_noaa_api" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 94, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "#add the access token you got from NOAA\n", 50 | "Token = 'LaVQzwUgOBQLBRwoTpOLyRbIKDTHAVVe'\n", 51 | "\n", 52 | "#Long Beach Airport station\n", 53 | "#station_id = 'GHCND:USW00026616' # Kotzebue\n", 54 | "station_id = 'GHCND:USW00027502' # Barrow\n", 55 | "#station_name = 'Kotzebue'\n", 56 | "station_name = 'Barrow'\n", 57 | "datatypes = ['TMIN', 'TMAX', 'PRCP', 'SNOW']\n", 58 | "date_start = '1971-01-01'\n", 59 | "date_end = '2021-12-31'" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 95, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "dtypes_string = '&'.join([f'datatypeid={dt}' for dt in datatypes])" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "#### Prepare requests" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 96, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# convert datestring to dt\n", 85 | "dt_start = datetime.strptime(date_start, '%Y-%m-%d')\n", 86 | "dt_end = datetime.strptime(date_end, '%Y-%m-%d')\n", 87 | "# calculate number of days\n", 88 | "n_days = (dt_end-dt_start).days\n", 89 | "# calculate nuber of splits to fit into 1000 lines/rows\n", 90 | "split_size = np.floor(1000 / len(datatypes))\n", 91 | "# calculate splits\n", 92 | "split_range = np.arange(0, n_days, split_size)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### Run data loading" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "make joblib" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 97, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stderr", 116 | "output_type": "stream", 117 | "text": [ 118 | " 11%|████████▊ | 8/75 [00:02<00:23, 2.89it/s]" 119 | ] 120 | }, 121 | { 122 | "ename": "AttributeError", 123 | "evalue": "module 'datetime' has no attribute 'strptime'", 124 | 
"output_type": "error", 125 | "traceback": [ 126 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 127 | "\u001b[1;31m_RemoteTraceback\u001b[0m Traceback (most recent call last)", 128 | "\u001b[1;31m_RemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\externals\\loky\\process_executor.py\", line 431, in _process_worker\n r = call_item()\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\externals\\loky\\process_executor.py\", line 285, in __call__\n return self.fn(*self.args, **self.kwargs)\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\_parallel_backends.py\", line 595, in __call__\n return self.func(*args, **kwargs)\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\parallel.py\", line 263, in __call__\n for func, args, kwargs in self.items]\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\parallel.py\", line 263, in \n for func, args, kwargs in self.items]\n File \"C:\\Users\\initze\\Documents\\Python_Scripts\\noaaplotter\\noaaplotter\\utils.py\", line 66, in dl_noaa_api\n dt_start = dt.strptime(date_start, '%Y-%m-%d')\nAttributeError: module 'datetime' has no attribute 'strptime'\n\"\"\"", 129 | "\nThe above exception was the direct cause of the following exception:\n", 130 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 131 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n", 132 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 1052\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1053\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1054\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1055\u001b[0m \u001b[1;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1056\u001b[0m \u001b[0melapsed_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 133 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 931\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 932\u001b[0m \u001b[1;32mif\u001b[0m 
\u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'supports_timeout'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 933\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 934\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 935\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 134 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[1;34m(future, timeout)\u001b[0m\n\u001b[0;32m 540\u001b[0m AsyncResults.get from multiprocessing.\"\"\"\n\u001b[0;32m 541\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 542\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 543\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mCfTimeoutError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 544\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 135 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\concurrent\\futures\\_base.py\u001b[0m in \u001b[0;36mresult\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 433\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mCancelledError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 434\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mFINISHED\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 435\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 436\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 136 | 
"\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\concurrent\\futures\\_base.py\u001b[0m in \u001b[0;36m__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 382\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__get_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 383\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 384\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 385\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 386\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_result\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 137 | "\u001b[1;31mAttributeError\u001b[0m: module 'datetime' has no attribute 'strptime'" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "%time datasets_list = Parallel(n_jobs=4)(delayed(dl_noaa_api)(i, dtypes_string, station_id, Token, date_start, date_end, split_size) for i in tqdm.tqdm(split_range[:]))" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 98, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "df = pd.concat(datasets_list)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "#### Pivot table to correct form" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 99, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "df_pivot = df.pivot(index='date', columns='datatype', values='value')" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "#### adapt factor " 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 100, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "df_pivot.loc[:, :] /= 10" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "#### Prepare data export\n", 191 | "Option 1: load as object\n", 192 | "Option 2: save to csv" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "#### Reformat columns to target " 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "\"STATION\",\"NAME\",\"DATE\",\"PRCP\",\"SNWD\",\"TAVG\",\"TMAX\",\"TMIN\"" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 101, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "df_pivot = df_pivot.reset_index(drop=False)\n", 216 | "df_pivot['DATE'] = df_pivot.apply(lambda x: datetime.fromisoformat(x['date']).strftime('%Y-%m-%d'), axis=1)\n" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 102, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/html": [ 227 | "
\n", 228 | "\n", 241 | "\n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | "
datatypedatePRCPSNOWTMAXTMINDATE
01971-01-01T00:00:000.00.0-32.8-37.21971-01-01
11971-01-02T00:00:000.00.0-30.0-41.11971-01-02
21971-01-03T00:00:001.52.8-16.7-30.61971-01-03
31971-01-04T00:00:000.30.5-16.7-30.01971-01-04
41971-01-05T00:00:000.00.0-23.3-28.91971-01-05
.....................
185022021-08-29T00:00:0017.0NaN9.46.12021-08-29
185032021-08-30T00:00:000.3NaN8.96.72021-08-30
185042021-08-31T00:00:000.5NaN11.18.32021-08-31
185052021-09-01T00:00:002.3NaN13.38.92021-09-01
185062021-09-02T00:00:000.0NaN18.98.92021-09-02
\n", 355 | "

18507 rows × 6 columns

\n", 356 | "
" 357 | ], 358 | "text/plain": [ 359 | "datatype date PRCP SNOW TMAX TMIN DATE\n", 360 | "0 1971-01-01T00:00:00 0.0 0.0 -32.8 -37.2 1971-01-01\n", 361 | "1 1971-01-02T00:00:00 0.0 0.0 -30.0 -41.1 1971-01-02\n", 362 | "2 1971-01-03T00:00:00 1.5 2.8 -16.7 -30.6 1971-01-03\n", 363 | "3 1971-01-04T00:00:00 0.3 0.5 -16.7 -30.0 1971-01-04\n", 364 | "4 1971-01-05T00:00:00 0.0 0.0 -23.3 -28.9 1971-01-05\n", 365 | "... ... ... ... ... ... ...\n", 366 | "18502 2021-08-29T00:00:00 17.0 NaN 9.4 6.1 2021-08-29\n", 367 | "18503 2021-08-30T00:00:00 0.3 NaN 8.9 6.7 2021-08-30\n", 368 | "18504 2021-08-31T00:00:00 0.5 NaN 11.1 8.3 2021-08-31\n", 369 | "18505 2021-09-01T00:00:00 2.3 NaN 13.3 8.9 2021-09-01\n", 370 | "18506 2021-09-02T00:00:00 0.0 NaN 18.9 8.9 2021-09-02\n", 371 | "\n", 372 | "[18507 rows x 6 columns]" 373 | ] 374 | }, 375 | "execution_count": 102, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "df_pivot" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 103, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "dr = pd.DataFrame(pd.date_range(start=date_start, end=date_end), columns=['DATE'])\n", 391 | "dr['DATE'] = dr['DATE'].astype(str)\n", 392 | "\n" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 104, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "df_merged = pd.concat([df_pivot.set_index('DATE'), dr.set_index('DATE')], join='outer', axis=1).reset_index(drop=False)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 105, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "df_merged['STATION'] = station_id\n", 411 | "df_merged['NAME'] = station_name\n", 412 | "\n", 413 | "df_merged['TAVG'] = None\n", 414 | "df_merged['SNWD'] = None" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 106, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "final_cols = [\"STATION\",\"NAME\",\"DATE\",\"PRCP\",\"SNWD\",\"TAVG\",\"TMAX\",\"TMIN\"]" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "no index, make all strings " 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 107, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "df_final = df_merged[final_cols]" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 108, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "df_final = df_final.replace({np.nan: None})" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 109, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [ 457 | "df_final.to_csv('./data/tmp.csv', index=False, quoting=csv.QUOTE_ALL)" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "Fill empty dates" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [] 487 | } 488 | ], 489 | "metadata": { 490 | "kernelspec": { 491 | "display_name": "Python 3", 492 | "language": "python", 493 | "name": "python3" 494 | }, 495 | "language_info": { 496 | 
"codemirror_mode": { 497 | "name": "ipython", 498 | "version": 3 499 | }, 500 | "file_extension": ".py", 501 | "mimetype": "text/x-python", 502 | "name": "python", 503 | "nbconvert_exporter": "python", 504 | "pygments_lexer": "ipython3", 505 | "version": "3.7.6" 506 | } 507 | }, 508 | "nbformat": 4, 509 | "nbformat_minor": 2 510 | } 511 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # noaaplotter 2 | A python package to create fancy plots with NOAA weather data. 3 | 4 | ## Install 5 | #### Recommended conda install 6 | 7 | I recommend to use a fresh conda environment 8 | `conda create -n noaaplotter python pip` 9 | 10 | activate conda environment 11 | `conda activate noaaplotter` 12 | 13 | pip install noaaplotter and dependencies 14 | `pip install git+https://github.com/initze/noaaplotter.git` 15 | 16 | #### Requirements 17 | - matplotlib 18 | - numpy 19 | - pandas 20 | - python 21 | - requests 22 | - joblib 23 | - tqdm 24 | - geemap 25 | 26 | 27 | ## Examples 28 | ### Download data 29 | #### Option 1 NOAA Daily Summaries: Download via script 30 | Download daily summaries (temperature + precipitation) from Kotzebue (or other station) from 1970-01-01 until 2021-12-31 31 | * NOAA API Token is required: https://www.ncdc.noaa.gov/cdo-web/token 32 | 33 | `download_data.py -o ./data/kotzebue.csv -sid USW00026616 -start 1970-01-01 -end 2021-12-31 -t ` 34 | 35 | #### Option 2 NOAA Daily Summaries: Download via browser 36 | CSV files of "daily summaries" 37 | ("https://www.ncdc.noaa.gov/cdo-web/search") 38 | * Values: metric 39 | * File types: csv 40 | 41 | #### Option 3 ERA5 Daily: Download via script 42 | Download daily summaries (temperature + precipitation) from Potsdam (13.05°E, 52.4°N) from 1980-01-01 until 2021-12-31 43 | * Google Earthengine account is required 44 | * Caution: full dataset may take a few minutes 45 | 46 | `download_data_ERA5.py -o ./data/potsdam_ERA5.csv -start 1980-01-01 -end 2021-12-31 -lat 52.4 -lon 13.05` 47 | 48 | ### Daily Mean Temperature and Precipitation values vs. Climate 49 | #### Entire year 1 January until 31 December (e.g. 
81 | ### Daily Mean Temperature and Precipitation values vs. Climate
82 | #### Entire year: 1 January to 31 December (e.g. 1992)
83 | 
84 | `plot_daily.py -infile data/kotzebue.csv -start 1992-01-01 -end 1992-12-31 -t_range -45 25 -p_range 50 -plot`
85 | 
86 | ![Daily mean temperatures and precipitation vs. climate](https://user-images.githubusercontent.com/4864803/132648353-d1792234-dc68-4baf-a608-5aa5fe6899a8.png "Daily mean temperatures and precipitation vs. climate")
87 | 
88 | ### Monthly aggregates
89 | #### Absolute values
90 | 
91 | Temperature:
92 | `plot_monthly.py -infile data/data2.csv -start 1980-01-01 -end 2021-08-31 -type Temperature -trail 12 -save_plot figures/kotzebue_monthly_temperature.png -plot`
93 | ![Kotzebue_monthly_t_abs](https://user-images.githubusercontent.com/4864803/133925329-540933c1-b30a-4d31-a66f-0ba624223abf.png)
94 | 
95 | Precipitation:
96 | `plot_monthly.py -infile data/data2.csv -start 1980-01-01 -end 2021-08-31 -type Precipitation -trail 12 -save_plot figures/kotzebue_monthly_precipitation.png -plot`
97 | ![Kotzebue_monthly_p_abs](https://user-images.githubusercontent.com/4864803/133925351-5d7513df-2794-472a-b00d-780538f68ce6.png)
98 | 
99 | #### Anomalies/Departures from Climate (1981-2010)
100 | 
101 | Temperature:
102 | 
103 | `plot_monthly.py -infile data/data2.csv -start 1980-01-01 -end 2021-08-31 -type Temperature -trail 12 -save_plot figures/kotzebue_monthly_temperature_anomaly.png -anomaly -plot`
104 | 
105 | !["Mean monthly temperature anomalies with 12-month trailing mean"](https://user-images.githubusercontent.com/4864803/133923928-9ca78105-3718-48d9-80c5-efaf0bfa3217.png)
106 | 
107 | Precipitation:
108 | 
109 | `plot_monthly.py -infile data/data2.csv -start 1980-01-01 -end 2021-08-31 -type Precipitation -trail 12 -save_plot figures/kotzebue_monthly_precipitation_anomaly.png -anomaly -plot`
110 | 
111 | !["Mean monthly precipitation anomalies with 12-month trailing mean"](https://user-images.githubusercontent.com/4864803/133923987-faabba54-e2d7-4340-be05-078bce0648cf.png)
112 | 
-------------------------------------------------------------------------------- /examples/example_daily_series.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Example script to plot daily weather data for an entire year (January 1 to December 31)
5 | using the noaaplotter package
6 | author: Ingmar Nitze
7 | """
8 | 
9 | from
noaaplotter.noaaplotter import NOAAPlotter
10 | import logging
11 | 
12 | def main():
13 |     logging.basicConfig(filename='example.log', filemode='w', level=logging.DEBUG)
14 |     n = NOAAPlotter(r'../data/Kotzebue.csv',
15 |                     location='Kotzebue')
16 |     for year in [1984, 2017, 2018]:
17 |         print(year)
18 |         try:
19 |             n.plot_weather_series(start_date='{yr}-01-01'.format(yr=year), end_date='{yr}-12-31'.format(yr=year),
20 |                                   show_snow_accumulation=False, plot_extrema=True,
21 |                                   show_plot=False, dpi=100,
22 |                                   save_path=r'../figures/daily_series_year_Kotzebue_{yr0}-{yr1}.png'.format(yr0=year, yr1=year),
23 |                                   plot_tmin=-45, plot_tmax=25, plot_pmax=50, plot_snowmax=300)
24 |         except Exception as e:
25 |             print(e)
26 |             continue
27 | 
28 | if __name__ == '__main__':
29 |     main()
-------------------------------------------------------------------------------- /examples/example_daily_series_winter.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Example script to plot daily weather data for a winter season (July 1 to June 30 of the following year)
5 | using the noaaplotter package
6 | author: Ingmar Nitze
7 | """
8 | 
9 | from noaaplotter.noaaplotter import NOAAPlotter
10 | import logging
11 | 
12 | def main():
13 |     logging.basicConfig(filename='example.log', filemode='w', level=logging.DEBUG)
14 |     n = NOAAPlotter(r'../data/Kotzebue.csv',
15 |                     location='Kotzebue')
16 |     for year in [1984, 2017, 2018]:
17 |         print(year)
18 |         try:
19 |             n.plot_weather_series(start_date='{yr}-07-01'.format(yr=year), end_date='{yr}-06-30'.format(yr=year+1),
20 |                                   show_snow_accumulation=True, plot_extrema=True,
21 |                                   show_plot=False, dpi=100,
22 |                                   save_path=r'../figures/daily_series_winter_Kotzebue_{yr0}-{yr1}.png'.format(yr0=year, yr1=year+1),
23 |                                   plot_tmin=-45, plot_tmax=25, plot_pmax=50, plot_snowmax=300)
24 |         except Exception as e:
25 |             print(e)
26 |             continue
27 | 
28 | if __name__ == '__main__':
29 |     main()
-------------------------------------------------------------------------------- /examples/example_monthly_series.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Example script to plot the monthly deviation of temperature or precipitation from the climatological mean (1981-2010).
5 | In addition, the trailing mean of the last n months (here 12) is plotted.
6 | Uses the noaaplotter package.
7 | author: Ingmar Nitze
8 | """
9 | 
10 | from noaaplotter.noaaplotter import NOAAPlotter
11 | import logging
12 | 
13 | def main():
14 |     logging.basicConfig(filename='example.log', filemode='w', level=logging.DEBUG)
15 | 
16 |     LOCATION = 'Kotzebue'
17 |     START = '1990-01-01'
18 |     END = '2019-12-31'
19 |     TRAILING_MEAN = 12
20 |     DPI = 300
21 |     FIGSIZE = (15,7)
22 |     PERIOD = '1990-2019'
23 | 
24 |     n = NOAAPlotter(r'C:/Users/initze/OneDrive/noaaplotter/data/2005576.csv', location=LOCATION)
25 | 
26 |     try:
27 |         n.plot_monthly_barchart(START, END, information='Precipitation', anomaly=False,
28 |                                 trailing_mean=TRAILING_MEAN, show_plot=False,
29 |                                 dpi=DPI, figsize=FIGSIZE,
30 |                                 save_path=r'./figures/{loc}_monthly_series_precipitation_12mthsTrMn_{p}.png'.format(p=PERIOD, loc=LOCATION))
31 | 
32 |         n.plot_monthly_barchart(START, END, information='Temperature', anomaly=False,
33 |                                 trailing_mean=TRAILING_MEAN, show_plot=False,
34 |                                 dpi=DPI, figsize=FIGSIZE,
35 |                                 save_path=r'./figures/{loc}_monthly_series_temperature_12mthsTrMn_{p}.png'.format(p=PERIOD, loc=LOCATION))
36 | 
37 |         n.plot_monthly_barchart(START, END, information='Precipitation', anomaly=True,
38 |                                 trailing_mean=TRAILING_MEAN, show_plot=False,
39 |                                 dpi=DPI, figsize=FIGSIZE,
40 |                                 save_path=r'./figures/{loc}_monthly_series_precipitation_12mthsTrMn_anomaly_{p}.png'.format(p=PERIOD, loc=LOCATION))
41 | 
42 |         n.plot_monthly_barchart(START, END, information='Temperature', anomaly=True,
43 |                                 trailing_mean=TRAILING_MEAN, show_plot=False,
44 |                                 dpi=DPI, figsize=FIGSIZE,
45 |                                 save_path=r'./figures/{loc}_monthly_series_temperature_12mthsTrMn_anomaly_{p}.png'.format(p=PERIOD, loc=LOCATION))
46 |     except Exception as e:
47 |         print(e)
48 | 
49 | if __name__ == '__main__':
50 |     main()
-------------------------------------------------------------------------------- /figures/daily_series_Kotzebue_1992.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/daily_series_Kotzebue_1992.png
-------------------------------------------------------------------------------- /figures/daily_series_Kotzebue_2017-2018_winter.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/daily_series_Kotzebue_2017-2018_winter.png -------------------------------------------------------------------------------- /figures/monthly_series_precipitation_12mthsTrMn_Kotzebue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/monthly_series_precipitation_12mthsTrMn_Kotzebue.png -------------------------------------------------------------------------------- /figures/monthly_series_precipitation_12mthsTrMn_Kotzebue_anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/monthly_series_precipitation_12mthsTrMn_Kotzebue_anomaly.png -------------------------------------------------------------------------------- /figures/monthly_series_temperature_12mthsTrMn_Kotzebue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/monthly_series_temperature_12mthsTrMn_Kotzebue.png -------------------------------------------------------------------------------- /figures/monthly_series_temperature_12mthsTrMn_Kotzebue_anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/monthly_series_temperature_12mthsTrMn_Kotzebue_anomaly.png -------------------------------------------------------------------------------- /noaaplotter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/__init__.py -------------------------------------------------------------------------------- /noaaplotter/noaaplotter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ######################## 5 | # Credits here 6 | # author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research 7 | # contact: ingmar.nitze@awi.de 8 | # version: 2021-09-06 9 | 10 | import numpy as np 11 | from matplotlib import dates 12 | 13 | ######################## 14 | from matplotlib import pyplot as plt 15 | 16 | from noaaplotter.utils.dataset import NOAAPlotterDailyClimateDataset as DS_daily 17 | from noaaplotter.utils.dataset import NOAAPlotterDailySummariesDataset as Dataset 18 | from noaaplotter.utils.dataset import NOAAPlotterMonthlyClimateDataset as DS_monthly 19 | from noaaplotter.utils.plot_utils import * 20 | from noaaplotter.utils.utils import * 21 | 22 | pd.plotting.register_matplotlib_converters() 23 | numeric_only = True 24 | 25 | 26 | class NOAAPlotter(object): 27 | """ 28 | This class/module creates nice plots of observed weather data from NOAA 29 | """ 30 | 31 | def __init__( 32 | self, 33 | input_filepath=None, 34 | location=None, 35 | remove_feb29=False, 36 | climate_start=dt.datetime(1981, 1, 1), 37 | climate_end=dt.datetime(2010, 12, 31), 38 | climate_filtersize=7, 39 | ): 40 | """ 41 | 42 | :param input_filepath: path to input file 43 | :type input_filepath: str 44 | 
:param location: name of location
45 |         :type location: str, optional
46 |         :param remove_feb29: remove 29 February from the data for uniform 365-day years
47 |         :type remove_feb29: bool, optional
48 |         :param climate_start: start date of climate period, defaults to 01-01-1981
49 |         :type climate_start: datetime, optional
50 |         :param climate_end: end date of climate period, defaults to 31-12-2010
51 |         :type climate_end: datetime, optional
52 |         """
53 |         self.input_filepath = input_filepath
54 |         self.location = location
55 |         self.climate_start = climate_start
56 |         self.climate_end = climate_end
57 |         self.remove_feb29 = remove_feb29
58 |         self.dataset = Dataset(
59 |             input_filepath, location=location, remove_feb29=remove_feb29
60 |         )
61 | 
62 |         # TODO: move to respective functions?
63 |         self.df_clim_ = DS_daily(self.dataset, filtersize=climate_filtersize)
64 | 
65 | 
66 |     def _make_short_dateseries(self, start_date, end_date):
67 |         x_dates = pd.DataFrame()
68 |         x_dates["DATE"] = pd.date_range(start=start_date, end=end_date)
69 |         x_dates["DATE_MD"] = x_dates["DATE"].dt.strftime("%m-%d")
70 |         # TODO: Filter Feb29
71 |         if self.dataset.data["DATE"].max() >= end_date:
72 |             x_dates_short = x_dates.set_index("DATE", drop=False).loc[
73 |                 pd.date_range(start=start_date, end=end_date)
74 |             ]
75 |         else:
76 |             x_dates_short = x_dates.set_index("DATE", drop=False).loc[
77 |                 pd.date_range(start=start_date, end=self.dataset.data["DATE"].max())
78 |             ]
79 | 
80 |         return x_dates, x_dates_short
81 | 
82 |     def plot_weather_series(
83 |         self,
84 |         start_date,
85 |         end_date,
86 |         plot_tmax="auto",
87 |         plot_tmin="auto",
88 |         plot_pmax="auto",
89 |         plot_snowmax="auto",
90 |         plot_extrema=True,
91 |         show_plot=True,
92 |         show_snow_accumulation=True,
93 |         save_path=False,
94 |         figsize=(9, 6),
95 |         legend_fontsize="x-small",
96 |         dpi=300,
97 |         title=None,
98 |         return_plot=False,
99 |     ):
100 |         """
101 |         Plotting function to show observed vs. climate temperatures and snowfall
102 |         :param dpi: figure resolution in dots per inch
103 |         :param legend_fontsize: font size of the legend
104 |         :param figsize: figure size in inches (width, height)
105 |         :param start_date: start date of plot
106 |         :type start_date: datetime, str
107 |         :param end_date: end date of plot
108 |         :type end_date: datetime, str
109 |         :param plot_tmax: upper limit of the temperature axis, or "auto"
110 |         :type plot_tmax: int, float, str
111 |         :param plot_tmin: lower limit of the temperature axis, or "auto"
112 |         :type plot_tmin: int, float, str
113 |         :param plot_pmax: upper limit of the precipitation axis, or "auto"
114 |         :type plot_pmax: int, float, str
115 |         :param plot_snowmax: upper limit of the snow-accumulation axis, or "auto"
116 |         :type plot_snowmax: int, float, str
117 |         :param plot_extrema: mark record highs/lows within the plotted period
118 |         :type plot_extrema: bool
119 |         :param show_plot: show the figure interactively
120 |         :type show_plot: bool
121 |         :param show_snow_accumulation: plot cumulative snowfall (requires a SNOW column)
122 |         :type show_snow_accumulation: bool
123 |         :param save_path: output path for the figure, or False to skip saving
124 |         :type save_path: str, bool
125 |         :return:
126 |         """
127 |         start_date = parse_dates(start_date)
128 |         end_date = parse_dates(end_date)
129 |         x_dates, x_dates_short = self._make_short_dateseries(start_date, end_date)
130 | 
131 |         df_clim = self.df_clim_.data.loc[x_dates["DATE_MD"]]
132 | 
133 |         df_clim["DATE"] = x_dates["DATE"].values
134 |         df_clim = df_clim.set_index("DATE", drop=False)
135 |         df_obs = self.dataset.data.set_index("DATE", drop=False).loc[
136 |             x_dates_short["DATE"]
137 |         ]
138 | 
139 |         clim_locs_short = x_dates_short[
140 |             "DATE"
141 |         ]  # short series for incomplete years (actual data)
142 | 
143 |         # get mean and mean +- standard deviation of daily mean temperatures of the climate series
144 |         y_clim = df_clim["tmean_doy_mean"]
145 |         y_clim_std_hi = df_clim[["tmean_doy_mean", "tmean_doy_std"]].sum(axis=1)
146 |         y_clim_std_lo = df_clim["tmean_doy_mean"] - df_clim["tmean_doy_std"]
147 | 
148 |         # Prepare data for filled plot areas
149 |         t_above =
np.vstack(
150 |             [df_obs["TMEAN"].values, y_clim.loc[clim_locs_short].values]
151 |         ).max(axis=0)
152 |         t_above_std = np.vstack(
153 |             [df_obs["TMEAN"].values, y_clim_std_hi.loc[clim_locs_short].values]
154 |         ).max(axis=0)
155 |         t_below = np.vstack(
156 |             [df_obs["TMEAN"].values, y_clim.loc[clim_locs_short].values]
157 |         ).min(axis=0)
158 |         t_below_std = np.vstack(
159 |             [df_obs["TMEAN"].values, y_clim_std_lo.loc[clim_locs_short].values]
160 |         ).min(axis=0)
161 | 
162 |         # Calculate the date of last snowfall and cumulative sum of snowfall
163 |         if not show_snow_accumulation:
164 |             pass
165 |         elif "SNOW" in df_obs.columns:
166 |             last_snow_date = df_obs[df_obs["SNOW"] > 0].iloc[-1]["DATE"]
167 |             snow_acc = np.cumsum(df_obs["SNOW"])
168 |         else:
169 |             show_snow_accumulation = False
170 |             print("No snow information available")
171 | 
172 |         # PLOT
173 |         fig = plt.figure(figsize=figsize, dpi=dpi)
174 |         ax_t = fig.add_subplot(211)
175 |         ax_p = fig.add_subplot(212, sharex=ax_t)
176 | 
177 |         # climate series: mean (black solid line) and +-1 std (red dashed lines)
178 |         (cm,) = ax_t.plot(x_dates["DATE"].values, y_clim.values, c="k", alpha=0.5, lw=2)
179 |         (cm_hi,) = ax_t.plot(
180 |             x_dates["DATE"].values,
181 |             y_clim_std_hi.values,
182 |             c="r",
183 |             ls="--",
184 |             alpha=0.4,
185 |             lw=1,
186 |         )
187 |         (cm_low,) = ax_t.plot(
188 |             x_dates["DATE"].values,
189 |             y_clim_std_lo.values,
190 |             c="r",
191 |             ls="--",
192 |             alpha=0.4,
193 |             lw=1,
194 |         )
195 | 
196 |         # observed series (grey line)
197 |         (fb,) = ax_t.plot(
198 |             x_dates_short["DATE"].values,
199 |             df_obs["TMEAN"].values,
200 |             c="k",
201 |             alpha=0.4,
202 |             lw=1.2,
203 |         )
204 | 
205 |         # difference between observed and climate (red/blue filled areas)
206 |         fill_r = ax_t.fill_between(
207 |             x_dates_short["DATE"].values,
208 |             y1=t_above,
209 |             y2=y_clim.loc[clim_locs_short].values,
210 |             facecolor="#d6604d",
211 |             alpha=0.5,
212 |         )
213 |         fill_rr = ax_t.fill_between(
214 |             x_dates_short["DATE"].values,
215 |             y1=t_above_std,
216 |             y2=y_clim_std_hi.loc[clim_locs_short].values,
217 |             facecolor="#d6604d",
218 |             alpha=0.7,
219 |         )
220 |         fill_b = ax_t.fill_between(
221 |             x_dates_short["DATE"].values,
222 |             y1=y_clim.loc[clim_locs_short].values,
223 |             y2=t_below,
224 |             facecolor="#4393c3",
225 |             alpha=0.5,
226 |         )
227 |         fill_bb = ax_t.fill_between(
228 |             x_dates_short["DATE"].values,
229 |             y1=y_clim_std_lo.loc[clim_locs_short].values,
230 |             y2=t_below_std,
231 |             facecolor="#4393c3",
232 |             alpha=0.7,
233 |         )
234 | 
235 |         # plot extremes
236 |         if plot_extrema:
237 |             tmax = self.dataset.data.groupby("DATE_MD").max(numeric_only=numeric_only)[
238 |                 "TMEAN"
239 |             ]
240 |             tmin = self.dataset.data.groupby("DATE_MD").min(numeric_only=numeric_only)[
241 |                 "TMEAN"
242 |             ]
243 |             local_obs = df_obs[["DATE", "DATE_MD", "TMEAN"]].set_index(
244 |                 "DATE_MD", drop=False
245 |             )
246 |             idx = local_obs.index
247 |             local_max = tmax.loc[idx] == local_obs["TMEAN"]
248 |             local_min = tmin.loc[idx] == local_obs["TMEAN"]
249 |             # extract x and y values
250 |             x_max = local_obs[local_max]["DATE"]
251 |             y_max = local_obs[local_max]["TMEAN"]
252 |             x_min = local_obs[local_min]["DATE"]
253 |             y_min = local_obs[local_min]["TMEAN"]
254 |             xtreme_hi = ax_t.scatter(
255 |                 x_max.values, y_max.values, c="#d6604d", marker="x"
256 |             )
257 |             xtreme_lo = ax_t.scatter(
258 |                 x_min.values, y_min.values, c="#4393c3", marker="x"
259 |             )
260 | 
261 |         xlim = ax_t.get_xlim()
262 |         ax_t.hlines(0, *xlim, linestyles="--")
263 |         # grid
264 |         ax_t.grid()
265 | 
266 |         # labels
267 |         ax_t.set_xlim(start_date,
end_date)
268 |         if not (plot_tmin == "auto" and plot_tmax == "auto"):
269 |             ax_t.set_ylim(plot_tmin, plot_tmax)
270 |         ax_t.set_ylabel("Temperature in °C")
271 |         ax_t.set_xlabel("Date")
272 |         if title:
273 |             ax_t.set_title(title)
274 | 
275 |         # add legend
276 |         legend_handle_t = [fb, cm, cm_hi, fill_r, fill_b]
277 |         legend_text_t = [
278 |             "Observed Temperatures",
279 |             "Climatological Mean",
280 |             "Std of Climatological Mean",
281 |             "Above average Temperature",
282 |             "Below average Temperature",
283 |         ]
284 |         if plot_extrema:
285 |             legend_handle_t.extend([xtreme_hi, xtreme_lo])
286 |             legend_text_t.extend(["Record High on Date", "Record Low on Date"])
287 | 
288 |         # PRECIPITATION #
289 |         # legend handles
290 |         legend_handle_p = []
291 |         legend_text_p = []
292 | 
293 |         # precipitation
294 |         rain = ax_p.bar(
295 |             x=x_dates_short["DATE"].values,
296 |             height=df_obs["PRCP"].values,
297 |             fc="#4393c3",
298 |             alpha=1,
299 |         )
300 |         legend_handle_p.append(rain)
301 |         legend_text_p.append("Precipitation")
302 | 
303 |         # grid
304 |         ax_p.grid()
305 |         # labels
306 |         ax_p.set_ylabel("Precipitation in mm")
307 |         ax_p.set_xlabel("Date")
308 |         # y-axis scaling
309 |         ax_p.set_ylim(bottom=0)
310 |         if isinstance(plot_pmax, (int, float)):
311 |             ax_p.set_ylim(top=plot_pmax)
312 | 
313 |         # snow
314 |         # TODO: make snowcheck
315 |         if (show_snow_accumulation) and ("SNOW" in df_obs.columns):
316 |             ax2_snow = ax_p.twinx()
317 |             # plots
318 |             sn_acc = ax2_snow.fill_between(
319 |                 x=x_dates_short.loc[:last_snow_date, "DATE"].values,
320 |                 y1=snow_acc.loc[:last_snow_date] / 10,
321 |                 facecolor="k",
322 |                 alpha=0.2,
323 |             )
324 |             _ = ax2_snow.plot(
325 |                 x_dates_short.loc[last_snow_date:, "DATE"].values,
326 |                 snow_acc.loc[last_snow_date:] / 10,
327 |                 c="k",
328 |                 alpha=0.2,
329 |                 ls="--",
330 |             )
331 |             # y-axis label
332 |             ax2_snow.set_ylabel("Cumulative Snowfall in cm")
333 |             # legend
334 |             legend_handle_p.append(sn_acc)
335 |             legend_text_p.append("Cumulative Snowfall")
336 |             # y-axis scaling
337 |             ax2_snow.set_ylim(bottom=0)
338 |             if isinstance(plot_snowmax, (int, float)):
339 |                 ax2_snow.set_ylim(top=plot_snowmax)
340 | 
341 |         # Show no-data areas (TODO: move to a function)
342 |         lo, hi = ax_t.get_ylim()
343 |         nanvals_t = x_dates_short["DATE"].loc[pd.isna(df_obs["TMEAN"])]
344 |         nan_bar_t = ax_t.bar(
345 |             x=nanvals_t,
346 |             height=hi - lo,
347 |             bottom=lo,
348 |             width=1,
349 |             edgecolor=None,
350 |             facecolor="k",
351 |             alpha=0.2,
352 |         )
353 |         if len(nan_bar_t) > 0:
354 |             legend_handle_t.append(nan_bar_t)
355 |             legend_text_t.append("No Data")
356 | 
357 |         lo, hi = ax_p.get_ylim()
358 |         nanvals_p = x_dates_short["DATE"].loc[pd.isna(df_obs["PRCP"])]
359 |         nan_bar_p = ax_p.bar(
360 |             x=nanvals_p,
361 |             height=hi - lo,
362 |             bottom=lo,
363 |             width=1,
364 |             edgecolor=None,
365 |             facecolor="k",
366 |             alpha=0.2,
367 |         )
368 |         if len(nan_bar_p) > 0:
369 |             legend_handle_p.append(nan_bar_p)
370 |             legend_text_p.append("No Data")
371 | 
372 |         # add Legends
373 |         ax_t.legend(
374 |             legend_handle_t,
375 |             legend_text_t,
376 |             loc="lower center",
377 |             fontsize=legend_fontsize,
378 |             ncol=4,
379 |             bbox_to_anchor=(0.5, 1.02),
380 |         )
381 |         ax_p.legend(
382 |             legend_handle_p, legend_text_p, loc="upper left", fontsize=legend_fontsize
383 |         )
384 | 
385 |         # set locator to monthly
386 |         locator = dates.MonthLocator()
387 |         ax_t.xaxis.set_major_locator(locator)
388 |         ax_p.xaxis.set_major_locator(locator)
389 |         plt.setp(
390 |             ax_t.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor"
391 |         )
392 |         plt.setp(
393 |             ax_p.get_xticklabels(),
rotation=45, ha="right", rotation_mode="anchor" 394 | ) 395 | fig.tight_layout() 396 | 397 | # Save Figure 398 | if save_path: 399 | fig.savefig(save_path) # , figsize=figsize, dpi=dpi) 400 | # Show plot if chosen, destroy figure object at the end 401 | if show_plot: 402 | plt.show() 403 | if return_plot: 404 | return fig 405 | else: 406 | plt.close(fig) 407 | 408 | def plot_monthly_barchart( 409 | self, 410 | start_date, 411 | end_date, 412 | information="Temperature", 413 | show_plot=True, 414 | anomaly=False, 415 | anomaly_type="absolute", 416 | trailing_mean=None, 417 | save_path=False, 418 | figsize=(9, 4), 419 | dpi=100, 420 | legend_fontsize="x-small", 421 | return_plot=False, 422 | ): 423 | # legend handles 424 | legend_handle = [] 425 | legend_text = [] 426 | 427 | # setup plot arguments 428 | plot_kwargs = setup_monthly_plot_props(information, anomaly) 429 | 430 | # Data Preprocessing 431 | if parse_dates(end_date) > self.dataset.data["DATE"].max(): 432 | end_date = self.dataset.data["DATE"].max() 433 | data_monthly = DS_monthly( 434 | self.dataset, start=self.dataset.data["DATE"].min(), end=end_date 435 | ) 436 | data_monthly.calculate_monthly_statistics() 437 | data_clim = DS_monthly( 438 | self.dataset, start=self.climate_start, end=self.climate_end 439 | ) 440 | data_clim.calculate_monthly_climate() 441 | 442 | data = data_monthly.monthly_aggregate.reset_index(drop=False) 443 | df_clim = data_clim.monthly_climate.reset_index(drop=False) 444 | 445 | if ( 446 | plot_kwargs["value_column"] == "prcp_diff" 447 | and df_clim["prcp_sum"].isna().any() 448 | ): 449 | print("Invalid precipitation values, information not available!") 450 | return None 451 | 452 | data["DATE"] = data.apply(lambda x: parse_dates_YM(x["DATE_YM"]), axis=1) 453 | data["Month"] = data.apply(lambda x: parse_dates_YM(x["DATE_YM"]).month, axis=1) 454 | data["Year"] = data.apply(lambda x: parse_dates_YM(x["DATE_YM"]).year, axis=1) 455 | data = ( 456 | data.set_index("Month", drop=False) 457 | .join(df_clim.set_index("Month", drop=False), rsuffix="_clim") 458 | .sort_values("DATE_YM") 459 | ) 460 | data["tmean_diff"] = data["tmean_doy_mean"] - data["tmean_doy_mean_clim"] 461 | data["prcp_diff"] = data["prcp_sum"] - data["prcp_sum_clim"] 462 | data = data.set_index("DATE", drop=False) 463 | 464 | # trailing mean calculation 465 | if trailing_mean: 466 | data = calc_trailing_mean( 467 | data, trailing_mean, plot_kwargs["value_column"], "trailing_values" 468 | ) 469 | 470 | # PLOT part 471 | fig = plt.figure(figsize=figsize, dpi=dpi) 472 | ax = fig.add_subplot(111) 473 | data_low = data[data[plot_kwargs["value_column"]] < 0] 474 | data_high = data[data[plot_kwargs["value_column"]] >= 0] 475 | bar_low = ax.bar( 476 | x=data_low["DATE"], 477 | height=data_low[plot_kwargs["value_column"]], 478 | width=30, 479 | align="edge", 480 | color=plot_kwargs["fc_low"], 481 | ) 482 | # Fix for absolute values 483 | if len(bar_low) > 1: 484 | legend_handle.append(bar_low) 485 | legend_text.append(plot_kwargs["legend_label_below"]) 486 | bar_high = ax.bar( 487 | x=data_high["DATE"], 488 | height=data_high[plot_kwargs["value_column"]], 489 | width=30, 490 | align="edge", 491 | color=plot_kwargs["fc_high"], 492 | ) 493 | legend_handle.append(bar_high) 494 | legend_text.append(plot_kwargs["legend_label_above"]) 495 | if trailing_mean: 496 | line_tr_mean = ax.plot(data["DATE"], data["trailing_values"], c="k") 497 | legend_handle.append(line_tr_mean[0]) 498 | legend_text.append("Trailing mean: {} months".format(trailing_mean)) 499 | 
ax.xaxis.set_major_locator(dates.YearLocator()) 500 | ax.tick_params(axis="x", rotation=90) 501 | ax.grid(True) 502 | 503 | # x-limit 504 | ax.set_xlim(start_date, end_date) 505 | 506 | # labels 507 | ax.set_ylabel(plot_kwargs["y_label"]) 508 | ax.set_xlabel("Date") 509 | ax.set_title(plot_kwargs["title"]) 510 | # add legend 511 | ax.legend(legend_handle, legend_text, loc="best", fontsize=legend_fontsize) 512 | 513 | fig.tight_layout() 514 | # Save Figure 515 | if save_path: 516 | fig.savefig(save_path) # , figsize=figsize, dpi=dpi) 517 | # Show plot if chosen, destroy figure object at the end 518 | if show_plot: 519 | plt.show() 520 | if return_plot: 521 | return fig 522 | else: 523 | plt.close(fig) 524 | -------------------------------------------------------------------------------- /noaaplotter/scripts/__pycache__/download_data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/scripts/__pycache__/download_data.cpython-310.pyc -------------------------------------------------------------------------------- /noaaplotter/scripts/__pycache__/plot_daily.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/scripts/__pycache__/plot_daily.cpython-310.pyc -------------------------------------------------------------------------------- /noaaplotter/scripts/download_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # Imports 4 | import argparse 5 | 6 | from noaaplotter.utils.download_utils import download_from_noaa 7 | 8 | 9 | def main(): 10 | """ 11 | Main Function 12 | :return: 13 | """ 14 | ##### Parse arguments ##### 15 | parser = argparse.ArgumentParser(description="Parse arguments.") 16 | 17 | parser.add_argument( 18 | "-o", 19 | dest="output_file", 20 | type=str, 21 | required=True, 22 | default="data/parquet.csv", 23 | help="parquet file to save results", 24 | ) 25 | 26 | parser.add_argument( 27 | "-t", dest="token", type=str, required=False, default="", help="NOAA API token" 28 | ) 29 | 30 | parser.add_argument( 31 | "-sid", 32 | dest="station_id", 33 | type=str, 34 | required=False, 35 | default="", 36 | help='NOAA Station ID, e.g. 
"GHCND:USW00026616" for Kotzebue, only if loading through NOAA API', 37 | ) 38 | 39 | parser.add_argument( 40 | "-loc", 41 | dest="loc_name", 42 | type=str, 43 | required=False, 44 | default="", 45 | help="Location name", 46 | ) 47 | 48 | parser.add_argument( 49 | "-dt", 50 | dest="datatypes", 51 | type=list, 52 | required=False, 53 | default=["TMIN", "TMAX", "PRCP", "SNOW"], 54 | ) 55 | 56 | parser.add_argument( 57 | "-start", 58 | dest="start_date", 59 | type=str, 60 | required=True, 61 | help='start date of plot ("yyyy-mm-dd")', 62 | ) 63 | 64 | parser.add_argument( 65 | "-end", 66 | dest="end_date", 67 | type=str, 68 | required=True, 69 | help='end date of plot ("yyyy-mm-dd")', 70 | ) 71 | 72 | parser.add_argument( 73 | "-n_jobs", 74 | dest="n_jobs", 75 | type=int, 76 | required=False, 77 | default=1, 78 | help="number of parallel processes", 79 | ) 80 | 81 | args = parser.parse_args() 82 | 83 | download_from_noaa( 84 | output_file=args.output_file, 85 | start_date=args.start_date, 86 | end_date=args.end_date, 87 | datatypes=args.datatypes, 88 | noaa_api_token=args.token, 89 | loc_name=args.loc_name, 90 | station_id=args.station_id, 91 | n_jobs=args.n_jobs, 92 | ) 93 | 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /noaaplotter/scripts/download_data_ERA5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # Imports 4 | import argparse 5 | import os 6 | 7 | from src.download_utils import download_era5_from_gee 8 | 9 | 10 | def main(): 11 | """ 12 | Main Function 13 | :return: 14 | """ 15 | ##### Parse arguments ##### 16 | parser = argparse.ArgumentParser(description='Parse arguments.') 17 | 18 | parser.add_argument('-o', dest='output_file', type=str, required=True, 19 | default='data/data.csv', 20 | help='csv file to save results') 21 | 22 | parser.add_argument('-lat', dest='lat', type=float, required=True, 23 | help='Latitude of selected location') 24 | 25 | parser.add_argument('-lon', dest='lon', type=float, required=True, 26 | help='Longitude of selected location') 27 | 28 | parser.add_argument('-loc', dest='loc_name', type=str, required=False, 29 | default='', 30 | help='Location name') 31 | 32 | parser.add_argument('-dt', dest='datatypes', type=list, required=False, default=['TMIN', 'TMAX', 'PRCP', 'SNOW']) 33 | 34 | parser.add_argument('-start', dest='start_date', type=str, required=True, 35 | help='start date of plot ("yyyy-mm-dd")') 36 | 37 | parser.add_argument('-end', dest='end_date', type=str, required=True, 38 | help='end date of plot ("yyyy-mm-dd")') 39 | 40 | args = parser.parse_args() 41 | 42 | # remove file if exists 43 | if os.path.exists(args.output_file): 44 | os.remove(args.output_file) 45 | 46 | download_era5_from_gee(latitude=args.lat, 47 | longitude = args.lon, 48 | end_date= args.end_date, 49 | start_date = args.start_date, 50 | output_file = args.output_file) 51 | 52 | 53 | if __name__ == "__main__": 54 | main() -------------------------------------------------------------------------------- /noaaplotter/scripts/download_data_SST.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # Imports 4 | import argparse 5 | import csv 6 | from datetime import datetime 7 | import numpy as np 8 | import os 9 | import pandas as pd 10 | import tqdm 11 | from joblib import delayed, Parallel 12 | from 
--------------------------------------------------------------------------------
/noaaplotter/scripts/download_data_SST.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | # Imports
4 | import argparse
5 | import csv
6 | from datetime import datetime
7 | import numpy as np
8 | import os
9 | import pandas as pd
10 | import tqdm
11 | from joblib import delayed, Parallel
12 | from noaaplotter.utils.download_utils import dl_noaa_api
13 | import ee
14 | import geemap
15 | 
16 | def main():
17 |     """
18 |     Main Function
19 |     :return:
20 |     """
21 |     ##### Parse arguments #####
22 |     parser = argparse.ArgumentParser(description='Parse arguments.')
23 | 
24 |     parser.add_argument('-o', dest='output_file', type=str, required=True,
25 |                         default='data/data.csv',
26 |                         help='csv file to save results')
27 | 
28 |     parser.add_argument('-lat', dest='lat', type=float, required=True,
29 |                         help='Latitude of selected location')
30 | 
31 |     parser.add_argument('-lon', dest='lon', type=float, required=True,
32 |                         help='Longitude of selected location')
33 | 
34 |     parser.add_argument('-loc', dest='loc_name', type=str, required=False,
35 |                         default='',
36 |                         help='Location name')
37 | 
38 |     #parser.add_argument('-dt', dest='datatypes', type=list, required=False, default=['TMIN', 'TMAX', 'PRCP', 'SNOW'])
39 | 
40 |     parser.add_argument('-start', dest='start_date', type=str, required=True,
41 |                         help='start date of plot ("yyyy-mm-dd")')
42 | 
43 |     parser.add_argument('-end', dest='end_date', type=str, required=True,
44 |                         help='end date of plot ("yyyy-mm-dd")')
45 | 
46 |     args = parser.parse_args()
47 | 
48 |     # remove file if exists
49 |     if os.path.exists(args.output_file):
50 |         os.remove(args.output_file)
51 | 
52 |     ee.Initialize()
53 | 
54 |     EE_LAYER = "NOAA/CDR/OISST/V2_1"
55 | 
56 |     location = ee.Geometry.Point([args.lon, args.lat])
57 | 
58 |     # load ImageCollection
59 |     col = ee.ImageCollection(EE_LAYER).filterBounds(location).filterDate(args.start_date, args.end_date).select('sst')
60 | 
61 |     # Download data
62 |     print("Start downloading NOAA CDR OISST v02r01 data.")
63 |     print("Download may take a while.\n1yr: ~5 seconds\n10yrs: ~35 seconds\n50yrs: ~8 min")
64 | 
65 |     out_dict = geemap.extract_pixel_values(col, location, getInfo=True)
66 |     df_gee = pd.DataFrame(data=[out_dict.keys(), out_dict.values()]).T
67 | 
68 |     # parse dates and values
69 |     df_gee['time'] = df_gee[0].apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}')
70 |     df_gee['feature'] = df_gee[0].apply(lambda x: x[9:])
71 |     df_gee['value'] = df_gee[1]
72 | 
73 |     df = df_gee.pivot_table(values='value', columns=['feature'], index='time')
74 | 
75 |     # #### recalculate values (OISST stores sst as scaled integers, 0.01 degC per unit)
76 |     df_new = pd.DataFrame(index=df.index)
77 | 
78 |     temperature_cols = ['sst']
79 |     #precipitation_cols = ['total_precipitation']
80 |     df_joined = df_new.join(df[temperature_cols]*0.01)
81 | 
82 |     # Create Output: SST is a single daily value, so it is copied into TAVG/TMAX/TMIN
83 |     df_joined.reset_index(drop=False, inplace=True)
84 |     rename_dict = {'time': 'DATE', 'sst': 'TMAX'}
85 |     df_renamed = df_joined.rename(columns=rename_dict)
86 |     df_renamed['NAME'] = ''
87 |     df_renamed['STATION'] = ''
88 |     df_renamed['SNWD'] = ''
89 |     df_renamed['PRCP'] = ''
90 |     df_renamed['TAVG'] = df_renamed['TMAX']
91 |     df_renamed['TMIN'] = df_renamed['TMAX']
92 | 
93 |     output_cols = ["STATION","NAME","DATE","PRCP","SNWD","TAVG","TMAX","TMIN"]
94 |     df_save = df_renamed[output_cols].astype(str)
95 | 
96 |     df_save.to_csv(args.output_file, index=False)
97 | 
98 | 
99 | if __name__ == "__main__":
100 |     main()
101 | 
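# Usage sketch (hypothetical open-ocean coordinates; this script has no entry
# point in pyproject.toml, so it is run as a plain module file from the
# repository root):
#
#   python noaaplotter/scripts/download_data_SST.py -o data/chukchi_sst.csv \
#       -lat 68.0 -lon -168.0 -start 1982-01-01 -end 2020-12-31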
--------------------------------------------------------------------------------
/noaaplotter/scripts/plot_daily.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | from noaaplotter.noaaplotter import NOAAPlotter
4 | import argparse
5 | 
6 | def main():
7 |     """
8 |     Main Function
9 |     :return:
10 |     """
11 |     ##### Parse arguments #####
12 |     parser = argparse.ArgumentParser(description='Parse arguments.')
13 | 
14 |     parser.add_argument('-infile', dest='infile', type=str, required=True,
15 |                         default='data/temp.parquet',
16 |                         help='input file with climate data')
17 | 
18 |     parser.add_argument('-t', dest='token', type=str, required=False,
19 |                         default='',
20 |                         help='NOAA API token, only if loading through NOAA API')
21 | 
22 |     parser.add_argument('-sid', dest='station_id', type=str, required=False,
23 |                         default='',
24 |                         help='NOAA Station ID, e.g. "GHCND:USW00026616" for Kotzebue, only if loading through NOAA API')
25 | 
26 |     parser.add_argument('-start', dest='start_date', type=str, required=True,
27 |                         help='start date of plot ("yyyy-mm-dd")')
28 | 
29 |     parser.add_argument('-end', dest='end_date', type=str, required=True,
30 |                         help='end date of plot ("yyyy-mm-dd")')
31 | 
32 |     parser.add_argument('-loc', dest='location', required=False,
33 |                         type=str, default=None,
34 |                         help='Location name, must be in data file')
35 | 
36 |     parser.add_argument('-save_plot', dest='save_path', type=str, required=False,
37 |                         default=None,
38 |                         help='filepath for plot')
39 | 
40 |     parser.add_argument('-t_range', dest='t_range', type=float, nargs=2, required=False,
41 |                         default=[None, None],
42 |                         help='temperature range in plot')
43 | 
44 |     parser.add_argument('-p_range', dest='p_range', type=float, required=False,
45 |                         default=None,
46 |                         help='maximum precipitation value in plot')
47 | 
48 |     parser.add_argument('-s_range', dest='s_range', type=float, required=False,
49 |                         default=None,
50 |                         help='maximum snow accumulation value in plot')
51 | 
52 |     parser.add_argument('-snow_acc', dest='snow_acc', required=False,
53 |                         default=False, action='store_true',
54 |                         help='show snow accumulation, only useful for plotting winter season (e.g. July to June)')
55 | 
56 |     parser.add_argument('-filtersize', dest='filtersize', type=int, required=False,
57 |                         default=7,
58 |                         help='parameter to smooth climate temperature series by n days for smoother visual appearance. '
59 |                              'default value: 7')
60 | 
61 |     parser.add_argument('-dpi', dest='dpi', type=float, required=False,
62 |                         default=100,
63 |                         help='dpi for plot output')
64 | 
65 |     parser.add_argument('-plot', dest='show_plot', required=False,
66 |                         default=False, action='store_true',
67 |                         help='show interactive plot window')
68 | 
69 |     parser.add_argument('-figsize', dest='figsize', type=float, nargs=2, required=False,
70 |                         default=[9, 6],
71 |                         help='figure size in inches width x height. 15 10 recommended for 1 year, 30 10 for 2 years ...')
72 | 
73 |     parser.add_argument('-title', dest='title', type=str, required=False,
74 |                         default=None,
75 |                         help='Plot title')
76 | 
77 |     args = parser.parse_args()
78 | 
79 |     ##### Download from NOAA #####
80 | 
81 |     ##### Run Plotting function #####
82 |     n = NOAAPlotter(args.infile,
83 |                     location=args.location,
84 |                     climate_filtersize=args.filtersize)
85 | 
86 |     n.plot_weather_series(start_date=args.start_date,
87 |                           end_date=args.end_date,
88 |                           show_snow_accumulation=args.snow_acc,
89 |                           #kwargs_fig={'dpi':args.dpi, 'figsize':args.figsize},
90 |                           plot_extrema=True,
91 |                           show_plot=args.show_plot,
92 |                           save_path=args.save_path,
93 |                           plot_tmin=args.t_range[0],
94 |                           plot_tmax=args.t_range[1],
95 |                           plot_pmax=args.p_range,
96 |                           plot_snowmax=args.s_range,
97 |                           dpi=args.dpi,
98 |                           figsize=args.figsize,
99 |                           title=args.title)
100 | 
101 | if __name__ == "__main__":
102 |     main()
103 | 
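# Usage sketch (hypothetical paths; flags as defined above, entry point from
# pyproject.toml). A July-June window pairs well with -snow_acc:
#
#   plot_daily -infile data/kotzebue.parquet -start 2017-07-01 -end 2018-06-30 \
#       -loc Kotzebue -snow_acc -figsize 15 10 -save_plot figures/kotzebue_winter.png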
--------------------------------------------------------------------------------
/noaaplotter/scripts/plot_monthly.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | from noaaplotter.noaaplotter import NOAAPlotter
4 | import argparse
5 | 
6 | def main():
7 |     """
8 |     Main Function
9 |     :return:
10 |     """
11 |     ##### Parse arguments #####
12 |     parser = argparse.ArgumentParser(description='Parse arguments.')
13 | 
14 |     parser.add_argument('-infile', dest='infile', type=str, required=True,
15 |                         help='input file with climate data')
16 | 
17 |     parser.add_argument('-start', dest='start_date', type=str, required=True,
18 |                         help='start date of plot ("yyyy-mm-dd")')
19 | 
20 |     parser.add_argument('-end', dest='end_date', type=str, required=True,
21 |                         help='end date of plot ("yyyy-mm-dd")')
22 | 
23 |     parser.add_argument('-loc', dest='location', required=False,
24 |                         type=str, default=None,
25 |                         help='Location name, must be in data file')
26 | 
27 |     parser.add_argument('-save_plot', dest='save_path', type=str, required=False,
28 |                         default=None,
29 |                         help='filepath for plot')
30 | 
31 |     parser.add_argument('-type', dest='type', type=str, required=True,
32 |                         help='Attribute Type: {Temperature, Precipitation}',
33 |                         default='Temperature')
34 | 
35 |     parser.add_argument('-trail', dest='trailing_mean', type=int, required=False,
36 |                         default=None,
37 |                         help='trailing/rolling mean value in months')
38 | 
39 |     parser.add_argument('-anomaly', dest='anomaly', required=False,
40 |                         default=False, action='store_true',
41 |                         help='show anomaly from climate')
42 | 
43 |     parser.add_argument('-dpi', dest='dpi', type=float, required=False,
44 |                         default=100,
45 |                         help='dpi for plot output')
46 | 
47 |     parser.add_argument('-plot', dest='show_plot', required=False,
48 |                         default=False, action='store_true',
49 |                         help='show interactive plot window')
50 | 
51 |     parser.add_argument('-figsize', dest='figsize', type=float, nargs=2, required=False,
52 |                         default=[9, 4],
53 |                         help='figure size in inches width x height. 9 4 recommended for 30 years')
54 | 
55 |     args = parser.parse_args()
56 | 
57 |     ##### Run Plotting function #####
58 |     n = NOAAPlotter(args.infile,
59 |                     location=args.location)
60 | 
61 |     n.plot_monthly_barchart(args.start_date,
62 |                             args.end_date,
63 |                             information=args.type,
64 |                             anomaly=args.anomaly,
65 |                             trailing_mean=args.trailing_mean,
66 |                             show_plot=args.show_plot,
67 |                             dpi=args.dpi,
68 |                             figsize=args.figsize,
69 |                             save_path=args.save_path)
70 | 
71 | if __name__ == "__main__":
72 |     main()
73 | 
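# Usage sketch (hypothetical paths): a 12-month trailing mean of the monthly
# temperature anomaly, similar to the repository's example figures:
#
#   plot_monthly -infile data/kotzebue.parquet -start 1990-01-01 -end 2020-12-31 \
#       -type Temperature -anomaly -trail 12 -save_plot figures/kotzebue_monthly.png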
--------------------------------------------------------------------------------
/noaaplotter/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__init__.py
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/download_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/download_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/plot_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/plot_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/utils.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/dataset.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | 
4 | ########################
5 | # Credits here
6 | # author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research
7 | # contact: ingmar.nitze@awi.de
8 | # version: 2020-12-09
9 | 
10 | ########################
11 | import os
12 | import time
13 | 
14 | import numpy as np
15 | import polars as pl
16 | 
17 | from .utils import *
18 | 
19 | NUMERIC_ONLY = True
20 | 
21 | 
22 | class NOAAPlotterDailySummariesDataset(object):
23 |     """
24 |     This class loads and prepares observed NOAA daily-summary weather data for plotting
25 |     """
26 | 
27 |     def __init__(self, input_filepath=None, location=None, remove_feb29=False):
28 |         self.input_switch = None
29 |         self.input_filepath = input_filepath
30 |         self.location = location
31 |         self.noaa_token = None
32 |         self.noaa_location = None
33 |         self.remove_feb29 = remove_feb29
34 |         self.data = None
35 |         self._check_data_loading()
36 |         if self.input_switch == "file":
37 |             self._load_file()
38 |         elif self.input_switch == "noaa_api":
39 |             self._load_noaa()
40 |         self._validate_location()
41 |         self._update_datatypes()
42 |         self._get_datestring()
43 |         self._get_tmean()
44 |         self._remove_feb29()
45 |         self._filter_to_location()
46 | 
47 |     def print_locations(self):
48 |         """
49 |         Print all location names
50 |         """
51 |         print(self.data["NAME"].unique())
52 | 
53 |     def _check_data_loading(self):
54 |         """
55 |         check whether all requirements for one of the loading options are met
56 |         File loading:
57 |         * input_filepath
58 |         """
59 |         if os.path.exists(self.input_filepath):
60 |             self.input_switch = "file"
61 |         elif self.noaa_token and self.noaa_location:
62 |             self.input_switch = "noaa_api"
63 |         else:
64 |             raise ImportError(
65 |                 "Please enter either correct file path or noaa station_id and API token"
66 |             )
67 | 
68 |     def _load_file(self):
69 |         """
70 |         load parquet file into pandas DataFrame
71 |         :return:
72 |         """
73 |         data = pl.read_parquet(self.input_filepath).to_pandas()
74 |         if "__index_level_0__" in data.columns:
75 |             data = data.drop(columns=["__index_level_0__"])
76 |         self.data = data
77 | 
78 |     def _load_noaa(self):
79 |         """
80 |         load data through NOAA API
81 |         """
82 |         pass
83 | 
84 |     def _save_noaa(self):
85 |         """
86 |         save loaded NOAA API data to temporary csv file
87 |         """
88 | 
89 |     def _validate_location(self):
90 |         """
91 |         raise error and message if location name cannot be found
92 |         :return:
93 |         """
94 |         if not self.location and len(pd.unique(self.data["NAME"])) == 1:
95 |             pass
96 |         elif not self.location and len(pd.unique(self.data["NAME"])) > 1:
97 |             raise ValueError(
98 |                 "There is more than one location in the dataset. Please choose a location using the -loc option! "
99 |                 "Valid Location identifiers: {0} ".format(self.data["NAME"].unique())
100 |             )
101 |         else:
102 |             filt = self.data["NAME"].str.lower().str.contains(self.location.lower())
103 |             if filt.sum() == 0:
104 |                 raise ValueError(
105 |                     "Location Name is not valid! Valid Location identifiers: {0}".format(
106 |                         self.data["NAME"].unique()
107 |                     )
108 |                 )
109 | 
110 |     def _update_datatypes(self):
111 |         """
112 |         define 'DATE' as datetime
113 |         :return:
114 |         """
115 |         self.data["DATE"] = pd.to_datetime(self.data["DATE"])
116 | 
117 |     def _get_datestring(self):
118 |         """
119 |         write specific date formats
120 |         :return:
121 |         """
122 |         self.data["DATE_MD"] = self.data["DATE"].dt.strftime("%m-%d")
123 |         self.data["DATE_YM"] = self.data["DATE"].dt.strftime("%Y-%m")
124 |         self.data["DATE_M"] = self.data["DATE"].dt.strftime("%m")
125 | 
126 |     def _get_tmean(self):
127 |         """
128 |         calculate mean daily temperature from min and max
129 |         :return:
130 |         """
131 |         # TODO: check for cases where TMIN and TMAX are empty (e.g. Schonefeld). 
There TAVG is the main field 132 | self.data["TMEAN"] = self.data[["TMIN", "TMAX"]].mean(axis=1) 133 | 134 | def _remove_feb29(self): 135 | """ 136 | Function to remove February 29 from the data 137 | :return: 138 | """ 139 | if self.remove_feb29: 140 | self.data = self.data[self.data["DATE_MD"] != "02-29"] 141 | 142 | def _filter_to_location(self): 143 | """ 144 | Filter dataset to the defined location 145 | :return: 146 | """ 147 | if self.location: 148 | filt = self.data["NAME"].str.lower().str.contains(self.location.lower()) 149 | if len(filt) > 0: 150 | self.data = self.data.loc[filt] 151 | else: 152 | raise ValueError("Location Name is not valid") 153 | 154 | def filter_to_climate(self, climate_start, climate_end): 155 | """ 156 | Function to create filtered dataset covering the defined climate normal period 157 | :return: 158 | """ 159 | df_clim = self.data[ 160 | (self.data["DATE"] >= climate_start) & (self.data["DATE"] <= climate_end) 161 | ] 162 | return df_clim 163 | 164 | @staticmethod 165 | def get_monthly_stats(df): 166 | """ 167 | calculate monthly statistics 168 | :param df: 169 | :type df: pandas.DataFrame 170 | :return: 171 | """ 172 | df_out = pd.DataFrame() 173 | df_out["tmean_doy_mean"] = ( 174 | df[["DATE", "TMEAN"]] 175 | .groupby(df["DATE_YM"]) 176 | .mean(numeric_only=NUMERIC_ONLY) 177 | .TMEAN 178 | ) 179 | df_out["tmean_doy_std"] = ( 180 | df[["DATE", "TMEAN"]] 181 | .groupby(df["DATE_YM"]) 182 | .std(numeric_only=NUMERIC_ONLY) 183 | .TMEAN 184 | ) 185 | df_out["tmax_doy_max"] = ( 186 | df[["DATE", "TMAX"]] 187 | .groupby(df["DATE_YM"]) 188 | .max(numeric_only=NUMERIC_ONLY) 189 | .TMAX 190 | ) 191 | df_out["tmax_doy_std"] = ( 192 | df[["DATE", "TMAX"]] 193 | .groupby(df["DATE_YM"]) 194 | .std(numeric_only=NUMERIC_ONLY) 195 | .TMAX 196 | ) 197 | df_out["tmin_doy_min"] = ( 198 | df[["DATE", "TMIN"]] 199 | .groupby(df["DATE_YM"]) 200 | .min(numeric_only=NUMERIC_ONLY) 201 | .TMIN 202 | ) 203 | df_out["tmin_doy_std"] = ( 204 | df[["DATE", "TMIN"]] 205 | .groupby(df["DATE_YM"]) 206 | .std(numeric_only=NUMERIC_ONLY) 207 | .TMIN 208 | ) 209 | if "SNOW" in df.columns: 210 | df_out["snow_doy_mean"] = ( 211 | df[["DATE", "SNOW"]] 212 | .groupby(df["DATE_YM"]) 213 | .mean(numeric_only=NUMERIC_ONLY) 214 | .SNOW 215 | ) 216 | df_out["prcp_sum"] = ( 217 | df[["DATE", "PRCP"]] 218 | .groupby(df["DATE_YM"]) 219 | .sum(numeric_only=NUMERIC_ONLY) 220 | .PRCP 221 | ) 222 | return df_out 223 | 224 | @staticmethod 225 | def get_monthy_climate(df): 226 | """ 227 | :param df: 228 | :return: 229 | """ 230 | df_out = pd.DataFrame() 231 | df = df.data 232 | df["Month"] = ( 233 | df.reset_index().apply(lambda x: int(x["DATE_MD"][:2]), axis=1).values 234 | ) 235 | df_out["tmean_mean"] = ( 236 | df[["Month", "TMEAN"]] 237 | .groupby(df["Month"]) 238 | .mean(numeric_only=NUMERIC_ONLY) 239 | .TMEAN 240 | ) 241 | df_out["tmean_std"] = ( 242 | df[["Month", "TMEAN"]] 243 | .groupby(df["Month"]) 244 | .std(numeric_only=NUMERIC_ONLY) 245 | .TMEAN 246 | ) 247 | df_out["tmax_max"] = ( 248 | df[["Month", "TMAX"]] 249 | .groupby(df["Month"]) 250 | .max(numeric_only=NUMERIC_ONLY) 251 | .TMAX 252 | ) 253 | df_out["tmax_std"] = ( 254 | df[["Month", "TMAX"]] 255 | .groupby(df["Month"]) 256 | .std(numeric_only=NUMERIC_ONLY) 257 | .TMAX 258 | ) 259 | df_out["tmin_min"] = ( 260 | df[["Month", "TMIN"]] 261 | .groupby(df["Month"]) 262 | .min(numeric_only=NUMERIC_ONLY) 263 | .TMIN 264 | ) 265 | df_out["tmin_std"] = ( 266 | df[["Month", "TMIN"]] 267 | .groupby(df["Month"]) 268 | .std(numeric_only=NUMERIC_ONLY) 269 
|             .TMIN
270 |         )
271 |         if "SNOW" in df.columns:
272 |             df_out["snow_mean"] = (
273 |                 df[["Month", "SNOW"]]
274 |                 .groupby(df["Month"])
275 |                 .mean(numeric_only=NUMERIC_ONLY)
276 |                 .SNOW
277 |             )
278 |         unique_years = len(
279 |             np.unique(df.apply(lambda x: parse_dates_YM(x["DATE_YM"]).year, axis=1))
280 |         )
281 |         df_out["prcp_mean"] = (
282 |             df[["Month", "PRCP"]]
283 |             .groupby(df["Month"])
284 |             .sum(numeric_only=NUMERIC_ONLY)
285 |             .PRCP
286 |             / unique_years  # mean monthly total over the period (was: .mean() * unique_years)
287 |         )
288 |         return df_out.reset_index(drop=False)
289 | 
290 | 
291 | class NOAAPlotterDailyClimateDataset(object):
292 |     # TODO: split the main class into subclasses for daily/monthly
293 |     def __init__(
294 |         self,
295 |         daily_dataset,
296 |         start="1981-01-01",
297 |         end="2010-12-31",
298 |         filtersize=7,
299 |         impute_feb29=True,
300 |     ):
301 |         """
302 |         :param start:
303 |         :param end:
304 |         :param filtersize:
305 |         :param impute_feb29:
306 |         """
307 |         self.start = parse_dates(start)
308 |         self.end = parse_dates(end)
309 |         self.filtersize = filtersize
310 |         self.impute_feb29 = impute_feb29
311 |         self.daily_dataset = daily_dataset
312 |         self.data_daily = None
313 |         self.data = None
314 |         self.date_range_valid = False
315 | 
316 |         # validate date range
317 |         self._validate_date_range()
318 |         # filter daily to date range
319 |         self._filter_to_climate()
320 |         # calculate daily statistics
321 |         self._calculate_climate_statistics()
322 |         # mean imputation for 29 February
323 |         self._impute_feb29()
324 |         # filter if desired
325 |         start_time = time.time()
326 |         self._run_filter()
327 |         # self._run_filter_polars()
328 |         end_time = time.time()
329 |         print(f"_run_filter took {end_time - start_time:.2f} seconds to run.")
330 |         # make completeness report
331 | 
332 |     def _validate_date_range(self):
333 |         if self.daily_dataset.data["DATE"].max() >= self.end:
334 |             if self.daily_dataset.data["DATE"].min() <= self.start:
335 |                 self.date_range_valid = True
336 |             else:
337 |                 raise ValueError("Dataset is insufficient to calculate climate normals!")
338 | 
339 |     def _filter_to_climate(self):
340 |         """
341 |         filter the daily data to the climate-normal period
342 |         :return:
343 |         """
344 |         df_clim = self.daily_dataset.data[
345 |             (self.daily_dataset.data["DATE"] >= self.start)
346 |             & (self.daily_dataset.data["DATE"] <= self.end)
347 |         ]
348 |         df_clim = df_clim[(df_clim["DATE_MD"] != "02-29")]
349 |         self.data_daily = df_clim
350 | 
351 |     def _calculate_climate_statistics(self):
352 |         """
353 |         Function to calculate major statistics
354 |         :param self.data_daily:
355 |         :type self.data_daily: pandas.DataFrame
356 |         :return:
357 |         """
358 |         df_out = pd.DataFrame()
359 |         df_out["tmean_doy_mean"] = (
360 |             self.data_daily[["DATE", "TMEAN"]]
361 |             .groupby(self.data_daily["DATE_MD"])
362 |             .mean(numeric_only=NUMERIC_ONLY)
363 |             .TMEAN
364 |         )
365 |         df_out["tmean_doy_std"] = (
366 |             self.data_daily[["DATE", "TMEAN"]]
367 |             .groupby(self.data_daily["DATE_MD"])
368 |             .std(numeric_only=NUMERIC_ONLY)
369 |             .TMEAN
370 |         )
371 |         df_out["tmean_doy_max"] = (
372 |             self.data_daily[["DATE", "TMEAN"]]
373 |             .groupby(self.data_daily["DATE_MD"])
374 |             .max(numeric_only=NUMERIC_ONLY)
375 |             .TMEAN
376 |         )
377 |         df_out["tmean_doy_min"] = (
378 |             self.data_daily[["DATE", "TMEAN"]]
379 |             .groupby(self.data_daily["DATE_MD"])
380 |             .min(numeric_only=NUMERIC_ONLY)
381 |             .TMEAN
382 |         )
383 |         df_out["tmax_doy_max"] = (
384 |             self.data_daily[["DATE", "TMAX"]]
385 |             .groupby(self.data_daily["DATE_MD"])
386 |             .max(numeric_only=NUMERIC_ONLY)
387 |             .TMAX
388 |         )
389 |         df_out["tmax_doy_std"] = (
390 |             self.data_daily[["DATE", "TMAX"]]
391 |             .groupby(self.data_daily["DATE_MD"])
392 |             .std(numeric_only=NUMERIC_ONLY)
393 |             .TMAX
394 |         )
395 |         df_out["tmin_doy_min"] = (
396 |             self.data_daily[["DATE", "TMIN"]]
397 |             .groupby(self.data_daily["DATE_MD"])
398 |             .min(numeric_only=NUMERIC_ONLY)
399 |             .TMIN
400 |         )
401 |         df_out["tmin_doy_std"] = (
402 |             self.data_daily[["DATE", "TMIN"]]
403 |             .groupby(self.data_daily["DATE_MD"])
404 |             .std(numeric_only=NUMERIC_ONLY)
405 |             .TMIN
406 |         )
407 |         if "SNOW" in self.data_daily.columns:
408 |             df_out["snow_doy_mean"] = (
409 |                 self.data_daily[["DATE", "SNOW"]]
410 |                 .groupby(self.data_daily["DATE_MD"])
411 |                 .mean(numeric_only=NUMERIC_ONLY)
412 |                 .SNOW
413 |             )
414 |         self.data = df_out
415 | 
416 |     def _impute_feb29(self):
417 |         """
418 |         Function for mean imputation of February 29.
419 |         :return:
420 |         """
421 |         if self.impute_feb29:
422 |             self.data.loc["02-29"] = self.data.loc["02-28":"03-01"].mean(axis=0)
423 |             self.data.sort_index(inplace=True)
424 | 
425 |     def _run_filter(self):
426 |         """
427 |         Function to run rolling mean filter on climate series to smooth out short fluctuations (padded circularly across the year boundary)
428 |         :return:
429 |         """
430 |         if self.filtersize % 2 != 0:
431 |             data_roll = (
432 |                 pd.concat(
433 |                     [
434 |                         self.data.iloc[-self.filtersize :],
435 |                         self.data,
436 |                         self.data[: self.filtersize],
437 |                     ]
438 |                 )
439 |                 .rolling(self.filtersize)
440 |                 .mean()
441 |             )
442 |             self.data = data_roll[self.filtersize : -self.filtersize]
443 | 
444 |     # TODO: produces different results than pandas version
445 |     def _run_filter_polars(self):
446 |         """
447 |         Function to run rolling mean filter on climate series to smooth out short fluctuations using Polars
448 |         """
449 |         if self.filtersize % 2 != 0:
450 |             # Convert pandas DataFrame to Polars DataFrame
451 |             idx = self.data.index
452 |             df = pl.from_pandas(self.data)
453 | 
454 |             # Prepare data for rolling operation
455 |             extended_df = pl.concat(
456 |                 [df.tail(self.filtersize), df, df.head(self.filtersize)]
457 |             )
458 | 
459 |             # Apply rolling mean
460 |             rolled = extended_df.select(
461 |                 [pl.all().rolling_mean(window_size=self.filtersize)]
462 |             )
463 | 
464 |             # Slice the result to match original data size
465 |             result = rolled.slice(self.filtersize, len(df)).to_pandas()
466 |             result.index = idx
467 | 
468 |             # Convert back to pandas DataFrame and update self.data
469 |             self.data = result
470 |         else:
471 |             raise ValueError("Filter size must be odd")
472 | 
473 |     def _make_report(self):
474 |         """
475 |         Function to create report on climate data completeness
476 |         :return:
477 |         """
478 |         # input climate series (e.g. 1981-01-01 - 2010-12-31)
479 |         pass
480 | 
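# A minimal sketch (with a hypothetical toy series) of the circular smoothing
# performed by NOAAPlotterDailyClimateDataset._run_filter above: the
# day-of-year climatology is padded with copies of its own tail and head so
# the rolling mean wraps cleanly across the 12-31/01-01 boundary.
#
#   import pandas as pd
#   doy_clim = pd.Series([-20.1, -20.4, -19.8, -19.9])  # stand-in climatology column
#   fs = 3                                              # filtersize (must be odd)
#   padded = pd.concat([doy_clim.iloc[-fs:], doy_clim, doy_clim[:fs]])
#   smoothed = padded.rolling(fs).mean()[fs:-fs]        # same length as doy_clim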
481 | 
482 | class NOAAPlotterMonthlyClimateDataset(object):
483 |     def __init__(
484 |         self, daily_dataset, start="1981-01-01", end="2010-12-31", impute_feb29=True
485 |     ):
486 |         self.daily_dataset = daily_dataset
487 |         self.monthly_aggregate = None
488 |         self.start = parse_dates(start)
489 |         self.end = parse_dates(end)
490 |         self.impute_feb29 = impute_feb29
491 |         self._validate_date_range()
492 | 
493 |     def _validate_date_range(self):
494 |         if self.daily_dataset.data["DATE"].max() >= self.end:
495 |             if self.daily_dataset.data["DATE"].min() <= self.start:
496 |                 self.date_range_valid = True
497 |             else:
498 |                 raise ValueError("Dataset is insufficient to calculate climate normals!")
499 | 
500 |     def _filter_to_climate(self):
501 |         """
502 |         filter the daily data to the climate-normal period
503 |         :return:
504 |         """
505 |         df_clim = self.daily_dataset.data[
506 |             (self.daily_dataset.data["DATE"] >= self.start)
507 |             & (self.daily_dataset.data["DATE"] <= self.end)
508 |         ]
509 |         df_clim = df_clim[(df_clim["DATE_MD"] != "02-29")]
510 |         self.data_daily = df_clim
511 | 
512 |     def filter_to_date(self):
513 |         """
514 |         filter the daily data to the selected date range
515 |         :return:
516 |         """
517 |         df_clim = self.daily_dataset.data[
518 |             (self.daily_dataset.data["DATE"] >= self.start)
519 |             & (self.daily_dataset.data["DATE"] <= self.end)
520 |         ]
521 |         df_clim = df_clim[(df_clim["DATE_MD"] != "02-29")]
522 |         return df_clim
523 | 
524 |     def _impute_feb29(self):
525 |         """
526 |         Function for mean imputation of February 29.
527 |         :return:
528 |         """
529 |         pass
530 | 
531 |     def calculate_monthly_statistics(self):
532 |         """
533 |         Function to calculate monthly statistics.
534 |         :return:
535 |         """
536 | 
537 |         df_out = pd.DataFrame()
538 |         data_filtered = self.filter_to_date()
539 |         df_out["tmean_doy_mean"] = (
540 |             data_filtered[["TMEAN"]]
541 |             .groupby(data_filtered["DATE_YM"])
542 |             .agg(lambda x: x.mean() if x.notna().any() else np.nan)
543 |             .TMEAN
544 |         )
545 |         df_out["tmean_doy_std"] = (
546 |             data_filtered[["TMEAN"]]
547 |             .groupby(data_filtered["DATE_YM"])
548 |             .agg(lambda x: x.std() if x.notna().any() else np.nan)
549 |             .TMEAN
550 |         )
551 |         df_out["tmax_doy_max"] = (
552 |             data_filtered[["TMAX"]]
553 |             .groupby(data_filtered["DATE_YM"])
554 |             .agg(lambda x: x.max() if x.notna().any() else np.nan)
555 |             .TMAX
556 |         )
557 |         df_out["tmax_doy_std"] = (
558 |             data_filtered[["TMAX"]]
559 |             .groupby(data_filtered["DATE_YM"])
560 |             .agg(lambda x: x.std() if x.notna().any() else np.nan)
561 |             .TMAX
562 |         )
563 |         df_out["tmin_doy_min"] = (
564 |             data_filtered[["TMIN"]]
565 |             .groupby(data_filtered["DATE_YM"])
566 |             .agg(lambda x: x.min() if x.notna().any() else np.nan)
567 |             .TMIN
568 |         )
569 |         df_out["tmin_doy_std"] = (
570 |             data_filtered[["TMIN"]]
571 |             .groupby(data_filtered["DATE_YM"])
572 |             .agg(lambda x: x.std() if x.notna().any() else np.nan)
573 |             .TMIN
574 |         )
575 |         if "SNOW" in data_filtered.columns:
576 |             df_out["snow_doy_mean"] = (
577 |                 data_filtered[["SNOW"]]
578 |                 .groupby(data_filtered["DATE_YM"])
579 |                 .agg(lambda x: x.mean() if x.notna().any() else np.nan)
580 |                 .SNOW
581 |             )
582 |         df_out["prcp_sum"] = (
583 |             data_filtered[["PRCP"]]
584 |             .groupby(data_filtered["DATE_YM"])
585 |             .agg(lambda x: x.sum() if x.notna().any() else np.nan)
586 |             .PRCP
587 |         )
588 |         self.monthly_aggregate = df_out
589 | 
590 |     def calculate_monthly_climate(self):
591 |         """
592 |         Function to calculate monthly climate statistics. 
593 | :return: 594 | """ 595 | df_out = pd.DataFrame() 596 | data_filtered = self.filter_to_date() 597 | 598 | data_filtered["DATE"] = data_filtered.apply( 599 | lambda x: parse_dates_YM(x["DATE_YM"]), axis=1 600 | ) 601 | data_filtered["Month"] = data_filtered.apply( 602 | lambda x: parse_dates_YM(x["DATE_YM"]).month, axis=1 603 | ) 604 | data_filtered["Year"] = data_filtered.apply( 605 | lambda x: parse_dates_YM(x["DATE_YM"]).year, axis=1 606 | ) 607 | 608 | df_out["tmean_doy_mean"] = ( 609 | data_filtered[["DATE", "TMEAN"]] 610 | .groupby(data_filtered["Month"]) 611 | .mean(numeric_only=NUMERIC_ONLY) 612 | .TMEAN 613 | ) 614 | df_out["tmean_doy_std"] = ( 615 | data_filtered[["DATE", "TMEAN"]] 616 | .groupby(data_filtered["Month"]) 617 | .std(numeric_only=NUMERIC_ONLY) 618 | .TMEAN 619 | ) 620 | df_out["tmax_doy_max"] = ( 621 | data_filtered[["DATE", "TMAX"]] 622 | .groupby(data_filtered["Month"]) 623 | .max(numeric_only=NUMERIC_ONLY) 624 | .TMAX 625 | ) 626 | df_out["tmax_doy_std"] = ( 627 | data_filtered[["DATE", "TMAX"]] 628 | .groupby(data_filtered["Month"]) 629 | .std(numeric_only=NUMERIC_ONLY) 630 | .TMAX 631 | ) 632 | df_out["tmin_doy_min"] = ( 633 | data_filtered[["DATE", "TMIN"]] 634 | .groupby(data_filtered["Month"]) 635 | .min(numeric_only=NUMERIC_ONLY) 636 | .TMIN 637 | ) 638 | df_out["tmin_doy_std"] = ( 639 | data_filtered[["DATE", "TMIN"]] 640 | .groupby(data_filtered["Month"]) 641 | .std(numeric_only=NUMERIC_ONLY) 642 | .TMIN 643 | ) 644 | if "SNOW" in data_filtered.columns: 645 | df_out["snow_doy_mean"] = ( 646 | data_filtered[["DATE", "SNOW"]] 647 | .groupby(data_filtered["Month"]) 648 | .mean(numeric_only=NUMERIC_ONLY) 649 | .SNOW 650 | ) 651 | df_out["prcp_sum"] = ( 652 | data_filtered[["DATE", "PRCP"]] 653 | .groupby(data_filtered["Month"]) 654 | .mean(numeric_only=NUMERIC_ONLY) 655 | .PRCP 656 | * 30 657 | ) 658 | # df_out = df_out.set_index('DATE_YM', drop=False) 659 | self.monthly_climate = df_out 660 | 661 | def _make_report(self): 662 | """ 663 | Function to create report on climate data completeness 664 | :return: 665 | """ 666 | # input climate series (e.g. 
1981-01-01 - 2010-12-31) 667 | 668 | pass 669 | -------------------------------------------------------------------------------- /noaaplotter/utils/download_utils.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import json 3 | import os 4 | from datetime import datetime, timedelta 5 | 6 | import ee 7 | import geemap 8 | import numpy as np 9 | import pandas as pd 10 | import polars as pl 11 | import requests 12 | import tqdm 13 | from joblib import Parallel, delayed 14 | 15 | from noaaplotter.utils.utils import assign_numeric_datatypes 16 | 17 | 18 | # move some logic outside 19 | def download_from_noaa( 20 | output_file, 21 | start_date, 22 | end_date, 23 | datatypes, 24 | loc_name, 25 | station_id, 26 | noaa_api_token, 27 | n_jobs=4, 28 | ): 29 | # Check if file exists and load it 30 | if os.path.exists(output_file): 31 | existing_df = pl.read_parquet(output_file).drop_nulls(subset='STATION') 32 | existing_dates = set(existing_df['DATE'].to_list()) 33 | else: 34 | existing_df = None 35 | existing_dates = set() 36 | 37 | # Convert datestrings to datetime 38 | dt_start = datetime.strptime(start_date, "%Y-%m-%d") 39 | dt_end = datetime.strptime(end_date, "%Y-%m-%d") 40 | 41 | # Calculate date range 42 | all_dates = set(pd.date_range(start=dt_start, end=dt_end).strftime("%Y-%m-%d")) 43 | missing_dates = sorted(list(all_dates - existing_dates)) 44 | 45 | if not missing_dates: 46 | print("No new data to download.") 47 | return 0 48 | 49 | # Find contiguous date ranges to download 50 | date_ranges = [] 51 | range_start = missing_dates[0] 52 | prev_date = datetime.strptime(missing_dates[0], "%Y-%m-%d") 53 | 54 | for date_str in missing_dates[1:] + [None]: # Add None to handle the last range 55 | if date_str is None or datetime.strptime(date_str, "%Y-%m-%d") - prev_date > timedelta(days=1): 56 | date_ranges.append((range_start, prev_date.strftime("%Y-%m-%d"))) 57 | if date_str is not None: 58 | range_start = date_str 59 | prev_date = datetime.strptime(date_str, "%Y-%m-%d") if date_str else None 60 | 61 | # Data Loading 62 | print("Downloading missing data through NOAA API") 63 | all_new_data = [] 64 | 65 | for start, end in date_ranges: 66 | print(f"Downloading data from {start} to {end}") 67 | n_days = (datetime.strptime(end, "%Y-%m-%d") - datetime.strptime(start, "%Y-%m-%d")).days + 1 68 | split_size = np.floor(1000 / len(datatypes)) 69 | split_range = np.arange(0, n_days, split_size) 70 | 71 | datasets_list = Parallel(n_jobs=n_jobs)( 72 | delayed(dl_noaa_api)( 73 | i, datatypes, station_id, noaa_api_token, start, end, split_size 74 | ) 75 | for i in tqdm.tqdm(split_range[:]) 76 | ) 77 | 78 | # Drop empty/None from datasets_list 79 | datasets_list = [i for i in datasets_list if i is not None] 80 | all_new_data.extend(datasets_list) 81 | 82 | # Merge subsets and create DataFrame 83 | df = pd.concat(all_new_data) 84 | 85 | df_pivot = assign_numeric_datatypes(df) 86 | df_pivot["DATE"] = df_pivot.apply( 87 | lambda x: datetime.fromisoformat(x["DATE"]).strftime("%Y-%m-%d"), axis=1 88 | ) 89 | 90 | df_pivot = df_pivot.reset_index(drop=False) 91 | dr = pd.DataFrame(pd.date_range(start=start_date, end=end_date), columns=["DATE"]) 92 | dr["DATE"] = dr["DATE"].astype(str) 93 | df_merged = pd.concat( 94 | [df_pivot.set_index("DATE"), dr.set_index("DATE")], 95 | join="outer", 96 | axis=1, 97 | sort=True, 98 | ) 99 | df_merged["DATE"] = df_merged.index 100 | df_merged["NAME"] = loc_name 101 | if "TAVG" not in df_merged.columns: 102 | 
df_merged["TAVG"] = None 103 | if "SNWD" not in df_merged.columns: 104 | df_merged["SNWD"] = None 105 | final_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"] 106 | df_final = df_merged[final_cols] 107 | df_final = df_final.replace({np.nan: None}) 108 | 109 | # Merge with existing data if it exists 110 | if existing_df is not None: 111 | df_final = pd.concat([existing_df.to_pandas(), df_final]).drop_duplicates(subset=["DATE"], keep="last") 112 | 113 | print(f"Saving data to {output_file}") 114 | df_final.to_parquet(output_file) 115 | return 0 116 | 117 | 118 | 119 | def dl_noaa_api(i, dtypes, station_id, Token, date_start, date_end, split_size): 120 | """ 121 | function to download from NOAA API 122 | """ 123 | dt_start = dt.datetime.strptime(date_start, "%Y-%m-%d") 124 | dt_end = dt.datetime.strptime(date_end, "%Y-%m-%d") 125 | 126 | split_start = dt_start + timedelta(days=i) 127 | split_end = dt_start + timedelta(days=i + split_size - 1) 128 | if split_end > dt_end: 129 | split_end = dt_end 130 | 131 | date_start_split = split_start.strftime("%Y-%m-%d") 132 | date_end_split = split_end.strftime("%Y-%m-%d") 133 | 134 | # make the api call 135 | request_url = "https://www.ncei.noaa.gov/access/services/data/v1" 136 | request_params = dict( 137 | dataset="daily-summaries", 138 | dataTypes=dtypes, # ['PRCP', 'TMIN', 'TMAX'], 139 | stations=station_id, 140 | limit=1000, 141 | startDate=date_start_split, 142 | endDate=date_end_split, 143 | units="metric", 144 | format="json", 145 | ) 146 | r = requests.get(request_url, params=request_params, headers={"token": Token}) 147 | 148 | # workaround to skip empty returns (no data within period) 149 | try: 150 | # load the api response as a json 151 | d = json.loads(r.text) 152 | result = pd.DataFrame(d) 153 | except json.JSONDecodeError: 154 | print( 155 | f"Warning: No data available for period {date_start_split} to {date_end_split}. Skipping." 
156 | ) 157 | result = None 158 | return result 159 | 160 | 161 | def download_era5_from_gee(latitude, longitude, end_date, start_date, output_file): 162 | ee.Initialize() 163 | EE_LAYER = "ECMWF/ERA5/DAILY" 164 | location = ee.Geometry.Point([longitude, latitude]) 165 | # load ImageCollection 166 | col = ( 167 | ee.ImageCollection(EE_LAYER) 168 | .filterBounds(location) 169 | .filterDate(start_date, end_date) 170 | ) 171 | # Download data 172 | print("Start downloading daily ERA5 data.") 173 | print( 174 | "Download may take a while.\n1yr: ~5 seconds\n10yrs: ~35 seconds\n50yrs: ~8 min" 175 | ) 176 | result = geemap.extract_pixel_values(col, region=location) 177 | out_dict = result.getInfo() 178 | df_gee = pd.DataFrame(data=[out_dict.keys(), out_dict.values()]).T 179 | # parse dates and values 180 | df_gee["time"] = df_gee[0].apply(lambda x: f"{x[:4]}-{x[4:6]}-{x[6:8]}") 181 | df_gee["feature"] = df_gee[0].apply(lambda x: x[9:]) 182 | df_gee["value"] = df_gee[1] 183 | df = df_gee.pivot_table( 184 | values="value", columns=["feature"], index="time" 185 | ) # .reset_index(drop=False) 186 | # #### recalculate values 187 | df_new = pd.DataFrame(index=df.index) 188 | temperature_cols = [ 189 | "mean_2m_air_temperature", 190 | "minimum_2m_air_temperature", 191 | "maximum_2m_air_temperature", 192 | "dewpoint_2m_temperature", 193 | ] 194 | precipitation_cols = ["total_precipitation"] 195 | df_joined = ( 196 | df_new.join(df[temperature_cols] - 273.15) 197 | .join(df[precipitation_cols] * 1e3) 198 | .reset_index(drop=False) 199 | ) 200 | # Create Output 201 | rename_dict = { 202 | "time": "DATE", 203 | "total_precipitation": "PRCP", 204 | "mean_2m_air_temperature": "TAVG", 205 | "maximum_2m_air_temperature": "TMAX", 206 | "minimum_2m_air_temperature": "TMIN", 207 | } 208 | df_renamed = df_joined.rename(columns=rename_dict) 209 | df_renamed["NAME"] = "" 210 | df_renamed["STATION"] = "" 211 | df_renamed["SNWD"] = "" 212 | output_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"] 213 | df_save = df_renamed[output_cols].astype(str) 214 | df_save.to_csv(output_file, index=False) 215 | -------------------------------------------------------------------------------- /noaaplotter/utils/plot_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ######################## 5 | # Credits here 6 | # author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research 7 | # contact: ingmar.nitze@awi.de 8 | # version: 2021-09-11 9 | 10 | ######################## 11 | 12 | # TODO: move to external file 13 | def setup_monthly_plot_props(information, anomaly): 14 | plot_kwargs = {} 15 | if information.lower() == 'temperature': 16 | plot_kwargs['cmap'] = 'RdBu_r' 17 | plot_kwargs['fc_low'] = '#4393c3' 18 | plot_kwargs['fc_high'] = '#d6604d' 19 | if anomaly: 20 | plot_kwargs['value_column'] = 'tmean_diff' 21 | plot_kwargs['y_label'] = 'Temperature departure [°C]' 22 | plot_kwargs['title'] = 'Monthly departure from climatological mean (1981-2010)' 23 | plot_kwargs['legend_label_above'] = 'Above average' 24 | plot_kwargs['legend_label_below'] = 'Below average' 25 | else: 26 | plot_kwargs['value_column'] = 'tmean_doy_mean' 27 | plot_kwargs['y_label'] = 'Temperature [°C]' 28 | plot_kwargs['title'] = 'Monthly Mean Temperature' 29 | plot_kwargs['legend_label_above'] = 'Above freezing' 30 | plot_kwargs['legend_label_below'] = 'Below freezing' 31 | 32 | elif information.lower() == 'precipitation': 33 | 
plot_kwargs['fc_low'] = '#d6604d'
34 |         plot_kwargs['fc_high'] = '#4393c3'
35 |         if anomaly:
36 |             plot_kwargs['cmap'] = 'RdBu'
37 |             plot_kwargs['value_column'] = 'prcp_diff'
38 |             plot_kwargs['y_label'] = 'Precipitation departure [mm]'
39 |             plot_kwargs['title'] = 'Monthly departure from climatological mean (1981-2010)'
40 |             plot_kwargs['legend_label_above'] = 'Above average'
41 |             plot_kwargs['legend_label_below'] = 'Below average'
42 |         else:
43 |             plot_kwargs['cmap'] = 'Blues'
44 |             plot_kwargs['value_column'] = 'prcp_sum'
45 |             plot_kwargs['y_label'] = 'Precipitation [mm]'
46 |             plot_kwargs['title'] = 'Monthly Precipitation'
47 |             plot_kwargs['legend_label_below'] = ''
48 |             plot_kwargs['legend_label_above'] = 'Monthly Precipitation'
49 |     return plot_kwargs
--------------------------------------------------------------------------------
/noaaplotter/utils/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | 
4 | ########################
5 | # Credits here
6 | # author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research
7 | # contact: ingmar.nitze@awi.de
8 | # version: 2020-12-09
9 | 
10 | ########################
11 | import datetime as dt
12 | from datetime import timedelta
13 | import requests, json
14 | import pandas as pd
15 | 
16 | 
17 | 
18 | 
19 | 
20 | def parse_dates(date):
21 |     """
22 | 
23 |     :param date:
24 |     :return:
25 |     """
26 |     if isinstance(date, str):
27 |         return dt.datetime.strptime(date, '%Y-%m-%d')
28 |     elif isinstance(date, dt.datetime) or isinstance(date, dt.date):
29 |         return date
30 |     else:
31 |         raise ValueError('Wrong date format. Either use native datetime format or "YYYY-mm-dd"')
32 | 
33 | 
34 | def calc_trailing_mean(df, length, feature, new_feature):
35 |     """
36 |     :param df:
37 |     :param length:
38 |     :param feature:
39 |     :param new_feature:
40 |     :return:
41 | 
42 |     """
43 |     df[new_feature] = df[feature].rolling(length).mean()
44 |     return df
45 | 
46 | 
47 | def parse_dates_YM(date):
48 |     """
49 |     :param date:
50 |     :return:
51 |     """
52 |     if isinstance(date, str):
53 |         return dt.datetime.strptime(date, '%Y-%m')
54 |     elif isinstance(date, dt.datetime):
55 |         return date
56 |     else:
57 |         raise ValueError('Wrong date format. Either use native datetime format or "YYYY-mm"')
58 | 
59 | 
60 | def dl_noaa_api(i, dtypes, station_id, Token, date_start, date_end, split_size):
61 |     """
62 |     function to download from NOAA API
63 |     """
64 |     dt_start = dt.datetime.strptime(date_start, '%Y-%m-%d')
65 |     dt_end = dt.datetime.strptime(date_end, '%Y-%m-%d')
66 | 
67 |     split_start = dt_start + timedelta(days=i)
68 |     split_end = dt_start + timedelta(days=i + split_size - 1)
69 |     if split_end > dt_end:
70 |         split_end = dt_end
71 | 
72 |     date_start_split = split_start.strftime('%Y-%m-%d')
73 |     date_end_split = split_end.strftime('%Y-%m-%d')
74 | 
75 |     # make the api call
76 |     request_url = 'https://www.ncei.noaa.gov/access/services/data/v1'
77 |     request_params = dict(
78 |         dataset='daily-summaries',
79 |         dataTypes=dtypes,  # ['PRCP', 'TMIN', 'TMAX']
80 |         stations=station_id,
81 |         limit=1000,
82 |         startDate=date_start_split,
83 |         endDate=date_end_split,
84 |         units='metric',
85 |         format='json'
86 |     )
87 |     r = requests.get(
88 |         request_url,
89 |         params=request_params,
90 |         headers={'token': Token})
91 | 
92 |     # workaround to skip empty returns (no data within period)
93 |     try:
94 |         # load the api response as a json
95 |         d = json.loads(r.text)
96 |         result = pd.DataFrame(d)
97 |     except json.JSONDecodeError:
98 |         print(f"Warning: No data available for period {date_start_split} to {date_end_split}. Skipping.")
99 |         result = None
100 |     return result
101 | 
102 | 
103 | def assign_numeric_datatypes(df):
104 |     for col in df.columns:
105 |         if df[col].dtype == 'object':
106 |             try:
107 |                 df[col] = pd.to_numeric(df[col])
108 |             except (ValueError, TypeError):
109 |                 pass
110 |     return df
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>0.70", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [project]
6 | name = "noaaplotter"
7 | version = "0.5.4"
8 | description = "Package to plot fancy climate/weather data of NOAA"
9 | requires-python = ">=3.11"
10 | authors = [
11 |     { name = "Ingmar Nitze", email = "ingmar.nitze@awi.de" }
12 | ]
13 | license = { text = "" }
14 | readme = "README.md"
15 | # homepage = "https://github.com/initze/noaaplotter"
16 | keywords = ["climate", "weather", "NOAA", "plotting"]
17 | 
18 | dependencies = [
19 |     "pandas>=2.2",
20 |     "numpy>=2,<3",
21 |     "matplotlib>=3.9",
22 |     "requests",
23 |     "joblib>=1.4",
24 |     "tqdm>=4.67",
25 |     "geemap>=0.35.1",
26 |     "polars>=1.18.0",
27 |     "pyarrow>=18.1.0",
28 |     "box>=0.1.5",
29 |     "setuptools>=75.6.0",
30 |     "narwhals>=1.20.1",
31 | ]
32 | 
33 | [tool.setuptools.packages.find]
34 | include = ["noaaplotter*"]
35 | 
36 | [project.scripts]
37 | plot_daily = "noaaplotter.scripts.plot_daily:main"
38 | plot_monthly = "noaaplotter.scripts.plot_monthly:main"
39 | download_data = "noaaplotter.scripts.download_data:main"
40 | download_data_ERA5 = "noaaplotter.scripts.download_data_ERA5:main"
--------------------------------------------------------------------------------