├── .github └── workflows │ ├── codeql-analysis.yml │ └── python-package.yml ├── .gitignore ├── CHANGELOG.md ├── CITATION.cff ├── NOAA_tests.ipynb ├── README.md ├── data ├── 1696868.csv ├── Kotzebue.csv ├── weather_station_bismarck.csv ├── weather_station_kotzebue.csv ├── weather_station_orlando.csv └── weather_station_sanfrancisco.csv ├── examples ├── example_daily_series.py ├── example_daily_series_winter.py └── example_monthly_series.py ├── figures ├── daily_series_Kotzebue_1992.png ├── daily_series_Kotzebue_2017-2018_winter.png ├── monthly_series_precipitation_12mthsTrMn_Kotzebue.png ├── monthly_series_precipitation_12mthsTrMn_Kotzebue_anomaly.png ├── monthly_series_temperature_12mthsTrMn_Kotzebue.png └── monthly_series_temperature_12mthsTrMn_Kotzebue_anomaly.png ├── noaaplotter ├── __init__.py ├── noaaplotter.py ├── scripts │ ├── __pycache__ │ │ ├── download_data.cpython-310.pyc │ │ └── plot_daily.cpython-310.pyc │ ├── download_data.py │ ├── download_data_ERA5.py │ ├── download_data_SST.py │ ├── plot_daily.py │ └── plot_monthly.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── dataset.cpython-310.pyc │ ├── download_utils.cpython-310.pyc │ ├── plot_utils.cpython-310.pyc │ └── utils.cpython-310.pyc │ ├── dataset.py │ ├── download_utils.py │ ├── plot_utils.py │ └── utils.py └── pyproject.toml /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '19 1 * * 0' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'python' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v2 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v1 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v1 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 
56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v1 68 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "master" ] 9 | pull_request: 10 | branches: [ "master" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.8", "3.9", "3.10"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 pytest 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | - name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38 | 
39 | #- name: Test with pytest
40 | #  run: |
41 | #    pytest
42 | 
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | .idea
2 | noaaplotter/__pycache__
3 | Session.log
4 | 
-------------------------------------------------------------------------------- /CHANGELOG.md: --------------------------------------------------------------------------------
1 | ## [0.5.4] - 2025-01-05
2 | ### Changed
3 | * fixed streamlit crash
4 | * added pyproject.toml for install
5 | * accounted for NaN values in monthly aggregates
6 | 
7 | ## [0.5.1] - 2023-02-18
8 | ### Changed
9 | * created download_utils
10 | * some code restructuring to support noaaplotter_streamlit (https://github.com/initze/noaaplotter_streamlit)
11 | 
12 | ## [0.5.0] - 2023-02-03
13 | ### Changed
14 | * fixed NOAA API v2 bug that dropped January and February data
15 | * some code fixes and cleanup
16 | 
17 | ## [0.4.1] - 2023-01-16
18 | ### Added
19 | * basic support for SST
20 | 
21 | ## [0.4.0] - 2022-11-30
22 | ### Changed
23 | * moved scripts to a subdirectory and enabled automatic package install
24 | 
25 | ## [0.3.0] - 2022-06-30
26 | ### Added
27 | * automated ERA5 download script through Google Earth Engine
28 | ### Changed
29 | * code cleanup and minor changes
30 | 
31 | ## [0.2.0] - 2021-09-19
32 | ### Added
33 | * automated NOAA API download script
34 | * "No Data" visual for the daily data plot
35 | ### Changed
36 | * moved legend out of the plot area for daily plots
37 | * some code cleanup
38 | * minor bugfixes
39 | 
40 | ## [0.1.8] - 2020-12-14
41 | ### Changed
42 | - fixed truncated rolling mean at the beginning of monthly plots
43 | - fixed crash for end dates after the end of data availability
44 | 
45 | ## [0.1.7] - 2020-12-09
46 | ### Changed
47 | - fixed crash of plot_monthly
48 | - simplified environment.yml
49 | - minor style fixes
50 | 
51 | ### No changelog for earlier versions
-------------------------------------------------------------------------------- /CITATION.cff: --------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If you use this software, please cite it as below."
3 | authors: 4 | - family-names: "Nitze" 5 | given-names: "Ingmar" 6 | orcid: "https://orcid.org/0000-0002-1165-6852" 7 | title: "noaaplotter" 8 | version: 0.5.1 9 | doi: 10.5281/zenodo.7753462 10 | date-released: 2023-03-20 11 | url: "https://github.com/initze/noaaplotter" 12 | -------------------------------------------------------------------------------- /NOAA_tests.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Tests for downloading data from NOAA " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### TODO\n", 15 | "* autoloop over dates" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 93, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "# Imports\n", 25 | "#needed to make web requests\n", 26 | "import requests\n", 27 | "#store the data we get as a dataframe\n", 28 | "import pandas as pd\n", 29 | "#convert the response as a strcuctured json\n", 30 | "import json\n", 31 | "#mathematical operations on lists\n", 32 | "import numpy as np\n", 33 | "#parse the datetimes we get from NOAA\n", 34 | "from datetime import datetime, timedelta\n", 35 | "\n", 36 | "from joblib import delayed, Parallel\n", 37 | "\n", 38 | "import csv\n", 39 | "import tqdm\n", 40 | "from noaaplotter.utils import dl_noaa_api" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 94, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "#add the access token you got from NOAA\n", 50 | "Token = 'LaVQzwUgOBQLBRwoTpOLyRbIKDTHAVVe'\n", 51 | "\n", 52 | "#Long Beach Airport station\n", 53 | "#station_id = 'GHCND:USW00026616' # Kotzebue\n", 54 | "station_id = 'GHCND:USW00027502' # Barrow\n", 55 | "#station_name = 'Kotzebue'\n", 56 | "station_name = 'Barrow'\n", 57 | "datatypes = ['TMIN', 'TMAX', 'PRCP', 'SNOW']\n", 58 | "date_start = '1971-01-01'\n", 59 | "date_end = '2021-12-31'" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 95, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "dtypes_string = '&'.join([f'datatypeid={dt}' for dt in datatypes])" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "#### Prepare requests" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 96, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "# convert datestring to dt\n", 85 | "dt_start = datetime.strptime(date_start, '%Y-%m-%d')\n", 86 | "dt_end = datetime.strptime(date_end, '%Y-%m-%d')\n", 87 | "# calculate number of days\n", 88 | "n_days = (dt_end-dt_start).days\n", 89 | "# calculate nuber of splits to fit into 1000 lines/rows\n", 90 | "split_size = np.floor(1000 / len(datatypes))\n", 91 | "# calculate splits\n", 92 | "split_range = np.arange(0, n_days, split_size)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "### Run data loading" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "make joblib" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 97, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stderr", 116 | "output_type": "stream", 117 | "text": [ 118 | " 11%|████████▊ | 8/75 [00:02<00:23, 2.89it/s]" 119 | ] 120 | }, 121 | { 122 | "ename": "AttributeError", 123 | "evalue": "module 'datetime' has no attribute 'strptime'", 124 | 
"output_type": "error", 125 | "traceback": [ 126 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 127 | "\u001b[1;31m_RemoteTraceback\u001b[0m Traceback (most recent call last)", 128 | "\u001b[1;31m_RemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\externals\\loky\\process_executor.py\", line 431, in _process_worker\n r = call_item()\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\externals\\loky\\process_executor.py\", line 285, in __call__\n return self.fn(*self.args, **self.kwargs)\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\_parallel_backends.py\", line 595, in __call__\n return self.func(*args, **kwargs)\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\parallel.py\", line 263, in __call__\n for func, args, kwargs in self.items]\n File \"C:\\Users\\initze\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\parallel.py\", line 263, in \n for func, args, kwargs in self.items]\n File \"C:\\Users\\initze\\Documents\\Python_Scripts\\noaaplotter\\noaaplotter\\utils.py\", line 66, in dl_noaa_api\n dt_start = dt.strptime(date_start, '%Y-%m-%d')\nAttributeError: module 'datetime' has no attribute 'strptime'\n\"\"\"", 129 | "\nThe above exception was the direct cause of the following exception:\n", 130 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 131 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n", 132 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 1052\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1053\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1054\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1055\u001b[0m \u001b[1;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1056\u001b[0m \u001b[0melapsed_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 133 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 931\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 932\u001b[0m \u001b[1;32mif\u001b[0m 
\u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'supports_timeout'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 933\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 934\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 935\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 134 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\site-packages\\joblib\\_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[1;34m(future, timeout)\u001b[0m\n\u001b[0;32m 540\u001b[0m AsyncResults.get from multiprocessing.\"\"\"\n\u001b[0;32m 541\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 542\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 543\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mCfTimeoutError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 544\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 135 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\concurrent\\futures\\_base.py\u001b[0m in \u001b[0;36mresult\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 433\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mCancelledError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 434\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mFINISHED\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 435\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 436\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 136 | 
"\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\envs\\noaaplotter\\lib\\concurrent\\futures\\_base.py\u001b[0m in \u001b[0;36m__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 382\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__get_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 383\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 384\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 385\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 386\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_result\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 137 | "\u001b[1;31mAttributeError\u001b[0m: module 'datetime' has no attribute 'strptime'" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "%time datasets_list = Parallel(n_jobs=4)(delayed(dl_noaa_api)(i, dtypes_string, station_id, Token, date_start, date_end, split_size) for i in tqdm.tqdm(split_range[:]))" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 98, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "df = pd.concat(datasets_list)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "#### Pivot table to correct form" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 99, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "df_pivot = df.pivot(index='date', columns='datatype', values='value')" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "#### adapt factor " 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 100, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "df_pivot.loc[:, :] /= 10" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "#### Prepare data export\n", 191 | "Option 1: load as object\n", 192 | "Option 2: save to csv" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "#### Reformat columns to target " 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "\"STATION\",\"NAME\",\"DATE\",\"PRCP\",\"SNWD\",\"TAVG\",\"TMAX\",\"TMIN\"" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 101, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "df_pivot = df_pivot.reset_index(drop=False)\n", 216 | "df_pivot['DATE'] = df_pivot.apply(lambda x: datetime.fromisoformat(x['date']).strftime('%Y-%m-%d'), axis=1)\n" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 102, 222 | "metadata": {}, 223 | "outputs": [ 224 | { 225 | "data": { 226 | "text/html": [ 227 | "
\n", 228 | "\n", 241 | "\n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | "
datatypedatePRCPSNOWTMAXTMINDATE
01971-01-01T00:00:000.00.0-32.8-37.21971-01-01
11971-01-02T00:00:000.00.0-30.0-41.11971-01-02
21971-01-03T00:00:001.52.8-16.7-30.61971-01-03
31971-01-04T00:00:000.30.5-16.7-30.01971-01-04
41971-01-05T00:00:000.00.0-23.3-28.91971-01-05
.....................
185022021-08-29T00:00:0017.0NaN9.46.12021-08-29
185032021-08-30T00:00:000.3NaN8.96.72021-08-30
185042021-08-31T00:00:000.5NaN11.18.32021-08-31
185052021-09-01T00:00:002.3NaN13.38.92021-09-01
185062021-09-02T00:00:000.0NaN18.98.92021-09-02
\n", 355 | "

18507 rows × 6 columns

\n", 356 | "
" 357 | ], 358 | "text/plain": [ 359 | "datatype date PRCP SNOW TMAX TMIN DATE\n", 360 | "0 1971-01-01T00:00:00 0.0 0.0 -32.8 -37.2 1971-01-01\n", 361 | "1 1971-01-02T00:00:00 0.0 0.0 -30.0 -41.1 1971-01-02\n", 362 | "2 1971-01-03T00:00:00 1.5 2.8 -16.7 -30.6 1971-01-03\n", 363 | "3 1971-01-04T00:00:00 0.3 0.5 -16.7 -30.0 1971-01-04\n", 364 | "4 1971-01-05T00:00:00 0.0 0.0 -23.3 -28.9 1971-01-05\n", 365 | "... ... ... ... ... ... ...\n", 366 | "18502 2021-08-29T00:00:00 17.0 NaN 9.4 6.1 2021-08-29\n", 367 | "18503 2021-08-30T00:00:00 0.3 NaN 8.9 6.7 2021-08-30\n", 368 | "18504 2021-08-31T00:00:00 0.5 NaN 11.1 8.3 2021-08-31\n", 369 | "18505 2021-09-01T00:00:00 2.3 NaN 13.3 8.9 2021-09-01\n", 370 | "18506 2021-09-02T00:00:00 0.0 NaN 18.9 8.9 2021-09-02\n", 371 | "\n", 372 | "[18507 rows x 6 columns]" 373 | ] 374 | }, 375 | "execution_count": 102, 376 | "metadata": {}, 377 | "output_type": "execute_result" 378 | } 379 | ], 380 | "source": [ 381 | "df_pivot" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 103, 387 | "metadata": {}, 388 | "outputs": [], 389 | "source": [ 390 | "dr = pd.DataFrame(pd.date_range(start=date_start, end=date_end), columns=['DATE'])\n", 391 | "dr['DATE'] = dr['DATE'].astype(str)\n", 392 | "\n" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 104, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "df_merged = pd.concat([df_pivot.set_index('DATE'), dr.set_index('DATE')], join='outer', axis=1).reset_index(drop=False)" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 105, 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "df_merged['STATION'] = station_id\n", 411 | "df_merged['NAME'] = station_name\n", 412 | "\n", 413 | "df_merged['TAVG'] = None\n", 414 | "df_merged['SNWD'] = None" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 106, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "final_cols = [\"STATION\",\"NAME\",\"DATE\",\"PRCP\",\"SNWD\",\"TAVG\",\"TMAX\",\"TMIN\"]" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "no index, make all strings " 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 107, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "df_final = df_merged[final_cols]" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 108, 445 | "metadata": {}, 446 | "outputs": [], 447 | "source": [ 448 | "df_final = df_final.replace({np.nan: None})" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 109, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [ 457 | "df_final.to_csv('./data/tmp.csv', index=False, quoting=csv.QUOTE_ALL)" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "Fill empty dates" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": null, 470 | "metadata": {}, 471 | "outputs": [], 472 | "source": [] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [] 487 | } 488 | ], 489 | "metadata": { 490 | "kernelspec": { 491 | "display_name": "Python 3", 492 | "language": "python", 493 | "name": "python3" 494 | }, 495 | "language_info": { 496 | 
"codemirror_mode": { 497 | "name": "ipython", 498 | "version": 3 499 | }, 500 | "file_extension": ".py", 501 | "mimetype": "text/x-python", 502 | "name": "python", 503 | "nbconvert_exporter": "python", 504 | "pygments_lexer": "ipython3", 505 | "version": "3.7.6" 506 | } 507 | }, 508 | "nbformat": 4, 509 | "nbformat_minor": 2 510 | } 511 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # noaaplotter 2 | A python package to create fancy plots with NOAA weather data. 3 | 4 | ## Install 5 | #### Recommended conda install 6 | 7 | I recommend to use a fresh conda environment 8 | `conda create -n noaaplotter python pip` 9 | 10 | activate conda environment 11 | `conda activate noaaplotter` 12 | 13 | pip install noaaplotter and dependencies 14 | `pip install git+https://github.com/initze/noaaplotter.git` 15 | 16 | #### Requirements 17 | - matplotlib 18 | - numpy 19 | - pandas 20 | - python 21 | - requests 22 | - joblib 23 | - tqdm 24 | - geemap 25 | 26 | 27 | ## Examples 28 | ### Download data 29 | #### Option 1 NOAA Daily Summaries: Download via script 30 | Download daily summaries (temperature + precipitation) from Kotzebue (or other station) from 1970-01-01 until 2021-12-31 31 | * NOAA API Token is required: https://www.ncdc.noaa.gov/cdo-web/token 32 | 33 | `download_data.py -o ./data/kotzebue.csv -sid USW00026616 -start 1970-01-01 -end 2021-12-31 -t ` 34 | 35 | #### Option 2 NOAA Daily Summaries: Download via browser 36 | CSV files of "daily summaries" 37 | ("https://www.ncdc.noaa.gov/cdo-web/search") 38 | * Values: metric 39 | * File types: csv 40 | 41 | #### Option 3 ERA5 Daily: Download via script 42 | Download daily summaries (temperature + precipitation) from Potsdam (13.05°E, 52.4°N) from 1980-01-01 until 2021-12-31 43 | * Google Earthengine account is required 44 | * Caution: full dataset may take a few minutes 45 | 46 | `download_data_ERA5.py -o ./data/potsdam_ERA5.csv -start 1980-01-01 -end 2021-12-31 -lat 52.4 -lon 13.05` 47 | 48 | ### Daily Mean Temperature and Precipitation values vs. Climate 49 | #### Entire year 1 January until 31 December (e.g. 
81 | ### Daily Mean Temperature and Precipitation values vs. Climate
82 | #### Entire year: 1 January to 31 December (e.g. 1992)
83 | 
84 | `plot_daily.py -infile data/kotzebue.csv -start 1992-01-01 -end 1992-12-31 -t_range -45 25 -p_range 50 -plot`
85 | 
86 | ![Daily mean temperatures and precipitation vs. climate](https://user-images.githubusercontent.com/4864803/132648353-d1792234-dc68-4baf-a608-5aa5fe6899a8.png "Daily mean temperatures and precipitation vs. climate")
87 | 
88 | ### Monthly aggregates
89 | #### Absolute values
90 | 
91 | Temperature:
92 | `plot_monthly.py -infile data/data2.csv -start 1980-01-01 -end 2021-08-31 -type Temperature -trail 12 -save_plot figures/kotzebue_monthly_temperature.png -plot`
93 | ![Kotzebue_monthly_t_abs](https://user-images.githubusercontent.com/4864803/133925329-540933c1-b30a-4d31-a66f-0ba624223abf.png)
94 | 
95 | Precipitation:
96 | `plot_monthly.py -infile data/data2.csv -start 1980-01-01 -end 2021-08-31 -type Precipitation -trail 12 -save_plot figures/kotzebue_monthly_precipitation.png -plot`
97 | ![Kotzebue_monthly_p_abs](https://user-images.githubusercontent.com/4864803/133925351-5d7513df-2794-472a-b00d-780538f68ce6.png)
98 | 
99 | #### Anomalies/Departures from Climate (1981-2010)
100 | 
101 | Temperature:
102 | 
103 | `plot_monthly.py -infile data/data2.csv -start 1980-01-01 -end 2021-08-31 -type Temperature -trail 12 -save_plot figures/kotzebue_monthly_temperature_anomaly.png -anomaly -plot`
104 | 
105 | !["Mean monthly temperature anomalies with 12-month trailing mean"](https://user-images.githubusercontent.com/4864803/133923928-9ca78105-3718-48d9-80c5-efaf0bfa3217.png)
106 | 
107 | Precipitation:
108 | 
109 | `plot_monthly.py -infile data/data2.csv -start 1980-01-01 -end 2021-08-31 -type Precipitation -trail 12 -save_plot figures/kotzebue_monthly_precipitation_anomaly.png -anomaly -plot`
110 | 
111 | !["Mean monthly precipitation anomalies with 12-month trailing mean"](https://user-images.githubusercontent.com/4864803/133923987-faabba54-e2d7-4340-be05-078bce0648cf.png)
112 | 
-------------------------------------------------------------------------------- /examples/example_daily_series.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Example script to plot daily weather data for an entire year (January 1 to December 31)
5 | using the noaaplotter package
6 | author: Ingmar Nitze
7 | """
8 | 
9 | from
noaaplotter.noaaplotter import NOAAPlotter
10 | import logging
11 | 
12 | def main():
13 |     logging.basicConfig(filename='example.log', filemode='w', level=logging.DEBUG)
14 |     n = NOAAPlotter(r'../data/Kotzebue.csv',
15 |                     location='Kotzebue')
16 |     for year in [1984, 2017, 2018]:
17 |         print(year)
18 |         try:
19 |             n.plot_weather_series(start_date='{yr}-01-01'.format(yr=year), end_date='{yr}-12-31'.format(yr=year),
20 |                                   show_snow_accumulation=False, plot_extrema=True,
21 |                                   show_plot=False, dpi=100,
22 |                                   save_path=r'../figures/daily_series_year_Kotzebue_{yr0}-{yr1}.png'.format(yr0=year, yr1=year),
23 |                                   plot_tmin=-45, plot_tmax=25, plot_pmax=50, plot_snowmax=300)
24 |         except Exception as e:
25 |             print(e)
26 |             continue
27 | 
28 | if __name__ == '__main__':
29 |     main()
-------------------------------------------------------------------------------- /examples/example_daily_series_winter.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Example script to plot daily weather data for a winter season (July 1 to June 30 of the following year)
5 | using the noaaplotter package
6 | author: Ingmar Nitze
7 | """
8 | 
9 | from noaaplotter.noaaplotter import NOAAPlotter
10 | import logging
11 | 
12 | def main():
13 |     logging.basicConfig(filename='example.log', filemode='w', level=logging.DEBUG)
14 |     n = NOAAPlotter(r'../data/Kotzebue.csv',
15 |                     location='Kotzebue')
16 |     for year in [1984, 2017, 2018]:
17 |         print(year)
18 |         try:
19 |             n.plot_weather_series(start_date='{yr}-07-01'.format(yr=year), end_date='{yr}-06-30'.format(yr=year+1),
20 |                                   show_snow_accumulation=True, plot_extrema=True,
21 |                                   show_plot=False, dpi=100,
22 |                                   save_path=r'../figures/daily_series_winter_Kotzebue_{yr0}-{yr1}.png'.format(yr0=year, yr1=year+1),
23 |                                   plot_tmin=-45, plot_tmax=25, plot_pmax=50, plot_snowmax=300)
24 |         except Exception as e:
25 |             print(e)
26 |             continue
27 | 
28 | if __name__ == '__main__':
29 |     main()
-------------------------------------------------------------------------------- /examples/example_monthly_series.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """
4 | Example script to plot the monthly deviation of temperature or precipitation from the climatological mean (1981-2010).
5 | In addition, the trailing mean of the last n months (here 12) is plotted.
6 | Uses the noaaplotter package.
7 | author: Ingmar Nitze
8 | """
9 | 
10 | from noaaplotter.noaaplotter import NOAAPlotter
11 | import logging
12 | 
13 | def main():
14 |     logging.basicConfig(filename='example.log', filemode='w', level=logging.DEBUG)
15 | 
16 |     LOCATION = 'Kotzebue'
17 |     START = '1990-01-01'
18 |     END = '2019-12-31'
19 |     TRAILING_MEAN = 12
20 |     DPI = 300
21 |     FIGSIZE = (15,7)
22 |     PERIOD = '1990-2019'
23 | 
24 |     n = NOAAPlotter(r'C:/Users/initze/OneDrive/noaaplotter/data/2005576.csv', location=LOCATION)
25 | 
26 |     try:
27 |         n.plot_monthly_barchart(START, END, information='Precipitation', anomaly=False,
28 |                                 trailing_mean=TRAILING_MEAN, show_plot=False,
29 |                                 dpi=DPI, figsize=FIGSIZE,
30 |                                 save_path=r'./figures/{loc}_monthly_series_precipitation_12mthsTrMn_{p}.png'.format(p=PERIOD, loc=LOCATION))
31 | 
32 |         n.plot_monthly_barchart(START, END, information='Temperature', anomaly=False,
33 |                                 trailing_mean=TRAILING_MEAN, show_plot=False,
34 |                                 dpi=DPI, figsize=FIGSIZE,
35 |                                 save_path=r'./figures/{loc}_monthly_series_temperature_12mthsTrMn_{p}.png'.format(p=PERIOD, loc=LOCATION))
36 | 
37 |         n.plot_monthly_barchart(START, END, information='Precipitation', anomaly=True,
38 |                                 trailing_mean=TRAILING_MEAN, show_plot=False,
39 |                                 dpi=DPI, figsize=FIGSIZE,
40 |                                 save_path=r'./figures/{loc}_monthly_series_precipitation_12mthsTrMn_anomaly_{p}.png'.format(p=PERIOD, loc=LOCATION))
41 | 
42 |         n.plot_monthly_barchart(START, END, information='Temperature', anomaly=True,
43 |                                 trailing_mean=TRAILING_MEAN, show_plot=False,
44 |                                 dpi=DPI, figsize=FIGSIZE,
45 |                                 save_path=r'./figures/{loc}_monthly_series_temperature_12mthsTrMn_anomaly_{p}.png'.format(p=PERIOD, loc=LOCATION))
46 |     except Exception as e:
47 |         print(e)
48 | 
49 | if __name__ == '__main__':
50 |     main()
-------------------------------------------------------------------------------- /figures/daily_series_Kotzebue_1992.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/daily_series_Kotzebue_1992.png
-------------------------------------------------------------------------------- /figures/daily_series_Kotzebue_2017-2018_winter.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/daily_series_Kotzebue_2017-2018_winter.png -------------------------------------------------------------------------------- /figures/monthly_series_precipitation_12mthsTrMn_Kotzebue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/monthly_series_precipitation_12mthsTrMn_Kotzebue.png -------------------------------------------------------------------------------- /figures/monthly_series_precipitation_12mthsTrMn_Kotzebue_anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/monthly_series_precipitation_12mthsTrMn_Kotzebue_anomaly.png -------------------------------------------------------------------------------- /figures/monthly_series_temperature_12mthsTrMn_Kotzebue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/monthly_series_temperature_12mthsTrMn_Kotzebue.png -------------------------------------------------------------------------------- /figures/monthly_series_temperature_12mthsTrMn_Kotzebue_anomaly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/figures/monthly_series_temperature_12mthsTrMn_Kotzebue_anomaly.png -------------------------------------------------------------------------------- /noaaplotter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/__init__.py -------------------------------------------------------------------------------- /noaaplotter/noaaplotter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ######################## 5 | # Credits here 6 | # author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research 7 | # contact: ingmar.nitze@awi.de 8 | # version: 2021-09-06 9 | 10 | import numpy as np 11 | from matplotlib import dates 12 | 13 | ######################## 14 | from matplotlib import pyplot as plt 15 | 16 | from noaaplotter.utils.dataset import NOAAPlotterDailyClimateDataset as DS_daily 17 | from noaaplotter.utils.dataset import NOAAPlotterDailySummariesDataset as Dataset 18 | from noaaplotter.utils.dataset import NOAAPlotterMonthlyClimateDataset as DS_monthly 19 | from noaaplotter.utils.plot_utils import * 20 | from noaaplotter.utils.utils import * 21 | 22 | pd.plotting.register_matplotlib_converters() 23 | numeric_only = True 24 | 25 | 26 | class NOAAPlotter(object): 27 | """ 28 | This class/module creates nice plots of observed weather data from NOAA 29 | """ 30 | 31 | def __init__( 32 | self, 33 | input_filepath=None, 34 | location=None, 35 | remove_feb29=False, 36 | climate_start=dt.datetime(1981, 1, 1), 37 | climate_end=dt.datetime(2010, 12, 31), 38 | climate_filtersize=7, 39 | ): 40 | """ 41 | 42 | :param input_filepath: path to input file 43 | :type input_filepath: str 44 | 
:param location: name of location
45 |         :type location: str, optional
46 |         :param remove_feb29: remove 29 February from the data for uniform 365-day years
47 |         :type remove_feb29: bool, optional
48 |         :param climate_start: start date of climate period, defaults to 01-01-1981
49 |         :type climate_start: datetime, optional
50 |         :param climate_end: end date of climate period, defaults to 31-12-2010
51 |         :type climate_end: datetime, optional
52 |         """
53 |         self.input_filepath = input_filepath
54 |         self.location = location
55 |         self.climate_start = climate_start
56 |         self.climate_end = climate_end
57 |         self.remove_feb29 = remove_feb29
58 |         self.dataset = Dataset(
59 |             input_filepath, location=location, remove_feb29=remove_feb29
60 |         )
61 | 
62 |         # TODO: move to respective functions?
63 |         self.df_clim_ = DS_daily(self.dataset, filtersize=climate_filtersize)
64 | 
65 | 
66 |     def _make_short_dateseries(self, start_date, end_date):
67 |         x_dates = pd.DataFrame()
68 |         x_dates["DATE"] = pd.date_range(start=start_date, end=end_date)
69 |         x_dates["DATE_MD"] = x_dates["DATE"].dt.strftime("%m-%d")
70 |         # TODO: Filter Feb29
71 |         if self.dataset.data["DATE"].max() >= end_date:
72 |             x_dates_short = x_dates.set_index("DATE", drop=False).loc[
73 |                 pd.date_range(start=start_date, end=end_date)
74 |             ]
75 |         else:
76 |             x_dates_short = x_dates.set_index("DATE", drop=False).loc[
77 |                 pd.date_range(start=start_date, end=self.dataset.data["DATE"].max())
78 |             ]
79 | 
80 |         return x_dates, x_dates_short
81 | 
82 |     def plot_weather_series(
83 |         self,
84 |         start_date,
85 |         end_date,
86 |         plot_tmax="auto",
87 |         plot_tmin="auto",
88 |         plot_pmax="auto",
89 |         plot_snowmax="auto",
90 |         plot_extrema=True,
91 |         show_plot=True,
92 |         show_snow_accumulation=True,
93 |         save_path=False,
94 |         figsize=(9, 6),
95 |         legend_fontsize="x-small",
96 |         dpi=300,
97 |         title=None,
98 |         return_plot=False,
99 |     ):
100 |         """
101 |         Plotting function to show observed vs. climate temperatures and snowfall
102 |         :param dpi: figure resolution in dots per inch
103 |         :param legend_fontsize: font size of the legend
104 |         :param figsize: figure size in inches (width, height)
105 |         :param start_date: start date of plot
106 |         :type start_date: datetime, str
107 |         :param end_date: end date of plot
108 |         :type end_date: datetime, str
109 |         :param plot_tmax: upper limit of the temperature axis, or "auto"
110 |         :type plot_tmax: int, float, str
111 |         :param plot_tmin: lower limit of the temperature axis, or "auto"
112 |         :type plot_tmin: int, float, str
113 |         :param plot_pmax: upper limit of the precipitation axis, or "auto"
114 |         :type plot_pmax: int, float, str
115 |         :param plot_snowmax: upper limit of the snow-accumulation axis, or "auto"
116 |         :type plot_snowmax: int, float, str
117 |         :param plot_extrema: mark record highs/lows within the plotted period
118 |         :type plot_extrema: bool
119 |         :param show_plot: show the figure interactively
120 |         :type show_plot: bool
121 |         :param show_snow_accumulation: plot cumulative snowfall (requires a SNOW column)
122 |         :type show_snow_accumulation: bool
123 |         :param save_path: output path for the figure, or False to skip saving
124 |         :type save_path: str, bool
125 |         :return:
126 |         """
127 |         start_date = parse_dates(start_date)
128 |         end_date = parse_dates(end_date)
129 |         x_dates, x_dates_short = self._make_short_dateseries(start_date, end_date)
130 | 
131 |         df_clim = self.df_clim_.data.loc[x_dates["DATE_MD"]]
132 | 
133 |         df_clim["DATE"] = x_dates["DATE"].values
134 |         df_clim = df_clim.set_index("DATE", drop=False)
135 |         df_obs = self.dataset.data.set_index("DATE", drop=False).loc[
136 |             x_dates_short["DATE"]
137 |         ]
138 | 
139 |         clim_locs_short = x_dates_short[
140 |             "DATE"
141 |         ]  # short series for incomplete years (actual data)
142 | 
143 |         # get mean and mean +- standard deviation of daily mean temperatures of the climate series
144 |         y_clim = df_clim["tmean_doy_mean"]
145 |         y_clim_std_hi = df_clim[["tmean_doy_mean", "tmean_doy_std"]].sum(axis=1)
146 |         y_clim_std_lo = df_clim["tmean_doy_mean"] - df_clim["tmean_doy_std"]
147 | 
148 |         # Prepare data for filled plot areas
149 |         t_above =
np.vstack(
150 |             [df_obs["TMEAN"].values, y_clim.loc[clim_locs_short].values]
151 |         ).max(axis=0)
152 |         t_above_std = np.vstack(
153 |             [df_obs["TMEAN"].values, y_clim_std_hi.loc[clim_locs_short].values]
154 |         ).max(axis=0)
155 |         t_below = np.vstack(
156 |             [df_obs["TMEAN"].values, y_clim.loc[clim_locs_short].values]
157 |         ).min(axis=0)
158 |         t_below_std = np.vstack(
159 |             [df_obs["TMEAN"].values, y_clim_std_lo.loc[clim_locs_short].values]
160 |         ).min(axis=0)
161 | 
162 |         # Calculate the date of last snowfall and cumulative sum of snowfall
163 |         if not show_snow_accumulation:
164 |             pass
165 |         elif "SNOW" in df_obs.columns:
166 |             last_snow_date = df_obs[df_obs["SNOW"] > 0].iloc[-1]["DATE"]
167 |             snow_acc = np.cumsum(df_obs["SNOW"])
168 |         else:
169 |             show_snow_accumulation = False
170 |             print("No snow information available")
171 | 
172 |         # PLOT
173 |         fig = plt.figure(figsize=figsize, dpi=dpi)
174 |         ax_t = fig.add_subplot(211)
175 |         ax_p = fig.add_subplot(212, sharex=ax_t)
176 | 
177 |         # climate series: mean (black solid line) and +-1 std (red dashed lines)
178 |         (cm,) = ax_t.plot(x_dates["DATE"].values, y_clim.values, c="k", alpha=0.5, lw=2)
179 |         (cm_hi,) = ax_t.plot(
180 |             x_dates["DATE"].values,
181 |             y_clim_std_hi.values,
182 |             c="r",
183 |             ls="--",
184 |             alpha=0.4,
185 |             lw=1,
186 |         )
187 |         (cm_low,) = ax_t.plot(
188 |             x_dates["DATE"].values,
189 |             y_clim_std_lo.values,
190 |             c="r",
191 |             ls="--",
192 |             alpha=0.4,
193 |             lw=1,
194 |         )
195 | 
196 |         # observed series (grey line)
197 |         (fb,) = ax_t.plot(
198 |             x_dates_short["DATE"].values,
199 |             df_obs["TMEAN"].values,
200 |             c="k",
201 |             alpha=0.4,
202 |             lw=1.2,
203 |         )
204 | 
205 |         # difference between observed and climate (red/blue filled areas)
206 |         fill_r = ax_t.fill_between(
207 |             x_dates_short["DATE"].values,
208 |             y1=t_above,
209 |             y2=y_clim.loc[clim_locs_short].values,
210 |             facecolor="#d6604d",
211 |             alpha=0.5,
212 |         )
213 |         fill_rr = ax_t.fill_between(
214 |             x_dates_short["DATE"].values,
215 |             y1=t_above_std,
216 |             y2=y_clim_std_hi.loc[clim_locs_short].values,
217 |             facecolor="#d6604d",
218 |             alpha=0.7,
219 |         )
220 |         fill_b = ax_t.fill_between(
221 |             x_dates_short["DATE"].values,
222 |             y1=y_clim.loc[clim_locs_short].values,
223 |             y2=t_below,
224 |             facecolor="#4393c3",
225 |             alpha=0.5,
226 |         )
227 |         fill_bb = ax_t.fill_between(
228 |             x_dates_short["DATE"].values,
229 |             y1=y_clim_std_lo.loc[clim_locs_short].values,
230 |             y2=t_below_std,
231 |             facecolor="#4393c3",
232 |             alpha=0.7,
233 |         )
234 | 
235 |         # plot extremes
236 |         if plot_extrema:
237 |             tmax = self.dataset.data.groupby("DATE_MD").max(numeric_only=numeric_only)[
238 |                 "TMEAN"
239 |             ]
240 |             tmin = self.dataset.data.groupby("DATE_MD").min(numeric_only=numeric_only)[
241 |                 "TMEAN"
242 |             ]
243 |             local_obs = df_obs[["DATE", "DATE_MD", "TMEAN"]].set_index(
244 |                 "DATE_MD", drop=False
245 |             )
246 |             idx = local_obs.index
247 |             local_max = tmax.loc[idx] == local_obs["TMEAN"]
248 |             local_min = tmin.loc[idx] == local_obs["TMEAN"]
249 |             # extract x and y values
250 |             x_max = local_obs[local_max]["DATE"]
251 |             y_max = local_obs[local_max]["TMEAN"]
252 |             x_min = local_obs[local_min]["DATE"]
253 |             y_min = local_obs[local_min]["TMEAN"]
254 |             xtreme_hi = ax_t.scatter(
255 |                 x_max.values, y_max.values, c="#d6604d", marker="x"
256 |             )
257 |             xtreme_lo = ax_t.scatter(
258 |                 x_min.values, y_min.values, c="#4393c3", marker="x"
259 |             )
260 | 
261 |         xlim = ax_t.get_xlim()
262 |         ax_t.hlines(0, *xlim, linestyles="--")
263 |         # grid
264 |         ax_t.grid()
265 | 
266 |         # labels
267 |         ax_t.set_xlim(start_date,
end_date)
268 |         if not (plot_tmin == "auto" and plot_tmax == "auto"):
269 |             ax_t.set_ylim(plot_tmin, plot_tmax)
270 |         ax_t.set_ylabel("Temperature in °C")
271 |         ax_t.set_xlabel("Date")
272 |         if title:
273 |             ax_t.set_title(title)
274 | 
275 |         # add legend
276 |         legend_handle_t = [fb, cm, cm_hi, fill_r, fill_b]
277 |         legend_text_t = [
278 |             "Observed Temperatures",
279 |             "Climatological Mean",
280 |             "Std of Climatological Mean",
281 |             "Above average Temperature",
282 |             "Below average Temperature",
283 |         ]
284 |         if plot_extrema:
285 |             legend_handle_t.extend([xtreme_hi, xtreme_lo])
286 |             legend_text_t.extend(["Record High on Date", "Record Low on Date"])
287 | 
288 |         # PRECIPITATION #
289 |         # legend handles
290 |         legend_handle_p = []
291 |         legend_text_p = []
292 | 
293 |         # precipitation
294 |         rain = ax_p.bar(
295 |             x=x_dates_short["DATE"].values,
296 |             height=df_obs["PRCP"].values,
297 |             fc="#4393c3",
298 |             alpha=1,
299 |         )
300 |         legend_handle_p.append(rain)
301 |         legend_text_p.append("Precipitation")
302 | 
303 |         # grid
304 |         ax_p.grid()
305 |         # labels
306 |         ax_p.set_ylabel("Precipitation in mm")
307 |         ax_p.set_xlabel("Date")
308 |         # y-axis scaling
309 |         ax_p.set_ylim(bottom=0)
310 |         if isinstance(plot_pmax, (int, float)):
311 |             ax_p.set_ylim(top=plot_pmax)
312 | 
313 |         # snow
314 |         # TODO: make snowcheck
315 |         if (show_snow_accumulation) and ("SNOW" in df_obs.columns):
316 |             ax2_snow = ax_p.twinx()
317 |             # plots
318 |             sn_acc = ax2_snow.fill_between(
319 |                 x=x_dates_short.loc[:last_snow_date, "DATE"].values,
320 |                 y1=snow_acc.loc[:last_snow_date] / 10,
321 |                 facecolor="k",
322 |                 alpha=0.2,
323 |             )
324 |             _ = ax2_snow.plot(
325 |                 x_dates_short.loc[last_snow_date:, "DATE"].values,
326 |                 snow_acc.loc[last_snow_date:] / 10,
327 |                 c="k",
328 |                 alpha=0.2,
329 |                 ls="--",
330 |             )
331 |             # y-axis label
332 |             ax2_snow.set_ylabel("Cumulative Snowfall in cm")
333 |             # legend
334 |             legend_handle_p.append(sn_acc)
335 |             legend_text_p.append("Cumulative Snowfall")
336 |             # y-axis scaling
337 |             ax2_snow.set_ylim(bottom=0)
338 |             if isinstance(plot_snowmax, (int, float)):
339 |                 ax2_snow.set_ylim(top=plot_snowmax)
340 | 
341 |         # Show no-data areas (TODO: move to a function)
342 |         lo, hi = ax_t.get_ylim()
343 |         nanvals_t = x_dates_short["DATE"].loc[pd.isna(df_obs["TMEAN"])]
344 |         nan_bar_t = ax_t.bar(
345 |             x=nanvals_t,
346 |             height=hi - lo,
347 |             bottom=lo,
348 |             width=1,
349 |             edgecolor=None,
350 |             facecolor="k",
351 |             alpha=0.2,
352 |         )
353 |         if len(nan_bar_t) > 0:
354 |             legend_handle_t.append(nan_bar_t)
355 |             legend_text_t.append("No Data")
356 | 
357 |         lo, hi = ax_p.get_ylim()
358 |         nanvals_p = x_dates_short["DATE"].loc[pd.isna(df_obs["PRCP"])]
359 |         nan_bar_p = ax_p.bar(
360 |             x=nanvals_p,
361 |             height=hi - lo,
362 |             bottom=lo,
363 |             width=1,
364 |             edgecolor=None,
365 |             facecolor="k",
366 |             alpha=0.2,
367 |         )
368 |         if len(nan_bar_p) > 0:
369 |             legend_handle_p.append(nan_bar_p)
370 |             legend_text_p.append("No Data")
371 | 
372 |         # add Legends
373 |         ax_t.legend(
374 |             legend_handle_t,
375 |             legend_text_t,
376 |             loc="lower center",
377 |             fontsize=legend_fontsize,
378 |             ncol=4,
379 |             bbox_to_anchor=(0.5, 1.02),
380 |         )
381 |         ax_p.legend(
382 |             legend_handle_p, legend_text_p, loc="upper left", fontsize=legend_fontsize
383 |         )
384 | 
385 |         # set locator to monthly
386 |         locator = dates.MonthLocator()
387 |         ax_t.xaxis.set_major_locator(locator)
388 |         ax_p.xaxis.set_major_locator(locator)
389 |         plt.setp(
390 |             ax_t.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor"
391 |         )
392 |         plt.setp(
393 |             ax_p.get_xticklabels(),
rotation=45, ha="right", rotation_mode="anchor" 394 | ) 395 | fig.tight_layout() 396 | 397 | # Save Figure 398 | if save_path: 399 | fig.savefig(save_path) # , figsize=figsize, dpi=dpi) 400 | # Show plot if chosen, destroy figure object at the end 401 | if show_plot: 402 | plt.show() 403 | if return_plot: 404 | return fig 405 | else: 406 | plt.close(fig) 407 | 408 | def plot_monthly_barchart( 409 | self, 410 | start_date, 411 | end_date, 412 | information="Temperature", 413 | show_plot=True, 414 | anomaly=False, 415 | anomaly_type="absolute", 416 | trailing_mean=None, 417 | save_path=False, 418 | figsize=(9, 4), 419 | dpi=100, 420 | legend_fontsize="x-small", 421 | return_plot=False, 422 | ): 423 | # legend handles 424 | legend_handle = [] 425 | legend_text = [] 426 | 427 | # setup plot arguments 428 | plot_kwargs = setup_monthly_plot_props(information, anomaly) 429 | 430 | # Data Preprocessing 431 | if parse_dates(end_date) > self.dataset.data["DATE"].max(): 432 | end_date = self.dataset.data["DATE"].max() 433 | data_monthly = DS_monthly( 434 | self.dataset, start=self.dataset.data["DATE"].min(), end=end_date 435 | ) 436 | data_monthly.calculate_monthly_statistics() 437 | data_clim = DS_monthly( 438 | self.dataset, start=self.climate_start, end=self.climate_end 439 | ) 440 | data_clim.calculate_monthly_climate() 441 | 442 | data = data_monthly.monthly_aggregate.reset_index(drop=False) 443 | df_clim = data_clim.monthly_climate.reset_index(drop=False) 444 | 445 | if ( 446 | plot_kwargs["value_column"] == "prcp_diff" 447 | and df_clim["prcp_sum"].isna().any() 448 | ): 449 | print("Invalid precipitation values, information not available!") 450 | return None 451 | 452 | data["DATE"] = data.apply(lambda x: parse_dates_YM(x["DATE_YM"]), axis=1) 453 | data["Month"] = data.apply(lambda x: parse_dates_YM(x["DATE_YM"]).month, axis=1) 454 | data["Year"] = data.apply(lambda x: parse_dates_YM(x["DATE_YM"]).year, axis=1) 455 | data = ( 456 | data.set_index("Month", drop=False) 457 | .join(df_clim.set_index("Month", drop=False), rsuffix="_clim") 458 | .sort_values("DATE_YM") 459 | ) 460 | data["tmean_diff"] = data["tmean_doy_mean"] - data["tmean_doy_mean_clim"] 461 | data["prcp_diff"] = data["prcp_sum"] - data["prcp_sum_clim"] 462 | data = data.set_index("DATE", drop=False) 463 | 464 | # trailing mean calculation 465 | if trailing_mean: 466 | data = calc_trailing_mean( 467 | data, trailing_mean, plot_kwargs["value_column"], "trailing_values" 468 | ) 469 | 470 | # PLOT part 471 | fig = plt.figure(figsize=figsize, dpi=dpi) 472 | ax = fig.add_subplot(111) 473 | data_low = data[data[plot_kwargs["value_column"]] < 0] 474 | data_high = data[data[plot_kwargs["value_column"]] >= 0] 475 | bar_low = ax.bar( 476 | x=data_low["DATE"], 477 | height=data_low[plot_kwargs["value_column"]], 478 | width=30, 479 | align="edge", 480 | color=plot_kwargs["fc_low"], 481 | ) 482 | # Fix for absolute values 483 | if len(bar_low) > 1: 484 | legend_handle.append(bar_low) 485 | legend_text.append(plot_kwargs["legend_label_below"]) 486 | bar_high = ax.bar( 487 | x=data_high["DATE"], 488 | height=data_high[plot_kwargs["value_column"]], 489 | width=30, 490 | align="edge", 491 | color=plot_kwargs["fc_high"], 492 | ) 493 | legend_handle.append(bar_high) 494 | legend_text.append(plot_kwargs["legend_label_above"]) 495 | if trailing_mean: 496 | line_tr_mean = ax.plot(data["DATE"], data["trailing_values"], c="k") 497 | legend_handle.append(line_tr_mean[0]) 498 | legend_text.append("Trailing mean: {} months".format(trailing_mean)) 499 | 
ax.xaxis.set_major_locator(dates.YearLocator()) 500 | ax.tick_params(axis="x", rotation=90) 501 | ax.grid(True) 502 | 503 | # x-limit 504 | ax.set_xlim(start_date, end_date) 505 | 506 | # labels 507 | ax.set_ylabel(plot_kwargs["y_label"]) 508 | ax.set_xlabel("Date") 509 | ax.set_title(plot_kwargs["title"]) 510 | # add legend 511 | ax.legend(legend_handle, legend_text, loc="best", fontsize=legend_fontsize) 512 | 513 | fig.tight_layout() 514 | # Save Figure 515 | if save_path: 516 | fig.savefig(save_path) # , figsize=figsize, dpi=dpi) 517 | # Show plot if chosen, destroy figure object at the end 518 | if show_plot: 519 | plt.show() 520 | if return_plot: 521 | return fig 522 | else: 523 | plt.close(fig) 524 | -------------------------------------------------------------------------------- /noaaplotter/scripts/__pycache__/download_data.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/scripts/__pycache__/download_data.cpython-310.pyc -------------------------------------------------------------------------------- /noaaplotter/scripts/__pycache__/plot_daily.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/scripts/__pycache__/plot_daily.cpython-310.pyc -------------------------------------------------------------------------------- /noaaplotter/scripts/download_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # Imports 4 | import argparse 5 | 6 | from noaaplotter.utils.download_utils import download_from_noaa 7 | 8 | 9 | def main(): 10 | """ 11 | Main Function 12 | :return: 13 | """ 14 | ##### Parse arguments ##### 15 | parser = argparse.ArgumentParser(description="Parse arguments.") 16 | 17 | parser.add_argument( 18 | "-o", 19 | dest="output_file", 20 | type=str, 21 | required=True, 22 | default="data/parquet.csv", 23 | help="parquet file to save results", 24 | ) 25 | 26 | parser.add_argument( 27 | "-t", dest="token", type=str, required=False, default="", help="NOAA API token" 28 | ) 29 | 30 | parser.add_argument( 31 | "-sid", 32 | dest="station_id", 33 | type=str, 34 | required=False, 35 | default="", 36 | help='NOAA Station ID, e.g. 
"GHCND:USW00026616" for Kotzebue, only if loading through NOAA API', 37 | ) 38 | 39 | parser.add_argument( 40 | "-loc", 41 | dest="loc_name", 42 | type=str, 43 | required=False, 44 | default="", 45 | help="Location name", 46 | ) 47 | 48 | parser.add_argument( 49 | "-dt", 50 | dest="datatypes", 51 | type=list, 52 | required=False, 53 | default=["TMIN", "TMAX", "PRCP", "SNOW"], 54 | ) 55 | 56 | parser.add_argument( 57 | "-start", 58 | dest="start_date", 59 | type=str, 60 | required=True, 61 | help='start date of plot ("yyyy-mm-dd")', 62 | ) 63 | 64 | parser.add_argument( 65 | "-end", 66 | dest="end_date", 67 | type=str, 68 | required=True, 69 | help='end date of plot ("yyyy-mm-dd")', 70 | ) 71 | 72 | parser.add_argument( 73 | "-n_jobs", 74 | dest="n_jobs", 75 | type=int, 76 | required=False, 77 | default=1, 78 | help="number of parallel processes", 79 | ) 80 | 81 | args = parser.parse_args() 82 | 83 | download_from_noaa( 84 | output_file=args.output_file, 85 | start_date=args.start_date, 86 | end_date=args.end_date, 87 | datatypes=args.datatypes, 88 | noaa_api_token=args.token, 89 | loc_name=args.loc_name, 90 | station_id=args.station_id, 91 | n_jobs=args.n_jobs, 92 | ) 93 | 94 | 95 | if __name__ == "__main__": 96 | main() 97 | -------------------------------------------------------------------------------- /noaaplotter/scripts/download_data_ERA5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # Imports 4 | import argparse 5 | import os 6 | 7 | from src.download_utils import download_era5_from_gee 8 | 9 | 10 | def main(): 11 | """ 12 | Main Function 13 | :return: 14 | """ 15 | ##### Parse arguments ##### 16 | parser = argparse.ArgumentParser(description='Parse arguments.') 17 | 18 | parser.add_argument('-o', dest='output_file', type=str, required=True, 19 | default='data/data.csv', 20 | help='csv file to save results') 21 | 22 | parser.add_argument('-lat', dest='lat', type=float, required=True, 23 | help='Latitude of selected location') 24 | 25 | parser.add_argument('-lon', dest='lon', type=float, required=True, 26 | help='Longitude of selected location') 27 | 28 | parser.add_argument('-loc', dest='loc_name', type=str, required=False, 29 | default='', 30 | help='Location name') 31 | 32 | parser.add_argument('-dt', dest='datatypes', type=list, required=False, default=['TMIN', 'TMAX', 'PRCP', 'SNOW']) 33 | 34 | parser.add_argument('-start', dest='start_date', type=str, required=True, 35 | help='start date of plot ("yyyy-mm-dd")') 36 | 37 | parser.add_argument('-end', dest='end_date', type=str, required=True, 38 | help='end date of plot ("yyyy-mm-dd")') 39 | 40 | args = parser.parse_args() 41 | 42 | # remove file if exists 43 | if os.path.exists(args.output_file): 44 | os.remove(args.output_file) 45 | 46 | download_era5_from_gee(latitude=args.lat, 47 | longitude = args.lon, 48 | end_date= args.end_date, 49 | start_date = args.start_date, 50 | output_file = args.output_file) 51 | 52 | 53 | if __name__ == "__main__": 54 | main() -------------------------------------------------------------------------------- /noaaplotter/scripts/download_data_SST.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # Imports 4 | import argparse 5 | import csv 6 | from datetime import datetime 7 | import numpy as np 8 | import os 9 | import pandas as pd 10 | import tqdm 11 | from joblib import delayed, Parallel 12 | from 
--------------------------------------------------------------------------------
/noaaplotter/scripts/download_data_SST.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | # Imports
4 | import argparse
5 | import csv
6 | from datetime import datetime
7 | import numpy as np
8 | import os
9 | import pandas as pd
10 | import tqdm
11 | from joblib import delayed, Parallel
12 | from noaaplotter.utils.download_utils import dl_noaa_api
13 | import ee
14 | import geemap
15 | 
16 | def main():
17 |     """
18 |     Main Function
19 |     :return:
20 |     """
21 |     ##### Parse arguments #####
22 |     parser = argparse.ArgumentParser(description='Parse arguments.')
23 | 
24 |     parser.add_argument('-o', dest='output_file', type=str, required=True,
25 |                         default='data/data.csv',
26 |                         help='csv file to save results')
27 | 
28 |     parser.add_argument('-lat', dest='lat', type=float, required=True,
29 |                         help='Latitude of selected location')
30 | 
31 |     parser.add_argument('-lon', dest='lon', type=float, required=True,
32 |                         help='Longitude of selected location')
33 | 
34 |     parser.add_argument('-loc', dest='loc_name', type=str, required=False,
35 |                         default='',
36 |                         help='Location name')
37 | 
38 |     #parser.add_argument('-dt', dest='datatypes', type=list, required=False, default=['TMIN', 'TMAX', 'PRCP', 'SNOW'])
39 | 
40 |     parser.add_argument('-start', dest='start_date', type=str, required=True,
41 |                         help='start date of plot ("yyyy-mm-dd")')
42 | 
43 |     parser.add_argument('-end', dest='end_date', type=str, required=True,
44 |                         help='end date of plot ("yyyy-mm-dd")')
45 | 
46 |     args = parser.parse_args()
47 | 
48 |     # remove file if exists
49 |     if os.path.exists(args.output_file):
50 |         os.remove(args.output_file)
51 | 
52 |     ee.Initialize()
53 | 
54 |     EE_LAYER = "NOAA/CDR/OISST/V2_1"
55 | 
56 |     location = ee.Geometry.Point([args.lon, args.lat])
57 | 
58 |     # load ImageCollection
59 |     col = ee.ImageCollection(EE_LAYER).filterBounds(location).filterDate(args.start_date, args.end_date).select('sst')
60 | 
61 |     # Download data
62 |     print("Start downloading NOAA CDR OISST v02r01 data.")
63 |     print("Download may take a while.\n1yr: ~5 seconds\n10yrs: ~35 seconds\n50yrs: ~8 min")
64 | 
65 |     out_dict = geemap.extract_pixel_values(col, location, getInfo=True)
66 |     df_gee = pd.DataFrame(data=[out_dict.keys(), out_dict.values()]).T
67 | 
68 |     # parse dates and values
69 |     df_gee['time'] = df_gee[0].apply(lambda x: f'{x[:4]}-{x[4:6]}-{x[6:8]}')
70 |     df_gee['feature'] = df_gee[0].apply(lambda x: x[9:])
71 |     df_gee['value'] = df_gee[1]
72 | 
73 |     df = df_gee.pivot_table(values='value', columns=['feature'], index='time')
74 | 
75 |     # #### recalculate values (OISST stores sst as scaled integers, 0.01 degC per unit)
76 |     df_new = pd.DataFrame(index=df.index)
77 | 
78 |     temperature_cols = ['sst']
79 |     #precipitation_cols = ['total_precipitation']
80 |     df_joined = df_new.join(df[temperature_cols]*0.01)
81 | 
82 |     # Create Output: SST is a single daily value, so it is copied into TAVG/TMAX/TMIN
83 |     df_joined.reset_index(drop=False, inplace=True)
84 |     rename_dict = {'time': 'DATE', 'sst': 'TMAX'}
85 |     df_renamed = df_joined.rename(columns=rename_dict)
86 |     df_renamed['NAME'] = ''
87 |     df_renamed['STATION'] = ''
88 |     df_renamed['SNWD'] = ''
89 |     df_renamed['PRCP'] = ''
90 |     df_renamed['TAVG'] = df_renamed['TMAX']
91 |     df_renamed['TMIN'] = df_renamed['TMAX']
92 | 
93 |     output_cols = ["STATION","NAME","DATE","PRCP","SNWD","TAVG","TMAX","TMIN"]
94 |     df_save = df_renamed[output_cols].astype(str)
95 | 
96 |     df_save.to_csv(args.output_file, index=False)
97 | 
98 | 
99 | if __name__ == "__main__":
100 |     main()
101 | 
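# Usage sketch (hypothetical open-ocean coordinates; this script has no entry
# point in pyproject.toml, so it is run as a plain module file from the
# repository root):
#
#   python noaaplotter/scripts/download_data_SST.py -o data/chukchi_sst.csv \
#       -lat 68.0 -lon -168.0 -start 1982-01-01 -end 2020-12-31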
--------------------------------------------------------------------------------
/noaaplotter/scripts/plot_daily.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | from noaaplotter.noaaplotter import NOAAPlotter
4 | import argparse
5 | 
6 | def main():
7 |     """
8 |     Main Function
9 |     :return:
10 |     """
11 |     ##### Parse arguments #####
12 |     parser = argparse.ArgumentParser(description='Parse arguments.')
13 | 
14 |     parser.add_argument('-infile', dest='infile', type=str, required=True,
15 |                         default='data/temp.parquet',
16 |                         help='input file with climate data')
17 | 
18 |     parser.add_argument('-t', dest='token', type=str, required=False,
19 |                         default='',
20 |                         help='NOAA API token, only if loading through NOAA API')
21 | 
22 |     parser.add_argument('-sid', dest='station_id', type=str, required=False,
23 |                         default='',
24 |                         help='NOAA Station ID, e.g. "GHCND:USW00026616" for Kotzebue, only if loading through NOAA API')
25 | 
26 |     parser.add_argument('-start', dest='start_date', type=str, required=True,
27 |                         help='start date of plot ("yyyy-mm-dd")')
28 | 
29 |     parser.add_argument('-end', dest='end_date', type=str, required=True,
30 |                         help='end date of plot ("yyyy-mm-dd")')
31 | 
32 |     parser.add_argument('-loc', dest='location', required=False,
33 |                         type=str, default=None,
34 |                         help='Location name, must be in data file')
35 | 
36 |     parser.add_argument('-save_plot', dest='save_path', type=str, required=False,
37 |                         default=None,
38 |                         help='filepath for plot')
39 | 
40 |     parser.add_argument('-t_range', dest='t_range', type=float, nargs=2, required=False,
41 |                         default=[None, None],
42 |                         help='temperature range in plot')
43 | 
44 |     parser.add_argument('-p_range', dest='p_range', type=float, required=False,
45 |                         default=None,
46 |                         help='maximum precipitation value in plot')
47 | 
48 |     parser.add_argument('-s_range', dest='s_range', type=float, required=False,
49 |                         default=None,
50 |                         help='maximum snow accumulation value in plot')
51 | 
52 |     parser.add_argument('-snow_acc', dest='snow_acc', required=False,
53 |                         default=False, action='store_true',
54 |                         help='show snow accumulation, only useful for plotting winter season (e.g. July to June)')
55 | 
56 |     parser.add_argument('-filtersize', dest='filtersize', type=int, required=False,
57 |                         default=7,
58 |                         help='parameter to smooth climate temperature series by n days for smoother visual appearance. '
59 |                              'default value: 7')
60 | 
61 |     parser.add_argument('-dpi', dest='dpi', type=float, required=False,
62 |                         default=100,
63 |                         help='dpi for plot output')
64 | 
65 |     parser.add_argument('-plot', dest='show_plot', required=False,
66 |                         default=False, action='store_true',
67 |                         help='show interactive plot window')
68 | 
69 |     parser.add_argument('-figsize', dest='figsize', type=float, nargs=2, required=False,
70 |                         default=[9, 6],
71 |                         help='figure size in inches width x height. 15 10 recommended for 1 year, 30 10 for 2 years ...')
72 | 
73 |     parser.add_argument('-title', dest='title', type=str, required=False,
74 |                         default=None,
75 |                         help='Plot title')
76 | 
77 |     args = parser.parse_args()
78 | 
79 |     ##### Download from NOAA #####
80 | 
81 |     ##### Run Plotting function #####
82 |     n = NOAAPlotter(args.infile,
83 |                     location=args.location,
84 |                     climate_filtersize=args.filtersize)
85 | 
86 |     n.plot_weather_series(start_date=args.start_date,
87 |                           end_date=args.end_date,
88 |                           show_snow_accumulation=args.snow_acc,
89 |                           #kwargs_fig={'dpi':args.dpi, 'figsize':args.figsize},
90 |                           plot_extrema=True,
91 |                           show_plot=args.show_plot,
92 |                           save_path=args.save_path,
93 |                           plot_tmin=args.t_range[0],
94 |                           plot_tmax=args.t_range[1],
95 |                           plot_pmax=args.p_range,
96 |                           plot_snowmax=args.s_range,
97 |                           dpi=args.dpi,
98 |                           figsize=args.figsize,
99 |                           title=args.title)
100 | 
101 | if __name__ == "__main__":
102 |     main()
103 | 
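# Usage sketch (hypothetical paths; flags as defined above, entry point from
# pyproject.toml). A July-June window pairs well with -snow_acc:
#
#   plot_daily -infile data/kotzebue.parquet -start 2017-07-01 -end 2018-06-30 \
#       -loc Kotzebue -snow_acc -figsize 15 10 -save_plot figures/kotzebue_winter.png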
--------------------------------------------------------------------------------
/noaaplotter/scripts/plot_monthly.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | from noaaplotter.noaaplotter import NOAAPlotter
4 | import argparse
5 | 
6 | def main():
7 |     """
8 |     Main Function
9 |     :return:
10 |     """
11 |     ##### Parse arguments #####
12 |     parser = argparse.ArgumentParser(description='Parse arguments.')
13 | 
14 |     parser.add_argument('-infile', dest='infile', type=str, required=True,
15 |                         help='input file with climate data')
16 | 
17 |     parser.add_argument('-start', dest='start_date', type=str, required=True,
18 |                         help='start date of plot ("yyyy-mm-dd")')
19 | 
20 |     parser.add_argument('-end', dest='end_date', type=str, required=True,
21 |                         help='end date of plot ("yyyy-mm-dd")')
22 | 
23 |     parser.add_argument('-loc', dest='location', required=False,
24 |                         type=str, default=None,
25 |                         help='Location name, must be in data file')
26 | 
27 |     parser.add_argument('-save_plot', dest='save_path', type=str, required=False,
28 |                         default=None,
29 |                         help='filepath for plot')
30 | 
31 |     parser.add_argument('-type', dest='type', type=str, required=True,
32 |                         help='Attribute Type: {Temperature, Precipitation}',
33 |                         default='Temperature')
34 | 
35 |     parser.add_argument('-trail', dest='trailing_mean', type=int, required=False,
36 |                         default=None,
37 |                         help='trailing/rolling mean value in months')
38 | 
39 |     parser.add_argument('-anomaly', dest='anomaly', required=False,
40 |                         default=False, action='store_true',
41 |                         help='show anomaly from climate')
42 | 
43 |     parser.add_argument('-dpi', dest='dpi', type=float, required=False,
44 |                         default=100,
45 |                         help='dpi for plot output')
46 | 
47 |     parser.add_argument('-plot', dest='show_plot', required=False,
48 |                         default=False, action='store_true',
49 |                         help='show interactive plot window')
50 | 
51 |     parser.add_argument('-figsize', dest='figsize', type=float, nargs=2, required=False,
52 |                         default=[9, 4],
53 |                         help='figure size in inches width x height. 9 4 recommended for 30 years')
54 | 
55 |     args = parser.parse_args()
56 | 
57 |     ##### Run Plotting function #####
58 |     n = NOAAPlotter(args.infile,
59 |                     location=args.location)
60 | 
61 |     n.plot_monthly_barchart(args.start_date,
62 |                             args.end_date,
63 |                             information=args.type,
64 |                             anomaly=args.anomaly,
65 |                             trailing_mean=args.trailing_mean,
66 |                             show_plot=args.show_plot,
67 |                             dpi=args.dpi,
68 |                             figsize=args.figsize,
69 |                             save_path=args.save_path)
70 | 
71 | if __name__ == "__main__":
72 |     main()
73 | 
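# Usage sketch (hypothetical paths): a 12-month trailing mean of the monthly
# temperature anomaly, similar to the repository's example figures:
#
#   plot_monthly -infile data/kotzebue.parquet -start 1990-01-01 -end 2020-12-31 \
#       -type Temperature -anomaly -trail 12 -save_plot figures/kotzebue_monthly.png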
--------------------------------------------------------------------------------
/noaaplotter/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__init__.py
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/dataset.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/dataset.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/download_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/download_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/plot_utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/plot_utils.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/__pycache__/utils.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/initze/noaaplotter/065976916a685302927ed196c3c8735920e6f14d/noaaplotter/utils/__pycache__/utils.cpython-310.pyc
--------------------------------------------------------------------------------
/noaaplotter/utils/dataset.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | 
4 | ########################
5 | # Credits here
6 | # author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research
7 | # contact: ingmar.nitze@awi.de
8 | # version: 2020-12-09
9 | 
10 | ########################
11 | import os
12 | import time
13 | 
14 | import numpy as np
15 | import polars as pl
16 | 
17 | from .utils import *
18 | 
19 | NUMERIC_ONLY = True
20 | 
21 | 
22 | class NOAAPlotterDailySummariesDataset(object):
23 |     """
24 |     This class loads and prepares observed NOAA daily-summary weather data for plotting
25 |     """
26 | 
27 |     def __init__(self, input_filepath=None, location=None, remove_feb29=False):
28 |         self.input_switch = None
29 |         self.input_filepath = input_filepath
30 |         self.location = location
31 |         self.noaa_token = None
32 |         self.noaa_location = None
33 |         self.remove_feb29 = remove_feb29
34 |         self.data = None
35 |         self._check_data_loading()
36 |         if self.input_switch == "file":
37 |             self._load_file()
38 |         elif self.input_switch == "noaa_api":
39 |             self._load_noaa()
40 |         self._validate_location()
41 |         self._update_datatypes()
42 |         self._get_datestring()
43 |         self._get_tmean()
44 |         self._remove_feb29()
45 |         self._filter_to_location()
46 | 
47 |     def print_locations(self):
48 |         """
49 |         Print all location names
50 |         """
51 |         print(self.data["NAME"].unique())
52 | 
53 |     def _check_data_loading(self):
54 |         """
55 |         check whether all requirements for one of the loading options are met
56 |         File loading:
57 |         * input_filepath
58 |         """
59 |         if os.path.exists(self.input_filepath):
60 |             self.input_switch = "file"
61 |         elif self.noaa_token and self.noaa_location:
62 |             self.input_switch = "noaa_api"
63 |         else:
64 |             raise ImportError(
65 |                 "Please enter either correct file path or noaa station_id and API token"
66 |             )
67 | 
68 |     def _load_file(self):
69 |         """
70 |         load parquet file into pandas DataFrame
71 |         :return:
72 |         """
73 |         data = pl.read_parquet(self.input_filepath).to_pandas()
74 |         if "__index_level_0__" in data.columns:
75 |             data = data.drop(columns=["__index_level_0__"])
76 |         self.data = data
77 | 
78 |     def _load_noaa(self):
79 |         """
80 |         load data through NOAA API
81 |         """
82 |         pass
83 | 
84 |     def _save_noaa(self):
85 |         """
86 |         save loaded NOAA API data to temporary csv file
87 |         """
88 | 
89 |     def _validate_location(self):
90 |         """
91 |         raise error and message if location name cannot be found
92 |         :return:
93 |         """
94 |         if not self.location and len(pd.unique(self.data["NAME"])) == 1:
95 |             pass
96 |         elif not self.location and len(pd.unique(self.data["NAME"])) > 1:
97 |             raise ValueError(
98 |                 "There is more than one location in the dataset. Please choose a location using the -loc option! "
99 |                 "Valid Location identifiers: {0} ".format(self.data["NAME"].unique())
100 |             )
101 |         else:
102 |             filt = self.data["NAME"].str.lower().str.contains(self.location.lower())
103 |             if filt.sum() == 0:
104 |                 raise ValueError(
105 |                     "Location Name is not valid! Valid Location identifiers: {0}".format(
106 |                         self.data["NAME"].unique()
107 |                     )
108 |                 )
109 | 
110 |     def _update_datatypes(self):
111 |         """
112 |         define 'DATE' as datetime
113 |         :return:
114 |         """
115 |         self.data["DATE"] = pd.to_datetime(self.data["DATE"])
116 | 
117 |     def _get_datestring(self):
118 |         """
119 |         write specific date formats
120 |         :return:
121 |         """
122 |         self.data["DATE_MD"] = self.data["DATE"].dt.strftime("%m-%d")
123 |         self.data["DATE_YM"] = self.data["DATE"].dt.strftime("%Y-%m")
124 |         self.data["DATE_M"] = self.data["DATE"].dt.strftime("%m")
125 | 
126 |     def _get_tmean(self):
127 |         """
128 |         calculate mean daily temperature from min and max
129 |         :return:
130 |         """
131 |         # TODO: check for cases where TMIN and TMAX are empty (e.g. Schonefeld). 
There TAVG is the main field 132 | self.data["TMEAN"] = self.data[["TMIN", "TMAX"]].mean(axis=1) 133 | 134 | def _remove_feb29(self): 135 | """ 136 | Function to remove February 29 from the data 137 | :return: 138 | """ 139 | if self.remove_feb29: 140 | self.data = self.data[self.data["DATE_MD"] != "02-29"] 141 | 142 | def _filter_to_location(self): 143 | """ 144 | Filter dataset to the defined location 145 | :return: 146 | """ 147 | if self.location: 148 | filt = self.data["NAME"].str.lower().str.contains(self.location.lower()) 149 | if len(filt) > 0: 150 | self.data = self.data.loc[filt] 151 | else: 152 | raise ValueError("Location Name is not valid") 153 | 154 | def filter_to_climate(self, climate_start, climate_end): 155 | """ 156 | Function to create filtered dataset covering the defined climate normal period 157 | :return: 158 | """ 159 | df_clim = self.data[ 160 | (self.data["DATE"] >= climate_start) & (self.data["DATE"] <= climate_end) 161 | ] 162 | return df_clim 163 | 164 | @staticmethod 165 | def get_monthly_stats(df): 166 | """ 167 | calculate monthly statistics 168 | :param df: 169 | :type df: pandas.DataFrame 170 | :return: 171 | """ 172 | df_out = pd.DataFrame() 173 | df_out["tmean_doy_mean"] = ( 174 | df[["DATE", "TMEAN"]] 175 | .groupby(df["DATE_YM"]) 176 | .mean(numeric_only=NUMERIC_ONLY) 177 | .TMEAN 178 | ) 179 | df_out["tmean_doy_std"] = ( 180 | df[["DATE", "TMEAN"]] 181 | .groupby(df["DATE_YM"]) 182 | .std(numeric_only=NUMERIC_ONLY) 183 | .TMEAN 184 | ) 185 | df_out["tmax_doy_max"] = ( 186 | df[["DATE", "TMAX"]] 187 | .groupby(df["DATE_YM"]) 188 | .max(numeric_only=NUMERIC_ONLY) 189 | .TMAX 190 | ) 191 | df_out["tmax_doy_std"] = ( 192 | df[["DATE", "TMAX"]] 193 | .groupby(df["DATE_YM"]) 194 | .std(numeric_only=NUMERIC_ONLY) 195 | .TMAX 196 | ) 197 | df_out["tmin_doy_min"] = ( 198 | df[["DATE", "TMIN"]] 199 | .groupby(df["DATE_YM"]) 200 | .min(numeric_only=NUMERIC_ONLY) 201 | .TMIN 202 | ) 203 | df_out["tmin_doy_std"] = ( 204 | df[["DATE", "TMIN"]] 205 | .groupby(df["DATE_YM"]) 206 | .std(numeric_only=NUMERIC_ONLY) 207 | .TMIN 208 | ) 209 | if "SNOW" in df.columns: 210 | df_out["snow_doy_mean"] = ( 211 | df[["DATE", "SNOW"]] 212 | .groupby(df["DATE_YM"]) 213 | .mean(numeric_only=NUMERIC_ONLY) 214 | .SNOW 215 | ) 216 | df_out["prcp_sum"] = ( 217 | df[["DATE", "PRCP"]] 218 | .groupby(df["DATE_YM"]) 219 | .sum(numeric_only=NUMERIC_ONLY) 220 | .PRCP 221 | ) 222 | return df_out 223 | 224 | @staticmethod 225 | def get_monthy_climate(df): 226 | """ 227 | :param df: 228 | :return: 229 | """ 230 | df_out = pd.DataFrame() 231 | df = df.data 232 | df["Month"] = ( 233 | df.reset_index().apply(lambda x: int(x["DATE_MD"][:2]), axis=1).values 234 | ) 235 | df_out["tmean_mean"] = ( 236 | df[["Month", "TMEAN"]] 237 | .groupby(df["Month"]) 238 | .mean(numeric_only=NUMERIC_ONLY) 239 | .TMEAN 240 | ) 241 | df_out["tmean_std"] = ( 242 | df[["Month", "TMEAN"]] 243 | .groupby(df["Month"]) 244 | .std(numeric_only=NUMERIC_ONLY) 245 | .TMEAN 246 | ) 247 | df_out["tmax_max"] = ( 248 | df[["Month", "TMAX"]] 249 | .groupby(df["Month"]) 250 | .max(numeric_only=NUMERIC_ONLY) 251 | .TMAX 252 | ) 253 | df_out["tmax_std"] = ( 254 | df[["Month", "TMAX"]] 255 | .groupby(df["Month"]) 256 | .std(numeric_only=NUMERIC_ONLY) 257 | .TMAX 258 | ) 259 | df_out["tmin_min"] = ( 260 | df[["Month", "TMIN"]] 261 | .groupby(df["Month"]) 262 | .min(numeric_only=NUMERIC_ONLY) 263 | .TMIN 264 | ) 265 | df_out["tmin_std"] = ( 266 | df[["Month", "TMIN"]] 267 | .groupby(df["Month"]) 268 | .std(numeric_only=NUMERIC_ONLY) 269 
|             .TMIN
270 |         )
271 |         if "SNOW" in df.columns:
272 |             df_out["snow_mean"] = (
273 |                 df[["Month", "SNOW"]]
274 |                 .groupby(df["Month"])
275 |                 .mean(numeric_only=NUMERIC_ONLY)
276 |                 .SNOW
277 |             )
278 |         unique_years = len(
279 |             np.unique(df.apply(lambda x: parse_dates_YM(x["DATE_YM"]).year, axis=1))
280 |         )
281 |         df_out["prcp_mean"] = (
282 |             df[["Month", "PRCP"]]
283 |             .groupby(df["Month"])
284 |             .sum(numeric_only=NUMERIC_ONLY)
285 |             .PRCP
286 |             / unique_years  # mean monthly total over the period (was: .mean() * unique_years)
287 |         )
288 |         return df_out.reset_index(drop=False)
289 | 
290 | 
291 | class NOAAPlotterDailyClimateDataset(object):
292 |     # TODO: split the main class into subclasses for daily/monthly
293 |     def __init__(
294 |         self,
295 |         daily_dataset,
296 |         start="1981-01-01",
297 |         end="2010-12-31",
298 |         filtersize=7,
299 |         impute_feb29=True,
300 |     ):
301 |         """
302 |         :param start:
303 |         :param end:
304 |         :param filtersize:
305 |         :param impute_feb29:
306 |         """
307 |         self.start = parse_dates(start)
308 |         self.end = parse_dates(end)
309 |         self.filtersize = filtersize
310 |         self.impute_feb29 = impute_feb29
311 |         self.daily_dataset = daily_dataset
312 |         self.data_daily = None
313 |         self.data = None
314 |         self.date_range_valid = False
315 | 
316 |         # validate date range
317 |         self._validate_date_range()
318 |         # filter daily to date range
319 |         self._filter_to_climate()
320 |         # calculate daily statistics
321 |         self._calculate_climate_statistics()
322 |         # mean imputation for 29 February
323 |         self._impute_feb29()
324 |         # filter if desired
325 |         start_time = time.time()
326 |         self._run_filter()
327 |         # self._run_filter_polars()
328 |         end_time = time.time()
329 |         print(f"_run_filter took {end_time - start_time:.2f} seconds to run.")
330 |         # make completeness report
331 | 
332 |     def _validate_date_range(self):
333 |         if self.daily_dataset.data["DATE"].max() >= self.end:
334 |             if self.daily_dataset.data["DATE"].min() <= self.start:
335 |                 self.date_range_valid = True
336 |             else:
337 |                 raise ValueError("Dataset is insufficient to calculate climate normals!")
338 | 
339 |     def _filter_to_climate(self):
340 |         """
341 |         filter the daily data to the climate-normal period
342 |         :return:
343 |         """
344 |         df_clim = self.daily_dataset.data[
345 |             (self.daily_dataset.data["DATE"] >= self.start)
346 |             & (self.daily_dataset.data["DATE"] <= self.end)
347 |         ]
348 |         df_clim = df_clim[(df_clim["DATE_MD"] != "02-29")]
349 |         self.data_daily = df_clim
350 | 
351 |     def _calculate_climate_statistics(self):
352 |         """
353 |         Function to calculate major statistics
354 |         :param self.data_daily:
355 |         :type self.data_daily: pandas.DataFrame
356 |         :return:
357 |         """
358 |         df_out = pd.DataFrame()
359 |         df_out["tmean_doy_mean"] = (
360 |             self.data_daily[["DATE", "TMEAN"]]
361 |             .groupby(self.data_daily["DATE_MD"])
362 |             .mean(numeric_only=NUMERIC_ONLY)
363 |             .TMEAN
364 |         )
365 |         df_out["tmean_doy_std"] = (
366 |             self.data_daily[["DATE", "TMEAN"]]
367 |             .groupby(self.data_daily["DATE_MD"])
368 |             .std(numeric_only=NUMERIC_ONLY)
369 |             .TMEAN
370 |         )
371 |         df_out["tmean_doy_max"] = (
372 |             self.data_daily[["DATE", "TMEAN"]]
373 |             .groupby(self.data_daily["DATE_MD"])
374 |             .max(numeric_only=NUMERIC_ONLY)
375 |             .TMEAN
376 |         )
377 |         df_out["tmean_doy_min"] = (
378 |             self.data_daily[["DATE", "TMEAN"]]
379 |             .groupby(self.data_daily["DATE_MD"])
380 |             .min(numeric_only=NUMERIC_ONLY)
381 |             .TMEAN
382 |         )
383 |         df_out["tmax_doy_max"] = (
384 |             self.data_daily[["DATE", "TMAX"]]
385 |             .groupby(self.data_daily["DATE_MD"])
386 |             .max(numeric_only=NUMERIC_ONLY)
387 |             .TMAX
388 |         )
389 |         df_out["tmax_doy_std"] = (
390 |             self.data_daily[["DATE", "TMAX"]]
391 |             .groupby(self.data_daily["DATE_MD"])
392 |             .std(numeric_only=NUMERIC_ONLY)
393 |             .TMAX
394 |         )
395 |         df_out["tmin_doy_min"] = (
396 |             self.data_daily[["DATE", "TMIN"]]
397 |             .groupby(self.data_daily["DATE_MD"])
398 |             .min(numeric_only=NUMERIC_ONLY)
399 |             .TMIN
400 |         )
401 |         df_out["tmin_doy_std"] = (
402 |             self.data_daily[["DATE", "TMIN"]]
403 |             .groupby(self.data_daily["DATE_MD"])
404 |             .std(numeric_only=NUMERIC_ONLY)
405 |             .TMIN
406 |         )
407 |         if "SNOW" in self.data_daily.columns:
408 |             df_out["snow_doy_mean"] = (
409 |                 self.data_daily[["DATE", "SNOW"]]
410 |                 .groupby(self.data_daily["DATE_MD"])
411 |                 .mean(numeric_only=NUMERIC_ONLY)
412 |                 .SNOW
413 |             )
414 |         self.data = df_out
415 | 
416 |     def _impute_feb29(self):
417 |         """
418 |         Function for mean imputation of February 29.
419 |         :return:
420 |         """
421 |         if self.impute_feb29:
422 |             self.data.loc["02-29"] = self.data.loc["02-28":"03-01"].mean(axis=0)
423 |             self.data.sort_index(inplace=True)
424 | 
425 |     def _run_filter(self):
426 |         """
427 |         Function to run rolling mean filter on climate series to smooth out short fluctuations (padded circularly across the year boundary)
428 |         :return:
429 |         """
430 |         if self.filtersize % 2 != 0:
431 |             data_roll = (
432 |                 pd.concat(
433 |                     [
434 |                         self.data.iloc[-self.filtersize :],
435 |                         self.data,
436 |                         self.data[: self.filtersize],
437 |                     ]
438 |                 )
439 |                 .rolling(self.filtersize)
440 |                 .mean()
441 |             )
442 |             self.data = data_roll[self.filtersize : -self.filtersize]
443 | 
444 |     # TODO: produces different results than pandas version
445 |     def _run_filter_polars(self):
446 |         """
447 |         Function to run rolling mean filter on climate series to smooth out short fluctuations using Polars
448 |         """
449 |         if self.filtersize % 2 != 0:
450 |             # Convert pandas DataFrame to Polars DataFrame
451 |             idx = self.data.index
452 |             df = pl.from_pandas(self.data)
453 | 
454 |             # Prepare data for rolling operation
455 |             extended_df = pl.concat(
456 |                 [df.tail(self.filtersize), df, df.head(self.filtersize)]
457 |             )
458 | 
459 |             # Apply rolling mean
460 |             rolled = extended_df.select(
461 |                 [pl.all().rolling_mean(window_size=self.filtersize)]
462 |             )
463 | 
464 |             # Slice the result to match original data size
465 |             result = rolled.slice(self.filtersize, len(df)).to_pandas()
466 |             result.index = idx
467 | 
468 |             # Convert back to pandas DataFrame and update self.data
469 |             self.data = result
470 |         else:
471 |             raise ValueError("Filter size must be odd")
472 | 
473 |     def _make_report(self):
474 |         """
475 |         Function to create report on climate data completeness
476 |         :return:
477 |         """
478 |         # input climate series (e.g. 1981-01-01 - 2010-12-31)
479 |         pass
480 | 
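# A minimal sketch (with a hypothetical toy series) of the circular smoothing
# performed by NOAAPlotterDailyClimateDataset._run_filter above: the
# day-of-year climatology is padded with copies of its own tail and head so
# the rolling mean wraps cleanly across the 12-31/01-01 boundary.
#
#   import pandas as pd
#   doy_clim = pd.Series([-20.1, -20.4, -19.8, -19.9])  # stand-in climatology column
#   fs = 3                                              # filtersize (must be odd)
#   padded = pd.concat([doy_clim.iloc[-fs:], doy_clim, doy_clim[:fs]])
#   smoothed = padded.rolling(fs).mean()[fs:-fs]        # same length as doy_clim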
481 | 
482 | class NOAAPlotterMonthlyClimateDataset(object):
483 |     def __init__(
484 |         self, daily_dataset, start="1981-01-01", end="2010-12-31", impute_feb29=True
485 |     ):
486 |         self.daily_dataset = daily_dataset
487 |         self.monthly_aggregate = None
488 |         self.start = parse_dates(start)
489 |         self.end = parse_dates(end)
490 |         self.impute_feb29 = impute_feb29
491 |         self._validate_date_range()
492 | 
493 |     def _validate_date_range(self):
494 |         if self.daily_dataset.data["DATE"].max() >= self.end:
495 |             if self.daily_dataset.data["DATE"].min() <= self.start:
496 |                 self.date_range_valid = True
497 |             else:
498 |                 raise ValueError("Dataset is insufficient to calculate climate normals!")
499 | 
500 |     def _filter_to_climate(self):
501 |         """
502 |         filter the daily data to the climate-normal period
503 |         :return:
504 |         """
505 |         df_clim = self.daily_dataset.data[
506 |             (self.daily_dataset.data["DATE"] >= self.start)
507 |             & (self.daily_dataset.data["DATE"] <= self.end)
508 |         ]
509 |         df_clim = df_clim[(df_clim["DATE_MD"] != "02-29")]
510 |         self.data_daily = df_clim
511 | 
512 |     def filter_to_date(self):
513 |         """
514 |         filter the daily data to the selected date range
515 |         :return:
516 |         """
517 |         df_clim = self.daily_dataset.data[
518 |             (self.daily_dataset.data["DATE"] >= self.start)
519 |             & (self.daily_dataset.data["DATE"] <= self.end)
520 |         ]
521 |         df_clim = df_clim[(df_clim["DATE_MD"] != "02-29")]
522 |         return df_clim
523 | 
524 |     def _impute_feb29(self):
525 |         """
526 |         Function for mean imputation of February 29.
527 |         :return:
528 |         """
529 |         pass
530 | 
531 |     def calculate_monthly_statistics(self):
532 |         """
533 |         Function to calculate monthly statistics.
534 |         :return:
535 |         """
536 | 
537 |         df_out = pd.DataFrame()
538 |         data_filtered = self.filter_to_date()
539 |         df_out["tmean_doy_mean"] = (
540 |             data_filtered[["TMEAN"]]
541 |             .groupby(data_filtered["DATE_YM"])
542 |             .agg(lambda x: x.mean() if x.notna().any() else np.nan)
543 |             .TMEAN
544 |         )
545 |         df_out["tmean_doy_std"] = (
546 |             data_filtered[["TMEAN"]]
547 |             .groupby(data_filtered["DATE_YM"])
548 |             .agg(lambda x: x.std() if x.notna().any() else np.nan)
549 |             .TMEAN
550 |         )
551 |         df_out["tmax_doy_max"] = (
552 |             data_filtered[["TMAX"]]
553 |             .groupby(data_filtered["DATE_YM"])
554 |             .agg(lambda x: x.max() if x.notna().any() else np.nan)
555 |             .TMAX
556 |         )
557 |         df_out["tmax_doy_std"] = (
558 |             data_filtered[["TMAX"]]
559 |             .groupby(data_filtered["DATE_YM"])
560 |             .agg(lambda x: x.std() if x.notna().any() else np.nan)
561 |             .TMAX
562 |         )
563 |         df_out["tmin_doy_min"] = (
564 |             data_filtered[["TMIN"]]
565 |             .groupby(data_filtered["DATE_YM"])
566 |             .agg(lambda x: x.min() if x.notna().any() else np.nan)
567 |             .TMIN
568 |         )
569 |         df_out["tmin_doy_std"] = (
570 |             data_filtered[["TMIN"]]
571 |             .groupby(data_filtered["DATE_YM"])
572 |             .agg(lambda x: x.std() if x.notna().any() else np.nan)
573 |             .TMIN
574 |         )
575 |         if "SNOW" in data_filtered.columns:
576 |             df_out["snow_doy_mean"] = (
577 |                 data_filtered[["SNOW"]]
578 |                 .groupby(data_filtered["DATE_YM"])
579 |                 .agg(lambda x: x.mean() if x.notna().any() else np.nan)
580 |                 .SNOW
581 |             )
582 |         df_out["prcp_sum"] = (
583 |             data_filtered[["PRCP"]]
584 |             .groupby(data_filtered["DATE_YM"])
585 |             .agg(lambda x: x.sum() if x.notna().any() else np.nan)
586 |             .PRCP
587 |         )
588 |         self.monthly_aggregate = df_out
589 | 
590 |     def calculate_monthly_climate(self):
591 |         """
592 |         Function to calculate monthly climate statistics. 
593 | :return: 594 | """ 595 | df_out = pd.DataFrame() 596 | data_filtered = self.filter_to_date() 597 | 598 | data_filtered["DATE"] = data_filtered.apply( 599 | lambda x: parse_dates_YM(x["DATE_YM"]), axis=1 600 | ) 601 | data_filtered["Month"] = data_filtered.apply( 602 | lambda x: parse_dates_YM(x["DATE_YM"]).month, axis=1 603 | ) 604 | data_filtered["Year"] = data_filtered.apply( 605 | lambda x: parse_dates_YM(x["DATE_YM"]).year, axis=1 606 | ) 607 | 608 | df_out["tmean_doy_mean"] = ( 609 | data_filtered[["DATE", "TMEAN"]] 610 | .groupby(data_filtered["Month"]) 611 | .mean(numeric_only=NUMERIC_ONLY) 612 | .TMEAN 613 | ) 614 | df_out["tmean_doy_std"] = ( 615 | data_filtered[["DATE", "TMEAN"]] 616 | .groupby(data_filtered["Month"]) 617 | .std(numeric_only=NUMERIC_ONLY) 618 | .TMEAN 619 | ) 620 | df_out["tmax_doy_max"] = ( 621 | data_filtered[["DATE", "TMAX"]] 622 | .groupby(data_filtered["Month"]) 623 | .max(numeric_only=NUMERIC_ONLY) 624 | .TMAX 625 | ) 626 | df_out["tmax_doy_std"] = ( 627 | data_filtered[["DATE", "TMAX"]] 628 | .groupby(data_filtered["Month"]) 629 | .std(numeric_only=NUMERIC_ONLY) 630 | .TMAX 631 | ) 632 | df_out["tmin_doy_min"] = ( 633 | data_filtered[["DATE", "TMIN"]] 634 | .groupby(data_filtered["Month"]) 635 | .min(numeric_only=NUMERIC_ONLY) 636 | .TMIN 637 | ) 638 | df_out["tmin_doy_std"] = ( 639 | data_filtered[["DATE", "TMIN"]] 640 | .groupby(data_filtered["Month"]) 641 | .std(numeric_only=NUMERIC_ONLY) 642 | .TMIN 643 | ) 644 | if "SNOW" in data_filtered.columns: 645 | df_out["snow_doy_mean"] = ( 646 | data_filtered[["DATE", "SNOW"]] 647 | .groupby(data_filtered["Month"]) 648 | .mean(numeric_only=NUMERIC_ONLY) 649 | .SNOW 650 | ) 651 | df_out["prcp_sum"] = ( 652 | data_filtered[["DATE", "PRCP"]] 653 | .groupby(data_filtered["Month"]) 654 | .mean(numeric_only=NUMERIC_ONLY) 655 | .PRCP 656 | * 30 657 | ) 658 | # df_out = df_out.set_index('DATE_YM', drop=False) 659 | self.monthly_climate = df_out 660 | 661 | def _make_report(self): 662 | """ 663 | Function to create report on climate data completeness 664 | :return: 665 | """ 666 | # input climate series (e.g. 
1981-01-01 - 2010-12-31) 667 | 668 | pass 669 | -------------------------------------------------------------------------------- /noaaplotter/utils/download_utils.py: -------------------------------------------------------------------------------- 1 | import datetime as dt 2 | import json 3 | import os 4 | from datetime import datetime, timedelta 5 | 6 | import ee 7 | import geemap 8 | import numpy as np 9 | import pandas as pd 10 | import polars as pl 11 | import requests 12 | import tqdm 13 | from joblib import Parallel, delayed 14 | 15 | from noaaplotter.utils.utils import assign_numeric_datatypes 16 | 17 | 18 | # move some logic outside 19 | def download_from_noaa( 20 | output_file, 21 | start_date, 22 | end_date, 23 | datatypes, 24 | loc_name, 25 | station_id, 26 | noaa_api_token, 27 | n_jobs=4, 28 | ): 29 | # Check if file exists and load it 30 | if os.path.exists(output_file): 31 | existing_df = pl.read_parquet(output_file).drop_nulls(subset='STATION') 32 | existing_dates = set(existing_df['DATE'].to_list()) 33 | else: 34 | existing_df = None 35 | existing_dates = set() 36 | 37 | # Convert datestrings to datetime 38 | dt_start = datetime.strptime(start_date, "%Y-%m-%d") 39 | dt_end = datetime.strptime(end_date, "%Y-%m-%d") 40 | 41 | # Calculate date range 42 | all_dates = set(pd.date_range(start=dt_start, end=dt_end).strftime("%Y-%m-%d")) 43 | missing_dates = sorted(list(all_dates - existing_dates)) 44 | 45 | if not missing_dates: 46 | print("No new data to download.") 47 | return 0 48 | 49 | # Find contiguous date ranges to download 50 | date_ranges = [] 51 | range_start = missing_dates[0] 52 | prev_date = datetime.strptime(missing_dates[0], "%Y-%m-%d") 53 | 54 | for date_str in missing_dates[1:] + [None]: # Add None to handle the last range 55 | if date_str is None or datetime.strptime(date_str, "%Y-%m-%d") - prev_date > timedelta(days=1): 56 | date_ranges.append((range_start, prev_date.strftime("%Y-%m-%d"))) 57 | if date_str is not None: 58 | range_start = date_str 59 | prev_date = datetime.strptime(date_str, "%Y-%m-%d") if date_str else None 60 | 61 | # Data Loading 62 | print("Downloading missing data through NOAA API") 63 | all_new_data = [] 64 | 65 | for start, end in date_ranges: 66 | print(f"Downloading data from {start} to {end}") 67 | n_days = (datetime.strptime(end, "%Y-%m-%d") - datetime.strptime(start, "%Y-%m-%d")).days + 1 68 | split_size = np.floor(1000 / len(datatypes)) 69 | split_range = np.arange(0, n_days, split_size) 70 | 71 | datasets_list = Parallel(n_jobs=n_jobs)( 72 | delayed(dl_noaa_api)( 73 | i, datatypes, station_id, noaa_api_token, start, end, split_size 74 | ) 75 | for i in tqdm.tqdm(split_range[:]) 76 | ) 77 | 78 | # Drop empty/None from datasets_list 79 | datasets_list = [i for i in datasets_list if i is not None] 80 | all_new_data.extend(datasets_list) 81 | 82 | # Merge subsets and create DataFrame 83 | df = pd.concat(all_new_data) 84 | 85 | df_pivot = assign_numeric_datatypes(df) 86 | df_pivot["DATE"] = df_pivot.apply( 87 | lambda x: datetime.fromisoformat(x["DATE"]).strftime("%Y-%m-%d"), axis=1 88 | ) 89 | 90 | df_pivot = df_pivot.reset_index(drop=False) 91 | dr = pd.DataFrame(pd.date_range(start=start_date, end=end_date), columns=["DATE"]) 92 | dr["DATE"] = dr["DATE"].astype(str) 93 | df_merged = pd.concat( 94 | [df_pivot.set_index("DATE"), dr.set_index("DATE")], 95 | join="outer", 96 | axis=1, 97 | sort=True, 98 | ) 99 | df_merged["DATE"] = df_merged.index 100 | df_merged["NAME"] = loc_name 101 | if "TAVG" not in df_merged.columns: 102 | 
df_merged["TAVG"] = None 103 | if "SNWD" not in df_merged.columns: 104 | df_merged["SNWD"] = None 105 | final_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"] 106 | df_final = df_merged[final_cols] 107 | df_final = df_final.replace({np.nan: None}) 108 | 109 | # Merge with existing data if it exists 110 | if existing_df is not None: 111 | df_final = pd.concat([existing_df.to_pandas(), df_final]).drop_duplicates(subset=["DATE"], keep="last") 112 | 113 | print(f"Saving data to {output_file}") 114 | df_final.to_parquet(output_file) 115 | return 0 116 | 117 | 118 | 119 | def dl_noaa_api(i, dtypes, station_id, Token, date_start, date_end, split_size): 120 | """ 121 | function to download from NOAA API 122 | """ 123 | dt_start = dt.datetime.strptime(date_start, "%Y-%m-%d") 124 | dt_end = dt.datetime.strptime(date_end, "%Y-%m-%d") 125 | 126 | split_start = dt_start + timedelta(days=i) 127 | split_end = dt_start + timedelta(days=i + split_size - 1) 128 | if split_end > dt_end: 129 | split_end = dt_end 130 | 131 | date_start_split = split_start.strftime("%Y-%m-%d") 132 | date_end_split = split_end.strftime("%Y-%m-%d") 133 | 134 | # make the api call 135 | request_url = "https://www.ncei.noaa.gov/access/services/data/v1" 136 | request_params = dict( 137 | dataset="daily-summaries", 138 | dataTypes=dtypes, # ['PRCP', 'TMIN', 'TMAX'], 139 | stations=station_id, 140 | limit=1000, 141 | startDate=date_start_split, 142 | endDate=date_end_split, 143 | units="metric", 144 | format="json", 145 | ) 146 | r = requests.get(request_url, params=request_params, headers={"token": Token}) 147 | 148 | # workaround to skip empty returns (no data within period) 149 | try: 150 | # load the api response as a json 151 | d = json.loads(r.text) 152 | result = pd.DataFrame(d) 153 | except json.JSONDecodeError: 154 | print( 155 | f"Warning: No data available for period {date_start_split} to {date_end_split}. Skipping." 
156 | ) 157 | result = None 158 | return result 159 | 160 | 161 | def download_era5_from_gee(latitude, longitude, end_date, start_date, output_file): 162 | ee.Initialize() 163 | EE_LAYER = "ECMWF/ERA5/DAILY" 164 | location = ee.Geometry.Point([longitude, latitude]) 165 | # load ImageCollection 166 | col = ( 167 | ee.ImageCollection(EE_LAYER) 168 | .filterBounds(location) 169 | .filterDate(start_date, end_date) 170 | ) 171 | # Download data 172 | print("Start downloading daily ERA5 data.") 173 | print( 174 | "Download may take a while.\n1yr: ~5 seconds\n10yrs: ~35 seconds\n50yrs: ~8 min" 175 | ) 176 | result = geemap.extract_pixel_values(col, region=location) 177 | out_dict = result.getInfo() 178 | df_gee = pd.DataFrame(data=[out_dict.keys(), out_dict.values()]).T 179 | # parse dates and values 180 | df_gee["time"] = df_gee[0].apply(lambda x: f"{x[:4]}-{x[4:6]}-{x[6:8]}") 181 | df_gee["feature"] = df_gee[0].apply(lambda x: x[9:]) 182 | df_gee["value"] = df_gee[1] 183 | df = df_gee.pivot_table( 184 | values="value", columns=["feature"], index="time" 185 | ) # .reset_index(drop=False) 186 | # #### recalculate values 187 | df_new = pd.DataFrame(index=df.index) 188 | temperature_cols = [ 189 | "mean_2m_air_temperature", 190 | "minimum_2m_air_temperature", 191 | "maximum_2m_air_temperature", 192 | "dewpoint_2m_temperature", 193 | ] 194 | precipitation_cols = ["total_precipitation"] 195 | df_joined = ( 196 | df_new.join(df[temperature_cols] - 273.15) 197 | .join(df[precipitation_cols] * 1e3) 198 | .reset_index(drop=False) 199 | ) 200 | # Create Output 201 | rename_dict = { 202 | "time": "DATE", 203 | "total_precipitation": "PRCP", 204 | "mean_2m_air_temperature": "TAVG", 205 | "maximum_2m_air_temperature": "TMAX", 206 | "minimum_2m_air_temperature": "TMIN", 207 | } 208 | df_renamed = df_joined.rename(columns=rename_dict) 209 | df_renamed["NAME"] = "" 210 | df_renamed["STATION"] = "" 211 | df_renamed["SNWD"] = "" 212 | output_cols = ["STATION", "NAME", "DATE", "PRCP", "SNWD", "TAVG", "TMAX", "TMIN"] 213 | df_save = df_renamed[output_cols].astype(str) 214 | df_save.to_csv(output_file, index=False) 215 | -------------------------------------------------------------------------------- /noaaplotter/utils/plot_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ######################## 5 | # Credits here 6 | # author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research 7 | # contact: ingmar.nitze@awi.de 8 | # version: 2021-09-11 9 | 10 | ######################## 11 | 12 | # TODO: move to external file 13 | def setup_monthly_plot_props(information, anomaly): 14 | plot_kwargs = {} 15 | if information.lower() == 'temperature': 16 | plot_kwargs['cmap'] = 'RdBu_r' 17 | plot_kwargs['fc_low'] = '#4393c3' 18 | plot_kwargs['fc_high'] = '#d6604d' 19 | if anomaly: 20 | plot_kwargs['value_column'] = 'tmean_diff' 21 | plot_kwargs['y_label'] = 'Temperature departure [°C]' 22 | plot_kwargs['title'] = 'Monthly departure from climatological mean (1981-2010)' 23 | plot_kwargs['legend_label_above'] = 'Above average' 24 | plot_kwargs['legend_label_below'] = 'Below average' 25 | else: 26 | plot_kwargs['value_column'] = 'tmean_doy_mean' 27 | plot_kwargs['y_label'] = 'Temperature [°C]' 28 | plot_kwargs['title'] = 'Monthly Mean Temperature' 29 | plot_kwargs['legend_label_above'] = 'Above freezing' 30 | plot_kwargs['legend_label_below'] = 'Below freezing' 31 | 32 | elif information.lower() == 'precipitation': 33 | 
plot_kwargs['fc_low'] = '#d6604d'
34 |         plot_kwargs['fc_high'] = '#4393c3'
35 |         if anomaly:
36 |             plot_kwargs['cmap'] = 'RdBu'
37 |             plot_kwargs['value_column'] = 'prcp_diff'
38 |             plot_kwargs['y_label'] = 'Precipitation departure [mm]'
39 |             plot_kwargs['title'] = 'Monthly departure from climatological mean (1981-2010)'
40 |             plot_kwargs['legend_label_above'] = 'Above average'
41 |             plot_kwargs['legend_label_below'] = 'Below average'
42 |         else:
43 |             plot_kwargs['cmap'] = 'Blues'
44 |             plot_kwargs['value_column'] = 'prcp_sum'
45 |             plot_kwargs['y_label'] = 'Precipitation [mm]'
46 |             plot_kwargs['title'] = 'Monthly Precipitation'
47 |             plot_kwargs['legend_label_below'] = ''
48 |             plot_kwargs['legend_label_above'] = 'Monthly Precipitation'
49 |     return plot_kwargs
--------------------------------------------------------------------------------
/noaaplotter/utils/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | 
4 | ########################
5 | # Credits here
6 | # author: Ingmar Nitze, Alfred Wegener Institute for Polar and Marine Research
7 | # contact: ingmar.nitze@awi.de
8 | # version: 2020-12-09
9 | 
10 | ########################
11 | import datetime as dt
12 | from datetime import timedelta
13 | import requests, json
14 | import pandas as pd
15 | 
16 | 
17 | 
18 | 
19 | 
20 | def parse_dates(date):
21 |     """
22 | 
23 |     :param date:
24 |     :return:
25 |     """
26 |     if isinstance(date, str):
27 |         return dt.datetime.strptime(date, '%Y-%m-%d')
28 |     elif isinstance(date, dt.datetime) or isinstance(date, dt.date):
29 |         return date
30 |     else:
31 |         raise ValueError('Wrong date format. Either use native datetime format or "YYYY-mm-dd"')
32 | 
33 | 
34 | def calc_trailing_mean(df, length, feature, new_feature):
35 |     """
36 |     :param df:
37 |     :param length:
38 |     :param feature:
39 |     :param new_feature:
40 |     :return:
41 | 
42 |     """
43 |     df[new_feature] = df[feature].rolling(length).mean()
44 |     return df
45 | 
46 | 
47 | def parse_dates_YM(date):
48 |     """
49 |     :param date:
50 |     :return:
51 |     """
52 |     if isinstance(date, str):
53 |         return dt.datetime.strptime(date, '%Y-%m')
54 |     elif isinstance(date, dt.datetime):
55 |         return date
56 |     else:
57 |         raise ValueError('Wrong date format. Either use native datetime format or "YYYY-mm"')
58 | 
59 | 
60 | def dl_noaa_api(i, dtypes, station_id, Token, date_start, date_end, split_size):
61 |     """
62 |     function to download from NOAA API
63 |     """
64 |     dt_start = dt.datetime.strptime(date_start, '%Y-%m-%d')
65 |     dt_end = dt.datetime.strptime(date_end, '%Y-%m-%d')
66 | 
67 |     split_start = dt_start + timedelta(days=i)
68 |     split_end = dt_start + timedelta(days=i + split_size - 1)
69 |     if split_end > dt_end:
70 |         split_end = dt_end
71 | 
72 |     date_start_split = split_start.strftime('%Y-%m-%d')
73 |     date_end_split = split_end.strftime('%Y-%m-%d')
74 | 
75 |     # make the api call
76 |     request_url = 'https://www.ncei.noaa.gov/access/services/data/v1'
77 |     request_params = dict(
78 |         dataset='daily-summaries',
79 |         dataTypes=dtypes,  # ['PRCP', 'TMIN', 'TMAX']
80 |         stations=station_id,
81 |         limit=1000,
82 |         startDate=date_start_split,
83 |         endDate=date_end_split,
84 |         units='metric',
85 |         format='json'
86 |     )
87 |     r = requests.get(
88 |         request_url,
89 |         params=request_params,
90 |         headers={'token': Token})
91 | 
92 |     # workaround to skip empty returns (no data within period)
93 |     try:
94 |         # load the api response as a json
95 |         d = json.loads(r.text)
96 |         result = pd.DataFrame(d)
97 |     except json.JSONDecodeError:
98 |         print(f"Warning: No data available for period {date_start_split} to {date_end_split}. Skipping.")
99 |         result = None
100 |     return result
101 | 
102 | 
103 | def assign_numeric_datatypes(df):
104 |     for col in df.columns:
105 |         if df[col].dtype == 'object':
106 |             try:
107 |                 df[col] = pd.to_numeric(df[col])
108 |             except (ValueError, TypeError):
109 |                 pass
110 |     return df
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>0.70", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [project]
6 | name = "noaaplotter"
7 | version = "0.5.4"
8 | description = "Package to plot fancy climate/weather data of NOAA"
9 | requires-python = ">=3.11"
10 | authors = [
11 |     { name = "Ingmar Nitze", email = "ingmar.nitze@awi.de" }
12 | ]
13 | license = { text = "" }
14 | readme = "README.md"
15 | # homepage = "https://github.com/initze/noaaplotter"
16 | keywords = ["climate", "weather", "NOAA", "plotting"]
17 | 
18 | dependencies = [
19 |     "pandas>=2.2",
20 |     "numpy>=2,<3",
21 |     "matplotlib>=3.9",
22 |     "requests",
23 |     "joblib>=1.4",
24 |     "tqdm>=4.67",
25 |     "geemap>=0.35.1",
26 |     "polars>=1.18.0",
27 |     "pyarrow>=18.1.0",
28 |     "box>=0.1.5",
29 |     "setuptools>=75.6.0",
30 |     "narwhals>=1.20.1",
31 | ]
32 | 
33 | [tool.setuptools.packages.find]
34 | include = ["noaaplotter*"]
35 | 
36 | [project.scripts]
37 | plot_daily = "noaaplotter.scripts.plot_daily:main"
38 | plot_monthly = "noaaplotter.scripts.plot_monthly:main"
39 | download_data = "noaaplotter.scripts.download_data:main"
40 | download_data_ERA5 = "noaaplotter.scripts.download_data_ERA5:main"
--------------------------------------------------------------------------------