├── .github └── workflows │ └── python-app.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── configs ├── callbacks │ ├── default.yaml │ ├── none.yaml │ └── wandb.yaml ├── config.yaml ├── datamodule │ ├── netcdf_datamodule.yaml │ └── netcdf_datamodule_gcp.yaml ├── experiment │ ├── baseline.yaml │ ├── conv3d.yaml │ ├── conv3d_nwp.yaml │ ├── conv3d_sat_nwp.yaml │ ├── example_simple.yaml │ ├── perceiver.yaml │ ├── perceiver_conv3d_sat_nwp.yaml │ └── perceiver_sat_nwp.yaml ├── hparams_search │ └── conv3d_optuna.yaml ├── hydra │ └── default.yaml ├── logger │ ├── comet.yaml │ ├── csv.yaml │ ├── many_loggers.yaml │ ├── mlflow.yaml │ ├── neptune.yaml │ ├── tensorboard.yaml │ └── wandb.yaml ├── model │ ├── baseline.yaml │ ├── conv3d.yaml │ ├── conv3d_nwp.yaml │ ├── conv3d_sat_nwp.yaml │ ├── perceiver.yaml │ ├── perceiver_conv3d_sat_nwp.yaml │ └── perceiver_sat_nwp.yaml ├── readme.md └── trainer │ ├── all_params.yaml │ └── default.yaml ├── environment.yml ├── experiments ├── 001_CNN_concat_all_timesteps_as_channels.py ├── 002_cnn_processes_single_sat_image_then_rnn.py ├── 003_perceiver_processes_single_sat_image_then_rnn.py ├── 2021-08 │ ├── 2021-08-17 │ │ └── run_cnn3d.py │ ├── 2021-08-18 │ │ ├── Run baseline model.ipynb │ │ ├── run_baseline.py │ │ └── run_cnn3d.py │ ├── 2021-08-24 │ │ ├── run_cnn3d.py │ │ └── run_cnn3d_n_layers.py │ ├── 2021-08-26 │ │ └── run_cnn3d_n_layers.py │ ├── 2021-08-27 │ │ ├── experiments.md │ │ └── run_baseline.py │ └── 2021-08-31 │ │ ├── conv3d.py │ │ └── experiments.txt ├── 2021-09 │ ├── 2021-09-03 │ │ ├── conv3d.py │ │ └── experiments.txt │ ├── 2021-09-24 │ │ └── experiments.txt │ ├── 2021-09-27 │ │ └── experiments.txt │ └── 2021-09-28 │ │ └── experiments.txt ├── 2021-10 │ └── 2021-10-01 │ │ └── experiment.txt ├── 2021-11 │ ├── 2021-11-22.txt │ └── 2021-11-25.txt └── plot_003.ipynb ├── notebooks ├── 03_simplify_data_loading_remove_gpu_super_batch.ipynb ├── 04_simplify_data_loading_multiple_cpu_batches.ipynb ├── 05_more_image_inputs.ipynb ├── 06_skip_connections.ipynb ├── 07_multiple_historical_images.ipynb ├── 08_multiple_historical_images_as_separate_channels.ipynb ├── 09_horizon_represented_as_a_stripe.ipynb ├── 10_just_conv.ipynb ├── 11_just_conv_and_conv_over_time.ipynb ├── 12_just_3d_conv.ipynb ├── 13_3d_conv_with_optical_flow_predictions.ipynb ├── 14_back_to_2d_conv_AE.ipynb ├── 15_int16.ipynb ├── 16_maxpool.ipynb ├── 20.0_simplify_data_loading.ipynb ├── 20.1_simplify_data_loading.ipynb ├── 21.0_include_PV_data.ipynb ├── 22.0_refactor_data_loading_to_quickly_load_NWP.ipynb ├── 23.0_dask_client.ipynb ├── 24.0_dask_client_in_separate_worker_process.ipynb ├── 25.0_dask_client_in_separate_manually_defined_process.ipynb ├── 26.0_dask_client_in_separate_manually_defined_process_get_in_separate_thread.ipynb ├── 27.0_dask_client_in_separate_manually_defined_process_get_in_separate_thread_multiple_writer_processes.ipynb ├── 28.0_manual_processes.ipynb ├── dask_experiments.ipynb ├── debug_gcsfs_multiprocessing_issue.ipynb ├── lightning_experiment_with_loading_data_into_GPU.ipynb ├── lightning_experiment_with_loading_data_into_GPU_v2.ipynb ├── mean_and_std_of_satellite_imagery.ipynb ├── optical_flow_1.ipynb ├── sat_data_loader_1_multiple_chunks_on_gpu.ipynb └── sat_data_loader_2_lightning_1_chunk_on_gpu.ipynb ├── predict_pv_yield ├── __init__.py ├── data │ └── dataloader.py ├── models │ ├── __init__.py │ ├── base_model.py │ ├── baseline │ │ ├── last_value.py │ │ └── readme.md │ ├── conv3d │ │ ├── architect.png │ │ ├── conv3d_sat_nwp.png │ │ ├── 
model.py │ │ ├── model_nwp.py │ │ ├── model_sat_nwp.py │ │ └── readme.md │ ├── layers │ │ └── __init__.py │ └── perceiver │ │ ├── perceiver.py │ │ ├── perceiver_conv3d_nwp_sat.py │ │ └── perceiver_nwp_sat.py ├── netcdf_dataset.py ├── training.py └── utils.py ├── requirements.txt ├── run.py ├── setup.py ├── tests ├── __init__.py ├── configs │ ├── dataset │ │ └── configuration.yaml │ ├── experiment │ │ └── example_simple.yaml │ └── model │ │ ├── conv3d.yaml │ │ ├── conv3d_gsp.yaml │ │ ├── conv3d_nwp.yaml │ │ └── conv3d_sat_nwp.yaml ├── conftest.py ├── models │ ├── baseline │ │ ├── test_baseline_model.py │ │ └── test_baseline_model_gsp.py │ ├── conv3d │ │ ├── test_conv3d_model.py │ │ ├── test_conv3d_model_gsp.py │ │ ├── test_conv3d_model_nwp.py │ │ └── test_conv3d_model_sat_nwp.py │ └── perceiver │ │ ├── test_perceiver.py │ │ ├── test_perceiver_conv3d_sat_nwp.py │ │ ├── test_perceiver_gsp.py │ │ └── test_perceiver_sat_nwp.py ├── test_training.py └── test_utils.py └── weights └── conv3d └── readme.md /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: [push, pull_request] 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.9 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.9 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install flake8 pytest 23 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 24 | pip install -e . 25 | - name: Lint with flake8 26 | run: | 27 | # stop the build if there are Python syntax errors or undefined names 28 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 29 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 30 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 31 | - name: Test with pytest 32 | run: | 33 | pytest -s --cov=predict_pv_yield 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.9 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.1.0 7 | hooks: 8 | # list of supported hooks: https://pre-commit.com/hooks.html 9 | - id: trailing-whitespace 10 | - id: end-of-file-fixer 11 | - id: check-yaml 12 | - id: debug-statements 13 | - id: detect-private-key 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Open Climate Fix Ltd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intro 2 | Early experiments on predicting solar electricity generation over the next few hours, using deep learning, satellite imagery, and as many other data sources as we can think of :) 3 | 4 | These experiments are focused on predicting solar PV yield. 5 | 6 | Please see [SatFlow](https://github.com/openclimatefix/satflow/) for complementary experiments on predicting the next few hours of satellite imagery (i.e. trying to predict how clouds are going to move!) 7 | 8 | And please see [OCF's Nowcasting page](https://github.com/openclimatefix/nowcasting) for more context. 9 | 10 | # Installation 11 | 12 | From within the cloned `predict_pv_yield` directory: 13 | 14 | ``` 15 | conda env create -f environment.yml 16 | conda activate predict_pv_yield 17 | pip install -e . 18 | ``` 19 | -------------------------------------------------------------------------------- /configs/callbacks/default.yaml: -------------------------------------------------------------------------------- 1 | model_checkpoint: 2 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 3 | monitor: "MSE/Validation_epoch" # name of the logged metric which determines when the model is improving 4 | mode: "min" # can be "max" or "min" 5 | save_top_k: 1 # save k best models (determined by above metric) 6 | save_last: True # additionally always save the model from the last epoch 7 | verbose: False 8 | dirpath: "checkpoints/" 9 | filename: "epoch_{epoch:03d}" 10 | auto_insert_metric_name: False 11 | 12 | early_stopping: 13 | _target_: pytorch_lightning.callbacks.EarlyStopping 14 | monitor: "MSE/Validation_epoch" # name of the logged metric which determines when the model is improving 15 | mode: "min" # can be "max" or "min" 16 | patience: 5 # how many epochs of no improvement until training stops 17 | min_delta: 0 # minimum change in the monitored metric needed to qualify as an improvement 18 | -------------------------------------------------------------------------------- /configs/callbacks/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/configs/callbacks/none.yaml -------------------------------------------------------------------------------- /configs/callbacks/wandb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - default.yaml 3 | 4 | watch_model: 5 | _target_: src.callbacks.wandb_callbacks.WatchModel 6 | log: "all" 7 | log_freq: 100 8 | 9 | upload_code_as_artifact: 10 | _target_: src.callbacks.wandb_callbacks.UploadCodeAsArtifact 11 | code_dir: ${work_dir}/src 12 | 13 | upload_ckpts_as_artifact: 14 | _target_: src.callbacks.wandb_callbacks.UploadCheckpointsAsArtifact 15 | ckpt_dir: "checkpoints/" 16 | upload_best_only: True 17 | 18 | log_f1_precision_recall_heatmap: 19 | _target_: src.callbacks.wandb_callbacks.LogF1PrecRecHeatmap 20 | 21 | log_confusion_matrix: 22 | _target_: src.callbacks.wandb_callbacks.LogConfusionMatrix 23 | 24 | log_image_predictions: 25 | _target_:
src.callbacks.wandb_callbacks.LogImagePredictions 26 | num_samples: 8 27 | -------------------------------------------------------------------------------- /configs/config.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # specify here default training configuration 4 | defaults: 5 | - trainer: default.yaml 6 | - model: conv3d.yaml 7 | - datamodule: netcdf_datamodule.yaml 8 | - callbacks: default.yaml # set this to null if you don't want to use callbacks 9 | - logger: neptune # set logger here or use command line (e.g. `python run.py logger=wandb`) 10 | 11 | - experiment: null 12 | - hparams_search: null 13 | 14 | - hydra: default.yaml 15 | 16 | # enable color logging 17 | # - override hydra/hydra_logging: colorlog 18 | # - override hydra/job_logging: colorlog 19 | 20 | # path to original working directory 21 | # hydra hijacks working directory by changing it to the current log directory, 22 | # so it's useful to have this path as a special variable 23 | # learn more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory 24 | work_dir: ${hydra:runtime.cwd} 25 | 26 | # path to folder with data 27 | data_dir: ${work_dir}/data/ 28 | 29 | # use `python run.py debug=true` for easy debugging! 30 | # this will run 1 train, val and test loop with only 1 batch 31 | # equivalent to running `python run.py trainer.fast_dev_run=true` 32 | # (this is placed here just for easier access from command line) 33 | debug: False 34 | 35 | # pretty print config at the start of the run using Rich library 36 | print_config: True 37 | 38 | # disable python warnings if they annoy you 39 | ignore_warnings: True 40 | 41 | # check performance on test set, using the best model achieved during training 42 | # lightning chooses best model based on metric specified in checkpoint callback 43 | test_after_training: True 44 | -------------------------------------------------------------------------------- /configs/datamodule/netcdf_datamodule.yaml: -------------------------------------------------------------------------------- 1 | _target_: nowcasting_dataloader.datamodules.NetCDFDataModule 2 | 3 | temp_path: "." 4 | n_train_data: 4000 5 | n_val_data: 400 6 | num_workers: 8 7 | pin_memory: True 8 | data_path: "/mnt/storage_ssd_4tb/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/prepared_ML_training_data/v15/" 9 | fake_data: False 10 | shuffle_train: True 11 | -------------------------------------------------------------------------------- /configs/datamodule/netcdf_datamodule_gcp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.data.dataloader.NetCDFDataModule 2 | 3 | temp_path: "." 
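For reference, `run.py` consumes these datamodule configs through Hydra's `_target_` key; a minimal sketch of the equivalent direct call (assuming only `hydra-core` and `omegaconf`, both already listed in `environment.yml`):

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# OmegaConf parses the YAML; instantiate() imports the class named in
# `_target_` and calls it with the remaining keys as keyword arguments.
cfg = OmegaConf.load("configs/datamodule/netcdf_datamodule.yaml")
datamodule = instantiate(cfg)  # -> NetCDFDataModule(temp_path=".", n_train_data=4000, ...)
```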
4 | n_train_data: 24900 5 | n_val_data: 1000 6 | num_workers: 8 7 | pin_memory: True 8 | data_path: "gs://solar-pv-nowcasting-data/prepared_ML_training_data/v6/" 9 | fake_data: False 10 | -------------------------------------------------------------------------------- /configs/experiment/baseline.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: baseline.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | validate_only: '1' # by putting this key in the config file, the model does not get trained. 18 | 19 | trainer: 20 | min_epochs: 1 21 | max_epochs: 1 22 | 23 | datamodule: 24 | n_train_data: 2 25 | n_val_data: 10 26 | -------------------------------------------------------------------------------- /configs/experiment/conv3d.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: conv3d.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | 26 | model: 27 | conv3d_channels: 32 28 | -------------------------------------------------------------------------------- /configs/experiment/conv3d_nwp.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: conv3d_nwp.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | 26 | model: 27 | conv3d_channels: 32 28 | -------------------------------------------------------------------------------- /configs/experiment/conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: conv3d_sat_nwp.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 
| - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | 26 | model: 27 | conv3d_channels: 32 28 | -------------------------------------------------------------------------------- /configs/experiment/example_simple.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: baseline.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 2 21 | 22 | datamodule: 23 | n_train_data: 2 24 | n_val_data: 2 25 | fake_data: 1 26 | 27 | validate_only: '1' # by putting this key in the config file, the model does not get trained. 28 | -------------------------------------------------------------------------------- /configs/experiment/perceiver.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: perceiver.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | -------------------------------------------------------------------------------- /configs/experiment/perceiver_conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: perceiver_conv3d_sat_nwp.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 50 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | -------------------------------------------------------------------------------- /configs/experiment/perceiver_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py 
experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: perceiver_sat_nwp.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | -------------------------------------------------------------------------------- /configs/hparams_search/conv3d_optuna.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # example hyperparameter optimization of some experiment with Optuna: 4 | # python run.py -m hparams_search=conv3d_optuna experiment=conv3d_sat_nwp 5 | 6 | defaults: 7 | - override /hydra/sweeper: optuna 8 | 9 | # choose metric which will be optimized by Optuna 10 | optimized_metric: "MSE/Validation_epoch" 11 | 12 | hydra: 13 | # here we define Optuna hyperparameter search 14 | # it optimizes for value returned from function with @hydra.main decorator 15 | # learn more here: https://hydra.cc/docs/next/plugins/optuna_sweeper 16 | sweeper: 17 | _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper 18 | storage: null 19 | study_name: null 20 | n_jobs: 1 21 | 22 | # 'minimize' or 'maximize' the objective 23 | direction: minimize 24 | 25 | # number of experiments that will be executed 26 | n_trials: 20 27 | 28 | # choose Optuna hyperparameter sampler 29 | # learn more here: https://optuna.readthedocs.io/en/stable/reference/samplers.html 30 | sampler: 31 | _target_: optuna.samplers.TPESampler 32 | seed: 12345 33 | consider_prior: true 34 | prior_weight: 1.0 35 | consider_magic_clip: true 36 | consider_endpoints: false 37 | n_startup_trials: 10 38 | n_ei_candidates: 24 39 | multivariate: false 40 | warn_independent_sampling: true 41 | 42 | # define range of hyperparameters 43 | search_space: 44 | model.include_pv_yield_history: 45 | type: categorical 46 | choices: [ true, false ] 47 | model.include_future_satellite: 48 | type: categorical 49 | choices: [ true, false ] 50 | -------------------------------------------------------------------------------- /configs/hydra/default.yaml: -------------------------------------------------------------------------------- 1 | # output paths for hydra logs 2 | run: 3 | dir: logs/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 4 | sweep: 5 | dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S} 6 | subdir: ${hydra.job.num} 7 | 8 | # you can set here environment variables that are universal for all users 9 | # for system specific variables (like data paths) it's better to use .env file! 
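With the Optuna sweeper above, the value being minimised is whatever float the `@hydra.main`-decorated entry point returns; a sketch of the expected shape of that entry point (`run_training` and the metric lookup are illustrative, not this repo's exact code):

```python
import hydra
from omegaconf import DictConfig


@hydra.main(config_path="configs/", config_name="config.yaml")
def main(config: DictConfig) -> float:
    # run_training is hypothetical: train, then read back the logged metrics.
    metrics = run_training(config)
    # Returning `optimized_metric` is what lets Optuna drive the search
    # (direction: minimize).
    return metrics["MSE/Validation_epoch"]


if __name__ == "__main__":
    main()
```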
10 | job: 11 | env_set: 12 | EXAMPLE_VAR: "example_value" 13 | -------------------------------------------------------------------------------- /configs/logger/comet.yaml: -------------------------------------------------------------------------------- 1 | # https://www.comet.ml 2 | 3 | comet: 4 | _target_: pytorch_lightning.loggers.comet.CometLogger 5 | api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable 6 | project_name: "template-tests" 7 | experiment_name: null 8 | -------------------------------------------------------------------------------- /configs/logger/csv.yaml: -------------------------------------------------------------------------------- 1 | # CSV logger built into Lightning 2 | 3 | csv: 4 | _target_: pytorch_lightning.loggers.csv_logs.CSVLogger 5 | save_dir: "." 6 | name: "csv/" 7 | version: null 8 | prefix: "" 9 | -------------------------------------------------------------------------------- /configs/logger/many_loggers.yaml: -------------------------------------------------------------------------------- 1 | # train with many loggers at once 2 | 3 | defaults: 4 | # - aim.yaml 5 | # - comet.yaml 6 | - csv.yaml 7 | # - mlflow.yaml 8 | # - neptune.yaml 9 | # - tensorboard.yaml 10 | - wandb.yaml 11 | -------------------------------------------------------------------------------- /configs/logger/mlflow.yaml: -------------------------------------------------------------------------------- 1 | # https://mlflow.org 2 | 3 | mlflow: 4 | _target_: pytorch_lightning.loggers.mlflow.MLFlowLogger 5 | experiment_name: default 6 | tracking_uri: null 7 | tags: null 8 | save_dir: ./mlruns 9 | prefix: "" 10 | artifact_location: null 11 | -------------------------------------------------------------------------------- /configs/logger/neptune.yaml: -------------------------------------------------------------------------------- 1 | # https://neptune.ai 2 | 3 | neptune: 4 | _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger 5 | api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable 6 | project: OpenClimateFix/predict-pv-yield 7 | prefix: "" 8 | -------------------------------------------------------------------------------- /configs/logger/tensorboard.yaml: -------------------------------------------------------------------------------- 1 | # https://www.tensorflow.org/tensorboard/ 2 | 3 | tensorboard: 4 | _target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger 5 | save_dir: "tensorboard/" 6 | name: "default" 7 | version: null 8 | log_graph: False 9 | default_hp_metric: True 10 | prefix: "" 11 | -------------------------------------------------------------------------------- /configs/logger/wandb.yaml: -------------------------------------------------------------------------------- 1 | # https://wandb.ai 2 | 3 | wandb: 4 | _target_: pytorch_lightning.loggers.wandb.WandbLogger 5 | project: "template-tests" 6 | name: null 7 | save_dir: "." 8 | offline: False # set True to store all logs only locally 9 | id: null # pass correct id to resume experiment!
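A sketch of resuming an interrupted run via that `id` field (the run id below is made up; `WandbLogger` forwards extra keyword arguments to `wandb.init`):

```python
from pytorch_lightning.loggers import WandbLogger

logger = WandbLogger(
    project="template-tests",
    id="1a2b3c4d",   # id of the run to resume (illustrative value)
    resume="allow",  # passed through to wandb.init
)
```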
10 | # entity: "" # set to name of your wandb team or just remove it 11 | log_model: False 12 | prefix: "" 13 | job_type: "train" 14 | group: "" 15 | tags: [] 16 | -------------------------------------------------------------------------------- /configs/model/baseline.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.baseline.last_value.Model 2 | 3 | forecast_minutes: 120 4 | history_minutes: 30 5 | output_variable: gsp_yield 6 | -------------------------------------------------------------------------------- /configs/model/conv3d.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_yield: True 4 | include_nwp: True 5 | forecast_minutes: 120 6 | history_minutes: 30 7 | number_of_conv3d_layers: 6 8 | image_size_pixels: 24 9 | number_sat_channels: 11 10 | conv3d_channels: 32 11 | fc1_output_features: 128 12 | fc2_output_features: 128 13 | fc3_output_features: 64 14 | output_variable: gsp_yield 15 | -------------------------------------------------------------------------------- /configs/model/conv3d_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model_nwp.Model 2 | 3 | include_pv_or_gsp_yield_history: True 4 | forecast_minutes: 120 5 | history_minutes: 30 6 | number_of_conv3d_layers: 6 7 | conv3d_channels: 32 8 | fc1_output_features: 128 9 | fc2_output_features: 128 10 | fc3_output_features: 64 11 | number_nwp_channels: 1 12 | -------------------------------------------------------------------------------- /configs/model/conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model_sat_nwp.Model 2 | 3 | include_pv_or_gsp_yield_history: True 4 | include_nwp: True 5 | forecast_minutes: 120 6 | history_minutes: 30 7 | number_of_conv3d_layers: 6 8 | image_size_pixels: 24 9 | number_sat_channels: 11 10 | conv3d_channels: 32 11 | fc1_output_features: 128 12 | fc2_output_features: 128 13 | fc3_output_features: 64 14 | output_variable: gsp_yield 15 | include_pv_yield_history: False 16 | include_future_satellite: True 17 | -------------------------------------------------------------------------------- /configs/model/perceiver.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.perceiver.perceiver.PerceiverModel 2 | 3 | forecast_minutes: 30 4 | history_minutes: 60 5 | batch_size: 8 6 | num_latents: 128 7 | latent_dim: 64 8 | embedding_dem: 16 9 | output_variable: gsp_yield 10 | -------------------------------------------------------------------------------- /configs/model/perceiver_conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.perceiver.perceiver_conv3d_nwp_sat.Model 2 | 3 | forecast_minutes: 30 4 | history_minutes: 60 5 | batch_size: 32 6 | num_latents: 24 7 | latent_dim: 24 8 | embedding_dem: 0 9 | output_variable: gsp_yield 10 | conv3d_channels: 8 11 | use_future_satellite_images: 0 12 | -------------------------------------------------------------------------------- /configs/model/perceiver_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.perceiver.perceiver_nwp_sat.Model 2 | 3 | 
forecast_minutes: 30 4 | history_minutes: 60 5 | batch_size: 8 6 | num_latents: 128 7 | latent_dim: 64 8 | embedding_dem: 0 9 | output_variable: gsp_yield 10 | -------------------------------------------------------------------------------- /configs/readme.md: -------------------------------------------------------------------------------- 1 | The following folders hold the configuration files. 2 | 3 | This idea is copied from 4 | https://github.com/ashleve/lightning-hydra-template/blob/main/configs/experiment/example_simple.yaml 5 | 6 | Run experiments with: 7 | `python run.py experiment=example_simple` 8 | -------------------------------------------------------------------------------- /configs/trainer/all_params.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.Trainer 2 | 3 | # default values for all trainer parameters 4 | checkpoint_callback: True 5 | default_root_dir: null 6 | gradient_clip_val: 0.0 7 | process_position: 0 8 | num_nodes: 1 9 | num_processes: 1 10 | gpus: null 11 | auto_select_gpus: False 12 | tpu_cores: null 13 | log_gpu_memory: null 14 | progress_bar_refresh_rate: 1 15 | overfit_batches: 0.0 16 | track_grad_norm: -1 17 | check_val_every_n_epoch: 1 18 | fast_dev_run: False 19 | accumulate_grad_batches: 1 20 | max_epochs: 1 21 | min_epochs: 1 22 | max_steps: null 23 | min_steps: null 24 | limit_train_batches: 1.0 25 | limit_val_batches: 1.0 26 | limit_test_batches: 1.0 27 | val_check_interval: 1.0 28 | flush_logs_every_n_steps: 100 29 | log_every_n_steps: 50 30 | accelerator: null 31 | sync_batchnorm: False 32 | precision: 32 33 | weights_summary: "top" 34 | weights_save_path: null 35 | num_sanity_val_steps: 2 36 | truncated_bptt_steps: null 37 | resume_from_checkpoint: null 38 | profiler: null 39 | benchmark: False 40 | deterministic: False 41 | reload_dataloaders_every_epoch: False 42 | auto_lr_find: False 43 | replace_sampler_ddp: True 44 | terminate_on_nan: False 45 | auto_scale_batch_size: False 46 | prepare_data_per_node: True 47 | plugins: null 48 | amp_backend: "native" 49 | amp_level: "O2" 50 | move_metrics_to_cpu: False 51 | -------------------------------------------------------------------------------- /configs/trainer/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.Trainer 2 | 3 | # set `1` to train on GPU, `0` to train on CPU only 4 | gpus: 0 5 | auto_select_gpus: False 6 | 7 | min_epochs: 1 8 | max_epochs: 10 9 | 10 | weights_summary: null 11 | progress_bar_refresh_rate: 5 12 | resume_from_checkpoint: null 13 | fast_dev_run: false 14 | profiler: 'simple' 15 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: predict_pv_yield 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | dependencies: 6 | - python>=3.9 7 | - pip 8 | - rich 9 | - python-dotenv 10 | 11 | # Scientific Python 12 | - numpy 13 | - pandas 14 | - matplotlib 15 | - xarray 16 | - ipykernel 17 | - h5netcdf 18 | - omegaconf 19 | - hydra-core 20 | 21 | # Machine learning 22 | - pytorch::pytorch # explicitly specify the pytorch channel to prevent conda from using conda-forge for pytorch, and hence installing the CPU-only version.
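A quick sanity check (a suggestion, not part of the repo) that the channel pinning above worked and conda did not install a CPU-only PyTorch build:

```python
import torch

print(torch.__version__)
print(torch.cuda.is_available())  # expect True on a machine with a GPU and driver
```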
23 | - pytorch-lightning 24 | 25 | # Development tools 26 | - pytest 27 | - pytest-cov 28 | - flake8 29 | - jedi 30 | - black 31 | 32 | - pip: 33 | - neptune-client[pytorch-lightning] 34 | - tilemapbase # For plotting human-readable geographical maps. 35 | - perceiver_pytorch 36 | - nowcasting_dataset 37 | - nowcasting_utils 38 | - nowcasting_dataloader 39 | -------------------------------------------------------------------------------- /experiments/001_CNN_concat_all_timesteps_as_channels.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | from nowcasting_dataset.datamodule import NowcastingDataModule 8 | from pathlib import Path 9 | import matplotlib.pyplot as plt 10 | import matplotlib.dates as mdates 11 | import pandas as pd 12 | 13 | import torch 14 | from torch import nn 15 | import torch.nn.functional as F 16 | import pytorch_lightning as pl 17 | 18 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 19 | 20 | import logging 21 | logging.basicConfig() 22 | logger = logging.getLogger('nowcasting_dataset') 23 | logger.setLevel(logging.DEBUG) 24 | 25 | 26 | # In[2]: 27 | 28 | 29 | import numpy as np 30 | 31 | 32 | # In[3]: 33 | 34 | 35 | BUCKET = Path('solar-pv-nowcasting-data') 36 | 37 | # Solar PV data 38 | PV_PATH = BUCKET / 'PV/PVOutput.org' 39 | PV_DATA_FILENAME = PV_PATH / 'UK_PV_timeseries_batch.nc' 40 | PV_METADATA_FILENAME = PV_PATH / 'UK_PV_metadata.csv' 41 | 42 | # SAT_FILENAME = BUCKET / 'satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep_quarter_geospatial.zarr' 43 | SAT_FILENAME = BUCKET / 'satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr' 44 | 45 | # Numerical weather predictions 46 | #NWP_BASE_PATH = BUCKET / 'NWP/UK_Met_Office/UKV_zarr' 47 | #NWP_BASE_PATH = BUCKET / 'NWP/UK_Met_Office/UKV_single_step_and_single_timestep_all_vars.zarr' 48 | NWP_BASE_PATH = BUCKET / 'NWP/UK_Met_Office/UKV_single_step_and_single_timestep_all_vars_full_spatial_2018_7-12_float32.zarr' 49 | 50 | 51 | # In[4]: 52 | 53 | 54 | params = dict( 55 | batch_size=32, 56 | history_len=6, #: Number of timesteps of history, not including t0. 57 | forecast_len=12, #: Number of timesteps of forecast. 58 | nwp_channels=( 59 | 't', 'dswrf', 'prate', 'r', 'sde', 'si10', 'vis', 'lcc', 'mcc', 'hcc') 60 | ) 61 | 62 | 63 | # In[5]: 64 | 65 | 66 | data_module = NowcastingDataModule( 67 | pv_power_filename=PV_DATA_FILENAME, 68 | pv_metadata_filename=f'gs://{PV_METADATA_FILENAME}', 69 | sat_filename = f'gs://{SAT_FILENAME}', 70 | # sat_channels =('HRV', 'WV_062', 'WV_073'), 71 | nwp_base_path = f'gs://{NWP_BASE_PATH}', 72 | pin_memory = True, #: Passed to DataLoader. 73 | num_workers = 22, #: Passed to DataLoader. 74 | prefetch_factor = 256, #: Passed to DataLoader. 
75 | n_samples_per_timestep = 8, #: Passed to NowcastingDataset 76 | **params 77 | ) 78 | 79 | 80 | # In[6]: 81 | 82 | 83 | data_module.prepare_data() 84 | 85 | 86 | # In[7]: 87 | 88 | 89 | data_module.setup() 90 | 91 | 92 | # ## Define very simple ML model 93 | 94 | # In[8]: 95 | 96 | 97 | import tilemapbase 98 | from nowcasting_dataset.geospatial import osgb_to_lat_lon 99 | 100 | 101 | # In[9]: 102 | 103 | 104 | tilemapbase.init(create=True) 105 | 106 | 107 | # In[10]: 108 | 109 | 110 | def plot_example(batch, model_output, example_i: int=0, border: int=0): 111 | fig = plt.figure(figsize=(20, 20)) 112 | ncols=4 113 | nrows=2 114 | 115 | # Satellite data 116 | extent = ( 117 | float(batch['sat_x_coords'][example_i, 0].cpu().numpy()), 118 | float(batch['sat_x_coords'][example_i, -1].cpu().numpy()), 119 | float(batch['sat_y_coords'][example_i, -1].cpu().numpy()), 120 | float(batch['sat_y_coords'][example_i, 0].cpu().numpy())) # left, right, bottom, top 121 | 122 | def _format_ax(ax): 123 | #ax.set_xlim(extent[0]-border, extent[1]+border) 124 | #ax.set_ylim(extent[2]-border, extent[3]+border) 125 | # ax.coastlines(color='black') 126 | ax.scatter( 127 | batch['x_meters_center'][example_i].cpu(), 128 | batch['y_meters_center'][example_i].cpu(), 129 | s=500, color='white', marker='x') 130 | 131 | ax = fig.add_subplot(nrows, ncols, 1) #, projection=ccrs.OSGB(approx=False)) 132 | sat_data = batch['sat_data'][example_i, :, :, :, 0].cpu().numpy() 133 | sat_min = np.min(sat_data) 134 | sat_max = np.max(sat_data) 135 | ax.imshow(sat_data[0], extent=extent, interpolation='none', vmin=sat_min, vmax=sat_max) 136 | ax.set_title('t = -{}'.format(params['history_len'])) 137 | _format_ax(ax) 138 | 139 | ax = fig.add_subplot(nrows, ncols, 2) 140 | ax.imshow(sat_data[params['history_len']+1], extent=extent, interpolation='none', vmin=sat_min, vmax=sat_max) 141 | ax.set_title('t = 0') 142 | _format_ax(ax) 143 | 144 | ax = fig.add_subplot(nrows, ncols, 3) 145 | ax.imshow(sat_data[-1], extent=extent, interpolation='none', vmin=sat_min, vmax=sat_max) 146 | ax.set_title('t = {}'.format(params['forecast_len'])) 147 | _format_ax(ax) 148 | 149 | ax = fig.add_subplot(nrows, ncols, 4) 150 | lat_lon_bottom_left = osgb_to_lat_lon(extent[0], extent[2]) 151 | lat_lon_top_right = osgb_to_lat_lon(extent[1], extent[3]) 152 | tiles = tilemapbase.tiles.build_OSM() 153 | lat_lon_extent = tilemapbase.Extent.from_lonlat( 154 | longitude_min=lat_lon_bottom_left[1], 155 | longitude_max=lat_lon_top_right[1], 156 | latitude_min=lat_lon_bottom_left[0], 157 | latitude_max=lat_lon_top_right[0]) 158 | plotter = tilemapbase.Plotter(lat_lon_extent, tile_provider=tiles, zoom=6) 159 | plotter.plot(ax, tiles) 160 | 161 | ############## TIMESERIES ################## 162 | # NWP 163 | ax = fig.add_subplot(nrows, ncols, 5) 164 | nwp_dt_index = pd.to_datetime(batch['nwp_target_time'][example_i].cpu().numpy(), unit='s') 165 | pd.DataFrame( 166 | batch['nwp'][example_i, :, :, 0, 0].T.cpu().numpy(), 167 | index=nwp_dt_index, 168 | columns=params['nwp_channels']).plot(ax=ax) 169 | ax.set_title('NWP') 170 | 171 | # datetime features 172 | ax = fig.add_subplot(nrows, ncols, 6) 173 | ax.set_title('datetime features') 174 | datetime_feature_cols = ['hour_of_day_sin', 'hour_of_day_cos', 'day_of_year_sin', 'day_of_year_cos'] 175 | datetime_features_df = pd.DataFrame(index=nwp_dt_index, columns=datetime_feature_cols) 176 | for key in datetime_feature_cols: 177 | datetime_features_df[key] = batch[key][example_i].cpu().numpy() 178 | 
datetime_features_df.plot(ax=ax) 179 | ax.legend() 180 | ax.set_xlabel(nwp_dt_index[0].date()) 181 | 182 | # PV yield 183 | ax = fig.add_subplot(nrows, ncols, 7) 184 | ax.set_title('PV yield for PV ID {:,d}'.format(batch['pv_system_id'][example_i].cpu())) 185 | pv_actual = pd.Series( 186 | batch['pv_yield'][example_i].cpu().numpy(), 187 | index=nwp_dt_index, 188 | name='actual') 189 | pv_pred = pd.Series( 190 | model_output[example_i].detach().cpu().numpy(), 191 | index=nwp_dt_index[params['history_len']+1:], 192 | name='prediction') 193 | pd.concat([pv_actual, pv_pred], axis='columns').plot(ax=ax) 194 | ax.legend() 195 | 196 | # fig.tight_layout() 197 | 198 | return fig 199 | 200 | 201 | # In[11]: 202 | 203 | 204 | # plot_example(batch, model_output, example_i=20); 205 | 206 | 207 | # In[12]: 208 | 209 | 210 | SAT_X_MEAN = np.float32(309000) 211 | SAT_X_STD = np.float32(316387.42073603) 212 | SAT_Y_MEAN = np.float32(519000) 213 | SAT_Y_STD = np.float32(406454.17945938) 214 | 215 | 216 | # In[13]: 217 | 218 | 219 | from neptune.new.types import File 220 | 221 | 222 | # In[14]: 223 | 224 | 225 | TOTAL_SEQ_LEN = params['history_len'] + params['forecast_len'] + 1 226 | CHANNELS = 144 227 | KERNEL = 3 228 | EMBEDDING_DIM = 16 229 | NWP_SIZE = 10 * 2 * 2 * TOTAL_SEQ_LEN # channels x width x height 230 | N_DATETIME_FEATURES = 4 * TOTAL_SEQ_LEN 231 | 232 | class LitAutoEncoder(pl.LightningModule): 233 | def __init__( 234 | self, 235 | history_len = params['history_len'], 236 | forecast_len = params['forecast_len'], 237 | 238 | ): 239 | super().__init__() 240 | self.history_len = history_len 241 | self.forecast_len = forecast_len 242 | 243 | self.sat_conv1 = nn.Conv2d(in_channels=history_len+6, out_channels=CHANNELS, kernel_size=KERNEL)#, groups=history_len+1) 244 | self.sat_conv2 = nn.Conv2d(in_channels=CHANNELS, out_channels=CHANNELS, kernel_size=KERNEL) #, groups=CHANNELS//2) 245 | self.sat_conv3 = nn.Conv2d(in_channels=CHANNELS, out_channels=CHANNELS, kernel_size=KERNEL) #, groups=CHANNELS) 246 | 247 | self.maxpool = nn.MaxPool2d(kernel_size=KERNEL) 248 | 249 | self.fc1 = nn.Linear( 250 | in_features=CHANNELS * 11 * 11, 251 | out_features=256) 252 | 253 | self.fc2 = nn.Linear(in_features=256 + EMBEDDING_DIM + NWP_SIZE + N_DATETIME_FEATURES + history_len+1, out_features=128) 254 | #self.fc2 = nn.Linear(in_features=EMBEDDING_DIM + N_DATETIME_FEATURES, out_features=128) 255 | self.fc3 = nn.Linear(in_features=128, out_features=128) 256 | self.fc4 = nn.Linear(in_features=128, out_features=128) 257 | self.fc5 = nn.Linear(in_features=128, out_features=params['forecast_len']) 258 | 259 | if EMBEDDING_DIM: 260 | self.pv_system_id_embedding = nn.Embedding( 261 | num_embeddings=len(data_module.pv_data_source.pv_metadata), 262 | embedding_dim=EMBEDDING_DIM) 263 | 264 | def forward(self, x): 265 | # ******************* Satellite imagery ************************* 266 | # Shape: batch_size, seq_length, width, height, channel 267 | sat_data = x['sat_data'][:, :self.history_len+1] 268 | batch_size, seq_len, width, height, n_chans = sat_data.shape 269 | 270 | # Move seq_length to be the last dim, ready for changing the shape 271 | sat_data = sat_data.permute(0, 2, 3, 4, 1) 272 | 273 | # Stack timesteps into the channel dimension 274 | sat_data = sat_data.view(batch_size, width, height, seq_len * n_chans) 275 | 276 | sat_data = sat_data.permute(0, 3, 1, 2) # Conv2d expects channels to be the 2nd dim! 
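        # Net effect of the reshapes above:
        #   (batch, seq_len, width, height, n_chans)
        #     -> (batch, width, height, n_chans, seq_len)   # permute
        #     -> (batch, width, height, seq_len * n_chans)  # view
        #     -> (batch, seq_len * n_chans, width, height)  # permute
        # i.e. every timestep of every satellite channel becomes one Conv2d
        # input channel -- the "concat all timesteps as channels" in this
        # experiment's filename.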
277 | 278 | ### EXTRA CHANNELS 279 | # Center marker 280 | center_marker = torch.zeros((batch_size, 1, width, height), dtype=torch.float32, device=self.device) 281 | half_width = width // 2 282 | center_marker[..., half_width-2:half_width+2, half_width-2:half_width+2] = 1 283 | 284 | # geo-spatial x 285 | x_coords = x['sat_x_coords'] - SAT_X_MEAN 286 | x_coords /= SAT_X_STD 287 | x_coords = x_coords.unsqueeze(1).expand(-1, width, -1).unsqueeze(1) 288 | 289 | # geo-spatial y 290 | y_coords = x['sat_y_coords'] - SAT_Y_MEAN 291 | y_coords /= SAT_Y_STD 292 | y_coords = y_coords.unsqueeze(-1).expand(-1, -1, height).unsqueeze(1) 293 | 294 | # pixel x & y 295 | pixel_range = (torch.arange(width, device=self.device) - 64) / 37 296 | pixel_range = pixel_range.unsqueeze(0).unsqueeze(0) 297 | pixel_x = pixel_range.unsqueeze(-2).expand(batch_size, 1, width, -1) 298 | pixel_y = pixel_range.unsqueeze(-1).expand(batch_size, 1, -1, height) 299 | 300 | # Concat 301 | sat_data = torch.cat((sat_data, center_marker, x_coords, y_coords, pixel_x, pixel_y), dim=1) 302 | 303 | del center_marker, x_coords, y_coords, pixel_x, pixel_y 304 | 305 | # Pass data through the network :) 306 | out = F.relu(self.sat_conv1(sat_data)) 307 | out = self.maxpool(out) 308 | out = F.relu(self.sat_conv2(out)) 309 | out = self.maxpool(out) 310 | out = F.relu(self.sat_conv3(out)) 311 | 312 | out = out.view(-1, CHANNELS * 11 * 11) 313 | out = F.relu(self.fc1(out)) 314 | 315 | # *********************** NWP Data ************************************** 316 | nwp_data = x['nwp'].float() # Shape: batch_size, channel, seq_length, width, height 317 | batch_size, n_nwp_chans, nwp_seq_len, nwp_width, nwp_height = nwp_data.shape 318 | nwp_data = nwp_data.reshape(batch_size, n_nwp_chans * nwp_seq_len * nwp_width * nwp_height) 319 | 320 | # Concat 321 | out = torch.cat( 322 | ( 323 | out, 324 | x['pv_yield'][:, :self.history_len+1], 325 | nwp_data, 326 | x['hour_of_day_sin'], 327 | x['hour_of_day_cos'], 328 | x['day_of_year_sin'], 329 | x['day_of_year_cos'], 330 | ), 331 | dim=1) 332 | del nwp_data 333 | 334 | # Embedding of PV system ID 335 | if EMBEDDING_DIM: 336 | pv_embedding = self.pv_system_id_embedding(x['pv_system_row_number']) 337 | out = torch.cat( 338 | ( 339 | out, 340 | pv_embedding 341 | ), 342 | dim=1) 343 | 344 | # Fully connected layers. 345 | out = F.relu(self.fc2(out)) 346 | out = F.relu(self.fc3(out)) 347 | out = F.relu(self.fc4(out)) 348 | out = F.relu(self.fc5(out)) # PV yield is in range [0, 1]. ReLU should train more cleanly than sigmoid. 349 | 350 | return out 351 | 352 | def _training_or_validation_step(self, batch, is_train_step): 353 | y_hat = self(batch) 354 | y = batch['pv_yield'][:, -self.forecast_len:] 355 | #y = torch.rand((32, 1), device=self.device) 356 | mse_loss = F.mse_loss(y_hat, y) 357 | nmae_loss = (y_hat - y).abs().mean() 358 | # TODO: Compute correlation coef using np.corrcoef(tensor with shape (2, num_timesteps))[0, 1] 359 | # on each example, and taking the mean across the batch? 
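        # One way to implement the TODO above (a sketch: the value is computed
        # here but not yet logged anywhere):
        y_hat_np = y_hat.detach().cpu().numpy()
        y_np = y.detach().cpu().numpy()
        mean_corr = np.mean(
            [np.corrcoef(np.stack([p, t]))[0, 1] for p, t in zip(y_hat_np, y_np)]
        )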
360 | tag = "Train" if is_train_step else "Validation" 361 | self.log_dict({f'MSE/{tag}': mse_loss}, on_step=is_train_step, on_epoch=True) 362 | self.log_dict({f'NMAE/{tag}': nmae_loss}, on_step=is_train_step, on_epoch=True) 363 | 364 | return nmae_loss 365 | 366 | def training_step(self, batch, batch_idx): 367 | return self._training_or_validation_step(batch, is_train_step=True) 368 | 369 | def validation_step(self, batch, batch_idx): 370 | if batch_idx == 0: 371 | # Plot example 372 | model_output = self(batch) 373 | fig = plot_example(batch, model_output) 374 | self.logger.experiment['validation/plot'].log(File.as_image(fig)) 375 | 376 | return self._training_or_validation_step(batch, is_train_step=False) 377 | 378 | def configure_optimizers(self): 379 | optimizer = torch.optim.Adam(self.parameters(), lr=0.001) 380 | return optimizer 381 | 382 | 383 | # In[15]: 384 | 385 | 386 | model = LitAutoEncoder() 387 | 388 | 389 | # In[16]: 390 | 391 | 392 | #train_ds = data_module.train_dataset 393 | #train_ds.per_worker_init(0) 394 | #for batch in train_ds: 395 | # break 396 | 397 | 398 | # In[17]: 399 | 400 | 401 | #model_output = model(batch) 402 | 403 | 404 | # In[18]: 405 | 406 | 407 | #plot_example(batch, model_output, example_i=2); 408 | 409 | 410 | # In[19]: 411 | 412 | 413 | logger = NeptuneLogger( 414 | project='OpenClimateFix/predict-pv-yield', 415 | #params=params, 416 | #experiment_name='climatology', 417 | #experiment_id='PRED-1' 418 | ) 419 | 420 | 421 | # In[20]: 422 | 423 | 424 | logger.version 425 | 426 | 427 | # In[21]: 428 | 429 | 430 | trainer = pl.Trainer(gpus=1, max_epochs=10_000, logger=logger) 431 | 432 | 433 | # In[ ]: 434 | 435 | 436 | trainer.fit(model, data_module) 437 | 438 | 439 | # In[ ]: 440 | -------------------------------------------------------------------------------- /experiments/002_cnn_processes_single_sat_image_then_rnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | import os 5 | 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | import pytorch_lightning as pl 10 | 11 | from predict_pv_yield.netcdf_dataset import NetCDFDataset, worker_init_fn 12 | from predict_pv_yield.visualisation import plot_example 13 | 14 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 15 | from neptune.new.types import File 16 | 17 | import logging 18 | logging.basicConfig() 19 | _LOG = logging.getLogger('predict_pv_yield') 20 | _LOG.setLevel(logging.DEBUG) 21 | 22 | 23 | params = dict( 24 | batch_size=32, 25 | history_len=6, #: Number of timesteps of history, not including t0. 26 | forecast_len=12, #: Number of timesteps of forecast. 
27 | image_size_pixels=32, 28 | nwp_channels=( 29 | 't', 'dswrf', 'prate', 'r', 'sde', 'si10', 'vis', 'lcc', 'mcc', 'hcc'), 30 | sat_channels=( 31 | 'HRV', 'IR_016', 'IR_039', 'IR_087', 'IR_097', 'IR_108', 'IR_120', 32 | 'IR_134', 'VIS006', 'VIS008', 'WV_062', 'WV_073') 33 | ) 34 | 35 | 36 | SAT_X_MEAN = np.float32(309000) 37 | SAT_X_STD = np.float32(316387.42073603) 38 | SAT_Y_MEAN = np.float32(519000) 39 | SAT_Y_STD = np.float32(406454.17945938) 40 | 41 | 42 | TOTAL_SEQ_LEN = params['history_len'] + params['forecast_len'] + 1 43 | CHANNELS = 32 44 | N_CHANNELS_LAST_CONV = 4 45 | KERNEL = 3 46 | EMBEDDING_DIM = 16 47 | NWP_SIZE = 10 * 2 * 2 # channels x width x height 48 | N_DATETIME_FEATURES = 4 49 | CNN_OUTPUT_SIZE = N_CHANNELS_LAST_CONV * ((params['image_size_pixels'] - 6) ** 2) 50 | FC_OUTPUT_SIZE = 8 51 | RNN_HIDDEN_SIZE = 16 52 | 53 | 54 | def get_dataloaders(): 55 | DATA_PATH = 'gs://solar-pv-nowcasting-data/prepared_ML_training_data/v2/' 56 | TEMP_PATH = '/home/jack/temp/' 57 | 58 | train_dataset = NetCDFDataset( 59 | 12_500, 60 | os.path.join(DATA_PATH, 'train'), 61 | os.path.join(TEMP_PATH, 'train')) 62 | 63 | #validation_dataset = NetCDFDataset(1_000, 'gs://solar-pv-nowcasting-data/prepared_ML_training_data/v2/validation/', '/home/jack/temp/validation') 64 | 65 | dataloader_config = dict( 66 | pin_memory=True, 67 | num_workers=24, 68 | prefetch_factor=8, 69 | worker_init_fn=worker_init_fn, 70 | persistent_workers=True, 71 | 72 | # Disable automatic batching because dataset 73 | # returns complete batches. 74 | batch_size=None, 75 | ) 76 | 77 | train_dataloader = torch.utils.data.DataLoader( 78 | train_dataset, **dataloader_config) 79 | 80 | return train_dataloader 81 | 82 | 83 | class LitModel(pl.LightningModule): 84 | def __init__( 85 | self, 86 | history_len=params['history_len'], 87 | forecast_len=params['forecast_len'], 88 | ): 89 | super().__init__() 90 | self.history_len = history_len 91 | self.forecast_len = forecast_len 92 | 93 | self.sat_conv1 = nn.Conv2d( 94 | in_channels=len(params['sat_channels'])+5, 95 | out_channels=CHANNELS, kernel_size=KERNEL) 96 | self.sat_conv2 = nn.Conv2d( 97 | in_channels=CHANNELS, 98 | out_channels=CHANNELS, kernel_size=KERNEL) 99 | self.sat_conv3 = nn.Conv2d( 100 | in_channels=CHANNELS, 101 | out_channels=N_CHANNELS_LAST_CONV, kernel_size=KERNEL) 102 | 103 | self.fc1 = nn.Linear( 104 | in_features=CNN_OUTPUT_SIZE, 105 | out_features=256) 106 | 107 | self.fc2 = nn.Linear( 108 | in_features=256 + EMBEDDING_DIM, 109 | out_features=128) 110 | 111 | self.fc3 = nn.Linear(in_features=128, out_features=64) 112 | self.fc4 = nn.Linear(in_features=64, out_features=32) 113 | self.fc5 = nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 114 | 115 | if EMBEDDING_DIM: 116 | self.pv_system_id_embedding = nn.Embedding( 117 | num_embeddings=940, 118 | embedding_dim=EMBEDDING_DIM) 119 | 120 | self.encoder_rnn = nn.GRU( 121 | # plus 1 for history 122 | input_size=FC_OUTPUT_SIZE + N_DATETIME_FEATURES + 1 + NWP_SIZE, 123 | hidden_size=RNN_HIDDEN_SIZE, 124 | num_layers=2, 125 | batch_first=True) 126 | self.decoder_rnn = nn.GRU( 127 | input_size=FC_OUTPUT_SIZE + N_DATETIME_FEATURES + NWP_SIZE, 128 | hidden_size=RNN_HIDDEN_SIZE, 129 | num_layers=2, 130 | batch_first=True) 131 | 132 | self.decoder_fc1 = nn.Linear( 133 | in_features=RNN_HIDDEN_SIZE, 134 | out_features=8) 135 | self.decoder_fc2 = nn.Linear( 136 | in_features=8, 137 | out_features=1) 138 | 139 | # EXTRA CHANNELS 140 | # Center marker 141 | new_batch_size = params['batch_size'] * TOTAL_SEQ_LEN 142 
| self.center_marker = torch.zeros( 143 | ( 144 | new_batch_size, 145 | 1, 146 | params['image_size_pixels'], 147 | params['image_size_pixels'] 148 | ), 149 | dtype=torch.float32, device=self.device) 150 | half_width = params['image_size_pixels'] // 2 151 | self.center_marker[ 152 | ..., half_width-2:half_width+2, half_width-2:half_width+2] = 1 153 | 154 | # pixel x & y 155 | pixel_range = ( 156 | torch.arange(params['image_size_pixels'], device=self.device) 157 | - 64) / 37 158 | pixel_range = pixel_range.unsqueeze(0).unsqueeze(0) 159 | self.pixel_x = pixel_range.unsqueeze(-2).expand( 160 | new_batch_size, 1, params['image_size_pixels'], -1) 161 | self.pixel_y = pixel_range.unsqueeze(-1).expand( 162 | new_batch_size, 1, -1, params['image_size_pixels']) 163 | 164 | def forward(self, x): 165 | # ******************* Satellite imagery ************************* 166 | # Shape: batch_size, seq_length, width, height, channel 167 | # TODO: Use optical flow, not actual sat images of the future! 168 | sat_data = x['sat_data'] 169 | batch_size, seq_len, width, height, n_chans = sat_data.shape 170 | 171 | # Stack timesteps as extra examples 172 | new_batch_size = batch_size * seq_len 173 | # 0 1 2 3 174 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 175 | 176 | # Conv2d expects channels to be the 2nd dim! 177 | sat_data = sat_data.permute(0, 3, 1, 2) 178 | # Now shape: new_batch_size, n_chans, width, height 179 | 180 | # EXTRA CHANNELS 181 | # geo-spatial x 182 | x_coords = x['sat_x_coords'] # shape: batch_size, image_size_pixels 183 | x_coords = x_coords - SAT_X_MEAN 184 | x_coords = x_coords / SAT_X_STD 185 | x_coords = x_coords.unsqueeze(1).expand(-1, width, -1).unsqueeze(1) 186 | x_coords = x_coords.repeat_interleave(repeats=TOTAL_SEQ_LEN, dim=0) 187 | 188 | # geo-spatial y 189 | y_coords = x['sat_y_coords'] # shape: batch_size, image_size_pixels 190 | y_coords = y_coords - SAT_Y_MEAN 191 | y_coords = y_coords / SAT_Y_STD 192 | y_coords = y_coords.unsqueeze(-1).expand(-1, -1, height).unsqueeze(1) 193 | y_coords = y_coords.repeat_interleave(repeats=TOTAL_SEQ_LEN, dim=0) 194 | 195 | # Concat 196 | if sat_data.device != self.center_marker.device: 197 | self.center_marker = self.center_marker.to(sat_data.device) 198 | self.pixel_x = self.pixel_x.to(sat_data.device) 199 | self.pixel_y = self.pixel_y.to(sat_data.device) 200 | 201 | sat_data = torch.cat( 202 | ( 203 | sat_data, self.center_marker, 204 | x_coords, y_coords, self.pixel_x, self.pixel_y 205 | ), 206 | dim=1) 207 | 208 | del x_coords, y_coords 209 | 210 | # Pass data through the network :) 211 | out = F.relu(self.sat_conv1(sat_data)) 212 | out = F.relu(self.sat_conv2(out)) 213 | out = F.relu(self.sat_conv3(out)) 214 | 215 | out = out.reshape(new_batch_size, CNN_OUTPUT_SIZE) 216 | out = F.relu(self.fc1(out)) 217 | 218 | # ********************** Embedding of PV system ID ******************** 219 | if EMBEDDING_DIM: 220 | pv_row = x['pv_system_row_number'].repeat_interleave(TOTAL_SEQ_LEN) 221 | pv_embedding = self.pv_system_id_embedding(pv_row) 222 | out = torch.cat( 223 | ( 224 | out, 225 | pv_embedding 226 | ), 227 | dim=1) 228 | 229 | # Fully connected layers. 
230 | out = F.relu(self.fc2(out)) 231 | out = F.relu(self.fc3(out)) 232 | out = F.relu(self.fc4(out)) 233 | out = F.relu(self.fc5(out)) 234 | 235 | # ******************* PREP DATA FOR RNN ******************************* 236 | out = out.reshape(batch_size, TOTAL_SEQ_LEN, FC_OUTPUT_SIZE) 237 | 238 | # The RNN encoder gets recent history: satellite, NWP, 239 | # datetime features, and recent PV history. The RNN decoder 240 | # gets what we know about the future: satellite, NWP, and 241 | # datetime features. 242 | 243 | # *********************** NWP Data ************************************ 244 | # Shape: batch_size, channel, seq_length, width, height 245 | nwp_data = x['nwp'].float() 246 | # RNN expects seq_len to be dim 1. 247 | nwp_data = nwp_data.permute(0, 2, 1, 3, 4) 248 | batch_size, nwp_seq_len, n_nwp_chans, nwp_width, nwp_height = ( 249 | nwp_data.shape) 250 | nwp_data = nwp_data.reshape( 251 | batch_size, nwp_seq_len, n_nwp_chans * nwp_width * nwp_height) 252 | 253 | # Concat 254 | rnn_input = torch.cat( 255 | ( 256 | out, 257 | nwp_data, 258 | x['hour_of_day_sin'].unsqueeze(-1), 259 | x['hour_of_day_cos'].unsqueeze(-1), 260 | x['day_of_year_sin'].unsqueeze(-1), 261 | x['day_of_year_cos'].unsqueeze(-1), 262 | ), 263 | dim=2) 264 | 265 | pv_yield_history = x['pv_yield'][:, :self.history_len+1].unsqueeze(-1) 266 | encoder_input = torch.cat( 267 | ( 268 | rnn_input[:, :self.history_len+1], 269 | pv_yield_history 270 | ), 271 | dim=2) 272 | 273 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 274 | decoder_output, _ = self.decoder_rnn( 275 | rnn_input[:, -self.forecast_len:], encoder_hidden) 276 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 277 | 278 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 279 | decoder_output = self.decoder_fc2(decoder_output) 280 | 281 | return decoder_output.squeeze() 282 | 283 | def _training_or_validation_step(self, batch, is_train_step): 284 | y_hat = self(batch) 285 | y = batch['pv_yield'][:, -self.forecast_len:] 286 | mse_loss = F.mse_loss(y_hat, y) 287 | nmae_loss = (y_hat - y).abs().mean() 288 | # TODO: Compute correlation coef using np.corrcoef(tensor with 289 | # shape (2, num_timesteps))[0, 1] on each example, and taking 290 | # the mean across the batch? 
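        # One untested sketch of the TODO above (not wired into the metrics;
        # `corr_coefs` and `mean_corr` are hypothetical names):
        #
        #     y_np = y.detach().cpu().numpy()
        #     y_hat_np = y_hat.detach().cpu().numpy()
        #     corr_coefs = [np.corrcoef(np.stack((y_np[i], y_hat_np[i])))[0, 1]
        #                   for i in range(y_np.shape[0])]
        #     mean_corr = float(np.mean(corr_coefs))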
291 | tag = "Train" if is_train_step else "Validation" 292 | self.log_dict( 293 | {f'MSE/{tag}': mse_loss}, on_step=is_train_step, on_epoch=True) 294 | self.log_dict( 295 | {f'NMAE/{tag}': nmae_loss}, on_step=is_train_step, on_epoch=True) 296 | 297 | return nmae_loss 298 | 299 | def training_step(self, batch, batch_idx): 300 | return self._training_or_validation_step(batch, is_train_step=True) 301 | 302 | def validation_step(self, batch, batch_idx): 303 | if batch_idx == 0: 304 | # Plot example 305 | model_output = self(batch) 306 | fig = plot_example( 307 | batch, model_output, history_len=params['history_len'], 308 | forecast_len=params['forecast_len'], 309 | nwp_channels=params['nwp_channels']) 310 | self.logger.experiment['validation/plot'].log(File.as_image(fig)) 311 | 312 | return self._training_or_validation_step(batch, is_train_step=False) 313 | 314 | def configure_optimizers(self): 315 | optimizer = torch.optim.Adam(self.parameters(), lr=0.001) 316 | return optimizer 317 | 318 | 319 | def main(): 320 | train_dataloader = get_dataloaders() 321 | model = LitModel() 322 | logger = NeptuneLogger(project='OpenClimateFix/predict-pv-yield') 323 | logger.log_hyperparams(params) 324 | _LOG.info(f'logger.version = {logger.version}') 325 | trainer = pl.Trainer(gpus=1, max_epochs=10_000, logger=logger) 326 | trainer.fit(model, train_dataloader) 327 | 328 | 329 | if __name__ == '__main__': 330 | main() 331 | -------------------------------------------------------------------------------- /experiments/003_perceiver_processes_single_sat_image_then_rnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | import os 5 | 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | import pytorch_lightning as pl 10 | 11 | from predict_pv_yield.netcdf_dataset import NetCDFDataset, worker_init_fn 12 | from predict_pv_yield.visualisation import plot_example 13 | 14 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 15 | from neptune.new.types import File 16 | 17 | from perceiver_pytorch import Perceiver 18 | 19 | import logging 20 | logging.basicConfig() 21 | _LOG = logging.getLogger('predict_pv_yield') 22 | _LOG.setLevel(logging.DEBUG) 23 | 24 | 25 | params = dict( 26 | # DATA 27 | # TODO: Everything that relates to the dataset should come automatically 28 | # from a yaml file stored with the dataset. 29 | batch_size=32, 30 | history_len=6, #: Number of timesteps of history, not including t0. 31 | forecast_len=12, #: Number of timesteps of forecast. 32 | image_size_pixels=64, 33 | nwp_channels=( 34 | 't', 'dswrf', 'prate', 'r', 'sde', 'si10', 'vis', 'lcc', 'mcc', 'hcc'), 35 | sat_channels=( 36 | 'HRV', 'IR_016', 'IR_039', 'IR_087', 'IR_097', 'IR_108', 'IR_120', 37 | 'IR_134', 'VIS006', 'VIS008', 'WV_062', 'WV_073'), 38 | 39 | # TRAINING 40 | precision=16, #: 16, 32, or 64-bit precision for data. 
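    # (16-bit precision roughly halves activation memory compared to 32-bit, at some cost in numerical range.)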
41 | val_check_interval=1_000, #: Check validation this many batches, or proportion of the epoch 42 | ) 43 | 44 | 45 | SAT_X_MEAN = np.float32(309000) 46 | SAT_X_STD = np.float32(316387.42073603) 47 | SAT_Y_MEAN = np.float32(519000) 48 | SAT_Y_STD = np.float32(406454.17945938) 49 | 50 | 51 | TOTAL_SEQ_LEN = params['history_len'] + params['forecast_len'] + 1 52 | EMBEDDING_DIM = 16 53 | NWP_SIZE = len(params['nwp_channels']) * 2 * 2 # channels x width x height 54 | N_DATETIME_FEATURES = 4 55 | PERCEIVER_OUTPUT_SIZE = 512 56 | FC_OUTPUT_SIZE = 8 57 | RNN_HIDDEN_SIZE = 16 58 | 59 | 60 | def get_dataloaders(): 61 | DATA_PATH = 'gs://solar-pv-nowcasting-data/prepared_ML_training_data/v3/' 62 | TEMP_PATH = '/home/jack/temp/' 63 | 64 | train_dataset = NetCDFDataset( 65 | 24_900, 66 | os.path.join(DATA_PATH, 'train'), 67 | os.path.join(TEMP_PATH, 'train')) 68 | 69 | validation_dataset = NetCDFDataset( 70 | 900, 71 | os.path.join(DATA_PATH, 'validation'), 72 | os.path.join(TEMP_PATH, 'validation')) 73 | 74 | dataloader_config = dict( 75 | pin_memory=True, 76 | num_workers=16, 77 | prefetch_factor=8, 78 | worker_init_fn=worker_init_fn, 79 | persistent_workers=True, 80 | 81 | # Disable automatic batching because dataset 82 | # returns complete batches. 83 | batch_size=None, 84 | ) 85 | 86 | train_dataloader = torch.utils.data.DataLoader( 87 | train_dataset, **dataloader_config) 88 | 89 | validation_dataloader = torch.utils.data.DataLoader( 90 | validation_dataset, **dataloader_config) 91 | 92 | return train_dataloader, validation_dataloader 93 | 94 | 95 | class LitModel(pl.LightningModule): 96 | def __init__( 97 | self, 98 | history_len=params['history_len'], 99 | forecast_len=params['forecast_len'], 100 | ): 101 | super().__init__() 102 | self.history_len = history_len 103 | self.forecast_len = forecast_len 104 | 105 | self.perceiver = Perceiver( 106 | input_channels=len(params['sat_channels']), 107 | input_axis=2, 108 | num_freq_bands=6, 109 | max_freq=10, 110 | depth=2, 111 | num_latents=128, 112 | latent_dim=64, 113 | num_classes=PERCEIVER_OUTPUT_SIZE, 114 | ) 115 | 116 | self.fc1 = nn.Linear( 117 | in_features=PERCEIVER_OUTPUT_SIZE, 118 | out_features=256) 119 | 120 | self.fc2 = nn.Linear( 121 | in_features=256 + EMBEDDING_DIM, 122 | out_features=128) 123 | 124 | self.fc3 = nn.Linear(in_features=128, out_features=64) 125 | self.fc4 = nn.Linear(in_features=64, out_features=32) 126 | self.fc5 = nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 127 | 128 | if EMBEDDING_DIM: 129 | self.pv_system_id_embedding = nn.Embedding( 130 | num_embeddings=940, 131 | embedding_dim=EMBEDDING_DIM) 132 | 133 | self.encoder_rnn = nn.GRU( 134 | # plus 1 for history 135 | input_size=FC_OUTPUT_SIZE + N_DATETIME_FEATURES + 1 + NWP_SIZE, 136 | hidden_size=RNN_HIDDEN_SIZE, 137 | num_layers=2, 138 | batch_first=True) 139 | self.decoder_rnn = nn.GRU( 140 | input_size=FC_OUTPUT_SIZE + N_DATETIME_FEATURES + NWP_SIZE, 141 | hidden_size=RNN_HIDDEN_SIZE, 142 | num_layers=2, 143 | batch_first=True) 144 | 145 | self.decoder_fc1 = nn.Linear( 146 | in_features=RNN_HIDDEN_SIZE, 147 | out_features=8) 148 | self.decoder_fc2 = nn.Linear( 149 | in_features=8, 150 | out_features=1) 151 | 152 | def forward(self, x): 153 | # ******************* Satellite imagery ************************* 154 | # Shape: batch_size, seq_length, width, height, channel 155 | # TODO: Use optical flow, not actual sat images of the future! 
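        # (Feeding in real future satellite frames leaks information that will not exist at inference time, so the resulting scores are optimistic; the 2021-10-01 experiment notes later in this repo quantify the gap.)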
156 | sat_data = x['sat_data'] 157 | batch_size, seq_len, width, height, n_chans = sat_data.shape 158 | 159 | # Stack timesteps as examples (to make a large batch) 160 | new_batch_size = batch_size * seq_len 161 | # 0 1 2 3 162 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 163 | 164 | # Pass data through the network :) 165 | out = self.perceiver(sat_data) 166 | 167 | out = out.reshape(new_batch_size, PERCEIVER_OUTPUT_SIZE) 168 | out = F.relu(self.fc1(out)) 169 | 170 | # ********************** Embedding of PV system ID ******************** 171 | if EMBEDDING_DIM: 172 | pv_row = x['pv_system_row_number'].repeat_interleave(TOTAL_SEQ_LEN) 173 | pv_embedding = self.pv_system_id_embedding(pv_row) 174 | out = torch.cat( 175 | ( 176 | out, 177 | pv_embedding 178 | ), 179 | dim=1) 180 | 181 | # Fully connected layers. 182 | out = F.relu(self.fc2(out)) 183 | out = F.relu(self.fc3(out)) 184 | out = F.relu(self.fc4(out)) 185 | out = F.relu(self.fc5(out)) 186 | 187 | # ******************* PREP DATA FOR RNN ******************************* 188 | out = out.reshape(batch_size, TOTAL_SEQ_LEN, FC_OUTPUT_SIZE) 189 | 190 | # The RNN encoder gets recent history: satellite, NWP, 191 | # datetime features, and recent PV history. The RNN decoder 192 | # gets what we know about the future: satellite, NWP, and 193 | # datetime features. 194 | 195 | # *********************** NWP Data ************************************ 196 | # Shape: batch_size, channel, seq_length, width, height 197 | nwp_data = x['nwp'].float() 198 | # RNN expects seq_len to be dim 1. 199 | nwp_data = nwp_data.permute(0, 2, 1, 3, 4) 200 | batch_size, nwp_seq_len, n_nwp_chans, nwp_width, nwp_height = ( 201 | nwp_data.shape) 202 | nwp_data = nwp_data.reshape( 203 | batch_size, nwp_seq_len, n_nwp_chans * nwp_width * nwp_height) 204 | 205 | # Concat 206 | rnn_input = torch.cat( 207 | ( 208 | out, 209 | nwp_data, 210 | x['hour_of_day_sin'].unsqueeze(-1), 211 | x['hour_of_day_cos'].unsqueeze(-1), 212 | x['day_of_year_sin'].unsqueeze(-1), 213 | x['day_of_year_cos'].unsqueeze(-1), 214 | ), 215 | dim=2) 216 | 217 | pv_yield_history = x['pv_yield'][:, :self.history_len+1].unsqueeze(-1) 218 | encoder_input = torch.cat( 219 | ( 220 | rnn_input[:, :self.history_len+1], 221 | pv_yield_history 222 | ), 223 | dim=2) 224 | 225 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 226 | decoder_output, _ = self.decoder_rnn( 227 | rnn_input[:, -self.forecast_len:], encoder_hidden) 228 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 229 | 230 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 231 | decoder_output = self.decoder_fc2(decoder_output) 232 | 233 | return decoder_output.squeeze() 234 | 235 | def _training_or_validation_step(self, batch, is_train_step): 236 | y_hat = self(batch) 237 | y = batch['pv_yield'][:, -self.forecast_len:] 238 | mse_loss = F.mse_loss(y_hat, y) 239 | nmae_loss = (y_hat - y).abs().mean() 240 | # TODO: Compute correlation coef using np.corrcoef(tensor with 241 | # shape (2, num_timesteps))[0, 1] on each example, and taking 242 | # the mean across the batch? 
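        # An untested, GPU-friendly sketch of the TODO above in pure torch
        # (no .cpu() round-trip; `corr` and `mean_corr` are hypothetical names):
        #
        #     vx = y_hat - y_hat.mean(dim=1, keepdim=True)
        #     vy = y - y.mean(dim=1, keepdim=True)
        #     corr = (vx * vy).sum(dim=1) / (vx.norm(dim=1) * vy.norm(dim=1) + 1e-8)
        #     mean_corr = corr.mean()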
243 | tag = "Train" if is_train_step else "Validation" 244 | self.log_dict( 245 | { 246 | f'MSE/{tag}': mse_loss, 247 | f'NMAE/{tag}': nmae_loss 248 | }, 249 | on_step=is_train_step, 250 | on_epoch=True, 251 | sync_dist=True # Required for distributed training (even multi-GPU on a single machine) 252 | ) 253 | 254 | return nmae_loss 255 | 256 | def training_step(self, batch, batch_idx): 257 | return self._training_or_validation_step(batch, is_train_step=True) 258 | 259 | def validation_step(self, batch, batch_idx): 260 | INTERESTING_EXAMPLES = (1, 5, 6, 7, 9, 11, 17, 19) 261 | name = f'validation/plot/epoch{self.current_epoch}' 262 | if batch_idx == 0: 263 | # Plot example 264 | model_output = self(batch) 265 | for example_i in INTERESTING_EXAMPLES: 266 | fig = plot_example( 267 | batch, model_output, history_len=params['history_len'], 268 | forecast_len=params['forecast_len'], 269 | nwp_channels=params['nwp_channels'], 270 | example_i=example_i, 271 | epoch=self.current_epoch) 272 | self.logger.experiment[name].log(File.as_image(fig)) 273 | fig.close() 274 | 275 | return self._training_or_validation_step(batch, is_train_step=False) 276 | 277 | def configure_optimizers(self): 278 | optimizer = torch.optim.Adam(self.parameters(), lr=0.0005) 279 | return optimizer 280 | 281 | 282 | def main(): 283 | train_dataloader, validation_dataloader = get_dataloaders() 284 | model = LitModel() 285 | logger = NeptuneLogger(project='OpenClimateFix/predict-pv-yield') 286 | logger.log_hyperparams(params) 287 | _LOG.info(f'logger.version = {logger.version}') 288 | trainer = pl.Trainer( 289 | gpus=-1, max_epochs=10_000, logger=logger, 290 | precision=params['precision'], 291 | val_check_interval=params['val_check_interval'], 292 | accelerator='ddp', 293 | plugins=pl.plugins.DDPPlugin(find_unused_parameters=False) 294 | ) 295 | trainer.fit(model, train_dataloader, validation_dataloader) 296 | 297 | 298 | if __name__ == '__main__': 299 | main() 300 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-17/run_cnn3d.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model, params 2 | 3 | import os 4 | 5 | import torch.nn.functional as F 6 | import pytorch_lightning as pl 7 | 8 | from predict_pv_yield.data.dataloader import get_dataloaders 9 | 10 | from predict_pv_yield.visualisation.visualisation import plot_example 11 | 12 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 13 | 14 | import logging 15 | 16 | logging.basicConfig() 17 | _LOG = logging.getLogger("predict_pv_yield") 18 | _LOG.setLevel(logging.DEBUG) 19 | 20 | 21 | 22 | def main(): 23 | train_dataloader, validation_dataloader = get_dataloaders() 24 | model = Model() 25 | logger = NeptuneLogger(project='OpenClimateFix/predict-pv-yield') 26 | logger.log_hyperparams(params) 27 | _LOG.info(f'logger.version = {logger.version}') 28 | trainer = pl.Trainer(gpus=0, max_epochs=1, logger=logger) 29 | trainer.fit(model, train_dataloader) 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-18/run_baseline.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.baseline.last_value import Model 2 | from predict_pv_yield.data.dataloader import get_dataloaders 3 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 4 |
5 | import pytorch_lightning as pl 6 | import logging 7 | 8 | logging.basicConfig() 9 | _LOG = logging.getLogger("predict_pv_yield") 10 | _LOG.setLevel(logging.DEBUG) 11 | 12 | 13 | def main(): 14 | train_dataloader, validation_dataloader = get_dataloaders(n_train_data=10, n_validation_data=10) 15 | model = Model() 16 | logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 17 | _LOG.info(f"logger.version = {logger.version}") 18 | trainer = pl.Trainer(gpus=0, max_epochs=10, logger=logger) 19 | 20 | # don't need to train the baseline model 21 | # trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 22 | 23 | trainer.validate(model, validation_dataloader) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | 29 | 30 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-124/charts 31 | # 32 | # {'Validation: MAE': 0.08886486291885376, 'Validation: MSE': 0.02136283740401268} 33 | # 34 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-18/run_cnn3d.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytorch_lightning as pl 4 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 5 | 6 | from predict_pv_yield.data.dataloader import get_dataloaders 7 | from predict_pv_yield.models.conv3d.model import Model, model_configuration_default 8 | 9 | logging.basicConfig() 10 | _LOG = logging.getLogger("predict_pv_yield") 11 | _LOG.setLevel(logging.DEBUG) 12 | 13 | 14 | def main(): 15 | train_dataloader, validation_dataloader = get_dataloaders(n_train_data=10, n_validation_data=10) 16 | model = Model() 17 | logger = NeptuneLogger(project='OpenClimateFix/predict-pv-yield') 18 | logger.log_hyperparams(model_configuration_default) 19 | _LOG.info(f'logger.version = {logger.version}') 20 | trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger) 21 | trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 22 | 23 | # run validation 24 | trainer.validate(model, validation_dataloader) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | 30 | 31 | # Managed to run it on GCP. 32 | # Results are logged to https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-120/monitoring 33 | # Notes: 34 | # 1. Large training set, and one epoch took a day, so should use a GPU for this model. I was a bit surprised, as I didn't 35 | # think the model was so big. 36 | # 2. Need to work on a general validation method. Good to baseline against a really simple model. For 37 | # validation, might need to think carefully about the metrics that will be used.
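# A hedged sketch of one such metric: with `y` and `y_hat` both of shape
# (batch_size, forecast_len), a per-forecast-horizon MAE is simply
#     (y_hat - y).abs().mean(dim=0)  # -> shape (forecast_len,)
# which shows how quickly forecast skill decays with lead time.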
38 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-24/run_cnn3d.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytorch_lightning as pl 4 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 5 | 6 | from predict_pv_yield.data.dataloader import get_dataloaders 7 | from predict_pv_yield.models.conv3d.model import Model, model_configuration_default 8 | 9 | logging.basicConfig() 10 | _LOG = logging.getLogger("predict_pv_yield") 11 | _LOG.setLevel(logging.DEBUG) 12 | 13 | _LOG = logging.getLogger("nowcasting_dataset") 14 | _LOG.setLevel(logging.INFO) 15 | 16 | 17 | def main(): 18 | train_dataloader, validation_dataloader = get_dataloaders( 19 | n_train_data=24900, 20 | n_validation_data=1000, 21 | data_path="gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/", 22 | cloud="gcp", 23 | ) 24 | model = Model() 25 | 26 | logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 27 | logger.log_hyperparams(model_configuration_default) 28 | _LOG.info(f"logger.version = {logger.version}") 29 | trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger) 30 | trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 31 | 32 | # run validation 33 | trainer.validate(model, validation_dataloader) 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | 39 | 40 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-130/monitoring 41 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-24/run_cnn3d_n_layers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytorch_lightning as pl 4 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 5 | 6 | from predict_pv_yield.data.dataloader import get_dataloaders 7 | from predict_pv_yield.models.conv3d.model import Model, model_configuration_default 8 | 9 | logging.basicConfig() 10 | _LOG = logging.getLogger("predict_pv_yield") 11 | _LOG.setLevel(logging.DEBUG) 12 | 13 | _LOG = logging.getLogger("nowcasting_dataset") 14 | _LOG.setLevel(logging.INFO) 15 | 16 | 17 | def main(): 18 | train_dataloader, validation_dataloader = get_dataloaders( 19 | n_train_data=24900, 20 | n_validation_data=1000, 21 | data_path="gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/", 22 | cloud="gcp", 23 | ) 24 | 25 | model_configuration = dict(conv3d_channels=8, kennel=3, number_of_conv3d_layers=6) 26 | model = Model(model_configuration=model_configuration) 27 | 28 | logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 29 | logger.log_hyperparams(model_configuration_default) 30 | _LOG.info(f"logger.version = {logger.version}") 31 | trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger) 32 | trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 33 | 34 | # run validation 35 | trainer.validate(model, validation_dataloader) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | 41 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-133/monitoring 42 | 43 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-132/monitoring 44 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-131/monitoring 45 | -------------------------------------------------------------------------------- 
/experiments/2021-08/2021-08-26/run_cnn3d_n_layers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytorch_lightning as pl 4 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 5 | 6 | from predict_pv_yield.data.dataloader import get_dataloaders 7 | from predict_pv_yield.models.conv3d.model import Model, model_configuration_default 8 | 9 | logging.basicConfig() 10 | _LOG = logging.getLogger("predict_pv_yield") 11 | _LOG.setLevel(logging.DEBUG) 12 | 13 | _LOG = logging.getLogger("nowcasting_dataset") 14 | _LOG.setLevel(logging.INFO) 15 | 16 | 17 | def main(): 18 | train_dataloader, validation_dataloader = get_dataloaders( 19 | n_train_data=2000, 20 | n_validation_data=1000, 21 | data_path="gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/", 22 | cloud="gcp", 23 | ) 24 | 25 | model_configuration = dict(conv3d_channels=8, kennel=3, number_of_conv3d_layers=6) 26 | model = Model(model_configuration=model_configuration) 27 | 28 | logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 29 | logger.log_hyperparams(model_configuration_default) 30 | _LOG.info(f"logger.version = {logger.version}") 31 | trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger) 32 | trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 33 | 34 | # run validation 35 | trainer.validate(model, validation_dataloader) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | 41 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-137/charts 42 | # ran with 2,000 training samples 43 | 44 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-138/charts 45 | # ran with 10,000 training samples 46 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-27/experiments.md: -------------------------------------------------------------------------------- 1 | # Daily Experiments 2 | 3 | Ran hydra for the first time, for hyperparameter optimization. 4 | It did 2 full runs, then I think it ran out of memory, which caused a strange error. 5 | Have now installed 'psutil' so that CPU and memory usage are logged to neptune.
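(For reference, a sweep like this would be launched with Hydra multirun; the exact
invocation here is from memory and may need adjusting, e.g.
`python run.py -m hparams_search=conv3d_optuna experiment=conv3d`.)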
6 | 7 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-160/monitoring 8 | Validation error after 10 epochs - 0.073 9 | 10 | conv3d_channels = 32 11 | fc1_output_features = 16 12 | fc2_output_features = 128 13 | fc3_output_features = 16 14 | 15 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-161/monitoring 16 | Validation error after 10 epochs - 0.073 17 | 18 | conv3d_channels = 32 19 | fc1_output_features = 32 20 | fc2_output_features = 16 21 | fc3_output_features = 16 22 | 23 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-162/monitoring 24 | Validation error after 2 epochs - 0.076 (then an error happened in the 3rd epoch) 25 | 26 | conv3d_channels = 32 27 | fc1_output_features = 64 28 | fc2_output_features = 16 29 | fc3_output_features = 8 30 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-27/run_baseline.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.baseline.last_value import Model 2 | from predict_pv_yield.data.dataloader import get_dataloaders 3 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 4 | 5 | import pytorch_lightning as pl 6 | import logging 7 | 8 | logging.basicConfig() 9 | _LOG = logging.getLogger("predict_pv_yield") 10 | _LOG.setLevel(logging.DEBUG) 11 | 12 | 13 | def main(): 14 | train_dataloader, validation_dataloader = get_dataloaders(n_validation_data=1000, cloud='aws') 15 | model = Model() 16 | # logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 17 | # _LOG.info(f"logger.version = {logger.version}") 18 | trainer = pl.Trainer(gpus=0, max_epochs=10) 19 | 20 | # don't need to train the baseline model 21 | # trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 22 | 23 | trainer.validate(model, validation_dataloader) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | 29 | 30 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-124/charts 31 | # 32 | # {'Validation: MAE': 0.08886486291885376, 'Validation: MSE': 0.02136283740401268} 33 | # 34 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-31/conv3d.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model 2 | 3 | from predict_pv_yield.data.dataloader import get_dataloaders 4 | from pytorch_lightning.utilities.cloud_io import load as pl_load 5 | import torch 6 | 7 | weights = './weights/conv3d/last.ckpt' 8 | checkpoint = pl_load(weights, map_location=torch.device('cpu')) 9 | 10 | model = Model(conv3d_channels=32, 11 | fc1_output_features=32, 12 | fc2_output_features=16, 13 | fc3_output_features=16, 14 | include_time=False, 15 | number_of_conv3d_layers=4) 16 | model.load_state_dict(checkpoint["state_dict"]) 17 | 18 | train_dataset, validation_dataset = get_dataloaders() 19 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-31/experiments.txt: -------------------------------------------------------------------------------- 1 | Ran Perceiver model 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-186/charts 4 | 5 | Ran it using batch size 8. 6 | Each epoch took about 3 hours when running on GCP - n1-standard-8, NVIDIA Tesla P100 7 | --------------------------------------------------------------------------------
/experiments/2021-09/2021-09-03/conv3d.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model 2 | 3 | from predict_pv_yield.data.dataloader import get_dataloaders 4 | from pytorch_lightning.utilities.cloud_io import load as pl_load 5 | import torch 6 | import pandas as pd 7 | 8 | from predict_pv_yield.visualisation.line import plot_one_result, plot_batch_results 9 | 10 | weights = "./weights/conv3d/epoch_009.ckpt" 11 | checkpoint = pl_load(weights, map_location=torch.device("cpu")) 12 | 13 | model = Model( 14 | conv3d_channels=32, 15 | fc1_output_features=128, 16 | fc2_output_features=128, 17 | fc3_output_features=64, 18 | include_time=True, 19 | forecast_len=12, 20 | history_len=6, 21 | number_of_conv3d_layers=6, 22 | ) 23 | model.load_state_dict(checkpoint["state_dict"]) 24 | 25 | train_dataset, validation_dataset = get_dataloaders( 26 | cloud="gcp", data_path="gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/" 27 | ) 28 | validation_dataset = iter(validation_dataset) 29 | x = next(validation_dataset) 30 | 31 | y_hat_all = model(x) 32 | 33 | # plot one 34 | batch_index = 0 35 | y = x["pv_yield"][batch_index][7:, 0].detach().numpy() 36 | y_hat = y_hat_all[batch_index].detach().numpy() 37 | time = pd.to_datetime(x["sat_datetime_index"][batch_index][7:].detach().numpy(), unit="s") 38 | 39 | fig = plot_one_result(x=time, y=y, y_hat=y_hat) 40 | fig.show(renderer="browser") 41 | 42 | # plot all of batch 43 | y = x["pv_yield"][:, 7:, 0].detach().numpy() 44 | y_hat = y_hat_all.detach().numpy() 45 | time = [pd.to_datetime(x, unit="s") for x in x["sat_datetime_index"][:, 7:].detach().numpy()] 46 | 47 | fig = plot_batch_results(x=time, y=y, y_hat=y_hat, model_name=model.name) 48 | fig.show(renderer="browser") 49 | -------------------------------------------------------------------------------- /experiments/2021-09/2021-09-03/experiments.txt: -------------------------------------------------------------------------------- 1 | Ran Perceiver RNN model 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-245/charts 4 | 5 | Includes validation images, so we can see how the model is performing after each epoch 6 | 7 | Due to GPU memory limits, had to go with: 8 | 9 | forecast_len: 12 10 | history_len: 6 11 | batch_size: 8 12 | num_latents: 32 13 | latent_dim: 32 14 | embedding_dem: 10 15 | -------------------------------------------------------------------------------- /experiments/2021-09/2021-09-24/experiments.txt: -------------------------------------------------------------------------------- 1 | # Baseline 2 | 3 | Ran baseline on new v6 GCP dataset 4 | 5 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-283/monitoring 6 | 7 | Takes about 4 minutes to run the validation epoch 8 | 9 | This is just for a forecast 1 timestep into the future 10 | MAE = 0.0562 11 | 12 | # Conv3d 13 | 14 | Ran without using nwp data or datetime features 15 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-314/monitoring 16 | 17 | MAE = 0.0401 18 | -------------------------------------------------------------------------------- /experiments/2021-09/2021-09-27/experiments.txt: -------------------------------------------------------------------------------- 1 | # Baseline 2 | 3 | Ran baseline on new v6 GCP dataset 4 | 5 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-283/monitoring 6 | 7 | Takes about 4 minutes to run the validation epoch 8 | 9 | This is just for a forecast 1 timestep into the future 10 | MAE = 0.0562 11 | 12 | # Conv3d 13 | 14 | Ran without using nwp data or datetime features 15 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-314/monitoring 16 | 17 | MAE = 0.0401 18 | 19 | # Conv3d (Sat and NWP) 20 | 21 | Using both sat and nwp, fed into two separate convolution nets. 22 | 23 | https://app.neptune.ai/OpenClimateFix/predict-pv-yield/e/PRED-320 24 | 25 | MAE = 0.0376 - this was after 10 epochs, and I think it was still going down. 26 | -------------------------------------------------------------------------------- /experiments/2021-09/2021-09-28/experiments.txt: -------------------------------------------------------------------------------- 1 | 1. Perceiver NWP SAT 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-331/monitoring 4 | 5 | Ran with 6 | - batch_size of 6, as the GPU was out of memory otherwise 7 | - num_latents: int = 64, 8 | - latent_dim: int = 64, 9 | - embedding_dem: int = 0, 10 | 11 | Each epoch takes about 3 hours 12 | 13 | Decided to stop it early 14 | 15 | 2. Perceiver Conv3d NWP SAT 16 | 17 | Idea is to have 1 conv3d + max pool layer before the perceiver model 18 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-331/monitoring 19 | 20 | Conv3d did not make much difference to memory; the biggest factors were changing 21 | - num_latents 22 | - latent_dim 23 | 24 | To get batch size 32, set 25 | - num_latents = 16 26 | - latent_dim = 16 27 | - PERCEIVER_OUTPUT_SIZE = 512 28 | OR 29 | To get batch size 32, set 30 | - num_latents = 24 31 | - latent_dim = 24 32 | - PERCEIVER_OUTPUT_SIZE = 128 33 | 34 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-349/monitoring 35 | 36 | ~ 4 hours per epoch 37 | 38 | MAE = 0.0308 (after 10 epochs) 39 | -------------------------------------------------------------------------------- /experiments/2021-10/2021-10-01/experiment.txt: -------------------------------------------------------------------------------- 1 | 1. Perceiver Conv3d NWP SAT 2 | 3 | No future satellite images 4 | 5 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-378/charts 6 | 7 | ~ 4 hours per epoch 8 | 9 | MAE = 0.0365 (after 22 epochs), compared to MAE 0.0304 when future satellite images were included 10 | -------------------------------------------------------------------------------- /experiments/2021-11/2021-11-22.txt: -------------------------------------------------------------------------------- 1 | 1. Conv3d - no nwp 2 | n_training_batches= 450 3 | n_test_batches= 450 4 | 5 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-433/charts 6 | 7 | 2. Conv3d with nwp 8 | 9 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-440/charts 10 | 11 | 3. Conv3d with nwp 12 | 13 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-445/ 14 | 15 | n_training_batches = 769 - limited by nwp data 16 | n_test_batches= 400 (from training set) 17 | -------------------------------------------------------------------------------- /experiments/2021-11/2021-11-25.txt: -------------------------------------------------------------------------------- 1 | 1.
Baseline model 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-493/monitoring 4 | 5 | on test set of 400 6 | 7 | DATALOADER:0 VALIDATE RESULTS 8 | {'MAE_EXP/Validation': 1.8552579879760742, 9 | 'MAE_EXP/Validation_epoch': 1.8552579879760742, 10 | 'MSE/Validation': 0.006537176202982664, 11 | 'MSE/Validation_epoch': 0.006537176202982664, 12 | 'MSE_EXP/Validation': 0.20918963849544525, 13 | 'MSE_EXP/Validation_epoch': 0.20918963849544525, 14 | 'MSE_forecast_horizon_0/Validation': 0.05797681212425232, 15 | 'MSE_forecast_horizon_0/Validation_epoch': 0.05797681212425232, 16 | 'NMAE/Validation': 0.05797681212425232, 17 | 'NMAE/Validation_epoch': 0.05797681212425232} 18 | 19 | 20 | 2. conv3d sat and nwp 21 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-490/monitoring 22 | 23 | this is with no nwp data NMAE/Validation ~ 0.0676 24 | 25 | data: 26 | sat (no hrv) 27 | gsp history 28 | 29 | Total epochs: 4 30 | 31 | 3. https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-504/monitoring 32 | 33 | this is with nwp data NMAE/Validation ~ 0.0601 34 | 35 | data: 36 | nwp 37 | sat (no hrv) 38 | gsp history 39 | 40 | Total epochs: 4 41 | 42 | 43 | 4. https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-523/all 44 | 45 | data: 46 | nwp 47 | sat (no hrv) 48 | gsp history 49 | pv history 50 | 51 | After 1 epoch: NMAE/Validation ~ 0.0597 52 | 53 | 5. https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-525/all 54 | 55 | data: 56 | nwp 57 | sat (no hrv) 58 | no gsp history 59 | pv history 60 | 61 | After X epoch: NMAE/Validation 62 | -------------------------------------------------------------------------------- /notebooks/debug_gcsfs_multiprocessing_issue.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ecf5eff1-679a-42f1-b153-46e93cdf58bf", 6 | "metadata": {}, 7 | "source": [ 8 | "Code experiments for my GCSFS bug report: https://github.com/dask/gcsfs/issues/379\n", 9 | "\n", 10 | "```shell\n", 11 | "conda create --name test_gcsfs python=3.8 gcsfs ipykernel\n", 12 | "```" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "adequate-virgin", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import multiprocessing\n", 23 | "from concurrent.futures import ProcessPoolExecutor\n", 24 | "import gcsfs\n", 25 | "import time\n", 26 | "gcsfs.__version__" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "02197441-5221-42e3-910f-b56f3b02992a", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "multiprocessing.set_start_method('spawn')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "id": "moderate-escape", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "gcs = gcsfs.GCSFileSystem() # Works fine!" 
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "id": "7e9685f6-8ad5-4c1c-ad0c-edcf391ef0e5", 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "fs_map = gcs.get_mapper('solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16')" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "id": "5b7c3007-b02e-4374-95ac-c33b4bb20188", 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "ItemsView()" 69 | ] 70 | }, 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "fs_map.items()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 6, 83 | "id": "05d76d01-e477-4cf5-9faa-d8d122fc7bf4", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def process_pool():\n", 88 | " with ProcessPoolExecutor(max_workers=1) as executor:\n", 89 | " for i in range(8):\n", 90 | " future = executor.submit(gcsfs.GCSFileSystem)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 7, 96 | "id": "041cdc7c-111a-4415-a097-d4eb8f0ca818", 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "process_pool()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 8, 106 | "id": "449057a8-d16a-478d-b8a5-4c033ce92cef", 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "process_pool()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 3, 116 | "id": "a58c4aa8-51be-4fff-ad67-a8652f4c8b71", 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "def worker_loop(results_queue):\n", 121 | " print('worker')\n", 122 | " \n", 123 | " \n", 124 | " #gcs = gcsfs.GCSFileSystem()\n", 125 | " \n", 126 | " while True:\n", 127 | " results_queue.put(None)\n", 128 | " time.sleep(1)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 4, 134 | "id": "723d464a-49f2-44ef-88c2-b785952f15ff", 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "workers = []\n", 139 | "results_queue = multiprocessing.Queue()\n", 140 | "for _ in range(4):\n", 141 | " worker = multiprocessing.Process(\n", 142 | " target=worker_loop,\n", 143 | " args=(results_queue,)\n", 144 | " )\n", 145 | " worker.daemon = True\n", 146 | " worker.start()\n", 147 | " workers.append(worker)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 5, 153 | "id": "9789c6df-a20b-4a04-84a6-5588cd9e95ad", 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "" 160 | ] 161 | }, 162 | "execution_count": 5, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "workers[0]" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "id": "c6a4622d-0e03-4c82-87be-0f6a48ce769e", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "results_queue.get()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "id": "2c18440d-05bc-4113-b6f1-b16b45b26dc3", 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "test_gcsfs", 193 | "language": "python", 194 | "name": "test_gcsfs" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | 
"name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.8.8" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 5 211 | } 212 | -------------------------------------------------------------------------------- /predict_pv_yield/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/__init__.py -------------------------------------------------------------------------------- /predict_pv_yield/data/dataloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from nowcasting_dataloader.datasets import NetCDFDataset, worker_init_fn 3 | from nowcasting_dataloader.fake import FakeDataset 4 | from nowcasting_dataset.config.load import load_yaml_configuration 5 | from typing import Tuple 6 | import logging 7 | import torch 8 | from pytorch_lightning import LightningDataModule 9 | 10 | 11 | 12 | _LOG = logging.getLogger(__name__) 13 | _LOG.setLevel(logging.DEBUG) 14 | 15 | torch.set_default_dtype(torch.float32) 16 | 17 | 18 | def get_dataloaders( 19 | n_train_data: int = 24900, 20 | n_validation_data: int = 900, 21 | cloud: str = "gcp", 22 | temp_path=".", 23 | data_path="prepared_ML_training_data/v4/", 24 | ) -> Tuple: 25 | 26 | configuration = load_yaml_configuration(filename=f'{data_path}/configuration.yaml') 27 | 28 | data_module = NetCDFDataModule( 29 | temp_path=temp_path, data_path=data_path, cloud=cloud, n_train_data=n_train_data, n_val_data=n_validation_data 30 | ) 31 | 32 | train_dataloader = data_module.train_dataloader() 33 | validation_dataloader = data_module.val_dataloader() 34 | 35 | return train_dataloader, validation_dataloader 36 | 37 | 38 | class NetCDFDataModule(LightningDataModule): 39 | """ 40 | Example of LightningDataModule for NETCDF dataset. 41 | A DataModule implements 5 key methods: 42 | - prepare_data (things to do on 1 GPU/TPU, not on every GPU/TPU in distributed mode) 43 | - setup (things to do on every accelerator in distributed mode) 44 | - train_dataloader (the training dataloader) 45 | - val_dataloader (the validation dataloader(s)) 46 | - test_dataloader (the test dataloader(s)) 47 | This allows you to share a full dataset without explaining how to download, 48 | split, transform and process the data. 49 | Read the docs: 50 | https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html 51 | """ 52 | 53 | def __init__( 54 | self, 55 | temp_path: str = ".", 56 | n_train_data: int = 24900, 57 | n_val_data: int = 1000, 58 | cloud: str = "aws", 59 | num_workers: int = 8, 60 | pin_memory: bool = True, 61 | data_path="prepared_ML_training_data/v4/", 62 | fake_data: bool = False, 63 | ): 64 | """ 65 | fake_data: random data is created and used instead. 
This is useful for testing 66 | """ 67 | super().__init__() 68 | 69 | self.temp_path = temp_path 70 | self.data_path = data_path 71 | self.cloud = cloud 72 | self.n_train_data = n_train_data 73 | self.n_val_data = n_val_data 74 | self.num_workers = num_workers 75 | self.pin_memory = pin_memory 76 | self.fake_data = fake_data 77 | 78 | filename = os.path.join(data_path, 'configuration.yaml') 79 | _LOG.debug(f'Will be loading the configuration file {filename}') 80 | self.configuration = load_yaml_configuration(filename=filename) 81 | 82 | self.dataloader_config = dict( 83 | pin_memory=self.pin_memory, 84 | num_workers=self.num_workers, 85 | prefetch_factor=8, 86 | worker_init_fn=worker_init_fn, 87 | persistent_workers=True, 88 | # Disable automatic batching because dataset 89 | # returns complete batches. 90 | batch_size=None, 91 | ) 92 | 93 | def train_dataloader(self): 94 | if self.fake_data: 95 | train_dataset = FakeDataset(configuration=self.configuration) 96 | else: 97 | train_dataset = NetCDFDataset( 98 | self.n_train_data, 99 | os.path.join(self.data_path, "train"), 100 | os.path.join(self.temp_path, "train"), 101 | configuration=self.configuration 102 | ) 103 | 104 | return torch.utils.data.DataLoader(train_dataset, **self.dataloader_config) 105 | 106 | def val_dataloader(self): 107 | if self.fake_data: 108 | val_dataset = FakeDataset(configuration=self.configuration) 109 | else: 110 | val_dataset = NetCDFDataset( 111 | self.n_val_data, 112 | os.path.join(self.data_path, "test"), 113 | os.path.join(self.temp_path, "test"), 114 | configuration=self.configuration 115 | ) 116 | 117 | return torch.utils.data.DataLoader(val_dataset, **self.dataloader_config) 118 | 119 | def test_dataloader(self): 120 | if self.fake_data: 121 | test_dataset = FakeDataset(configuration=self.configuration) 122 | else: 123 | # TODO need to change this to a test folder 124 | test_dataset = NetCDFDataset( 125 | self.n_val_data, 126 | os.path.join(self.data_path, "test"), 127 | os.path.join(self.temp_path, "test"), 128 | configuration=self.configuration 129 | ) 130 | 131 | return torch.utils.data.DataLoader(test_dataset, **self.dataloader_config) 132 | -------------------------------------------------------------------------------- /predict_pv_yield/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/models/__init__.py -------------------------------------------------------------------------------- /predict_pv_yield/models/base_model.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | from nowcasting_utils.visualization.visualization import plot_example 6 | from nowcasting_utils.visualization.line import plot_batch_results 7 | from nowcasting_dataset.data_sources.nwp.nwp_data_source import NWP_VARIABLE_NAMES 8 | from nowcasting_utils.models.loss import WeightedLosses 9 | from nowcasting_utils.models.metrics import mae_each_forecast_horizon, mse_each_forecast_horizon 10 | from nowcasting_dataloader.batch import BatchML 11 | from nowcasting_utils.metrics.validation import make_validation_results, save_validation_results_to_logger 12 | 13 | import pandas as pd 14 | import numpy as np 15 | 16 | import logging 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | activities = [torch.profiler.ProfilerActivity.CPU] 21 | 
if torch.cuda.is_available(): 22 | activities.append(torch.profiler.ProfilerActivity.CUDA) 23 | 24 | default_output_variable = "pv_yield" 25 | 26 | 27 | class BaseModel(pl.LightningModule): 28 | 29 | # default batch_size 30 | batch_size = 32 31 | 32 | # results file name 33 | results_file_name = "results_epoch" 34 | 35 | # list of results dataframes. This is used to save validation results 36 | results_dfs = [] 37 | 38 | def __init__(self): 39 | super().__init__() 40 | 41 | self.history_len_5 = ( 42 | self.history_minutes // 5 43 | ) # the number of historic timesteps for 5 minutes data 44 | self.forecast_len_5 = ( 45 | self.forecast_minutes // 5 46 | ) # the number of forecast timesteps for 5 minutes data 47 | 48 | self.history_len_30 = ( 49 | self.history_minutes // 30 50 | ) # the number of historic timesteps for 30 minutes data 51 | self.forecast_len_30 = ( 52 | self.forecast_minutes // 30 53 | ) # the number of forecast timesteps for 30 minutes data 54 | 55 | # the number of historic timesteps for 60 minutes data 56 | # Note that ceil is taken as for 30 minutes of history data, one history value will be used 57 | self.history_len_60 = int(np.ceil(self.history_minutes / 60)) 58 | self.forecast_len_60 = ( 59 | self.forecast_minutes // 60 60 | ) # the number of forecast timesteps for 60 minutes data 61 | 62 | if not hasattr(self, "output_variable"): 63 | logger.debug("output_variable not set; using default %r", default_output_variable) 64 | self.output_variable = default_output_variable 65 | 66 | if self.output_variable == "pv_yield": 67 | self.forecast_len = self.forecast_len_5 68 | self.history_len = self.history_len_5 69 | self.number_of_samples_per_batch = 128 70 | else: 71 | self.forecast_len = self.forecast_len_30 72 | self.history_len = self.history_len_30 73 | self.number_of_samples_per_batch = 32 74 | self.number_of_pv_samples_per_batch = 128 75 | 76 | self.weighted_losses = WeightedLosses(forecast_length=self.forecast_len) 77 | 78 | def _training_or_validation_step(self, batch, tag: str, return_model_outputs: bool = False): 79 | """ 80 | batch: The batch data 81 | tag: either 'Train', 'Validation', or 'Test' 82 | """ 83 | 84 | if type(batch) == dict: 85 | batch = BatchML(**batch) 86 | 87 | # put the batch data through the model 88 | y_hat = self(batch) 89 | 90 | # get the true result out. Select the first data point, as this is the pv system in the center of the image 91 | if self.output_variable == "gsp_yield": 92 | y = batch.gsp.gsp_yield 93 | else: 94 | y = batch.pv.pv_yield 95 | y = y[0 : self.batch_size, -self.forecast_len :, 0] 96 | 97 | # calculate mse, mae 98 | mse_loss = F.mse_loss(y_hat, y) 99 | nmae_loss = (y_hat - y).abs().mean() 100 | 101 | # calculate mse, mae with exp weighted loss 102 | mse_exp = self.weighted_losses.get_mse_exp(output=y_hat, target=y) 103 | mae_exp = self.weighted_losses.get_mae_exp(output=y_hat, target=y) 104 | 105 | # TODO: Compute correlation coef using np.corrcoef(tensor with 106 | # shape (2, num_timesteps))[0, 1] on each example, and taking 107 | # the mean across the batch? 108 | self.log_dict( 109 | { 110 | f"MSE/{tag}": mse_loss, 111 | f"NMAE/{tag}": nmae_loss, 112 | f"MSE_EXP/{tag}": mse_exp, 113 | f"MAE_EXP/{tag}": mae_exp, 114 | }, 115 | on_step=True, 116 | on_epoch=True, 117 | sync_dist=True # Required for distributed training 118 | # (even multi-GPU on a single machine). 119 | ) 120 | 121 | if tag != "Train": 122 | # add metrics for each forecast horizon 123 | mse_each_forecast_horizon_metric = mse_each_forecast_horizon(output=y_hat, target=y) 124 | mae_each_forecast_horizon_metric = mae_each_forecast_horizon(output=y_hat, target=y) 125 | 126 | metrics_mse = { 127 | f"MSE_forecast_horizon_{i}/{tag}": mse_each_forecast_horizon_metric[i] 128 | for i in range(self.forecast_len_30) 129 | } 130 | metrics_mae = { 131 | f"MAE_forecast_horizon_{i}/{tag}": mae_each_forecast_horizon_metric[i] 132 | for i in range(self.forecast_len_30) 133 | } 134 | 135 | self.log_dict( 136 | {**metrics_mse, **metrics_mae}, 137 | on_step=True, 138 | on_epoch=True, 139 | sync_dist=True # Required for distributed training 140 | # (even multi-GPU on a single machine). 141 | ) 142 | 143 | if return_model_outputs: 144 | return nmae_loss, y_hat 145 | else: 146 | return nmae_loss 147 | 148 | def training_step(self, batch, batch_idx): 149 | 150 | if (batch_idx == 0) and (self.current_epoch == 0): 151 | return self._training_or_validation_step(batch, tag="Train") 152 | else: 153 | return self._training_or_validation_step(batch, tag="Train") 154 | 155 | def validation_step(self, batch: BatchML, batch_idx): 156 | 157 | if type(batch) == dict: 158 | batch = BatchML(**batch) 159 | 160 | # get model outputs 161 | nmae_loss, model_output = self._training_or_validation_step( 162 | batch, tag="Validation", return_model_outputs=True 163 | ) 164 | 165 | INTERESTING_EXAMPLES = (1, 5, 6, 7, 9, 11, 17, 19) 166 | name = f"validation/plot/epoch_{self.current_epoch}_{batch_idx}" 167 | if batch_idx in [0, 1, 2, 3, 4]: 168 | 169 | # make sure the interesting examples don't go above the batch size 170 | INTERESTING_EXAMPLES = (i for i in INTERESTING_EXAMPLES if i < self.batch_size) 171 | 172 | for example_i in INTERESTING_EXAMPLES: 173 | # 1. Plot example 174 | if 0: 175 | fig = plot_example( 176 | batch, 177 | model_output, 178 | history_minutes=self.history_len_5 * 5, 179 | forecast_minutes=self.forecast_len_5 * 5, 180 | nwp_channels=NWP_VARIABLE_NAMES, 181 | example_i=example_i, 182 | epoch=self.current_epoch, 183 | output_variable=self.output_variable, 184 | ) 185 | 186 | # save fig to log 187 | self.logger.experiment[-1].log_image(name, fig) 188 | try: 189 | fig.close() 190 | except Exception as _: 191 | # could not close figure 192 | pass 193 | 194 | # 2.
plot summary batch of predictions and results 195 | # make x,y data 196 | if self.output_variable == "gsp_yield": 197 | y = batch.gsp.gsp_yield[0 : self.batch_size, :, 0].cpu().numpy() 198 | else: 199 | y = batch.pv.pv_yield[0 : self.batch_size, :, 0].cpu().numpy() 200 | y_hat = model_output[0 : self.batch_size].cpu().numpy() 201 | time = [ 202 | pd.to_datetime(x, unit="ns") 203 | for x in batch.gsp.gsp_datetime_index[0 : self.batch_size].cpu().numpy() 204 | ] 205 | time_hat = [ 206 | pd.to_datetime(x, unit="ns") 207 | for x in batch.gsp.gsp_datetime_index[ 208 | 0 : self.batch_size, self.history_len_30 + 1 : 209 | ] 210 | .cpu() 211 | .numpy() 212 | ] 213 | 214 | # plot and save to logger 215 | fig = plot_batch_results(model_name=self.name, y=y, y_hat=y_hat, x=time, x_hat=time_hat) 216 | fig.write_html(f"temp_{batch_idx}.html") 217 | try: 218 | self.logger.experiment[-1][name].upload(f"temp_{batch_idx}.html") 219 | except Exception: 220 | pass 221 | 222 | # save validation results 223 | capacity = batch.gsp.gsp_capacity[:,-self.forecast_len_30:,0].cpu().numpy() 224 | predictions = model_output.cpu().numpy() 225 | truths = batch.gsp.gsp_yield[:, -self.forecast_len_30:, 0].cpu().numpy() 226 | predictions = predictions * capacity 227 | truths = truths * capacity 228 | 229 | results = make_validation_results(truths_mw=truths, 230 | predictions_mw=predictions, 231 | capacity_mwp=capacity, 232 | gsp_ids=batch.gsp.gsp_id[:, 0].cpu(), 233 | batch_idx=batch_idx, 234 | t0_datetimes_utc=pd.to_datetime(batch.metadata.t0_datetime_utc)) 235 | 236 | # append so in 'validation_epoch_end' the file is saved 237 | if batch_idx == 0: 238 | self.results_dfs = [] 239 | self.results_dfs.append(results) 240 | 241 | return nmae_loss 242 | 243 | def validation_epoch_end(self, outputs): 244 | 245 | logger.info("Validation epoch end") 246 | 247 | save_validation_results_to_logger(results_dfs=self.results_dfs, 248 | results_file_name=self.results_file_name, 249 | current_epoch=self.current_epoch, 250 | logger=self.logger) 251 | 252 | def test_step(self, batch, batch_idx): 253 | self._training_or_validation_step(batch, tag="Test") 254 | 255 | def configure_optimizers(self): 256 | optimizer = torch.optim.Adam(self.parameters(), lr=0.0005) 257 | return optimizer 258 | -------------------------------------------------------------------------------- /predict_pv_yield/models/baseline/last_value.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from predict_pv_yield.models.base_model import BaseModel 4 | from nowcasting_dataloader.batch import BatchML 5 | 6 | 7 | logging.basicConfig() 8 | _LOG = logging.getLogger("predict_pv_yield") 9 | _LOG.setLevel(logging.DEBUG) 10 | 11 | 12 | class Model(BaseModel): 13 | name = "last_value" 14 | 15 | def __init__(self, forecast_minutes: int = 12, history_minutes: int = 6, output_variable="pv_yield"): 16 | """ 17 | Simple baseline model that takes the last pv yield value and copies it forward 18 | """ 19 | 20 | self.forecast_minutes = forecast_minutes 21 | self.history_minutes = history_minutes 22 | self.output_variable = output_variable 23 | 24 | super().__init__() 25 | 26 | def forward(self, x: BatchML): 27 | 28 | if type(x) == dict: 29 | x = BatchML(**x) 30 | 31 | # Shape: batch_size, seq_length, n_sites 32 | if self.output_variable == 'gsp_yield': 33 | gsp_yield = x.gsp.gsp_yield 34 | else: 35 | gsp_yield = x.pv.pv_yield 36 | 37 | # take the last non-forecast value, and the first entry in the pv yield 38 | # (this is the pv site we are predicting for) 39 | y_hat = gsp_yield[:, -self.forecast_len - 1, 0] 40 | 41 | # repeat the last valid value forward for the n forecast steps 42 | out = y_hat.unsqueeze(1).repeat(1, self.forecast_len) 43 | # shape: batch_size, forecast_len 44 | 45 | return out 46 | -------------------------------------------------------------------------------- /predict_pv_yield/models/baseline/readme.md: -------------------------------------------------------------------------------- 1 | # Baseline Models 2 | 3 | Idea is to have a really simple baseline model for predicting pv yield. 4 | First model just uses the last pv yield value. 5 | The reason to have this model is so that as we develop more complicated models, 6 | we can see how much 'better' they are doing. 7 | 8 | 9 | Want to try and keep the same setup as the other pytorch models, so it is similar to run. 10 | -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/architect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/models/conv3d/architect.png -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/conv3d_sat_nwp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/models/conv3d/conv3d_sat_nwp.png -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | from predict_pv_yield.models.base_model import BaseModel 8 | from nowcasting_dataloader.batch import BatchML 9 | 10 | logging.basicConfig() 11 | _LOG = logging.getLogger("predict_pv_yield") 12 | 13 | 14 | class Model(BaseModel): 15 | 16 | name = "conv3d" 17 | 18 | def __init__( 19 | self, 20 | include_pv_yield: bool = True, 21 | include_nwp: bool = True, 22 | forecast_minutes: int = 30, 23 | history_minutes: int = 60, 24 | number_of_conv3d_layers: int = 4, 25 | conv3d_channels: int = 32, 26 | image_size_pixels: int = 64, 27 | number_sat_channels: int = 12, 28 | fc1_output_features: int = 128, 29 | fc2_output_features: int = 128, 30 | fc3_output_features: int = 64, 31 | output_variable: str = "pv_yield", 32 | ): 33 | """ 34 | 3d conv model, that takes in different data streams 35 | 36 | architecture is roughly: satellite image time series goes into several 3d convolution layers. 37 | The final convolutional layer goes to a fully connected layer.
This is joined by other data inputs like 38 | - pv yield 39 | - nwp data 40 | - time variables 41 | Then there are ~4 fully connected layers which end up forecasting the pv yield into the future 42 | 43 | include_pv_yield: include pv yield data 44 | include_nwp: include nwp data 45 | forecast_minutes: the number of minutes that should be forecast 46 | history_minutes: the number of historical minutes that are used 47 | number_of_conv3d_layers: number of 3d convolution layers that are used 48 | conv3d_channels: the number of 3d convolution channels 49 | image_size_pixels: the input satellite image size 50 | number_sat_channels: number of satellite channels 51 | fc1_output_features: number of output nodes of the first fully connected layer 52 | fc2_output_features: number of output nodes of the second fully connected layer 53 | fc3_output_features: number of output nodes of the third fully connected layer 54 | output_variable: the output variable to be predicted 55 | """ 56 | 57 | self.include_pv_yield = include_pv_yield 58 | self.include_nwp = include_nwp 59 | self.number_of_conv3d_layers = number_of_conv3d_layers 60 | self.number_of_nwp_features = 10 * 19 * 2 * 2 61 | self.fc1_output_features = fc1_output_features 62 | self.fc2_output_features = fc2_output_features 63 | self.fc3_output_features = fc3_output_features 64 | self.forecast_minutes = forecast_minutes 65 | self.history_minutes = history_minutes 66 | self.output_variable = output_variable 67 | 68 | super().__init__() 69 | 70 | conv3d_channels = conv3d_channels 71 | 72 | self.number_of_nwp_features = 10 * 19 * 2 * 2 73 | 74 | self.cnn_output_size = ( 75 | conv3d_channels 76 | * ((image_size_pixels - 2 * self.number_of_conv3d_layers) ** 2) 77 | * (self.forecast_len_5 + self.history_len_5 + 1 - 2 * self.number_of_conv3d_layers) 78 | ) 79 | 80 | self.sat_conv0 = nn.Conv3d( 81 | in_channels=number_sat_channels, 82 | out_channels=conv3d_channels, 83 | kernel_size=(3, 3, 3), 84 | padding=0, 85 | ) 86 | for i in range(0, self.number_of_conv3d_layers - 1): 87 | layer = nn.Conv3d( 88 | in_channels=conv3d_channels, out_channels=conv3d_channels, kernel_size=(3, 3, 3), padding=0 89 | ) 90 | setattr(self, f"conv3d_{i + 1}", layer) 91 | 92 | self.fc1 = nn.Linear(in_features=self.cnn_output_size, out_features=self.fc1_output_features) 93 | self.fc2 = nn.Linear(in_features=self.fc1_output_features, out_features=self.fc2_output_features) 94 | 95 | fc3_in_features = self.fc2_output_features 96 | if include_pv_yield: 97 | fc3_in_features += self.number_of_samples_per_batch * (self.history_len_30 + 1) 98 | if include_nwp: 99 | self.fc_nwp = nn.Linear(in_features=self.number_of_nwp_features, out_features=128) 100 | fc3_in_features += 128 101 | 102 | self.fc3 = nn.Linear(in_features=fc3_in_features, out_features=self.fc3_output_features) 103 | self.fc4 = nn.Linear(in_features=self.fc3_output_features, out_features=self.forecast_len) 104 | # self.fc5 = nn.Linear(in_features=32, out_features=8) 105 | # self.fc6 = nn.Linear(in_features=8, out_features=1) 106 | 107 | def forward(self, x): 108 | 109 | if type(x) == dict: 110 | x = BatchML(**x) 111 | # ******************* Satellite imagery ************************* 112 | # Shape: batch_size, channel, seq_length, height, width 113 | sat_data = x.satellite.data.float() 114 | batch_size, n_chans, seq_len, height, width = sat_data.shape 115 | 116 | # :) Pass data through the network :) 117 | out = F.relu(self.sat_conv0(sat_data)) 118 | for i in range(0,
self.number_of_conv3d_layers - 1): 119 | layer = getattr(self, f"conv3d_{i + 1}") 120 | out = F.relu(layer(out)) 121 | 122 | out = out.reshape(batch_size, self.cnn_output_size) 123 | 124 | # Fully connected layers 125 | out = F.relu(self.fc1(out)) 126 | out = F.relu(self.fc2(out)) 127 | # which has shape (batch_size, 128) 128 | 129 | # add pv yield 130 | if self.include_pv_yield: 131 | pv_yield_history = x[self.output_variable][:, : self.history_len_30 + 1].nan_to_num(nan=0.0).float() 132 | 133 | pv_yield_history = pv_yield_history.reshape( 134 | pv_yield_history.shape[0], pv_yield_history.shape[1] * pv_yield_history.shape[2] 135 | ) 136 | out = torch.cat((out, pv_yield_history), dim=1) 137 | 138 | # *********************** NWP Data ************************************ 139 | if self.include_nwp: 140 | # Shape: batch_size, channel, seq_length, height, width 141 | nwp_data = x["nwp"].float() 142 | nwp_data = nwp_data.flatten(start_dim=1) 143 | 144 | # fully connected layer 145 | out_nwp = F.relu(self.fc_nwp(nwp_data)) 146 | 147 | # join with other FC layer 148 | out = torch.cat((out, out_nwp), dim=1) 149 | 150 | # Fully connected layers. 151 | out = F.relu(self.fc3(out)) 152 | out = self.fc4(out) 153 | 154 | out = out.reshape(batch_size, self.forecast_len) 155 | 156 | return out 157 | -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/model_nwp.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | from predict_pv_yield.models.base_model import BaseModel 8 | from nowcasting_dataloader.batch import BatchML 9 | 10 | logging.basicConfig() 11 | _LOG = logging.getLogger("predict_pv_yield") 12 | 13 | 14 | class Model(BaseModel): 15 | 16 | name = "conv3d_sat_nwp" 17 | 18 | def __init__( 19 | self, 20 | include_pv_or_gsp_yield_history: bool = True, 21 | include_nwp: bool = True, 22 | forecast_minutes: int = 30, 23 | history_minutes: int = 60, 24 | number_of_conv3d_layers: int = 4, 25 | conv3d_channels: int = 32, 26 | nwp_image_size_pixels: int = 64, 27 | number_nwp_channels: int = 10, 28 | fc1_output_features: int = 128, 29 | fc2_output_features: int = 128, 30 | fc3_output_features: int = 64, 31 | output_variable: str = "gsp_yield", 32 | embedding_dem: int = 16, 33 | include_pv_yield_history: int = True, 34 | include_future_satellite: int = True, 35 | ): 36 | """ 37 | 3d conv model, that takes in different data streams 38 | 39 | architecture is roughly 40 | 1. nwp time series goes into many 3d convolution layers. 41 | 2. Final convolutional layer goes to full connected layer. 
This is joined by other data inputs like 42 | - pv yield 43 | - time variables 44 | Then there ~4 fully connected layers which end up forecasting the pv yield / gsp into the future 45 | 46 | include_pv_or_gsp_yield_history: include pv yield data 47 | include_nwp: include nwp data 48 | forecast_len: the amount of minutes that should be forecasted 49 | history_len: the amount of historical minutes that are used 50 | number_of_conv3d_layers, number of convolution 3d layers that are use 51 | conv3d_channels, the amount of convolution 3d channels 52 | image_size_pixels: the input satellite image size 53 | nwp_image_size_pixels: the input nwp image size 54 | number_sat_channels: number of nwp channels 55 | fc1_output_features: number of fully connected outputs nodes out of the the first fully connected layer 56 | fc2_output_features: number of fully connected outputs nodes out of the the second fully connected layer 57 | fc3_output_features: number of fully connected outputs nodes out of the the third fully connected layer 58 | output_variable: the output variable to be predicted 59 | number_nwp_channels: The number of nwp channels there are 60 | include_future_satellite: option to include future satellite images, or not 61 | """ 62 | 63 | self.include_pv_or_gsp_yield_history = include_pv_or_gsp_yield_history 64 | self.include_nwp = include_nwp 65 | self.number_of_conv3d_layers = number_of_conv3d_layers 66 | self.number_of_nwp_features = 128 67 | self.fc1_output_features = fc1_output_features 68 | self.fc2_output_features = fc2_output_features 69 | self.fc3_output_features = fc3_output_features 70 | self.forecast_minutes = forecast_minutes 71 | self.history_minutes = history_minutes 72 | self.output_variable = output_variable 73 | self.number_nwp_channels = number_nwp_channels 74 | self.embedding_dem = embedding_dem 75 | self.include_pv_yield_history = include_pv_yield_history 76 | self.include_future_satellite = include_future_satellite 77 | 78 | super().__init__() 79 | 80 | conv3d_channels = conv3d_channels 81 | 82 | self.nwp_cnn_output_size = ( 83 | conv3d_channels 84 | * ((nwp_image_size_pixels - 2 * self.number_of_conv3d_layers) ** 2) 85 | * (self.forecast_len_60 + self.history_len_60 + 1) 86 | ) 87 | 88 | # nwp 89 | self.nwp_conv0 = nn.Conv3d( 90 | in_channels=number_nwp_channels, 91 | out_channels=conv3d_channels, 92 | kernel_size=(3, 3, 3), 93 | padding=(1, 0, 0), 94 | ) 95 | for i in range(0, self.number_of_conv3d_layers - 1): 96 | layer = nn.Conv3d( 97 | in_channels=conv3d_channels, 98 | out_channels=conv3d_channels, 99 | kernel_size=(3, 3, 3), 100 | padding=(1, 0, 0), 101 | ) 102 | setattr(self, f"nwp_conv{i + 1}", layer) 103 | 104 | self.nwp_fc1 = nn.Linear( 105 | in_features=self.nwp_cnn_output_size, out_features=self.fc1_output_features 106 | ) 107 | self.nwp_fc2 = nn.Linear( 108 | in_features=self.fc1_output_features, out_features=self.number_of_nwp_features 109 | ) 110 | 111 | if self.embedding_dem: 112 | self.pv_system_id_embedding = nn.Embedding( 113 | num_embeddings=940, embedding_dim=self.embedding_dem 114 | ) 115 | 116 | if self.include_pv_yield_history: 117 | self.pv_fc1 = nn.Linear( 118 | in_features=self.number_of_pv_samples_per_batch * (self.history_len_5 + 1), 119 | out_features=128, 120 | ) 121 | 122 | fc3_in_features = self.number_of_nwp_features 123 | 124 | self.fc3 = nn.Linear(in_features=fc3_in_features, out_features=self.fc3_output_features) 125 | self.fc4 = nn.Linear(in_features=self.fc3_output_features, out_features=self.forecast_len) 126 | 127 | 128 | def 
forward(self, x): 129 | 130 | if type(x) == dict: 131 | x = BatchML(**x) 132 | 133 | # shape: batch_size, n_chans, seq_len, height, width 134 | nwp_data = x.nwp.data.float() 135 | out_nwp = F.relu(self.nwp_conv0(nwp_data)) 136 | for i in range(0, self.number_of_conv3d_layers - 1): 137 | layer = getattr(self, f"nwp_conv{i + 1}") 138 | out_nwp = F.relu(layer(out_nwp)) 139 | 140 | # fully connected layers 141 | out_nwp = out_nwp.reshape(nwp_data.shape[0], self.nwp_cnn_output_size) 142 | out_nwp = F.relu(self.nwp_fc1(out_nwp)) 143 | out = F.relu(self.nwp_fc2(out_nwp)) 144 | 145 | # which has shape (batch_size, 128) 146 | 147 | # Fully connected layers. 148 | out = F.relu(self.fc3(out)) 149 | out = self.fc4(out) 150 | 151 | out = out.reshape(nwp_data.shape[0], self.forecast_len) 152 | 153 | return out 154 | -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/model_sat_nwp.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | from predict_pv_yield.models.base_model import BaseModel 8 | from nowcasting_dataloader.batch import BatchML 9 | 10 | logging.basicConfig() 11 | _LOG = logging.getLogger("predict_pv_yield") 12 | 13 | 14 | class Model(BaseModel): 15 | 16 | name = "conv3d_sat_nwp" 17 | 18 | def __init__( 19 | self, 20 | include_pv_or_gsp_yield_history: bool = True, 21 | include_nwp: bool = True, 22 | forecast_minutes: int = 30, 23 | history_minutes: int = 60, 24 | number_of_conv3d_layers: int = 4, 25 | conv3d_channels: int = 32, 26 | image_size_pixels: int = 64, 27 | nwp_image_size_pixels: int = 64, 28 | number_sat_channels: int = 12, 29 | number_nwp_channels: int = 10, 30 | fc1_output_features: int = 128, 31 | fc2_output_features: int = 128, 32 | fc3_output_features: int = 64, 33 | output_variable: str = "pv_yield", 34 | embedding_dem: int = 16, 35 | include_pv_yield_history: int = True, 36 | include_future_satellite: int = True, 37 | ): 38 | """ 39 | 3d conv model, that takes in different data streams 40 | 41 | architecture is roughly 42 | 1. satellite image time series goes into many 3d convolution layers. 43 | 2. nwp time series goes into many 3d convolution layers. 44 | 3. Final convolutional layer goes to full connected layer. 
This is joined by other data inputs like 45 | - pv yield 46 | - time variables 47 | Then there ~4 fully connected layers which end up forecasting the pv yield / gsp into the future 48 | 49 | include_pv_or_gsp_yield_history: include pv yield data 50 | include_nwp: include nwp data 51 | forecast_len: the amount of minutes that should be forecasted 52 | history_len: the amount of historical minutes that are used 53 | number_of_conv3d_layers, number of convolution 3d layers that are use 54 | conv3d_channels, the amount of convolution 3d channels 55 | image_size_pixels: the input satellite image size 56 | nwp_image_size_pixels: the input nwp image size 57 | number_sat_channels: number of nwp channels 58 | fc1_output_features: number of fully connected outputs nodes out of the the first fully connected layer 59 | fc2_output_features: number of fully connected outputs nodes out of the the second fully connected layer 60 | fc3_output_features: number of fully connected outputs nodes out of the the third fully connected layer 61 | output_variable: the output variable to be predicted 62 | number_nwp_channels: The number of nwp channels there are 63 | include_future_satellite: option to include future satellite images, or not 64 | """ 65 | 66 | self.include_pv_or_gsp_yield_history = include_pv_or_gsp_yield_history 67 | self.include_nwp = include_nwp 68 | self.number_of_conv3d_layers = number_of_conv3d_layers 69 | self.number_of_nwp_features = 128 70 | self.fc1_output_features = fc1_output_features 71 | self.fc2_output_features = fc2_output_features 72 | self.fc3_output_features = fc3_output_features 73 | self.forecast_minutes = forecast_minutes 74 | self.history_minutes = history_minutes 75 | self.output_variable = output_variable 76 | self.number_nwp_channels = number_nwp_channels 77 | self.embedding_dem = embedding_dem 78 | self.include_pv_yield_history = include_pv_yield_history 79 | self.include_future_satellite = include_future_satellite 80 | 81 | super().__init__() 82 | 83 | conv3d_channels = conv3d_channels 84 | 85 | if include_future_satellite: 86 | cnn_output_size_time = self.forecast_len_5 + self.history_len_5 + 1 87 | else: 88 | cnn_output_size_time = self.history_len_5 + 1 89 | self.cnn_output_size = ( 90 | conv3d_channels 91 | * ((image_size_pixels - 2 * self.number_of_conv3d_layers) ** 2) 92 | * cnn_output_size_time 93 | ) 94 | 95 | self.nwp_cnn_output_size = ( 96 | conv3d_channels 97 | * ((nwp_image_size_pixels - 2 * self.number_of_conv3d_layers) ** 2) 98 | * (self.forecast_len_60 + self.history_len_60 + 1) 99 | ) 100 | 101 | # conv0 102 | self.sat_conv0 = nn.Conv3d( 103 | in_channels=number_sat_channels, 104 | out_channels=conv3d_channels, 105 | kernel_size=(3, 3, 3), 106 | padding=(1, 0, 0), 107 | ) 108 | for i in range(0, self.number_of_conv3d_layers - 1): 109 | layer = nn.Conv3d( 110 | in_channels=conv3d_channels, 111 | out_channels=conv3d_channels, 112 | kernel_size=(3, 3, 3), 113 | padding=(1, 0, 0), 114 | ) 115 | setattr(self, f"sat_conv{i + 1}", layer) 116 | 117 | self.fc1 = nn.Linear( 118 | in_features=self.cnn_output_size, out_features=self.fc1_output_features 119 | ) 120 | self.fc2 = nn.Linear( 121 | in_features=self.fc1_output_features, out_features=self.fc2_output_features 122 | ) 123 | 124 | # nwp 125 | if include_nwp: 126 | self.nwp_conv0 = nn.Conv3d( 127 | in_channels=number_nwp_channels, 128 | out_channels=conv3d_channels, 129 | kernel_size=(3, 3, 3), 130 | padding=(1, 0, 0), 131 | ) 132 | for i in range(0, self.number_of_conv3d_layers - 1): 133 | layer = nn.Conv3d( 
134 | in_channels=conv3d_channels, 135 | out_channels=conv3d_channels, 136 | kernel_size=(3, 3, 3), 137 | padding=(1, 0, 0), 138 | ) 139 | setattr(self, f"nwp_conv{i + 1}", layer) 140 | 141 | self.nwp_fc1 = nn.Linear( 142 | in_features=self.nwp_cnn_output_size, out_features=self.fc1_output_features 143 | ) 144 | self.nwp_fc2 = nn.Linear( 145 | in_features=self.fc1_output_features, out_features=self.number_of_nwp_features 146 | ) 147 | 148 | if self.embedding_dem: 149 | self.pv_system_id_embedding = nn.Embedding( 150 | num_embeddings=940, embedding_dim=self.embedding_dem 151 | ) 152 | 153 | if self.include_pv_yield_history: 154 | self.pv_fc1 = nn.Linear( 155 | in_features=self.number_of_pv_samples_per_batch * (self.history_len_5 + 1), 156 | out_features=128, 157 | ) 158 | 159 | fc3_in_features = self.fc2_output_features 160 | if include_pv_or_gsp_yield_history: 161 | fc3_in_features += self.number_of_samples_per_batch * (self.history_len_30 + 1) 162 | if include_nwp: 163 | fc3_in_features += 128 164 | if self.embedding_dem: 165 | fc3_in_features += self.embedding_dem 166 | if self.include_pv_yield_history: 167 | fc3_in_features += 128 168 | 169 | self.fc3 = nn.Linear(in_features=fc3_in_features, out_features=self.fc3_output_features) 170 | self.fc4 = nn.Linear(in_features=self.fc3_output_features, out_features=self.forecast_len) 171 | # self.fc5 = nn.Linear(in_features=32, out_features=8) 172 | # self.fc6 = nn.Linear(in_features=8, out_features=1) 173 | 174 | def forward(self, x): 175 | 176 | if type(x) == dict: 177 | x = BatchML(**x) 178 | 179 | # ******************* Satellite imagery ************************* 180 | # Shape: batch_size, channel, seq_length, height, width 181 | sat_data = x.satellite.data.float() 182 | batch_size, n_chans, seq_len, height, width = sat_data.shape 183 | 184 | if not self.include_future_satellite: 185 | sat_data = sat_data[:, :, : self.history_len_5 + 1] 186 | 187 | # :) Pass data through the network :) 188 | out = F.relu(self.sat_conv0(sat_data)) 189 | for i in range(0, self.number_of_conv3d_layers - 1): 190 | layer = getattr(self, f"sat_conv{i + 1}") 191 | out = F.relu(layer(out)) 192 | 193 | out = out.reshape(batch_size, self.cnn_output_size) 194 | 195 | # Fully connected layers 196 | out = F.relu(self.fc1(out)) 197 | out = F.relu(self.fc2(out)) 198 | # which has shape (batch_size, 128) 199 | 200 | # add pv yield 201 | if self.include_pv_or_gsp_yield_history: 202 | if self.output_variable == "gsp_yield": 203 | pv_yield_history = ( 204 | x.gsp.gsp_yield[:, : self.history_len_30 + 1].nan_to_num(nan=0.0).float() 205 | ) 206 | else: 207 | pv_yield_history = ( 208 | x.pv.pv_yield[:, : self.history_len_30 + 1].nan_to_num(nan=0.0).float() 209 | ) 210 | 211 | pv_yield_history = pv_yield_history.reshape( 212 | pv_yield_history.shape[0], pv_yield_history.shape[1] * pv_yield_history.shape[2] 213 | ) 214 | # join up 215 | out = torch.cat((out, pv_yield_history), dim=1) 216 | 217 | # add the pv yield history. 
This can be used if trying to predict gsp 218 | if self.include_pv_yield_history: 219 | # just take the first 128 pv systems 220 | pv_yield_history = ( 221 | x.pv.pv_yield[:, : self.history_len_5 + 1, :128].nan_to_num(nan=0.0).float() 222 | ) 223 | 224 | pv_yield_history = pv_yield_history.reshape( 225 | pv_yield_history.shape[0], pv_yield_history.shape[1] * pv_yield_history.shape[2] 226 | ) 227 | pv_yield_history = F.relu(self.pv_fc1(pv_yield_history)) 228 | 229 | out = torch.cat((out, pv_yield_history), dim=1) 230 | 231 | # *********************** NWP Data ************************************ 232 | if self.include_nwp: 233 | 234 | # shape: batch_size, n_chans, seq_len, height, width 235 | nwp_data = x.nwp.data.float() 236 | 237 | out_nwp = F.relu(self.nwp_conv0(nwp_data)) 238 | for i in range(0, self.number_of_conv3d_layers - 1): 239 | layer = getattr(self, f"nwp_conv{i + 1}") 240 | out_nwp = F.relu(layer(out_nwp)) 241 | 242 | # fully connected layers 243 | out_nwp = out_nwp.reshape(batch_size, self.nwp_cnn_output_size) 244 | out_nwp = F.relu(self.nwp_fc1(out_nwp)) 245 | out_nwp = F.relu(self.nwp_fc2(out_nwp)) 246 | 247 | # join with other FC layer 248 | out = torch.cat((out, out_nwp), dim=1) 249 | 250 | # ********************** Embedding of PV system ID ******************** 251 | if self.embedding_dem: 252 | if self.output_variable == "pv_yield": 253 | id = x.pv.pv_system_row_number[0 : self.batch_size, 0] 254 | else: 255 | id = x.gsp.gsp_id[0 : self.batch_size, 0] 256 | 257 | id = id.type(torch.IntTensor) 258 | id = id.to(out.device) 259 | id_embedding = self.pv_system_id_embedding(id) 260 | out = torch.cat((out, id_embedding), dim=1) 261 | 262 | # Fully connected layers. 263 | out = F.relu(self.fc3(out)) 264 | out = self.fc4(out) 265 | 266 | out = out.reshape(batch_size, self.forecast_len) 267 | 268 | return out 269 | -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/readme.md: -------------------------------------------------------------------------------- 1 | # Convolution 3d network 2 | 3 | The idea is to use 3d convolution networks. 4 | 5 | ![](architect.png) 6 | 7 | Satellite images go into several 3D convolutions with no padding. 8 | They are then passed into two fully connected layers. 9 | 10 | NWP data, historic pv yield and time features are added to the fully connected layers. 11 | 12 | The final output is the PV yield for the future. 13 | 14 | ## Conv_nwp 15 | 16 | We have built a model that takes nwp input data only. 17 | This has several 3D CNN layers and then a few fully connected layers.
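A minimal sketch of building this model (assuming the `nowcasting_*` dependencies are installed); the argument names and values below simply mirror the defaults in `model_nwp.py`:

```python
from predict_pv_yield.models.conv3d.model_nwp import Model

# defaults taken from the constructor in model_nwp.py
model = Model(
    include_nwp=True,
    forecast_minutes=30,        # forecast horizon in minutes
    history_minutes=60,         # amount of history in minutes
    number_of_conv3d_layers=4,
    conv3d_channels=32,
    nwp_image_size_pixels=64,
    number_nwp_channels=10,
    output_variable="gsp_yield",
)
```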
18 | 19 | The only nwp channel is `dswrf` 20 | 21 | training run is https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-951/charts 22 | -------------------------------------------------------------------------------- /predict_pv_yield/models/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/models/layers/__init__.py -------------------------------------------------------------------------------- /predict_pv_yield/models/perceiver/perceiver.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | from perceiver_pytorch import Perceiver 8 | 9 | from predict_pv_yield.models.base_model import BaseModel 10 | from nowcasting_dataloader.batch import BatchML 11 | 12 | from nowcasting_dataset.consts import NWP_VARIABLE_NAMES, SAT_VARIABLE_NAMES 13 | 14 | 15 | params = dict( 16 | # DATA 17 | # TODO: Everything that relates to the dataset should come automatically 18 | # from a yaml file stored with the dataset. 19 | batch_size=32, 20 | history_minutes=30, #: Number of timesteps of history, not including t0. 21 | forecast_minutes=120, #: Number of timesteps of forecast. 22 | image_size_pixels=64, 23 | nwp_channels=NWP_VARIABLE_NAMES[0:10], 24 | sat_channels=SAT_VARIABLE_NAMES[1:], 25 | ) 26 | 27 | 28 | SAT_X_MEAN = np.float32(309000) 29 | SAT_X_STD = np.float32(316387.42073603) 30 | SAT_Y_MEAN = np.float32(519000) 31 | SAT_Y_STD = np.float32(406454.17945938) 32 | 33 | 34 | TOTAL_SEQ_LEN = params["history_minutes"] // 5 + params["forecast_minutes"] // 5 + 1 35 | NWP_SIZE = len(params["nwp_channels"]) * 64 * 64 # channels x width x height 36 | N_DATETIME_FEATURES = 4 37 | PERCEIVER_OUTPUT_SIZE = 512 38 | FC_OUTPUT_SIZE = 8 39 | RNN_HIDDEN_SIZE = 16 40 | 41 | 42 | class PerceiverModel(BaseModel): 43 | 44 | name = "perceiver" 45 | 46 | def __init__( 47 | self, 48 | history_minutes: int = params["history_minutes"], 49 | forecast_minutes: int = params["forecast_minutes"], 50 | nwp_channels: Iterable[str] = params["nwp_channels"], 51 | batch_size: int = 32, 52 | num_latents: int = 128, 53 | latent_dim: int = 64, 54 | embedding_dem: int = 16, 55 | output_variable: str = "pv_yield", 56 | ): 57 | self.history_minutes = history_minutes 58 | self.forecast_minutes = forecast_minutes 59 | self.nwp_channels = nwp_channels 60 | self.batch_size = batch_size 61 | self.num_latents = num_latents 62 | self.latent_dim = latent_dim 63 | self.embedding_dem = embedding_dem 64 | self.output_variable = output_variable 65 | 66 | self.total_seq_length = self.history_minutes // 5 + self.forecast_minutes //5 + 1 67 | 68 | super().__init__() 69 | 70 | self.perceiver = Perceiver( 71 | input_channels=len(params["sat_channels"]), 72 | input_axis=2, 73 | num_freq_bands=6, 74 | max_freq=10, 75 | depth=self.total_seq_length, 76 | num_latents=self.num_latents, 77 | latent_dim=self.latent_dim, 78 | num_classes=PERCEIVER_OUTPUT_SIZE, 79 | weight_tie_layers=True, 80 | ) 81 | 82 | self.fc1 = nn.Linear(in_features=PERCEIVER_OUTPUT_SIZE, out_features=256) 83 | 84 | self.fc2 = nn.Linear(in_features=256 + self.embedding_dem, out_features=128) 85 | 86 | self.fc3 = nn.Linear(in_features=128, out_features=64) 87 | self.fc4 = nn.Linear(in_features=64, out_features=32) 88 | self.fc5 = 
nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 89 | 90 | if self.embedding_dem: 91 | self.pv_system_id_embedding = nn.Embedding(num_embeddings=2048, embedding_dim=self.embedding_dem) 92 | 93 | # TODO: Get rid of RNNs! 94 | self.encoder_rnn = nn.GRU( 95 | # plus 1 for history 96 | input_size=FC_OUTPUT_SIZE + 1 + NWP_SIZE, 97 | hidden_size=RNN_HIDDEN_SIZE, 98 | num_layers=2, 99 | batch_first=True, 100 | ) 101 | self.decoder_rnn = nn.GRU( 102 | input_size=FC_OUTPUT_SIZE + NWP_SIZE, 103 | hidden_size=RNN_HIDDEN_SIZE, 104 | num_layers=2, 105 | batch_first=True, 106 | ) 107 | 108 | self.decoder_fc1 = nn.Linear(in_features=RNN_HIDDEN_SIZE, out_features=8) 109 | self.decoder_fc2 = nn.Linear(in_features=8, out_features=1) 110 | 111 | def forward(self, x: BatchML): 112 | 113 | if type(x) == dict: 114 | x = BatchML(**x) 115 | 116 | # ******************* Satellite imagery ************************* 117 | # Shape: batch_size, channel, seq_length, height, width 118 | # TODO: Use optical flow, not actual sat images of the future! 119 | sat_data = x.satellite.data[0 : self.batch_size].float() 120 | batch_size, n_chans, seq_len, width, height = sat_data.shape 121 | 122 | # Stack timesteps as examples (to make a large batch) 123 | sat_data = sat_data.permute(0, 2, 3, 4, 1) # move channels to the end 124 | new_batch_size = batch_size * seq_len 125 | # 0 1 2 3 126 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 127 | 128 | # Pass data through the network :) 129 | out = self.perceiver(sat_data) 130 | 131 | out = out.reshape(new_batch_size, PERCEIVER_OUTPUT_SIZE) 132 | out = F.relu(self.fc1(out)) 133 | 134 | # ********************** Embedding of PV system ID ******************** 135 | if self.embedding_dem: 136 | if self.output_variable == 'pv_yield': 137 | id = x.pv.pv_system_row_number[0 : self.batch_size, 0] 138 | else: 139 | id = x.gsp.gsp_id[0: self.batch_size, 0] 140 | id = id.type(torch.IntTensor).repeat_interleave(self.total_seq_length) 141 | id = id.to(out.device) 142 | id_embedding = self.pv_system_id_embedding(id) 143 | print(f'{id_embedding.shape=}') 144 | print(f'{out.shape=}') 145 | out = torch.cat((out, id_embedding), dim=1) 146 | 147 | # Fully connected layers. 148 | out = F.relu(self.fc2(out)) 149 | out = F.relu(self.fc3(out)) 150 | out = F.relu(self.fc4(out)) 151 | out = F.relu(self.fc5(out)) 152 | 153 | # ******************* PREP DATA FOR RNN ******************************* 154 | out = out.reshape(batch_size, self.total_seq_length, FC_OUTPUT_SIZE) 155 | 156 | # The RNN encoder gets recent history: satellite, NWP, 157 | # datetime features, and recent PV history. The RNN decoder 158 | # gets what we know about the future: satellite, NWP, and 159 | # datetime features. 160 | 161 | # *********************** NWP Data ************************************ 162 | # Shape: batch_size, channel, seq_length, width, height 163 | nwp_data = x.nwp.data[0 : self.batch_size].float().float() 164 | 165 | # RNN expects seq_len to be dim 1. 166 | nwp_data = nwp_data.permute(0, 2, 1, 3, 4) 167 | batch_size, nwp_seq_len, n_nwp_chans, nwp_width, nwp_height = nwp_data.shape 168 | 169 | # nwp to have the same sel_len as sat. 
I think there is a better solution than this 170 | nwp_data_zeros = torch.zeros(size=(batch_size, seq_len - nwp_seq_len, n_nwp_chans, nwp_width, nwp_height), device=nwp_data.device) 171 | nwp_data = torch.cat([nwp_data, nwp_data_zeros], dim=1) 172 | 173 | nwp_data = nwp_data.reshape(batch_size, seq_len, n_nwp_chans * nwp_width * nwp_height) 174 | 175 | # Concat 176 | rnn_input = torch.cat( 177 | ( 178 | out, 179 | nwp_data, 180 | ), 181 | dim=2, 182 | ) 183 | 184 | if self.output_variable == 'pv_yield': 185 | # take the history of the pv yield of this system, 186 | pv_yield_history = x.pv.pv_yield[0: self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float() 187 | encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2) 188 | elif self.output_variable == 'gsp_yield': 189 | # take the history of the gsp yield of this system, 190 | gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float() 191 | encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2) 192 | 193 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 194 | decoder_output, _ = self.decoder_rnn(rnn_input[:, -self.forecast_len :], encoder_hidden) 195 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 196 | 197 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 198 | decoder_output = self.decoder_fc2(decoder_output) 199 | 200 | return decoder_output.squeeze(dim=-1) 201 | -------------------------------------------------------------------------------- /predict_pv_yield/models/perceiver/perceiver_conv3d_nwp_sat.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | from perceiver_pytorch import Perceiver 8 | 9 | from predict_pv_yield.models.base_model import BaseModel 10 | from nowcasting_dataloader.batch import BatchML 11 | 12 | from nowcasting_dataset.consts import NWP_VARIABLE_NAMES, SAT_VARIABLE_NAMES 13 | 14 | 15 | params = dict( 16 | # DATA 17 | # TODO: Everything that relates to the dataset should come automatically 18 | # from a yaml file stored with the dataset. 19 | batch_size=32, 20 | history_minutes=30, #: Number of timesteps of history, not including t0. 21 | forecast_minutes=120, #: Number of timesteps of forecast. 
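    # With the defaults above and 5-minutely data, TOTAL_SEQ_LEN (defined below)
    # works out to 30 // 5 + 120 // 5 + 1 = 6 + 24 + 1 = 31 timesteps,
    # i.e. history + forecast + the t0 frame.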
22 | image_size_pixels=64, 23 | nwp_channels=NWP_VARIABLE_NAMES[0:10], 24 | sat_channels=SAT_VARIABLE_NAMES[1:], 25 | ) 26 | 27 | 28 | SAT_X_MEAN = np.float32(309000) 29 | SAT_X_STD = np.float32(316387.42073603) 30 | SAT_Y_MEAN = np.float32(519000) 31 | SAT_Y_STD = np.float32(406454.17945938) 32 | 33 | 34 | TOTAL_SEQ_LEN = params["history_minutes"] // 5 + params["forecast_minutes"] // 5 + 1 35 | NWP_SIZE = len(params["nwp_channels"]) * 2 * 2 # channels x width x height 36 | N_DATETIME_FEATURES = 4 37 | PERCEIVER_OUTPUT_SIZE = 512 38 | FC_OUTPUT_SIZE = 8 39 | RNN_HIDDEN_SIZE = 16 40 | 41 | 42 | class Conv3dMaxPool(nn.Module): 43 | 44 | def __init__(self, out_channels:int, in_channels:int): 45 | super().__init__() 46 | # convultion later, and pad so the output is the same size 47 | self.sat_conv3d = nn.Conv3d( 48 | in_channels=in_channels, 49 | out_channels=out_channels, 50 | kernel_size=(3, 3, 3), padding=(1, 1, 1) 51 | ) 52 | # take max pool, keep time sequence the same length 53 | self.sat_maxpool = nn.MaxPool3d(3, stride=(1, 2, 2), padding=(1, 1, 1)) 54 | def forward(self, x): 55 | 56 | x = self.sat_conv3d(x) 57 | return self.sat_maxpool(x) 58 | 59 | 60 | class Model(BaseModel): 61 | 62 | name = "perceiver_conv3d_nwp_sat" 63 | 64 | def __init__( 65 | self, 66 | history_minutes: int, 67 | forecast_minutes: int, 68 | nwp_channels: Iterable[str] = params["nwp_channels"], 69 | batch_size: int = 32, 70 | num_latents: int = 128, 71 | latent_dim: int = 64, 72 | embedding_dem: int = 16, 73 | output_variable: str = "pv_yield", 74 | conv3d_channels: int = 16, 75 | use_future_satellite_images: bool = True, # option not to use future sat images 76 | ): 77 | """ 78 | Idea is to have a conv3d (+max pool) layer before both sat and nwp data go into perceiver model. 79 | """ 80 | self.history_minutes = history_minutes 81 | self.forecast_minutes = forecast_minutes 82 | self.nwp_channels = nwp_channels 83 | self.batch_size = batch_size 84 | self.num_latents = num_latents 85 | self.latent_dim = latent_dim 86 | self.embedding_dem = embedding_dem 87 | self.output_variable = output_variable 88 | self.use_future_satellite_images = use_future_satellite_images 89 | 90 | self.total_seq_length = self.history_minutes // 5 + self.forecast_minutes // 5 + 1 91 | 92 | super().__init__() 93 | 94 | self.sat_conv3d_maxpool = Conv3dMaxPool(out_channels=conv3d_channels, in_channels=len(params['sat_channels'])) 95 | self.nwp_conv3d_maxpool = Conv3dMaxPool(out_channels=conv3d_channels, in_channels=len(nwp_channels)) 96 | 97 | self.perceiver = Perceiver( 98 | input_channels=2*conv3d_channels, 99 | input_axis=2, 100 | num_freq_bands=6, 101 | max_freq=10, 102 | depth= self.total_seq_length, 103 | num_latents=self.num_latents, 104 | latent_dim=self.latent_dim, 105 | num_classes=PERCEIVER_OUTPUT_SIZE, 106 | weight_tie_layers=True, 107 | ) 108 | 109 | self.fc1 = nn.Linear(in_features=PERCEIVER_OUTPUT_SIZE, out_features=256) 110 | 111 | self.fc2 = nn.Linear(in_features=256 + self.embedding_dem, out_features=128) 112 | 113 | self.fc3 = nn.Linear(in_features=128, out_features=64) 114 | self.fc4 = nn.Linear(in_features=64, out_features=32) 115 | self.fc5 = nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 116 | 117 | if self.embedding_dem: 118 | self.pv_system_id_embedding = nn.Embedding(num_embeddings=2048, embedding_dim=self.embedding_dem) 119 | 120 | # TODO: Get rid of RNNs! 
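        # The GRUs below form a small seq2seq pair: the encoder consumes the
        # history part of the sequence (perceiver features plus the observed
        # PV/GSP value, hence the "+ 1" on its input_size), and its final hidden
        # state initialises the decoder, which steps over the forecast horizon
        # using only features that are known ahead of time.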
121 | self.encoder_rnn = nn.GRU( 122 | # plus 1 for history 123 | input_size=FC_OUTPUT_SIZE + 1, 124 | hidden_size=RNN_HIDDEN_SIZE, 125 | num_layers=2, 126 | batch_first=True, 127 | ) 128 | self.decoder_rnn = nn.GRU( 129 | input_size=FC_OUTPUT_SIZE, 130 | hidden_size=RNN_HIDDEN_SIZE, 131 | num_layers=2, 132 | batch_first=True, 133 | ) 134 | 135 | self.decoder_fc1 = nn.Linear(in_features=RNN_HIDDEN_SIZE, out_features=8) 136 | self.decoder_fc2 = nn.Linear(in_features=8, out_features=1) 137 | 138 | def forward(self, x): 139 | 140 | if type(x) == dict: 141 | x = BatchML(**x) 142 | 143 | # ******************* Satellite imagery ************************* 144 | # Shape: batch_size, channel, seq_length, height, width 145 | # TODO: Use optical flow, not actual sat images of the future! 146 | sat_data = x.satellite.data[0 : self.batch_size].float() 147 | 148 | if not self.use_future_satellite_images: 149 | sat_data[:, -self.forecast_len_5: ] = 0 # This might not be the best way to do it 150 | 151 | sat_data = self.sat_conv3d_maxpool(sat_data) 152 | sat_data = sat_data.permute(0, 2, 3, 4, 1) 153 | 154 | # Stack timesteps as examples (to make a large batch) 155 | batch_size, seq_len, width, height, n_chans = sat_data.shape 156 | new_batch_size = batch_size * seq_len 157 | # 0 1 2 3 158 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 159 | 160 | # *********************** NWP Data ************************************ 161 | # Shape: batch_size, seq_length, width, height, channel 162 | nwp_data = x.nwp.data[0 : self.batch_size].float() 163 | nwp_data = self.nwp_conv3d_maxpool(nwp_data) 164 | # Perciever expects seq_len to be dim 1, and channels at the end 165 | nwp_data = nwp_data.permute(0, 2, 3, 4, 1) 166 | batch_size, nwp_seq_len, nwp_width, nwp_height, n_nwp_chans = nwp_data.shape 167 | 168 | # nwp to have the same sel_len as sat. I think there is a better solution than this 169 | nwp_data_zeros = torch.zeros(size=(batch_size, seq_len - nwp_seq_len, nwp_width, nwp_height, n_nwp_chans), device=nwp_data.device) 170 | nwp_data = torch.cat([nwp_data, nwp_data_zeros], dim=1) 171 | 172 | nwp_data = nwp_data.reshape(new_batch_size, nwp_width, nwp_height, n_nwp_chans) 173 | 174 | assert nwp_width == width, f'widths should be the same({nwp_width},{width})' 175 | assert nwp_height == height, f'heights should be the same({nwp_height},{height})' 176 | 177 | data = torch.cat((sat_data, nwp_data), dim=-1) 178 | 179 | # Perceiver 180 | # Pass data through the network :) 181 | out = self.perceiver(data) 182 | 183 | out = out.reshape(new_batch_size, PERCEIVER_OUTPUT_SIZE) 184 | out = F.relu(self.fc1(out)) 185 | 186 | # ********************** Embedding of PV system ID ******************** 187 | if self.embedding_dem: 188 | pv_row = ( 189 | x.pv.pv_system_row_number[0 : self.batch_size, 0].type(torch.IntTensor).repeat_interleave(self.total_seq_length) 190 | ) 191 | pv_row = pv_row.to(out.device) 192 | pv_embedding = self.pv_system_id_embedding(pv_row) 193 | out = torch.cat((out, pv_embedding), dim=1) 194 | 195 | # Fully connected layers. 196 | out = F.relu(self.fc2(out)) 197 | out = F.relu(self.fc3(out)) 198 | out = F.relu(self.fc4(out)) 199 | out = F.relu(self.fc5(out)) 200 | 201 | # ******************* PREP DATA FOR RNN ******************************* 202 | out = out.reshape(batch_size, self.total_seq_length, FC_OUTPUT_SIZE) 203 | 204 | # The RNN encoder gets recent history: satellite, NWP, 205 | # datetime features, and recent PV history. 
The RNN decoder 206 | # gets what we know about the future: satellite, NWP, and 207 | # datetime features. 208 | 209 | ####### Time inputs 210 | 211 | # Concat 212 | rnn_input = torch.cat( 213 | ( 214 | out, 215 | ), 216 | dim=2, 217 | ) 218 | 219 | if self.output_variable == 'pv_yield': 220 | # take the history of the pv yield of this system, 221 | pv_yield_history = x.pv.pv_yield[0 : self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float() 222 | encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2) 223 | elif self.output_variable == 'gsp_yield': 224 | # take the history of the gsp yield of this system, 225 | gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float() 226 | encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2) 227 | 228 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 229 | decoder_output, _ = self.decoder_rnn(rnn_input[:, -self.forecast_len :], encoder_hidden) 230 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 231 | 232 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 233 | decoder_output = self.decoder_fc2(decoder_output) 234 | 235 | return decoder_output.squeeze(dim=-1) 236 | -------------------------------------------------------------------------------- /predict_pv_yield/models/perceiver/perceiver_nwp_sat.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | from perceiver_pytorch import Perceiver 8 | 9 | from predict_pv_yield.models.base_model import BaseModel 10 | from nowcasting_dataloader.batch import BatchML 11 | 12 | from nowcasting_dataset.consts import NWP_VARIABLE_NAMES, SAT_VARIABLE_NAMES 13 | 14 | 15 | params = dict( 16 | # DATA 17 | # TODO: Everything that relates to the dataset should come automatically 18 | # from a yaml file stored with the dataset. 19 | batch_size=32, 20 | history_minutes=30, #: Number of timesteps of history, not including t0. 21 | forecast_minutes=120, #: Number of timesteps of forecast. 
22 | image_size_pixels=64, 23 | nwp_channels=NWP_VARIABLE_NAMES[0:10], 24 | sat_channels=SAT_VARIABLE_NAMES[1:], 25 | ) 26 | 27 | 28 | SAT_X_MEAN = np.float32(309000) 29 | SAT_X_STD = np.float32(316387.42073603) 30 | SAT_Y_MEAN = np.float32(519000) 31 | SAT_Y_STD = np.float32(406454.17945938) 32 | 33 | 34 | TOTAL_SEQ_LEN = params["history_minutes"] // 5 + params["forecast_minutes"] // 5 + 1 35 | NWP_SIZE = len(params["nwp_channels"]) * 2 * 2 # channels x width x height 36 | PERCEIVER_OUTPUT_SIZE = 512 37 | FC_OUTPUT_SIZE = 8 38 | RNN_HIDDEN_SIZE = 16 39 | 40 | 41 | class Model(BaseModel): 42 | 43 | name = "perceiver_nwp_sat" 44 | 45 | def __init__( 46 | self, 47 | history_minutes: int, 48 | forecast_minutes: int, 49 | nwp_channels: Iterable[str] = params["nwp_channels"], 50 | batch_size: int = 32, 51 | num_latents: int = 128, 52 | latent_dim: int = 64, 53 | embedding_dem: int = 16, 54 | output_variable: str = "pv_yield", 55 | ): 56 | self.history_minutes = history_minutes 57 | self.forecast_minutes = forecast_minutes 58 | self.nwp_channels = nwp_channels 59 | self.batch_size = batch_size 60 | self.num_latents = num_latents 61 | self.latent_dim = latent_dim 62 | self.embedding_dem = embedding_dem 63 | self.output_variable = output_variable 64 | 65 | self.total_seq_length = self.history_minutes // 5 + self.forecast_minutes // 5 + 1 66 | 67 | super().__init__() 68 | 69 | self.perceiver = Perceiver( 70 | input_channels=len(params["sat_channels"]) + len(nwp_channels), 71 | input_axis=2, 72 | num_freq_bands=6, 73 | max_freq=10, 74 | depth=self.total_seq_length, 75 | num_latents=self.num_latents, 76 | latent_dim=self.latent_dim, 77 | num_classes=PERCEIVER_OUTPUT_SIZE, 78 | weight_tie_layers=True, 79 | ) 80 | 81 | self.fc1 = nn.Linear(in_features=PERCEIVER_OUTPUT_SIZE, out_features=256) 82 | 83 | self.fc2 = nn.Linear(in_features=256 + self.embedding_dem, out_features=128) 84 | 85 | self.fc3 = nn.Linear(in_features=128, out_features=64) 86 | self.fc4 = nn.Linear(in_features=64, out_features=32) 87 | self.fc5 = nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 88 | 89 | if self.embedding_dem: 90 | self.pv_system_id_embedding = nn.Embedding(num_embeddings=2048, embedding_dim=self.embedding_dem) 91 | 92 | # TODO: Get rid of RNNs! 93 | self.encoder_rnn = nn.GRU( 94 | # plus 1 for history 95 | input_size=FC_OUTPUT_SIZE + 1, 96 | hidden_size=RNN_HIDDEN_SIZE, 97 | num_layers=2, 98 | batch_first=True, 99 | ) 100 | self.decoder_rnn = nn.GRU( 101 | input_size=FC_OUTPUT_SIZE, 102 | hidden_size=RNN_HIDDEN_SIZE, 103 | num_layers=2, 104 | batch_first=True, 105 | ) 106 | 107 | self.decoder_fc1 = nn.Linear(in_features=RNN_HIDDEN_SIZE, out_features=8) 108 | self.decoder_fc2 = nn.Linear(in_features=8, out_features=1) 109 | 110 | def forward(self, x: BatchML): 111 | 112 | if type(x) == dict: 113 | x = BatchML(**x) 114 | 115 | # ******************* Satellite imagery ************************* 116 | # Shape: batch_size, channel, seq_length, height, width 117 | # TODO: Use optical flow, not actual sat images of the future! 
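        # Note on the reshape a few lines below: the Perceiver here works on
        # single images, so the batch and time dimensions are folded together
        # into batch_size * seq_len "examples"; the sequence is only
        # reassembled later, just before the RNN stage.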
118 | sat_data = x.satellite.data[0 : self.batch_size].float() 119 | batch_size, n_chans, seq_len, width, height = sat_data.shape 120 | sat_data = sat_data.permute(0, 2, 3, 4, 1) 121 | 122 | # Stack timesteps as examples (to make a large batch) 123 | new_batch_size = batch_size * seq_len 124 | # 0 1 2 3 125 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 126 | 127 | # *********************** NWP Data ************************************ 128 | # Shape: batch_size, channel, seq_length, height, width 129 | nwp_data = x.nwp.data[0: self.batch_size].float() 130 | # Perceiver expects seq_len to be dim 1, and channels at the end 131 | nwp_data = nwp_data.permute(0, 2, 3, 4, 1) 132 | batch_size, nwp_seq_len, nwp_width, nwp_height, n_nwp_chans = nwp_data.shape 133 | 134 | # zero-pad nwp to have the same seq_len as sat. I think there is a better solution than this 135 | nwp_data_zeros = torch.zeros(size=(batch_size, seq_len - nwp_seq_len, nwp_width, nwp_height, n_nwp_chans), device=nwp_data.device) 136 | nwp_data = torch.cat([nwp_data, nwp_data_zeros], dim=1) 137 | 138 | nwp_data = nwp_data.reshape(new_batch_size, nwp_width, nwp_height, n_nwp_chans) 139 | 140 | assert nwp_width == width, f'nwp width {nwp_width} should match the sat width {width}' 141 | assert nwp_height == height, f'nwp height {nwp_height} should match the sat height {height}' 142 | 143 | data = torch.cat((sat_data, nwp_data), dim=-1) 144 | 145 | # Perceiver 146 | # Pass data through the network :) 147 | out = self.perceiver(data) 148 | 149 | out = out.reshape(new_batch_size, PERCEIVER_OUTPUT_SIZE) 150 | out = F.relu(self.fc1(out)) 151 | 152 | # ********************** Embedding of PV system ID ******************** 153 | if self.embedding_dem: 154 | pv_row = ( 155 | x.pv.pv_system_row_number[0 : self.batch_size, 0].type(torch.IntTensor).repeat_interleave(self.total_seq_length) 156 | ) 157 | pv_row = pv_row.to(out.device) 158 | pv_embedding = self.pv_system_id_embedding(pv_row) 159 | # print(out.shape)  # debug 160 | # print(pv_embedding.shape)  # debug 161 | out = torch.cat((out, pv_embedding), dim=1) 162 | 163 | # Fully connected layers. 164 | out = F.relu(self.fc2(out)) 165 | out = F.relu(self.fc3(out)) 166 | out = F.relu(self.fc4(out)) 167 | out = F.relu(self.fc5(out)) 168 | 169 | # ******************* PREP DATA FOR RNN ******************************* 170 | out = out.reshape(batch_size, self.total_seq_length, FC_OUTPUT_SIZE) 171 | 172 | # The RNN encoder gets recent history: satellite, NWP, 173 | # datetime features, and recent PV history. The RNN decoder 174 | # gets what we know about the future: satellite, NWP, and 175 | # datetime features.
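        # As a worked example (assuming BaseModel derives history_len_5 as
        # history_minutes // 5 and forecast_len as forecast_minutes // 5): with
        # the default 30 minutes of history and 120 minutes of forecast, the
        # encoder sees 30 // 5 + 1 = 7 history steps and the decoder rolls out
        # 120 // 5 = 24 forecast steps, together covering total_seq_length = 31.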
176 | 177 | ####### Time inputs 178 | 179 | # Concat 180 | rnn_input = torch.cat( 181 | ( 182 | out, 183 | ), 184 | dim=2, 185 | ) 186 | 187 | if self.output_variable == 'pv_yield': 188 | # take the history of the pv yield of this system, 189 | pv_yield_history = x.pv.pv_yield[0 : self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float() 190 | encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2) 191 | elif self.output_variable == 'gsp_yield': 192 | # take the history of the gsp yield of this system, 193 | gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float() 194 | encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2) 195 | 196 | 197 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 198 | decoder_output, _ = self.decoder_rnn(rnn_input[:, -self.forecast_len :], encoder_hidden) 199 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 200 | 201 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 202 | decoder_output = self.decoder_fc2(decoder_output) 203 | 204 | return decoder_output.squeeze(dim=-1) 205 | -------------------------------------------------------------------------------- /predict_pv_yield/netcdf_dataset.py: -------------------------------------------------------------------------------- 1 | ######## 2 | # Moved this to 'nowcasting_dataset' repo - https://github.com/openclimatefix/nowcasting_dataset 3 | ######### 4 | 5 | # import gcsfs 6 | # import os 7 | # import numpy as np 8 | # import xarray as xr 9 | # from nowcasting_dataset import utils as nd_utils 10 | # from nowcasting_dataset import example 11 | # import torch 12 | # 13 | # 14 | # # TODO: Take these from nowcasting_dataset. 15 | # SAT_VARIABLE_NAMES = ( 16 | # 'HRV', 'IR_016', 'IR_039', 'IR_087', 'IR_097', 'IR_108', 'IR_120', 17 | # 'IR_134', 'VIS006', 'VIS008', 'WV_062', 'WV_073') 18 | # 19 | # SAT_MEAN = xr.DataArray( 20 | # data=[ 21 | # 93.23458, 131.71373, 843.7779 , 736.6148 , 771.1189 , 589.66034, 22 | # 862.29816, 927.69586, 90.70885, 107.58985, 618.4583 , 532.47394], 23 | # dims=['sat_variable'], 24 | # coords={'sat_variable': list(SAT_VARIABLE_NAMES)}).astype(np.float32) 25 | # 26 | # SAT_STD = xr.DataArray( 27 | # data=[ 28 | # 115.34247 , 139.92636 , 36.99538 , 57.366386, 30.346825, 29 | # 149.68007 , 51.70631 , 35.872967, 115.77212 , 120.997154, 30 | # 98.57828 , 99.76469], 31 | # dims=['sat_variable'], 32 | # coords={'sat_variable': list(SAT_VARIABLE_NAMES)}).astype(np.float32) 33 | # 34 | # 35 | # 36 | # class NetCDFDataset(torch.utils.data.Dataset): 37 | # """Loads data saved by the `prepare_ml_training_data.py` script.""" 38 | # 39 | # def __init__( 40 | # self, n_batches: int, src_path: str, tmp_path: str): 41 | # """ 42 | # Args: 43 | # n_batches: Number of batches available on disk. 44 | # src_path: The full path (including 'gs://') to the data on 45 | # Google Cloud storage. 46 | # tmp_path: The full path to the local temporary directory 47 | # (on a local filesystem). 48 | # """ 49 | # self.n_batches = n_batches 50 | # self.src_path = src_path 51 | # self.tmp_path = tmp_path 52 | # 53 | # def per_worker_init(self, worker_id: int): 54 | # self.gcs = gcsfs.GCSFileSystem() 55 | # 56 | # def __len__(self): 57 | # return self.n_batches 58 | # 59 | # def __getitem__(self, batch_idx: int) -> example.Example: 60 | # """Returns a whole batch at once. 61 | # 62 | # Args: 63 | # batch_idx: The integer index of the batch. 
Must be in the range 64 | # [0, self.n_batches). 65 | # 66 | # Returns: 67 | # NamedDict where each value is a numpy array. The size of this 68 | # array's first dimension is the batch size. 69 | # """ 70 | # if not 0 <= batch_idx < self.n_batches: 71 | # raise IndexError( 72 | # 'batch_idx must be in the range' 73 | # f' [0, {self.n_batches}), not {batch_idx}!') 74 | # netcdf_filename = nd_utils.get_netcdf_filename(batch_idx) 75 | # remote_netcdf_filename = os.path.join(self.src_path, netcdf_filename) 76 | # local_netcdf_filename = os.path.join(self.tmp_path, netcdf_filename) 77 | # self.gcs.get(remote_netcdf_filename, local_netcdf_filename) 78 | # netcdf_batch = xr.load_dataset(local_netcdf_filename) 79 | # os.remove(local_netcdf_filename) 80 | # 81 | # batch = example.Example( 82 | # sat_datetime_index=netcdf_batch.sat_time_coords, 83 | # nwp_target_time=netcdf_batch.nwp_time_coords) 84 | # for key in [ 85 | # 'nwp', 'nwp_x_coords', 'nwp_y_coords', 86 | # 'sat_data', 'sat_x_coords', 'sat_y_coords', 87 | # 'pv_yield', 'pv_system_id', 'pv_system_row_number', 88 | # 'pv_system_x_coords', 'pv_system_y_coords', 89 | # 'x_meters_center', 'y_meters_center' 90 | # ] + list(example.DATETIME_FEATURE_NAMES): 91 | # try: 92 | # batch[key] = netcdf_batch[key] 93 | # except KeyError: 94 | # pass 95 | # 96 | # sat_data = batch['sat_data'] 97 | # if sat_data.dtype == np.int16: 98 | # sat_data = sat_data.astype(np.float32) 99 | # sat_data = sat_data - SAT_MEAN 100 | # sat_data /= SAT_STD 101 | # batch['sat_data'] = sat_data 102 | # 103 | # batch = example.to_numpy(batch) 104 | # 105 | # return batch 106 | # 107 | # 108 | # def worker_init_fn(worker_id): 109 | # """Configures each dataset worker process. 110 | # 111 | # Just has one job! To call NowcastingDataset.per_worker_init(). 112 | # """ 113 | # # get_worker_info() returns information specific to each worker process. 114 | # worker_info = torch.utils.data.get_worker_info() 115 | # if worker_info is None: 116 | # print('worker_info is None!') 117 | # else: 118 | # # The NowcastingDataset copy in this worker process. 119 | # dataset_obj = worker_info.dataset 120 | # dataset_obj.per_worker_init(worker_info.id) 121 | -------------------------------------------------------------------------------- /predict_pv_yield/training.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import hydra 4 | from omegaconf import DictConfig 5 | from pytorch_lightning import ( 6 | Callback, 7 | LightningDataModule, 8 | LightningModule, 9 | Trainer, 10 | seed_everything, 11 | ) 12 | from pytorch_lightning.loggers import LightningLoggerBase 13 | 14 | from predict_pv_yield import utils 15 | import torch 16 | 17 | log = utils.get_logger(__name__) 18 | 19 | torch.set_default_dtype(torch.float32) 20 | 21 | 22 | def train(config: DictConfig) -> Optional[float]: 23 | """Contains training pipeline. 24 | Instantiates all PyTorch Lightning objects from config. 25 | 26 | Args: 27 | config (DictConfig): Configuration composed by Hydra. 28 | 29 | Returns: 30 | Optional[float]: Metric score for hyperparameter optimization. 
31 | """ 32 | 33 | # Set seed for random number generators in pytorch, numpy and python.random 34 | if "seed" in config: 35 | seed_everything(config.seed, workers=True) 36 | 37 | # Init lightning datamodule 38 | log.info(f"Instantiating datamodule <{config.datamodule._target_}>") 39 | datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule) 40 | 41 | # Init lightning model 42 | log.info(f"Instantiating model <{config.model._target_}>") 43 | model: LightningModule = hydra.utils.instantiate(config.model) 44 | 45 | # Init lightning callbacks 46 | callbacks: List[Callback] = [] 47 | if "callbacks" in config: 48 | for _, cb_conf in config.callbacks.items(): 49 | if "_target_" in cb_conf: 50 | log.info(f"Instantiating callback <{cb_conf._target_}>") 51 | callbacks.append(hydra.utils.instantiate(cb_conf)) 52 | 53 | # Init lightning loggers 54 | logger: List[LightningLoggerBase] = [] 55 | if "logger" in config: 56 | for _, lg_conf in config.logger.items(): 57 | if "_target_" in lg_conf: 58 | log.info(f"Instantiating logger <{lg_conf._target_}>") 59 | logger.append(hydra.utils.instantiate(lg_conf)) 60 | 61 | # Init lightning trainer 62 | log.info(f"Instantiating trainer <{config.trainer._target_}>") 63 | trainer: Trainer = hydra.utils.instantiate( 64 | config.trainer, callbacks=callbacks, logger=logger, _convert_="partial" 65 | ) 66 | 67 | # Send some parameters from config to all lightning loggers 68 | log.info("Logging hyperparameters!") 69 | utils.log_hyperparameters( 70 | config=config, 71 | model=model, 72 | datamodule=datamodule, 73 | trainer=trainer, 74 | callbacks=callbacks, 75 | logger=logger, 76 | ) 77 | 78 | # Train the model 79 | log.info("Starting training!") 80 | if 'validate_only' in config: 81 | trainer.validate(model=model, datamodule=datamodule) 82 | else: 83 | trainer.fit(model=model, datamodule=datamodule) 84 | 85 | # Evaluate model on test set, using the best model achieved during training 86 | if config.get("test_after_training") and not config.trainer.get("fast_dev_run"): 87 | log.info("Starting testing!") 88 | trainer.test() 89 | 90 | # Make sure everything closed properly 91 | log.info("Finalizing!") 92 | utils.finish( 93 | config=config, 94 | model=model, 95 | datamodule=datamodule, 96 | trainer=trainer, 97 | callbacks=callbacks, 98 | logger=logger, 99 | ) 100 | 101 | # Print path to best checkpoint 102 | log.info(f"Best checkpoint path:\n{trainer.checkpoint_callback.best_model_path}") 103 | 104 | # Return metric score for hyperparameter optimization 105 | optimized_metric = config.get("optimized_metric") 106 | if optimized_metric: 107 | return trainer.callback_metrics[optimized_metric] 108 | -------------------------------------------------------------------------------- /predict_pv_yield/utils.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os 3 | import predict_pv_yield 4 | import logging 5 | import os 6 | import warnings 7 | from typing import List, Sequence 8 | 9 | import pytorch_lightning as pl 10 | import rich.syntax 11 | import rich.tree 12 | from omegaconf import DictConfig, OmegaConf 13 | from pytorch_lightning.utilities import rank_zero_only 14 | 15 | 16 | def load_config(config_file): 17 | """ 18 | Open yam configruation file, and get rid eof '_target_' line 19 | """ 20 | 21 | # get full path of config file 22 | path = os.path.dirname(predict_pv_yield.__file__) 23 | config_file = f"{path}/../{config_file}" 24 | 25 | 26 | with open(config_file, "r") as cfg: 27 | config = 
yaml.load(cfg, Loader=yaml.FullLoader) 28 | 29 | if "_target_" in config.keys(): 30 | config.pop("_target_") # This is only for Hydra 31 | 32 | return config 33 | 34 | 35 | def get_logger(name=__name__, level=logging.INFO) -> logging.Logger: 36 | """Initializes multi-GPU-friendly python logger.""" 37 | 38 | logger = logging.getLogger(name) 39 | logger.setLevel(level) 40 | 41 | # this ensures all logging levels get marked with the rank zero decorator 42 | # otherwise logs would get multiplied for each GPU process in multi-GPU setup 43 | for level in ("debug", "info", "warning", "error", "exception", "fatal", "critical"): 44 | setattr(logger, level, rank_zero_only(getattr(logger, level))) 45 | 46 | return logger 47 | 48 | 49 | def extras(config: DictConfig) -> None: 50 | """A couple of optional utilities, controlled by main config file: 51 | - disabling warnings 52 | - easier access to debug mode 53 | - forcing debug friendly configuration 54 | 55 | Modifies DictConfig in place. 56 | 57 | Args: 58 | config (DictConfig): Configuration composed by Hydra. 59 | """ 60 | 61 | log = get_logger() 62 | 63 | # enable adding new keys to config 64 | OmegaConf.set_struct(config, False) 65 | 66 | # disable python warnings if 67 | if config.get("ignore_warnings"): 68 | log.info("Disabling python warnings! ") 69 | warnings.filterwarnings("ignore") 70 | 71 | # set if 72 | if config.get("debug"): 73 | log.info("Running in debug mode! ") 74 | config.trainer.fast_dev_run = True 75 | 76 | # force debugger friendly configuration if 77 | if config.trainer.get("fast_dev_run"): 78 | log.info("Forcing debugger friendly configuration! ") 79 | # Debuggers don't like GPUs or multiprocessing 80 | if config.trainer.get("gpus"): 81 | config.trainer.gpus = 0 82 | if config.datamodule.get("pin_memory"): 83 | config.datamodule.pin_memory = False 84 | if config.datamodule.get("num_workers"): 85 | config.datamodule.num_workers = 0 86 | 87 | # disable adding new keys to config 88 | OmegaConf.set_struct(config, True) 89 | 90 | 91 | @rank_zero_only 92 | def print_config( 93 | config: DictConfig, 94 | fields: Sequence[str] = ( 95 | "trainer", 96 | "model", 97 | "datamodule", 98 | "callbacks", 99 | "logger", 100 | "seed", 101 | ), 102 | resolve: bool = True, 103 | ) -> None: 104 | """Prints content of DictConfig using Rich library and its tree structure. 105 | 106 | Args: 107 | config (DictConfig): Configuration composed by Hydra. 108 | fields (Sequence[str], optional): Determines which main fields from config will 109 | be printed and in what order. 110 | resolve (bool, optional): Whether to resolve reference fields of DictConfig. 
111 | """ 112 | 113 | style = "dim" 114 | tree = rich.tree.Tree("CONFIG", style=style, guide_style=style) 115 | 116 | for field in fields: 117 | branch = tree.add(field, style=style, guide_style=style) 118 | 119 | config_section = config.get(field) 120 | branch_content = str(config_section) 121 | if isinstance(config_section, DictConfig): 122 | branch_content = OmegaConf.to_yaml(config_section, resolve=resolve) 123 | 124 | branch.add(rich.syntax.Syntax(branch_content, "yaml")) 125 | 126 | rich.print(tree) 127 | 128 | with open("config_tree.txt", "w") as fp: 129 | rich.print(tree, file=fp) 130 | 131 | 132 | def empty(*args, **kwargs): 133 | pass 134 | 135 | 136 | @rank_zero_only 137 | def log_hyperparameters( 138 | config: DictConfig, 139 | model: pl.LightningModule, 140 | datamodule: pl.LightningDataModule, 141 | trainer: pl.Trainer, 142 | callbacks: List[pl.Callback], 143 | logger: List[pl.loggers.LightningLoggerBase], 144 | ) -> None: 145 | """This method controls which parameters from Hydra config are saved by Lightning loggers. 146 | 147 | Additionaly saves: 148 | - number of trainable model parameters 149 | """ 150 | 151 | hparams = {} 152 | 153 | # choose which parts of hydra config will be saved to loggers 154 | hparams["trainer"] = config["trainer"] 155 | hparams["model"] = config["model"] 156 | hparams["datamodule"] = config["datamodule"] 157 | if "seed" in config: 158 | hparams["seed"] = config["seed"] 159 | if "callbacks" in config: 160 | hparams["callbacks"] = config["callbacks"] 161 | 162 | # save number of model parameters 163 | hparams["model/params_total"] = sum(p.numel() for p in model.parameters()) 164 | hparams["model/params_trainable"] = sum( 165 | p.numel() for p in model.parameters() if p.requires_grad 166 | ) 167 | hparams["model/params_not_trainable"] = sum( 168 | p.numel() for p in model.parameters() if not p.requires_grad 169 | ) 170 | 171 | # send hparams to all loggers 172 | trainer.logger.log_hyperparams(hparams) 173 | 174 | # disable logging any more hyperparameters for all loggers 175 | # this is just a trick to prevent trainer from logging hparams of model, 176 | # since we already did that above 177 | trainer.logger.log_hyperparams = empty 178 | 179 | 180 | def finish( 181 | config: DictConfig, 182 | model: pl.LightningModule, 183 | datamodule: pl.LightningDataModule, 184 | trainer: pl.Trainer, 185 | callbacks: List[pl.Callback], 186 | logger: List[pl.loggers.LightningLoggerBase], 187 | ) -> None: 188 | """Makes sure everything closed properly.""" 189 | 190 | # without this sweeps with wandb logger might crash! 
191 | for lg in logger: 192 | if isinstance(lg, pl.loggers.wandb.WandbLogger): 193 | import wandb 194 | 195 | wandb.finish() 196 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nowcasting_dataset 2 | nowcasting_utils 3 | nowcasting_dataloader 4 | numpy 5 | pandas 6 | matplotlib 7 | xarray 8 | ipykernel 9 | h5netcdf 10 | torch 11 | pytorch-lightning 12 | perceiver_pytorch 13 | pytest 14 | pytest-cov 15 | flake8 16 | jedi 17 | tables 18 | tilemapbase 19 | neptune-client[pytorch-lightning] 20 | rich 21 | omegaconf 22 | hydra-core 23 | python-dotenv 24 | hydra-optuna-sweeper 25 | black 26 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ["HYDRA_FULL_ERROR"] = "1" 4 | import dotenv 5 | import hydra 6 | from omegaconf import DictConfig 7 | 8 | # this file can be run for example using 9 | # python run.py experiment=example_simple 10 | 11 | # load environment variables from `.env` file if it exists 12 | # recursively searches for `.env` in all folders starting from work dir 13 | dotenv.load_dotenv(override=True) 14 | 15 | 16 | @hydra.main(config_path="configs/", config_name="config.yaml") 17 | def main(config: DictConfig): 18 | 19 | # Imports should be nested inside @hydra.main to optimize tab completion 20 | # Read more here: https://github.com/facebookresearch/hydra/issues/934 21 | from predict_pv_yield.utils import extras, print_config 22 | from predict_pv_yield.training import train 23 | 24 | # A couple of optional utilities: 25 | # - disabling python warnings 26 | # - easier access to debug mode 27 | # - forcing debug friendly configuration 28 | # - forcing multi-gpu friendly configuration 29 | # You can safely get rid of this line if you don't want those 30 | extras(config) 31 | 32 | 33 | 34 | # Pretty print config using Rich library 35 | if config.get("print_config"): 36 | print_config(config, resolve=True) 37 | 38 | # Train model 39 | return train(config) 40 | 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='predict_pv_yield', 5 | version='0.1', 6 | packages=find_packages()) 7 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/tests/__init__.py -------------------------------------------------------------------------------- /tests/configs/dataset/configuration.yaml: -------------------------------------------------------------------------------- 1 | general: 2 | description: example configuration 3 | name: example 4 | git: null 5 | input_data: 6 | gsp: 7 | gsp_zarr_path: tests/data/gsp/test.zarr 8 | nwp: 9 | nwp_channels: 10 | - t 11 | nwp_image_size_pixels: 2 12 | nwp_zarr_path: tests/data/nwp_data/test.zarr 13 | pv: 14 | pv_filename: tests/data/pv_data/test.nc 15 | pv_metadata_filename: tests/data/pv_metadata/UK_PV_metadata.csv 16 | satellite: 17 | satellite_channels: 18 | - HRV 19 | satellite_image_size_pixels: 64 
20 | satellite_zarr_path: tests/data/sat_data.zarr 21 | hrvsatellite: 22 | hrvsatellite_channels: 23 | - HRV 24 | hrvsatellite_image_size_pixels: 64 25 | hrvsatellite_zarr_path: tests/data/sat_data.zarr 26 | opticalflow: 27 | opticalflow_zarr_path: /mnt/storage_ssd_8tb/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/satellite/EUMETSAT/SEVIRI_RSS/zarr/v3/eumetsat_seviri_uk.zarr 28 | opticalflow_input_image_size_pixels: 94 29 | opticalflow_output_image_size_pixels: 24 30 | opticalflow_source_data_source_class_name: SatelliteDataSource 31 | opticalflow_channels: 32 | - IR_016 33 | sun: 34 | sun_zarr_path: tests/data/sun/test.zarr 35 | topographic: 36 | topographic_filename: tests/data/europe_dem_2km_osgb.tif 37 | default_history_minutes: 120 38 | default_forecast_minutes: 30 39 | output_data: 40 | filepath: not used by unittests! 41 | process: 42 | batch_size: 2 43 | local_temp_path: ~/temp/ 44 | seed: 1234 45 | upload_every_n_batches: 16 46 | -------------------------------------------------------------------------------- /tests/configs/experiment/example_simple.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: baseline.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 2 21 | 22 | datamodule: 23 | n_train_data: 2 24 | n_val_data: 2 25 | 26 | 27 | model: 28 | conv3d_channels: 4 29 | -------------------------------------------------------------------------------- /tests/configs/model/conv3d.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_yield: False 4 | include_nwp: False 5 | forecast_minutes: 60 6 | history_minutes: 60 7 | number_of_conv3d_layers: 4 8 | conv3d_channels: 32 9 | image_size_pixels: 16 # this is made smaller for testing 10 | number_sat_channels: 11 11 | fc1_output_features: 16 12 | fc2_output_features: 16 13 | fc3_output_features: 16 14 | -------------------------------------------------------------------------------- /tests/configs/model/conv3d_gsp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_yield: False 4 | include_nwp: False 5 | forecast_minutes: 60 6 | history_minutes: 60 7 | number_of_conv3d_layers: 4 8 | conv3d_channels: 32 9 | image_size_pixels: 16 # this is made smaller for testing 10 | number_sat_channels: 11 11 | fc1_output_features: 16 12 | fc2_output_features: 16 13 | fc3_output_features: 16 14 | output_variable: gsp_yield 15 | -------------------------------------------------------------------------------- /tests/configs/model/conv3d_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_or_gsp_yield_history: False 4 | include_nwp: True 5 | forecast_minutes: 60 6 | history_minutes: 60 7 | number_of_conv3d_layers: 4 8 | 
conv3d_channels: 32 9 | nwp_image_size_pixels: 16 10 | number_nwp_channels: 10 11 | fc1_output_features: 16 12 | fc2_output_features: 16 13 | fc3_output_features: 16 14 | output_variable: gsp_yield 15 | include_pv_yield_history: True 16 | -------------------------------------------------------------------------------- /tests/configs/model/conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_or_gsp_yield_history: False 4 | include_nwp: True 5 | forecast_minutes: 60 6 | history_minutes: 60 7 | number_of_conv3d_layers: 4 8 | conv3d_channels: 32 9 | image_size_pixels: 16 # this is made smaller for testing 10 | nwp_image_size_pixels: 16 11 | number_sat_channels: 11 12 | number_nwp_channels: 10 13 | fc1_output_features: 16 14 | fc2_output_features: 16 15 | fc3_output_features: 16 16 | output_variable: gsp_yield 17 | include_pv_yield_history: True 18 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from nowcasting_dataset.config.model import Configuration 4 | from predict_pv_yield.utils import load_config 5 | 6 | 7 | @pytest.fixture() 8 | def configuration(): 9 | configuration = Configuration() 10 | configuration.input_data = configuration.input_data.set_all_to_defaults() 11 | configuration.process.batch_size = 2 12 | configuration.input_data.default_history_minutes = 30 13 | configuration.input_data.default_forecast_minutes = 60 14 | configuration.input_data.nwp.nwp_image_size_pixels = 16 15 | 16 | return configuration 17 | 18 | 19 | @pytest.fixture() 20 | def configuration_conv3d(): 21 | 22 | config_file = "tests/configs/model/conv3d.yaml" 23 | config = load_config(config_file) 24 | 25 | dataset_configuration = Configuration() 26 | dataset_configuration.process.batch_size = 2 27 | dataset_configuration.input_data.default_history_minutes = config['history_minutes'] 28 | dataset_configuration.input_data.default_forecast_minutes = config['forecast_minutes'] 29 | dataset_configuration.input_data = dataset_configuration.input_data.set_all_to_defaults() 30 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 2 31 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = config['image_size_pixels'] 32 | dataset_configuration.input_data.satellite.forecast_minutes = config['forecast_minutes'] 33 | dataset_configuration.input_data.satellite.history_minutes = config['history_minutes'] 34 | 35 | return dataset_configuration 36 | 37 | 38 | @pytest.fixture() 39 | def configuration_perceiver(): 40 | 41 | dataset_configuration = Configuration() 42 | dataset_configuration.input_data = dataset_configuration.input_data.set_all_to_defaults() 43 | dataset_configuration.process.batch_size = 2 44 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 45 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = 16 46 | dataset_configuration.input_data.default_history_minutes = 30 47 | dataset_configuration.input_data.default_forecast_minutes = 120 48 | dataset_configuration.input_data.nwp.nwp_channels = dataset_configuration.input_data.nwp.nwp_channels[0:10] 49 | 50 | return dataset_configuration 51 | -------------------------------------------------------------------------------- /tests/models/baseline/test_baseline_model.py: 
-------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.baseline.last_value import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from nowcasting_dataloader.fake import FakeDataset 5 | from nowcasting_dataset.config.model import Configuration 6 | 7 | 8 | def test_init(): 9 | 10 | _ = Model() 11 | 12 | 13 | def test_model_forward(configuration): 14 | 15 | # start model 16 | model = Model(forecast_minutes=configuration.input_data.default_forecast_minutes) 17 | 18 | # create fake data loader 19 | train_dataset = FakeDataset(configuration=configuration) 20 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 21 | 22 | # get a batch of data 23 | x = next(iter(train_dataloader)) 24 | 25 | # run data through model 26 | y = model(x) 27 | 28 | # check the output is the correct shape 29 | assert len(y.shape) == 2 30 | assert y.shape[0] == configuration.process.batch_size 31 | assert y.shape[1] == configuration.input_data.default_forecast_minutes // 5 32 | 33 | 34 | def test_trainer(configuration): 35 | 36 | # start model 37 | model = Model(forecast_minutes=configuration.input_data.default_forecast_minutes) 38 | 39 | # create fake data loader 40 | train_dataset = FakeDataset(configuration=configuration) 41 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 42 | 43 | # set up trainer 44 | trainer = pl.Trainer(gpus=0, max_epochs=1) 45 | 46 | # test over training set 47 | _ = trainer.test(model, train_dataloader) 48 | -------------------------------------------------------------------------------- /tests/models/baseline/test_baseline_model_gsp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.baseline.last_value import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | import pandas as pd 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | import tempfile 8 | 9 | 10 | 11 | def test_init(): 12 | 13 | _ = Model(output_variable="gsp_yield") 14 | 15 | 16 | def test_model_forward(configuration): 17 | 18 | # start model 19 | model = Model( 20 | forecast_minutes=configuration.input_data.default_forecast_minutes, 21 | history_minutes=configuration.input_data.default_history_minutes, 22 | output_variable="gsp_yield", 23 | ) 24 | 25 | # create fake data loader 26 | train_dataset = FakeDataset(configuration=configuration) 27 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 28 | 29 | # get a batch of data 30 | x = next(iter(train_dataloader)) 31 | 32 | # run data through model 33 | y = model(x) 34 | 35 | # check the output is the correct shape 36 | assert len(y.shape) == 2 37 | assert y.shape[0] == configuration.process.batch_size 38 | assert y.shape[1] == configuration.input_data.default_forecast_minutes // 30 39 | 40 | 41 | def test_model_validation(configuration): 42 | 43 | # start model 44 | model = Model( 45 | forecast_minutes=configuration.input_data.default_forecast_minutes, 46 | history_minutes=configuration.input_data.default_history_minutes, 47 | output_variable="gsp_yield", 48 | ) 49 | 50 | # create fake data loader 51 | train_dataset = FakeDataset(configuration=configuration) 52 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 53 | 54 | # get a batch of data 55 | x = next(iter(train_dataloader)) 56 | 57 | # run data through model 58 | model.validation_step(x, 0) 59 | 60 | 61 | def 
test_trainer(configuration): 62 | 63 | # start model 64 | model = Model( 65 | forecast_minutes=configuration.input_data.default_forecast_minutes, 66 | history_minutes=configuration.input_data.default_history_minutes, 67 | output_variable="gsp_yield", 68 | ) 69 | 70 | # create fake data loader 71 | train_dataset = FakeDataset(configuration=configuration) 72 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 73 | 74 | # set up trainer 75 | trainer = pl.Trainer(gpus=0, max_epochs=1) 76 | 77 | # test over training set 78 | _ = trainer.test(model, train_dataloader) 79 | 80 | 81 | def test_trainer_validation(configuration): 82 | 83 | # start model 84 | model = Model( 85 | forecast_minutes=configuration.input_data.default_forecast_minutes, 86 | history_minutes=configuration.input_data.default_history_minutes, 87 | output_variable="gsp_yield", 88 | ) 89 | 90 | # create fake data loader 91 | train_dataset = FakeDataset(configuration=configuration) 92 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 93 | 94 | # set up trainer 95 | trainer = pl.Trainer(gpus=0, max_epochs=1) 96 | 97 | with tempfile.TemporaryDirectory() as tmpdirname: 98 | model.results_file_name = f'{tmpdirname}/temp' 99 | 100 | # test over validation set 101 | _ = trainer.validate(model, train_dataloader) 102 | 103 | # check that the csv file of validation results has been made 104 | results_df = pd.read_csv(f'{model.results_file_name}_0.csv') 105 | 106 | assert len(results_df) == len(train_dataloader) * configuration.process.batch_size * model.forecast_len_30 107 | assert 't0_datetime_utc' in results_df.keys() 108 | assert 'target_datetime_utc' in results_df.keys() 109 | assert 'gsp_id' in results_df.keys() 110 | assert "actual_gsp_pv_outturn_mw" in results_df.keys() 111 | assert "forecast_gsp_pv_outturn_mw" in results_df.keys() 112 | -------------------------------------------------------------------------------- /tests/models/conv3d/test_conv3d_model.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from predict_pv_yield.utils import load_config 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | 8 | 9 | 10 | def test_init(): 11 | 12 | config_file = "configs/model/conv3d.yaml" 13 | config = load_config(config_file) 14 | 15 | _ = Model(**config) 16 | 17 | 18 | def test_model_forward(configuration_conv3d): 19 | 20 | config_file = "tests/configs/model/conv3d.yaml" 21 | config = load_config(config_file) 22 | 23 | dataset_configuration = configuration_conv3d 24 | 25 | # start model 26 | model = Model(**config) 27 | 28 | # create fake data loader 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 31 | x = next(iter(train_dataloader)) 32 | 33 | # run data through model 34 | y = model(x) 35 | 36 | # check the output is the correct shape 37 | assert len(y.shape) == 2 38 | assert y.shape[0] == 2 39 | assert y.shape[1] == model.forecast_len_5 40 | 41 | 42 | def test_train(configuration_conv3d): 43 | 44 | config_file = "tests/configs/model/conv3d.yaml" 45 | config = load_config(config_file) 46 | 47 | dataset_configuration = configuration_conv3d 48 | 49 | # start model 50 | model = Model(**config) 51 | 52 | # create fake data loader 53 | train_dataset = 
FakeDataset(configuration=dataset_configuration) 54 | train_dataset.length = 2 55 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 56 | 57 | # fit model 58 | trainer = pl.Trainer(gpus=0, max_epochs=1) 59 | trainer.fit(model, train_dataloader) 60 | 61 | # predict over training set 62 | _ = trainer.predict(model, train_dataloader) 63 | -------------------------------------------------------------------------------- /tests/models/conv3d/test_conv3d_model_gsp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from predict_pv_yield.utils import load_config 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | 8 | 9 | def test_init(): 10 | 11 | config_file = "tests/configs/model/conv3d_gsp.yaml" 12 | config = load_config(config_file) 13 | 14 | _ = Model(**config) 15 | 16 | 17 | def test_model_forward(configuration_conv3d): 18 | 19 | config_file = "tests/configs/model/conv3d_gsp.yaml" 20 | config = load_config(config_file) 21 | 22 | dataset_configuration = configuration_conv3d 23 | 24 | # start model 25 | model = Model(**config) 26 | 27 | # create fake data loader 28 | train_dataset = FakeDataset(configuration=dataset_configuration) 29 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 30 | x = next(iter(train_dataloader)) 31 | 32 | # run data through model 33 | y = model(x) 34 | 35 | # check the output is the correct shape 36 | assert len(y.shape) == 2 37 | assert y.shape[0] == 2 38 | assert y.shape[1] == model.forecast_len_30 39 | 40 | 41 | def test_train(configuration_conv3d): 42 | 43 | config_file = "tests/configs/model/conv3d_gsp.yaml" 44 | config = load_config(config_file) 45 | 46 | dataset_configuration = configuration_conv3d 47 | 48 | # start model 49 | model = Model(**config) 50 | 51 | # create fake data loader 52 | train_dataset = FakeDataset(configuration=dataset_configuration) 53 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 54 | 55 | # fit model 56 | trainer = pl.Trainer(gpus=0, max_epochs=1) 57 | trainer.fit(model, train_dataloader) 58 | 59 | # predict over training set 60 | _ = trainer.predict(model, train_dataloader) 61 | -------------------------------------------------------------------------------- /tests/models/conv3d/test_conv3d_model_nwp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model_nwp import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from predict_pv_yield.utils import load_config 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | 8 | 9 | def test_init(): 10 | 11 | config_file = "tests/configs/model/conv3d_nwp.yaml" 12 | config = load_config(config_file) 13 | 14 | _ = Model(**config) 15 | 16 | 17 | def test_model_forward(configuration_conv3d): 18 | 19 | config_file = "tests/configs/model/conv3d_nwp.yaml" 20 | config = load_config(config_file) 21 | 22 | # start model 23 | model = Model(**config) 24 | 25 | dataset_configuration = configuration_conv3d 26 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 27 | 28 | # create fake data loader 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, 
batch_size=None) 31 | x = next(iter(train_dataloader)) 32 | 33 | # run data through model 34 | y = model(x) 35 | 36 | # check the output is the correct shape 37 | assert len(y.shape) == 2 38 | assert y.shape[0] == 2 39 | assert y.shape[1] == model.forecast_len_30 40 | 41 | 42 | def test_model_forward_no_satellite(configuration_conv3d): 43 | 44 | config_file = "tests/configs/model/conv3d_nwp.yaml" 45 | config = load_config(config_file) 46 | config['include_future_satellite'] = False 47 | 48 | # start model 49 | model = Model(**config) 50 | 51 | dataset_configuration = configuration_conv3d 52 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 53 | 54 | # create fake data loader 55 | train_dataset = FakeDataset(configuration=dataset_configuration) 56 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 57 | x = next(iter(train_dataloader)) 58 | 59 | # run data through model 60 | y = model(x) 61 | 62 | # check the output is the correct shape 63 | assert len(y.shape) == 2 64 | assert y.shape[0] == 2 65 | assert y.shape[1] == model.forecast_len_30 66 | 67 | 68 | def test_train(configuration_conv3d): 69 | 70 | config_file = "tests/configs/model/conv3d_nwp.yaml" 71 | config = load_config(config_file) 72 | 73 | dataset_configuration = configuration_conv3d 74 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 75 | 76 | # start model 77 | model = Model(**config) 78 | 79 | # create fake data loader 80 | train_dataset = FakeDataset(configuration=dataset_configuration) 81 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 82 | 83 | # fit model 84 | trainer = pl.Trainer(gpus=0, max_epochs=1) 85 | trainer.fit(model, train_dataloader) 86 | 87 | # predict over training set 88 | _ = trainer.predict(model, train_dataloader) 89 | -------------------------------------------------------------------------------- /tests/models/conv3d/test_conv3d_model_sat_nwp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model_sat_nwp import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from predict_pv_yield.utils import load_config 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | 8 | 9 | def test_init(): 10 | 11 | config_file = "tests/configs/model/conv3d_sat_nwp.yaml" 12 | config = load_config(config_file) 13 | 14 | _ = Model(**config) 15 | 16 | 17 | def test_model_forward(configuration_conv3d): 18 | 19 | config_file = "tests/configs/model/conv3d_sat_nwp.yaml" 20 | config = load_config(config_file) 21 | 22 | # start model 23 | model = Model(**config) 24 | 25 | dataset_configuration = configuration_conv3d 26 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 27 | 28 | # create fake data loader 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 31 | x = next(iter(train_dataloader)) 32 | 33 | # run data through model 34 | y = model(x) 35 | 36 | # check the output is the correct shape 37 | assert len(y.shape) == 2 38 | assert y.shape[0] == 2 39 | assert y.shape[1] == model.forecast_len_30 40 | 41 | 42 | def test_model_forward_no_satellite(configuration_conv3d): 43 | 44 | config_file = "tests/configs/model/conv3d_sat_nwp.yaml" 45 | config = load_config(config_file) 46 | config['include_future_satellite'] = False 47 | 48 | # start model 49 | model = Model(**config) 50 | 51 | 
dataset_configuration = configuration_conv3d 52 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 53 | 54 | # create fake data loader 55 | train_dataset = FakeDataset(configuration=dataset_configuration) 56 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 57 | x = next(iter(train_dataloader)) 58 | 59 | # run data through model 60 | y = model(x) 61 | 62 | # check the output is the correct shape 63 | assert len(y.shape) == 2 64 | assert y.shape[0] == 2 65 | assert y.shape[1] == model.forecast_len_30 66 | 67 | 68 | def test_train(configuration_conv3d): 69 | 70 | config_file = "tests/configs/model/conv3d_sat_nwp.yaml" 71 | config = load_config(config_file) 72 | 73 | dataset_configuration = configuration_conv3d 74 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 75 | 76 | # start model 77 | model = Model(**config) 78 | 79 | # create fake data loader 80 | train_dataset = FakeDataset(configuration=dataset_configuration) 81 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 82 | 83 | # fit model 84 | trainer = pl.Trainer(gpus=0, max_epochs=1) 85 | trainer.fit(model, train_dataloader) 86 | 87 | # predict over training set 88 | _ = trainer.predict(model, train_dataloader) 89 | -------------------------------------------------------------------------------- /tests/models/perceiver/test_perceiver.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.perceiver.perceiver import PerceiverModel, params 2 | from nowcasting_dataloader.fake import FakeDataset 3 | import torch 4 | from nowcasting_dataset.config.model import Configuration 5 | 6 | 7 | def test_init_model(): 8 | """Initialize the model""" 9 | _ = PerceiverModel(history_minutes=3, forecast_minutes=3, nwp_channels=params["nwp_channels"]) 10 | 11 | 12 | def test_model_forward(configuration_perceiver): 13 | 14 | dataset_configuration = configuration_perceiver 15 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 64 16 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = 16 17 | 18 | model = PerceiverModel( 19 | history_minutes=30, 20 | forecast_minutes=60, 21 | nwp_channels=params["nwp_channels"], 22 | embedding_dem=2048 23 | ) # doesn't do anything 24 | 25 | # set up fake data 26 | train_dataset = FakeDataset(configuration=dataset_configuration) 27 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 28 | # get data 29 | x = next(iter(train_dataloader)) 30 | 31 | # run data through model 32 | y = model(x) 33 | 34 | # check the output is the correct shape 35 | assert len(y.shape) == 2 36 | assert y.shape[0] == dataset_configuration.process.batch_size 37 | assert y.shape[1] == 60 // 5 38 | -------------------------------------------------------------------------------- /tests/models/perceiver/test_perceiver_conv3d_sat_nwp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.perceiver.perceiver_conv3d_nwp_sat import Model, params, TOTAL_SEQ_LEN 2 | from predict_pv_yield.data.dataloader import FakeDataset 3 | import torch 4 | from nowcasting_dataset.config.model import Configuration 5 | 6 | 7 | 8 | 9 | def test_init_model(): 10 | """Initialize the model""" 11 | _ = Model( 12 | history_minutes=3, forecast_minutes=3, nwp_channels=params["nwp_channels"], output_variable="gsp_yield" 13 | ) 14 | 15 | 16 | def test_model_forward(configuration_perceiver): 17 | 18 | dataset_configuration 
= configuration_perceiver 19 | 20 | model = Model( 21 | history_minutes=30, 22 | forecast_minutes=60, 23 | nwp_channels=params["nwp_channels"], 24 | output_variable="gsp_yield", 25 | ) # doesn't do anything 26 | 27 | # set up fake data 28 | train_dataset = FakeDataset(configuration=dataset_configuration) 29 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 30 | # get data 31 | x = next(iter(train_dataloader)) 32 | 33 | # run data through model 34 | y = model(x) 35 | 36 | # check the output is the correct shape 37 | assert len(y.shape) == 2 38 | assert y.shape[0] == dataset_configuration.process.batch_size 39 | assert y.shape[1] == 60 // 30 40 | 41 | 42 | def test_model_forward_no_future_satellite(configuration_perceiver): 43 | 44 | dataset_configuration = configuration_perceiver 45 | 46 | model = Model( 47 | history_minutes=30, 48 | forecast_minutes=60, 49 | nwp_channels=params["nwp_channels"], 50 | output_variable="gsp_yield", 51 | use_future_satellite_images=False 52 | ) # doesn't do anything 53 | 54 | train_dataset = FakeDataset(configuration=dataset_configuration) 55 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 56 | # get data 57 | x = next(iter(train_dataloader)) 58 | 59 | # run data through model 60 | y = model(x) 61 | 62 | # check the output is the correct shape 63 | assert len(y.shape) == 2 64 | assert y.shape[0] == dataset_configuration.process.batch_size 65 | assert y.shape[1] == 60 // 30 66 | -------------------------------------------------------------------------------- /tests/models/perceiver/test_perceiver_gsp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.perceiver.perceiver import PerceiverModel, params 2 | from predict_pv_yield.data.dataloader import FakeDataset 3 | from nowcasting_dataset.config.model import Configuration 4 | import torch 5 | 6 | 7 | def test_init_model(): 8 | """Initialize the model""" 9 | _ = PerceiverModel( 10 | history_minutes=3, forecast_minutes=3, nwp_channels=params["nwp_channels"], output_variable="gsp_yield" 11 | ) 12 | 13 | 14 | def test_model_forward(configuration_perceiver): 15 | 16 | dataset_configuration = configuration_perceiver 17 | dataset_configuration.process.batch_size = 2 18 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 64 19 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = 16 20 | 21 | model = PerceiverModel( 22 | history_minutes=30, 23 | forecast_minutes=60, 24 | nwp_channels=params["nwp_channels"], 25 | output_variable="gsp_yield", 26 | ) # doesn't do anything 27 | 28 | # set up fake data 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 31 | # get data 32 | x = next(iter(train_dataloader)) 33 | 34 | # run data through model 35 | y = model(x) 36 | 37 | # check the output is the correct shape 38 | assert len(y.shape) == 2 39 | assert y.shape[0] == dataset_configuration.process.batch_size 40 | assert y.shape[1] == 60 // 30 41 | -------------------------------------------------------------------------------- /tests/models/perceiver/test_perceiver_sat_nwp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.perceiver.perceiver_nwp_sat import Model, params, TOTAL_SEQ_LEN 2 | from predict_pv_yield.data.dataloader import FakeDataset 3 | import torch 4 | 5 | 6 | def test_init_model(): 7 | """Initialize the 
model""" 8 | _ = Model( 9 | history_minutes=3, forecast_minutes=3, nwp_channels=params["nwp_channels"], output_variable="gsp_yield" 10 | ) 11 | 12 | 13 | def test_model_forward(configuration_perceiver): 14 | 15 | dataset_configuration = configuration_perceiver 16 | dataset_configuration.process.batch_size = 2 17 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 18 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = 16 19 | 20 | model = Model( 21 | history_minutes=30, 22 | forecast_minutes=60, 23 | nwp_channels=params["nwp_channels"], 24 | output_variable="gsp_yield", 25 | ) # doesnt do anything 26 | 27 | batch_size = 2 28 | # set up fake data 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 31 | # get data 32 | x = next(iter(train_dataloader)) 33 | 34 | # run data through model 35 | y = model(x) 36 | 37 | # check out put is the correct shape 38 | assert len(y.shape) == 2 39 | assert y.shape[0] == batch_size 40 | assert y.shape[1] == 60 // 30 41 | -------------------------------------------------------------------------------- /tests/test_training.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.training import train 2 | import os 3 | 4 | from hydra import compose, initialize 5 | import tilemapbase 6 | 7 | 8 | def test_train(): 9 | 10 | os.environ["NEPTUNE_API_TOKEN"] = "not_a_token" 11 | 12 | # for Github actions need to create this 13 | tilemapbase.init(create=True) 14 | 15 | initialize(config_path="../configs", job_name="test_app") 16 | config = compose( 17 | config_name="config", 18 | overrides=["logger=csv", 19 | "experiment=example_simple", 20 | "datamodule.fake_data=true", 21 | "datamodule.data_path=tests/configs/dataset", 22 | "trainer.fast_dev_run=true", ], 23 | ) 24 | 25 | train(config=config) 26 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.utils import extras, print_config 2 | 3 | from hydra import compose, initialize 4 | import hydra 5 | 6 | import os 7 | 8 | 9 | def test_utils(): 10 | """ 11 | Test that util functions work. This just runs them. Perhaps slightly harder to check they work how they should. 12 | """ 13 | os.environ["NEPTUNE_API_TOKEN"] = "not_a_token" 14 | 15 | hydra.core.global_hydra.GlobalHydra.instance().clear() 16 | initialize(config_path="../configs", job_name="test_app") 17 | config = compose(config_name="config") 18 | 19 | extras(config) 20 | 21 | print_config(config) 22 | -------------------------------------------------------------------------------- /weights/conv3d/readme.md: -------------------------------------------------------------------------------- 1 | Weights stored here are from 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-189/charts 4 | 5 | conv3d_channels 32 6 | fc1_output_features 128 7 | fc2_output_features 128 8 | fc3_output_features 64 9 | number_of_conv3d_layers 6 10 | --------------------------------------------------------------------------------