├── .github └── workflows │ └── python-app.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── configs ├── callbacks │ ├── default.yaml │ ├── none.yaml │ └── wandb.yaml ├── config.yaml ├── datamodule │ ├── netcdf_datamodule.yaml │ └── netcdf_datamodule_gcp.yaml ├── experiment │ ├── baseline.yaml │ ├── conv3d.yaml │ ├── conv3d_nwp.yaml │ ├── conv3d_sat_nwp.yaml │ ├── example_simple.yaml │ ├── perceiver.yaml │ ├── perceiver_conv3d_sat_nwp.yaml │ └── perceiver_sat_nwp.yaml ├── hparams_search │ └── conv3d_optuna.yaml ├── hydra │ └── default.yaml ├── logger │ ├── comet.yaml │ ├── csv.yaml │ ├── many_loggers.yaml │ ├── mlflow.yaml │ ├── neptune.yaml │ ├── tensorboard.yaml │ └── wandb.yaml ├── model │ ├── baseline.yaml │ ├── conv3d.yaml │ ├── conv3d_nwp.yaml │ ├── conv3d_sat_nwp.yaml │ ├── perceiver.yaml │ ├── perceiver_conv3d_sat_nwp.yaml │ └── perceiver_sat_nwp.yaml ├── readme.md └── trainer │ ├── all_params.yaml │ └── default.yaml ├── environment.yml ├── experiments ├── 001_CNN_concat_all_timesteps_as_channels.py ├── 002_cnn_processes_single_sat_image_then_rnn.py ├── 003_perceiver_processes_single_sat_image_then_rnn.py ├── 2021-08 │ ├── 2021-08-17 │ │ └── run_cnn3d.py │ ├── 2021-08-18 │ │ ├── Run baseline model.ipynb │ │ ├── run_baseline.py │ │ └── run_cnn3d.py │ ├── 2021-08-24 │ │ ├── run_cnn3d.py │ │ └── run_cnn3d_n_layers.py │ ├── 2021-08-26 │ │ └── run_cnn3d_n_layers.py │ ├── 2021-08-27 │ │ ├── experiments.md │ │ └── run_baseline.py │ └── 2021-08-31 │ │ ├── conv3d.py │ │ └── experiments.txt ├── 2021-09 │ ├── 2021-09-03 │ │ ├── conv3d.py │ │ └── experiments.txt │ ├── 2021-09-24 │ │ └── experiments.txt │ ├── 2021-09-27 │ │ └── experiments.txt │ └── 2021-09-28 │ │ └── experiments.txt ├── 2021-10 │ └── 2021-10-01 │ │ └── experiment.txt ├── 2021-11 │ ├── 2021-11-22.txt │ └── 2021-11-25.txt └── plot_003.ipynb ├── notebooks ├── 03_simplify_data_loading_remove_gpu_super_batch.ipynb ├── 04_simplify_data_loading_multiple_cpu_batches.ipynb ├── 05_more_image_inputs.ipynb ├── 06_skip_connections.ipynb ├── 07_multiple_historical_images.ipynb ├── 08_multiple_historical_images_as_separate_channels.ipynb ├── 09_horizon_represented_as_a_stripe.ipynb ├── 10_just_conv.ipynb ├── 11_just_conv_and_conv_over_time.ipynb ├── 12_just_3d_conv.ipynb ├── 13_3d_conv_with_optical_flow_predictions.ipynb ├── 14_back_to_2d_conv_AE.ipynb ├── 15_int16.ipynb ├── 16_maxpool.ipynb ├── 20.0_simplify_data_loading.ipynb ├── 20.1_simplify_data_loading.ipynb ├── 21.0_include_PV_data.ipynb ├── 22.0_refactor_data_loading_to_quickly_load_NWP.ipynb ├── 23.0_dask_client.ipynb ├── 24.0_dask_client_in_separate_worker_process.ipynb ├── 25.0_dask_client_in_separate_manually_defined_process.ipynb ├── 26.0_dask_client_in_separate_manually_defined_process_get_in_separate_thread.ipynb ├── 27.0_dask_client_in_separate_manually_defined_process_get_in_separate_thread_multiple_writer_processes.ipynb ├── 28.0_manual_processes.ipynb ├── dask_experiments.ipynb ├── debug_gcsfs_multiprocessing_issue.ipynb ├── lightning_experiment_with_loading_data_into_GPU.ipynb ├── lightning_experiment_with_loading_data_into_GPU_v2.ipynb ├── mean_and_std_of_satellite_imagery.ipynb ├── optical_flow_1.ipynb ├── sat_data_loader_1_multiple_chunks_on_gpu.ipynb └── sat_data_loader_2_lightning_1_chunk_on_gpu.ipynb ├── predict_pv_yield ├── __init__.py ├── data │ └── dataloader.py ├── models │ ├── __init__.py │ ├── base_model.py │ ├── baseline │ │ ├── last_value.py │ │ └── readme.md │ ├── conv3d │ │ ├── architect.png │ │ ├── conv3d_sat_nwp.png │ │ ├── 
model.py │ │ ├── model_nwp.py │ │ ├── model_sat_nwp.py │ │ └── readme.md │ ├── layers │ │ └── __init__.py │ └── perceiver │ │ ├── perceiver.py │ │ ├── perceiver_conv3d_nwp_sat.py │ │ └── perceiver_nwp_sat.py ├── netcdf_dataset.py ├── training.py └── utils.py ├── requirements.txt ├── run.py ├── setup.py ├── tests ├── __init__.py ├── configs │ ├── dataset │ │ └── configuration.yaml │ ├── experiment │ │ └── example_simple.yaml │ └── model │ │ ├── conv3d.yaml │ │ ├── conv3d_gsp.yaml │ │ ├── conv3d_nwp.yaml │ │ └── conv3d_sat_nwp.yaml ├── conftest.py ├── models │ ├── baseline │ │ ├── test_baseline_model.py │ │ └── test_baseline_model_gsp.py │ ├── conv3d │ │ ├── test_conv3d_model.py │ │ ├── test_conv3d_model_gsp.py │ │ ├── test_conv3d_model_nwp.py │ │ └── test_conv3d_model_sat_nwp.py │ └── perceiver │ │ ├── test_perceiver.py │ │ ├── test_perceiver_conv3d_sat_nwp.py │ │ ├── test_perceiver_gsp.py │ │ └── test_perceiver_sat_nwp.py ├── test_training.py └── test_utils.py └── weights └── conv3d └── readme.md /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python application 5 | 6 | on: [push, pull_request] 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.9 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.9 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install flake8 pytest 23 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 24 | pip install -e . 25 | - name: Lint with flake8 26 | run: | 27 | # stop the build if there are Python syntax errors or undefined names 28 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 29 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 30 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 31 | - name: Test with pytest 32 | run: | 33 | pytest -s --cov=predict_pv_yield 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.9 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.1.0 7 | hooks: 8 | # list of supported hooks: https://pre-commit.com/hooks.html 9 | - id: trailing-whitespace 10 | - id: end-of-file-fixer 11 | - id: check-yaml 12 | - id: debug-statements 13 | - id: detect-private-key 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Open Climate Fix Ltd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intro 2 | Early experiments on predicting solar electricity generation over the next few hours, using deep learning, satellite imagery, and as many other data sources as we can think of :) 3 | 4 | These experiments are focused on predicting solar PV yield. 5 | 6 | Please see [SatFlow](https://github.com/openclimatefix/satflow/) for complementary experiments on predicting the next few hours of satellite imagery (i.e. trying to predict how clouds are going to move!) 7 | 8 | And please see [OCF's Nowcasting page](https://github.com/openclimatefix/nowcasting) for more context. 9 | 10 | # Installation 11 | 12 | From within the cloned `predict_pv_yield` directory: 13 | 14 | ``` 15 | conda env create -f environment.yml 16 | conda activate predict_pv_yield 17 | pip install -e . 18 | ``` 19 | -------------------------------------------------------------------------------- /configs/callbacks/default.yaml: -------------------------------------------------------------------------------- 1 | model_checkpoint: 2 | _target_: pytorch_lightning.callbacks.ModelCheckpoint 3 | monitor: "MSE/Validation_epoch" # name of the logged metric which determines when the model is improving 4 | mode: "min" # can be "max" or "min" 5 | save_top_k: 1 # save k best models (determined by above metric) 6 | save_last: True # additionally always save the model from the last epoch 7 | verbose: False 8 | dirpath: "checkpoints/" 9 | filename: "epoch_{epoch:03d}" 10 | auto_insert_metric_name: False 11 | 12 | early_stopping: 13 | _target_: pytorch_lightning.callbacks.EarlyStopping 14 | monitor: "MSE/Validation_epoch" # name of the logged metric which determines when the model is improving 15 | mode: "min" # can be "max" or "min" 16 | patience: 5 # how many epochs of no improvement until training stops 17 | min_delta: 0 # minimum change in the monitored metric needed to qualify as an improvement 18 | -------------------------------------------------------------------------------- /configs/callbacks/none.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/configs/callbacks/none.yaml -------------------------------------------------------------------------------- /configs/callbacks/wandb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - default.yaml 3 | 4 | watch_model: 5 | _target_: src.callbacks.wandb_callbacks.WatchModel 6 | log: "all" 7 | log_freq: 100 8 | 9 | upload_code_as_artifact: 10 | _target_: src.callbacks.wandb_callbacks.UploadCodeAsArtifact 11 | code_dir: ${work_dir}/src 12 | 13 | upload_ckpts_as_artifact: 14 | _target_: src.callbacks.wandb_callbacks.UploadCheckpointsAsArtifact 15 | ckpt_dir: "checkpoints/" 16 | upload_best_only: True 17 | 18 | log_f1_precision_recall_heatmap: 19 | _target_: src.callbacks.wandb_callbacks.LogF1PrecRecHeatmap 20 | 21 | log_confusion_matrix: 22 | _target_: src.callbacks.wandb_callbacks.LogConfusionMatrix 23 | 24 | log_image_predictions: 25 | _target_:
src.callbacks.wandb_callbacks.LogImagePredictions 26 | num_samples: 8 27 | -------------------------------------------------------------------------------- /configs/config.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # specify here default training configuration 4 | defaults: 5 | - trainer: default.yaml 6 | - model: conv3d.yaml 7 | - datamodule: netcdf_datamodule.yaml 8 | - callbacks: default.yaml # set this to null if you don't want to use callbacks 9 | - logger: neptune # set logger here or use command line (e.g. `python run.py logger=wandb`) 10 | 11 | - experiment: null 12 | - hparams_search: null 13 | 14 | - hydra: default.yaml 15 | 16 | # enable color logging 17 | # - override hydra/hydra_logging: colorlog 18 | # - override hydra/job_logging: colorlog 19 | 20 | # path to original working directory 21 | # hydra hijacks working directory by changing it to the current log directory, 22 | # so it's useful to have this path as a special variable 23 | # learn more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory 24 | work_dir: ${hydra:runtime.cwd} 25 | 26 | # path to folder with data 27 | data_dir: ${work_dir}/data/ 28 | 29 | # use `python run.py debug=true` for easy debugging! 30 | # this will run 1 train, val and test loop with only 1 batch 31 | # equivalent to running `python run.py trainer.fast_dev_run=true` 32 | # (this is placed here just for easier access from command line) 33 | debug: False 34 | 35 | # pretty print config at the start of the run using Rich library 36 | print_config: True 37 | 38 | # disable python warnings if they annoy you 39 | ignore_warnings: True 40 | 41 | # check performance on test set, using the best model achieved during training 42 | # lightning chooses best model based on metric specified in checkpoint callback 43 | test_after_training: True 44 | -------------------------------------------------------------------------------- /configs/datamodule/netcdf_datamodule.yaml: -------------------------------------------------------------------------------- 1 | _target_: nowcasting_dataloader.datamodules.NetCDFDataModule 2 | 3 | temp_path: "." 4 | n_train_data: 4000 5 | n_val_data: 400 6 | num_workers: 8 7 | pin_memory: True 8 | data_path: "/mnt/storage_ssd_4tb/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/prepared_ML_training_data/v15/" 9 | fake_data: False 10 | shuffle_train: True 11 | -------------------------------------------------------------------------------- /configs/datamodule/netcdf_datamodule_gcp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.data.dataloader.NetCDFDataModule 2 | 3 | temp_path: "." 
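For reference, `run.py` consumes these datamodule configs through Hydra's `_target_` key; a minimal sketch of the equivalent direct call (assuming only `hydra-core` and `omegaconf`, both already listed in `environment.yml`):

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# OmegaConf parses the YAML; instantiate() imports the class named in
# `_target_` and calls it with the remaining keys as keyword arguments.
cfg = OmegaConf.load("configs/datamodule/netcdf_datamodule.yaml")
datamodule = instantiate(cfg)  # -> NetCDFDataModule(temp_path=".", n_train_data=4000, ...)
```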
4 | n_train_data: 24900 5 | n_val_data: 1000 6 | num_workers: 8 7 | pin_memory: True 8 | data_path: "gs://solar-pv-nowcasting-data/prepared_ML_training_data/v6/" 9 | fake_data: False 10 | -------------------------------------------------------------------------------- /configs/experiment/baseline.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: baseline.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | validate_only: '1' # by putting this key in the config file, the model does not get trained. 18 | 19 | trainer: 20 | min_epochs: 1 21 | max_epochs: 1 22 | 23 | datamodule: 24 | n_train_data: 2 25 | n_val_data: 10 26 | -------------------------------------------------------------------------------- /configs/experiment/conv3d.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: conv3d.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | 26 | model: 27 | conv3d_channels: 32 28 | -------------------------------------------------------------------------------- /configs/experiment/conv3d_nwp.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: conv3d_nwp.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | 26 | model: 27 | conv3d_channels: 32 28 | -------------------------------------------------------------------------------- /configs/experiment/conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: conv3d_sat_nwp.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 
| - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | 26 | model: 27 | conv3d_channels: 32 28 | -------------------------------------------------------------------------------- /configs/experiment/example_simple.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: baseline.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 2 21 | 22 | datamodule: 23 | n_train_data: 2 24 | n_val_data: 2 25 | fake_data: 1 26 | 27 | validate_only: '1' # by putting this key in the config file, the model does not get trained. 28 | -------------------------------------------------------------------------------- /configs/experiment/perceiver.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: perceiver.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | -------------------------------------------------------------------------------- /configs/experiment/perceiver_conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: perceiver_conv3d_sat_nwp.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 50 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | -------------------------------------------------------------------------------- /configs/experiment/perceiver_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py 
experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: perceiver_sat_nwp.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 10 21 | 22 | datamodule: 23 | n_train_data: 4000 24 | n_val_data: 400 25 | -------------------------------------------------------------------------------- /configs/hparams_search/conv3d_optuna.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # example hyperparameter optimization of some experiment with Optuna: 4 | # python run.py -m hparams_search=conv3d_optuna experiment=conv3d_sat_nwp 5 | 6 | defaults: 7 | - override /hydra/sweeper: optuna 8 | 9 | # choose metric which will be optimized by Optuna 10 | optimized_metric: "MSE/Validation_epoch" 11 | 12 | hydra: 13 | # here we define Optuna hyperparameter search 14 | # it optimizes for value returned from function with @hydra.main decorator 15 | # learn more here: https://hydra.cc/docs/next/plugins/optuna_sweeper 16 | sweeper: 17 | _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper 18 | storage: null 19 | study_name: null 20 | n_jobs: 1 21 | 22 | # 'minimize' or 'maximize' the objective 23 | direction: minimize 24 | 25 | # number of experiments that will be executed 26 | n_trials: 20 27 | 28 | # choose Optuna hyperparameter sampler 29 | # learn more here: https://optuna.readthedocs.io/en/stable/reference/samplers.html 30 | sampler: 31 | _target_: optuna.samplers.TPESampler 32 | seed: 12345 33 | consider_prior: true 34 | prior_weight: 1.0 35 | consider_magic_clip: true 36 | consider_endpoints: false 37 | n_startup_trials: 10 38 | n_ei_candidates: 24 39 | multivariate: false 40 | warn_independent_sampling: true 41 | 42 | # define range of hyperparameters 43 | search_space: 44 | model.include_pv_yield_history: 45 | type: categorical 46 | choices: [ true, false ] 47 | model.include_future_satellite: 48 | type: categorical 49 | choices: [ true, false ] 50 | -------------------------------------------------------------------------------- /configs/hydra/default.yaml: -------------------------------------------------------------------------------- 1 | # output paths for hydra logs 2 | run: 3 | dir: logs/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} 4 | sweep: 5 | dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S} 6 | subdir: ${hydra.job.num} 7 | 8 | # you can set here environment variables that are universal for all users 9 | # for system specific variables (like data paths) it's better to use .env file! 
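With the Optuna sweeper above, the value being minimised is whatever float the `@hydra.main`-decorated entry point returns; a sketch of the expected shape of that entry point (`run_training` and the metric lookup are illustrative, not this repo's exact code):

```python
import hydra
from omegaconf import DictConfig


@hydra.main(config_path="configs/", config_name="config.yaml")
def main(config: DictConfig) -> float:
    # run_training is hypothetical: train, then read back the logged metrics.
    metrics = run_training(config)
    # Returning `optimized_metric` is what lets Optuna drive the search
    # (direction: minimize).
    return metrics["MSE/Validation_epoch"]


if __name__ == "__main__":
    main()
```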
10 | job: 11 | env_set: 12 | EXAMPLE_VAR: "example_value" 13 | -------------------------------------------------------------------------------- /configs/logger/comet.yaml: -------------------------------------------------------------------------------- 1 | # https://www.comet.ml 2 | 3 | comet: 4 | _target_: pytorch_lightning.loggers.comet.CometLogger 5 | api_key: ${oc.env:COMET_API_TOKEN} # api key is loaded from environment variable 6 | project_name: "template-tests" 7 | experiment_name: null 8 | -------------------------------------------------------------------------------- /configs/logger/csv.yaml: -------------------------------------------------------------------------------- 1 | # CSV logger built into Lightning 2 | 3 | csv: 4 | _target_: pytorch_lightning.loggers.csv_logs.CSVLogger 5 | save_dir: "." 6 | name: "csv/" 7 | version: null 8 | prefix: "" 9 | -------------------------------------------------------------------------------- /configs/logger/many_loggers.yaml: -------------------------------------------------------------------------------- 1 | # train with many loggers at once 2 | 3 | defaults: 4 | # - aim.yaml 5 | # - comet.yaml 6 | - csv.yaml 7 | # - mlflow.yaml 8 | # - neptune.yaml 9 | # - tensorboard.yaml 10 | - wandb.yaml 11 | -------------------------------------------------------------------------------- /configs/logger/mlflow.yaml: -------------------------------------------------------------------------------- 1 | # https://mlflow.org 2 | 3 | mlflow: 4 | _target_: pytorch_lightning.loggers.mlflow.MLFlowLogger 5 | experiment_name: default 6 | tracking_uri: null 7 | tags: null 8 | save_dir: ./mlruns 9 | prefix: "" 10 | artifact_location: null 11 | -------------------------------------------------------------------------------- /configs/logger/neptune.yaml: -------------------------------------------------------------------------------- 1 | # https://neptune.ai 2 | 3 | neptune: 4 | _target_: neptune.new.integrations.pytorch_lightning.NeptuneLogger 5 | api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable 6 | project: OpenClimateFix/predict-pv-yield 7 | prefix: "" 8 | -------------------------------------------------------------------------------- /configs/logger/tensorboard.yaml: -------------------------------------------------------------------------------- 1 | # https://www.tensorflow.org/tensorboard/ 2 | 3 | tensorboard: 4 | _target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger 5 | save_dir: "tensorboard/" 6 | name: "default" 7 | version: null 8 | log_graph: False 9 | default_hp_metric: True 10 | prefix: "" 11 | -------------------------------------------------------------------------------- /configs/logger/wandb.yaml: -------------------------------------------------------------------------------- 1 | # https://wandb.ai 2 | 3 | wandb: 4 | _target_: pytorch_lightning.loggers.wandb.WandbLogger 5 | project: "template-tests" 6 | name: null 7 | save_dir: "." 8 | offline: False # set True to store all logs only locally 9 | id: null # pass correct id to resume experiment!
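A sketch of resuming an interrupted run via that `id` field (the run id below is made up; `WandbLogger` forwards extra keyword arguments to `wandb.init`):

```python
from pytorch_lightning.loggers import WandbLogger

logger = WandbLogger(
    project="template-tests",
    id="1a2b3c4d",   # id of the run to resume (illustrative value)
    resume="allow",  # passed through to wandb.init
)
```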
10 | # entity: "" # set to name of your wandb team or just remove it 11 | log_model: False 12 | prefix: "" 13 | job_type: "train" 14 | group: "" 15 | tags: [] 16 | -------------------------------------------------------------------------------- /configs/model/baseline.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.baseline.last_value.Model 2 | 3 | forecast_minutes: 120 4 | history_minutes: 30 5 | output_variable: gsp_yield 6 | -------------------------------------------------------------------------------- /configs/model/conv3d.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_yield: True 4 | include_nwp: True 5 | forecast_minutes: 120 6 | history_minutes: 30 7 | number_of_conv3d_layers: 6 8 | image_size_pixels: 24 9 | number_sat_channels: 11 10 | conv3d_channels: 32 11 | fc1_output_features: 128 12 | fc2_output_features: 128 13 | fc3_output_features: 64 14 | output_variable: gsp_yield 15 | -------------------------------------------------------------------------------- /configs/model/conv3d_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model_nwp.Model 2 | 3 | include_pv_or_gsp_yield_history: True 4 | forecast_minutes: 120 5 | history_minutes: 30 6 | number_of_conv3d_layers: 6 7 | conv3d_channels: 32 8 | fc1_output_features: 128 9 | fc2_output_features: 128 10 | fc3_output_features: 64 11 | number_nwp_channels: 1 12 | -------------------------------------------------------------------------------- /configs/model/conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model_sat_nwp.Model 2 | 3 | include_pv_or_gsp_yield_history: True 4 | include_nwp: True 5 | forecast_minutes: 120 6 | history_minutes: 30 7 | number_of_conv3d_layers: 6 8 | image_size_pixels: 24 9 | number_sat_channels: 11 10 | conv3d_channels: 32 11 | fc1_output_features: 128 12 | fc2_output_features: 128 13 | fc3_output_features: 64 14 | output_variable: gsp_yield 15 | include_pv_yield_history: False 16 | include_future_satellite: True 17 | -------------------------------------------------------------------------------- /configs/model/perceiver.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.perceiver.perceiver.PerceiverModel 2 | 3 | forecast_minutes: 30 4 | history_minutes: 60 5 | batch_size: 8 6 | num_latents: 128 7 | latent_dim: 64 8 | embedding_dem: 16 9 | output_variable: gsp_yield 10 | -------------------------------------------------------------------------------- /configs/model/perceiver_conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.perceiver.perceiver_conv3d_nwp_sat.Model 2 | 3 | forecast_minutes: 30 4 | history_minutes: 60 5 | batch_size: 32 6 | num_latents: 24 7 | latent_dim: 24 8 | embedding_dem: 0 9 | output_variable: gsp_yield 10 | conv3d_channels: 8 11 | use_future_satellite_images: 0 12 | -------------------------------------------------------------------------------- /configs/model/perceiver_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.perceiver.perceiver_nwp_sat.Model 2 | 3 | 
forecast_minutes: 30 4 | history_minutes: 60 5 | batch_size: 8 6 | num_latents: 128 7 | latent_dim: 64 8 | embedding_dem: 0 9 | output_variable: gsp_yield 10 | -------------------------------------------------------------------------------- /configs/readme.md: -------------------------------------------------------------------------------- 1 | The following folders hold the configuration files. 2 | 3 | This idea is copied from 4 | https://github.com/ashleve/lightning-hydra-template/blob/main/configs/experiment/example_simple.yaml 5 | 6 | Run experiments with: 7 | `python run.py experiment=example_simple` 8 | -------------------------------------------------------------------------------- /configs/trainer/all_params.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.Trainer 2 | 3 | # default values for all trainer parameters 4 | checkpoint_callback: True 5 | default_root_dir: null 6 | gradient_clip_val: 0.0 7 | process_position: 0 8 | num_nodes: 1 9 | num_processes: 1 10 | gpus: null 11 | auto_select_gpus: False 12 | tpu_cores: null 13 | log_gpu_memory: null 14 | progress_bar_refresh_rate: 1 15 | overfit_batches: 0.0 16 | track_grad_norm: -1 17 | check_val_every_n_epoch: 1 18 | fast_dev_run: False 19 | accumulate_grad_batches: 1 20 | max_epochs: 1 21 | min_epochs: 1 22 | max_steps: null 23 | min_steps: null 24 | limit_train_batches: 1.0 25 | limit_val_batches: 1.0 26 | limit_test_batches: 1.0 27 | val_check_interval: 1.0 28 | flush_logs_every_n_steps: 100 29 | log_every_n_steps: 50 30 | accelerator: null 31 | sync_batchnorm: False 32 | precision: 32 33 | weights_summary: "top" 34 | weights_save_path: null 35 | num_sanity_val_steps: 2 36 | truncated_bptt_steps: null 37 | resume_from_checkpoint: null 38 | profiler: null 39 | benchmark: False 40 | deterministic: False 41 | reload_dataloaders_every_epoch: False 42 | auto_lr_find: False 43 | replace_sampler_ddp: True 44 | terminate_on_nan: False 45 | auto_scale_batch_size: False 46 | prepare_data_per_node: True 47 | plugins: null 48 | amp_backend: "native" 49 | amp_level: "O2" 50 | move_metrics_to_cpu: False 51 | -------------------------------------------------------------------------------- /configs/trainer/default.yaml: -------------------------------------------------------------------------------- 1 | _target_: pytorch_lightning.Trainer 2 | 3 | # set `1` to train on GPU, `0` to train on CPU only 4 | gpus: 0 5 | auto_select_gpus: False 6 | 7 | min_epochs: 1 8 | max_epochs: 10 9 | 10 | weights_summary: null 11 | progress_bar_refresh_rate: 5 12 | resume_from_checkpoint: null 13 | fast_dev_run: false 14 | profiler: 'simple' 15 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: predict_pv_yield 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | dependencies: 6 | - python>=3.9 7 | - pip 8 | - rich 9 | - python-dotenv 10 | 11 | # Scientific Python 12 | - numpy 13 | - pandas 14 | - matplotlib 15 | - xarray 16 | - ipykernel 17 | - h5netcdf 18 | - omegaconf 19 | - hydra-core 20 | 21 | # Machine learning 22 | - pytorch::pytorch # explicitly specify the pytorch channel to prevent conda from using conda-forge for pytorch, and hence installing the CPU-only version.
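A quick sanity check (a suggestion, not part of the repo) that the channel pinning above worked and conda did not install a CPU-only PyTorch build:

```python
import torch

print(torch.__version__)
print(torch.cuda.is_available())  # expect True on a machine with a GPU and driver
```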
23 | - pytorch-lightning 24 | 25 | # Development tools 26 | - pytest 27 | - pytest-cov 28 | - flake8 29 | - jedi 30 | - black 31 | 32 | - pip: 33 | - neptune-client[pytorch-lightning] 34 | - tilemapbase # For plotting human-readable geographical maps. 35 | - perceiver_pytorch 36 | - nowcasting_dataset 37 | - nowcasting_utils 38 | - nowcasting_dataloader 39 | -------------------------------------------------------------------------------- /experiments/001_CNN_concat_all_timesteps_as_channels.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[1]: 5 | 6 | 7 | from nowcasting_dataset.datamodule import NowcastingDataModule 8 | from pathlib import Path 9 | import matplotlib.pyplot as plt 10 | import matplotlib.dates as mdates 11 | import pandas as pd 12 | 13 | import torch 14 | from torch import nn 15 | import torch.nn.functional as F 16 | import pytorch_lightning as pl 17 | 18 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 19 | 20 | import logging 21 | logging.basicConfig() 22 | logger = logging.getLogger('nowcasting_dataset') 23 | logger.setLevel(logging.DEBUG) 24 | 25 | 26 | # In[2]: 27 | 28 | 29 | import numpy as np 30 | 31 | 32 | # In[3]: 33 | 34 | 35 | BUCKET = Path('solar-pv-nowcasting-data') 36 | 37 | # Solar PV data 38 | PV_PATH = BUCKET / 'PV/PVOutput.org' 39 | PV_DATA_FILENAME = PV_PATH / 'UK_PV_timeseries_batch.nc' 40 | PV_METADATA_FILENAME = PV_PATH / 'UK_PV_metadata.csv' 41 | 42 | # SAT_FILENAME = BUCKET / 'satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep_quarter_geospatial.zarr' 43 | SAT_FILENAME = BUCKET / 'satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr' 44 | 45 | # Numerical weather predictions 46 | #NWP_BASE_PATH = BUCKET / 'NWP/UK_Met_Office/UKV_zarr' 47 | #NWP_BASE_PATH = BUCKET / 'NWP/UK_Met_Office/UKV_single_step_and_single_timestep_all_vars.zarr' 48 | NWP_BASE_PATH = BUCKET / 'NWP/UK_Met_Office/UKV_single_step_and_single_timestep_all_vars_full_spatial_2018_7-12_float32.zarr' 49 | 50 | 51 | # In[4]: 52 | 53 | 54 | params = dict( 55 | batch_size=32, 56 | history_len=6, #: Number of timesteps of history, not including t0. 57 | forecast_len=12, #: Number of timesteps of forecast. 58 | nwp_channels=( 59 | 't', 'dswrf', 'prate', 'r', 'sde', 'si10', 'vis', 'lcc', 'mcc', 'hcc') 60 | ) 61 | 62 | 63 | # In[5]: 64 | 65 | 66 | data_module = NowcastingDataModule( 67 | pv_power_filename=PV_DATA_FILENAME, 68 | pv_metadata_filename=f'gs://{PV_METADATA_FILENAME}', 69 | sat_filename = f'gs://{SAT_FILENAME}', 70 | # sat_channels =('HRV', 'WV_062', 'WV_073'), 71 | nwp_base_path = f'gs://{NWP_BASE_PATH}', 72 | pin_memory = True, #: Passed to DataLoader. 73 | num_workers = 22, #: Passed to DataLoader. 74 | prefetch_factor = 256, #: Passed to DataLoader. 
75 | n_samples_per_timestep = 8, #: Passed to NowcastingDataset 76 | **params 77 | ) 78 | 79 | 80 | # In[6]: 81 | 82 | 83 | data_module.prepare_data() 84 | 85 | 86 | # In[7]: 87 | 88 | 89 | data_module.setup() 90 | 91 | 92 | # ## Define very simple ML model 93 | 94 | # In[8]: 95 | 96 | 97 | import tilemapbase 98 | from nowcasting_dataset.geospatial import osgb_to_lat_lon 99 | 100 | 101 | # In[9]: 102 | 103 | 104 | tilemapbase.init(create=True) 105 | 106 | 107 | # In[10]: 108 | 109 | 110 | def plot_example(batch, model_output, example_i: int=0, border: int=0): 111 | fig = plt.figure(figsize=(20, 20)) 112 | ncols=4 113 | nrows=2 114 | 115 | # Satellite data 116 | extent = ( 117 | float(batch['sat_x_coords'][example_i, 0].cpu().numpy()), 118 | float(batch['sat_x_coords'][example_i, -1].cpu().numpy()), 119 | float(batch['sat_y_coords'][example_i, -1].cpu().numpy()), 120 | float(batch['sat_y_coords'][example_i, 0].cpu().numpy())) # left, right, bottom, top 121 | 122 | def _format_ax(ax): 123 | #ax.set_xlim(extent[0]-border, extent[1]+border) 124 | #ax.set_ylim(extent[2]-border, extent[3]+border) 125 | # ax.coastlines(color='black') 126 | ax.scatter( 127 | batch['x_meters_center'][example_i].cpu(), 128 | batch['y_meters_center'][example_i].cpu(), 129 | s=500, color='white', marker='x') 130 | 131 | ax = fig.add_subplot(nrows, ncols, 1) #, projection=ccrs.OSGB(approx=False)) 132 | sat_data = batch['sat_data'][example_i, :, :, :, 0].cpu().numpy() 133 | sat_min = np.min(sat_data) 134 | sat_max = np.max(sat_data) 135 | ax.imshow(sat_data[0], extent=extent, interpolation='none', vmin=sat_min, vmax=sat_max) 136 | ax.set_title('t = -{}'.format(params['history_len'])) 137 | _format_ax(ax) 138 | 139 | ax = fig.add_subplot(nrows, ncols, 2) 140 | ax.imshow(sat_data[params['history_len']+1], extent=extent, interpolation='none', vmin=sat_min, vmax=sat_max) 141 | ax.set_title('t = 0') 142 | _format_ax(ax) 143 | 144 | ax = fig.add_subplot(nrows, ncols, 3) 145 | ax.imshow(sat_data[-1], extent=extent, interpolation='none', vmin=sat_min, vmax=sat_max) 146 | ax.set_title('t = {}'.format(params['forecast_len'])) 147 | _format_ax(ax) 148 | 149 | ax = fig.add_subplot(nrows, ncols, 4) 150 | lat_lon_bottom_left = osgb_to_lat_lon(extent[0], extent[2]) 151 | lat_lon_top_right = osgb_to_lat_lon(extent[1], extent[3]) 152 | tiles = tilemapbase.tiles.build_OSM() 153 | lat_lon_extent = tilemapbase.Extent.from_lonlat( 154 | longitude_min=lat_lon_bottom_left[1], 155 | longitude_max=lat_lon_top_right[1], 156 | latitude_min=lat_lon_bottom_left[0], 157 | latitude_max=lat_lon_top_right[0]) 158 | plotter = tilemapbase.Plotter(lat_lon_extent, tile_provider=tiles, zoom=6) 159 | plotter.plot(ax, tiles) 160 | 161 | ############## TIMESERIES ################## 162 | # NWP 163 | ax = fig.add_subplot(nrows, ncols, 5) 164 | nwp_dt_index = pd.to_datetime(batch['nwp_target_time'][example_i].cpu().numpy(), unit='s') 165 | pd.DataFrame( 166 | batch['nwp'][example_i, :, :, 0, 0].T.cpu().numpy(), 167 | index=nwp_dt_index, 168 | columns=params['nwp_channels']).plot(ax=ax) 169 | ax.set_title('NWP') 170 | 171 | # datetime features 172 | ax = fig.add_subplot(nrows, ncols, 6) 173 | ax.set_title('datetime features') 174 | datetime_feature_cols = ['hour_of_day_sin', 'hour_of_day_cos', 'day_of_year_sin', 'day_of_year_cos'] 175 | datetime_features_df = pd.DataFrame(index=nwp_dt_index, columns=datetime_feature_cols) 176 | for key in datetime_feature_cols: 177 | datetime_features_df[key] = batch[key][example_i].cpu().numpy() 178 | 
datetime_features_df.plot(ax=ax) 179 | ax.legend() 180 | ax.set_xlabel(nwp_dt_index[0].date()) 181 | 182 | # PV yield 183 | ax = fig.add_subplot(nrows, ncols, 7) 184 | ax.set_title('PV yield for PV ID {:,d}'.format(batch['pv_system_id'][example_i].cpu())) 185 | pv_actual = pd.Series( 186 | batch['pv_yield'][example_i].cpu().numpy(), 187 | index=nwp_dt_index, 188 | name='actual') 189 | pv_pred = pd.Series( 190 | model_output[example_i].detach().cpu().numpy(), 191 | index=nwp_dt_index[params['history_len']+1:], 192 | name='prediction') 193 | pd.concat([pv_actual, pv_pred], axis='columns').plot(ax=ax) 194 | ax.legend() 195 | 196 | # fig.tight_layout() 197 | 198 | return fig 199 | 200 | 201 | # In[11]: 202 | 203 | 204 | # plot_example(batch, model_output, example_i=20); 205 | 206 | 207 | # In[12]: 208 | 209 | 210 | SAT_X_MEAN = np.float32(309000) 211 | SAT_X_STD = np.float32(316387.42073603) 212 | SAT_Y_MEAN = np.float32(519000) 213 | SAT_Y_STD = np.float32(406454.17945938) 214 | 215 | 216 | # In[13]: 217 | 218 | 219 | from neptune.new.types import File 220 | 221 | 222 | # In[14]: 223 | 224 | 225 | TOTAL_SEQ_LEN = params['history_len'] + params['forecast_len'] + 1 226 | CHANNELS = 144 227 | KERNEL = 3 228 | EMBEDDING_DIM = 16 229 | NWP_SIZE = 10 * 2 * 2 * TOTAL_SEQ_LEN # channels x width x height 230 | N_DATETIME_FEATURES = 4 * TOTAL_SEQ_LEN 231 | 232 | class LitAutoEncoder(pl.LightningModule): 233 | def __init__( 234 | self, 235 | history_len = params['history_len'], 236 | forecast_len = params['forecast_len'], 237 | 238 | ): 239 | super().__init__() 240 | self.history_len = history_len 241 | self.forecast_len = forecast_len 242 | 243 | self.sat_conv1 = nn.Conv2d(in_channels=history_len+6, out_channels=CHANNELS, kernel_size=KERNEL)#, groups=history_len+1) 244 | self.sat_conv2 = nn.Conv2d(in_channels=CHANNELS, out_channels=CHANNELS, kernel_size=KERNEL) #, groups=CHANNELS//2) 245 | self.sat_conv3 = nn.Conv2d(in_channels=CHANNELS, out_channels=CHANNELS, kernel_size=KERNEL) #, groups=CHANNELS) 246 | 247 | self.maxpool = nn.MaxPool2d(kernel_size=KERNEL) 248 | 249 | self.fc1 = nn.Linear( 250 | in_features=CHANNELS * 11 * 11, 251 | out_features=256) 252 | 253 | self.fc2 = nn.Linear(in_features=256 + EMBEDDING_DIM + NWP_SIZE + N_DATETIME_FEATURES + history_len+1, out_features=128) 254 | #self.fc2 = nn.Linear(in_features=EMBEDDING_DIM + N_DATETIME_FEATURES, out_features=128) 255 | self.fc3 = nn.Linear(in_features=128, out_features=128) 256 | self.fc4 = nn.Linear(in_features=128, out_features=128) 257 | self.fc5 = nn.Linear(in_features=128, out_features=params['forecast_len']) 258 | 259 | if EMBEDDING_DIM: 260 | self.pv_system_id_embedding = nn.Embedding( 261 | num_embeddings=len(data_module.pv_data_source.pv_metadata), 262 | embedding_dim=EMBEDDING_DIM) 263 | 264 | def forward(self, x): 265 | # ******************* Satellite imagery ************************* 266 | # Shape: batch_size, seq_length, width, height, channel 267 | sat_data = x['sat_data'][:, :self.history_len+1] 268 | batch_size, seq_len, width, height, n_chans = sat_data.shape 269 | 270 | # Move seq_length to be the last dim, ready for changing the shape 271 | sat_data = sat_data.permute(0, 2, 3, 4, 1) 272 | 273 | # Stack timesteps into the channel dimension 274 | sat_data = sat_data.view(batch_size, width, height, seq_len * n_chans) 275 | 276 | sat_data = sat_data.permute(0, 3, 1, 2) # Conv2d expects channels to be the 2nd dim! 
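        # Net effect of the reshapes above:
        #   (batch, seq_len, width, height, n_chans)
        #     -> (batch, width, height, n_chans, seq_len)   # permute
        #     -> (batch, width, height, seq_len * n_chans)  # view
        #     -> (batch, seq_len * n_chans, width, height)  # permute
        # i.e. every timestep of every satellite channel becomes one Conv2d
        # input channel -- the "concat all timesteps as channels" in this
        # experiment's filename.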
277 | 278 | ### EXTRA CHANNELS 279 | # Center marker 280 | center_marker = torch.zeros((batch_size, 1, width, height), dtype=torch.float32, device=self.device) 281 | half_width = width // 2 282 | center_marker[..., half_width-2:half_width+2, half_width-2:half_width+2] = 1 283 | 284 | # geo-spatial x 285 | x_coords = x['sat_x_coords'] - SAT_X_MEAN 286 | x_coords /= SAT_X_STD 287 | x_coords = x_coords.unsqueeze(1).expand(-1, width, -1).unsqueeze(1) 288 | 289 | # geo-spatial y 290 | y_coords = x['sat_y_coords'] - SAT_Y_MEAN 291 | y_coords /= SAT_Y_STD 292 | y_coords = y_coords.unsqueeze(-1).expand(-1, -1, height).unsqueeze(1) 293 | 294 | # pixel x & y 295 | pixel_range = (torch.arange(width, device=self.device) - 64) / 37 296 | pixel_range = pixel_range.unsqueeze(0).unsqueeze(0) 297 | pixel_x = pixel_range.unsqueeze(-2).expand(batch_size, 1, width, -1) 298 | pixel_y = pixel_range.unsqueeze(-1).expand(batch_size, 1, -1, height) 299 | 300 | # Concat 301 | sat_data = torch.cat((sat_data, center_marker, x_coords, y_coords, pixel_x, pixel_y), dim=1) 302 | 303 | del center_marker, x_coords, y_coords, pixel_x, pixel_y 304 | 305 | # Pass data through the network :) 306 | out = F.relu(self.sat_conv1(sat_data)) 307 | out = self.maxpool(out) 308 | out = F.relu(self.sat_conv2(out)) 309 | out = self.maxpool(out) 310 | out = F.relu(self.sat_conv3(out)) 311 | 312 | out = out.view(-1, CHANNELS * 11 * 11) 313 | out = F.relu(self.fc1(out)) 314 | 315 | # *********************** NWP Data ************************************** 316 | nwp_data = x['nwp'].float() # Shape: batch_size, channel, seq_length, width, height 317 | batch_size, n_nwp_chans, nwp_seq_len, nwp_width, nwp_height = nwp_data.shape 318 | nwp_data = nwp_data.reshape(batch_size, n_nwp_chans * nwp_seq_len * nwp_width * nwp_height) 319 | 320 | # Concat 321 | out = torch.cat( 322 | ( 323 | out, 324 | x['pv_yield'][:, :self.history_len+1], 325 | nwp_data, 326 | x['hour_of_day_sin'], 327 | x['hour_of_day_cos'], 328 | x['day_of_year_sin'], 329 | x['day_of_year_cos'], 330 | ), 331 | dim=1) 332 | del nwp_data 333 | 334 | # Embedding of PV system ID 335 | if EMBEDDING_DIM: 336 | pv_embedding = self.pv_system_id_embedding(x['pv_system_row_number']) 337 | out = torch.cat( 338 | ( 339 | out, 340 | pv_embedding 341 | ), 342 | dim=1) 343 | 344 | # Fully connected layers. 345 | out = F.relu(self.fc2(out)) 346 | out = F.relu(self.fc3(out)) 347 | out = F.relu(self.fc4(out)) 348 | out = F.relu(self.fc5(out)) # PV yield is in range [0, 1]. ReLU should train more cleanly than sigmoid. 349 | 350 | return out 351 | 352 | def _training_or_validation_step(self, batch, is_train_step): 353 | y_hat = self(batch) 354 | y = batch['pv_yield'][:, -self.forecast_len:] 355 | #y = torch.rand((32, 1), device=self.device) 356 | mse_loss = F.mse_loss(y_hat, y) 357 | nmae_loss = (y_hat - y).abs().mean() 358 | # TODO: Compute correlation coef using np.corrcoef(tensor with shape (2, num_timesteps))[0, 1] 359 | # on each example, and taking the mean across the batch? 
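        # One way to implement the TODO above (a sketch: the value is computed
        # here but not yet logged anywhere):
        y_hat_np = y_hat.detach().cpu().numpy()
        y_np = y.detach().cpu().numpy()
        mean_corr = np.mean(
            [np.corrcoef(np.stack([p, t]))[0, 1] for p, t in zip(y_hat_np, y_np)]
        )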
360 | tag = "Train" if is_train_step else "Validation" 361 | self.log_dict({f'MSE/{tag}': mse_loss}, on_step=is_train_step, on_epoch=True) 362 | self.log_dict({f'NMAE/{tag}': nmae_loss}, on_step=is_train_step, on_epoch=True) 363 | 364 | return nmae_loss 365 | 366 | def training_step(self, batch, batch_idx): 367 | return self._training_or_validation_step(batch, is_train_step=True) 368 | 369 | def validation_step(self, batch, batch_idx): 370 | if batch_idx == 0: 371 | # Plot example 372 | model_output = self(batch) 373 | fig = plot_example(batch, model_output) 374 | self.logger.experiment['validation/plot'].log(File.as_image(fig)) 375 | 376 | return self._training_or_validation_step(batch, is_train_step=False) 377 | 378 | def configure_optimizers(self): 379 | optimizer = torch.optim.Adam(self.parameters(), lr=0.001) 380 | return optimizer 381 | 382 | 383 | # In[15]: 384 | 385 | 386 | model = LitAutoEncoder() 387 | 388 | 389 | # In[16]: 390 | 391 | 392 | #train_ds = data_module.train_dataset 393 | #train_ds.per_worker_init(0) 394 | #for batch in train_ds: 395 | # break 396 | 397 | 398 | # In[17]: 399 | 400 | 401 | #model_output = model(batch) 402 | 403 | 404 | # In[18]: 405 | 406 | 407 | #plot_example(batch, model_output, example_i=2); 408 | 409 | 410 | # In[19]: 411 | 412 | 413 | logger = NeptuneLogger( 414 | project='OpenClimateFix/predict-pv-yield', 415 | #params=params, 416 | #experiment_name='climatology', 417 | #experiment_id='PRED-1' 418 | ) 419 | 420 | 421 | # In[20]: 422 | 423 | 424 | logger.version 425 | 426 | 427 | # In[21]: 428 | 429 | 430 | trainer = pl.Trainer(gpus=1, max_epochs=10_000, logger=logger) 431 | 432 | 433 | # In[ ]: 434 | 435 | 436 | trainer.fit(model, data_module) 437 | 438 | 439 | # In[ ]: 440 | -------------------------------------------------------------------------------- /experiments/002_cnn_processes_single_sat_image_then_rnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | import os 5 | 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | import pytorch_lightning as pl 10 | 11 | from predict_pv_yield.netcdf_dataset import NetCDFDataset, worker_init_fn 12 | from predict_pv_yield.visualisation import plot_example 13 | 14 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 15 | from neptune.new.types import File 16 | 17 | import logging 18 | logging.basicConfig() 19 | _LOG = logging.getLogger('predict_pv_yield') 20 | _LOG.setLevel(logging.DEBUG) 21 | 22 | 23 | params = dict( 24 | batch_size=32, 25 | history_len=6, #: Number of timesteps of history, not including t0. 26 | forecast_len=12, #: Number of timesteps of forecast. 
27 | image_size_pixels=32, 28 | nwp_channels=( 29 | 't', 'dswrf', 'prate', 'r', 'sde', 'si10', 'vis', 'lcc', 'mcc', 'hcc'), 30 | sat_channels=( 31 | 'HRV', 'IR_016', 'IR_039', 'IR_087', 'IR_097', 'IR_108', 'IR_120', 32 | 'IR_134', 'VIS006', 'VIS008', 'WV_062', 'WV_073') 33 | ) 34 | 35 | 36 | SAT_X_MEAN = np.float32(309000) 37 | SAT_X_STD = np.float32(316387.42073603) 38 | SAT_Y_MEAN = np.float32(519000) 39 | SAT_Y_STD = np.float32(406454.17945938) 40 | 41 | 42 | TOTAL_SEQ_LEN = params['history_len'] + params['forecast_len'] + 1 43 | CHANNELS = 32 44 | N_CHANNELS_LAST_CONV = 4 45 | KERNEL = 3 46 | EMBEDDING_DIM = 16 47 | NWP_SIZE = 10 * 2 * 2 # channels x width x height 48 | N_DATETIME_FEATURES = 4 49 | CNN_OUTPUT_SIZE = N_CHANNELS_LAST_CONV * ((params['image_size_pixels'] - 6) ** 2) 50 | FC_OUTPUT_SIZE = 8 51 | RNN_HIDDEN_SIZE = 16 52 | 53 | 54 | def get_dataloaders(): 55 | DATA_PATH = 'gs://solar-pv-nowcasting-data/prepared_ML_training_data/v2/' 56 | TEMP_PATH = '/home/jack/temp/' 57 | 58 | train_dataset = NetCDFDataset( 59 | 12_500, 60 | os.path.join(DATA_PATH, 'train'), 61 | os.path.join(TEMP_PATH, 'train')) 62 | 63 | #validation_dataset = NetCDFDataset(1_000, 'gs://solar-pv-nowcasting-data/prepared_ML_training_data/v2/validation/', '/home/jack/temp/validation') 64 | 65 | dataloader_config = dict( 66 | pin_memory=True, 67 | num_workers=24, 68 | prefetch_factor=8, 69 | worker_init_fn=worker_init_fn, 70 | persistent_workers=True, 71 | 72 | # Disable automatic batching because dataset 73 | # returns complete batches. 74 | batch_size=None, 75 | ) 76 | 77 | train_dataloader = torch.utils.data.DataLoader( 78 | train_dataset, **dataloader_config) 79 | 80 | return train_dataloader 81 | 82 | 83 | class LitModel(pl.LightningModule): 84 | def __init__( 85 | self, 86 | history_len=params['history_len'], 87 | forecast_len=params['forecast_len'], 88 | ): 89 | super().__init__() 90 | self.history_len = history_len 91 | self.forecast_len = forecast_len 92 | 93 | self.sat_conv1 = nn.Conv2d( 94 | in_channels=len(params['sat_channels'])+5, 95 | out_channels=CHANNELS, kernel_size=KERNEL) 96 | self.sat_conv2 = nn.Conv2d( 97 | in_channels=CHANNELS, 98 | out_channels=CHANNELS, kernel_size=KERNEL) 99 | self.sat_conv3 = nn.Conv2d( 100 | in_channels=CHANNELS, 101 | out_channels=N_CHANNELS_LAST_CONV, kernel_size=KERNEL) 102 | 103 | self.fc1 = nn.Linear( 104 | in_features=CNN_OUTPUT_SIZE, 105 | out_features=256) 106 | 107 | self.fc2 = nn.Linear( 108 | in_features=256 + EMBEDDING_DIM, 109 | out_features=128) 110 | 111 | self.fc3 = nn.Linear(in_features=128, out_features=64) 112 | self.fc4 = nn.Linear(in_features=64, out_features=32) 113 | self.fc5 = nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 114 | 115 | if EMBEDDING_DIM: 116 | self.pv_system_id_embedding = nn.Embedding( 117 | num_embeddings=940, 118 | embedding_dim=EMBEDDING_DIM) 119 | 120 | self.encoder_rnn = nn.GRU( 121 | # plus 1 for history 122 | input_size=FC_OUTPUT_SIZE + N_DATETIME_FEATURES + 1 + NWP_SIZE, 123 | hidden_size=RNN_HIDDEN_SIZE, 124 | num_layers=2, 125 | batch_first=True) 126 | self.decoder_rnn = nn.GRU( 127 | input_size=FC_OUTPUT_SIZE + N_DATETIME_FEATURES + NWP_SIZE, 128 | hidden_size=RNN_HIDDEN_SIZE, 129 | num_layers=2, 130 | batch_first=True) 131 | 132 | self.decoder_fc1 = nn.Linear( 133 | in_features=RNN_HIDDEN_SIZE, 134 | out_features=8) 135 | self.decoder_fc2 = nn.Linear( 136 | in_features=8, 137 | out_features=1) 138 | 139 | # EXTRA CHANNELS 140 | # Center marker 141 | new_batch_size = params['batch_size'] * TOTAL_SEQ_LEN 142 
| self.center_marker = torch.zeros( 143 | ( 144 | new_batch_size, 145 | 1, 146 | params['image_size_pixels'], 147 | params['image_size_pixels'] 148 | ), 149 | dtype=torch.float32, device=self.device) 150 | half_width = params['image_size_pixels'] // 2 151 | self.center_marker[ 152 | ..., half_width-2:half_width+2, half_width-2:half_width+2] = 1 153 | 154 | # pixel x & y 155 | pixel_range = ( 156 | torch.arange(params['image_size_pixels'], device=self.device) 157 | - 64) / 37 158 | pixel_range = pixel_range.unsqueeze(0).unsqueeze(0) 159 | self.pixel_x = pixel_range.unsqueeze(-2).expand( 160 | new_batch_size, 1, params['image_size_pixels'], -1) 161 | self.pixel_y = pixel_range.unsqueeze(-1).expand( 162 | new_batch_size, 1, -1, params['image_size_pixels']) 163 | 164 | def forward(self, x): 165 | # ******************* Satellite imagery ************************* 166 | # Shape: batch_size, seq_length, width, height, channel 167 | # TODO: Use optical flow, not actual sat images of the future! 168 | sat_data = x['sat_data'] 169 | batch_size, seq_len, width, height, n_chans = sat_data.shape 170 | 171 | # Stack timesteps as extra examples 172 | new_batch_size = batch_size * seq_len 173 | # 0 1 2 3 174 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 175 | 176 | # Conv2d expects channels to be the 2nd dim! 177 | sat_data = sat_data.permute(0, 3, 1, 2) 178 | # Now shape: new_batch_size, n_chans, width, height 179 | 180 | # EXTRA CHANNELS 181 | # geo-spatial x 182 | x_coords = x['sat_x_coords'] # shape: batch_size, image_size_pixels 183 | x_coords = x_coords - SAT_X_MEAN 184 | x_coords = x_coords / SAT_X_STD 185 | x_coords = x_coords.unsqueeze(1).expand(-1, width, -1).unsqueeze(1) 186 | x_coords = x_coords.repeat_interleave(repeats=TOTAL_SEQ_LEN, dim=0) 187 | 188 | # geo-spatial y 189 | y_coords = x['sat_y_coords'] # shape: batch_size, image_size_pixels 190 | y_coords = y_coords - SAT_Y_MEAN 191 | y_coords = y_coords / SAT_Y_STD 192 | y_coords = y_coords.unsqueeze(-1).expand(-1, -1, height).unsqueeze(1) 193 | y_coords = y_coords.repeat_interleave(repeats=TOTAL_SEQ_LEN, dim=0) 194 | 195 | # Concat 196 | if sat_data.device != self.center_marker.device: 197 | self.center_marker = self.center_marker.to(sat_data.device) 198 | self.pixel_x = self.pixel_x.to(sat_data.device) 199 | self.pixel_y = self.pixel_y.to(sat_data.device) 200 | 201 | sat_data = torch.cat( 202 | ( 203 | sat_data, self.center_marker, 204 | x_coords, y_coords, self.pixel_x, self.pixel_y 205 | ), 206 | dim=1) 207 | 208 | del x_coords, y_coords 209 | 210 | # Pass data through the network :) 211 | out = F.relu(self.sat_conv1(sat_data)) 212 | out = F.relu(self.sat_conv2(out)) 213 | out = F.relu(self.sat_conv3(out)) 214 | 215 | out = out.reshape(new_batch_size, CNN_OUTPUT_SIZE) 216 | out = F.relu(self.fc1(out)) 217 | 218 | # ********************** Embedding of PV system ID ******************** 219 | if EMBEDDING_DIM: 220 | pv_row = x['pv_system_row_number'].repeat_interleave(TOTAL_SEQ_LEN) 221 | pv_embedding = self.pv_system_id_embedding(pv_row) 222 | out = torch.cat( 223 | ( 224 | out, 225 | pv_embedding 226 | ), 227 | dim=1) 228 | 229 | # Fully connected layers. 
230 | out = F.relu(self.fc2(out)) 231 | out = F.relu(self.fc3(out)) 232 | out = F.relu(self.fc4(out)) 233 | out = F.relu(self.fc5(out)) 234 | 235 | # ******************* PREP DATA FOR RNN ******************************* 236 | out = out.reshape(batch_size, TOTAL_SEQ_LEN, FC_OUTPUT_SIZE) 237 | 238 | # The RNN encoder gets recent history: satellite, NWP, 239 | # datetime features, and recent PV history. The RNN decoder 240 | # gets what we know about the future: satellite, NWP, and 241 | # datetime features. 242 | 243 | # *********************** NWP Data ************************************ 244 | # Shape: batch_size, channel, seq_length, width, height 245 | nwp_data = x['nwp'].float() 246 | # RNN expects seq_len to be dim 1. 247 | nwp_data = nwp_data.permute(0, 2, 1, 3, 4) 248 | batch_size, nwp_seq_len, n_nwp_chans, nwp_width, nwp_height = ( 249 | nwp_data.shape) 250 | nwp_data = nwp_data.reshape( 251 | batch_size, nwp_seq_len, n_nwp_chans * nwp_width * nwp_height) 252 | 253 | # Concat 254 | rnn_input = torch.cat( 255 | ( 256 | out, 257 | nwp_data, 258 | x['hour_of_day_sin'].unsqueeze(-1), 259 | x['hour_of_day_cos'].unsqueeze(-1), 260 | x['day_of_year_sin'].unsqueeze(-1), 261 | x['day_of_year_cos'].unsqueeze(-1), 262 | ), 263 | dim=2) 264 | 265 | pv_yield_history = x['pv_yield'][:, :self.history_len+1].unsqueeze(-1) 266 | encoder_input = torch.cat( 267 | ( 268 | rnn_input[:, :self.history_len+1], 269 | pv_yield_history 270 | ), 271 | dim=2) 272 | 273 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 274 | decoder_output, _ = self.decoder_rnn( 275 | rnn_input[:, -self.forecast_len:], encoder_hidden) 276 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 277 | 278 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 279 | decoder_output = self.decoder_fc2(decoder_output) 280 | 281 | return decoder_output.squeeze() 282 | 283 | def _training_or_validation_step(self, batch, is_train_step): 284 | y_hat = self(batch) 285 | y = batch['pv_yield'][:, -self.forecast_len:] 286 | mse_loss = F.mse_loss(y_hat, y) 287 | nmae_loss = (y_hat - y).abs().mean() 288 | # TODO: Compute correlation coef using np.corrcoef(tensor with 289 | # shape (2, num_timesteps))[0, 1] on each example, and taking 290 | # the mean across the batch? 
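        # One untested sketch of the TODO above (not wired into the metrics;
        # `corr_coefs` and `mean_corr` are hypothetical names):
        #
        #     y_np = y.detach().cpu().numpy()
        #     y_hat_np = y_hat.detach().cpu().numpy()
        #     corr_coefs = [np.corrcoef(np.stack((y_np[i], y_hat_np[i])))[0, 1]
        #                   for i in range(y_np.shape[0])]
        #     mean_corr = float(np.mean(corr_coefs))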
291 | tag = "Train" if is_train_step else "Validation" 292 | self.log_dict( 293 | {f'MSE/{tag}': mse_loss}, on_step=is_train_step, on_epoch=True) 294 | self.log_dict( 295 | {f'NMAE/{tag}': nmae_loss}, on_step=is_train_step, on_epoch=True) 296 | 297 | return nmae_loss 298 | 299 | def training_step(self, batch, batch_idx): 300 | return self._training_or_validation_step(batch, is_train_step=True) 301 | 302 | def validation_step(self, batch, batch_idx): 303 | if batch_idx == 0: 304 | # Plot example 305 | model_output = self(batch) 306 | fig = plot_example( 307 | batch, model_output, history_len=params['history_len'], 308 | forecast_len=params['forecast_len'], 309 | nwp_channels=params['nwp_channels']) 310 | self.logger.experiment['validation/plot'].log(File.as_image(fig)) 311 | 312 | return self._training_or_validation_step(batch, is_train_step=False) 313 | 314 | def configure_optimizers(self): 315 | optimizer = torch.optim.Adam(self.parameters(), lr=0.001) 316 | return optimizer 317 | 318 | 319 | def main(): 320 | train_dataloader = get_dataloaders() 321 | model = LitModel() 322 | logger = NeptuneLogger(project='OpenClimateFix/predict-pv-yield') 323 | logger.log_hyperparams(params) 324 | _LOG.info(f'logger.version = {logger.version}') 325 | trainer = pl.Trainer(gpus=1, max_epochs=10_000, logger=logger) 326 | trainer.fit(model, train_dataloader) 327 | 328 | 329 | if __name__ == '__main__': 330 | main() 331 | -------------------------------------------------------------------------------- /experiments/003_perceiver_processes_single_sat_image_then_rnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | import os 5 | 6 | import torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | import pytorch_lightning as pl 10 | 11 | from predict_pv_yield.netcdf_dataset import NetCDFDataset, worker_init_fn 12 | from predict_pv_yield.visualisation import plot_example 13 | 14 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 15 | from neptune.new.types import File 16 | 17 | from perceiver_pytorch import Perceiver 18 | 19 | import logging 20 | logging.basicConfig() 21 | _LOG = logging.getLogger('predict_pv_yield') 22 | _LOG.setLevel(logging.DEBUG) 23 | 24 | 25 | params = dict( 26 | # DATA 27 | # TODO: Everything that relates to the dataset should come automatically 28 | # from a yaml file stored with the dataset. 29 | batch_size=32, 30 | history_len=6, #: Number of timesteps of history, not including t0. 31 | forecast_len=12, #: Number of timesteps of forecast. 32 | image_size_pixels=64, 33 | nwp_channels=( 34 | 't', 'dswrf', 'prate', 'r', 'sde', 'si10', 'vis', 'lcc', 'mcc', 'hcc'), 35 | sat_channels=( 36 | 'HRV', 'IR_016', 'IR_039', 'IR_087', 'IR_097', 'IR_108', 'IR_120', 37 | 'IR_134', 'VIS006', 'VIS008', 'WV_062', 'WV_073'), 38 | 39 | # TRAINING 40 | precision=16, #: 16, 32, or 64-bit precision for data. 
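    # (16-bit precision roughly halves activation memory compared to 32-bit, at some cost in numerical range.)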
41 | val_check_interval=1_000, #: Check validation this many batches, or proportion of the epoch 42 | ) 43 | 44 | 45 | SAT_X_MEAN = np.float32(309000) 46 | SAT_X_STD = np.float32(316387.42073603) 47 | SAT_Y_MEAN = np.float32(519000) 48 | SAT_Y_STD = np.float32(406454.17945938) 49 | 50 | 51 | TOTAL_SEQ_LEN = params['history_len'] + params['forecast_len'] + 1 52 | EMBEDDING_DIM = 16 53 | NWP_SIZE = len(params['nwp_channels']) * 2 * 2 # channels x width x height 54 | N_DATETIME_FEATURES = 4 55 | PERCEIVER_OUTPUT_SIZE = 512 56 | FC_OUTPUT_SIZE = 8 57 | RNN_HIDDEN_SIZE = 16 58 | 59 | 60 | def get_dataloaders(): 61 | DATA_PATH = 'gs://solar-pv-nowcasting-data/prepared_ML_training_data/v3/' 62 | TEMP_PATH = '/home/jack/temp/' 63 | 64 | train_dataset = NetCDFDataset( 65 | 24_900, 66 | os.path.join(DATA_PATH, 'train'), 67 | os.path.join(TEMP_PATH, 'train')) 68 | 69 | validation_dataset = NetCDFDataset( 70 | 900, 71 | os.path.join(DATA_PATH, 'validation'), 72 | os.path.join(TEMP_PATH, 'validation')) 73 | 74 | dataloader_config = dict( 75 | pin_memory=True, 76 | num_workers=16, 77 | prefetch_factor=8, 78 | worker_init_fn=worker_init_fn, 79 | persistent_workers=True, 80 | 81 | # Disable automatic batching because dataset 82 | # returns complete batches. 83 | batch_size=None, 84 | ) 85 | 86 | train_dataloader = torch.utils.data.DataLoader( 87 | train_dataset, **dataloader_config) 88 | 89 | validation_dataloader = torch.utils.data.DataLoader( 90 | validation_dataset, **dataloader_config) 91 | 92 | return train_dataloader, validation_dataloader 93 | 94 | 95 | class LitModel(pl.LightningModule): 96 | def __init__( 97 | self, 98 | history_len=params['history_len'], 99 | forecast_len=params['forecast_len'], 100 | ): 101 | super().__init__() 102 | self.history_len = history_len 103 | self.forecast_len = forecast_len 104 | 105 | self.perceiver = Perceiver( 106 | input_channels=len(params['sat_channels']), 107 | input_axis=2, 108 | num_freq_bands=6, 109 | max_freq=10, 110 | depth=2, 111 | num_latents=128, 112 | latent_dim=64, 113 | num_classes=PERCEIVER_OUTPUT_SIZE, 114 | ) 115 | 116 | self.fc1 = nn.Linear( 117 | in_features=PERCEIVER_OUTPUT_SIZE, 118 | out_features=256) 119 | 120 | self.fc2 = nn.Linear( 121 | in_features=256 + EMBEDDING_DIM, 122 | out_features=128) 123 | 124 | self.fc3 = nn.Linear(in_features=128, out_features=64) 125 | self.fc4 = nn.Linear(in_features=64, out_features=32) 126 | self.fc5 = nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 127 | 128 | if EMBEDDING_DIM: 129 | self.pv_system_id_embedding = nn.Embedding( 130 | num_embeddings=940, 131 | embedding_dim=EMBEDDING_DIM) 132 | 133 | self.encoder_rnn = nn.GRU( 134 | # plus 1 for history 135 | input_size=FC_OUTPUT_SIZE + N_DATETIME_FEATURES + 1 + NWP_SIZE, 136 | hidden_size=RNN_HIDDEN_SIZE, 137 | num_layers=2, 138 | batch_first=True) 139 | self.decoder_rnn = nn.GRU( 140 | input_size=FC_OUTPUT_SIZE + N_DATETIME_FEATURES + NWP_SIZE, 141 | hidden_size=RNN_HIDDEN_SIZE, 142 | num_layers=2, 143 | batch_first=True) 144 | 145 | self.decoder_fc1 = nn.Linear( 146 | in_features=RNN_HIDDEN_SIZE, 147 | out_features=8) 148 | self.decoder_fc2 = nn.Linear( 149 | in_features=8, 150 | out_features=1) 151 | 152 | def forward(self, x): 153 | # ******************* Satellite imagery ************************* 154 | # Shape: batch_size, seq_length, width, height, channel 155 | # TODO: Use optical flow, not actual sat images of the future! 
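        # (Feeding in real future satellite frames leaks information that will not exist at inference time, so the resulting scores are optimistic; the 2021-10-01 experiment notes later in this repo quantify the gap.)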
156 | sat_data = x['sat_data'] 157 | batch_size, seq_len, width, height, n_chans = sat_data.shape 158 | 159 | # Stack timesteps as examples (to make a large batch) 160 | new_batch_size = batch_size * seq_len 161 | # 0 1 2 3 162 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 163 | 164 | # Pass data through the network :) 165 | out = self.perceiver(sat_data) 166 | 167 | out = out.reshape(new_batch_size, PERCEIVER_OUTPUT_SIZE) 168 | out = F.relu(self.fc1(out)) 169 | 170 | # ********************** Embedding of PV system ID ******************** 171 | if EMBEDDING_DIM: 172 | pv_row = x['pv_system_row_number'].repeat_interleave(TOTAL_SEQ_LEN) 173 | pv_embedding = self.pv_system_id_embedding(pv_row) 174 | out = torch.cat( 175 | ( 176 | out, 177 | pv_embedding 178 | ), 179 | dim=1) 180 | 181 | # Fully connected layers. 182 | out = F.relu(self.fc2(out)) 183 | out = F.relu(self.fc3(out)) 184 | out = F.relu(self.fc4(out)) 185 | out = F.relu(self.fc5(out)) 186 | 187 | # ******************* PREP DATA FOR RNN ******************************* 188 | out = out.reshape(batch_size, TOTAL_SEQ_LEN, FC_OUTPUT_SIZE) 189 | 190 | # The RNN encoder gets recent history: satellite, NWP, 191 | # datetime features, and recent PV history. The RNN decoder 192 | # gets what we know about the future: satellite, NWP, and 193 | # datetime features. 194 | 195 | # *********************** NWP Data ************************************ 196 | # Shape: batch_size, channel, seq_length, width, height 197 | nwp_data = x['nwp'].float() 198 | # RNN expects seq_len to be dim 1. 199 | nwp_data = nwp_data.permute(0, 2, 1, 3, 4) 200 | batch_size, nwp_seq_len, n_nwp_chans, nwp_width, nwp_height = ( 201 | nwp_data.shape) 202 | nwp_data = nwp_data.reshape( 203 | batch_size, nwp_seq_len, n_nwp_chans * nwp_width * nwp_height) 204 | 205 | # Concat 206 | rnn_input = torch.cat( 207 | ( 208 | out, 209 | nwp_data, 210 | x['hour_of_day_sin'].unsqueeze(-1), 211 | x['hour_of_day_cos'].unsqueeze(-1), 212 | x['day_of_year_sin'].unsqueeze(-1), 213 | x['day_of_year_cos'].unsqueeze(-1), 214 | ), 215 | dim=2) 216 | 217 | pv_yield_history = x['pv_yield'][:, :self.history_len+1].unsqueeze(-1) 218 | encoder_input = torch.cat( 219 | ( 220 | rnn_input[:, :self.history_len+1], 221 | pv_yield_history 222 | ), 223 | dim=2) 224 | 225 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 226 | decoder_output, _ = self.decoder_rnn( 227 | rnn_input[:, -self.forecast_len:], encoder_hidden) 228 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 229 | 230 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 231 | decoder_output = self.decoder_fc2(decoder_output) 232 | 233 | return decoder_output.squeeze() 234 | 235 | def _training_or_validation_step(self, batch, is_train_step): 236 | y_hat = self(batch) 237 | y = batch['pv_yield'][:, -self.forecast_len:] 238 | mse_loss = F.mse_loss(y_hat, y) 239 | nmae_loss = (y_hat - y).abs().mean() 240 | # TODO: Compute correlation coef using np.corrcoef(tensor with 241 | # shape (2, num_timesteps))[0, 1] on each example, and taking 242 | # the mean across the batch? 
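        # An untested, GPU-friendly sketch of the TODO above in pure torch
        # (no .cpu() round-trip; `corr` and `mean_corr` are hypothetical names):
        #
        #     vx = y_hat - y_hat.mean(dim=1, keepdim=True)
        #     vy = y - y.mean(dim=1, keepdim=True)
        #     corr = (vx * vy).sum(dim=1) / (vx.norm(dim=1) * vy.norm(dim=1) + 1e-8)
        #     mean_corr = corr.mean()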
243 | tag = "Train" if is_train_step else "Validation" 244 | self.log_dict( 245 | { 246 | f'MSE/{tag}': mse_loss, 247 | f'NMAE/{tag}': nmae_loss 248 | }, 249 | on_step=is_train_step, 250 | on_epoch=True, 251 | sync_dist=True # Required for distributed training (even multi-GPU on a single machine) 252 | ) 253 | 254 | return nmae_loss 255 | 256 | def training_step(self, batch, batch_idx): 257 | return self._training_or_validation_step(batch, is_train_step=True) 258 | 259 | def validation_step(self, batch, batch_idx): 260 | INTERESTING_EXAMPLES = (1, 5, 6, 7, 9, 11, 17, 19) 261 | name = f'validation/plot/epoch{self.current_epoch}' 262 | if batch_idx == 0: 263 | # Plot example 264 | model_output = self(batch) 265 | for example_i in INTERESTING_EXAMPLES: 266 | fig = plot_example( 267 | batch, model_output, history_len=params['history_len'], 268 | forecast_len=params['forecast_len'], 269 | nwp_channels=params['nwp_channels'], 270 | example_i=example_i, 271 | epoch=self.current_epoch) 272 | self.logger.experiment[name].log(File.as_image(fig)) 273 | fig.close() 274 | 275 | return self._training_or_validation_step(batch, is_train_step=False) 276 | 277 | def configure_optimizers(self): 278 | optimizer = torch.optim.Adam(self.parameters(), lr=0.0005) 279 | return optimizer 280 | 281 | 282 | def main(): 283 | train_dataloader, validation_dataloader = get_dataloaders() 284 | model = LitModel() 285 | logger = NeptuneLogger(project='OpenClimateFix/predict-pv-yield') 286 | logger.log_hyperparams(params) 287 | _LOG.info(f'logger.version = {logger.version}') 288 | trainer = pl.Trainer( 289 | gpus=-1, max_epochs=10_000, logger=logger, 290 | precision=params['precision'], 291 | val_check_interval=params['val_check_interval'], 292 | accelerator='ddp', 293 | plugins=pl.plugins.DDPPlugin(find_unused_parameters=False) 294 | ) 295 | trainer.fit(model, train_dataloader, validation_dataloader) 296 | 297 | 298 | if __name__ == '__main__': 299 | main() 300 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-17/run_cnn3d.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model, params 2 | 3 | import os 4 | 5 | import torch.nn.functional as F 6 | import pytorch_lightning as pl 7 | 8 | from predict_pv_yield.data.dataloader import get_dataloaders 9 | 10 | from predict_pv_yield.visualisation.visualisation import plot_example 11 | 12 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 13 | 14 | import logging 15 | 16 | logging.basicConfig() 17 | _LOG = logging.getLogger("predict_pv_yield") 18 | _LOG.setLevel(logging.DEBUG) 19 | 20 | 21 | 22 | def main(): 23 | train_dataloader, validation_dataloader = get_dataloaders() 24 | model = Model() 25 | logger = NeptuneLogger(project='OpenClimateFix/predict-pv-yield') 26 | logger.log_hyperparams(params) 27 | _LOG.info(f'logger.version = {logger.version}') 28 | trainer = pl.Trainer(gpus=0, max_epochs=1, logger=logger) 29 | trainer.fit(model, train_dataloader) 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-18/run_baseline.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.baseline.last_value import Model 2 | from predict_pv_yield.data.dataloader import get_dataloaders 3 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 4 |
5 | import pytorch_lightning as pl 6 | import logging 7 | 8 | logging.basicConfig() 9 | _LOG = logging.getLogger("predict_pv_yield") 10 | _LOG.setLevel(logging.DEBUG) 11 | 12 | 13 | def main(): 14 | train_dataloader, validation_dataloader = get_dataloaders(n_train_data=10, n_validation_data=10) 15 | model = Model() 16 | logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 17 | _LOG.info(f"logger.version = {logger.version}") 18 | trainer = pl.Trainer(gpus=0, max_epochs=10, logger=logger) 19 | 20 | # don't need to train the baseline model 21 | # trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 22 | 23 | trainer.validate(model, validation_dataloader) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | 29 | 30 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-124/charts 31 | # 32 | # {'Validation: MAE': 0.08886486291885376, 'Validation: MSE': 0.02136283740401268} 33 | # 34 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-18/run_cnn3d.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytorch_lightning as pl 4 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 5 | 6 | from predict_pv_yield.data.dataloader import get_dataloaders 7 | from predict_pv_yield.models.conv3d.model import Model, model_configuration_default 8 | 9 | logging.basicConfig() 10 | _LOG = logging.getLogger("predict_pv_yield") 11 | _LOG.setLevel(logging.DEBUG) 12 | 13 | 14 | def main(): 15 | train_dataloader, validation_dataloader = get_dataloaders(n_train_data=10, n_validation_data=10) 16 | model = Model() 17 | logger = NeptuneLogger(project='OpenClimateFix/predict-pv-yield') 18 | logger.log_hyperparams(model_configuration_default) 19 | _LOG.info(f'logger.version = {logger.version}') 20 | trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger) 21 | trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 22 | 23 | # run validation 24 | trainer.validate(model, validation_dataloader) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | 30 | 31 | # Managed to run it on GCP. 32 | # Results are logged to https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-120/monitoring 33 | # Notes: 34 | # 1. Large training set, and one epoch took a day, so should use a GPU for this model. I was a bit surprised, as I didn't 35 | # think the model was so big. 36 | # 2. Need to work on a general validation method. Good to baseline against a really simple model. For 37 | # validation, might need to think carefully about the metrics that will be used.
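# A hedged sketch of one such metric: with `y` and `y_hat` both of shape
# (batch_size, forecast_len), a per-forecast-horizon MAE is simply
#     (y_hat - y).abs().mean(dim=0)  # -> shape (forecast_len,)
# which shows how quickly forecast skill decays with lead time.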
38 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-24/run_cnn3d.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytorch_lightning as pl 4 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 5 | 6 | from predict_pv_yield.data.dataloader import get_dataloaders 7 | from predict_pv_yield.models.conv3d.model import Model, model_configuration_default 8 | 9 | logging.basicConfig() 10 | _LOG = logging.getLogger("predict_pv_yield") 11 | _LOG.setLevel(logging.DEBUG) 12 | 13 | _LOG = logging.getLogger("nowcasting_dataset") 14 | _LOG.setLevel(logging.INFO) 15 | 16 | 17 | def main(): 18 | train_dataloader, validation_dataloader = get_dataloaders( 19 | n_train_data=24900, 20 | n_validation_data=1000, 21 | data_path="gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/", 22 | cloud="gcp", 23 | ) 24 | model = Model() 25 | 26 | logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 27 | logger.log_hyperparams(model_configuration_default) 28 | _LOG.info(f"logger.version = {logger.version}") 29 | trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger) 30 | trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 31 | 32 | # run validation 33 | trainer.validate(model, validation_dataloader) 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | 39 | 40 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-130/monitoring 41 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-24/run_cnn3d_n_layers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytorch_lightning as pl 4 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 5 | 6 | from predict_pv_yield.data.dataloader import get_dataloaders 7 | from predict_pv_yield.models.conv3d.model import Model, model_configuration_default 8 | 9 | logging.basicConfig() 10 | _LOG = logging.getLogger("predict_pv_yield") 11 | _LOG.setLevel(logging.DEBUG) 12 | 13 | _LOG = logging.getLogger("nowcasting_dataset") 14 | _LOG.setLevel(logging.INFO) 15 | 16 | 17 | def main(): 18 | train_dataloader, validation_dataloader = get_dataloaders( 19 | n_train_data=24900, 20 | n_validation_data=1000, 21 | data_path="gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/", 22 | cloud="gcp", 23 | ) 24 | 25 | model_configuration = dict(conv3d_channels=8, kennel=3, number_of_conv3d_layers=6) 26 | model = Model(model_configuration=model_configuration) 27 | 28 | logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 29 | logger.log_hyperparams(model_configuration_default) 30 | _LOG.info(f"logger.version = {logger.version}") 31 | trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger) 32 | trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 33 | 34 | # run validation 35 | trainer.validate(model, validation_dataloader) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | 41 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-133/monitoring 42 | 43 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-132/monitoring 44 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-131/monitoring 45 | -------------------------------------------------------------------------------- 
/experiments/2021-08/2021-08-26/run_cnn3d_n_layers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytorch_lightning as pl 4 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 5 | 6 | from predict_pv_yield.data.dataloader import get_dataloaders 7 | from predict_pv_yield.models.conv3d.model import Model, model_configuration_default 8 | 9 | logging.basicConfig() 10 | _LOG = logging.getLogger("predict_pv_yield") 11 | _LOG.setLevel(logging.DEBUG) 12 | 13 | _LOG = logging.getLogger("nowcasting_dataset") 14 | _LOG.setLevel(logging.INFO) 15 | 16 | 17 | def main(): 18 | train_dataloader, validation_dataloader = get_dataloaders( 19 | n_train_data=2000, 20 | n_validation_data=1000, 21 | data_path="gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/", 22 | cloud="gcp", 23 | ) 24 | 25 | model_configuration = dict(conv3d_channels=8, kennel=3, number_of_conv3d_layers=6) 26 | model = Model(model_configuration=model_configuration) 27 | 28 | logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 29 | logger.log_hyperparams(model_configuration_default) 30 | _LOG.info(f"logger.version = {logger.version}") 31 | trainer = pl.Trainer(gpus=1, max_epochs=10, logger=logger) 32 | trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 33 | 34 | # run validation 35 | trainer.validate(model, validation_dataloader) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | 41 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-137/charts 42 | # ran with 2,000 training samples 43 | 44 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-138/charts 45 | # ran with 10,000 training samples 46 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-27/experiments.md: -------------------------------------------------------------------------------- 1 | # Daily Experiments 2 | 3 | Ran hydra for the first time, for hyperparameter optimization. 4 | It did 2 full runs, then I think it ran out of memory, which caused a strange error. 5 | Have now installed 'psutil' so that CPU and memory usage are logged to neptune.
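(For reference, a sweep like this would be launched with Hydra multirun; the exact
invocation here is from memory and may need adjusting, e.g.
`python run.py -m hparams_search=conv3d_optuna experiment=conv3d`.)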
6 | 7 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-160/monitoring 8 | Validation error after 10 epochs - 0.073 9 | 10 | conv3d_channels = 32 11 | fc1_output_features = 16 12 | fc2_output_features = 128 13 | fc3_output_features = 16 14 | 15 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-161/monitoring 16 | Validation error after 10 epochs - 0.073 17 | 18 | conv3d_channels = 32 19 | fc1_output_features = 32 20 | fc2_output_features = 16 21 | fc3_output_features = 16 22 | 23 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-162/monitoring 24 | Validation error after 2 epochs - 0.076 (then an error happened in the 3rd epoch) 25 | 26 | conv3d_channels = 32 27 | fc1_output_features = 64 28 | fc2_output_features = 16 29 | fc3_output_features = 8 30 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-27/run_baseline.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.baseline.last_value import Model 2 | from predict_pv_yield.data.dataloader import get_dataloaders 3 | from neptune.new.integrations.pytorch_lightning import NeptuneLogger 4 | 5 | import pytorch_lightning as pl 6 | import logging 7 | 8 | logging.basicConfig() 9 | _LOG = logging.getLogger("predict_pv_yield") 10 | _LOG.setLevel(logging.DEBUG) 11 | 12 | 13 | def main(): 14 | train_dataloader, validation_dataloader = get_dataloaders(n_validation_data=1000, cloud='aws') 15 | model = Model() 16 | # logger = NeptuneLogger(project="OpenClimateFix/predict-pv-yield") 17 | # _LOG.info(f"logger.version = {logger.version}") 18 | trainer = pl.Trainer(gpus=0, max_epochs=10) 19 | 20 | # don't need to train the baseline model 21 | # trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=validation_dataloader) 22 | 23 | trainer.validate(model, validation_dataloader) 24 | 25 | 26 | if __name__ == "__main__": 27 | main() 28 | 29 | 30 | # https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-124/charts 31 | # 32 | # {'Validation: MAE': 0.08886486291885376, 'Validation: MSE': 0.02136283740401268} 33 | # 34 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-31/conv3d.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model 2 | 3 | from predict_pv_yield.data.dataloader import get_dataloaders 4 | from pytorch_lightning.utilities.cloud_io import load as pl_load 5 | import torch 6 | 7 | weights = './weights/conv3d/last.ckpt' 8 | checkpoint = pl_load(weights, map_location=torch.device('cpu')) 9 | 10 | model = Model(conv3d_channels=32, 11 | fc1_output_features=32, 12 | fc2_output_features=16, 13 | fc3_output_features=16, 14 | include_time=False, 15 | number_of_conv3d_layers=4) 16 | model.load_state_dict(checkpoint["state_dict"]) 17 | 18 | train_dataset, validation_dataset = get_dataloaders() 19 | -------------------------------------------------------------------------------- /experiments/2021-08/2021-08-31/experiments.txt: -------------------------------------------------------------------------------- 1 | Ran Perceiver model 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-186/charts 4 | 5 | Ran it using batch size 8. 6 | Each epoch took about 3 hours when running on GCP - n1-standard-8, NVIDIA Tesla P100 7 | --------------------------------------------------------------------------------
/experiments/2021-09/2021-09-03/conv3d.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model 2 | 3 | from predict_pv_yield.data.dataloader import get_dataloaders 4 | from pytorch_lightning.utilities.cloud_io import load as pl_load 5 | import torch 6 | import pandas as pd 7 | 8 | from predict_pv_yield.visualisation.line import plot_one_result, plot_batch_results 9 | 10 | weights = "./weights/conv3d/epoch_009.ckpt" 11 | checkpoint = pl_load(weights, map_location=torch.device("cpu")) 12 | 13 | model = Model( 14 | conv3d_channels=32, 15 | fc1_output_features=128, 16 | fc2_output_features=128, 17 | fc3_output_features=64, 18 | include_time=True, 19 | forecast_len=12, 20 | history_len=6, 21 | number_of_conv3d_layers=6, 22 | ) 23 | model.load_state_dict(checkpoint["state_dict"]) 24 | 25 | train_dataset, validation_dataset = get_dataloaders( 26 | cloud="gcp", data_path="gs://solar-pv-nowcasting-data/prepared_ML_training_data/v4/" 27 | ) 28 | validation_dataset = iter(validation_dataset) 29 | x = next(validation_dataset) 30 | 31 | y_hat_all = model(x) 32 | 33 | # plot one 34 | batch_index = 0 35 | y = x["pv_yield"][batch_index][7:, 0].detach().numpy() 36 | y_hat = y_hat_all[batch_index].detach().numpy() 37 | time = pd.to_datetime(x["sat_datetime_index"][batch_index][7:].detach().numpy(), unit="s") 38 | 39 | fig = plot_one_result(x=time, y=y, y_hat=y_hat) 40 | fig.show(renderer="browser") 41 | 42 | # plot all of batch 43 | y = x["pv_yield"][:, 7:, 0].detach().numpy() 44 | y_hat = y_hat_all.detach().numpy() 45 | time = [pd.to_datetime(x, unit="s") for x in x["sat_datetime_index"][:, 7:].detach().numpy()] 46 | 47 | fig = plot_batch_results(x=time, y=y, y_hat=y_hat, model_name=model.name) 48 | fig.show(renderer="browser") 49 | -------------------------------------------------------------------------------- /experiments/2021-09/2021-09-03/experiments.txt: -------------------------------------------------------------------------------- 1 | Ran Perceiver RNN model 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-245/charts 4 | 5 | Includes validation images, so we can see how the model is performing after each epoch 6 | 7 | Due to GPU memory limits, had to go with: 8 | 9 | forecast_len: 12 10 | history_len: 6 11 | batch_size: 8 12 | num_latents: 32 13 | latent_dim: 32 14 | embedding_dem: 10 15 | -------------------------------------------------------------------------------- /experiments/2021-09/2021-09-24/experiments.txt: -------------------------------------------------------------------------------- 1 | # Baseline 2 | 3 | Ran baseline on new v6 GCP dataset 4 | 5 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-283/monitoring 6 | 7 | Takes about 4 minutes to run the validation epoch 8 | 9 | This is just for a forecast 1 timestep into the future 10 | MAE = 0.0562 11 | 12 | # Conv3d 13 | 14 | Ran without using nwp data or datetime features 15 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-314/monitoring 16 | 17 | MAE = 0.0401 18 | -------------------------------------------------------------------------------- /experiments/2021-09/2021-09-27/experiments.txt: -------------------------------------------------------------------------------- 1 | # Baseline 2 | 3 | Ran baseline on new v6 GCP dataset 4 | 5 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-283/monitoring 6 | 7 | Takes about 4 minutes to run the validation epoch 8 | 9 | This is just for a forecast 1 timestep into the future 10 | MAE = 0.0562 11 | 12 | # Conv3d 13 | 14 | Ran without using nwp data or datetime features 15 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-314/monitoring 16 | 17 | MAE = 0.0401 18 | 19 | # Conv3d (Sat and NWP) 20 | 21 | Using both sat and nwp, fed into two separate convolution nets. 22 | 23 | https://app.neptune.ai/OpenClimateFix/predict-pv-yield/e/PRED-320 24 | 25 | MAE = 0.0376 - this was after 10 epochs, and I think it was still going down. 26 | -------------------------------------------------------------------------------- /experiments/2021-09/2021-09-28/experiments.txt: -------------------------------------------------------------------------------- 1 | 1. Perceiver NWP SAT 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-331/monitoring 4 | 5 | Ran with 6 | - batch_size of 6, as the GPU was out of memory otherwise 7 | - num_latents: int = 64, 8 | - latent_dim: int = 64, 9 | - embedding_dem: int = 0, 10 | 11 | Each epoch takes about 3 hours 12 | 13 | Decided to stop it early 14 | 15 | 2. Perceiver Conv3d NWP SAT 16 | 17 | Idea is to have 1 conv3d + max pool layer before the perceiver model 18 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-331/monitoring 19 | 20 | Conv3d did not make much difference to memory; the biggest factors were changing 21 | - num_latents 22 | - latent_dim 23 | 24 | To get batch size 32, set 25 | - num_latents = 16 26 | - latent_dim = 16 27 | - PERCEIVER_OUTPUT_SIZE = 512 28 | OR 29 | To get batch size 32, set 30 | - num_latents = 24 31 | - latent_dim = 24 32 | - PERCEIVER_OUTPUT_SIZE = 128 33 | 34 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-349/monitoring 35 | 36 | ~ 4 hours per epoch 37 | 38 | MAE = 0.0308 (after 10 epochs) 39 | -------------------------------------------------------------------------------- /experiments/2021-10/2021-10-01/experiment.txt: -------------------------------------------------------------------------------- 1 | 1. Perceiver Conv3d NWP SAT 2 | 3 | No future satellite images 4 | 5 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-378/charts 6 | 7 | ~ 4 hours per epoch 8 | 9 | MAE = 0.0365 (after 22 epochs), compared to MAE 0.0304 when future satellite images were included 10 | -------------------------------------------------------------------------------- /experiments/2021-11/2021-11-22.txt: -------------------------------------------------------------------------------- 1 | 1. Conv3d - no nwp 2 | n_training_batches= 450 3 | n_test_batches= 450 4 | 5 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-433/charts 6 | 7 | 2. Conv3d with nwp 8 | 9 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-440/charts 10 | 11 | 3. Conv3d with nwp 12 | 13 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-445/ 14 | 15 | n_training_batches = 769 - limited by nwp data 16 | n_test_batches= 400 (from training set) 17 | -------------------------------------------------------------------------------- /experiments/2021-11/2021-11-25.txt: -------------------------------------------------------------------------------- 1 | 1.
Baseline model 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-493/monitoring 4 | 5 | on test set of 400 6 | 7 | DATALOADER:0 VALIDATE RESULTS 8 | {'MAE_EXP/Validation': 1.8552579879760742, 9 | 'MAE_EXP/Validation_epoch': 1.8552579879760742, 10 | 'MSE/Validation': 0.006537176202982664, 11 | 'MSE/Validation_epoch': 0.006537176202982664, 12 | 'MSE_EXP/Validation': 0.20918963849544525, 13 | 'MSE_EXP/Validation_epoch': 0.20918963849544525, 14 | 'MSE_forecast_horizon_0/Validation': 0.05797681212425232, 15 | 'MSE_forecast_horizon_0/Validation_epoch': 0.05797681212425232, 16 | 'NMAE/Validation': 0.05797681212425232, 17 | 'NMAE/Validation_epoch': 0.05797681212425232} 18 | 19 | 20 | 2. conv3d sat and nwp 21 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-490/monitoring 22 | 23 | this is with no nwp data NMAE/Validation ~ 0.0676 24 | 25 | data: 26 | sat (no hrv) 27 | gsp history 28 | 29 | Total epochs: 4 30 | 31 | 3. https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-504/monitoring 32 | 33 | this is with nwp data NMAE/Validation ~ 0.0601 34 | 35 | data: 36 | nwp 37 | sat (no hrv) 38 | gsp history 39 | 40 | Total epochs: 4 41 | 42 | 43 | 4. https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-523/all 44 | 45 | data: 46 | nwp 47 | sat (no hrv) 48 | gsp history 49 | pv history 50 | 51 | After 1 epoch: NMAE/Validation ~ 0.0597 52 | 53 | 5. https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-525/all 54 | 55 | data: 56 | nwp 57 | sat (no hrv) 58 | no gsp history 59 | pv history 60 | 61 | After X epoch: NMAE/Validation 62 | -------------------------------------------------------------------------------- /notebooks/debug_gcsfs_multiprocessing_issue.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ecf5eff1-679a-42f1-b153-46e93cdf58bf", 6 | "metadata": {}, 7 | "source": [ 8 | "Code experiments for my GCSFS bug report: https://github.com/dask/gcsfs/issues/379\n", 9 | "\n", 10 | "```shell\n", 11 | "conda create --name test_gcsfs python=3.8 gcsfs ipykernel\n", 12 | "```" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "adequate-virgin", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import multiprocessing\n", 23 | "from concurrent.futures import ProcessPoolExecutor\n", 24 | "import gcsfs\n", 25 | "import time\n", 26 | "gcsfs.__version__" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "02197441-5221-42e3-910f-b56f3b02992a", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "multiprocessing.set_start_method('spawn')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "id": "moderate-escape", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "gcs = gcsfs.GCSFileSystem() # Works fine!" 
47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "id": "7e9685f6-8ad5-4c1c-ad0c-edcf391ef0e5", 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "fs_map = gcs.get_mapper('solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16')" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "id": "5b7c3007-b02e-4374-95ac-c33b4bb20188", 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "ItemsView()" 69 | ] 70 | }, 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "fs_map.items()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 6, 83 | "id": "05d76d01-e477-4cf5-9faa-d8d122fc7bf4", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def process_pool():\n", 88 | " with ProcessPoolExecutor(max_workers=1) as executor:\n", 89 | " for i in range(8):\n", 90 | " future = executor.submit(gcsfs.GCSFileSystem)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 7, 96 | "id": "041cdc7c-111a-4415-a097-d4eb8f0ca818", 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "process_pool()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 8, 106 | "id": "449057a8-d16a-478d-b8a5-4c033ce92cef", 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "process_pool()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 3, 116 | "id": "a58c4aa8-51be-4fff-ad67-a8652f4c8b71", 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "def worker_loop(results_queue):\n", 121 | " print('worker')\n", 122 | " \n", 123 | " \n", 124 | " #gcs = gcsfs.GCSFileSystem()\n", 125 | " \n", 126 | " while True:\n", 127 | " results_queue.put(None)\n", 128 | " time.sleep(1)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 4, 134 | "id": "723d464a-49f2-44ef-88c2-b785952f15ff", 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "workers = []\n", 139 | "results_queue = multiprocessing.Queue()\n", 140 | "for _ in range(4):\n", 141 | " worker = multiprocessing.Process(\n", 142 | " target=worker_loop,\n", 143 | " args=(results_queue,)\n", 144 | " )\n", 145 | " worker.daemon = True\n", 146 | " worker.start()\n", 147 | " workers.append(worker)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 5, 153 | "id": "9789c6df-a20b-4a04-84a6-5588cd9e95ad", 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "" 160 | ] 161 | }, 162 | "execution_count": 5, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "workers[0]" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "id": "c6a4622d-0e03-4c82-87be-0f6a48ce769e", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "results_queue.get()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "id": "2c18440d-05bc-4113-b6f1-b16b45b26dc3", 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "test_gcsfs", 193 | "language": "python", 194 | "name": "test_gcsfs" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | 
"name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.8.8" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 5 211 | } 212 | -------------------------------------------------------------------------------- /predict_pv_yield/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/__init__.py -------------------------------------------------------------------------------- /predict_pv_yield/data/dataloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from nowcasting_dataloader.datasets import NetCDFDataset, worker_init_fn 3 | from nowcasting_dataloader.fake import FakeDataset 4 | from nowcasting_dataset.config.load import load_yaml_configuration 5 | from typing import Tuple 6 | import logging 7 | import torch 8 | from pytorch_lightning import LightningDataModule 9 | 10 | 11 | 12 | _LOG = logging.getLogger(__name__) 13 | _LOG.setLevel(logging.DEBUG) 14 | 15 | torch.set_default_dtype(torch.float32) 16 | 17 | 18 | def get_dataloaders( 19 | n_train_data: int = 24900, 20 | n_validation_data: int = 900, 21 | cloud: str = "gcp", 22 | temp_path=".", 23 | data_path="prepared_ML_training_data/v4/", 24 | ) -> Tuple: 25 | 26 | configuration = load_yaml_configuration(filename=f'{data_path}/configuration.yaml') 27 | 28 | data_module = NetCDFDataModule( 29 | temp_path=temp_path, data_path=data_path, cloud=cloud, n_train_data=n_train_data, n_val_data=n_validation_data 30 | ) 31 | 32 | train_dataloader = data_module.train_dataloader() 33 | validation_dataloader = data_module.val_dataloader() 34 | 35 | return train_dataloader, validation_dataloader 36 | 37 | 38 | class NetCDFDataModule(LightningDataModule): 39 | """ 40 | Example of LightningDataModule for NETCDF dataset. 41 | A DataModule implements 5 key methods: 42 | - prepare_data (things to do on 1 GPU/TPU, not on every GPU/TPU in distributed mode) 43 | - setup (things to do on every accelerator in distributed mode) 44 | - train_dataloader (the training dataloader) 45 | - val_dataloader (the validation dataloader(s)) 46 | - test_dataloader (the test dataloader(s)) 47 | This allows you to share a full dataset without explaining how to download, 48 | split, transform and process the data. 49 | Read the docs: 50 | https://pytorch-lightning.readthedocs.io/en/latest/extensions/datamodules.html 51 | """ 52 | 53 | def __init__( 54 | self, 55 | temp_path: str = ".", 56 | n_train_data: int = 24900, 57 | n_val_data: int = 1000, 58 | cloud: str = "aws", 59 | num_workers: int = 8, 60 | pin_memory: bool = True, 61 | data_path="prepared_ML_training_data/v4/", 62 | fake_data: bool = False, 63 | ): 64 | """ 65 | fake_data: random data is created and used instead. 
This is useful for testing 66 | """ 67 | super().__init__() 68 | 69 | self.temp_path = temp_path 70 | self.data_path = data_path 71 | self.cloud = cloud 72 | self.n_train_data = n_train_data 73 | self.n_val_data = n_val_data 74 | self.num_workers = num_workers 75 | self.pin_memory = pin_memory 76 | self.fake_data = fake_data 77 | 78 | filename = os.path.join(data_path, 'configuration.yaml') 79 | _LOG.debug(f'Will be loading the configuration file {filename}') 80 | self.configuration = load_yaml_configuration(filename=filename) 81 | 82 | self.dataloader_config = dict( 83 | pin_memory=self.pin_memory, 84 | num_workers=self.num_workers, 85 | prefetch_factor=8, 86 | worker_init_fn=worker_init_fn, 87 | persistent_workers=True, 88 | # Disable automatic batching because dataset 89 | # returns complete batches. 90 | batch_size=None, 91 | ) 92 | 93 | def train_dataloader(self): 94 | if self.fake_data: 95 | train_dataset = FakeDataset(configuration=self.configuration) 96 | else: 97 | train_dataset = NetCDFDataset( 98 | self.n_train_data, 99 | os.path.join(self.data_path, "train"), 100 | os.path.join(self.temp_path, "train"), 101 | configuration=self.configuration 102 | ) 103 | 104 | return torch.utils.data.DataLoader(train_dataset, **self.dataloader_config) 105 | 106 | def val_dataloader(self): 107 | if self.fake_data: 108 | val_dataset = FakeDataset(configuration=self.configuration) 109 | else: 110 | val_dataset = NetCDFDataset( 111 | self.n_val_data, 112 | os.path.join(self.data_path, "test"), 113 | os.path.join(self.temp_path, "test"), 114 | configuration=self.configuration 115 | ) 116 | 117 | return torch.utils.data.DataLoader(val_dataset, **self.dataloader_config) 118 | 119 | def test_dataloader(self): 120 | if self.fake_data: 121 | test_dataset = FakeDataset(configuration=self.configuration) 122 | else: 123 | # TODO need to change this to a test folder 124 | test_dataset = NetCDFDataset( 125 | self.n_val_data, 126 | os.path.join(self.data_path, "test"), 127 | os.path.join(self.temp_path, "test"), 128 | configuration=self.configuration 129 | ) 130 | 131 | return torch.utils.data.DataLoader(test_dataset, **self.dataloader_config) 132 | -------------------------------------------------------------------------------- /predict_pv_yield/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/models/__init__.py -------------------------------------------------------------------------------- /predict_pv_yield/models/base_model.py: -------------------------------------------------------------------------------- 1 | import pytorch_lightning as pl 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | from nowcasting_utils.visualization.visualization import plot_example 6 | from nowcasting_utils.visualization.line import plot_batch_results 7 | from nowcasting_dataset.data_sources.nwp.nwp_data_source import NWP_VARIABLE_NAMES 8 | from nowcasting_utils.models.loss import WeightedLosses 9 | from nowcasting_utils.models.metrics import mae_each_forecast_horizon, mse_each_forecast_horizon 10 | from nowcasting_dataloader.batch import BatchML 11 | from nowcasting_utils.metrics.validation import make_validation_results, save_validation_results_to_logger 12 | 13 | import pandas as pd 14 | import numpy as np 15 | 16 | import logging 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | activities = [torch.profiler.ProfilerActivity.CPU] 21 | 
if torch.cuda.is_available(): 22 | activities.append(torch.profiler.ProfilerActivity.CUDA) 23 | 24 | default_output_variable = "pv_yield" 25 | 26 | 27 | class BaseModel(pl.LightningModule): 28 | 29 | # default batch_size 30 | batch_size = 32 31 | 32 | # results file name 33 | results_file_name = "results_epoch" 34 | 35 | # list of results dataframes. This is used to save validation results 36 | results_dfs = [] 37 | 38 | def __init__(self): 39 | super().__init__() 40 | 41 | self.history_len_5 = ( 42 | self.history_minutes // 5 43 | ) # the number of historic timesteps for 5 minutes data 44 | self.forecast_len_5 = ( 45 | self.forecast_minutes // 5 46 | ) # the number of forecast timesteps for 5 minutes data 47 | 48 | self.history_len_30 = ( 49 | self.history_minutes // 30 50 | ) # the number of historic timesteps for 30 minutes data 51 | self.forecast_len_30 = ( 52 | self.forecast_minutes // 30 53 | ) # the number of forecast timesteps for 30 minutes data 54 | 55 | # the number of historic timesteps for 60 minutes data 56 | # Note that ceil is taken as for 30 minutes of history data, one history value will be used 57 | self.history_len_60 = int(np.ceil(self.history_minutes / 60)) 58 | self.forecast_len_60 = ( 59 | self.forecast_minutes // 60 60 | ) # the number of forecast timesteps for 60 minutes data 61 | 62 | if not hasattr(self, "output_variable"): 63 | logger.debug("output_variable not set; using default %r", default_output_variable) 64 | self.output_variable = default_output_variable 65 | 66 | if self.output_variable == "pv_yield": 67 | self.forecast_len = self.forecast_len_5 68 | self.history_len = self.history_len_5 69 | self.number_of_samples_per_batch = 128 70 | else: 71 | self.forecast_len = self.forecast_len_30 72 | self.history_len = self.history_len_30 73 | self.number_of_samples_per_batch = 32 74 | self.number_of_pv_samples_per_batch = 128 75 | 76 | self.weighted_losses = WeightedLosses(forecast_length=self.forecast_len) 77 | 78 | def _training_or_validation_step(self, batch, tag: str, return_model_outputs: bool = False): 79 | """ 80 | batch: The batch data 81 | tag: either 'Train', 'Validation', or 'Test' 82 | """ 83 | 84 | if type(batch) == dict: 85 | batch = BatchML(**batch) 86 | 87 | # put the batch data through the model 88 | y_hat = self(batch) 89 | 90 | # get the true result out. Select the first data point, as this is the pv system in the center of the image 91 | if self.output_variable == "gsp_yield": 92 | y = batch.gsp.gsp_yield 93 | else: 94 | y = batch.pv.pv_yield 95 | y = y[0 : self.batch_size, -self.forecast_len :, 0] 96 | 97 | # calculate mse, mae 98 | mse_loss = F.mse_loss(y_hat, y) 99 | nmae_loss = (y_hat - y).abs().mean() 100 | 101 | # calculate mse, mae with exp weighted loss 102 | mse_exp = self.weighted_losses.get_mse_exp(output=y_hat, target=y) 103 | mae_exp = self.weighted_losses.get_mae_exp(output=y_hat, target=y) 104 | 105 | # TODO: Compute correlation coef using np.corrcoef(tensor with 106 | # shape (2, num_timesteps))[0, 1] on each example, and taking 107 | # the mean across the batch? 108 | self.log_dict( 109 | { 110 | f"MSE/{tag}": mse_loss, 111 | f"NMAE/{tag}": nmae_loss, 112 | f"MSE_EXP/{tag}": mse_exp, 113 | f"MAE_EXP/{tag}": mae_exp, 114 | }, 115 | on_step=True, 116 | on_epoch=True, 117 | sync_dist=True # Required for distributed training 118 | # (even multi-GPU on a single machine). 119 | ) 120 | 121 | if tag != "Train": 122 | # add metrics for each forecast horizon 123 | mse_each_forecast_horizon_metric = mse_each_forecast_horizon(output=y_hat, target=y) 124 | mae_each_forecast_horizon_metric = mae_each_forecast_horizon(output=y_hat, target=y) 125 | 126 | metrics_mse = { 127 | f"MSE_forecast_horizon_{i}/{tag}": mse_each_forecast_horizon_metric[i] 128 | for i in range(self.forecast_len_30) 129 | } 130 | metrics_mae = { 131 | f"MAE_forecast_horizon_{i}/{tag}": mae_each_forecast_horizon_metric[i] 132 | for i in range(self.forecast_len_30) 133 | } 134 | 135 | self.log_dict( 136 | {**metrics_mse, **metrics_mae}, 137 | on_step=True, 138 | on_epoch=True, 139 | sync_dist=True # Required for distributed training 140 | # (even multi-GPU on a single machine). 141 | ) 142 | 143 | if return_model_outputs: 144 | return nmae_loss, y_hat 145 | else: 146 | return nmae_loss 147 | 148 | def training_step(self, batch, batch_idx): 149 | 150 | if (batch_idx == 0) and (self.current_epoch == 0): 151 | return self._training_or_validation_step(batch, tag="Train") 152 | else: 153 | return self._training_or_validation_step(batch, tag="Train") 154 | 155 | def validation_step(self, batch: BatchML, batch_idx): 156 | 157 | if type(batch) == dict: 158 | batch = BatchML(**batch) 159 | 160 | # get model outputs 161 | nmae_loss, model_output = self._training_or_validation_step( 162 | batch, tag="Validation", return_model_outputs=True 163 | ) 164 | 165 | INTERESTING_EXAMPLES = (1, 5, 6, 7, 9, 11, 17, 19) 166 | name = f"validation/plot/epoch_{self.current_epoch}_{batch_idx}" 167 | if batch_idx in [0, 1, 2, 3, 4]: 168 | 169 | # make sure the interesting examples don't go above the batch size 170 | INTERESTING_EXAMPLES = (i for i in INTERESTING_EXAMPLES if i < self.batch_size) 171 | 172 | for example_i in INTERESTING_EXAMPLES: 173 | # 1. Plot example 174 | if 0: 175 | fig = plot_example( 176 | batch, 177 | model_output, 178 | history_minutes=self.history_len_5 * 5, 179 | forecast_minutes=self.forecast_len_5 * 5, 180 | nwp_channels=NWP_VARIABLE_NAMES, 181 | example_i=example_i, 182 | epoch=self.current_epoch, 183 | output_variable=self.output_variable, 184 | ) 185 | 186 | # save fig to log 187 | self.logger.experiment[-1].log_image(name, fig) 188 | try: 189 | fig.close() 190 | except Exception as _: 191 | # could not close figure 192 | pass 193 | 194 | # 2.
plot summary batch of predictions and results 195 | # make x,y data 196 | if self.output_variable == "gsp_yield": 197 | y = batch.gsp.gsp_yield[0 : self.batch_size, :, 0].cpu().numpy() 198 | else: 199 | y = batch.pv.pv_yield[0 : self.batch_size, :, 0].cpu().numpy() 200 | y_hat = model_output[0 : self.batch_size].cpu().numpy() 201 | time = [ 202 | pd.to_datetime(x, unit="ns") 203 | for x in batch.gsp.gsp_datetime_index[0 : self.batch_size].cpu().numpy() 204 | ] 205 | time_hat = [ 206 | pd.to_datetime(x, unit="ns") 207 | for x in batch.gsp.gsp_datetime_index[ 208 | 0 : self.batch_size, self.history_len_30 + 1 : 209 | ] 210 | .cpu() 211 | .numpy() 212 | ] 213 | 214 | # plot and save to logger 215 | fig = plot_batch_results(model_name=self.name, y=y, y_hat=y_hat, x=time, x_hat=time_hat) 216 | fig.write_html(f"temp_{batch_idx}.html") 217 | try: 218 | self.logger.experiment[-1][name].upload(f"temp_{batch_idx}.html") 219 | except Exception: 220 | pass 221 | 222 | # save validation results 223 | capacity = batch.gsp.gsp_capacity[:,-self.forecast_len_30:,0].cpu().numpy() 224 | predictions = model_output.cpu().numpy() 225 | truths = batch.gsp.gsp_yield[:, -self.forecast_len_30:, 0].cpu().numpy() 226 | predictions = predictions * capacity 227 | truths = truths * capacity 228 | 229 | results = make_validation_results(truths_mw=truths, 230 | predictions_mw=predictions, 231 | capacity_mwp=capacity, 232 | gsp_ids=batch.gsp.gsp_id[:, 0].cpu(), 233 | batch_idx=batch_idx, 234 | t0_datetimes_utc=pd.to_datetime(batch.metadata.t0_datetime_utc)) 235 | 236 | # append so in 'validation_epoch_end' the file is saved 237 | if batch_idx == 0: 238 | self.results_dfs = [] 239 | self.results_dfs.append(results) 240 | 241 | return nmae_loss 242 | 243 | def validation_epoch_end(self, outputs): 244 | 245 | logger.info("Validation epoch end") 246 | 247 | save_validation_results_to_logger(results_dfs=self.results_dfs, 248 | results_file_name=self.results_file_name, 249 | current_epoch=self.current_epoch, 250 | logger=self.logger) 251 | 252 | def test_step(self, batch, batch_idx): 253 | self._training_or_validation_step(batch, tag="Test") 254 | 255 | def configure_optimizers(self): 256 | optimizer = torch.optim.Adam(self.parameters(), lr=0.0005) 257 | return optimizer 258 | -------------------------------------------------------------------------------- /predict_pv_yield/models/baseline/last_value.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from predict_pv_yield.models.base_model import BaseModel 4 | from nowcasting_dataloader.batch import BatchML 5 | 6 | 7 | logging.basicConfig() 8 | _LOG = logging.getLogger("predict_pv_yield") 9 | _LOG.setLevel(logging.DEBUG) 10 | 11 | 12 | class Model(BaseModel): 13 | name = "last_value" 14 | 15 | def __init__(self, forecast_minutes: int = 12, history_minutes: int = 6, output_variable="pv_yield"): 16 | """ 17 | Simple baseline model that takes the last pv yield value and copies it forward 18 | """ 19 | 20 | self.forecast_minutes = forecast_minutes 21 | self.history_minutes = history_minutes 22 | self.output_variable = output_variable 23 | 24 | super().__init__() 25 | 26 | def forward(self, x: BatchML): 27 | 28 | if type(x) == dict: 29 | x = BatchML(**x) 30 | 31 | # Shape: batch_size, seq_length, n_sites 32 | if self.output_variable == 'gsp_yield': 33 | gsp_yield = x.gsp.gsp_yield 34 | else: 35 | gsp_yield = x.pv.pv_yield 36 | 37 | # take the last non-forecast value, and the first entry in the pv yield 38 | # (this is the pv site we are predicting for) 39 | y_hat = gsp_yield[:, -self.forecast_len - 1, 0] 40 | 41 | # repeat the last valid value forward for the n forecast steps 42 | out = y_hat.unsqueeze(1).repeat(1, self.forecast_len) 43 | # shape: batch_size, forecast_len 44 | 45 | return out 46 | -------------------------------------------------------------------------------- /predict_pv_yield/models/baseline/readme.md: -------------------------------------------------------------------------------- 1 | # Baseline Models 2 | 3 | Idea is to have a really simple baseline model for predicting pv yield. 4 | First model just uses the last pv yield value. 5 | The reason to have this model is so that as we develop more complicated models, 6 | we can see how much 'better' they are doing. 7 | 8 | 9 | Want to try and keep the same setup as the other pytorch models, so it is similar to run. 10 | -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/architect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/models/conv3d/architect.png -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/conv3d_sat_nwp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/models/conv3d/conv3d_sat_nwp.png -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | from predict_pv_yield.models.base_model import BaseModel 8 | from nowcasting_dataloader.batch import BatchML 9 | 10 | logging.basicConfig() 11 | _LOG = logging.getLogger("predict_pv_yield") 12 | 13 | 14 | class Model(BaseModel): 15 | 16 | name = "conv3d" 17 | 18 | def __init__( 19 | self, 20 | include_pv_yield: bool = True, 21 | include_nwp: bool = True, 22 | forecast_minutes: int = 30, 23 | history_minutes: int = 60, 24 | number_of_conv3d_layers: int = 4, 25 | conv3d_channels: int = 32, 26 | image_size_pixels: int = 64, 27 | number_sat_channels: int = 12, 28 | fc1_output_features: int = 128, 29 | fc2_output_features: int = 128, 30 | fc3_output_features: int = 64, 31 | output_variable: str = "pv_yield", 32 | ): 33 | """ 34 | 3d conv model, that takes in different data streams 35 | 36 | architecture is roughly: satellite image time series goes into several 3d convolution layers. 37 | The final convolutional layer goes to a fully connected layer.
This is joined by other data inputs like 38 | - pv yield 39 | - nwp data 40 | - time variables 41 | Then there are ~4 fully connected layers which end up forecasting the pv yield into the future 42 | 43 | include_pv_yield: include pv yield data 44 | include_nwp: include nwp data 45 | forecast_minutes: the number of minutes that should be forecast 46 | history_minutes: the number of historical minutes that are used 47 | number_of_conv3d_layers: number of 3d convolution layers that are used 48 | conv3d_channels: the number of 3d convolution channels 49 | image_size_pixels: the input satellite image size 50 | number_sat_channels: number of satellite channels 51 | fc1_output_features: number of output nodes of the first fully connected layer 52 | fc2_output_features: number of output nodes of the second fully connected layer 53 | fc3_output_features: number of output nodes of the third fully connected layer 54 | output_variable: the output variable to be predicted 55 | """ 56 | 57 | self.include_pv_yield = include_pv_yield 58 | self.include_nwp = include_nwp 59 | self.number_of_conv3d_layers = number_of_conv3d_layers 60 | self.number_of_nwp_features = 10 * 19 * 2 * 2 61 | self.fc1_output_features = fc1_output_features 62 | self.fc2_output_features = fc2_output_features 63 | self.fc3_output_features = fc3_output_features 64 | self.forecast_minutes = forecast_minutes 65 | self.history_minutes = history_minutes 66 | self.output_variable = output_variable 67 | 68 | super().__init__() 69 | 70 | conv3d_channels = conv3d_channels 71 | 72 | self.number_of_nwp_features = 10 * 19 * 2 * 2 73 | 74 | self.cnn_output_size = ( 75 | conv3d_channels 76 | * ((image_size_pixels - 2 * self.number_of_conv3d_layers) ** 2) 77 | * (self.forecast_len_5 + self.history_len_5 + 1 - 2 * self.number_of_conv3d_layers) 78 | ) 79 | 80 | self.sat_conv0 = nn.Conv3d( 81 | in_channels=number_sat_channels, 82 | out_channels=conv3d_channels, 83 | kernel_size=(3, 3, 3), 84 | padding=0, 85 | ) 86 | for i in range(0, self.number_of_conv3d_layers - 1): 87 | layer = nn.Conv3d( 88 | in_channels=conv3d_channels, out_channels=conv3d_channels, kernel_size=(3, 3, 3), padding=0 89 | ) 90 | setattr(self, f"conv3d_{i + 1}", layer) 91 | 92 | self.fc1 = nn.Linear(in_features=self.cnn_output_size, out_features=self.fc1_output_features) 93 | self.fc2 = nn.Linear(in_features=self.fc1_output_features, out_features=self.fc2_output_features) 94 | 95 | fc3_in_features = self.fc2_output_features 96 | if include_pv_yield: 97 | fc3_in_features += self.number_of_samples_per_batch * (self.history_len_30 + 1) 98 | if include_nwp: 99 | self.fc_nwp = nn.Linear(in_features=self.number_of_nwp_features, out_features=128) 100 | fc3_in_features += 128 101 | 102 | self.fc3 = nn.Linear(in_features=fc3_in_features, out_features=self.fc3_output_features) 103 | self.fc4 = nn.Linear(in_features=self.fc3_output_features, out_features=self.forecast_len) 104 | # self.fc5 = nn.Linear(in_features=32, out_features=8) 105 | # self.fc6 = nn.Linear(in_features=8, out_features=1) 106 | 107 | def forward(self, x): 108 | 109 | if type(x) == dict: 110 | x = BatchML(**x) 111 | # ******************* Satellite imagery ************************* 112 | # Shape: batch_size, channel, seq_length, height, width 113 | sat_data = x.satellite.data.float() 114 | batch_size, n_chans, seq_len, height, width = sat_data.shape 115 | 116 | # :) Pass data through the network :) 117 | out = F.relu(self.sat_conv0(sat_data)) 118 | for i in range(0,
self.number_of_conv3d_layers - 1): 119 | layer = getattr(self, f"conv3d_{i + 1}") 120 | out = F.relu(layer(out)) 121 | 122 | out = out.reshape(batch_size, self.cnn_output_size) 123 | 124 | # Fully connected layers 125 | out = F.relu(self.fc1(out)) 126 | out = F.relu(self.fc2(out)) 127 | # which has shape (batch_size, 128) 128 | 129 | # add pv yield 130 | if self.include_pv_yield: 131 | pv_yield_history = x[self.output_variable][:, : self.history_len_30 + 1].nan_to_num(nan=0.0).float() 132 | 133 | pv_yield_history = pv_yield_history.reshape( 134 | pv_yield_history.shape[0], pv_yield_history.shape[1] * pv_yield_history.shape[2] 135 | ) 136 | out = torch.cat((out, pv_yield_history), dim=1) 137 | 138 | # *********************** NWP Data ************************************ 139 | if self.include_nwp: 140 | # Shape: batch_size, channel, seq_length, height, width 141 | nwp_data = x["nwp"].float() 142 | nwp_data = nwp_data.flatten(start_dim=1) 143 | 144 | # fully connected layer 145 | out_nwp = F.relu(self.fc_nwp(nwp_data)) 146 | 147 | # join with other FC layer 148 | out = torch.cat((out, out_nwp), dim=1) 149 | 150 | # Fully connected layers. 151 | out = F.relu(self.fc3(out)) 152 | out = self.fc4(out) 153 | 154 | out = out.reshape(batch_size, self.forecast_len) 155 | 156 | return out 157 | -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/model_nwp.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | from predict_pv_yield.models.base_model import BaseModel 8 | from nowcasting_dataloader.batch import BatchML 9 | 10 | logging.basicConfig() 11 | _LOG = logging.getLogger("predict_pv_yield") 12 | 13 | 14 | class Model(BaseModel): 15 | 16 | name = "conv3d_sat_nwp" 17 | 18 | def __init__( 19 | self, 20 | include_pv_or_gsp_yield_history: bool = True, 21 | include_nwp: bool = True, 22 | forecast_minutes: int = 30, 23 | history_minutes: int = 60, 24 | number_of_conv3d_layers: int = 4, 25 | conv3d_channels: int = 32, 26 | nwp_image_size_pixels: int = 64, 27 | number_nwp_channels: int = 10, 28 | fc1_output_features: int = 128, 29 | fc2_output_features: int = 128, 30 | fc3_output_features: int = 64, 31 | output_variable: str = "gsp_yield", 32 | embedding_dem: int = 16, 33 | include_pv_yield_history: int = True, 34 | include_future_satellite: int = True, 35 | ): 36 | """ 37 | 3d conv model, that takes in different data streams 38 | 39 | architecture is roughly 40 | 1. nwp time series goes into many 3d convolution layers. 41 | 2. Final convolutional layer goes to full connected layer. 
This is joined by other data inputs like 42 | - pv yield 43 | - time variables 44 | Then there ~4 fully connected layers which end up forecasting the pv yield / gsp into the future 45 | 46 | include_pv_or_gsp_yield_history: include pv yield data 47 | include_nwp: include nwp data 48 | forecast_len: the amount of minutes that should be forecasted 49 | history_len: the amount of historical minutes that are used 50 | number_of_conv3d_layers, number of convolution 3d layers that are use 51 | conv3d_channels, the amount of convolution 3d channels 52 | image_size_pixels: the input satellite image size 53 | nwp_image_size_pixels: the input nwp image size 54 | number_sat_channels: number of nwp channels 55 | fc1_output_features: number of fully connected outputs nodes out of the the first fully connected layer 56 | fc2_output_features: number of fully connected outputs nodes out of the the second fully connected layer 57 | fc3_output_features: number of fully connected outputs nodes out of the the third fully connected layer 58 | output_variable: the output variable to be predicted 59 | number_nwp_channels: The number of nwp channels there are 60 | include_future_satellite: option to include future satellite images, or not 61 | """ 62 | 63 | self.include_pv_or_gsp_yield_history = include_pv_or_gsp_yield_history 64 | self.include_nwp = include_nwp 65 | self.number_of_conv3d_layers = number_of_conv3d_layers 66 | self.number_of_nwp_features = 128 67 | self.fc1_output_features = fc1_output_features 68 | self.fc2_output_features = fc2_output_features 69 | self.fc3_output_features = fc3_output_features 70 | self.forecast_minutes = forecast_minutes 71 | self.history_minutes = history_minutes 72 | self.output_variable = output_variable 73 | self.number_nwp_channels = number_nwp_channels 74 | self.embedding_dem = embedding_dem 75 | self.include_pv_yield_history = include_pv_yield_history 76 | self.include_future_satellite = include_future_satellite 77 | 78 | super().__init__() 79 | 80 | conv3d_channels = conv3d_channels 81 | 82 | self.nwp_cnn_output_size = ( 83 | conv3d_channels 84 | * ((nwp_image_size_pixels - 2 * self.number_of_conv3d_layers) ** 2) 85 | * (self.forecast_len_60 + self.history_len_60 + 1) 86 | ) 87 | 88 | # nwp 89 | self.nwp_conv0 = nn.Conv3d( 90 | in_channels=number_nwp_channels, 91 | out_channels=conv3d_channels, 92 | kernel_size=(3, 3, 3), 93 | padding=(1, 0, 0), 94 | ) 95 | for i in range(0, self.number_of_conv3d_layers - 1): 96 | layer = nn.Conv3d( 97 | in_channels=conv3d_channels, 98 | out_channels=conv3d_channels, 99 | kernel_size=(3, 3, 3), 100 | padding=(1, 0, 0), 101 | ) 102 | setattr(self, f"nwp_conv{i + 1}", layer) 103 | 104 | self.nwp_fc1 = nn.Linear( 105 | in_features=self.nwp_cnn_output_size, out_features=self.fc1_output_features 106 | ) 107 | self.nwp_fc2 = nn.Linear( 108 | in_features=self.fc1_output_features, out_features=self.number_of_nwp_features 109 | ) 110 | 111 | if self.embedding_dem: 112 | self.pv_system_id_embedding = nn.Embedding( 113 | num_embeddings=940, embedding_dim=self.embedding_dem 114 | ) 115 | 116 | if self.include_pv_yield_history: 117 | self.pv_fc1 = nn.Linear( 118 | in_features=self.number_of_pv_samples_per_batch * (self.history_len_5 + 1), 119 | out_features=128, 120 | ) 121 | 122 | fc3_in_features = self.number_of_nwp_features 123 | 124 | self.fc3 = nn.Linear(in_features=fc3_in_features, out_features=self.fc3_output_features) 125 | self.fc4 = nn.Linear(in_features=self.fc3_output_features, out_features=self.forecast_len) 126 | 127 | 128 | def 
forward(self, x): 129 | 130 | if type(x) == dict: 131 | x = BatchML(**x) 132 | 133 | # shape: batch_size, n_chans, seq_len, height, width 134 | nwp_data = x.nwp.data.float() 135 | out_nwp = F.relu(self.nwp_conv0(nwp_data)) 136 | for i in range(0, self.number_of_conv3d_layers - 1): 137 | layer = getattr(self, f"nwp_conv{i + 1}") 138 | out_nwp = F.relu(layer(out_nwp)) 139 | 140 | # fully connected layers 141 | out_nwp = out_nwp.reshape(nwp_data.shape[0], self.nwp_cnn_output_size) 142 | out_nwp = F.relu(self.nwp_fc1(out_nwp)) 143 | out = F.relu(self.nwp_fc2(out_nwp)) 144 | 145 | # which has shape (batch_size, 128) 146 | 147 | # Fully connected layers. 148 | out = F.relu(self.fc3(out)) 149 | out = self.fc4(out) 150 | 151 | out = out.reshape(nwp_data.shape[0], self.forecast_len) 152 | 153 | return out 154 | -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/model_sat_nwp.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | 7 | from predict_pv_yield.models.base_model import BaseModel 8 | from nowcasting_dataloader.batch import BatchML 9 | 10 | logging.basicConfig() 11 | _LOG = logging.getLogger("predict_pv_yield") 12 | 13 | 14 | class Model(BaseModel): 15 | 16 | name = "conv3d_sat_nwp" 17 | 18 | def __init__( 19 | self, 20 | include_pv_or_gsp_yield_history: bool = True, 21 | include_nwp: bool = True, 22 | forecast_minutes: int = 30, 23 | history_minutes: int = 60, 24 | number_of_conv3d_layers: int = 4, 25 | conv3d_channels: int = 32, 26 | image_size_pixels: int = 64, 27 | nwp_image_size_pixels: int = 64, 28 | number_sat_channels: int = 12, 29 | number_nwp_channels: int = 10, 30 | fc1_output_features: int = 128, 31 | fc2_output_features: int = 128, 32 | fc3_output_features: int = 64, 33 | output_variable: str = "pv_yield", 34 | embedding_dem: int = 16, 35 | include_pv_yield_history: int = True, 36 | include_future_satellite: int = True, 37 | ): 38 | """ 39 | 3d conv model, that takes in different data streams 40 | 41 | architecture is roughly 42 | 1. satellite image time series goes into many 3d convolution layers. 43 | 2. nwp time series goes into many 3d convolution layers. 44 | 3. Final convolutional layer goes to full connected layer. 
This is joined by other data inputs like 45 | - pv yield 46 | - time variables 47 | Then there ~4 fully connected layers which end up forecasting the pv yield / gsp into the future 48 | 49 | include_pv_or_gsp_yield_history: include pv yield data 50 | include_nwp: include nwp data 51 | forecast_len: the amount of minutes that should be forecasted 52 | history_len: the amount of historical minutes that are used 53 | number_of_conv3d_layers, number of convolution 3d layers that are use 54 | conv3d_channels, the amount of convolution 3d channels 55 | image_size_pixels: the input satellite image size 56 | nwp_image_size_pixels: the input nwp image size 57 | number_sat_channels: number of nwp channels 58 | fc1_output_features: number of fully connected outputs nodes out of the the first fully connected layer 59 | fc2_output_features: number of fully connected outputs nodes out of the the second fully connected layer 60 | fc3_output_features: number of fully connected outputs nodes out of the the third fully connected layer 61 | output_variable: the output variable to be predicted 62 | number_nwp_channels: The number of nwp channels there are 63 | include_future_satellite: option to include future satellite images, or not 64 | """ 65 | 66 | self.include_pv_or_gsp_yield_history = include_pv_or_gsp_yield_history 67 | self.include_nwp = include_nwp 68 | self.number_of_conv3d_layers = number_of_conv3d_layers 69 | self.number_of_nwp_features = 128 70 | self.fc1_output_features = fc1_output_features 71 | self.fc2_output_features = fc2_output_features 72 | self.fc3_output_features = fc3_output_features 73 | self.forecast_minutes = forecast_minutes 74 | self.history_minutes = history_minutes 75 | self.output_variable = output_variable 76 | self.number_nwp_channels = number_nwp_channels 77 | self.embedding_dem = embedding_dem 78 | self.include_pv_yield_history = include_pv_yield_history 79 | self.include_future_satellite = include_future_satellite 80 | 81 | super().__init__() 82 | 83 | conv3d_channels = conv3d_channels 84 | 85 | if include_future_satellite: 86 | cnn_output_size_time = self.forecast_len_5 + self.history_len_5 + 1 87 | else: 88 | cnn_output_size_time = self.history_len_5 + 1 89 | self.cnn_output_size = ( 90 | conv3d_channels 91 | * ((image_size_pixels - 2 * self.number_of_conv3d_layers) ** 2) 92 | * cnn_output_size_time 93 | ) 94 | 95 | self.nwp_cnn_output_size = ( 96 | conv3d_channels 97 | * ((nwp_image_size_pixels - 2 * self.number_of_conv3d_layers) ** 2) 98 | * (self.forecast_len_60 + self.history_len_60 + 1) 99 | ) 100 | 101 | # conv0 102 | self.sat_conv0 = nn.Conv3d( 103 | in_channels=number_sat_channels, 104 | out_channels=conv3d_channels, 105 | kernel_size=(3, 3, 3), 106 | padding=(1, 0, 0), 107 | ) 108 | for i in range(0, self.number_of_conv3d_layers - 1): 109 | layer = nn.Conv3d( 110 | in_channels=conv3d_channels, 111 | out_channels=conv3d_channels, 112 | kernel_size=(3, 3, 3), 113 | padding=(1, 0, 0), 114 | ) 115 | setattr(self, f"sat_conv{i + 1}", layer) 116 | 117 | self.fc1 = nn.Linear( 118 | in_features=self.cnn_output_size, out_features=self.fc1_output_features 119 | ) 120 | self.fc2 = nn.Linear( 121 | in_features=self.fc1_output_features, out_features=self.fc2_output_features 122 | ) 123 | 124 | # nwp 125 | if include_nwp: 126 | self.nwp_conv0 = nn.Conv3d( 127 | in_channels=number_nwp_channels, 128 | out_channels=conv3d_channels, 129 | kernel_size=(3, 3, 3), 130 | padding=(1, 0, 0), 131 | ) 132 | for i in range(0, self.number_of_conv3d_layers - 1): 133 | layer = nn.Conv3d( 
134 | in_channels=conv3d_channels, 135 | out_channels=conv3d_channels, 136 | kernel_size=(3, 3, 3), 137 | padding=(1, 0, 0), 138 | ) 139 | setattr(self, f"nwp_conv{i + 1}", layer) 140 | 141 | self.nwp_fc1 = nn.Linear( 142 | in_features=self.nwp_cnn_output_size, out_features=self.fc1_output_features 143 | ) 144 | self.nwp_fc2 = nn.Linear( 145 | in_features=self.fc1_output_features, out_features=self.number_of_nwp_features 146 | ) 147 | 148 | if self.embedding_dem: 149 | self.pv_system_id_embedding = nn.Embedding( 150 | num_embeddings=940, embedding_dim=self.embedding_dem 151 | ) 152 | 153 | if self.include_pv_yield_history: 154 | self.pv_fc1 = nn.Linear( 155 | in_features=self.number_of_pv_samples_per_batch * (self.history_len_5 + 1), 156 | out_features=128, 157 | ) 158 | 159 | fc3_in_features = self.fc2_output_features 160 | if include_pv_or_gsp_yield_history: 161 | fc3_in_features += self.number_of_samples_per_batch * (self.history_len_30 + 1) 162 | if include_nwp: 163 | fc3_in_features += 128 164 | if self.embedding_dem: 165 | fc3_in_features += self.embedding_dem 166 | if self.include_pv_yield_history: 167 | fc3_in_features += 128 168 | 169 | self.fc3 = nn.Linear(in_features=fc3_in_features, out_features=self.fc3_output_features) 170 | self.fc4 = nn.Linear(in_features=self.fc3_output_features, out_features=self.forecast_len) 171 | # self.fc5 = nn.Linear(in_features=32, out_features=8) 172 | # self.fc6 = nn.Linear(in_features=8, out_features=1) 173 | 174 | def forward(self, x): 175 | 176 | if type(x) == dict: 177 | x = BatchML(**x) 178 | 179 | # ******************* Satellite imagery ************************* 180 | # Shape: batch_size, channel, seq_length, height, width 181 | sat_data = x.satellite.data.float() 182 | batch_size, n_chans, seq_len, height, width = sat_data.shape 183 | 184 | if not self.include_future_satellite: 185 | sat_data = sat_data[:, :, : self.history_len_5 + 1] 186 | 187 | # :) Pass data through the network :) 188 | out = F.relu(self.sat_conv0(sat_data)) 189 | for i in range(0, self.number_of_conv3d_layers - 1): 190 | layer = getattr(self, f"sat_conv{i + 1}") 191 | out = F.relu(layer(out)) 192 | 193 | out = out.reshape(batch_size, self.cnn_output_size) 194 | 195 | # Fully connected layers 196 | out = F.relu(self.fc1(out)) 197 | out = F.relu(self.fc2(out)) 198 | # which has shape (batch_size, 128) 199 | 200 | # add pv yield 201 | if self.include_pv_or_gsp_yield_history: 202 | if self.output_variable == "gsp_yield": 203 | pv_yield_history = ( 204 | x.gsp.gsp_yield[:, : self.history_len_30 + 1].nan_to_num(nan=0.0).float() 205 | ) 206 | else: 207 | pv_yield_history = ( 208 | x.pv.pv_yield[:, : self.history_len_30 + 1].nan_to_num(nan=0.0).float() 209 | ) 210 | 211 | pv_yield_history = pv_yield_history.reshape( 212 | pv_yield_history.shape[0], pv_yield_history.shape[1] * pv_yield_history.shape[2] 213 | ) 214 | # join up 215 | out = torch.cat((out, pv_yield_history), dim=1) 216 | 217 | # add the pv yield history. 
This can be used if trying to predict gsp 218 | if self.include_pv_yield_history: 219 | # just take the first 128 pv systems 220 | pv_yield_history = ( 221 | x.pv.pv_yield[:, : self.history_len_5 + 1, :128].nan_to_num(nan=0.0).float() 222 | ) 223 | 224 | pv_yield_history = pv_yield_history.reshape( 225 | pv_yield_history.shape[0], pv_yield_history.shape[1] * pv_yield_history.shape[2] 226 | ) 227 | pv_yield_history = F.relu(self.pv_fc1(pv_yield_history)) 228 | 229 | out = torch.cat((out, pv_yield_history), dim=1) 230 | 231 | # *********************** NWP Data ************************************ 232 | if self.include_nwp: 233 | 234 | # shape: batch_size, n_chans, seq_len, height, width 235 | nwp_data = x.nwp.data.float() 236 | 237 | out_nwp = F.relu(self.nwp_conv0(nwp_data)) 238 | for i in range(0, self.number_of_conv3d_layers - 1): 239 | layer = getattr(self, f"nwp_conv{i + 1}") 240 | out_nwp = F.relu(layer(out_nwp)) 241 | 242 | # fully connected layers 243 | out_nwp = out_nwp.reshape(batch_size, self.nwp_cnn_output_size) 244 | out_nwp = F.relu(self.nwp_fc1(out_nwp)) 245 | out_nwp = F.relu(self.nwp_fc2(out_nwp)) 246 | 247 | # join with other FC layer 248 | out = torch.cat((out, out_nwp), dim=1) 249 | 250 | # ********************** Embedding of PV system ID ******************** 251 | if self.embedding_dem: 252 | if self.output_variable == "pv_yield": 253 | id = x.pv.pv_system_row_number[0 : self.batch_size, 0] 254 | else: 255 | id = x.gsp.gsp_id[0 : self.batch_size, 0] 256 | 257 | id = id.type(torch.IntTensor) 258 | id = id.to(out.device) 259 | id_embedding = self.pv_system_id_embedding(id) 260 | out = torch.cat((out, id_embedding), dim=1) 261 | 262 | # Fully connected layers. 263 | out = F.relu(self.fc3(out)) 264 | out = self.fc4(out) 265 | 266 | out = out.reshape(batch_size, self.forecast_len) 267 | 268 | return out 269 | -------------------------------------------------------------------------------- /predict_pv_yield/models/conv3d/readme.md: -------------------------------------------------------------------------------- 1 | # Convolution 3d network 2 | 3 | The idea is to use 3d convolution networks. 4 | 5 | ![](architect.png) 6 | 7 | Satellite images go into several 3D convolutions with no padding. 8 | They are then passed into two fully connected layers. 9 | 10 | NWP data, historic pv yield and time features are added to the fully connected layers. 11 | 12 | The final output is the PV yield for the future. 13 | 14 | ## Conv_nwp 15 | 16 | We have built a model that takes nwp input data only. 17 | This has several 3D CNN layers and then a few fully connected layers.
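A minimal sketch of building this model (assuming the `nowcasting_*` dependencies are installed); the argument names and values below simply mirror the defaults in `model_nwp.py`:

```python
from predict_pv_yield.models.conv3d.model_nwp import Model

# defaults taken from the constructor in model_nwp.py
model = Model(
    include_nwp=True,
    forecast_minutes=30,        # forecast horizon in minutes
    history_minutes=60,         # amount of history in minutes
    number_of_conv3d_layers=4,
    conv3d_channels=32,
    nwp_image_size_pixels=64,
    number_nwp_channels=10,
    output_variable="gsp_yield",
)
```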
18 | 19 | The only nwp channel is `dswrf` 20 | 21 | training run is https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-951/charts 22 | -------------------------------------------------------------------------------- /predict_pv_yield/models/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/predict_pv_yield/models/layers/__init__.py -------------------------------------------------------------------------------- /predict_pv_yield/models/perceiver/perceiver.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | from perceiver_pytorch import Perceiver 8 | 9 | from predict_pv_yield.models.base_model import BaseModel 10 | from nowcasting_dataloader.batch import BatchML 11 | 12 | from nowcasting_dataset.consts import NWP_VARIABLE_NAMES, SAT_VARIABLE_NAMES 13 | 14 | 15 | params = dict( 16 | # DATA 17 | # TODO: Everything that relates to the dataset should come automatically 18 | # from a yaml file stored with the dataset. 19 | batch_size=32, 20 | history_minutes=30, #: Number of timesteps of history, not including t0. 21 | forecast_minutes=120, #: Number of timesteps of forecast. 22 | image_size_pixels=64, 23 | nwp_channels=NWP_VARIABLE_NAMES[0:10], 24 | sat_channels=SAT_VARIABLE_NAMES[1:], 25 | ) 26 | 27 | 28 | SAT_X_MEAN = np.float32(309000) 29 | SAT_X_STD = np.float32(316387.42073603) 30 | SAT_Y_MEAN = np.float32(519000) 31 | SAT_Y_STD = np.float32(406454.17945938) 32 | 33 | 34 | TOTAL_SEQ_LEN = params["history_minutes"] // 5 + params["forecast_minutes"] // 5 + 1 35 | NWP_SIZE = len(params["nwp_channels"]) * 64 * 64 # channels x width x height 36 | N_DATETIME_FEATURES = 4 37 | PERCEIVER_OUTPUT_SIZE = 512 38 | FC_OUTPUT_SIZE = 8 39 | RNN_HIDDEN_SIZE = 16 40 | 41 | 42 | class PerceiverModel(BaseModel): 43 | 44 | name = "perceiver" 45 | 46 | def __init__( 47 | self, 48 | history_minutes: int = params["history_minutes"], 49 | forecast_minutes: int = params["forecast_minutes"], 50 | nwp_channels: Iterable[str] = params["nwp_channels"], 51 | batch_size: int = 32, 52 | num_latents: int = 128, 53 | latent_dim: int = 64, 54 | embedding_dem: int = 16, 55 | output_variable: str = "pv_yield", 56 | ): 57 | self.history_minutes = history_minutes 58 | self.forecast_minutes = forecast_minutes 59 | self.nwp_channels = nwp_channels 60 | self.batch_size = batch_size 61 | self.num_latents = num_latents 62 | self.latent_dim = latent_dim 63 | self.embedding_dem = embedding_dem 64 | self.output_variable = output_variable 65 | 66 | self.total_seq_length = self.history_minutes // 5 + self.forecast_minutes //5 + 1 67 | 68 | super().__init__() 69 | 70 | self.perceiver = Perceiver( 71 | input_channels=len(params["sat_channels"]), 72 | input_axis=2, 73 | num_freq_bands=6, 74 | max_freq=10, 75 | depth=self.total_seq_length, 76 | num_latents=self.num_latents, 77 | latent_dim=self.latent_dim, 78 | num_classes=PERCEIVER_OUTPUT_SIZE, 79 | weight_tie_layers=True, 80 | ) 81 | 82 | self.fc1 = nn.Linear(in_features=PERCEIVER_OUTPUT_SIZE, out_features=256) 83 | 84 | self.fc2 = nn.Linear(in_features=256 + self.embedding_dem, out_features=128) 85 | 86 | self.fc3 = nn.Linear(in_features=128, out_features=64) 87 | self.fc4 = nn.Linear(in_features=64, out_features=32) 88 | self.fc5 = 
nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 89 | 90 | if self.embedding_dem: 91 | self.pv_system_id_embedding = nn.Embedding(num_embeddings=2048, embedding_dim=self.embedding_dem) 92 | 93 | # TODO: Get rid of RNNs! 94 | self.encoder_rnn = nn.GRU( 95 | # plus 1 for history 96 | input_size=FC_OUTPUT_SIZE + 1 + NWP_SIZE, 97 | hidden_size=RNN_HIDDEN_SIZE, 98 | num_layers=2, 99 | batch_first=True, 100 | ) 101 | self.decoder_rnn = nn.GRU( 102 | input_size=FC_OUTPUT_SIZE + NWP_SIZE, 103 | hidden_size=RNN_HIDDEN_SIZE, 104 | num_layers=2, 105 | batch_first=True, 106 | ) 107 | 108 | self.decoder_fc1 = nn.Linear(in_features=RNN_HIDDEN_SIZE, out_features=8) 109 | self.decoder_fc2 = nn.Linear(in_features=8, out_features=1) 110 | 111 | def forward(self, x: BatchML): 112 | 113 | if type(x) == dict: 114 | x = BatchML(**x) 115 | 116 | # ******************* Satellite imagery ************************* 117 | # Shape: batch_size, channel, seq_length, height, width 118 | # TODO: Use optical flow, not actual sat images of the future! 119 | sat_data = x.satellite.data[0 : self.batch_size].float() 120 | batch_size, n_chans, seq_len, width, height = sat_data.shape 121 | 122 | # Stack timesteps as examples (to make a large batch) 123 | sat_data = sat_data.permute(0, 2, 3, 4, 1) # move channels to the end 124 | new_batch_size = batch_size * seq_len 125 | # 0 1 2 3 126 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 127 | 128 | # Pass data through the network :) 129 | out = self.perceiver(sat_data) 130 | 131 | out = out.reshape(new_batch_size, PERCEIVER_OUTPUT_SIZE) 132 | out = F.relu(self.fc1(out)) 133 | 134 | # ********************** Embedding of PV system ID ******************** 135 | if self.embedding_dem: 136 | if self.output_variable == 'pv_yield': 137 | id = x.pv.pv_system_row_number[0 : self.batch_size, 0] 138 | else: 139 | id = x.gsp.gsp_id[0: self.batch_size, 0] 140 | id = id.type(torch.IntTensor).repeat_interleave(self.total_seq_length) 141 | id = id.to(out.device) 142 | id_embedding = self.pv_system_id_embedding(id) 143 | print(f'{id_embedding.shape=}') 144 | print(f'{out.shape=}') 145 | out = torch.cat((out, id_embedding), dim=1) 146 | 147 | # Fully connected layers. 148 | out = F.relu(self.fc2(out)) 149 | out = F.relu(self.fc3(out)) 150 | out = F.relu(self.fc4(out)) 151 | out = F.relu(self.fc5(out)) 152 | 153 | # ******************* PREP DATA FOR RNN ******************************* 154 | out = out.reshape(batch_size, self.total_seq_length, FC_OUTPUT_SIZE) 155 | 156 | # The RNN encoder gets recent history: satellite, NWP, 157 | # datetime features, and recent PV history. The RNN decoder 158 | # gets what we know about the future: satellite, NWP, and 159 | # datetime features. 160 | 161 | # *********************** NWP Data ************************************ 162 | # Shape: batch_size, channel, seq_length, width, height 163 | nwp_data = x.nwp.data[0 : self.batch_size].float().float() 164 | 165 | # RNN expects seq_len to be dim 1. 166 | nwp_data = nwp_data.permute(0, 2, 1, 3, 4) 167 | batch_size, nwp_seq_len, n_nwp_chans, nwp_width, nwp_height = nwp_data.shape 168 | 169 | # nwp to have the same sel_len as sat. 
I think there is a better solution than this 170 | nwp_data_zeros = torch.zeros(size=(batch_size, seq_len - nwp_seq_len, n_nwp_chans, nwp_width, nwp_height), device=nwp_data.device) 171 | nwp_data = torch.cat([nwp_data, nwp_data_zeros], dim=1) 172 | 173 | nwp_data = nwp_data.reshape(batch_size, seq_len, n_nwp_chans * nwp_width * nwp_height) 174 | 175 | # Concat 176 | rnn_input = torch.cat( 177 | ( 178 | out, 179 | nwp_data, 180 | ), 181 | dim=2, 182 | ) 183 | 184 | if self.output_variable == 'pv_yield': 185 | # take the history of the pv yield of this system, 186 | pv_yield_history = x.pv.pv_yield[0: self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float() 187 | encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2) 188 | elif self.output_variable == 'gsp_yield': 189 | # take the history of the gsp yield of this system, 190 | gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float() 191 | encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2) 192 | 193 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 194 | decoder_output, _ = self.decoder_rnn(rnn_input[:, -self.forecast_len :], encoder_hidden) 195 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 196 | 197 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 198 | decoder_output = self.decoder_fc2(decoder_output) 199 | 200 | return decoder_output.squeeze(dim=-1) 201 | -------------------------------------------------------------------------------- /predict_pv_yield/models/perceiver/perceiver_conv3d_nwp_sat.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | from perceiver_pytorch import Perceiver 8 | 9 | from predict_pv_yield.models.base_model import BaseModel 10 | from nowcasting_dataloader.batch import BatchML 11 | 12 | from nowcasting_dataset.consts import NWP_VARIABLE_NAMES, SAT_VARIABLE_NAMES 13 | 14 | 15 | params = dict( 16 | # DATA 17 | # TODO: Everything that relates to the dataset should come automatically 18 | # from a yaml file stored with the dataset. 19 | batch_size=32, 20 | history_minutes=30, #: Number of timesteps of history, not including t0. 21 | forecast_minutes=120, #: Number of timesteps of forecast. 
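    # With the defaults above and 5-minutely data, TOTAL_SEQ_LEN (defined below)
    # works out to 30 // 5 + 120 // 5 + 1 = 6 + 24 + 1 = 31 timesteps,
    # i.e. history + forecast + the t0 frame.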
22 | image_size_pixels=64, 23 | nwp_channels=NWP_VARIABLE_NAMES[0:10], 24 | sat_channels=SAT_VARIABLE_NAMES[1:], 25 | ) 26 | 27 | 28 | SAT_X_MEAN = np.float32(309000) 29 | SAT_X_STD = np.float32(316387.42073603) 30 | SAT_Y_MEAN = np.float32(519000) 31 | SAT_Y_STD = np.float32(406454.17945938) 32 | 33 | 34 | TOTAL_SEQ_LEN = params["history_minutes"] // 5 + params["forecast_minutes"] // 5 + 1 35 | NWP_SIZE = len(params["nwp_channels"]) * 2 * 2 # channels x width x height 36 | N_DATETIME_FEATURES = 4 37 | PERCEIVER_OUTPUT_SIZE = 512 38 | FC_OUTPUT_SIZE = 8 39 | RNN_HIDDEN_SIZE = 16 40 | 41 | 42 | class Conv3dMaxPool(nn.Module): 43 | 44 | def __init__(self, out_channels:int, in_channels:int): 45 | super().__init__() 46 | # convultion later, and pad so the output is the same size 47 | self.sat_conv3d = nn.Conv3d( 48 | in_channels=in_channels, 49 | out_channels=out_channels, 50 | kernel_size=(3, 3, 3), padding=(1, 1, 1) 51 | ) 52 | # take max pool, keep time sequence the same length 53 | self.sat_maxpool = nn.MaxPool3d(3, stride=(1, 2, 2), padding=(1, 1, 1)) 54 | def forward(self, x): 55 | 56 | x = self.sat_conv3d(x) 57 | return self.sat_maxpool(x) 58 | 59 | 60 | class Model(BaseModel): 61 | 62 | name = "perceiver_conv3d_nwp_sat" 63 | 64 | def __init__( 65 | self, 66 | history_minutes: int, 67 | forecast_minutes: int, 68 | nwp_channels: Iterable[str] = params["nwp_channels"], 69 | batch_size: int = 32, 70 | num_latents: int = 128, 71 | latent_dim: int = 64, 72 | embedding_dem: int = 16, 73 | output_variable: str = "pv_yield", 74 | conv3d_channels: int = 16, 75 | use_future_satellite_images: bool = True, # option not to use future sat images 76 | ): 77 | """ 78 | Idea is to have a conv3d (+max pool) layer before both sat and nwp data go into perceiver model. 79 | """ 80 | self.history_minutes = history_minutes 81 | self.forecast_minutes = forecast_minutes 82 | self.nwp_channels = nwp_channels 83 | self.batch_size = batch_size 84 | self.num_latents = num_latents 85 | self.latent_dim = latent_dim 86 | self.embedding_dem = embedding_dem 87 | self.output_variable = output_variable 88 | self.use_future_satellite_images = use_future_satellite_images 89 | 90 | self.total_seq_length = self.history_minutes // 5 + self.forecast_minutes // 5 + 1 91 | 92 | super().__init__() 93 | 94 | self.sat_conv3d_maxpool = Conv3dMaxPool(out_channels=conv3d_channels, in_channels=len(params['sat_channels'])) 95 | self.nwp_conv3d_maxpool = Conv3dMaxPool(out_channels=conv3d_channels, in_channels=len(nwp_channels)) 96 | 97 | self.perceiver = Perceiver( 98 | input_channels=2*conv3d_channels, 99 | input_axis=2, 100 | num_freq_bands=6, 101 | max_freq=10, 102 | depth= self.total_seq_length, 103 | num_latents=self.num_latents, 104 | latent_dim=self.latent_dim, 105 | num_classes=PERCEIVER_OUTPUT_SIZE, 106 | weight_tie_layers=True, 107 | ) 108 | 109 | self.fc1 = nn.Linear(in_features=PERCEIVER_OUTPUT_SIZE, out_features=256) 110 | 111 | self.fc2 = nn.Linear(in_features=256 + self.embedding_dem, out_features=128) 112 | 113 | self.fc3 = nn.Linear(in_features=128, out_features=64) 114 | self.fc4 = nn.Linear(in_features=64, out_features=32) 115 | self.fc5 = nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 116 | 117 | if self.embedding_dem: 118 | self.pv_system_id_embedding = nn.Embedding(num_embeddings=2048, embedding_dim=self.embedding_dem) 119 | 120 | # TODO: Get rid of RNNs! 
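        # The GRUs below form a small seq2seq pair: the encoder consumes the
        # history part of the sequence (perceiver features plus the observed
        # PV/GSP value, hence the "+ 1" on its input_size), and its final hidden
        # state initialises the decoder, which steps over the forecast horizon
        # using only features that are known ahead of time.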
121 | self.encoder_rnn = nn.GRU( 122 | # plus 1 for history 123 | input_size=FC_OUTPUT_SIZE + 1, 124 | hidden_size=RNN_HIDDEN_SIZE, 125 | num_layers=2, 126 | batch_first=True, 127 | ) 128 | self.decoder_rnn = nn.GRU( 129 | input_size=FC_OUTPUT_SIZE, 130 | hidden_size=RNN_HIDDEN_SIZE, 131 | num_layers=2, 132 | batch_first=True, 133 | ) 134 | 135 | self.decoder_fc1 = nn.Linear(in_features=RNN_HIDDEN_SIZE, out_features=8) 136 | self.decoder_fc2 = nn.Linear(in_features=8, out_features=1) 137 | 138 | def forward(self, x): 139 | 140 | if type(x) == dict: 141 | x = BatchML(**x) 142 | 143 | # ******************* Satellite imagery ************************* 144 | # Shape: batch_size, channel, seq_length, height, width 145 | # TODO: Use optical flow, not actual sat images of the future! 146 | sat_data = x.satellite.data[0 : self.batch_size].float() 147 | 148 | if not self.use_future_satellite_images: 149 | sat_data[:, -self.forecast_len_5: ] = 0 # This might not be the best way to do it 150 | 151 | sat_data = self.sat_conv3d_maxpool(sat_data) 152 | sat_data = sat_data.permute(0, 2, 3, 4, 1) 153 | 154 | # Stack timesteps as examples (to make a large batch) 155 | batch_size, seq_len, width, height, n_chans = sat_data.shape 156 | new_batch_size = batch_size * seq_len 157 | # 0 1 2 3 158 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 159 | 160 | # *********************** NWP Data ************************************ 161 | # Shape: batch_size, seq_length, width, height, channel 162 | nwp_data = x.nwp.data[0 : self.batch_size].float() 163 | nwp_data = self.nwp_conv3d_maxpool(nwp_data) 164 | # Perciever expects seq_len to be dim 1, and channels at the end 165 | nwp_data = nwp_data.permute(0, 2, 3, 4, 1) 166 | batch_size, nwp_seq_len, nwp_width, nwp_height, n_nwp_chans = nwp_data.shape 167 | 168 | # nwp to have the same sel_len as sat. I think there is a better solution than this 169 | nwp_data_zeros = torch.zeros(size=(batch_size, seq_len - nwp_seq_len, nwp_width, nwp_height, n_nwp_chans), device=nwp_data.device) 170 | nwp_data = torch.cat([nwp_data, nwp_data_zeros], dim=1) 171 | 172 | nwp_data = nwp_data.reshape(new_batch_size, nwp_width, nwp_height, n_nwp_chans) 173 | 174 | assert nwp_width == width, f'widths should be the same({nwp_width},{width})' 175 | assert nwp_height == height, f'heights should be the same({nwp_height},{height})' 176 | 177 | data = torch.cat((sat_data, nwp_data), dim=-1) 178 | 179 | # Perceiver 180 | # Pass data through the network :) 181 | out = self.perceiver(data) 182 | 183 | out = out.reshape(new_batch_size, PERCEIVER_OUTPUT_SIZE) 184 | out = F.relu(self.fc1(out)) 185 | 186 | # ********************** Embedding of PV system ID ******************** 187 | if self.embedding_dem: 188 | pv_row = ( 189 | x.pv.pv_system_row_number[0 : self.batch_size, 0].type(torch.IntTensor).repeat_interleave(self.total_seq_length) 190 | ) 191 | pv_row = pv_row.to(out.device) 192 | pv_embedding = self.pv_system_id_embedding(pv_row) 193 | out = torch.cat((out, pv_embedding), dim=1) 194 | 195 | # Fully connected layers. 196 | out = F.relu(self.fc2(out)) 197 | out = F.relu(self.fc3(out)) 198 | out = F.relu(self.fc4(out)) 199 | out = F.relu(self.fc5(out)) 200 | 201 | # ******************* PREP DATA FOR RNN ******************************* 202 | out = out.reshape(batch_size, self.total_seq_length, FC_OUTPUT_SIZE) 203 | 204 | # The RNN encoder gets recent history: satellite, NWP, 205 | # datetime features, and recent PV history. 
The RNN decoder 206 | # gets what we know about the future: satellite, NWP, and 207 | # datetime features. 208 | 209 | ####### Time inputs 210 | 211 | # Concat 212 | rnn_input = torch.cat( 213 | ( 214 | out, 215 | ), 216 | dim=2, 217 | ) 218 | 219 | if self.output_variable == 'pv_yield': 220 | # take the history of the pv yield of this system, 221 | pv_yield_history = x.pv.pv_yield[0 : self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float() 222 | encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2) 223 | elif self.output_variable == 'gsp_yield': 224 | # take the history of the gsp yield of this system, 225 | gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float() 226 | encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2) 227 | 228 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 229 | decoder_output, _ = self.decoder_rnn(rnn_input[:, -self.forecast_len :], encoder_hidden) 230 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 231 | 232 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 233 | decoder_output = self.decoder_fc2(decoder_output) 234 | 235 | return decoder_output.squeeze(dim=-1) 236 | -------------------------------------------------------------------------------- /predict_pv_yield/models/perceiver/perceiver_nwp_sat.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | from perceiver_pytorch import Perceiver 8 | 9 | from predict_pv_yield.models.base_model import BaseModel 10 | from nowcasting_dataloader.batch import BatchML 11 | 12 | from nowcasting_dataset.consts import NWP_VARIABLE_NAMES, SAT_VARIABLE_NAMES 13 | 14 | 15 | params = dict( 16 | # DATA 17 | # TODO: Everything that relates to the dataset should come automatically 18 | # from a yaml file stored with the dataset. 19 | batch_size=32, 20 | history_minutes=30, #: Number of timesteps of history, not including t0. 21 | forecast_minutes=120, #: Number of timesteps of forecast. 
22 | image_size_pixels=64, 23 | nwp_channels=NWP_VARIABLE_NAMES[0:10], 24 | sat_channels=SAT_VARIABLE_NAMES[1:], 25 | ) 26 | 27 | 28 | SAT_X_MEAN = np.float32(309000) 29 | SAT_X_STD = np.float32(316387.42073603) 30 | SAT_Y_MEAN = np.float32(519000) 31 | SAT_Y_STD = np.float32(406454.17945938) 32 | 33 | 34 | TOTAL_SEQ_LEN = params["history_minutes"] // 5 + params["forecast_minutes"] // 5 + 1 35 | NWP_SIZE = len(params["nwp_channels"]) * 2 * 2 # channels x width x height 36 | PERCEIVER_OUTPUT_SIZE = 512 37 | FC_OUTPUT_SIZE = 8 38 | RNN_HIDDEN_SIZE = 16 39 | 40 | 41 | class Model(BaseModel): 42 | 43 | name = "perceiver_nwp_sat" 44 | 45 | def __init__( 46 | self, 47 | history_minutes: int, 48 | forecast_minutes: int, 49 | nwp_channels: Iterable[str] = params["nwp_channels"], 50 | batch_size: int = 32, 51 | num_latents: int = 128, 52 | latent_dim: int = 64, 53 | embedding_dem: int = 16, 54 | output_variable: str = "pv_yield", 55 | ): 56 | self.history_minutes = history_minutes 57 | self.forecast_minutes = forecast_minutes 58 | self.nwp_channels = nwp_channels 59 | self.batch_size = batch_size 60 | self.num_latents = num_latents 61 | self.latent_dim = latent_dim 62 | self.embedding_dem = embedding_dem 63 | self.output_variable = output_variable 64 | 65 | self.total_seq_length = self.history_minutes // 5 + self.forecast_minutes // 5 + 1 66 | 67 | super().__init__() 68 | 69 | self.perceiver = Perceiver( 70 | input_channels=len(params["sat_channels"]) + len(nwp_channels), 71 | input_axis=2, 72 | num_freq_bands=6, 73 | max_freq=10, 74 | depth=self.total_seq_length, 75 | num_latents=self.num_latents, 76 | latent_dim=self.latent_dim, 77 | num_classes=PERCEIVER_OUTPUT_SIZE, 78 | weight_tie_layers=True, 79 | ) 80 | 81 | self.fc1 = nn.Linear(in_features=PERCEIVER_OUTPUT_SIZE, out_features=256) 82 | 83 | self.fc2 = nn.Linear(in_features=256 + self.embedding_dem, out_features=128) 84 | 85 | self.fc3 = nn.Linear(in_features=128, out_features=64) 86 | self.fc4 = nn.Linear(in_features=64, out_features=32) 87 | self.fc5 = nn.Linear(in_features=32, out_features=FC_OUTPUT_SIZE) 88 | 89 | if self.embedding_dem: 90 | self.pv_system_id_embedding = nn.Embedding(num_embeddings=2048, embedding_dim=self.embedding_dem) 91 | 92 | # TODO: Get rid of RNNs! 93 | self.encoder_rnn = nn.GRU( 94 | # plus 1 for history 95 | input_size=FC_OUTPUT_SIZE + 1, 96 | hidden_size=RNN_HIDDEN_SIZE, 97 | num_layers=2, 98 | batch_first=True, 99 | ) 100 | self.decoder_rnn = nn.GRU( 101 | input_size=FC_OUTPUT_SIZE, 102 | hidden_size=RNN_HIDDEN_SIZE, 103 | num_layers=2, 104 | batch_first=True, 105 | ) 106 | 107 | self.decoder_fc1 = nn.Linear(in_features=RNN_HIDDEN_SIZE, out_features=8) 108 | self.decoder_fc2 = nn.Linear(in_features=8, out_features=1) 109 | 110 | def forward(self, x: BatchML): 111 | 112 | if type(x) == dict: 113 | x = BatchML(**x) 114 | 115 | # ******************* Satellite imagery ************************* 116 | # Shape: batch_size, channel, seq_length, height, width 117 | # TODO: Use optical flow, not actual sat images of the future! 
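        # Note on the reshape a few lines below: the Perceiver here works on
        # single images, so the batch and time dimensions are folded together
        # into batch_size * seq_len "examples"; the sequence is only
        # reassembled later, just before the RNN stage.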
118 | sat_data = x.satellite.data[0 : self.batch_size].float() 119 | batch_size, n_chans, seq_len, width, height = sat_data.shape 120 | sat_data = sat_data.permute(0, 2, 3, 4, 1) 121 | 122 | # Stack timesteps as examples (to make a large batch) 123 | new_batch_size = batch_size * seq_len 124 | # 0 1 2 3 125 | sat_data = sat_data.reshape(new_batch_size, width, height, n_chans) 126 | 127 | # *********************** NWP Data ************************************ 128 | # Shape: batch_size, channel, seq_length, height, width 129 | nwp_data = x.nwp.data[0: self.batch_size].float() 130 | # Perceiver expects seq_len to be dim 1, and channels at the end 131 | nwp_data = nwp_data.permute(0, 2, 3, 4, 1) 132 | batch_size, nwp_seq_len, nwp_width, nwp_height, n_nwp_chans = nwp_data.shape 133 | 134 | # zero-pad nwp to have the same seq_len as sat. I think there is a better solution than this 135 | nwp_data_zeros = torch.zeros(size=(batch_size, seq_len - nwp_seq_len, nwp_width, nwp_height, n_nwp_chans), device=nwp_data.device) 136 | nwp_data = torch.cat([nwp_data, nwp_data_zeros], dim=1) 137 | 138 | nwp_data = nwp_data.reshape(new_batch_size, nwp_width, nwp_height, n_nwp_chans) 139 | 140 | assert nwp_width == width, f'nwp width {nwp_width} should match the sat width {width}' 141 | assert nwp_height == height, f'nwp height {nwp_height} should match the sat height {height}' 142 | 143 | data = torch.cat((sat_data, nwp_data), dim=-1) 144 | 145 | # Perceiver 146 | # Pass data through the network :) 147 | out = self.perceiver(data) 148 | 149 | out = out.reshape(new_batch_size, PERCEIVER_OUTPUT_SIZE) 150 | out = F.relu(self.fc1(out)) 151 | 152 | # ********************** Embedding of PV system ID ******************** 153 | if self.embedding_dem: 154 | pv_row = ( 155 | x.pv.pv_system_row_number[0 : self.batch_size, 0].type(torch.IntTensor).repeat_interleave(self.total_seq_length) 156 | ) 157 | pv_row = pv_row.to(out.device) 158 | pv_embedding = self.pv_system_id_embedding(pv_row) 159 | # print(out.shape)  # debug 160 | # print(pv_embedding.shape)  # debug 161 | out = torch.cat((out, pv_embedding), dim=1) 162 | 163 | # Fully connected layers. 164 | out = F.relu(self.fc2(out)) 165 | out = F.relu(self.fc3(out)) 166 | out = F.relu(self.fc4(out)) 167 | out = F.relu(self.fc5(out)) 168 | 169 | # ******************* PREP DATA FOR RNN ******************************* 170 | out = out.reshape(batch_size, self.total_seq_length, FC_OUTPUT_SIZE) 171 | 172 | # The RNN encoder gets recent history: satellite, NWP, 173 | # datetime features, and recent PV history. The RNN decoder 174 | # gets what we know about the future: satellite, NWP, and 175 | # datetime features.
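        # As a worked example (assuming BaseModel derives history_len_5 as
        # history_minutes // 5 and forecast_len as forecast_minutes // 5): with
        # the default 30 minutes of history and 120 minutes of forecast, the
        # encoder sees 30 // 5 + 1 = 7 history steps and the decoder rolls out
        # 120 // 5 = 24 forecast steps, together covering total_seq_length = 31.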
176 | 177 | ####### Time inputs 178 | 179 | # Concat 180 | rnn_input = torch.cat( 181 | ( 182 | out, 183 | ), 184 | dim=2, 185 | ) 186 | 187 | if self.output_variable == 'pv_yield': 188 | # take the history of the pv yield of this system, 189 | pv_yield_history = x.pv.pv_yield[0 : self.batch_size][:, : self.history_len_5 + 1, 0].unsqueeze(-1).float() 190 | encoder_input = torch.cat((rnn_input[:, : self.history_len_5 + 1], pv_yield_history), dim=2) 191 | elif self.output_variable == 'gsp_yield': 192 | # take the history of the gsp yield of this system, 193 | gsp_history = x.gsp.gsp_yield[0: self.batch_size][:, : self.history_len_30 + 1, 0].unsqueeze(-1).float() 194 | encoder_input = torch.cat((rnn_input[:, : self.history_len_30 + 1], gsp_history), dim=2) 195 | 196 | 197 | encoder_output, encoder_hidden = self.encoder_rnn(encoder_input) 198 | decoder_output, _ = self.decoder_rnn(rnn_input[:, -self.forecast_len :], encoder_hidden) 199 | # decoder_output is shape batch_size, seq_len, rnn_hidden_size 200 | 201 | decoder_output = F.relu(self.decoder_fc1(decoder_output)) 202 | decoder_output = self.decoder_fc2(decoder_output) 203 | 204 | return decoder_output.squeeze(dim=-1) 205 | -------------------------------------------------------------------------------- /predict_pv_yield/netcdf_dataset.py: -------------------------------------------------------------------------------- 1 | ######## 2 | # Moved this to 'nowcasting_dataset' repo - https://github.com/openclimatefix/nowcasting_dataset 3 | ######### 4 | 5 | # import gcsfs 6 | # import os 7 | # import numpy as np 8 | # import xarray as xr 9 | # from nowcasting_dataset import utils as nd_utils 10 | # from nowcasting_dataset import example 11 | # import torch 12 | # 13 | # 14 | # # TODO: Take these from nowcasting_dataset. 15 | # SAT_VARIABLE_NAMES = ( 16 | # 'HRV', 'IR_016', 'IR_039', 'IR_087', 'IR_097', 'IR_108', 'IR_120', 17 | # 'IR_134', 'VIS006', 'VIS008', 'WV_062', 'WV_073') 18 | # 19 | # SAT_MEAN = xr.DataArray( 20 | # data=[ 21 | # 93.23458, 131.71373, 843.7779 , 736.6148 , 771.1189 , 589.66034, 22 | # 862.29816, 927.69586, 90.70885, 107.58985, 618.4583 , 532.47394], 23 | # dims=['sat_variable'], 24 | # coords={'sat_variable': list(SAT_VARIABLE_NAMES)}).astype(np.float32) 25 | # 26 | # SAT_STD = xr.DataArray( 27 | # data=[ 28 | # 115.34247 , 139.92636 , 36.99538 , 57.366386, 30.346825, 29 | # 149.68007 , 51.70631 , 35.872967, 115.77212 , 120.997154, 30 | # 98.57828 , 99.76469], 31 | # dims=['sat_variable'], 32 | # coords={'sat_variable': list(SAT_VARIABLE_NAMES)}).astype(np.float32) 33 | # 34 | # 35 | # 36 | # class NetCDFDataset(torch.utils.data.Dataset): 37 | # """Loads data saved by the `prepare_ml_training_data.py` script.""" 38 | # 39 | # def __init__( 40 | # self, n_batches: int, src_path: str, tmp_path: str): 41 | # """ 42 | # Args: 43 | # n_batches: Number of batches available on disk. 44 | # src_path: The full path (including 'gs://') to the data on 45 | # Google Cloud storage. 46 | # tmp_path: The full path to the local temporary directory 47 | # (on a local filesystem). 48 | # """ 49 | # self.n_batches = n_batches 50 | # self.src_path = src_path 51 | # self.tmp_path = tmp_path 52 | # 53 | # def per_worker_init(self, worker_id: int): 54 | # self.gcs = gcsfs.GCSFileSystem() 55 | # 56 | # def __len__(self): 57 | # return self.n_batches 58 | # 59 | # def __getitem__(self, batch_idx: int) -> example.Example: 60 | # """Returns a whole batch at once. 61 | # 62 | # Args: 63 | # batch_idx: The integer index of the batch. 
Must be in the range 64 | # [0, self.n_batches). 65 | # 66 | # Returns: 67 | # NamedDict where each value is a numpy array. The size of this 68 | # array's first dimension is the batch size. 69 | # """ 70 | # if not 0 <= batch_idx < self.n_batches: 71 | # raise IndexError( 72 | # 'batch_idx must be in the range' 73 | # f' [0, {self.n_batches}), not {batch_idx}!') 74 | # netcdf_filename = nd_utils.get_netcdf_filename(batch_idx) 75 | # remote_netcdf_filename = os.path.join(self.src_path, netcdf_filename) 76 | # local_netcdf_filename = os.path.join(self.tmp_path, netcdf_filename) 77 | # self.gcs.get(remote_netcdf_filename, local_netcdf_filename) 78 | # netcdf_batch = xr.load_dataset(local_netcdf_filename) 79 | # os.remove(local_netcdf_filename) 80 | # 81 | # batch = example.Example( 82 | # sat_datetime_index=netcdf_batch.sat_time_coords, 83 | # nwp_target_time=netcdf_batch.nwp_time_coords) 84 | # for key in [ 85 | # 'nwp', 'nwp_x_coords', 'nwp_y_coords', 86 | # 'sat_data', 'sat_x_coords', 'sat_y_coords', 87 | # 'pv_yield', 'pv_system_id', 'pv_system_row_number', 88 | # 'pv_system_x_coords', 'pv_system_y_coords', 89 | # 'x_meters_center', 'y_meters_center' 90 | # ] + list(example.DATETIME_FEATURE_NAMES): 91 | # try: 92 | # batch[key] = netcdf_batch[key] 93 | # except KeyError: 94 | # pass 95 | # 96 | # sat_data = batch['sat_data'] 97 | # if sat_data.dtype == np.int16: 98 | # sat_data = sat_data.astype(np.float32) 99 | # sat_data = sat_data - SAT_MEAN 100 | # sat_data /= SAT_STD 101 | # batch['sat_data'] = sat_data 102 | # 103 | # batch = example.to_numpy(batch) 104 | # 105 | # return batch 106 | # 107 | # 108 | # def worker_init_fn(worker_id): 109 | # """Configures each dataset worker process. 110 | # 111 | # Just has one job! To call NowcastingDataset.per_worker_init(). 112 | # """ 113 | # # get_worker_info() returns information specific to each worker process. 114 | # worker_info = torch.utils.data.get_worker_info() 115 | # if worker_info is None: 116 | # print('worker_info is None!') 117 | # else: 118 | # # The NowcastingDataset copy in this worker process. 119 | # dataset_obj = worker_info.dataset 120 | # dataset_obj.per_worker_init(worker_info.id) 121 | -------------------------------------------------------------------------------- /predict_pv_yield/training.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import hydra 4 | from omegaconf import DictConfig 5 | from pytorch_lightning import ( 6 | Callback, 7 | LightningDataModule, 8 | LightningModule, 9 | Trainer, 10 | seed_everything, 11 | ) 12 | from pytorch_lightning.loggers import LightningLoggerBase 13 | 14 | from predict_pv_yield import utils 15 | import torch 16 | 17 | log = utils.get_logger(__name__) 18 | 19 | torch.set_default_dtype(torch.float32) 20 | 21 | 22 | def train(config: DictConfig) -> Optional[float]: 23 | """Contains training pipeline. 24 | Instantiates all PyTorch Lightning objects from config. 25 | 26 | Args: 27 | config (DictConfig): Configuration composed by Hydra. 28 | 29 | Returns: 30 | Optional[float]: Metric score for hyperparameter optimization. 
31 | """ 32 | 33 | # Set seed for random number generators in pytorch, numpy and python.random 34 | if "seed" in config: 35 | seed_everything(config.seed, workers=True) 36 | 37 | # Init lightning datamodule 38 | log.info(f"Instantiating datamodule <{config.datamodule._target_}>") 39 | datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule) 40 | 41 | # Init lightning model 42 | log.info(f"Instantiating model <{config.model._target_}>") 43 | model: LightningModule = hydra.utils.instantiate(config.model) 44 | 45 | # Init lightning callbacks 46 | callbacks: List[Callback] = [] 47 | if "callbacks" in config: 48 | for _, cb_conf in config.callbacks.items(): 49 | if "_target_" in cb_conf: 50 | log.info(f"Instantiating callback <{cb_conf._target_}>") 51 | callbacks.append(hydra.utils.instantiate(cb_conf)) 52 | 53 | # Init lightning loggers 54 | logger: List[LightningLoggerBase] = [] 55 | if "logger" in config: 56 | for _, lg_conf in config.logger.items(): 57 | if "_target_" in lg_conf: 58 | log.info(f"Instantiating logger <{lg_conf._target_}>") 59 | logger.append(hydra.utils.instantiate(lg_conf)) 60 | 61 | # Init lightning trainer 62 | log.info(f"Instantiating trainer <{config.trainer._target_}>") 63 | trainer: Trainer = hydra.utils.instantiate( 64 | config.trainer, callbacks=callbacks, logger=logger, _convert_="partial" 65 | ) 66 | 67 | # Send some parameters from config to all lightning loggers 68 | log.info("Logging hyperparameters!") 69 | utils.log_hyperparameters( 70 | config=config, 71 | model=model, 72 | datamodule=datamodule, 73 | trainer=trainer, 74 | callbacks=callbacks, 75 | logger=logger, 76 | ) 77 | 78 | # Train the model 79 | log.info("Starting training!") 80 | if 'validate_only' in config: 81 | trainer.validate(model=model, datamodule=datamodule) 82 | else: 83 | trainer.fit(model=model, datamodule=datamodule) 84 | 85 | # Evaluate model on test set, using the best model achieved during training 86 | if config.get("test_after_training") and not config.trainer.get("fast_dev_run"): 87 | log.info("Starting testing!") 88 | trainer.test() 89 | 90 | # Make sure everything closed properly 91 | log.info("Finalizing!") 92 | utils.finish( 93 | config=config, 94 | model=model, 95 | datamodule=datamodule, 96 | trainer=trainer, 97 | callbacks=callbacks, 98 | logger=logger, 99 | ) 100 | 101 | # Print path to best checkpoint 102 | log.info(f"Best checkpoint path:\n{trainer.checkpoint_callback.best_model_path}") 103 | 104 | # Return metric score for hyperparameter optimization 105 | optimized_metric = config.get("optimized_metric") 106 | if optimized_metric: 107 | return trainer.callback_metrics[optimized_metric] 108 | -------------------------------------------------------------------------------- /predict_pv_yield/utils.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import os 3 | import predict_pv_yield 4 | import logging 5 | import os 6 | import warnings 7 | from typing import List, Sequence 8 | 9 | import pytorch_lightning as pl 10 | import rich.syntax 11 | import rich.tree 12 | from omegaconf import DictConfig, OmegaConf 13 | from pytorch_lightning.utilities import rank_zero_only 14 | 15 | 16 | def load_config(config_file): 17 | """ 18 | Open yam configruation file, and get rid eof '_target_' line 19 | """ 20 | 21 | # get full path of config file 22 | path = os.path.dirname(predict_pv_yield.__file__) 23 | config_file = f"{path}/../{config_file}" 24 | 25 | 26 | with open(config_file, "r") as cfg: 27 | config = 
yaml.load(cfg, Loader=yaml.FullLoader) 28 | 29 | if "_target_" in config.keys(): 30 | config.pop("_target_") # This is only for Hydra 31 | 32 | return config 33 | 34 | 35 | def get_logger(name=__name__, level=logging.INFO) -> logging.Logger: 36 | """Initializes multi-GPU-friendly python logger.""" 37 | 38 | logger = logging.getLogger(name) 39 | logger.setLevel(level) 40 | 41 | # this ensures all logging levels get marked with the rank zero decorator 42 | # otherwise logs would get multiplied for each GPU process in multi-GPU setup 43 | for level in ("debug", "info", "warning", "error", "exception", "fatal", "critical"): 44 | setattr(logger, level, rank_zero_only(getattr(logger, level))) 45 | 46 | return logger 47 | 48 | 49 | def extras(config: DictConfig) -> None: 50 | """A couple of optional utilities, controlled by main config file: 51 | - disabling warnings 52 | - easier access to debug mode 53 | - forcing debug friendly configuration 54 | 55 | Modifies DictConfig in place. 56 | 57 | Args: 58 | config (DictConfig): Configuration composed by Hydra. 59 | """ 60 | 61 | log = get_logger() 62 | 63 | # enable adding new keys to config 64 | OmegaConf.set_struct(config, False) 65 | 66 | # disable python warnings if 67 | if config.get("ignore_warnings"): 68 | log.info("Disabling python warnings! ") 69 | warnings.filterwarnings("ignore") 70 | 71 | # set if 72 | if config.get("debug"): 73 | log.info("Running in debug mode! ") 74 | config.trainer.fast_dev_run = True 75 | 76 | # force debugger friendly configuration if 77 | if config.trainer.get("fast_dev_run"): 78 | log.info("Forcing debugger friendly configuration! ") 79 | # Debuggers don't like GPUs or multiprocessing 80 | if config.trainer.get("gpus"): 81 | config.trainer.gpus = 0 82 | if config.datamodule.get("pin_memory"): 83 | config.datamodule.pin_memory = False 84 | if config.datamodule.get("num_workers"): 85 | config.datamodule.num_workers = 0 86 | 87 | # disable adding new keys to config 88 | OmegaConf.set_struct(config, True) 89 | 90 | 91 | @rank_zero_only 92 | def print_config( 93 | config: DictConfig, 94 | fields: Sequence[str] = ( 95 | "trainer", 96 | "model", 97 | "datamodule", 98 | "callbacks", 99 | "logger", 100 | "seed", 101 | ), 102 | resolve: bool = True, 103 | ) -> None: 104 | """Prints content of DictConfig using Rich library and its tree structure. 105 | 106 | Args: 107 | config (DictConfig): Configuration composed by Hydra. 108 | fields (Sequence[str], optional): Determines which main fields from config will 109 | be printed and in what order. 110 | resolve (bool, optional): Whether to resolve reference fields of DictConfig. 
111 | """ 112 | 113 | style = "dim" 114 | tree = rich.tree.Tree("CONFIG", style=style, guide_style=style) 115 | 116 | for field in fields: 117 | branch = tree.add(field, style=style, guide_style=style) 118 | 119 | config_section = config.get(field) 120 | branch_content = str(config_section) 121 | if isinstance(config_section, DictConfig): 122 | branch_content = OmegaConf.to_yaml(config_section, resolve=resolve) 123 | 124 | branch.add(rich.syntax.Syntax(branch_content, "yaml")) 125 | 126 | rich.print(tree) 127 | 128 | with open("config_tree.txt", "w") as fp: 129 | rich.print(tree, file=fp) 130 | 131 | 132 | def empty(*args, **kwargs): 133 | pass 134 | 135 | 136 | @rank_zero_only 137 | def log_hyperparameters( 138 | config: DictConfig, 139 | model: pl.LightningModule, 140 | datamodule: pl.LightningDataModule, 141 | trainer: pl.Trainer, 142 | callbacks: List[pl.Callback], 143 | logger: List[pl.loggers.LightningLoggerBase], 144 | ) -> None: 145 | """This method controls which parameters from Hydra config are saved by Lightning loggers. 146 | 147 | Additionaly saves: 148 | - number of trainable model parameters 149 | """ 150 | 151 | hparams = {} 152 | 153 | # choose which parts of hydra config will be saved to loggers 154 | hparams["trainer"] = config["trainer"] 155 | hparams["model"] = config["model"] 156 | hparams["datamodule"] = config["datamodule"] 157 | if "seed" in config: 158 | hparams["seed"] = config["seed"] 159 | if "callbacks" in config: 160 | hparams["callbacks"] = config["callbacks"] 161 | 162 | # save number of model parameters 163 | hparams["model/params_total"] = sum(p.numel() for p in model.parameters()) 164 | hparams["model/params_trainable"] = sum( 165 | p.numel() for p in model.parameters() if p.requires_grad 166 | ) 167 | hparams["model/params_not_trainable"] = sum( 168 | p.numel() for p in model.parameters() if not p.requires_grad 169 | ) 170 | 171 | # send hparams to all loggers 172 | trainer.logger.log_hyperparams(hparams) 173 | 174 | # disable logging any more hyperparameters for all loggers 175 | # this is just a trick to prevent trainer from logging hparams of model, 176 | # since we already did that above 177 | trainer.logger.log_hyperparams = empty 178 | 179 | 180 | def finish( 181 | config: DictConfig, 182 | model: pl.LightningModule, 183 | datamodule: pl.LightningDataModule, 184 | trainer: pl.Trainer, 185 | callbacks: List[pl.Callback], 186 | logger: List[pl.loggers.LightningLoggerBase], 187 | ) -> None: 188 | """Makes sure everything closed properly.""" 189 | 190 | # without this sweeps with wandb logger might crash! 
191 | for lg in logger: 192 | if isinstance(lg, pl.loggers.wandb.WandbLogger): 193 | import wandb 194 | 195 | wandb.finish() 196 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nowcasting_dataset 2 | nowcasting_utils 3 | nowcasting_dataloader 4 | numpy 5 | pandas 6 | matplotlib 7 | xarray 8 | ipykernel 9 | h5netcdf 10 | torch 11 | pytorch-lightning 12 | perceiver_pytorch 13 | pytest 14 | pytest-cov 15 | flake8 16 | jedi 17 | tables 18 | tilemapbase 19 | neptune-client[pytorch-lightning] 20 | rich 21 | omegaconf 22 | hydra-core 23 | python-dotenv 24 | hydra-optuna-sweeper 25 | black 26 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ["HYDRA_FULL_ERROR"] = "1" 4 | import dotenv 5 | import hydra 6 | from omegaconf import DictConfig 7 | 8 | # this file can be run for example using 9 | # python run.py experiment=example_simple 10 | 11 | # load environment variables from `.env` file if it exists 12 | # recursively searches for `.env` in all folders starting from work dir 13 | dotenv.load_dotenv(override=True) 14 | 15 | 16 | @hydra.main(config_path="configs/", config_name="config.yaml") 17 | def main(config: DictConfig): 18 | 19 | # Imports should be nested inside @hydra.main to optimize tab completion 20 | # Read more here: https://github.com/facebookresearch/hydra/issues/934 21 | from predict_pv_yield.utils import extras, print_config 22 | from predict_pv_yield.training import train 23 | 24 | # A couple of optional utilities: 25 | # - disabling python warnings 26 | # - easier access to debug mode 27 | # - forcing debug friendly configuration 28 | # - forcing multi-gpu friendly configuration 29 | # You can safely get rid of this line if you don't want those 30 | extras(config) 31 | 32 | 33 | 34 | # Pretty print config using Rich library 35 | if config.get("print_config"): 36 | print_config(config, resolve=True) 37 | 38 | # Train model 39 | return train(config) 40 | 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='predict_pv_yield', 5 | version='0.1', 6 | packages=find_packages()) 7 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openclimatefix/predict_pv_yield/3fcdf110808b29220ac0f0931e26dc694366bffa/tests/__init__.py -------------------------------------------------------------------------------- /tests/configs/dataset/configuration.yaml: -------------------------------------------------------------------------------- 1 | general: 2 | description: example configuration 3 | name: example 4 | git: null 5 | input_data: 6 | gsp: 7 | gsp_zarr_path: tests/data/gsp/test.zarr 8 | nwp: 9 | nwp_channels: 10 | - t 11 | nwp_image_size_pixels: 2 12 | nwp_zarr_path: tests/data/nwp_data/test.zarr 13 | pv: 14 | pv_filename: tests/data/pv_data/test.nc 15 | pv_metadata_filename: tests/data/pv_metadata/UK_PV_metadata.csv 16 | satellite: 17 | satellite_channels: 18 | - HRV 19 | satellite_image_size_pixels: 64 
20 | satellite_zarr_path: tests/data/sat_data.zarr 21 | hrvsatellite: 22 | hrvsatellite_channels: 23 | - HRV 24 | hrvsatellite_image_size_pixels: 64 25 | hrvsatellite_zarr_path: tests/data/sat_data.zarr 26 | opticalflow: 27 | opticalflow_zarr_path: /mnt/storage_ssd_8tb/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/satellite/EUMETSAT/SEVIRI_RSS/zarr/v3/eumetsat_seviri_uk.zarr 28 | opticalflow_input_image_size_pixels: 94 29 | opticalflow_output_image_size_pixels: 24 30 | opticalflow_source_data_source_class_name: SatelliteDataSource 31 | opticalflow_channels: 32 | - IR_016 33 | sun: 34 | sun_zarr_path: tests/data/sun/test.zarr 35 | topographic: 36 | topographic_filename: tests/data/europe_dem_2km_osgb.tif 37 | default_history_minutes: 120 38 | default_forecast_minutes: 30 39 | output_data: 40 | filepath: not used by unittests! 41 | process: 42 | batch_size: 2 43 | local_temp_path: ~/temp/ 44 | seed: 1234 45 | upload_every_n_batches: 16 46 | -------------------------------------------------------------------------------- /tests/configs/experiment/example_simple.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # to execute this experiment run: 4 | # python run.py experiment=example_simple.yaml 5 | 6 | defaults: 7 | - override /trainer: default.yaml # choose trainer from 'configs/trainer/' 8 | - override /model: baseline.yaml 9 | - override /datamodule: netcdf_datamodule.yaml 10 | - override /callbacks: default.yaml 11 | - override /logger: neptune.yaml 12 | 13 | # all parameters below will be merged with parameters from default configurations set above 14 | # this allows you to overwrite only specified parameters 15 | 16 | seed: 518 17 | 18 | trainer: 19 | min_epochs: 1 20 | max_epochs: 2 21 | 22 | datamodule: 23 | n_train_data: 2 24 | n_val_data: 2 25 | 26 | 27 | model: 28 | conv3d_channels: 4 29 | -------------------------------------------------------------------------------- /tests/configs/model/conv3d.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_yield: False 4 | include_nwp: False 5 | forecast_minutes: 60 6 | history_minutes: 60 7 | number_of_conv3d_layers: 4 8 | conv3d_channels: 32 9 | image_size_pixels: 16 # this is made smaller for testing 10 | number_sat_channels: 11 11 | fc1_output_features: 16 12 | fc2_output_features: 16 13 | fc3_output_features: 16 14 | -------------------------------------------------------------------------------- /tests/configs/model/conv3d_gsp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_yield: False 4 | include_nwp: False 5 | forecast_minutes: 60 6 | history_minutes: 60 7 | number_of_conv3d_layers: 4 8 | conv3d_channels: 32 9 | image_size_pixels: 16 # this is made smaller for testing 10 | number_sat_channels: 11 11 | fc1_output_features: 16 12 | fc2_output_features: 16 13 | fc3_output_features: 16 14 | output_variable: gsp_yield 15 | -------------------------------------------------------------------------------- /tests/configs/model/conv3d_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_or_gsp_yield_history: False 4 | include_nwp: True 5 | forecast_minutes: 60 6 | history_minutes: 60 7 | number_of_conv3d_layers: 4 8 | 
conv3d_channels: 32 9 | nwp_image_size_pixels: 16 10 | number_nwp_channels: 10 11 | fc1_output_features: 16 12 | fc2_output_features: 16 13 | fc3_output_features: 16 14 | output_variable: gsp_yield 15 | include_pv_yield_history: True 16 | -------------------------------------------------------------------------------- /tests/configs/model/conv3d_sat_nwp.yaml: -------------------------------------------------------------------------------- 1 | _target_: predict_pv_yield.models.conv3d.model.Model 2 | 3 | include_pv_or_gsp_yield_history: False 4 | include_nwp: True 5 | forecast_minutes: 60 6 | history_minutes: 60 7 | number_of_conv3d_layers: 4 8 | conv3d_channels: 32 9 | image_size_pixels: 16 # this is made smaller for testing 10 | nwp_image_size_pixels: 16 11 | number_sat_channels: 11 12 | number_nwp_channels: 10 13 | fc1_output_features: 16 14 | fc2_output_features: 16 15 | fc3_output_features: 16 16 | output_variable: gsp_yield 17 | include_pv_yield_history: True 18 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from nowcasting_dataset.config.model import Configuration 4 | from predict_pv_yield.utils import load_config 5 | 6 | 7 | @pytest.fixture() 8 | def configuration(): 9 | configuration = Configuration() 10 | configuration.input_data = configuration.input_data.set_all_to_defaults() 11 | configuration.process.batch_size = 2 12 | configuration.input_data.default_history_minutes = 30 13 | configuration.input_data.default_forecast_minutes = 60 14 | configuration.input_data.nwp.nwp_image_size_pixels = 16 15 | 16 | return configuration 17 | 18 | 19 | @pytest.fixture() 20 | def configuration_conv3d(): 21 | 22 | config_file = "tests/configs/model/conv3d.yaml" 23 | config = load_config(config_file) 24 | 25 | dataset_configuration = Configuration() 26 | dataset_configuration.process.batch_size = 2 27 | dataset_configuration.input_data.default_history_minutes = config['history_minutes'] 28 | dataset_configuration.input_data.default_forecast_minutes = config['forecast_minutes'] 29 | dataset_configuration.input_data = dataset_configuration.input_data.set_all_to_defaults() 30 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 2 31 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = config['image_size_pixels'] 32 | dataset_configuration.input_data.satellite.forecast_minutes = config['forecast_minutes'] 33 | dataset_configuration.input_data.satellite.history_minutes = config['history_minutes'] 34 | 35 | return dataset_configuration 36 | 37 | 38 | @pytest.fixture() 39 | def configuration_perceiver(): 40 | 41 | dataset_configuration = Configuration() 42 | dataset_configuration.input_data = dataset_configuration.input_data.set_all_to_defaults() 43 | dataset_configuration.process.batch_size = 2 44 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 45 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = 16 46 | dataset_configuration.input_data.default_history_minutes = 30 47 | dataset_configuration.input_data.default_forecast_minutes = 120 48 | dataset_configuration.input_data.nwp.nwp_channels = dataset_configuration.input_data.nwp.nwp_channels[0:10] 49 | 50 | return dataset_configuration 51 | -------------------------------------------------------------------------------- /tests/models/baseline/test_baseline_model.py: 
-------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.baseline.last_value import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from nowcasting_dataloader.fake import FakeDataset 5 | from nowcasting_dataset.config.model import Configuration 6 | 7 | 8 | def test_init(): 9 | 10 | _ = Model() 11 | 12 | 13 | def test_model_forward(configuration): 14 | 15 | # start model 16 | model = Model(forecast_minutes=configuration.input_data.default_forecast_minutes) 17 | 18 | # create fake data loader 19 | train_dataset = FakeDataset(configuration=configuration) 20 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 21 | 22 | # get a batch of data 23 | x = next(iter(train_dataloader)) 24 | 25 | # run data through model 26 | y = model(x) 27 | 28 | # check the output is the correct shape 29 | assert len(y.shape) == 2 30 | assert y.shape[0] == configuration.process.batch_size 31 | assert y.shape[1] == configuration.input_data.default_forecast_minutes // 5 32 | 33 | 34 | def test_trainer(configuration): 35 | 36 | # start model 37 | model = Model(forecast_minutes=configuration.input_data.default_forecast_minutes) 38 | 39 | # create fake data loader 40 | train_dataset = FakeDataset(configuration=configuration) 41 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 42 | 43 | # set up trainer 44 | trainer = pl.Trainer(gpus=0, max_epochs=1) 45 | 46 | # test over training set 47 | _ = trainer.test(model, train_dataloader) 48 | -------------------------------------------------------------------------------- /tests/models/baseline/test_baseline_model_gsp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.baseline.last_value import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | import pandas as pd 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | import tempfile 8 | 9 | 10 | 11 | def test_init(): 12 | 13 | _ = Model(output_variable="gsp_yield") 14 | 15 | 16 | def test_model_forward(configuration): 17 | 18 | # start model 19 | model = Model( 20 | forecast_minutes=configuration.input_data.default_forecast_minutes, 21 | history_minutes=configuration.input_data.default_history_minutes, 22 | output_variable="gsp_yield", 23 | ) 24 | 25 | # create fake data loader 26 | train_dataset = FakeDataset(configuration=configuration) 27 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 28 | 29 | # get a batch of data 30 | x = next(iter(train_dataloader)) 31 | 32 | # run data through model 33 | y = model(x) 34 | 35 | # check the output is the correct shape 36 | assert len(y.shape) == 2 37 | assert y.shape[0] == configuration.process.batch_size 38 | assert y.shape[1] == configuration.input_data.default_forecast_minutes // 30 39 | 40 | 41 | def test_model_validation(configuration): 42 | 43 | # start model 44 | model = Model( 45 | forecast_minutes=configuration.input_data.default_forecast_minutes, 46 | history_minutes=configuration.input_data.default_history_minutes, 47 | output_variable="gsp_yield", 48 | ) 49 | 50 | # create fake data loader 51 | train_dataset = FakeDataset(configuration=configuration) 52 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 53 | 54 | # get a batch of data 55 | x = next(iter(train_dataloader)) 56 | 57 | # run data through model 58 | model.validation_step(x, 0) 59 | 60 | 61 | def 
test_trainer(configuration): 62 | 63 | # start model 64 | model = Model( 65 | forecast_minutes=configuration.input_data.default_forecast_minutes, 66 | history_minutes=configuration.input_data.default_history_minutes, 67 | output_variable="gsp_yield", 68 | ) 69 | 70 | # create fake data loader 71 | train_dataset = FakeDataset(configuration=configuration) 72 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 73 | 74 | # set up trainer 75 | trainer = pl.Trainer(gpus=0, max_epochs=1) 76 | 77 | # test over training set 78 | _ = trainer.test(model, train_dataloader) 79 | 80 | 81 | def test_trainer_validation(configuration): 82 | 83 | # start model 84 | model = Model( 85 | forecast_minutes=configuration.input_data.default_forecast_minutes, 86 | history_minutes=configuration.input_data.default_history_minutes, 87 | output_variable="gsp_yield", 88 | ) 89 | 90 | # create fake data loader 91 | train_dataset = FakeDataset(configuration=configuration) 92 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 93 | 94 | # set up trainer 95 | trainer = pl.Trainer(gpus=0, max_epochs=1) 96 | 97 | with tempfile.TemporaryDirectory() as tmpdirname: 98 | model.results_file_name = f'{tmpdirname}/temp' 99 | 100 | # test over validation set 101 | _ = trainer.validate(model, train_dataloader) 102 | 103 | # check that the csv file of validation results has been made 104 | results_df = pd.read_csv(f'{model.results_file_name}_0.csv') 105 | 106 | assert len(results_df) == len(train_dataloader) * configuration.process.batch_size * model.forecast_len_30 107 | assert 't0_datetime_utc' in results_df.keys() 108 | assert 'target_datetime_utc' in results_df.keys() 109 | assert 'gsp_id' in results_df.keys() 110 | assert "actual_gsp_pv_outturn_mw" in results_df.keys() 111 | assert "forecast_gsp_pv_outturn_mw" in results_df.keys() 112 | -------------------------------------------------------------------------------- /tests/models/conv3d/test_conv3d_model.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from predict_pv_yield.utils import load_config 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | 8 | 9 | 10 | def test_init(): 11 | 12 | config_file = "configs/model/conv3d.yaml" 13 | config = load_config(config_file) 14 | 15 | _ = Model(**config) 16 | 17 | 18 | def test_model_forward(configuration_conv3d): 19 | 20 | config_file = "tests/configs/model/conv3d.yaml" 21 | config = load_config(config_file) 22 | 23 | dataset_configuration = configuration_conv3d 24 | 25 | # start model 26 | model = Model(**config) 27 | 28 | # create fake data loader 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 31 | x = next(iter(train_dataloader)) 32 | 33 | # run data through model 34 | y = model(x) 35 | 36 | # check the output is the correct shape 37 | assert len(y.shape) == 2 38 | assert y.shape[0] == 2 39 | assert y.shape[1] == model.forecast_len_5 40 | 41 | 42 | def test_train(configuration_conv3d): 43 | 44 | config_file = "tests/configs/model/conv3d.yaml" 45 | config = load_config(config_file) 46 | 47 | dataset_configuration = configuration_conv3d 48 | 49 | # start model 50 | model = Model(**config) 51 | 52 | # create fake data loader 53 | train_dataset = 
FakeDataset(configuration=dataset_configuration) 54 | train_dataset.length = 2 55 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 56 | 57 | # fit model 58 | trainer = pl.Trainer(gpus=0, max_epochs=1) 59 | trainer.fit(model, train_dataloader) 60 | 61 | # predict over training set 62 | _ = trainer.predict(model, train_dataloader) 63 | -------------------------------------------------------------------------------- /tests/models/conv3d/test_conv3d_model_gsp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from predict_pv_yield.utils import load_config 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | 8 | 9 | def test_init(): 10 | 11 | config_file = "tests/configs/model/conv3d_gsp.yaml" 12 | config = load_config(config_file) 13 | 14 | _ = Model(**config) 15 | 16 | 17 | def test_model_forward(configuration_conv3d): 18 | 19 | config_file = "tests/configs/model/conv3d_gsp.yaml" 20 | config = load_config(config_file) 21 | 22 | dataset_configuration = configuration_conv3d 23 | 24 | # start model 25 | model = Model(**config) 26 | 27 | # create fake data loader 28 | train_dataset = FakeDataset(configuration=dataset_configuration) 29 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 30 | x = next(iter(train_dataloader)) 31 | 32 | # run data through model 33 | y = model(x) 34 | 35 | # check the output is the correct shape 36 | assert len(y.shape) == 2 37 | assert y.shape[0] == 2 38 | assert y.shape[1] == model.forecast_len_30 39 | 40 | 41 | def test_train(configuration_conv3d): 42 | 43 | config_file = "tests/configs/model/conv3d_gsp.yaml" 44 | config = load_config(config_file) 45 | 46 | dataset_configuration = configuration_conv3d 47 | 48 | # start model 49 | model = Model(**config) 50 | 51 | # create fake data loader 52 | train_dataset = FakeDataset(configuration=dataset_configuration) 53 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 54 | 55 | # fit model 56 | trainer = pl.Trainer(gpus=0, max_epochs=1) 57 | trainer.fit(model, train_dataloader) 58 | 59 | # predict over training set 60 | _ = trainer.predict(model, train_dataloader) 61 | -------------------------------------------------------------------------------- /tests/models/conv3d/test_conv3d_model_nwp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model_nwp import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from predict_pv_yield.utils import load_config 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | 8 | 9 | def test_init(): 10 | 11 | config_file = "tests/configs/model/conv3d_nwp.yaml" 12 | config = load_config(config_file) 13 | 14 | _ = Model(**config) 15 | 16 | 17 | def test_model_forward(configuration_conv3d): 18 | 19 | config_file = "tests/configs/model/conv3d_nwp.yaml" 20 | config = load_config(config_file) 21 | 22 | # start model 23 | model = Model(**config) 24 | 25 | dataset_configuration = configuration_conv3d 26 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 27 | 28 | # create fake data loader 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, 
batch_size=None) 31 | x = next(iter(train_dataloader)) 32 | 33 | # run data through model 34 | y = model(x) 35 | 36 | # check the output is the correct shape 37 | assert len(y.shape) == 2 38 | assert y.shape[0] == 2 39 | assert y.shape[1] == model.forecast_len_30 40 | 41 | 42 | def test_model_forward_no_satellite(configuration_conv3d): 43 | 44 | config_file = "tests/configs/model/conv3d_nwp.yaml" 45 | config = load_config(config_file) 46 | config['include_future_satellite'] = False 47 | 48 | # start model 49 | model = Model(**config) 50 | 51 | dataset_configuration = configuration_conv3d 52 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 53 | 54 | # create fake data loader 55 | train_dataset = FakeDataset(configuration=dataset_configuration) 56 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 57 | x = next(iter(train_dataloader)) 58 | 59 | # run data through model 60 | y = model(x) 61 | 62 | # check the output is the correct shape 63 | assert len(y.shape) == 2 64 | assert y.shape[0] == 2 65 | assert y.shape[1] == model.forecast_len_30 66 | 67 | 68 | def test_train(configuration_conv3d): 69 | 70 | config_file = "tests/configs/model/conv3d_nwp.yaml" 71 | config = load_config(config_file) 72 | 73 | dataset_configuration = configuration_conv3d 74 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 75 | 76 | # start model 77 | model = Model(**config) 78 | 79 | # create fake data loader 80 | train_dataset = FakeDataset(configuration=dataset_configuration) 81 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 82 | 83 | # fit model 84 | trainer = pl.Trainer(gpus=0, max_epochs=1) 85 | trainer.fit(model, train_dataloader) 86 | 87 | # predict over training set 88 | _ = trainer.predict(model, train_dataloader) 89 | -------------------------------------------------------------------------------- /tests/models/conv3d/test_conv3d_model_sat_nwp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.conv3d.model_sat_nwp import Model 2 | import torch 3 | import pytorch_lightning as pl 4 | from predict_pv_yield.utils import load_config 5 | from nowcasting_dataloader.fake import FakeDataset 6 | from nowcasting_dataset.config.model import Configuration 7 | 8 | 9 | def test_init(): 10 | 11 | config_file = "tests/configs/model/conv3d_sat_nwp.yaml" 12 | config = load_config(config_file) 13 | 14 | _ = Model(**config) 15 | 16 | 17 | def test_model_forward(configuration_conv3d): 18 | 19 | config_file = "tests/configs/model/conv3d_sat_nwp.yaml" 20 | config = load_config(config_file) 21 | 22 | # start model 23 | model = Model(**config) 24 | 25 | dataset_configuration = configuration_conv3d 26 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 27 | 28 | # create fake data loader 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 31 | x = next(iter(train_dataloader)) 32 | 33 | # run data through model 34 | y = model(x) 35 | 36 | # check the output is the correct shape 37 | assert len(y.shape) == 2 38 | assert y.shape[0] == 2 39 | assert y.shape[1] == model.forecast_len_30 40 | 41 | 42 | def test_model_forward_no_satellite(configuration_conv3d): 43 | 44 | config_file = "tests/configs/model/conv3d_sat_nwp.yaml" 45 | config = load_config(config_file) 46 | config['include_future_satellite'] = False 47 | 48 | # start model 49 | model = Model(**config) 50 | 51 | 
dataset_configuration = configuration_conv3d 52 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 53 | 54 | # create fake data loader 55 | train_dataset = FakeDataset(configuration=dataset_configuration) 56 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 57 | x = next(iter(train_dataloader)) 58 | 59 | # run data through model 60 | y = model(x) 61 | 62 | # check the output is the correct shape 63 | assert len(y.shape) == 2 64 | assert y.shape[0] == 2 65 | assert y.shape[1] == model.forecast_len_30 66 | 67 | 68 | def test_train(configuration_conv3d): 69 | 70 | config_file = "tests/configs/model/conv3d_sat_nwp.yaml" 71 | config = load_config(config_file) 72 | 73 | dataset_configuration = configuration_conv3d 74 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 75 | 76 | # start model 77 | model = Model(**config) 78 | 79 | # create fake data loader 80 | train_dataset = FakeDataset(configuration=dataset_configuration) 81 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 82 | 83 | # fit model 84 | trainer = pl.Trainer(gpus=0, max_epochs=1) 85 | trainer.fit(model, train_dataloader) 86 | 87 | # predict over training set 88 | _ = trainer.predict(model, train_dataloader) 89 | -------------------------------------------------------------------------------- /tests/models/perceiver/test_perceiver.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.perceiver.perceiver import PerceiverModel, params 2 | from nowcasting_dataloader.fake import FakeDataset 3 | import torch 4 | from nowcasting_dataset.config.model import Configuration 5 | 6 | 7 | def test_init_model(): 8 | """Initialize the model""" 9 | _ = PerceiverModel(history_minutes=3, forecast_minutes=3, nwp_channels=params["nwp_channels"]) 10 | 11 | 12 | def test_model_forward(configuration_perceiver): 13 | 14 | dataset_configuration = configuration_perceiver 15 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 64 16 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = 16 17 | 18 | model = PerceiverModel( 19 | history_minutes=30, 20 | forecast_minutes=60, 21 | nwp_channels=params["nwp_channels"], 22 | embedding_dem=2048 23 | ) # doesn't do anything 24 | 25 | # set up fake data 26 | train_dataset = FakeDataset(configuration=dataset_configuration) 27 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 28 | # get data 29 | x = next(iter(train_dataloader)) 30 | 31 | # run data through model 32 | y = model(x) 33 | 34 | # check the output is the correct shape 35 | assert len(y.shape) == 2 36 | assert y.shape[0] == dataset_configuration.process.batch_size 37 | assert y.shape[1] == 60 // 5 38 | -------------------------------------------------------------------------------- /tests/models/perceiver/test_perceiver_conv3d_sat_nwp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.perceiver.perceiver_conv3d_nwp_sat import Model, params, TOTAL_SEQ_LEN 2 | from predict_pv_yield.data.dataloader import FakeDataset 3 | import torch 4 | from nowcasting_dataset.config.model import Configuration 5 | 6 | 7 | 8 | 9 | def test_init_model(): 10 | """Initialize the model""" 11 | _ = Model( 12 | history_minutes=3, forecast_minutes=3, nwp_channels=params["nwp_channels"], output_variable="gsp_yield" 13 | ) 14 | 15 | 16 | def test_model_forward(configuration_perceiver): 17 | 18 | dataset_configuration 
= configuration_perceiver 19 | 20 | model = Model( 21 | history_minutes=30, 22 | forecast_minutes=60, 23 | nwp_channels=params["nwp_channels"], 24 | output_variable="gsp_yield", 25 | ) # doesn't do anything 26 | 27 | # set up fake data 28 | train_dataset = FakeDataset(configuration=dataset_configuration) 29 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 30 | # get data 31 | x = next(iter(train_dataloader)) 32 | 33 | # run data through model 34 | y = model(x) 35 | 36 | # check the output is the correct shape 37 | assert len(y.shape) == 2 38 | assert y.shape[0] == dataset_configuration.process.batch_size 39 | assert y.shape[1] == 60 // 30 40 | 41 | 42 | def test_model_forward_no_future_satellite(configuration_perceiver): 43 | 44 | dataset_configuration = configuration_perceiver 45 | 46 | model = Model( 47 | history_minutes=30, 48 | forecast_minutes=60, 49 | nwp_channels=params["nwp_channels"], 50 | output_variable="gsp_yield", 51 | use_future_satellite_images=False 52 | ) # doesn't do anything 53 | 54 | train_dataset = FakeDataset(configuration=dataset_configuration) 55 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 56 | # get data 57 | x = next(iter(train_dataloader)) 58 | 59 | # run data through model 60 | y = model(x) 61 | 62 | # check the output is the correct shape 63 | assert len(y.shape) == 2 64 | assert y.shape[0] == dataset_configuration.process.batch_size 65 | assert y.shape[1] == 60 // 30 66 | -------------------------------------------------------------------------------- /tests/models/perceiver/test_perceiver_gsp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.perceiver.perceiver import PerceiverModel, params 2 | from predict_pv_yield.data.dataloader import FakeDataset 3 | from nowcasting_dataset.config.model import Configuration 4 | import torch 5 | 6 | 7 | def test_init_model(): 8 | """Initialize the model""" 9 | _ = PerceiverModel( 10 | history_minutes=3, forecast_minutes=3, nwp_channels=params["nwp_channels"], output_variable="gsp_yield" 11 | ) 12 | 13 | 14 | def test_model_forward(configuration_perceiver): 15 | 16 | dataset_configuration = configuration_perceiver 17 | dataset_configuration.process.batch_size = 2 18 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 64 19 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = 16 20 | 21 | model = PerceiverModel( 22 | history_minutes=30, 23 | forecast_minutes=60, 24 | nwp_channels=params["nwp_channels"], 25 | output_variable="gsp_yield", 26 | ) # doesn't do anything 27 | 28 | # set up fake data 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 31 | # get data 32 | x = next(iter(train_dataloader)) 33 | 34 | # run data through model 35 | y = model(x) 36 | 37 | # check the output is the correct shape 38 | assert len(y.shape) == 2 39 | assert y.shape[0] == dataset_configuration.process.batch_size 40 | assert y.shape[1] == 60 // 30 41 | -------------------------------------------------------------------------------- /tests/models/perceiver/test_perceiver_sat_nwp.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.models.perceiver.perceiver_nwp_sat import Model, params, TOTAL_SEQ_LEN 2 | from predict_pv_yield.data.dataloader import FakeDataset 3 | import torch 4 | 5 | 6 | def test_init_model(): 7 | """Initialize the 
model""" 8 | _ = Model( 9 | history_minutes=3, forecast_minutes=3, nwp_channels=params["nwp_channels"], output_variable="gsp_yield" 10 | ) 11 | 12 | 13 | def test_model_forward(configuration_perceiver): 14 | 15 | dataset_configuration = configuration_perceiver 16 | dataset_configuration.process.batch_size = 2 17 | dataset_configuration.input_data.nwp.nwp_image_size_pixels = 16 18 | dataset_configuration.input_data.satellite.satellite_image_size_pixels = 16 19 | 20 | model = Model( 21 | history_minutes=30, 22 | forecast_minutes=60, 23 | nwp_channels=params["nwp_channels"], 24 | output_variable="gsp_yield", 25 | ) # doesnt do anything 26 | 27 | batch_size = 2 28 | # set up fake data 29 | train_dataset = FakeDataset(configuration=dataset_configuration) 30 | train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=None) 31 | # get data 32 | x = next(iter(train_dataloader)) 33 | 34 | # run data through model 35 | y = model(x) 36 | 37 | # check out put is the correct shape 38 | assert len(y.shape) == 2 39 | assert y.shape[0] == batch_size 40 | assert y.shape[1] == 60 // 30 41 | -------------------------------------------------------------------------------- /tests/test_training.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.training import train 2 | import os 3 | 4 | from hydra import compose, initialize 5 | import tilemapbase 6 | 7 | 8 | def test_train(): 9 | 10 | os.environ["NEPTUNE_API_TOKEN"] = "not_a_token" 11 | 12 | # for Github actions need to create this 13 | tilemapbase.init(create=True) 14 | 15 | initialize(config_path="../configs", job_name="test_app") 16 | config = compose( 17 | config_name="config", 18 | overrides=["logger=csv", 19 | "experiment=example_simple", 20 | "datamodule.fake_data=true", 21 | "datamodule.data_path=tests/configs/dataset", 22 | "trainer.fast_dev_run=true", ], 23 | ) 24 | 25 | train(config=config) 26 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from predict_pv_yield.utils import extras, print_config 2 | 3 | from hydra import compose, initialize 4 | import hydra 5 | 6 | import os 7 | 8 | 9 | def test_utils(): 10 | """ 11 | Test that util functions work. This just runs them. Perhaps slightly harder to check they work how they should. 12 | """ 13 | os.environ["NEPTUNE_API_TOKEN"] = "not_a_token" 14 | 15 | hydra.core.global_hydra.GlobalHydra.instance().clear() 16 | initialize(config_path="../configs", job_name="test_app") 17 | config = compose(config_name="config") 18 | 19 | extras(config) 20 | 21 | print_config(config) 22 | -------------------------------------------------------------------------------- /weights/conv3d/readme.md: -------------------------------------------------------------------------------- 1 | Weights stored here are from 2 | 3 | https://app.neptune.ai/o/OpenClimateFix/org/predict-pv-yield/e/PRED-189/charts 4 | 5 | conv3d_channels 32 6 | fc1_output_features 128 7 | fc2_output_features 128 8 | fc3_output_features 64 9 | number_of_conv3d_layers 6 10 | --------------------------------------------------------------------------------