├── BatchRL ├── ml │ ├── __init__.py │ ├── time_series.py │ ├── sklearn_util.py │ ├── keras_util.py │ └── keras_layers.py ├── agents │ ├── __init__.py │ ├── agents_heuristic.py │ ├── base_agent.py │ └── keras_agents.py ├── dynamics │ ├── __init__.py │ ├── const.py │ ├── day_periodic.py │ ├── classical.py │ ├── sin_cos_time.py │ ├── composite.py │ ├── base_hyperopt.py │ └── battery_model.py ├── envs │ └── __init__.py ├── rest │ ├── __init__.py │ ├── pw_cl.py │ └── pw_gui.py ├── tests │ ├── __init__.py │ ├── data │ │ ├── test_pw.txt │ │ ├── test_login.txt │ │ ├── test_upload_file.txt │ │ └── TestUploadFolder │ │ │ ├── test_upload_file.txt │ │ │ └── SubFolder │ │ │ └── test_upload_file.txt │ ├── test_keras.py │ └── test_opcua.py ├── util │ ├── __init__.py │ ├── share_data.py │ └── notify.py ├── data_processing │ ├── __init__.py │ └── preprocess.py ├── run_tests.ps1 ├── opcua_empa │ ├── __init__.py │ ├── run_opcua.py │ ├── room_control_client.py │ └── controller.py ├── requirements.txt ├── eval_all_rl.ps1 ├── commands.ps1 └── README.md ├── Models └── README.md ├── Plots └── README.md ├── batchRLOverv.pdf ├── load_euler ├── make_doc.ps1 ├── Data └── README.md ├── DocFiles ├── install.rst └── cookbook.rst ├── doc_helper.py ├── ps_test.ps1 ├── automate.ps1 ├── README.md ├── conf.py ├── setup.py └── batchRLOverv.drawio /BatchRL/ml/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BatchRL/agents/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BatchRL/dynamics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BatchRL/envs/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BatchRL/rest/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BatchRL/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BatchRL/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BatchRL/data_processing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BatchRL/tests/data/test_pw.txt: -------------------------------------------------------------------------------- 1 | test_pw -------------------------------------------------------------------------------- /BatchRL/run_tests.ps1: -------------------------------------------------------------------------------- 1 | python -m unittest discover -v . 
-------------------------------------------------------------------------------- /BatchRL/tests/data/test_login.txt: -------------------------------------------------------------------------------- 1 | test_user 2 | test_pw -------------------------------------------------------------------------------- /BatchRL/tests/data/test_upload_file.txt: -------------------------------------------------------------------------------- 1 | Blah Blah Blah 2 | 3 | Fuck Yeah -------------------------------------------------------------------------------- /Models/README.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | This folder contains the trained keras models. -------------------------------------------------------------------------------- /BatchRL/tests/data/TestUploadFolder/test_upload_file.txt: -------------------------------------------------------------------------------- 1 | Blah Blah Blah 2 | 3 | Fuck Yeah -------------------------------------------------------------------------------- /Plots/README.md: -------------------------------------------------------------------------------- 1 | # The Plot folder 2 | 3 | It contains the plots generated during the project. -------------------------------------------------------------------------------- /BatchRL/tests/data/TestUploadFolder/SubFolder/test_upload_file.txt: -------------------------------------------------------------------------------- 1 | Blah Blah Blah 2 | 3 | Fuck Yeah -------------------------------------------------------------------------------- /batchRLOverv.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BratislavS/MIMO_DRL_Building_control/HEAD/batchRLOverv.pdf -------------------------------------------------------------------------------- /BatchRL/opcua_empa/__init__.py: -------------------------------------------------------------------------------- 1 | """Opcua Empa Package. 
2 | 3 | .. moduleauthor:: Christian Baumann 4 | """ 5 | -------------------------------------------------------------------------------- /load_euler: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | module load python 4 | module load hdf5 5 | source venv/bin/activate 6 | 7 | echo "Loaded libraries and activated python environment." 8 | 9 | -------------------------------------------------------------------------------- /make_doc.ps1: -------------------------------------------------------------------------------- 1 | cd .. 2 | rm Docs 3 | mkdir Docs 4 | cd Docs 5 | sphinx-apidoc -F -H 'BatchRL' -A 'Chris' -o . '../MasterThesis/BatchRL/' 6 | cp ../MasterThesis/conf.py . 7 | python ../MasterThesis/doc_helper.py 8 | ./make html SPHINXBUILD='python $(shell which sphinx-build)' 9 | cd ../MasterThesis -------------------------------------------------------------------------------- /Data/README.md: -------------------------------------------------------------------------------- 1 | # The Data Folder 2 | 3 | Here the data used for the ML training is stored. 4 | 5 | The command in the file `PowershellSCP` can be used 6 | to transfer the necessary files for the neural network training 7 | to the Euler cluster. 8 | 9 | The folder `Datasets` contains the processed data. -------------------------------------------------------------------------------- /BatchRL/rest/pw_cl.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | from getpass import getpass 3 | 4 | 5 | def get_pw() -> Tuple[str, str]: 6 | """Commandline login getter tool 7 | 8 | Returns: 9 | Tuple with username and password strings. 
class AR_Model(object):
    """Autoregressive time series model of fixed order `lag`.

    Wraps an ARIMA model with order ``(lag, 0, 0)``. One-step predictions
    are the AR-weighted sum of the most recent observations plus a
    Gaussian noise sample whose variance is the fitted `sigma2`.
    """

    def __init__(self, lag=1):
        """Store the AR order; the model handles stay None until `fit`."""
        self.lag = lag
        self.model = self.model_fit = None

    def fit(self, data):
        """Build the ARIMA model for `data` and fit it.

        The fit is called with ``trend='nc'`` and ``disp=False``
        (presumably: no constant term, no console output -- see the
        statsmodels documentation).
        """
        arima = ARIMA(data, order=(self.lag, 0, 0))
        self.model = arima
        self.model_fit = arima.fit(trend='nc', disp=False)

    def predict(self, data):
        """Return a noisy one-step-ahead prediction.

        Args:
            data: Indexable sequence of past values; the newest value is
                `data[-1]`. Needs at least as many entries as there are
                fitted AR coefficients.

        Returns:
            Weighted sum of the latest values plus one draw from
            ``N(0, sqrt(sigma2))``.
        """
        fitted = self.model_fit
        coeffs = fitted.arparams
        noise_std = np.sqrt(fitted.sigma2)

        # Coefficient number `back` weights the value `back` steps ago.
        mean = 0.0
        for back, coeff in enumerate(coeffs, start=1):
            mean += coeff * data[-back]
        return mean + np.random.normal(0, noise_std)
It will also ask 13 | for credentials for different accounts. This includes the access 14 | to the NEST database and the OPCUA client. Also an email account 15 | for sending notifications for experiment termination is desired. 16 | These are not necessarily needed, depending on the code that is run. 17 | But it will result in a runtime error if any of these accounts is needed. 18 | 19 | Afterwards open Powershell in the folder 20 | `MasterThesis` and then run: 21 | 22 | .. code-block:: bash 23 | 24 | ./automate.ps1 -act 25 | 26 | to activate the virtual environment. If you use 27 | :option:`-docs` instead, the documentation will be 28 | built. Using :option:`-test` will run the unit tests 29 | of this project. 30 | -------------------------------------------------------------------------------- /BatchRL/eval_all_rl.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .SYNOPSIS 3 | Script to run evaluation of a few models. 4 | 5 | .DESCRIPTION 6 | Runs the python script BatchRL.py with the -r option 7 | to run the room model using the trained 8 | RL agent. Does this using different parameters, i.e. with and without 9 | the battery, (not) using the physical model. 10 | 11 | .NOTES 12 | Author: Christian Baumann 13 | Last Edit: 2019-01-14 14 | Version 1.0 - initial release 15 | #> 16 | 17 | # Array definition 18 | $true_false = @("t","f") 19 | $pen_facs = @("2.5", "50.0") 20 | 21 | # Performance evaluations 22 | foreach ($add_bat in $true_false){ 23 | foreach ($p in $pen_facs){ 24 | foreach ($phys in $true_false){ 25 | # Exclude case of battery model where not the 26 | # physical model was used. 
def main(doc_dir=None, docs_dir="../Docs"):
    """Copy the extra documentation pages to the Sphinx folder and list them.

    Every regular file in `doc_dir` is copied into `docs_dir`, and its name
    without the last extension is inserted as a new entry directly after the
    ``:caption: Contents:`` line of ``index.rst`` in `docs_dir`, ahead of
    whatever followed that line before.

    Args:
        doc_dir: Folder holding the hand-written pages. Defaults to the
            module-level `DOC_DIR`.
        docs_dir: Sphinx source folder that contains ``index.rst``.

    Raises:
        ValueError: If ``index.rst`` does not contain the caption marker.
    """
    if doc_dir is None:
        doc_dir = DOC_DIR

    print("Setting up documentation...")
    index_path = os.path.join(docs_dir, "index.rst")
    marker = ":caption: Contents:"

    # Validate the index before copying anything, so a broken index does
    # not leave a half-updated documentation folder behind.
    with open(index_path, "r") as f:
        data = f.read()
    before, found, after = data.partition(marker)
    if not found:
        raise ValueError(f"Marker '{marker}' not found in {index_path}")

    # New entries have to line up with the marker line, so reuse its
    # indentation instead of hard-coding a number of spaces.
    indent = before[before.rfind("\n") + 1:]
    if indent.strip():
        # Marker is preceded by other text on its line; fall back to the
        # three spaces sphinx-quickstart uses.
        indent = "   "

    entries = []
    # Sorted so that the generated index does not depend on the directory
    # listing order of the file system.
    for file_name in sorted(os.listdir(doc_dir)):
        src_path = os.path.join(doc_dir, file_name)
        if not os.path.isfile(src_path):
            continue  # copyfile cannot handle directories
        copyfile(src_path, os.path.join(docs_dir, file_name))
        # splitext copes with names containing no dot or several dots.
        entries.append(f"{indent}{os.path.splitext(file_name)[0]}\n")

    with open(index_path, "w") as f:
        f.write(before + marker + "\n\n" + "".join(entries)
                + indent + after.lstrip())
22 | 23 | Enables the use of `train_decorator` with the fit() method. 24 | """ 25 | 26 | def __init__(self, skl_mod, parent: 'SKLearnModel'): 27 | self.skl_mod = skl_mod 28 | self.p = parent 29 | 30 | def load_weights(self, full_path: str): 31 | with open(full_path, "rb") as f: 32 | mod = pickle.load(f) 33 | self.skl_mod = mod 34 | self.p.skl_mod = mod 35 | self.p.is_fitted = True 36 | 37 | def save(self, path: str) -> None: 38 | with open(path, "wb") as f: 39 | pickle.dump(self.skl_mod, f) 40 | -------------------------------------------------------------------------------- /ps_test.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .SYNOPSIS 3 | Script with some example commands. 4 | 5 | .DESCRIPTION 6 | Gives an overview of some PS commands that are 7 | used frequently. Look at the source code, the actual output 8 | is pretty meaningless on its own. 9 | 10 | .PARAMETER temp 11 | A dummy parameter. 12 | 13 | .EXAMPLE 14 | It can e.g. be run as: 15 | PS C:\> ./ps_test -temp 5 16 | 17 | .NOTES 18 | Author: Christian Baumann 19 | Last Edit: 2019-01-14 20 | Version 1.0 - initial release 21 | #> 22 | 23 | # Parameter definition 24 | param( 25 | [int]$temp, 26 | [switch]$b = $false, 27 | [string]$s = "hoi") 28 | 29 | Write-Host "Got parameter: $temp, b: $b" 30 | 31 | # Variable definition 32 | $v = 15 33 | Write-Host "This is a variable: $v" 34 | 35 | # Array definition 36 | $array = @("val_1","val_2","val_3") 37 | 38 | # For loop over array 39 | for ($i=0; $i -lt $array.length; $i++){ 40 | $ai = $array[$i] 41 | Write-Host "Array element at position $i is: $ai" 42 | # Alternatively: echo $array[$i] 43 | } 44 | 45 | # Or with for each 46 | foreach ($i in $array){ 47 | Write-Host $i 48 | 49 | # If clause 50 | if ($i -eq "val_1") { 51 | Write-Host hello 52 | } 53 | } 54 | 55 | # Functions 56 | 57 | # Definition 58 | Function Hello ($name) 59 | { 60 | Write-Host "hoi $name" 61 | } 62 | 63 | # Call the function 64 | Hello 
("Hans") 65 | 66 | # Activate python environment 67 | Invoke-Expression "$($PSScriptRoot)/venv/Scripts/Activate.ps1" 68 | -------------------------------------------------------------------------------- /BatchRL/commands.ps1: -------------------------------------------------------------------------------- 1 | # (Train and) Evaluate trained agents 2 | # Trained on Euler 3 | python .\BatchRL.py -v -r --train_data train_val --fl 50.0 21.0 25.0 -in 2000000 -bo f t --room_nr 43 4 | 5 | # Trained on Remote 6 | python .\BatchRL.py -v -r --train_data all --fl 50.0 --rl_sampling all --hop_eval_data test -in 500000 -bo f t --room_nr 43 7 | python .\BatchRL.py -v -r --train_data all --fl 50.0 --rl_sampling all --hop_eval_data test -in 500000 -bo f t --room_nr 41 8 | 9 | # Run Opcua controller 10 | # Test controller 11 | python BatchRL.py -v -u -bo f t f t f --data_end_date 2020-01-21 --hop_eval_data test --train_data all -fl 50.0 21.0 26.0 --room_nr 41 --rl_sampling all -in 100 12 | 13 | # Euler Jobs 14 | # Jobs submitted on Euler (31.01.20) 15 | bsub -n 4 -W 24:00 python BatchRL.py -r -v -bo f f f f --data_end_date 2020-01-21 --hop_eval_data test --train_data all -fl 50.0 21.0 26.0 --room_nr 43 --rl_sampling all 16 | bsub -n 4 -W 24:00 python BatchRL.py -r -v -bo f f f f --data_end_date 2020-01-21 --hop_eval_data test --train_data all -fl 50.0 21.0 26.0 --room_nr 41 --rl_sampling all --sam_heat 17 | bsub -n 4 -W 24:00 python BatchRL.py -r -v -bo f f f f --data_end_date 2020-01-21 --hop_eval_data test --train_data all -fl 50.0 21.0 26.0 --room_nr 43 --rl_sampling all --sam_heat 18 | bsub -n 4 -W 24:00 python BatchRL.py -r -v -bo f f f f --data_end_date 2020-01-21 --hop_eval_data test --train_data all -fl 50.0 21.0 26.0 --room_nr 41 --rl_sampling all 19 | 20 | # Other jobs 21 | # Cleanup 22 | python .\BatchRL.py -v -c 23 | 24 | -------------------------------------------------------------------------------- /BatchRL/README.md: 
-------------------------------------------------------------------------------- 1 | # All the python code 2 | 3 | The file `BatchRL.py` is the main function, run this to run 4 | everything. 5 | 6 | ## Running tests 7 | 8 | The script `run_tests.ps1` lets you run some unit tests. 9 | If you are not using Powershell, just type: 10 | ```console 11 | $ python -m unittest discover . 12 | ``` 13 | and it should run. It does not contain all tests, there is 14 | another function in the main script `BatchRL.py` that runs some 15 | tests that need some more time to run. 16 | 17 | ## Running on Euler 18 | 19 | ### Connecting to Euler using Putty 20 | 21 | First set up a VPN connection if not in the ETH network. 22 | Then run putty, select `euler.ethz.ch` as Host Name and connect, 23 | it will ask for credentials and then you should be logged in. 24 | 25 | ### Connecting to Euler from Linux terminal 26 | 27 | Use: 28 | ```console 29 | $ ssh username@euler.ethz.ch 30 | ``` 31 | 32 | ### Setting things up 33 | 34 | You can use git to clone this repository, then you 35 | do not have to copy the code manually. 36 | You might need to install some additional 37 | python libraries, do this using the flag `--user`. 38 | 39 | ### Running on Euler 40 | 41 | To load the necessary libraries, run: 42 | ```console 43 | $ module load python 44 | $ module load hdf5 45 | ``` 46 | or, alternatively, execute the script that does that: 47 | ```console 48 | $ source ../load_euler 49 | ``` 50 | Also remember to copy the data if it has changed 51 | since the last time. The command for doing this 52 | is in [Data](../Data). You also might need to make the 53 | script files executable, for that purpose, use: `chmod +x script.ext`. 54 | -------------------------------------------------------------------------------- /automate.ps1: -------------------------------------------------------------------------------- 1 | <# 2 | .SYNOPSIS 3 | Script that runs various commands.
4 | 5 | .DESCRIPTION 6 | What is run depends on the flags that are passed. Pass '-act' 7 | to activate the python environment, '-docs' to build 8 | the documentations or one of '-cp_plots', '-cp_hop', '-cp_data' 9 | to copy plot / hyperopf / data from / to Euler via scp. (Copying 10 | data needs a VPN connection to ETH.) 11 | 12 | .PARAMETER cp_plots 13 | Set if you want to copy plots from Euler. 14 | You will need a vpn connection! 15 | .PARAMETER act 16 | Set if you want to activate the python environment. 17 | .PARAMETER docs 18 | Set if you want to build the documentation. 19 | .PARAMETER test 20 | Run the unit tests. Activates the python env if it has not 21 | yet been done. 22 | 23 | .EXAMPLE 24 | If you want to do everything, run: 25 | PS C:\> ./automate -cp_plots -act -docs 26 | 27 | .NOTES 28 | Author: Christian Baumann 29 | Last Edit: 2019-01-15 30 | Version 1.0 - initial release 31 | #> 32 | 33 | # Parameter definition 34 | param( 35 | [switch]$cp_plots = $false, 36 | [switch]$cp_data = $false, 37 | [switch]$cp_hop = $false, 38 | [switch]$cp_rl = $false, 39 | [switch]$act = $false, 40 | [switch]$test = $false, 41 | [switch]$docs = $false) 42 | 43 | # Copy plots from Euler 44 | if ($cp_plots){ 45 | scp -rp chbauman@euler.ethz.ch:MT/MasterThesis/Plots/ ./EulerPlots/ 46 | } 47 | # Copy data to Euler 48 | if ($cp_data){ 49 | Invoke-Expression "scp -rp $($PSScriptRoot)/Data/Datasets/ chbauman@euler.ethz.ch:MT/MasterThesis/Data/" 50 | } 51 | # Copy hyperoptimization data from Euler 52 | if ($cp_hop){ 53 | Invoke-Expression "scp -rp chbauman@euler.ethz.ch:MT/MasterThesis/Models/Hop/ $($PSScriptRoot)/Models/" 54 | } 55 | # Copy RL agents from Euler 56 | if ($cp_rl){ 57 | Invoke-Expression "scp -rp chbauman@euler.ethz.ch:MT/MasterThesis/Models/RL/ $($PSScriptRoot)/Models" 58 | } 59 | 60 | # Run tests 61 | if ($test){ 62 | Invoke-Expression "$($PSScriptRoot)/venv/Scripts/Activate.ps1" 63 | Invoke-Expression "cd $($PSScriptRoot)/BatchRL; ./run_tests.ps1" 64 | } 65 
class NoDisturbanceModel(BaseDynamicsModel, ABC):
    """Abstract base for dynamics models that use no disturbance."""

    def model_disturbance(self, data_str: str = 'train'):
        """Flag the disturbance as modeled; there is nothing to fit.

        Args:
            data_str: Not used by this implementation.
        """
        self.modeled_disturbance = True

    def disturb(self) -> np.ndarray:
        """Return an all-zero float32 vector with `self.n_pred` entries."""
        return np.zeros(shape=(self.n_pred,), dtype=np.float32)


class ConstModel(BaseDynamicsModel):
    """Naive baseline that predicts the most recent input.

    When no input series are given, the predicted series double as the
    inputs. With more input than output series, the prediction is the
    last observation of the first `n_out` input series.
    """

    name: str = "Naive"  #: Base name of model.
    n_out: int  #: Number of series that are predicted.

    def __init__(self, dataset: Dataset, pred_inds: np.ndarray = None, **kwargs):
        """Set up the constant model.

        Every series selected by `pred_inds` is predicted by its last
        seen value.

        Args:
            dataset: Dataset containing the data.
            pred_inds: Indices of series to predict, all if None.
            kwargs: Forwarded to the base class, e.g. `in_inds`.

        Raises:
            ValueError: If both `in_inds` and `pred_inds` are given and
                there are fewer input than output series.
        """
        given_in_inds = kwargs.get('in_inds')
        if given_in_inds is None:
            # Fall back to using the predicted series as inputs.
            kwargs['in_inds'] = pred_inds
        elif pred_inds is not None and len(given_in_inds) < len(pred_inds):
            raise ValueError("Need at least as many input series as output series!")

        super().__init__(dataset, self.name, pred_inds, **kwargs)

        self.n_out = len(self.out_inds)
        self.nc = dataset.n_c

    def fit(self, verbose: int = 0, train_data: str = "train") -> None:
        """Record the requested data part; the model has no parameters.

        Args:
            verbose: Prints a short notice if greater than zero.
            train_data: Name of the data part, stored in `fit_data`.
        """
        self.fit_data = train_data
        if not verbose > 0:
            return
        print(f"Model const, no fitting on part: '{train_data}' needed!")

    def predict(self, in_data: np.ndarray) -> np.ndarray:
        """Predict by copying the last time step of the input.

        TODO: Fix this!
        NOTE(review): the first `n_out` columns are taken as the outputs;
        an earlier variant selected columns via `self.p_out_inds` instead.
        Confirm which one is intended.

        Args:
            in_data: Prepared data, indexed as
                ``[sample, time step, series]``.

        Returns:
            Copy of the first `n_out` series at the last time step.
        """
        last_step = in_data[:, -1, :self.n_out]
        return np.copy(last_step)
Afterwards, to build the docs, run: 13 | 14 | ```console 15 | $ ./automate.ps1 -docs 16 | ``` 17 | 18 | They will be built in the parent folder 19 | next to the current folder. Refer to the documentation 20 | for further instructions on how to use this repository. 21 | Tested on Windows only. If you are running MacOS or Linux, 22 | you might have to adjust the commands used in `automate.ps1`. 23 | Also, `setup.py` executes some system calls that might be different 24 | for other operating systems. 25 | 26 | ## Documentation 27 | 28 | The code was written in such a way that it 29 | allows for generating a documentation automatically with Sphinx. 30 | To generate it first install Sphinx and then use: 31 | 32 | ```console 33 | $ cd .. 34 | $ mkdir Docs 35 | $ cd Docs 36 | $ sphinx-apidoc -F -H 'BatchRL' -A 'Chris' -o . '../MasterThesis/BatchRL/' 37 | $ cp ../MasterThesis/conf.py . 38 | $ ./make html 39 | ``` 40 | 41 | Alternatively, you can directly use the Windows Powershell script [make_doc.ps1](make_doc.ps1) 42 | which basically runs these commands. The script also removes previously existing 43 | folders 'Docs', so it can be used to update the documentation. 44 | 45 | ## Repo structure 46 | 47 | This repository contains the following sub-directories. 48 | 49 | ### [Overleaf @ ...](https://github.com/chbauman/Master-ThesisOverLeaf) 50 | 51 | This folder contains the overleaf repository 52 | with all the latex code. 53 | 54 | ### [BatchRL](BatchRL) 55 | 56 | This folder contains the python code 57 | for batch reinforcement learning. The code was tested using Python version 3.6, 58 | it is not compatible with version 3.5 or below. It was tested using PyCharm and 59 | Visual Studio, part of the code was also run on Euler. 60 | 61 | ### [Data](Data) 62 | 63 | Here the data is put when the code is 64 | executed. Should be empty on the git repo 65 | except for the [README.md](Data/README.md). 
class Periodic1DayModel(BaseDynamicsModel):
    """
    Model with a period of one day: predictions drift from the last
    input towards the values stored for the previous day.
    """

    # Member variables
    hist: np.ndarray
    pred_t: int = 0
    tot_t: int = 0

    def __init__(self, d: Dataset, exo_inds: np.ndarray = None, alpha: float = 0.1):
        """
        Sets up the model.

        :param d: Dataset to use.
        :param exo_inds: The indices of the series that will be predicted;
            passed to the base class twice.
        :param alpha: The decay parameter alpha.
        """
        super().__init__(d, "1DayPeriodic_Alpha" + str(alpha), exo_inds, exo_inds)

        # Keep the parameters around.
        self.data = d
        minutes_per_day = 60 * 24
        # Steps per day; presumably `d.dt` is the step length in minutes.
        self.n: int = minutes_per_day // d.dt
        self.alpha: float = alpha

    def init_1day(self, day_data: np.ndarray) -> None:
        """
        Stores a new history and resets both time counters. Each series of
        the history is passed through `make_periodic`, for predictions
        longer than one day.

        :param day_data: New history data to use; the input array itself
            is copied first.
        """
        self.pred_t = 0
        self.tot_t = 0
        periodic = np.copy(day_data)
        # Visits (feature, history) pairs with the history index running
        # fastest, one 1D series at a time.
        for feat, h in np.ndindex(periodic.shape[-1], periodic.shape[0]):
            periodic[h, :, feat] = make_periodic(periodic[h, :, feat])
        self.hist = periodic

    def fit(self, verbose: int = 0, train_data: str = "train") -> None:
        """Nothing to fit for this model."""

    def curr_alpha(self, t: int) -> float:
        """
        The decay function:
        :math:`\\alpha_t = e^{-\\alpha t} \\in [0, 1]`

        :param t: Time variable.
        :return: Current weight of the input.
        """
        exponent = -self.alpha * t
        return np.exp(exponent)

    def predict(self, in_data: np.ndarray) -> np.ndarray:
        """
        Blends the latest input with the stored history. The weight of
        the input decays with every call, so repeated predictions
        converge to the data in `self.hist`.

        :param in_data: Prepared data.
        :return: New prediction.
        """
        # Advance both clocks; only `pred_t` wraps around after a day.
        self.pred_t = (self.pred_t + 1) % self.n
        self.tot_t += 1

        cols = self.p_in_indices
        selected = in_data[:, :, cols]
        latest = selected[:, -1, :]
        stored = self.hist[:, self.pred_t, cols]
        weight = self.curr_alpha(self.tot_t)
        return weight * latest + (1.0 - weight) * stored

    def disturb(self):
        """
        Not available for this model; always raises.

        :raises NotImplementedError: On every call.
        """
        raise NotImplementedError("Disturbance for naive model not implemented!")
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | import sys 15 | 16 | sys.path.insert(0, '..\\MasterThesis\\BatchRL') 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'BatchRL' 21 | copyright = '2020, Christian Baumann' 22 | author = 'Christian Baumann' 23 | version = '1.3.1' 24 | 25 | show_authors = True 26 | 27 | # -- General configuration --------------------------------------------------- 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.napoleon', 35 | 'sphinx.ext.viewcode', 36 | 'sphinx.ext.todo', 37 | 'sphinx_autodoc_typehints', 38 | 'sphinx_rtd_theme', 39 | 'sphinxarg.ext', 40 | ] 41 | 42 | # Type checking options 43 | # set_type_checking_flag = True 44 | always_document_param_types = True 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ['_templates'] 48 | 49 | # The language for content autogenerated by Sphinx. Refer to documentation 50 | # for a list of supported languages. 51 | # 52 | # This is also used if you do content translation via gettext catalogs. 53 | # Usually you set "language" from the command line for these cases. 54 | language = 'en' 55 | 56 | # List of patterns, relative to source directory, that match files and 57 | # directories to ignore when looking for source files. 
58 | # This pattern also affects html_static_path and html_extra_path. 59 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 60 | 61 | # -- Options for HTML output ------------------------------------------------- 62 | 63 | # The theme to use for HTML and HTML Help pages. See the documentation for 64 | # a list of builtin themes. 65 | # 66 | html_theme = 'sphinx_rtd_theme' 67 | 68 | # Add any paths that contain custom static files (such as style sheets) here, 69 | # relative to this directory. They are copied after the builtin static files, 70 | # so a file named "default.css" will overwrite the builtin "default.css". 71 | html_static_path = ['_static'] 72 | 73 | # -- Options for todo extension ---------------------------------------------- 74 | 75 | # If true, `todo` and `todoList` produce output, else they produce nothing. 76 | todo_include_todos = True 77 | 78 | 79 | # Not skipping __init__ 80 | def skip(app, what, name, obj, would_skip, options): 81 | if name in ["__init__", "__len__", "__str__", "__add__", "__getitem__"]: 82 | return False 83 | return would_skip 84 | 85 | 86 | def setup(app): 87 | app.connect("autodoc-skip-member", skip) 88 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup module. 2 | 3 | Sets up everything that is necessary for this project to 4 | be run. Asks for various login info that will be needed. 5 | If used, one can also specify dummy values as login info 6 | and change it later (or never). 7 | 8 | Tested on Windows only! 
9 | """ 10 | 11 | import os 12 | 13 | NEST_LOGIN_FILE = "rest_login.txt" 14 | OPCUA_LOGIN_FILE = "opcua_login.txt" 15 | EMAIL_LOGIN_FILE = "email_receiver_login.txt" 16 | DEBUG_EMAIL_LOGIN_FILE = "email_receiver_debug_login.txt" 17 | EMAIL_SEND_LOGIN_FILE = "notify_email_login.txt" 18 | VENV_DIR = "venv" 19 | 20 | 21 | def str2bool(v) -> bool: 22 | """Converts a string to a boolean. 23 | 24 | Raises: 25 | ValueError: If it cannot be converted. 26 | """ 27 | if isinstance(v, bool): 28 | return v 29 | if v.lower() in ('yes', 'true', 't', 'y', '1', '1.0'): 30 | return True 31 | elif v.lower() in ('no', 'false', 'f', 'n', '0', '0.0'): 32 | return False 33 | else: 34 | raise ValueError(f"Boolean value expected, got {v}") 35 | 36 | 37 | def get_login_and_write_to_file(file_name: str, name: str) -> None: 38 | """Asks the user for the login data. 39 | 40 | If the file already exists, asks if the user wants 41 | to overwrite it. 42 | 43 | Args: 44 | file_name: The file containing the login info. 45 | name: The name of whatever the login is for. 46 | """ 47 | # Check if login already exists 48 | f_exists = os.path.isfile(file_name) 49 | create_file = not f_exists 50 | if f_exists: 51 | parse_error = True 52 | while parse_error: 53 | ans = input(f"Overwrite {name} login info? 
") 54 | try: 55 | create_file = str2bool(ans) 56 | parse_error = False 57 | except ValueError: 58 | print("Your input was not understood!") 59 | 60 | # Ask for login and save to file 61 | if create_file: 62 | nest_user = input(f"Provide your {name} username: ") 63 | nest_pw = input("And password: ") 64 | 65 | with open(file_name, "w") as f: 66 | f.write(nest_user + "\n") 67 | f.write(nest_pw + "\n") 68 | 69 | 70 | def main(): 71 | print("Setting up everything...") 72 | 73 | # Check platform 74 | using_win = os.name == 'nt' 75 | if not using_win: 76 | print("May not work, only tested on Windows!") 77 | 78 | if not os.path.isdir(VENV_DIR): 79 | print("Setting up virtual environment...") 80 | cmd = "py" if using_win else "python3" 81 | act_path = os.path.join(VENV_DIR, "Scripts", "activate") 82 | req_path = os.path.join("BatchRL", "requirements.txt") 83 | os.system(f"{cmd} -m venv {VENV_DIR}") 84 | os.system(f"{act_path} & {cmd} -m pip install -r {req_path}") 85 | print("Venv setup done :)") 86 | print("") 87 | 88 | # Get NEST login data and store in file 89 | get_login_and_write_to_file(NEST_LOGIN_FILE, "NEST database") 90 | 91 | # Get NEST login data and store in file 92 | get_login_and_write_to_file(OPCUA_LOGIN_FILE, "Opcua client") 93 | 94 | # Get notification email login data and store in file 95 | get_login_and_write_to_file(EMAIL_LOGIN_FILE, "Notification receiver email") 96 | 97 | # Get notification email login data and store in file 98 | get_login_and_write_to_file(EMAIL_SEND_LOGIN_FILE, "Notification sender email") 99 | 100 | # Get debug notification email login data and store in file 101 | get_login_and_write_to_file(DEBUG_EMAIL_LOGIN_FILE, "Debug notification receiver email") 102 | 103 | print("Setup done!") 104 | 105 | 106 | if __name__ == '__main__': 107 | main() 108 | -------------------------------------------------------------------------------- /BatchRL/rest/pw_gui.py: -------------------------------------------------------------------------------- 1 
# /BatchRL/rest/pw_gui.py
"""Login GUI to get login data from a user.

The main function is `get_pw`, it uses the other classes
to do all the stuff.
"""

from typing import Tuple

import wx


class LoginHolder:
    """Container for the login information.

    An instance is handed to the GUI classes below, `LoginDialog.on_login`
    stores the entered data in `login_data`.
    """

    # Stays None until `LoginDialog.on_login` assigns (username, password).
    login_data: Tuple[str, str] = None  #: The login information

    def __init__(self):
        pass


class LoginDialog(wx.Dialog):
    """Class defining the login dialog GUI

    Consists of a username field, a password field and a login button.

    Adapted from: https://dzone.com/articles/wxpython-how-create-login
    """

    def __init__(self, lh: LoginHolder):
        """
        Constructs the login dialog to ask for
        username and password.

        Args:
            lh: LoginHolder, class where the login data will be saved.
        """

        wx.Dialog.__init__(self, None, title="Login")
        self.lh = lh

        # User info
        user_sizer = wx.BoxSizer(wx.HORIZONTAL)
        user_lbl = wx.StaticText(self, label="Username:")
        user_sizer.Add(user_lbl, 0, wx.ALL | wx.CENTER, 5)
        self.user = wx.TextCtrl(self)
        user_sizer.Add(self.user, 0, wx.ALL, 5)

        # Password info
        # NOTE(review): wx.TE_PROCESS_ENTER is requested, but no
        # wx.EVT_TEXT_ENTER handler is bound in this class - confirm
        # whether pressing enter was meant to trigger the login.
        p_sizer = wx.BoxSizer(wx.HORIZONTAL)
        p_lbl = wx.StaticText(self, label="Password:")
        p_sizer.Add(p_lbl, 0, wx.ALL | wx.CENTER, 5)
        self.password = wx.TextCtrl(self, style=wx.TE_PASSWORD | wx.TE_PROCESS_ENTER)
        p_sizer.Add(self.password, 0, wx.ALL, 5)

        # Add text fields
        main_sizer = wx.BoxSizer(wx.VERTICAL)
        main_sizer.Add(user_sizer, 0, wx.ALL, 5)
        main_sizer.Add(p_sizer, 0, wx.ALL, 5)

        # Login button
        btn = wx.Button(self, label="Login")
        btn.Bind(wx.EVT_BUTTON, self.on_login)
        main_sizer.Add(btn, 0, wx.ALL | wx.CENTER, 5)

        self.SetSizer(main_sizer)

    def on_login(self, event) -> None:
        """
        Saves login data to login holder and destroys dialog.
        Executed when the login button is clicked.

        Args:
            event: Not used
        Returns:
            None
        """

        # Save login data and destroy
        # NOTE(review): the dialog is shown with ShowModal() in MainFrame,
        # confirm that Destroy() (instead of EndModal()) is intended here.
        user_password = self.password.GetValue()
        username = self.user.GetValue()
        self.lh.login_data = (username, user_password)
        self.Destroy()


class MainFrame(wx.Frame):
    """
    Frame calling the login dialog only.
    """

    def __init__(self, lh: LoginHolder):
        """
        Constructor of single frame which calls
        the LoginDialog when initialized.

        The dialog is shown modally from within this constructor and
        `Destroy` is called on the frame right afterwards.

        Args:
            lh: LoginHolder, class where the login data will be saved.
        """
        self.lh = lh
        wx.Frame.__init__(self, None, title="Main App")
        wx.Panel(self)

        # Ask user to login
        dlg = LoginDialog(self.lh)
        dlg.ShowModal()
        self.Destroy()


def get_pw() -> Tuple[str, str]:
    """The main function to get the login info.

    Opens GUI and retrieves the login information
    and returns it.

    Returns:
        Tuple containing username and password, i.e. the value
        of `login_data` of the holder, which is still None if
        `LoginDialog.on_login` was never executed.
    """
    lh = LoginHolder()
    app = wx.App(False)
    # Constructing the frame already shows the (modal) login dialog.
    frame = MainFrame(lh)

    # Run GUI
    frame.Show(False)
    app.MainLoop()
    # presumably the wx object evaluates to False once it was
    # destroyed - TODO confirm against the wxPython docs.
    if frame:
        frame.Destroy()

    return lh.login_data
# /BatchRL/dynamics/classical.py
import numpy as np

from data_processing.dataset import Dataset
from dynamics.base_model import BaseDynamicsModel
from ml.sklearn_util import SKLoader, get_skl_model_name
from util.numerics import check_shape, trf_mean_and_std
from util.util import train_decorator


class SKLearnModel(BaseDynamicsModel):
    """Dynamics model wrapping a scikit-learn style regressor.

    The wrapped model needs to provide `fit(X, y)` and `predict(X)`.
    It is trained on the flattened input sequences and, if residual
    learning is used, predicts the change with respect to the previous
    state instead of the next state itself.
    """

    name: str = "Linear"  #: Base name of model.
    n_out: int  #: Number of series that are predicted.

    def __init__(self, data: Dataset, skl_model, residual: bool = True, clip_ind: int = None, **kwargs):
        """Initializes the sklearn-based model.

        Args:
            data: Dataset containing the data.
            skl_model: The regressor to wrap, needs `fit` and `predict`.
            residual: Whether the model is fitted on residual outputs,
                in that case the previous state is added in `predict`.
            clip_ind: If not None, index of the predicted series
                that is clipped from below in `predict`.
            kwargs: Kwargs for base class, e.g. `in_inds`. If it contains a
                non-empty `name`, that one is used instead of the name
                derived from `skl_model`.
        """
        # Construct meaningful name, an explicitly given one has priority
        name = get_skl_model_name(skl_model)
        if kwargs.get('name'):
            name = kwargs['name']
        kwargs['name'] = name

        # Init base class
        super().__init__(data, **kwargs)

        # Save model
        self.m = SKLoader(skl_model, self)

        # Save data
        self.n_out = len(self.out_inds)
        self.nc = data.n_c
        self.residual_learning = residual

        # Fitting model
        self.is_fitted = False
        self.skl_mod = skl_model

        self.clip_ind = clip_ind
        self.clip_val = None
        if clip_ind is not None:
            assert clip_ind < len(self.out_inds)
            # presumably the value 0 expressed in the scaled units
            # of the series - TODO confirm `trf_mean_and_std`.
            # NOTE(review): `clip_ind` is checked against `out_inds` but
            # indexes `data.scaling` directly, confirm that this should
            # not be `self.out_inds[clip_ind]`.
            self.clip_val = trf_mean_and_std(0, mean_and_std=self.data.scaling[clip_ind], remove=True)

    @train_decorator()
    def fit(self, verbose: int = 0, train_data: str = "train") -> None:
        """Fits the wrapped model.

        Does nothing if the model was already fitted on the same data part.

        Args:
            verbose: Verbosity, 0 means silent.
            train_data: Specifies the part of the data used for fitting.
        """
        # Check if already fitted
        if self.is_fitted and train_data == self.fit_data:
            if verbose:
                print("Already fitted!")
            return
        self.fit_data = train_data

        # Prepare the data, the inputs are flattened to 2D
        input_data, output_data = self.get_fit_data(train_data,
                                                    residual_output=self.residual_learning)
        input_data_2d = input_data.reshape((input_data.shape[0], -1))

        # Fit
        if verbose > 0:
            print("Fitting sklearn model...")
        self.skl_mod.fit(input_data_2d, output_data)
        self.is_fitted = True

    def predict(self, in_data: np.ndarray) -> np.ndarray:
        """Make predictions by applying the wrapped model.

        Args:
            in_data: Prepared data, 3D array.

        Returns:
            The predictions, one row per row of `in_data`.
        """
        check_shape(in_data, (-1, -1, -1))

        # Flatten and predict, 1D model output becomes a single column
        n = in_data.shape[0]
        in_data_res = in_data.reshape((n, -1))
        p = np.asarray(self.skl_mod.predict(in_data_res)).reshape((n, -1))

        # The previous state must only be added if the model was
        # fitted on residuals, else it already predicts the state itself.
        if self.residual_learning:
            prev_state = self._extract_output(in_data)
            p = prev_state.reshape((n, -1)) + p

        # Clip the chosen series from below
        if self.clip_ind is not None:
            p[:, self.clip_ind] = np.clip(p[:, self.clip_ind], self.clip_val, a_max=None)
        return p
# /BatchRL/dynamics/sin_cos_time.py
"""Defines the Sin-Cos-Time model.

Since the time is usually decoded as sin(t)
and cos(t), this model recalculates t and predicts
the next sin and cos values based on that.
"""
import numpy as np

from dynamics.base_model import BaseDynamicsModel
from data_processing.dataset import Dataset
from util.numerics import add_mean_and_std, rem_mean_and_std


class SCTimeModel(BaseDynamicsModel):
    """The time prediction model.

    Predicts the time exactly, up to numerical round-off
    based on only the last time.
    """

    #: How much |cos(t)| may exceed 1 due to round-off before it is rejected.
    cos_tol: float = 1e-6

    def __init__(self, dataset: Dataset, time_ind: int = None):
        """Initialize the Sine-Cosine-Time model.

        It predicts the next values given only the previous
        values of the sine and the cosine of the time.

        Args:
            dataset: Dataset containing two time series, sin(time) and cos(time).
            time_ind: Specifying which column holds the sin(t) series.
                The cos(t) series is assumed to be in column `time_ind` + 1.
                Defaults to the second last column.

        Raises:
            IndexError: If `time_ind` is out of range.
            AttributeError: If only one of the two series is scaled.
        """
        # Compute indices and name
        name = dataset.name + "_Exact"
        if time_ind is None:
            time_ind = dataset.d - 2
        if time_ind > dataset.d - 2 or time_ind < 0:
            raise IndexError("Time index out of range.")
        inds = np.array([time_ind, time_ind + 1], dtype=np.int32)
        super().__init__(dataset, name, inds, inds)

        # Save parameters, the angle increment per step
        # assumes `dataset.dt` is the step length in minutes - TODO confirm
        self.dx = 2 * np.pi / (24 * 60 / dataset.dt)

        # Scaling parameters
        s_ind, c_ind = self.out_inds
        if dataset.is_scaled[s_ind] != dataset.is_scaled[c_ind]:
            raise AttributeError("Be fucking consistent with the scaling!")
        self.is_scaled = dataset.is_scaled[s_ind] and dataset.is_scaled[c_ind]
        self.s_scale, self.c_scale = dataset.scaling[s_ind], dataset.scaling[c_ind]
        self.s_ind_prep, self.c_ind_prep = dataset.to_prepared(self.out_inds)

    def fit(self, verbose: int = 0, train_data: str = "train") -> None:
        """No need to fit anything, model is deterministic."""
        self.fit_data = train_data
        if verbose > 0:
            print(f"Exact model, nothing to fit on part: '{train_data}'!")

    def predict(self, in_data: np.ndarray) -> np.ndarray:
        """Compute the next sin(t) and cos(t) value given the values at the last timestep.

        Args:
            in_data: Prepared data, the last axis holds sin(t) and cos(t)
                in column 0 and 1, respectively.

        Returns:
            The next time: (sin(t + dt), cos(t + dt)).

        Raises:
            ValueError: If a cosine value lies outside of [-1, 1] by
                more than `cos_tol`.
        """
        n = in_data.shape[0]

        # Get previous values
        s = np.copy(in_data[:, -1, 0])
        c = np.copy(in_data[:, -1, 1])

        # Scale back
        if self.is_scaled:
            s = add_mean_and_std(s, self.s_scale)
            c = add_mean_and_std(c, self.c_scale)

        # Reject invalid cosines, but tolerate round-off from the scaling
        if c.size > 0:
            c_min, c_max = np.min(c), np.max(c)
            if c_max > 1.0 + self.cos_tol or c_min < -1.0 - self.cos_tol:
                raise ValueError("Invalid value encountered! "
                                 f"cos(t) range: [{c_min}, {c_max}]")
        c = np.clip(c, -1.0, 1.0)

        # Compute new, the sign of the sine determines the sign of the angle
        x = np.arccos(c)
        x = np.where(s < 0, -x, x) + self.dx
        s_new = np.sin(x)
        c_new = np.cos(x)

        # Evaluate and scale
        if self.is_scaled:
            s_new = rem_mean_and_std(s_new, self.s_scale)
            c_new = rem_mean_and_std(c_new, self.c_scale)

        # Concatenate and return
        out_dat = np.empty((n, 2), dtype=in_data.dtype)
        out_dat[:, 0] = s_new
        out_dat[:, 1] = c_new
        return out_dat

    def model_disturbance(self, data_str: str = 'train'):
        """No need to model, no disturbance used."""
        self.modeled_disturbance = True

    def disturb(self) -> np.ndarray:
        """No disturbance, model is exact."""
        return np.zeros((self.n_pred,), dtype=np.float32)

# ---- /batchRLOverv.drawio: the diagram data of the dump follows ----
7V1bc9q6Fv41zOzzQMc3bPMYcml7mlASzt6nPS97BFbArbGoMUnorz/yRWBLwjYg2c4eZzpTLGzZLH36tG5a7unXq7ePAVgvH5ADvZ6mOG89/aanaaqhab3on+LskhbbNpKGReA66UmHhqn7G6aNStq6dR24yZ0YIuSF7jrfOEe+D+dhrg0EAXrNn/aMvPxd12ABmYbpHHhs639dJ1ymraqiHL74BN3FMr21PUi/mIH5z0WAtn56v56mP8d/ydcrQPpKz98sgYNeM036bU+/DhAKk0+rt2voRbIlYkuuuzvy7f65A+iHVS54/N+f64fhLRz/+ve378HHH+PHX6iv6kk3L8DbQvI74qcNd0RC0HeuIkHjo7kHNht33tNHy3Dl4QYVf4RvbvgNf1Y+DNKj79FR+vnmLXuwIwd+GOwyF0WH30l/0cHhsviIXLcJA/RzP1BYhqNn5IcpqobkMH12jojSpg3aBvP052kzA9qqNpwpmmPY1nN/mKIQBAsYFskuFRV0cghLpf8RohXEj45PeD3gioBnmUEUaQugB0L3JY9LkMJ7se9uf4cJcvHP0pR0KuI/LMv4onQqmgTDpJPkZ6fXZaFCd2VaVFeGQXWVSIfpCn/I/PRDU4xFPi7/DH5/+2vxd+gM1n3w8EUxnr1v/YEAXCZYuUYeCuILdCX+46LogGHtLBBbJ6H4YsxyZaaxuOXLtl24HSo5pNnmmaBVDaojtV7I2oN6IZsBEWbxDISz+E2BWQ5hNYtfMgsKIHw+TIl+kMUp2C6+oM+f/9bMX2/z3afZemj+7putguleIbgYp4NmcSp0yc9h7TyoWWVQO3HN10fzbfACnfRALFD5ElU6pGY7wtABu8xp6+iEzfEHNs38fXQ7p8niD0mPYunaaiNdt4+tzapaRUfXctQKrY041SRx/QVAJc6DMqDqHU5l4NRS6sXpZbhsh8VWWcMw7VaB9p9isqn25Zit13PQmC7McYrxRWq0CqnGgHaKWec6xerShnUrfx9Dr0EbHoqcCKal5khZwaCoOBs0ubMhuu0EBi4WFwzSTuqeIVa7JohNu3rbP0F0yjc9EDtBmICAWj45Nq/uygM+zE+J+dL1nHuwQ9to0DYhmP8kR6MlCtzfGJ7goPyAgGBXyZ0wjS5MW+OAUwzXeAKQmFJ04IEZ9Eb7qBRRo3wUP9ZJNoDreZkz07BWMp3uwMr1Isn/BQMH+IA/6Xj3AZ678CP2wHMrmn0VtagXGITwrVc0VY7o0oRIMzPJMjlTyRhSQOVMmyc4D4G/wM96yu14E5eOpAAPS8MHIRxFo7a5UM9hwMuaj3EMt/8cQFgRxllkagqN3XIzUisf66Nzrnzwy0ZXo7XT7OjmJF0gVm4AUGNly0g0P19fl24Ip2sQrySvAVjnBb0IgOPCw+xJpy41HU3z7s40cTvCHbnhjoxKBalqlaVKIoEspg1blpD566pAzUSEC0O4t/o4MZdoJsdGt1RVGbwvTYXQq3WmXkKpJZYmRy0xtPx91Dr0dlY1uZ9OPhewkFrOQv9opeJySjRN2qTkkKTFmQe6dTlFcgOlJBElg4IvVzeTDgUyUUCximrXB4IitzbDBPiB8PNoJlhFA+zPNut4MTG9yAiZBfjTIvo0vuloQypgBgPKah0SFskghqdakXQd4YgxBGhW5wYkj3hNibbGV8kuTf47W2F6n6ElmqLsevUnYamADEzvHjuqkkpVRp6q1AHLVLWubWyqxvjusYOARAholG9Lb1i7UdnFCms3o1H/5nGcVWYyeDB/baPU+lhW/U0s5Ct8wnD9lihA6ddEAfpjCl4i594Eee58V6QwnX+Pz6t1gF7gCg/Uvzr4SoQvFULTbY6uVS9+WQp7wh3j4f2jd33bG406PEi11ihjTW0YDhrrtxmBcL7M8o8AvvmY+pPxKU/3HcAkAsxQ8wir0SVUuMcjA7Cbm8nH6DJfDLZmKV5XyGHjSB205EHLspuFlslGZGJtnDiaBGDL
QdtZFGmVoYM99j0IAt/1Fx1qZRoQJmVDmg0zoikgxem8HYVWdQ/XsW2xBweaiPTUs/1g6di8t3TVPs2g50YSjWY9YeRnZLXG684PIpPGVNoVNlSbNiVN1pQkNoOSUfgfYLhEaTrREY5LxzWbsJbNOGBHlYuAuzuCnAnauKGLohHy4HO02EaCdufAu6e+DtE6M5xBIrX92Vdp+wyFIVqV0RsXLvFDVQdES4jKoAimT2GoMk+V9CMqEdPkGj3HnopObFbV4vMNOsQ6GKRz72ikQqcIuuQGNiWm/A+Qk7ExYD2Zt28hjBlbU+LQhhB3ZqKNK88ogHOw6fyNUpcJfUgtE5bOi+4OjjOHeA9AzeUIiL6bT/2vWn5gf1AxftwyhZdM6pasI4zCSyc4n6vw1l0FpuY92ueaX/wKSMWm3mX7to7lS4tDdLu2c9MhaU2UCVcR0aeqRppOh9CLVRF6S2Wp6kLHZwUnmzL4YtPLHu77q3gnwQxsIt2FdtZR8/Q9bi0gg8HzXXG3GugCVnN+rbEK3qt2bTXQK0uZ3j6WkbKpypIyqxKY7NKCuY8ACwXYnF4gH3i3h1ZqN1ZGvGUqMkY9ZYXHbXeud8D9YWYkixG9zv1n6frJF+llSRKek7noBwzDXXoMtiGKRnL/Q+5RZIWftEgSa7tYN7cLdPPqyCldeS6tvEBRrrlPEihZU86gU27aNrEyMiG6yXXBrO7MtEspRqcdCtYHY5j5M1ial+TZYxcl1i0wRr7n+hBEfoCHCBki/ALIx1+Ox1GXSSnXDm7S4KbSDq+B8kHJ/um14Y2/b4R1EXy+fhoLSqjraSMC3A5j8jBG+WAttSlK40OMTRZ4gnMInTj+rnxKtH386RoLKkAsHxWGK35sV2si3fRbTgSDFxo4eZfmEeTR0DkzDNGujQOUYmTQPsqqtnZZP6I2XvJvc9TUzlvmumadcnpJzEJj/BSFp5vUs5c9uk09jF3cvUolYehGyflqoSglBURMhiLiKEiaT6SsSXAT4jPwf3sNpltVpK0qxoDO3mk8g9pkzaXJ9Gu/2+UqEwd0Ir1Vn8bK5wpWnZhMu9RmuUbzkK4QNWicDCzWHf103a0JUnHA7HjnxrhlwaDwDRwZGHzEC4LSmZ61AILSXe2mLE9+siQbMRnBJXhxUeTV2GfM+dgWjaJXp5idZ2fJCctjS8ezJfYivUNcpQv/VX7vTFlHoqKz/MptVc0iUrb26Pn09sNSo/G04K9mFz6PHDPNYp2FD8hxn905iJI62URTOQFfvZzZTq4bOKTHVx8y3LWHTK6Un6lLYi9SzTwbDogqtMQe1zHy+05UwXTl+u4mdOenO2wZb+3eynYPe5S71fMoxoqnSGXkMcVXoxBBw2VhbDYS9bCN3scX78iiygY99scwfEXBT3b+d2ARDJYhXXdMb76GkM067/aq1W3UGC8OLG6+7sOQNzsfD8R80+siknWRDlO/zmB1dW6usixd3WbdOncQhNsgKsgxhR6cJyhSgO9EwHqLKickTR1Yal6hdA5WJDEOv+guuzzdQLjGLffl+3qlJ8YJV001Oyd9UzcZ8au8JDlzIEv+FaqhN5qJeLKM6cKtA5v1cu93VOUTEWVkArH4/uKj14j9nP1SeYcJ0E930yUlGNh1s9z+MuQk3J48AHa5/C1u0Wkp8mf9Rp0j8SRHonHq+NOv/6uvUgA3Z4XddNLVjKgFB/SmCa3h/DjW43XzOO7qddWABHpnrqrorcqcZK3eG1KypitR825ARrvdmy6sxAa1J9OvHQTq0zxUpfHqHpbGgODWf3ED5CceeSXdbccr6FcYtDwlFzYX4JRXfKNdm6bpNUejq/tU3mJa1pGo8hv0ffTisKFKbX8hIa2q0di+WXy+SUVVS6Kk1oB+nOLTabbOB1XlBD2HHGfT41OXZyR1WaYrx/Fe41brHgYRL0rgV4PbVwUoqwYn9b2Yxzw42WICx9wk2UICRze1tYTh3+O7tvmIZO2PDpE8
RBZue+tQKXaxZDXXpORwt1aKcpTTOpBpqs0ujazFegEPnff+9HdLQ1rHQjLWRpHvCz2hnNPZlcwkQLIoqlFa+Im8x7ytuLSplMvKVjm9lUmjOpKMSwHlp/kl9s6rsNcALAvhVkqX7VLaBkreSLUMtWL1mHLC1KUBk+92lkWY7TAlSkvpiSbQdgG18XX9VK/mgPJqGvnNEBf7+fgmYYWkJskGdZ6erXfHzy2rF0nzs6590N4rQQvwPwp4c4UkIMuBLYmllrN1u15VMRTF1kazviCbVXfjNzV2viBRviBDpxnOaDp2bbIJUzUR1Vlqp3WS2tkwT5HyJx1PiQ3wKgxkp1dP06uOqcQxlU5Xo1caZ6ohq+9PJl2qlcBBp6uFmEbDpYOGGjPkV3qX1y9yyCnL3dIaz6bbLy91qSR0qQKhb0UpM6XOV0N4Vn5hQlRL1BBh0YEabXz+rjqe6pzmkh/2THu7eHw30YM7IATR870AFzPPIfUc3352KMF7lN0qbMzL0lDKYkf26rHzgNlVJoBfaHph08I0DtIM+t03FcgFHwYoSsk/DDGWzjLZn67f/h8= -------------------------------------------------------------------------------- /BatchRL/dynamics/composite.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | 5 | from data_processing.dataset import Dataset 6 | from dynamics.base_model import BaseDynamicsModel 7 | from util.numerics import has_duplicates 8 | from util.util import ProgWrap, prog_verb 9 | 10 | 11 | class CompositeModel(BaseDynamicsModel): 12 | """The composite model, combining multiple models. 13 | 14 | All models need to be based on the same dataset. 15 | """ 16 | 17 | model_list: List[BaseDynamicsModel] 18 | 19 | def __init__(self, dataset: Dataset, model_list: List[BaseDynamicsModel], new_name: str = None): 20 | """Initialize the Composite model. 21 | 22 | All individual model need to be initialized with the same dataset! 23 | 24 | Args: 25 | dataset: The common `Dataset`. 26 | model_list: A list of dynamics models defined for the same dataset. 27 | new_name: The name to give to this model, default produces very long names. 28 | 29 | Raises: 30 | ValueError: If the model in list do not have access to `dataset` or if 31 | any series is predicted by multiple models. 
class CompositeModel(BaseDynamicsModel):
    """The composite model, combining multiple models.

    All models need to be based on the same dataset.
    """

    model_list: List[BaseDynamicsModel]

    def __init__(self, dataset: Dataset, model_list: List[BaseDynamicsModel], new_name: str = None):
        """Initialize the Composite model.

        All individual model need to be initialized with the same dataset!

        Args:
            dataset: The common `Dataset`.
            model_list: A list of dynamics models defined for the same dataset.
            new_name: The name to give to this model, default produces very long names.

        Raises:
            ValueError: If the model in list do not have access to `dataset`, if
                any series is predicted by multiple models or if not all
                non-control series are predicted.
        """
        # Compute name and check datasets
        name = dataset.name + "Composite"
        for m in model_list:
            name += f"_{m.name}"
            if m.data != dataset:
                raise ValueError(f"Model {m.name} needs to model the same dataset "
                                 "as the Composite model.")
        if new_name is not None:
            name = new_name

        # Collect indices and initialize base class.
        n_pred_full = dataset.d - dataset.n_c
        all_out_inds = np.concatenate([m.out_inds for m in model_list])
        if has_duplicates(all_out_inds):
            raise ValueError("Predicting one or more series multiple times.")
        out_inds = dataset.from_prepared(np.arange(n_pred_full))
        super().__init__(dataset, name, out_inds, None)

        # Reset the indices, since we do not want to permute twice!
        self.p_in_indices = np.arange(dataset.d)

        # We allow only full models, i.e. when combined, the models have to predict
        # all series except for the controlled ones.
        if self.n_pred != n_pred_full or len(all_out_inds) != n_pred_full:
            raise ValueError("You need to predict all non-control series!")

        # Save models
        self.model_list = model_list

    def init_1day(self, day_data: np.ndarray) -> None:
        """Calls the same function on all models in list.

        Args:
            day_data: The data for the initialization.
        """
        for m in self.model_list:
            m.init_1day(day_data)

    def fit(self, verbose: int = 0, train_data: str = "train") -> None:
        """Fits all the models.

        Args:
            verbose: Verbosity, nothing is printed by this method if it is 0.
            train_data: Specifies the part of the data used for fitting.
        """
        self.fit_data = train_data
        with ProgWrap(f"Fitting sub-models on part: '{train_data}'...", verbose > 0):
            for ct, m in enumerate(self.model_list):
                # Progress output only if requested
                if verbose > 0:
                    print(f"Fitting model {ct}: {m.name}")
                m.fit(verbose=prog_verb(verbose), train_data=train_data)

    def predict(self, in_data: np.ndarray) -> np.ndarray:
        """Aggregated prediction by predicting with all models.

        Each sub-model gets the input columns given by its `p_in_indices`
        and its predictions are written to the columns `p_out_inds`.

        Args:
            in_data: Prepared data.

        Returns:
            Aggregated predictions.
        """
        # Get shape of prediction
        out_dat = np.empty((in_data.shape[0], self.n_pred), dtype=in_data.dtype)

        # Predict with all the models
        for m in self.model_list:
            out_dat[:, m.p_out_inds] = m.predict(in_data[:, :, m.p_in_indices])

        return out_dat

    def disturb(self):
        """Returns a sample of noise.

        The sample is assembled from the noise samples of all sub-models.
        """
        out_dat = np.empty((self.n_pred,), dtype=np.float32)

        # Disturb with all the models
        for m in self.model_list:
            out_dat[m.p_out_inds] = m.disturb()

        return out_dat

    def model_disturbance(self, data_str: str = 'train'):
        """Models the disturbances for all sub-models."""
        for m in self.model_list:
            m.model_disturbance(data_str)
        self.modeled_disturbance = True
84 | 85 | Returns: 86 | Aggregated predictions. 87 | """ 88 | # Get shape of prediction 89 | in_sh = in_data.shape 90 | out_dat = np.empty((in_sh[0], self.n_pred), dtype=in_data.dtype) 91 | 92 | # Predict with all the models 93 | for m in self.model_list: 94 | in_inds = m.p_in_indices 95 | out_inds = m.p_out_inds 96 | pred_in_dat = in_data[:, :, in_inds] 97 | preds = m.predict(pred_in_dat) 98 | out_dat[:, out_inds] = preds 99 | 100 | return out_dat 101 | 102 | def disturb(self): 103 | """Returns a sample of noise. 104 | """ 105 | out_dat = np.empty((self.n_pred,), dtype=np.float32) 106 | 107 | # Disturb with all the models 108 | curr_ind = 0 109 | for m in self.model_list: 110 | n_pred_m = m.n_pred 111 | out_inds = m.p_out_inds 112 | out_dat[out_inds] = m.disturb() 113 | curr_ind += n_pred_m 114 | 115 | return out_dat 116 | 117 | def model_disturbance(self, data_str: str = 'train'): 118 | """Models the disturbances for all sub-models.""" 119 | for m in self.model_list: 120 | m.model_disturbance(data_str) 121 | self.modeled_disturbance = True 122 | -------------------------------------------------------------------------------- /DocFiles/cookbook.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Cookbook 3 | ======== 4 | 5 | This page explains how to use the main script, `BatchRL.py`. 6 | You will need to `cd` into the folder `BatchRL` and activate 7 | the virtual environment before running these commands. 8 | 9 | Verbose mode 10 | ------------ 11 | 12 | One option that can be used in all cases, 13 | is :option:`-v`: 14 | 15 | python BatchRL.py -v [other_options] 16 | 17 | In this case the output will usually be more 18 | verbose than without that option. 
19 | 20 | Retrieve data from the NEST database 21 | ------------------------------------ 22 | 23 | If you want to load the data from the database to your 24 | local PC, 25 | simply run:: 26 | 27 | python BatchRL.py -d --data_end_date 2020-02-21 28 | 29 | This will retrieve, process, and store the data from 30 | beginning of 2019 until the specified date with the option 31 | :option:`--data_end_date`. There is no need to specify a room 32 | number, this will load the data for all rooms. Also includes 33 | the data of the battery. 34 | 35 | Battery 36 | ------- 37 | 38 | Running the script with the option :option:`-b` for 39 | battery:: 40 | 41 | python BatchRL.py -b --data_end_date 2020-02-21 42 | 43 | Will fit and evaluate the battery model, based on the 44 | data that was collected up to the specified date. 45 | 46 | Hyperparameter optimization 47 | --------------------------- 48 | 49 | Using the option :option:`-o` for optimize will run 50 | the hyperparameter optimization:: 51 | 52 | python BatchRL.py -p --room_nr 41 --hop_eval_data val 53 | 54 | In this case you can specify the room number with the option 55 | :option:`--room_nr`. In this case, room 41 was chosen. Refer 56 | to the report for more information about which room this is 57 | exactly. 58 | Further, you can also specify the set where the objective of 59 | the hyperparameter tuning is evaluated, it can either be 60 | `val` or `test`, for validation or test set, respectively. 61 | Note that you may also specify the data using :option:`--data_end_date`. 62 | Using the option :option:`-in 50`, one can specify the number of 63 | models that are fitted during the optimization. 64 | 65 | Evaluating models 66 | ----------------- 67 | 68 | Using the option :option:`-m` for model evaluation, will evaluate 69 | the models:: 70 | 71 | python BatchRL.py -m --train_data train_val [other_options] 72 | 73 | It uses the corresponding hyperparameters from the hyperparameter 74 | optimization.
Therefore, the hyperparameter optimization must 75 | be run before calling the script with this option. Additionally to 76 | the flags that can be used for the hyperparameter tuning, you may 77 | specify the training set using :option:`--train_data`, possibilities 78 | include: "train", "train_val" and "all". 79 | 80 | Reinforcement learning 81 | ---------------------- 82 | 83 | The reinforcement learning agent can be trained and evaluated using 84 | the flag :option:`-r`:: 85 | 86 | python BatchRL.py -r -in 1000000 10000 -fl 50.0 22.0 24.0 87 | 88 | Using the flag :option:`-in 1000000 10000`, lets you specify 89 | the number of steps used for training and the number of steps used 90 | for evaluation of the RL agent. :option:`-fl 50.0 22.0 24.0` lets 91 | you specify the balance factor alpha, and the lower and the upper 92 | temperature bounds. Note that also the previous flags, i.e. 93 | :option:`--data_end_date`, :option:`--room_nr` and :option:`--train_data` 94 | may be specified to determine which model/data/room to use. 95 | 96 | Room control using RL 97 | --------------------- 98 | 99 | The trained reinforcement learning agent can be run on the 100 | real system using:: 101 | 102 | python BatchRL.py -u -fl 50.0 22.0 24.0 103 | 104 | As in the previous case, you can specify the balance 105 | factor and the temperature bounds. Also the other 106 | flags specifying the room/model/data will be needed 107 | to determine what exactly should be controlled. 108 | 109 | Rule-Based controller 110 | --------------------- 111 | 112 | To run the rule-based controller, use:: 113 | 114 | python BatchRL.py --rule_based -fl 21.0 115 | 116 | With the flag :option:`-fl 21.0`, you specify 117 | that the valves will be opened, when the temperature drops 118 | below 21.0 degrees. Note that this is only applicable for 119 | heating cases. 
120 | 121 | Cleanup 122 | ------- 123 | 124 | Running the script with the option :option:`-c` for 125 | cleanup:: 126 | 127 | python BatchRL.py -c 128 | 129 | Will cleanup the temporary files that were 130 | generated during debugging and testing. 131 | 132 | Default 133 | ------- 134 | 135 | When not specifying any of the above options, 136 | the function `curr_tests()` from BatchRL.py will 137 | be run. Add your custom code for testing or debugging 138 | there. 139 | 140 | For more details about how to run the code, consider 141 | the actual code or contact the author. 142 | -------------------------------------------------------------------------------- /BatchRL/ml/keras_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Sequence, Union, Any 3 | 4 | from keras import Model 5 | from keras.layers import Dense, Dropout, BatchNormalization 6 | from keras.models import Sequential 7 | from keras.regularizers import l2 8 | 9 | from ml.sklearn_util import SKLoader 10 | from util.util import dynamic_model_dir, create_dir, DEFAULT_TRAIN_SET, DEFAULT_EVAL_SET 11 | 12 | KerasModel = Union[Sequential, Model, SKLoader] 13 | 14 | 15 | def soft_update_params(model_to_update, other, lam: float = 1.0) -> None: 16 | """ 17 | Soft parameter update: 18 | :math:`\\theta' = \\lambda * \\theta + (1 - \\lambda) * \\theta'` 19 | 20 | :param model_to_update: Model where the parameters will be updated. 21 | :param other: Model where the parameters of the model should be updated to. 22 | :param lam: Factor determining how much the parameters are updated. 
def soft_update_params(model_to_update, other, lam: float = 1.0) -> None:
    """
    Soft parameter update:
    :math:`\\theta' = \\lambda * \\theta + (1 - \\lambda) * \\theta'`

    Here :math:`\\theta'` are the weights of `model_to_update` and
    :math:`\\theta` the ones of `other`. With `lam` equal to 1.0, the
    weights of `other` are copied over unchanged.

    :param model_to_update: Model whose weights are updated, needs
        `get_weights` and `set_weights`.
    :param other: Model providing the weights to move towards.
    :param lam: Factor determining how much the parameters are updated.
    :return: None
    """
    new_weights = other.get_weights()

    if lam != 1.0:
        # Blend weight by weight, same order as in the model to update
        old_weights = model_to_update.get_weights()
        new_weights = [(1.0 - lam) * old + lam * new_weights[idx]
                       for idx, old in enumerate(old_weights)]

    model_to_update.set_weights(new_weights)


def max_loss(y_true, y_pred):
    """
    Loss that ignores the true labels, minimizing it
    maximizes y_pred directly.

    :param y_true: True labels, not used here.
    :param y_pred: Output for maximization.
    :return: -y_pred since this will be minimized.
    """
    negated = -y_pred
    return negated
def getMLPModel(mlp_layers: Sequence = (20, 20), out_dim: int = 1,
                trainable: bool = True,
                dropout: bool = False,
                bn: bool = False,
                ker_reg: float = 0.01):
    """Builds a sequential keras MLP.

    Each hidden layer is a ReLU dense layer with L2 kernel regularization,
    optionally followed by batch normalization and dropout. The output
    layer is a dense layer without activation.

    Args:
        mlp_layers: The numbers of neurons per layer.
        out_dim: The output dimension.
        trainable: Whether the parameters should be trainable.
        dropout: Whether to use dropout.
        bn: Whether to use batch normalization.
        ker_reg: Kernel regularization weight.

    Returns:
        Sequential keras MLP model.
    """
    net = Sequential()
    if bn:
        net.add(BatchNormalization(trainable=trainable, name="bn0"))

    # Hidden layers
    for layer_ind, n_units in enumerate(mlp_layers):
        net.add(Dense(n_units,
                      activation='relu',
                      trainable=trainable,
                      kernel_regularizer=l2(ker_reg),
                      name=f"dense{layer_ind}"))
        if bn:
            net.add(BatchNormalization(trainable=trainable, name=f"bn{layer_ind + 1}"))
        if dropout:
            net.add(Dropout(0.2))

    # Linear output layer
    net.add(Dense(out_dim, activation=None, trainable=trainable, name="last_dense"))
    return net


class KerasBase:
    """Base class for keras models.

    Offers helpers for storing and restoring models on disk.
    """

    model_path: str = dynamic_model_dir
    m: KerasModel

    def _model_path_name(self, name, train_data: str):
        """Returns the file path of the model, depending on the train data."""
        if train_data != DEFAULT_TRAIN_SET:
            return self.get_path(f"{name}_TDP_{train_data}")
        return self.get_path(f"{name}")

    def save_model(self, m, name: str,
                   train_data: str = DEFAULT_TRAIN_SET) -> None:
        """Saves a keras model.

        Args:
            m: Keras model.
            name: Name of the model.
            train_data: Train data specifier.
        """
        target = self._model_path_name(name, train_data)
        m.save(target)

    def load_if_exists(self, m, name: str,
                       train_data: str = DEFAULT_TRAIN_SET) -> bool:
        """Loads the weights into the keras model if the file exists.

        Args:
            m: Keras model to be loaded.
            name: Name of model.
            train_data: Train data specifier.

        Returns:
            True if model could be loaded else False.
        """
        weights_file = self._model_path_name(name, train_data)
        if not os.path.isfile(weights_file):
            return False
        m.load_weights(weights_file)
        return True

    def get_path(self, name: str, ext: str = ".h5",
                 env: Any = None,
                 hop_eval_set: str = DEFAULT_EVAL_SET) -> str:
        """
        Returns the path where the model parameters
        are stored.

        If an environment with a name is given, a sub-folder
        named after it is used (and created).

        Args:
            name: Model name.
            ext: Filename extension.
            env: Environment with a name attribute.
            hop_eval_set: Hyperparameter opt. evaluation set.

        Returns:
            Model parameter file path.
        """
        folder = self.model_path
        if env is not None and hasattr(env, "name"):
            suffix = "" if hop_eval_set == DEFAULT_EVAL_SET else f"_HEV_{hop_eval_set}"
            folder = os.path.join(folder, env.name + suffix)
            create_dir(folder)
        return os.path.join(folder, name + ext)
{found}") 133 | if found: 134 | m.load_weights(full_path) 135 | return found 136 | 137 | def get_path(self, name: str, ext: str = ".h5", 138 | env: Any = None, 139 | hop_eval_set: str = DEFAULT_EVAL_SET) -> str: 140 | """ 141 | Returns the path where the model parameters 142 | are stored. 143 | 144 | Args: 145 | name: Model name. 146 | ext: Filename extension. 147 | env: Environment with a name attribute. 148 | hop_eval_set: Hyperparameter opt. evaluation set. 149 | 150 | Returns: 151 | Model parameter file path. 152 | """ 153 | res_folder = self.model_path 154 | if env is not None and hasattr(env, "name"): 155 | hop_ext = f"_HEV_{hop_eval_set}" if hop_eval_set != DEFAULT_EVAL_SET else "" 156 | res_folder = os.path.join(res_folder, env.name + hop_ext) 157 | create_dir(res_folder) 158 | return os.path.join(res_folder, name + ext) 159 | -------------------------------------------------------------------------------- /BatchRL/opcua_empa/run_opcua.py: -------------------------------------------------------------------------------- 1 | """Module for running the opcua client. 2 | 3 | May be removed later and moved to BatchRL.py if 4 | it is high-level enough. 
5 | """ 6 | from typing import List 7 | 8 | from agents.keras_agents import default_ddpg_agent, DEF_RL_LR 9 | from dynamics.load_models import load_room_env 10 | from opcua_empa.controller import ValveToggler, ValveTest2Controller, FixTimeConstController, RLController, RuleBased 11 | from opcua_empa.opcua_util import check_room_list 12 | from opcua_empa.opcuaclient_subscription import OpcuaClient 13 | from opcua_empa.room_control_client import run_control 14 | from tests.test_opcua import OfflineClient 15 | from util.util import prog_verb, ProgWrap, DEFAULT_ROOM_NR, DEFAULT_EVAL_SET 16 | 17 | 18 | def try_opcua(verbose: int = 1, room_list: List[int] = None, debug: bool = True): 19 | """Runs the opcua client.""" 20 | 21 | if verbose: 22 | if debug: 23 | print("Running in debug mode!") 24 | 25 | # Choose experiment name 26 | exp_name = "Test" 27 | 28 | # Check list with room numbers 29 | check_room_list(room_list) 30 | 31 | # Define room and control 32 | # tc = ToggleController(n_mins=60 * 100, start_low=True, max_n_minutes=60 * 16) 33 | # tc = ValveToggler(n_steps_delay=30, n_steps_max=2 * 60) 34 | tc = ValveTest2Controller() 35 | room_list = [43] if room_list is None else room_list 36 | used_control = [(i, tc) for i in room_list] 37 | if debug: 38 | room_list = [41] 39 | used_control = [(r, ValveToggler(n_steps_delay=30)) 40 | for r in room_list] 41 | exp_name = "Offline_DebugValveToggle" 42 | 43 | # Use offline client in debug mode 44 | cl_class = OfflineClient if debug else OpcuaClient 45 | run_control(used_control=used_control, 46 | exp_name=exp_name, 47 | user=None, 48 | password=None, 49 | verbose=verbose, 50 | _client_class=cl_class) 51 | 52 | 53 | def run_rl_control(room_nr: int = DEFAULT_ROOM_NR, 54 | notify_failure: bool = False, 55 | debug: bool = False, 56 | verbose: int = 5, 57 | n_steps: int = None, 58 | hop_eval_set: str = DEFAULT_EVAL_SET, 59 | notify_debug: bool = None, 60 | agent_lr: float = DEF_RL_LR, 61 | **env_kwargs, 62 | ): 63 | """Runs the RL 
agent via the opcua client. 64 | 65 | Args: 66 | room_nr: 67 | notify_failure: Whether to send a mail upon failure. 68 | debug: 69 | verbose: 70 | n_steps: 71 | hop_eval_set: 72 | agent_lr: 73 | notify_debug: Whether to use debug mail address to send notifications, 74 | Ignored, if `notify_failure` is False. 75 | **env_kwargs: Keyword arguments for environment, see :func:`load_room_env`. 76 | """ 77 | full_debug: bool = False 78 | 79 | assert room_nr in [41, 43], f"Invalid room number: {room_nr}" 80 | 81 | if notify_debug is None: 82 | notify_debug = debug 83 | msg = f"Using {'debug' if notify_debug else 'original'} " \ 84 | f"mail address for notifications." 85 | if verbose: 86 | print(msg) 87 | 88 | next_verbose = prog_verb(verbose) 89 | m_name = "FullState_Comp_ReducedTempConstWaterWeather" 90 | n_hours = 24 * 3 if not debug else 3 91 | 92 | rl_cont = None 93 | if not full_debug: 94 | # Load the model and init env 95 | with ProgWrap(f"Loading environment...", verbose > 0): 96 | env = load_room_env(m_name, 97 | verbose=next_verbose, 98 | room_nr=room_nr, 99 | hop_eval_set=hop_eval_set, 100 | **env_kwargs) 101 | 102 | # Define default agents and compare 103 | with ProgWrap(f"Initializing agents...", verbose > 0): 104 | agent = default_ddpg_agent(env, n_steps, fitted=True, 105 | verbose=next_verbose, 106 | hop_eval_set=hop_eval_set, 107 | lr=agent_lr) 108 | if verbose: 109 | print(agent) 110 | 111 | # Choose controller 112 | rl_cont = RLController(agent, n_steps_max=3600 * n_hours, 113 | const_debug=debug, 114 | verbose=next_verbose) 115 | else: 116 | if verbose: 117 | print("Using constant model without an agent.") 118 | 119 | f_cont = FixTimeConstController(val=21.0, max_n_minutes=n_hours * 60) 120 | cont = f_cont if full_debug else rl_cont 121 | used_control = [(room_nr, cont)] 122 | 123 | exp_name = "DefaultExperiment" 124 | if debug: 125 | exp_name += "Debug" 126 | 127 | # Run control 128 | run_control(used_control=used_control, 129 | exp_name=exp_name, 130 | 
user=None, 131 | password=None, 132 | debug=notify_debug, 133 | verbose=verbose, 134 | _client_class=OpcuaClient, 135 | notify_failures=notify_failure) 136 | 137 | 138 | def run_rule_based_control(room_nr: int = DEFAULT_ROOM_NR, *, 139 | min_temp: float = 21.0, 140 | name_ext: str = "", 141 | notify_debug: bool = True, 142 | verbose: int = 5, 143 | ) -> None: 144 | """Runs rule-based controller for heating season. 145 | 146 | Args: 147 | room_nr: 148 | min_temp: 149 | name_ext: 150 | notify_debug: 151 | verbose: 152 | """ 153 | 154 | exp_name = f"RuleBased_{min_temp}{name_ext}" 155 | controller = RuleBased(min_temp, n_steps_max=3600 * 24 * 31) 156 | used_control = [(room_nr, controller)] 157 | 158 | # Run control 159 | run_control(used_control=used_control, 160 | exp_name=exp_name, 161 | user=None, 162 | password=None, 163 | debug=notify_debug, 164 | verbose=verbose, 165 | _client_class=OpcuaClient, 166 | notify_failures=True) 167 | -------------------------------------------------------------------------------- /BatchRL/util/share_data.py: -------------------------------------------------------------------------------- 1 | """Module for sharing data via Google Drive. 2 | 3 | Based on the `pydrive` library. It can be pretty 4 | slow though, especially if you want to upload multiple files. 5 | Therefore you should prefer zipping multiple files and then 6 | only uploading the zip file, as does 7 | :func:`util.share_data.upload_folder_zipped`. 8 | 9 | If you just cloned from Github, you will need to setup 10 | the Google Drive API and create a `settings.yaml` file 11 | in the `BatchRL` folder for the authentication. 
12 | """ 13 | import os 14 | import shutil 15 | import zipfile 16 | 17 | from util.util import TEMP_DIR, EULER 18 | 19 | if not EULER: 20 | from pydrive.auth import GoogleAuth 21 | from pydrive.drive import GoogleDrive 22 | from pydrive.files import GoogleDriveFile 23 | else: 24 | GoogleAuth = None 25 | GoogleDrive = None 26 | GoogleDriveFile = None 27 | 28 | 29 | FOLDER_MIME_TYPE = "application/vnd.google-apps.folder" 30 | 31 | 32 | def g_drive_login() -> GoogleDrive: 33 | """Login to Google Drive and create and return drive object.""" 34 | g_login = GoogleAuth() 35 | g_login.LocalWebserverAuth() 36 | drive = GoogleDrive(g_login) 37 | print("Authentication successful") 38 | return drive 39 | 40 | 41 | def upload_folder_zipped(f_path, out_file_name: str = None, 42 | remove_existing: bool = False): 43 | """Uploads the content of the folder `f_path` to Google Drive. 44 | 45 | If `out_file_name` is specified, this will be the name of the 46 | uploaded file, otherwise the name of the folder will be used. 47 | If `remove_existing` is True, existing files with the same 48 | name will be removed. 
49 | """ 50 | f_name = os.path.basename(f_path) 51 | if out_file_name is None: 52 | out_file_name = f_name 53 | out_path = os.path.join(TEMP_DIR, out_file_name) 54 | shutil.make_archive(out_path, 'zip', f_path) 55 | file_zip_path = out_file_name + ".zip" 56 | drive = None 57 | if remove_existing: 58 | f_list, drive = get_root_files() 59 | found = [f for f in f_list if f["title"] == file_zip_path] 60 | for f in found: 61 | f.Delete() 62 | upload_file(out_path + ".zip", drive=drive) 63 | 64 | 65 | def download_and_extract_zipped_folder(base_name: str, extract_dir: str, 66 | remove_old_files: bool = False): 67 | f_name = base_name + ".zip" 68 | 69 | # Find file on Drive 70 | f_list, drive = get_root_files() 71 | found = [f for f in f_list if f["title"] == f_name] 72 | f = None 73 | if len(found) > 1: 74 | print("Found multiple files, choosing newest.") 75 | sorted_files = sorted(found, key=lambda f: f["modifiedDate"]) 76 | f = sorted_files[-1] 77 | if remove_old_files: 78 | for old_f in sorted_files[:-1]: 79 | old_f.Delete() 80 | elif len(found) == 0: 81 | raise FileNotFoundError(f"No such file found: {f_name}") 82 | 83 | # Download to Temp folder 84 | out_temp_path = os.path.join(TEMP_DIR, f_name) 85 | f.GetContentFile(out_temp_path) 86 | 87 | # Unzip into folder 88 | with zipfile.ZipFile(out_temp_path, "r") as zip_ref: 89 | zip_ref.extractall(extract_dir) 90 | 91 | 92 | def get_root_files(): 93 | drive = g_drive_login() 94 | 95 | # Auto-iterate through all files in the root folder. 96 | file_list = drive.ListFile({'q': "'root' in parents and trashed=false"}).GetList() 97 | 98 | return file_list, drive 99 | 100 | 101 | def _rec_list(parent_dir: GoogleDriveFile, drive: GoogleDrive, lvl: int = 0): 102 | par_id = parent_dir["id"] 103 | ind = " " * 4 * lvl 104 | if parent_dir["mimeType"] == FOLDER_MIME_TYPE: 105 | # Found folder, recursively iterate over children. 
106 | print(f"{ind}Folder: {parent_dir['title']}") 107 | file_list = drive.ListFile({'q': f"'{par_id}' in parents and trashed=false"}).GetList() 108 | for f in file_list: 109 | _rec_list(f, drive, lvl + 1) 110 | else: 111 | # Found file 112 | print(f"{ind}File: {parent_dir['title']}") 113 | 114 | 115 | def list_files_recursively() -> None: 116 | """Lists the whole content of your Google Drive recursively. 117 | 118 | This is extremely slow!""" 119 | file_list, drive = get_root_files() 120 | 121 | # Iterate over all found files. 122 | for file1 in file_list: 123 | _rec_list(parent_dir=file1, drive=drive) 124 | 125 | 126 | def upload_file(file_path, folder: str = None, drive=None): 127 | """Uploads a file to Google Drive. 128 | 129 | If `folder` is not None, a folder with that name 130 | will be created and the file will be put into it. 131 | """ 132 | if drive is None: 133 | drive = g_drive_login() 134 | 135 | if folder is not None: 136 | assert type(folder) == str 137 | # Create folder. 138 | folder_metadata = { 139 | 'title': folder, 140 | # The mimetype defines this new file as a folder, so don't change this. 141 | 'mimeType': FOLDER_MIME_TYPE, 142 | } 143 | folder = drive.CreateFile(folder_metadata) 144 | folder.Upload() 145 | print("Uploaded Folder.") 146 | 147 | # Create file on drive. 148 | fn = os.path.basename(file_path) 149 | if folder is None: 150 | f = drive.CreateFile({'title': fn}) 151 | else: 152 | assert isinstance(folder, GoogleDriveFile) 153 | folder_id = folder["id"] 154 | f = drive.CreateFile({"title": fn, "parents": [{"kind": "drive#fileLink", "id": folder_id}]}) 155 | 156 | # Set and upload content. 
157 | f.SetContentFile(file_path) 158 | f.Upload() 159 | print(f"The file: {file_path} has been uploaded") 160 | 161 | 162 | def test_file_upload(): 163 | """This is slow and requires user interaction.""" 164 | TEST_DATA_DIR = "./tests/data" 165 | local_test_file = os.path.join(TEST_DATA_DIR, "test_upload_file.txt") 166 | upload_file(local_test_file, folder="test") 167 | 168 | 169 | def test_folder_zip(): 170 | """This is slow and requires user interaction.""" 171 | TEST_DATA_DIR = "./tests/data" 172 | local_test_file = os.path.join(TEST_DATA_DIR, "TestUploadFolder") 173 | upload_folder_zipped(local_test_file) 174 | download_and_extract_zipped_folder("TestUploadFolder", local_test_file, 175 | remove_old_files=True) 176 | -------------------------------------------------------------------------------- /BatchRL/agents/agents_heuristic.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Sequence, Union, Tuple 3 | 4 | import numpy as np 5 | 6 | from agents.base_agent import AgentBase 7 | from envs.dynamics_envs import FullRoomEnv, RoomBatteryEnv, BatteryEnv 8 | from util.util import Arr, Num 9 | 10 | 11 | def get_const_agents(env: Union[FullRoomEnv, RoomBatteryEnv, BatteryEnv] 12 | ) -> Tuple['ConstActionAgent', 'ConstActionAgent']: 13 | """Defines two constant agents that can be used for analysis. 14 | 15 | Args: 16 | env: The environment. 17 | 18 | Returns: 19 | Tuple with two ConstActionAgent 20 | """ 21 | n_agents = 2 22 | 23 | heat_pars = (0.0, 1.0) 24 | bat_pars = (-3.0, 6.0) 25 | 26 | # Define constant action based on env. 
27 | if isinstance(env, FullRoomEnv): 28 | c = [np.array(heat_pars[i]) for i in range(n_agents)] 29 | elif isinstance(env, BatteryEnv): 30 | bat_pars = (10.0, -8.0) 31 | c = [np.array(bat_pars[i]) for i in range(n_agents)] 32 | elif isinstance(env, RoomBatteryEnv): 33 | c = [np.array([heat_pars[i], bat_pars[i]]) for i in range(n_agents)] 34 | else: 35 | raise TypeError(f"Env: {env} not supported!") 36 | 37 | a1, a2 = ConstActionAgent(env, c[0]), ConstActionAgent(env, c[1]) 38 | 39 | # Set plot name 40 | if isinstance(env, FullRoomEnv): 41 | a1.plot_name = "Valves Closed" 42 | a2.plot_name = "Valves Open" 43 | elif isinstance(env, BatteryEnv): 44 | a1.plot_name = "Charging (10 kW)" 45 | a2.plot_name = "Discharging (8 kW)" 46 | elif isinstance(env, RoomBatteryEnv): 47 | a1.plot_name = "Closed, Discharge" 48 | a2.plot_name = "Open, Charge" 49 | return a1, a2 50 | 51 | 52 | class RuleBasedAgent(AgentBase): 53 | """Agent applying rule-based heating control. 54 | 55 | """ 56 | bounds: Sequence #: The sequence specifying the rule for control. 57 | const_charge_rate: Num 58 | env: Union[FullRoomEnv, RoomBatteryEnv] 59 | 60 | w_inds_orig = [2, 3] 61 | r_temp_ind_orig = 5 62 | 63 | def __init__(self, env: Union[FullRoomEnv, RoomBatteryEnv], 64 | rule: Sequence, 65 | const_charge_rate: Num = None, 66 | strict: bool = False, 67 | rbc_dt_inc: float = None): 68 | """Initializer. 69 | 70 | Args: 71 | env: The RL environment. 72 | rule: The temperature bounds. 73 | const_charge_rate: The charging rate if the env includes the battery. 74 | strict: Whether to apply strict heating / cooling, start as soon as the 75 | room temperature deviates from the midpoint of the temperature bounds. 76 | """ 77 | name = "RuleBasedControl" 78 | super().__init__(env, name=name) 79 | 80 | # Check input 81 | assert len(rule) == 2, "Rule needs to consist of two values!" 82 | if isinstance(env, RoomBatteryEnv): 83 | assert const_charge_rate is not None, "Need to specify charging rate!" 
84 | elif const_charge_rate is not None: 85 | warnings.warn("Ignored charging rate!") 86 | const_charge_rate = None 87 | 88 | # Store parameters 89 | self.const_charge_rate = const_charge_rate 90 | if strict: 91 | mid = 0.5 * (rule[0] + rule[1]) 92 | self.bounds = (mid, mid) 93 | else: 94 | if rbc_dt_inc is not None: 95 | rule = (rule[0] + rbc_dt_inc, rule[1]) 96 | self.bounds = rule 97 | 98 | self.plot_name = "Rule-Based" 99 | 100 | def __str__(self): 101 | return f"Rule-Based Agent with bounds {self.bounds}" 102 | 103 | def get_action(self, state) -> Arr: 104 | """Defines the control strategy. 105 | 106 | Args: 107 | state: The current state. 108 | 109 | Returns: 110 | Next control action. 111 | """ 112 | # Find water and room temperatures 113 | w_in_temp = self.env.get_unscaled(state, self.w_inds_orig[0]) 114 | r_temp = self.env.get_unscaled(state, self.r_temp_ind_orig) 115 | 116 | # Determine if you want to do heating / cooling or not 117 | heat_action = 0.0 118 | if r_temp < self.bounds[0] and w_in_temp > r_temp: 119 | # Heating 120 | heat_action = 1.0 121 | if r_temp > self.bounds[1] and w_in_temp < r_temp: 122 | # Cooling 123 | heat_action = 1.0 124 | # Return 125 | final_action = heat_action 126 | if self.const_charge_rate is not None: 127 | final_action = np.array([heat_action, self.const_charge_rate]) 128 | return final_action 129 | 130 | 131 | class ConstActionAgent(AgentBase): 132 | """Applies a constant control input. 133 | 134 | Can be used for comparison, e.g. if you want 135 | to compare an agent to always heating or never heating. 136 | Does not really need the environment. 137 | """ 138 | rule: Arr #: The constant control input / action. 139 | out_num: int #: The dimensionality of the action space. 
140 | 141 | def __init__(self, env, rule: Arr): 142 | try: 143 | if len(rule) > 0: 144 | name = f"Const_{'_'.join(str(e) for e in rule)}" 145 | else: 146 | raise TypeError 147 | except TypeError: 148 | name = f"Const_{rule}" 149 | super().__init__(env, name=name) 150 | 151 | self.out_num = env.nb_actions 152 | self.rule = rule 153 | 154 | # Check rule 155 | if isinstance(rule, (np.ndarray, np.generic)): 156 | r_s, n_out = rule.shape, self.out_num 157 | if self.out_num > 1: 158 | assert r_s == (n_out,), f"Rule shape: {r_s} incompatible!" 159 | else: 160 | assert r_s == (n_out,) or r_s == (), f"Rule shape: {r_s} incompatible!" 161 | 162 | def __str__(self): 163 | return f"Constant Agent with value {self.rule}" 164 | 165 | def get_action(self, state) -> Arr: 166 | """Defines the control strategy. 167 | 168 | Using broadcasting it can handle numpy array rules 169 | of shape (`out_num`, ) 170 | 171 | Args: 172 | state: The current state. 173 | 174 | Returns: 175 | Next control action. 176 | """ 177 | return self.rule * np.ones((self.out_num,), dtype=np.float32) 178 | -------------------------------------------------------------------------------- /BatchRL/util/notify.py: -------------------------------------------------------------------------------- 1 | """Notification module. 2 | 3 | Can be used to send mails from your gmail account. 4 | You need to allow unsafe apps access under your 5 | account settings. 6 | 7 | It is assumed that the `debug_email` and the normal email (`receiver_email`) 8 | do have the same password, stored in the file `python_notifyer.txt`. 9 | For obvious reasons, this file is not on Github, it should be placed 10 | in the root folder of the Github repository. 
11 | """ 12 | import os 13 | import smtplib 14 | import ssl 15 | import sys 16 | import time 17 | import traceback 18 | from pathlib import Path 19 | from typing import List, Callable 20 | 21 | from util.util import force_decorator_factory 22 | 23 | curr_dir = Path(os.path.dirname(os.path.realpath(__file__))) 24 | 25 | # Signal codes, valid on Windows at least 26 | codes = [ 27 | "Close Event (e.g. KeyboardInterrupt)", 28 | "Logoff (e.g. Ctrl + Pause / Break or Ctrl + Fn + B)", 29 | "Shutdown (e.g. X in Powershell, or disconnected in PyCharm)," 30 | ] 31 | 32 | 33 | def set_exit_handler(func: Callable) -> None: 34 | """Catching kill events. 35 | 36 | Should work for windows and linux, only tested on windows. 37 | 38 | Not working in Powershell: 39 | If process is killed via task manager :( 40 | If PC is shutdown. 41 | Not working in PyCharm: 42 | If exited and process is terminated. (Works for exiting and disconnecting 43 | or killing PyCharm via task manager.) 44 | 45 | From: https://danielkaes.wordpress.com/2009/06/04/how-to-catch-kill-events-with-python/ 46 | 47 | Args: 48 | func: The function to execute when handling the exit. 49 | """ 50 | if os.name == "nt": 51 | try: 52 | import win32api 53 | win32api.SetConsoleCtrlHandler(func, True) 54 | except ImportError: 55 | version = ".".join(map(str, sys.version_info[:2])) 56 | raise Exception(f"pywin32 not installed for Python {version}") 57 | else: 58 | import signal 59 | signal.signal(signal.SIGTERM, func) 60 | 61 | 62 | def test_kill_event() -> None: 63 | """Test for catching kill events. 64 | 65 | You have 30 seconds to kill the execution and see what 66 | happens, then check your mail. :) 67 | """ 68 | with FailureNotifier("test", verbose=0, debug=True): 69 | print("Sleeping...") 70 | time.sleep(30.0) 71 | print("Done Sleeping, you were too late!") 72 | raise ValueError("Fuck") 73 | 74 | 75 | class FailureNotifier: 76 | """Context manager for failure notifications. 
77 | 78 | Sends a mail if an error happens while it is active including 79 | the stack trace if available. 80 | 81 | Uses :func:`util.notify.set_exit_handler` to catch all kinds 82 | of interrupts, but will not provide the stacktrace in those cases. 83 | """ 84 | 85 | _sent_mail: bool = False 86 | 87 | def __init__(self, name: str, verbose: int = 1, 88 | debug: bool = False, exit_fun: Callable = None): 89 | self.name = name 90 | self.verbose = verbose 91 | self.debug = debug 92 | self.exit_fun = exit_fun 93 | 94 | def __enter__(self): 95 | """Enter context, sets exit handler for uncaught interrupts.""" 96 | 97 | if self.verbose: 98 | print("Entering FailureNotifier...") 99 | 100 | # Set the exit handler to the exit function since 101 | # e.g. if you press X on the powershell console, this 102 | # will not be caught by the context manager. 103 | def on_exit(sig, func=None): 104 | # Skip sig == 0 cases? 105 | if self.verbose: 106 | print("Exiting because of interrupt, sending notification...") 107 | sig_desc = codes[sig] if sig < len(codes) else "None" 108 | msg = f"Program was mysteriously killed by somebody or something. " \ 109 | f"Clues are: {sig_desc} (Code: {sig})" 110 | if os.name != "nt": 111 | msg += f"Func: {func}" 112 | self._on_exit(msg=msg) 113 | 114 | if self.verbose: 115 | print("Notification sent.") 116 | 117 | set_exit_handler(on_exit) 118 | return self 119 | 120 | def _on_exit(self, msg: str) -> None: 121 | """Called when an error happens.""" 122 | if self.exit_fun is not None: 123 | self.exit_fun(None, None, None) 124 | sub = f"Error while executing '{self.name}'." 
125 | if not self._sent_mail: 126 | send_mail(self.debug, subject=sub, 127 | msg=msg) 128 | self._sent_mail = True 129 | 130 | def __exit__(self, exc_type=None, exc_val=None, exc_tb=None): 131 | """Exits context, sends a mail if Exception happened.""" 132 | if self.verbose: 133 | print("Exiting FailureNotifier..") 134 | 135 | p_msg = "Exited FailureNotifier " 136 | if exc_type is not None: 137 | # Unhandled exception happened, notify owner. 138 | msg = traceback.format_exc() 139 | self._on_exit(msg=msg) 140 | p_msg += "with unhandled Error." 141 | else: 142 | p_msg += "successfully." 143 | 144 | if self.verbose: 145 | print(p_msg) 146 | 147 | 148 | def login_from_file(file_name: str) -> List[str]: 149 | """Loads login information from a file.""" 150 | assert os.path.isfile(file_name), f"File: {file_name} not found!" 151 | with open(file_name, "r") as f: 152 | return [l.rstrip() for l in f if l.rstrip() != ""] 153 | 154 | 155 | @force_decorator_factory() 156 | def send_mail(debug: bool = True, 157 | subject: str = "Hello there!", 158 | msg: str = "General Kenobi", 159 | use_ssl: bool = True) -> None: 160 | """Sends a mail via python. 161 | 162 | Decorated with the force decorator since a connection timeout 163 | is likely to happen which will prevent the mail from being sent. 164 | 165 | Not tested for the case `use_ssl` = False. 166 | 167 | Args: 168 | debug: Whether to use debug mode, will send the mail to the 169 | debug address. 170 | subject: Subject of the mail. 171 | msg: Message of the mail. 172 | use_ssl: Whether to use SSL, use default. 173 | """ 174 | # Define message 175 | message = f"Subject: {subject}\n\n{msg}\n\n" \ 176 | f"This is an automatically generated message, do not reply!" 
177 | 178 | # Choose port and smtp client 179 | port = 465 if use_ssl else 587 # Port for SSL 180 | smtp_server = smtplib.SMTP_SSL if use_ssl else smtplib.SMTP 181 | ssl.create_default_context() 182 | 183 | # Load password and select mail account 184 | receiver_email, receiver_pw = login_from_file("../email_receiver_login.txt") 185 | debug_email, debug_pw = login_from_file("../email_receiver_debug_login.txt") 186 | sender_email, password = login_from_file("../notify_email_login.txt") 187 | rec_mail = debug_email if debug else receiver_email 188 | 189 | # Send the mail 190 | with smtp_server("smtp.gmail.com", port) as server: 191 | server.login(sender_email, password) 192 | send_errs = server.sendmail(sender_email, rec_mail, message) 193 | if len(send_errs) > 0: 194 | print(f"Error(s) happened: {send_errs}") 195 | -------------------------------------------------------------------------------- /BatchRL/tests/test_keras.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import numpy as np 4 | from keras import Input, Sequential, backend as K 5 | from keras.engine import Layer 6 | from keras.layers import Add 7 | 8 | from data_processing.dataset import SeriesConstraint 9 | from ml.keras_layers import SeqInput, ConstrainedNoise, FeatureSlice, \ 10 | ExtractInput, IdDense, IdRecurrent, ClipByValue, ConstrainOutput 11 | from util.util import rem_first 12 | from util.numerics import check_in_range 13 | 14 | 15 | def get_multi_input_layer_output(layer: Layer, inp_list, learning_phase: float = 1.0): 16 | """Tests layers with multiple input and / or output. 17 | 18 | Args: 19 | layer: The layer to test. 20 | inp_list: The list with input arrays. 21 | learning_phase: Whether to use learning or testing mode. 22 | 23 | Returns: 24 | The processed input. 
25 | """ 26 | if not isinstance(inp_list, list): 27 | inp_list = [inp_list] 28 | inputs = [Input(shape=rem_first(el.shape)) for el in inp_list if el is not None] 29 | if len(inputs) == 1: 30 | layer_out_tensor = layer(*inputs) 31 | else: 32 | layer_out_tensor = layer(inputs) 33 | k_fun = K.function([*inputs, K.learning_phase()], [layer_out_tensor]) 34 | layer_out = k_fun([*inp_list, learning_phase])[0] 35 | return layer_out 36 | 37 | 38 | def get_test_layer_output(layer: Layer, np_input, learning_phase: float = 1.0): 39 | """Test a keras layer. 40 | 41 | Builds a model with only the layer given and 42 | returns the output when given `np.input` as input. 43 | 44 | Args: 45 | layer: The keras layer. 46 | np_input: The input to the layer. 47 | learning_phase: Whether learning is active or not. 48 | 49 | Returns: 50 | The layer output. 51 | """ 52 | # Construct sequential model with only one layer 53 | m = Sequential() 54 | m.add(layer) 55 | out, inp = m.output, m.input 56 | k_fun = K.function([inp, K.learning_phase()], [out]) 57 | layer_out = k_fun([np_input, learning_phase])[0] 58 | return layer_out 59 | 60 | 61 | class TestKeras(TestCase): 62 | """Test case class for keras tests. 63 | 64 | Run from `BatchRL` folder, otherwise the relative paths 65 | will be wrong and there will be folders generated in the wrong place. 66 | Use the Powershell script `run_tests.ps1` if possible. 
67 | """ 68 | 69 | def __init__(self, *args, **kwargs): 70 | super().__init__(*args, **kwargs) 71 | 72 | # Define shapes 73 | self.seq_shape = (2, 6, 4) 74 | self.seq_len_red = 2 75 | self.b_s, self.seq_len_test, self.n_feats = self.seq_shape 76 | self.seq_shape_red = (self.b_s, self.seq_len_red, self.n_feats) 77 | 78 | # Define the data 79 | self.seq_input = np.arange(self.b_s * self.seq_len_red * self.n_feats).reshape(self.seq_shape_red) 80 | self.seq_input_long = np.arange(self.b_s * self.seq_len_test * self.n_feats).reshape(self.seq_shape) 81 | self.feat_indices = np.array([0, 2], dtype=np.int32) 82 | self.n_feats_chosen = len(self.feat_indices) 83 | self.output = -1. * np.arange(self.b_s * self.n_feats_chosen).reshape((self.b_s, self.n_feats_chosen)) 84 | self.id_1 = np.array([[1, 2, 3]]) 85 | self.id_2 = np.array([[2, 2, 2]]) 86 | 87 | def test_multiple_input(self): 88 | # Test multi input test 89 | add_out = get_multi_input_layer_output(Add(), [self.id_1, self.id_2]) 90 | exp_out = self.id_1 + self.id_2 91 | self.assertTrue(np.array_equal(add_out, exp_out), 92 | "Multi Input layer test not working!") 93 | 94 | def test_seq_input(self): 95 | # Test SeqInput 96 | inp_layer = SeqInput(input_shape=(self.seq_len_red, self.n_feats)) 97 | layer_out = get_test_layer_output(inp_layer, self.seq_input, 1.0) 98 | self.assertTrue(np.allclose(layer_out, self.seq_input), "SeqInput layer not implemented correctly!!") 99 | 100 | def test_constraint_layer(self): 101 | # Test Constraint Layer 102 | consts = [ 103 | SeriesConstraint('interval', [0.0, 1.0]), 104 | SeriesConstraint(), 105 | SeriesConstraint('exact'), 106 | SeriesConstraint('exact'), 107 | ] 108 | noise_level = 5.0 109 | const_layer = ConstrainedNoise(noise_level, consts, input_shape=(self.seq_len_red, self.n_feats)) 110 | layer_out = get_test_layer_output(const_layer, self.seq_input, 1.0) 111 | layer_out_test = get_test_layer_output(const_layer, self.seq_input, 0.0) 112 | self.assertTrue(np.allclose(layer_out[:, 
:, 2:], self.seq_input[:, :, 2:]), 113 | "Exact constraint in Constrained Noise layer not implemented correctly!!") 114 | self.assertTrue(check_in_range(layer_out[:, :, 0], 0.0, 1.00001), 115 | "Interval constraint in Constrained Noise layer not implemented correctly!!") 116 | self.assertTrue(np.allclose(layer_out_test[:, :, 1:], self.seq_input[:, :, 1:]), 117 | "Noise layer during testing still active!!") 118 | 119 | def test_feature_slice(self): 120 | # Test FeatureSlice layer 121 | lay = FeatureSlice(np.array(self.feat_indices), input_shape=(self.seq_len_red, self.n_feats)) 122 | layer_out = get_test_layer_output(lay, self.seq_input) 123 | self.assertTrue(np.array_equal(layer_out, self.seq_input[:, -1, self.feat_indices]), 124 | "FeatureSlice layer not working!!") 125 | 126 | def test_extract_input_layer(self): 127 | # Test ExtractInput layer 128 | lay = ExtractInput(np.array(self.feat_indices), seq_len=3, curr_ind=1) 129 | l_out = get_multi_input_layer_output(lay, [self.seq_input_long, self.output]) 130 | l_out2 = get_multi_input_layer_output(lay, [self.seq_input_long, None]) 131 | l_out3 = get_multi_input_layer_output(lay, self.seq_input_long) 132 | exp_out32 = np.copy(self.seq_input_long)[:, 1:4, :] 133 | exp_out = np.copy(exp_out32) 134 | exp_out[:, -1, self.feat_indices] = self.output 135 | self.assertTrue(np.array_equal(l_out, exp_out), "ExtractInput layer not working!") 136 | self.assertTrue(np.array_equal(l_out2, exp_out32), "ExtractInput layer not working!") 137 | self.assertTrue(np.array_equal(l_out3, exp_out32), "ExtractInput layer not working!") 138 | 139 | def test_id_recurrent_layer(self): 140 | # Test IdRecurrent layer 141 | lay = IdRecurrent(3, input_shape=rem_first(self.seq_input_long.shape)) 142 | l_out = get_test_layer_output(lay, self.seq_input_long) 143 | self.assertTrue(np.array_equal(l_out, self.seq_input_long[:, :, :3]), 144 | "IdRecurrent not working correctly!") 145 | lay = IdDense(1, input_shape=rem_first(self.output.shape)) 146 | 
l_out = get_test_layer_output(lay, self.output) 147 | self.assertTrue(np.array_equal(l_out, self.output[:, :1]), 148 | "IdDense not working correctly!") 149 | 150 | def test_clip_layer(self): 151 | # Test ClipByValue layer 152 | c_layer = ClipByValue(0.0, 1.0, input_shape=rem_first(self.seq_shape)) 153 | l_out = get_test_layer_output(c_layer, self.seq_input_long) 154 | self.assertTrue(np.all(l_out >= 0.0) and np.all(l_out <= 1.0), 155 | "ClipByValue not working correctly!") 156 | 157 | def test_constrain_output(self): 158 | # Test ConstrainOutput layer 159 | ints = [(0.0, 2.0), (-1.0, 5.0), (-1.0, 5.0), (-1.0, 5.0)] 160 | c_layer = ConstrainOutput(ints, input_shape=rem_first(self.seq_shape)) 161 | l_out = get_test_layer_output(c_layer, self.seq_input_long) 162 | self.assertTrue(np.all(l_out[:, :, 0] <= 2.0) and np.all(l_out[:, :, 1:] <= 5.0), 163 | "ClipByValue not working correctly!") 164 | self.assertTrue(np.all(l_out[:, :, 0] >= 0.0) and np.all(l_out[:, :, 1:] >= -1.0), 165 | "ClipByValue not working correctly!") 166 | 167 | # Test 2d input 168 | c_layer_2d = ConstrainOutput(ints[:2], input_shape=rem_first(self.output.shape)) 169 | l_out = get_test_layer_output(c_layer_2d, self.output) 170 | self.assertTrue(np.all(l_out[:, 0] <= 2.0) and np.all(l_out[:, 1:] <= 5.0), 171 | "ClipByValue not working correctly!") 172 | self.assertTrue(np.all(l_out[:, 0] >= 0.0) and np.all(l_out[:, 1:] >= -1.0), 173 | "ClipByValue not working correctly!") 174 | 175 | pass 176 | -------------------------------------------------------------------------------- /BatchRL/dynamics/base_hyperopt.py: -------------------------------------------------------------------------------- 1 | """The hyperparameter optimization module. 2 | 3 | Defines a class that extends the base model class `BaseDynamicsModel` 4 | for hyperparameter optimization. 
def upload_hop_pars() -> None:
    """Upload the folder `hop_path` (all hyperopt results) as a zipped folder."""
    print("Uploading hyperopt parameters to Google Drive.")
    upload_folder_zipped(hop_path)


def download_hop_pars() -> None:
    """Download the zipped folder named "Hop" and extract it to `hop_path`."""
    print("Downloading hyperopt parameters from Google Drive.")
    download_and_extract_zipped_folder("Hop", hop_path)


def check_eval_data(eval_data: str) -> None:
    """Reject evaluation sets other than "test" and "val".

    Invalid values are passed to `yeet` (presumably raises - confirm in
    `util.util`); valid values return None.

    Args:
        eval_data: Name of the evaluation set.
    """
    if eval_data not in ("test", "val"):
        yeet(f"Invalid evaluation set for hyperopt: {eval_data}")


def save_hp(name_hp: str, opt_hp: 'OptHP') -> None:
    """Pickle the hyperparameter tuple `opt_hp` to the file `name_hp`.

    Args:
        name_hp: Path of the file to write.
        opt_hp: The object to store, see the `OptHP` type.
    """
    with open(name_hp, 'wb') as f:
        pickle.dump(opt_hp, f)


def load_hp(name_hp: str) -> 'OptHP':
    """Load hyperparameters that were stored with :func:`save_hp`.

    Args:
        name_hp: Path of the pickle file.

    Returns:
        The unpickled object.

    Raises:
        FileNotFoundError: If `name_hp` does not exist.
    """
    # SECURITY NOTE(review): `pickle.load` executes arbitrary code contained in
    # the file. The files in `hop_path` may originate from the shared folder
    # fetched by `download_hop_pars`, so only load files from trusted sources.
    with open(name_hp, 'rb') as f:
        return pickle.load(f)
class HyperOptimizableModel(BaseDynamicsModel, ABC):
    """The abstract base class for models using hyperopt.

    Need to override the abstract methods and set `base_name`
    in constructor.
    """
    # NOTE: class-level default list; `optimize` rebinds a fresh list on the
    # instance before appending, so this shared object is not mutated there.
    param_list: List[Dict] = []  #: List of tried parameters.
    base_name: str  #: Base name independent of hyperparameters.
    curr_val: float = 10e100  #: Start value for optimization.

    @abstractmethod
    def get_space(self) -> Dict:
        """Defines the hyperopt space with the hyper parameters
        to be optimized for a given model.

        Returns:
            hyperopt space definition.
        """
        pass

    @classmethod
    @abstractmethod
    def get_base_name(cls, **kwargs) -> str:
        """Returns the unique name given all the non-hyperparameter parameters."""
        pass

    @abstractmethod
    def conf_model(self, hp_sample: Dict) -> 'HyperOptimizableModel':
        """Configure new model with given parameters.

        Initializes another model with the parameters as
        specified by the sample, which is a sample of the specified
        hyperopt space.

        Args:
            hp_sample: Sample of hyperopt space.

        Returns:
            Another model with the same type as self, initialized
            with the parameters in the sample.
        """
        pass

    @abstractmethod
    def hyper_objective(self, eval_data: str = DEFAULT_EVAL_SET) -> float:
        """Defines the objective to be used for hyperopt.

        It will be minimized, i.e. it has to be some kind of
        loss, e.g. validation loss.
        Model assumed to be fitted first.

        Args:
            eval_data: Name of the set the objective is evaluated on.

        Returns:
            Numerical value from evaluation of the objective.
        """
        pass

    def optimize(self, n: int = 100, verbose: int = 1,
                 eval_data: str = DEFAULT_EVAL_SET,
                 data_ext: str = "") -> Dict:
        """Does the full hyper parameter optimization with
        the given objective and space.

        Whenever a sample improves on the best known objective value, the
        sample and its value are saved to the file given by
        `_get_opt_hp_f_name`. A value stored there by a previous run is
        used as the initial best value.

        Args:
            n: Number of model initializations, fits and objective
                computations.
            verbose: The verbosity level for fmin.
            eval_data: Evaluation set for the optimization.
            data_ext: Extension to differentiate used data.

        Returns:
            The optimized hyper parameters, as returned by `fmin`.
        """
        # Anything but "val" leads to fitting on "train_val".
        # NOTE(review): `eval_data` is not validated here; `check_eval_data`
        # exists in this module but is not called - confirm if intended.
        fit_data = "train" if eval_data == "val" else "train_val"

        hp_space = self.get_space()
        self.param_list = []

        # Load the previous optimum, if it exists.
        save_path = self._get_opt_hp_f_name(self.base_name, ext=data_ext)
        try:
            _, self.curr_val = load_hp(save_path)
        except FileNotFoundError:
            if verbose:
                print("No previous hyperparameters found!")
        else:
            if verbose:
                print("Found previous hyperparameters!")

        # Define final objective function
        def f(hp_sample: Dict) -> float:
            """Fits a model configured by `hp_sample` and evaluates it.

            Args:
                hp_sample: Model parameters.

            Returns:
                Value of the objective.
            """
            mod = self.conf_model(hp_sample)
            self.param_list.append(hp_sample)
            mod.fit(train_data=fit_data)
            curr_obj = mod.hyper_objective(eval_data=eval_data)

            # Save if the new parameters are better than the best so far.
            if curr_obj < self.curr_val:
                self.curr_val = curr_obj
                save_hp(save_path, (hp_sample, self.curr_val))
            return curr_obj

        # Do parameter search
        best = fmin(
            fn=f,
            space=hp_space,
            algo=tpe.suggest,
            max_evals=n,
            verbose=verbose > 0,
            show_progressbar=verbose > 0,
        )

        return best

    @classmethod
    def _get_opt_hp_f_name(cls, b_name: str, ext: str = "") -> str:
        """Determines the file path given the model name."""
        return os.path.join(hop_path, f"{b_name}_OPT_HP{ext}.pkl")

    @classmethod
    def from_best_hp(cls, verbose: int = 0, ext: str = "", **kwargs):
        """Initialize a model with the best previously found hyperparameters.

        Args:
            verbose: If non-zero, prints a message before loading.
            ext: Extension used when the parameters were saved, see
                `data_ext` in :func:`optimize`.
            **kwargs: Passed to `get_base_name` and to the constructor.

        Returns:
            An instance of the same class initialized with the optimal
            hyperparameters.

        Raises:
            FileNotFoundError: If no stored hyperparameters exist.
        """
        base_name = cls.get_base_name(include_data_name=False, **kwargs)
        name_hp = cls._get_opt_hp_f_name(base_name, ext=ext)
        if verbose:
            print("Loading model from hyperparameters.")
        try:
            opt_hp = load_hp(name_hp)
        except FileNotFoundError as e:
            # The path is part of the message (it used to be printed to stdout
            # only) and the original error is chained for debugging.
            raise FileNotFoundError(
                f"No hyperparameters found at '{name_hp}', "
                f"need to run optimize() first!") from e

        # The stored tuple is (sample, objective value); only the sample is used.
        hp_params, _ = opt_hp
        init_params = cls._hp_sample_to_kwargs(hp_params)
        return cls(**kwargs, **init_params)

    @classmethod
    def _hp_sample_to_kwargs(cls, hp_sample: Dict) -> Dict:
        """Converts the sample from the hyperopt space to kwargs for initialization.

        Needs to be overridden if a general `hp_sample` cannot be
        passed to `__init__` as kwargs.

        Returns:
            Dict with kwargs for initialization.
        """
        return hp_sample


def optimize_model(mod: HyperOptimizableModel, verbose: bool = True,
                   n_restarts: int = None,
                   eval_data: str = DEFAULT_EVAL_SET,
                   data_ext: str = "") -> None:
    """Executes the hyperparameter optimization of a model.

    Uses `n_restarts` calls to fit, if it is None,
    uses reduced number of model trainings if not on Euler.

    Args:
        mod: Model whose hyperparameters are to be optimized.
        verbose: Whether to print the result to the console. Also forwarded
            as verbosity level to :func:`HyperOptimizableModel.optimize`.
        n_restarts: How many models should be fitted during the
            optimization. If None, uses different default values depending
            on whether `EULER` is True or not.
        eval_data: Evaluation set for the optimization.
        data_ext: Extension to differentiate used data.
    """
    if n_restarts is not None:
        n_opt = n_restarts
    else:
        n_opt = 50 if EULER else 2
    opt_params = mod.optimize(n_opt, verbose=verbose, eval_data=eval_data,
                              data_ext=data_ext)

    if verbose:
        print(f"Optimal parameters: {opt_params}.")
def upload_trained_agents(verbose: int = 1):
    """Uploads all RL models to Google Drive.

    Uploads all data in folder `RL_MODEL_DIR`.
    """
    if verbose:
        print("Uploading agent neural network parameters to Google Drive.")
    upload_folder_zipped(RL_MODEL_DIR)


def download_trained_agents(verbose: int = 1):
    """Download trained agents from Google Drive.

    They need to be in a folder named `RL` and will
    be put into the folder `RL_MODEL_DIR`.
    """
    if verbose:
        print("Downloading agent neural network parameters from Google Drive.")
    download_and_extract_zipped_folder("RL", RL_MODEL_DIR)


def agent_file_below_min_steps(f: str, min_steps: int, verbose: int = 0) -> bool:
    """Decide from its name whether an agent file was trained too briefly.

    The number is read from the second "_"-separated part of the name,
    after dropping that part's first three characters (e.g. "NEP").

    Args:
        f: The file name.
        min_steps: Threshold, files with a smaller number are to be removed.
        verbose: If non-zero, names that cannot be parsed are printed.

    Returns:
        True if the parsed number is below `min_steps`; False otherwise,
        including when the name cannot be parsed.
    """
    try:
        n_ep = int(f.split("_")[1][3:])
    except (IndexError, ValueError):
        if verbose:
            print(f"Invalid file name: {f}")
        return False
    return n_ep < min_steps


def eval_plot_below_min_steps(f: str, min_steps: int) -> bool:
    """Decide from its name whether a plot belongs to a briefly trained agent.

    Three naming patterns are checked in order; a later pattern that matches
    and parses overrides the result of an earlier one. Names whose number
    cannot be parsed never lead to removal (they used to raise in some
    branches and abort the whole clean-up).

    Args:
        f: The file name of the plot.
        min_steps: Threshold, plots with a smaller number are to be removed.

    Returns:
        True if the file should be removed.
    """
    rem_file = False

    # Analysis plots: "..._DDPG_<n>.ext", "..._DDPG_<n>_..." or "..._DDPG_NEP<n>_..."
    i = f.find("_DDPG_")
    if i >= 0:
        num = f[(i + 6):].split(".")[0]
        try:
            rem_file = int(num) < min_steps
        except ValueError:
            n_str = num.split("_")[0]
            if n_str[:3] == "NEP":
                n_str = n_str[3:]
            try:
                rem_file = int(n_str) < min_steps
            except ValueError:
                # Not a number after all: keep the file.
                pass

    # Train reward plots: the number is expected right after the first 8
    # characters, i.e. this only parses if the name starts with "DDPG_NEP".
    if "DDPG_NEP" in f:
        try:
            rem_file = int(f[8:].split("_")[0]) < min_steps
        except ValueError:
            pass

    # Evaluation plots: "DetailAnalysis_<n>_..."
    if "DetailAnalysis" in f:
        try:
            rem_file = int(f.split("_")[1]) < min_steps
        except (IndexError, ValueError):
            pass

    return rem_file


def remove_agents(min_steps: int = 10000, verbose: int = 5) -> None:
    """Removes all agents that were trained for less than `min_steps` steps.

    For cleaning up agents that were produced when testing
    something or debugging. Also deletes empty folders, but not
    if the folder is empty only after removing the agents, so you may
    want to run it twice.

    Args:
        min_steps: Minimum number of training steps for an agent not to be
            deleted.
        verbose: Verbosity, infos are printed if it is non-zero.
    """
    def remove_f(f):
        return agent_file_below_min_steps(f, min_steps, verbose)

    def remove_agent_eval(f):
        return eval_plot_below_min_steps(f, min_steps)

    # First the stored agents, then the plots that belong to such agents.
    remove_files_in_sub_folders(RL_MODEL_DIR, remove_f,
                                True, verbose=verbose > 0)
    remove_files_in_sub_folders(rl_plot_path, remove_agent_eval,
                                True, verbose=verbose > 0)
class AbstractAgent(ABC):
    """Base class for all agents."""

    @abstractmethod
    def get_action(self, state) -> 'Arr':
        """Defines the control strategy.

        Args:
            state: The current state.

        Returns:
            Next control action.
        """
        pass


class AgentBase(AbstractAgent, ABC):
    """Base class for an agent / control strategy.

    Might be specific for a certain environment accessible
    by attribute `env`.
    """
    env: 'DynEnv'  #: The corresponding environment
    name: str  #: The name of the Agent / control strategy
    fit_data: str = None
    plot_name: str = None

    def __init__(self, env: 'DynEnv', name: str = "Abstract Agent"):
        self.env = env
        self.name = name

    def fit(self, verbose: int = 0, train_data: str = "") -> None:
        """No fitting needed."""
        pass

    def get_plot_name(self) -> str:
        """Returns `plot_name` if it is set, else the short name."""
        if self.plot_name is not None:
            return self.plot_name
        return self.get_short_name()

    def get_short_name(self) -> str:
        """Returns the name of the agent."""
        return self.name

    def get_info(self) -> Dict:
        """Returns additional info about the agent, empty by default."""
        return {}

    def __str__(self):
        """Generic string conversion."""
        return f"Agent of class {self.__class__.__name__} with name {self.name}"

    def _rescale_actions(self, actions):
        """Maps stored actions through the agent's `action_scaled_01` info.

        Only applies if the agent has a non-None attribute `action_range`
        and `get_info()` provides 'action_scaled_01', a sequence of pairs
        (low, high), one per action. Each action `a` is mapped to
        `low + a * (high - low)`.

        Args:
            actions: The stored actions, or None if none were stored.

        Returns:
            The rescaled actions, or `actions` unchanged if there is
            nothing to rescale.
        """
        # `actions` is None when `eval` runs with `detailed=False`; multiplying
        # None used to raise a TypeError for agents with an `action_range`.
        if actions is None or getattr(self, 'action_range', None) is None:
            return actions
        scaling = self.get_info().get('action_scaled_01')
        if scaling is None:
            return actions
        offset = np.array([i[0] for i in scaling], dtype=np.float32)
        width = np.array([i[1] - i[0] for i in scaling], dtype=np.float32)
        return offset + actions * width

    def eval(self, n_steps: int = 100, reset_seed: bool = False,
             detailed: bool = False,
             use_noise: bool = False, scale_states: bool = False,
             episode_marker: Callable = None,
             return_inds: bool = False,
             verbose: int = 0):
        """Evaluates the agent for a given number of steps.

        If the number is greater than the number of steps in an episode, the
        env is reset and a new episode is started.

        Args:
            n_steps: Number of steps.
            reset_seed: Whether to reset the seed at start.
            detailed: Whether to return all parts of the reward.
            use_noise: Whether to use noise during the evaluation.
            scale_states: Whether to scale the state trajectory to
                original values, only used if `detailed` is True.
            episode_marker: Function mapping from state to natural numbers.
            return_inds: Whether to additionally return the values of
                `env.curr_ind` recorded after each reset, only used
                if `detailed` is True.
            verbose: If non-zero, prints which agent is evaluated.

        Returns:
            The mean received reward if `detailed` is False, else
            all the rewards for all steps, the detailed rewards, the states,
            the episode marks, the actions and the scaled actions
            (and the indices if `return_inds` is True).
        """
        # Fix seed if needed.
        if reset_seed:
            fix_seed()

        if verbose:
            print(f"Evaluating agent: {self}")

        # Initialize env and reward.
        s_curr = self.env.reset(use_noise=use_noise)
        ep_mark = 0 if episode_marker is None else episode_marker(s_curr)
        all_rewards = npf32((n_steps,))

        ret_inds = None
        ret_ct = 1
        if return_inds:
            # `np.int` was removed from NumPy, the builtin `int` is equivalent.
            # NOTE(review): the size assumes that episodes are not shorter than
            # `n_ts_per_eps`; unused entries stay uninitialized.
            n_inds = (n_steps + 1) // self.env.n_ts_per_eps + 1
            ret_inds = np.empty((n_inds,), dtype=int)
            ret_inds[0] = self.env.curr_ind

        # Detailed stuff
        det_rewards, state_t, ep_marks = None, None, None
        actions, scaled_actions = None, None
        if detailed:
            n_det = len(self.env.reward_descs)
            n_ac = self.env.act_dim
            n_states = self.env.state_dim - n_ac
            actions = npf32((n_steps, n_ac), fill=np.nan)
            scaled_actions = npf32((n_steps, n_ac), fill=np.nan)
            det_rewards = npf32((n_steps, n_det), fill=np.nan)
            state_t = npf32((n_steps, n_states), fill=np.nan)
            ep_marks = npf32((n_steps, ), fill=np.nan)
        elif scale_states:
            # Name the ignored argument, the message used to contain its value.
            warnings.warn("Argument `scale_states` ignored since `detailed` is False!")

        # Evaluate for `n_steps` steps.
        for k in range(n_steps):

            # Determine action
            a = self.get_action(s_curr)
            scaled_a = self.env.scale_action_for_step(a)

            # Save actions
            if actions is not None:
                actions[k, :] = a
                scaled_actions[k, :] = scaled_a

            # Execute step
            s_curr, r, fin, _ = self.env.step(a)

            # Store rewards
            all_rewards[k] = r
            if det_rewards is not None:
                det_rewards[k, :] = self.env.detailed_reward(s_curr, scaled_a)
                state_t[k, :] = s_curr
                ep_marks[k] = ep_mark

            # Reset env if episode is over.
            if fin:
                s_curr = self.env.reset(use_noise=use_noise)
                ep_mark = 0 if episode_marker is None else episode_marker(s_curr)
                if return_inds:
                    ret_inds[ret_ct] = self.env.curr_ind
                    ret_ct += 1

        # HACK: agents with an `action_range` store actions scaled to [0, 1],
        # map them back (no-op if there is nothing to rescale).
        actions = self._rescale_actions(actions)

        # Return all rewards
        if detailed:
            if scale_states:
                state_t = self.env.scale_state(state_t, remove_mean=False)
            if return_inds:
                return all_rewards, det_rewards, state_t, ep_marks, actions, scaled_actions, ret_inds
            return all_rewards, det_rewards, state_t, ep_marks, actions, scaled_actions

        # Return mean reward.
        return np.sum(all_rewards) / n_steps
def run_control(used_control: 'ControlT',
                exp_name: str = None,
                *args, verbose: int = 0,
                debug: bool = False,
                **kwargs):
    """Runs the controller until termination.

    Takes the same arguments as :func:`ControlClient.__init__`, except
    for an additional one, `debug` which decides where to send the mail to.
    """
    with ControlClient(used_control, exp_name, *args,
                       verbose=1 if verbose > 0 else 0,
                       debug_mail=debug, **kwargs) as client:
        cont = True
        while cont:
            if client.is_disconnected:
                # The client has already exited (e.g. via the exit handler),
                # there is nothing to read or publish anymore.
                time.sleep(0.5)
            else:
                cont = client.read_publish_wait_check()


class ControlClient:
    """Client combining the node definition and the opcua client.

    Use it as a context manager!
    """

    TEMP_MIN_MAX = (20.0, 25.0)  #: Temperature bounds, experiment will be aborted if temperature leaves these bounds.

    write_nodes: List[str]  #: List with the write nodes as strings.
    read_nodes: List[str]  #: List with the read nodes as strings.

    termination_reason: str = None

    _n_pub: int = 0

    # Current write values
    _curr_temp_sp: float = None

    # Current measured values
    _curr_valves: Tuple = None
    _curr_meas_temp_sp: float = None
    _curr_meas_temp: float = None
    _curr_meas_res_ack: float = None

    _start_time: datetime = None

    _started_exiting: bool = False
    _exited: bool = False
    # NOTE: defined on the class, i.e. shared by all instances.
    exit_lock = Lock()
    _add_msg: str = None

    # Fail count
    n_bad_res_max: int = 60  #: Experiment will be aborted if there are more consecutive failures.
    _n_bad_res: int = 0

    def __init__(self,
                 used_control: 'ControlT',
                 exp_name: str = None,
                 user: str = None,
                 password: str = None, *,
                 verbose: int = 1,
                 no_data_saving: bool = False,
                 notify_failures: bool = False,
                 debug_mail: bool = True,
                 _client_class: Callable = OpcuaClient):
        """Initializer.

        A non-default `_client_class` should be used for testing / debugging only.
        E.g. use :class:`tests.test_opcua.OfflineClient` if you are working offline and
        want to test something.

        Args:
            used_control: The control definition, needs to have length 1.
            exp_name: Name of the experiment, passed to `NodeAndValues`.
            user: User name, loaded from file if it or `password` is None.
            password: Password, loaded from file if it or `user` is None.
            verbose: Verbosity level.
            no_data_saving: If True, the cached data is not saved on exit.
            notify_failures: Whether to send a mail at termination.
            debug_mail: Passed as `debug` to `send_mail`.
            _client_class: Class (or factory) of the underlying client.
        """
        assert len(used_control) == 1, "Only one room supported!"

        # Load login data from file if not specified
        if user is None or password is None:
            user, password = login_from_file("../opcua_login.txt")

        self.notify_failures = notify_failures
        self.verbose = verbose
        self._start_time = datetime.now()
        self.client = _client_class(user=user, password=password)
        self.node_gen = NodeAndValues(used_control, exp_name=exp_name)

        self.deb_mail = debug_mail

        if no_data_saving:
            self.node_gen.save_cached_data = self._no_save

    def _no_save(self, verbose: bool = False):
        """Used to overwrite the save function of `self.node_gen`."""
        if self.verbose or verbose:
            print("Not saving data...")

    @property
    def is_disconnected(self):
        """Whether `__exit__` has already been executed."""
        return self._exited

    def __enter__(self):
        """Setup the ControlClient.

        Define nodes, initialize dataframes and enter
        and subscribe with client."""

        # Get node strings
        self.write_nodes = self.node_gen.get_nodes()
        self.read_nodes = self.node_gen.get_read_nodes()

        # Initialize dataframes
        self.df_write = pd.DataFrame({'node': self.write_nodes, 'value': None})
        self.df_read = pd.DataFrame({'node': self.read_nodes})

        # Connect client and subscribe
        self.client.__enter__()
        self.client.subscribe(self.df_read, sleep_after=1.0)

        # Set exit handler
        def on_exit(sig, func=None):
            self._add_msg = "Program was mysteriously killed by somebody or something. "
            self.__exit__(None, None, None)

        set_exit_handler(on_exit)

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Save data and exit client.

        May be called more than once (e.g. by the exit handler and by the
        `with` statement), only the first call does the actual work.
        """
        if exc_type is not None:
            self._add_msg = traceback.format_exc()

        if self.verbose:
            print(f"Thread: {threading.current_thread().name} in __exit__().")
            print("AddMessage: ", self._add_msg)

        # Decide under the lock which call is the one that cleans up.
        with self.exit_lock:
            if self._exited:
                return
            self._exited = True

        if self.verbose:
            print("Actually exiting :)")

        # Exit client and save data
        self.client.__exit__(exc_type, exc_val, exc_tb)
        self.node_gen.save_cached_data(self.verbose)

        # Kill threads
        this_thread = threading.current_thread()
        for t in threading.enumerate():
            if "MainThread" in t.name:
                continue
            if hasattr(t, "stop"):
                t.stop()
            # A thread cannot join itself (raises a RuntimeError), so the
            # executing thread is skipped, as are dummy threads.
            if t is not this_thread and not isinstance(t, threading._DummyThread):
                print(f"Joining thread: {t.name}")
                t.join()

        if self.verbose:
            print("Joined threads.")

        # Notify reason of termination
        with ProgWrap(f"Sending notification...", self.verbose > 0):
            self.notify_me()

    def _print_set_on_change(self, attr_name: str, val, msg: str) -> None:
        """Sets and prints attribute with name `attr_name` if its value changed.

        Unchanged values are only printed if `self.verbose` is greater than 1.
        """
        curr_val = getattr(self, attr_name)
        if curr_val is None or curr_val != val:
            setattr(self, attr_name, val)
            if self.verbose > 0:
                print_fun(f"{msg}: {val}")
        elif self.verbose > 1:
            print_fun(f"{msg}: {val}")

    def notify_me(self) -> None:
        """Sends a notification mail with the reason of termination.

        Does nothing if `self.notify_failures` is False.
        """
        # Check if notifications are enabled...
        if not self.notify_failures:
            if self.verbose:
                print("Not sending email notification!")
            return

        # Set subject
        sub = "Experiment Termination Notification"

        # Set message
        msg = self.termination_reason
        if msg is None:
            msg = "Unknown termination reason :("

        if self._add_msg is not None:
            msg += f"\n\n{self._add_msg}"

        # Add some more information
        msg += f"\n\nExperiment name: {self.node_gen.experiment_name}"
        msg += f"\n\nStarting date and time: {self._start_time}"

        # Send mail
        send_mail(subject=sub, msg=msg, debug=self.deb_mail)

    def _write_values(self):
        """Computes and publishes the current control input."""
        self.df_write["value"] = self.node_gen.compute_current_values()
        self.client.publish(self.df_write, log_time=self.verbose > 1, sleep_after=1.0)
        self._print_set_on_change("_curr_temp_sp", self.df_write['value'][0],
                                  msg="Written temperature setpoint")

    def read_publish_wait_check(self) -> bool:
        """Read and publish values, wait, and check if termination is reached.

        If `self.verbose` is True, some information is logged.

        Returns:
            True if the experiment should continue, False if termination
            is reached.
        """
        # Read and extract values
        read_vals = self.client.read_values()
        cont = True
        if read_vals is None:
            self._n_bad_res += 1
            if self._n_bad_res > self.n_bad_res_max:
                self.termination_reason = "Internet connection lost :("
                cont = False
        else:
            try:
                ext_values = self.node_gen.extract_values(read_vals, return_temp_setp=True)

                self._print_set_on_change("_curr_meas_temp_sp", ext_values[2][0],
                                          msg="Measured Temp. Setpoint")
                self._print_set_on_change("_curr_meas_temp", ext_values[1][0],
                                          msg="Measured Room Temp.")
                self._print_set_on_change("_curr_meas_res_ack", ext_values[0][0],
                                          msg="Research Acknowledgement")
                valve_tuple = tuple(self.node_gen.get_valve_values()[0])
                self._print_set_on_change("_curr_valves", valve_tuple,
                                          msg="Valves")

                # Check that the research acknowledgement is true.
                # It is not required during the first 20 calls, takes some time.
                res_ack_true = np.all(ext_values[0]) or self._n_pub < 20
                if res_ack_true:
                    self._n_bad_res = 0
                else:
                    self._n_bad_res += 1
                    res_ack_true = self._n_bad_res < self.n_bad_res_max

                # Check measured temperatures, stop if too low or high.
                temps_in_bound = check_in_range(np.array(ext_values[1]), *self.TEMP_MIN_MAX)

                # Stop if (first) controller gives termination signal.
                terminate_now = self.node_gen.control[0][1].terminate()
                cont = res_ack_true and temps_in_bound and not terminate_now

                # Set the reason of termination, later checks take precedence.
                if not temps_in_bound:
                    self.termination_reason = "Temperature bounds reached, aborting experiment."
                if not res_ack_true:
                    self.termination_reason = "Research mode confirmation lost :("
                if terminate_now:
                    self.termination_reason = "Experiment time over!"
            except ValueError as e:
                # Deliberately best-effort: skip this reading and continue,
                # but say what went wrong.
                print_fun(f"Could not process the read values: {e}")

        # Compute and publish current control input
        self._write_values()

        # Print Info
        if self.verbose > 0:
            if self._n_bad_res != 0:
                print_fun(f"Aborting experiment in: {self.n_bad_res_max - self._n_bad_res + 1} steps.")
            if not cont:
                print_fun(self.termination_reason)

        # Increment publishing counter and return termination criterion.
        self._n_pub += 1
        return cont
def clean_battery_dataset(ds: 'Dataset',
                          low_frac: float = 0.3,
                          up_frac: float = 0.6) -> None:
    """Removes the ugly data in the battery dataset.

    Sets the rows of `ds.data` from index `int(low_frac * n)` up to
    (excluding) `int(up_frac * n)` to nan, in place, where `n` is the
    number of rows.

    TODO: Do this by considering the data specifying when
        the experiments were...

    Args:
        ds: The dataset, its attribute `data` is modified.
        low_frac: Fraction of the rows where the removed part starts.
        up_frac: Fraction of the rows where the removed part ends.

    Raises:
        ValueError: If not `0 <= low_frac <= up_frac <= 1`.
    """
    if not 0.0 <= low_frac <= up_frac <= 1.0:
        raise ValueError(f"Invalid fractions: {low_frac}, {up_frac}")
    n = ds.data.shape[0]
    low_lim = int(low_frac * n)
    up_lim = int(up_frac * n)
    ds.data[low_lim: up_lim] = np.nan


# Fit pw. linear model: $y = \alpha_1 + \alpha_2 * x + \alpha_3 * max(0, x)$
def pw_lin_fun_factory(cont_at_zero: bool = True, through_zero: bool = False) -> Callable:
    """Builds the feature function of a piecewise linear model with kink at 0.

    The features are, in this order: a constant 1 (unless `through_zero`),
    `x`, `max(0, x)` and the indicator of `x > 0` (unless `cont_at_zero`).

    Args:
        cont_at_zero: If False, a step at zero is added to the features.
        through_zero: If True, the constant feature is left out.

    Returns:
        Function mapping `n` values to an array of shape (n, number of features).
    """
    fun_list = [
        lambda x: x,
        lambda x: np.maximum(0.0, x),
    ]

    # Add more functions depending on arguments
    if not cont_at_zero:
        fun_list += [lambda x: np.where(x > 0, 1.0, 0.0)]
    if not through_zero:
        fun_list = [lambda x: 1.0] + fun_list

    # Count the number of functions
    n_fun = len(fun_list)

    # Define and return feature function
    def feat_fun(x):
        """Evaluates all features at the scalar or 1d array-like `x`."""
        # Also handles 0-d arrays, which `np.isscalar` does not recognize.
        x = np.atleast_1d(x)
        res = npf32((len(x), n_fun))
        for ct, f in enumerate(fun_list):
            res[:, ct] = f(x)
        return res

    return feat_fun
def __init__(self, dataset: 'Dataset', base_ind: int = None):
    """Initializes the battery model with the specified dataset.

    Args:
        dataset: Dataset with data to fit model.
        base_ind: If given, `base_ind` and `base_ind + 1` are used as input
            indices and `base_ind` as output index; else they are
            left to the base class.
    """
    in_inds, out_inds = None, None
    if base_ind is not None:
        in_inds = np.array([base_ind, base_ind + 1], dtype=np.int32)
        out_inds = np.array([base_ind], dtype=np.int32)
    super().__init__(dataset, dataset.name,
                     out_inds=out_inds,
                     in_inds=in_inds)

    # Define feature function
    self._feat_fun = pw_lin_fun_factory(cont_at_zero=True)

    # Scale soc limits
    if dataset.fully_scaled:
        scale = np.copy(dataset.scaling[self.in_inds[0]])
        self._scaled_soc_limits = tuple(trf_mean_and_std(np.array(self.hard_soc_limits),
                                                         scale, remove=True))
    else:
        self._scaled_soc_limits = self.hard_soc_limits


def fit(self, verbose: int = 0, train_data: str = "train") -> None:
    """Fits the battery model.

    Does nothing if it has already been fitted.
    `predict` throws an error if the model wasn't fitted
    before calling it.

    Besides setting `self.params`, this sets the rows of `self.data.data`
    that are outliers w.r.t. the first fit to nan, drops the last row
    and calls `self.data.split_data()`.

    Args:
        verbose: Verbosity, 0: silent.
        train_data: Training data set, only stored in `self.fit_data`,
            the fit always uses the full data.
    """
    self.fit_data = train_data
    if self.params is not None:
        print_if_verb(verbose, "Battery model already fitted!")
        return
    print_if_verb(verbose, "Fitting battery model...")

    # Get data
    dat = self.data.data

    # Extract data: power at t+1, SoC at t and the SoC difference.
    s_ind, p_ind = self.in_inds
    p = np.copy(dat[1:, p_ind])
    soc = np.copy(dat[:-1, s_ind])
    ds = np.copy(dat[1:, s_ind] - soc)
    self.init_data = p, ds, soc

    # Remove nans
    not_nans = np.logical_not(np.logical_or(np.isnan(p), np.isnan(ds)))
    self.nan_mask = not_nans
    p, ds, soc = (arr[not_nans] for arr in (p, ds, soc))
    self.p, self.ds, self.soc = p, ds, soc

    # Reduce data to exclude strange part (manual switch, disabled).
    # `np.bool` was removed from NumPy, the builtin `bool` is equivalent.
    exclude: bool = False
    if exclude:
        n_p = len(p)
        n = n_p // 3
        m = np.zeros((n_p,), dtype=bool)
        m[:n] = True
        m[-n:] = True
        self.masked_p, self.masked_ds = p[m], ds[m]
        self.masked_soc = soc[m]
        self.exp_mask = m
    else:
        self.masked_p, self.masked_ds = p, ds
        self.masked_soc = soc
        self.exp_mask = np.ones(self.masked_p.shape, dtype=bool)

    # Fit parameters
    self.params = fit_linear_bf_1d(self.masked_p, self.masked_ds, self._feat_fun)

    # Remove outliers based on fit
    errs = np.abs(self._eval_at(self.masked_p) - self.masked_ds)
    thresh = np.max(errs) / 3.4  # This is heuristic
    inliers = errs < thresh
    self.masked_p = self.masked_p[inliers]
    self.masked_ds = self.masked_ds[inliers]

    # Filter the full data, using the same columns as for the fit above
    # (these were hard-coded to 1 and 0, which ignores `base_ind`).
    full_dat = self.data.data
    full_p = full_dat[1:, p_ind]
    full_ds = full_dat[1:, s_ind] - full_dat[:-1, s_ind]
    full_errs = np.abs(self._eval_at(full_p) - full_ds)
    data_m1 = full_dat[:-1, :]
    data_m1[full_errs > thresh] = np.nan
    self.data.data = data_m1
    self.data.split_data()

    # Update params, now without the outliers.
    self.params = fit_linear_bf_1d(self.masked_p, self.masked_ds, self._feat_fun)


def predict(self, in_data: np.ndarray) -> np.ndarray:
    """Make predictions using the fitted model on the provided data.

    Uses `in_data[:, -1, 0]` as current SoC and `in_data[:, -1, 1]`
    as active power. The result is clipped to the SoC limits.

    Args:
        in_data: Prepared data.

    Returns:
        Predictions, as array with one column.
    """
    p = np.copy(in_data[:, -1, 1])
    s_t = np.copy(in_data[:, -1, 0])

    # Evaluate model and clip to limits
    s_tp1 = s_t + self._eval_at(p)
    s_tp1 = np.clip(s_tp1, *self._scaled_soc_limits)
    return s_tp1.reshape((-1, 1))


def disturb(self):
    """Returns a sample of noise, which is always 0 for this model."""
    return 0


def _eval_at(self, p):
    """Evaluates the model for a given active power `p`.

    Computes the linear combination of the features of `p` with
    weights `self.params`. Calls `yeet` if the model is not fitted.
    """
    if self.params is None:
        yeet("Need to fit battery model first!")
    f_eval = self._feat_fun(p)
    res = 0
    # Distinct loop name, the parameter `p` used to be shadowed here.
    for ct, par in enumerate(self.params):
        res += par * f_eval[:, ct]
    return res


def _get_plot_name(self, base: str, put_on_ol: bool = False):
    """Returns the plot path for `base`, in `OVERLEAF_IMG_DIR` if `put_on_ol`."""
    if put_on_ol:
        return os.path.join(OVERLEAF_IMG_DIR, base)
    return self.get_plt_path(base)


@staticmethod
def _get_labs(set_title: bool = False):
    """Returns the title and axis labels for the battery scatter plots."""
    return {'title': 'Battery model' if set_title else None,
            'xlab': 'Active power [kW]',
            'ylab': r'$\Delta$ SoC [%]'}


def plot_all_data(self, put_on_ol: bool = False,
                  overwrite: bool = False, set_title: bool = False):
    """Plots all the data.

    Makes a scatter plot of `self.ds` vs. `self.p`, i.e. requires `fit`
    to be called first. Does nothing if the plot file already exists
    and `overwrite` is False.

    Args:
        put_on_ol: Whether to save the plot in `OVERLEAF_IMG_DIR`.
        overwrite: Whether to overwrite an existing plot.
        set_title: Whether to set a title.
    """
    # Define plot labels
    labs = self._get_labs(set_title)

    # Get scaling, the first entry of the SoC scaling is zeroed because
    # differences of the SoC are plotted (presumably it is the mean - confirm).
    scale = np.copy(self.data.scaling[self.in_inds])
    scale[0, 0] = 0.0

    # Check if file exists
    before_plt_path = self._get_plot_name("WithOutliers", put_on_ol)
    if overwrite or not os.path.isfile(before_plt_path + ".pdf"):
        # Plot data
        scatter_plot(self.p, self.ds, lab_dict=labs,
                     show=False,
                     m_and_std_x=scale[1],
                     m_and_std_y=scale[0],
                     add_line=True,
                     save_name=before_plt_path,
                     fig_size=self._scatter_plot_size)
overwrite: 244 | # Plot data 245 | scatter_plot(self.p, self.ds, lab_dict=labs, 246 | show=False, 247 | m_and_std_x=scale[1], 248 | m_and_std_y=scale[0], 249 | add_line=True, 250 | save_name=before_plt_path, 251 | fig_size=self._scatter_plot_size) 252 | 253 | def analyze_bat_model(self, put_on_ol: bool = False, 254 | overwrite: bool = False) -> None: 255 | """This is basically the fit method, but it also 256 | does some data analysis and makes some battery data specific plots. 257 | """ 258 | if self.fit_data is None: 259 | self.fit() 260 | 261 | # Get scaling 262 | d = self.data 263 | scale = np.copy(d.scaling[self.in_inds]) 264 | scale[0, 0] = 0.0 265 | 266 | # Define plot labels 267 | labs = {'title': '', 268 | 'xlab': 'Active power [kW]', 269 | 'ylab': r'$\Delta$ SoC [%]'} 270 | 271 | # Plot residuals vs. SoC 272 | res_plt_path = self._get_plot_name("ResVsSoc", put_on_ol) 273 | res_time_plt_path = self._get_plot_name("ResVsTime", put_on_ol) 274 | if not os.path.isfile(res_plt_path + ".pdf") or overwrite or \ 275 | not os.path.isfile(res_time_plt_path + ".pdf"): 276 | labs_res = {"title": "", "xlab": "State of charge [%]", 277 | "ylab": r"Residuals ($\Delta$ SoC [%])"} 278 | p_orig, _, soc_orig = self.init_data 279 | res = soc_orig[:-1] + self._eval_at(p_orig[:-1]) - soc_orig[1:] 280 | last_bool = self.nan_mask[-1] 281 | n_used = len(self.exp_mask) - last_bool 282 | filtered_res = res[self.nan_mask[:-1]][self.exp_mask[:n_used]] 283 | 284 | # Plot data 285 | n_soc = len(self.masked_soc) - last_bool 286 | scatter_plot(self.masked_soc[:n_soc], filtered_res, lab_dict=labs_res, 287 | show=False, 288 | m_and_std_x=d.scaling[self.in_inds[0]], 289 | m_and_std_y=scale[0], 290 | add_line=True, 291 | save_name=res_plt_path, 292 | fig_size=self._scatter_plot_size) 293 | 294 | # Plot residuals vs. 
def clean_data(dat: Tuple, rem_values: Sequence = (),
               n_cons_least: int = 60,
               const_excepts: Sequence = (),
               verbose: bool = True) -> Tuple[np.ndarray, np.ndarray]:
    """Drops unwanted values and long runs of constant values.

    Every entry whose value is contained in `rem_values` is dropped.
    Additionally, as soon as the same value was seen `n_cons_least` times
    in a row, the already kept entries of that run are discarded again and
    all further repetitions are skipped. Values listed in `const_excepts`
    never start such a run.

    Args:
        dat: Tuple (values, datetimes).
        rem_values: Values that are always removed.
        n_cons_least: Run length from which a constant run is removed.
        const_excepts: Values that may repeat arbitrarily often.
        verbose: Whether to print the number of removed points.

    Returns:
        Tuple (values, datetimes) containing only the kept entries.
    """
    assert n_cons_least > 0, f"Invalid argument n_cons_least = {n_cons_least}"

    values, dates = dat
    n_total = values.shape[0]

    # Output buffers, filled from the front; only the first `n_kept` count.
    kept_values = np.copy(values)
    kept_dates = np.copy(dates)

    last_kept = np.nan
    n_kept = 0
    run_len = 1
    in_const_run = False

    for val, date in zip(values, dates):
        if val in rem_values:
            # A removed value interrupts any running streak.
            in_const_run = False
            run_len = 1
            continue

        if val == last_kept and val not in const_excepts:
            run_len += 1
            if run_len == n_cons_least:
                # Streak just got long enough: un-keep its earlier entries.
                in_const_run = True
                n_kept -= n_cons_least - 1
        else:
            in_const_run = False
            run_len = 1

        if in_const_run:
            continue

        kept_values[n_kept] = val
        kept_dates[n_kept] = date
        n_kept += 1
        last_kept = val

    if verbose:
        print(f"{n_total - n_kept} data points removed.")
    assert n_kept > 0, "All data thrown away while cleaning!"
    return kept_values[:n_kept], kept_dates[:n_kept]
def clip_to_interval(dat: Tuple, interval: Sequence = (0.0, 100.0)) -> None:
    """Clips the values of a time series in place to the given interval.

    Values above `interval[1]` are set to `interval[1]`, afterwards values
    below `interval[0]` are set to `interval[0]`. NaNs compare false in
    both checks and are therefore left as they are.

    The array in `dat` is modified, nothing is returned.

    Args:
        dat: Raw time series tuple (values, dates).
        interval: Pair (lower bound, upper bound).
    """
    values, _ = dat
    lower, upper = interval[0], interval[1]

    # Same order as before: cap from above first, then from below.
    too_high = values > upper
    values[too_high] = upper
    too_low = values < lower
    values[too_low] = lower
133 | """ 134 | 135 | # Unpack 136 | values, dates = dat 137 | 138 | # Datetime of first and last data point 139 | start_dt = floor_datetime_to_min(dates[0], dt_mins) 140 | end_dt = floor_datetime_to_min(dates[-1], dt_mins) 141 | interval = np.timedelta64(dt_mins, 'm') 142 | n_ts = int((end_dt - start_dt) / interval + 1) 143 | if verbose: 144 | print(f"Total: {n_ts} Timesteps") 145 | 146 | # Initialize 147 | new_values = np.empty((n_ts,), dtype=np.float32) 148 | new_values.fill(np.nan) 149 | count = 0 150 | last_dt = dates[0] 151 | last_val = values[0] 152 | curr_val = (last_dt - start_dt) / interval * last_val 153 | curr_dt = dates[0] 154 | v = 0.0 155 | 156 | # Loop over data points 157 | for ct, v in enumerate(values[1:]): 158 | curr_dt = dates[ct + 1] 159 | curr_upper_lim = start_dt + (count + 1) * interval 160 | if curr_dt >= curr_upper_lim: 161 | if curr_dt <= curr_upper_lim + interval: 162 | # Next datetime in next interval 163 | curr_val += (curr_upper_lim - last_dt) / interval * v 164 | if not lin_ip: 165 | new_values[count] = curr_val 166 | else: 167 | new_values[count] = last_val + (v - last_val) * (curr_upper_lim - last_dt) / (curr_dt - last_dt) 168 | count += 1 169 | curr_val = (curr_dt - curr_upper_lim) / interval * v 170 | else: 171 | # Data missing! 
def fill_holes_linear_interpolate(time_series: np.ndarray, max_width: int = 1) -> None:
    """Fills short NaN holes of a uniform time series by linear interpolation.

    Every run of at most `max_width` consecutive NaNs that has a valid
    value on both sides is replaced by the values on the straight line
    between the previous and the next data point. Longer runs as well as
    NaNs at the very beginning or end are left untouched.

    Mutates `time_series`, does not return anything.

    Args:
        time_series: The time series that is processed (indexed as 1-D).
        max_width: Sequences of at most that many NaNs are filled.
    """
    nan_bool = np.isnan(time_series)
    if not np.any(nan_bool):
        return

    # Without any valid value there is nothing to interpolate between.
    non_nans = np.where(~nan_bool)[0]
    if non_nans.size == 0:
        return

    # Neglect NaNs at beginning and end, they lack one neighbour.
    nan_bool[:non_nans[0]] = False
    nan_bool[non_nans[-1]:] = False

    # Indices of all remaining (interior) NaNs.
    all_nans = np.argwhere(nan_bool)

    ind_ind = 0
    while ind_ind < all_nans.shape[0]:
        s_ind = all_nans[ind_ind][0]
        # Distance to the next valid value == length of this NaN streak.
        # A valid value always exists since trailing NaNs were masked out.
        streak_len = np.where(nan_bool[s_ind:] == 0)[0][0]
        if streak_len <= max_width:
            low_val = time_series[s_ind - 1]
            high_val = time_series[s_ind + streak_len]
            for k in range(streak_len):
                # Hole k lies (k + 1) steps after `low_val` and
                # (streak_len - k) steps before `high_val`, hence the
                # nearer neighbour must receive the larger weight.
                curr_val = low_val * (streak_len - k) + high_val * (k + 1)
                curr_val /= streak_len + 1
                time_series[s_ind + k] = curr_val

        # Jump to the first NaN of the next streak.
        ind_ind += streak_len
def gaussian_filter_ignoring_nans(time_series: np.ndarray,
                                  sigma: float = 2.0) -> np.ndarray:
    """Applies 1-dimensional Gaussian filtering ignoring occurrences of NaNs.

    The series with NaNs replaced by zero and an indicator series of the
    valid entries are both filtered; their ratio is the Gaussian-weighted
    mean over the valid neighbours only. Positions that were NaN in the
    input stay NaN in the output. The input is not modified.

    From: https://stackoverflow.com/questions/18697532/gaussian-filtering-a-image-with-nan-in-python

    Args:
        time_series: The time series to process.
        sigma: Gaussian filter standard deviation.

    Returns:
        Filtered time series.
    """
    nan_mask = np.isnan(time_series)

    # Values with NaNs zeroed, so they do not contribute to the weighted sum.
    v = time_series.copy()
    v[nan_mask] = 0
    # `scipy.ndimage.filters` is a deprecated namespace, call the public one.
    vv = scipy.ndimage.gaussian_filter1d(v, sigma=sigma)

    # Weights: 1 for valid entries, 0 for NaNs.
    w = np.ones_like(time_series)
    w[nan_mask] = 0
    ww = scipy.ndimage.gaussian_filter1d(w, sigma=sigma)

    # The total weight can only vanish where the input itself is NaN;
    # those entries are overwritten below, so the 0/0 warning is noise.
    with np.errstate(divide='ignore', invalid='ignore'):
        z = vv / ww
    z[nan_mask] = np.nan
    return z
class OfflineClient(OpcuaClient):
    """Offline stand-in for the OPC UA client used in the tests.

    Supports exactly one room with three valves and returns fixed values.
    Once `read_values` was called `N_STEPS_MAX` times, the reported room
    temperature switches to an out-of-bounds value, which makes the
    experiment terminate.
    """

    N_STEPS_MAX = 10

    # Number of `read_values` calls so far.
    _step_ind: int = 0

    # Set by `subscribe`, required by `read_values`.
    subscribed: bool = False

    node_strs: List[str]
    n_read_vals: int

    def connect(self) -> bool:
        """Marks the client as connected, always succeeds."""
        self._connected = True
        return True

    def disconnect(self) -> None:
        """Only checks that the client was connected."""
        self.assert_connected()

    def read_values(self) -> pd.DataFrame:
        """Returns one fixed string value for each subscribed node."""
        assert self.subscribed, "No subscription!"
        self._step_ind += 1
        if self._step_ind < self.N_STEPS_MAX:
            room_temp = "22.0"
        else:
            room_temp = "35.0"
        room_values = ["1", room_temp, "28.0", "1"]
        valve_values = ["1", "1", "1"]
        exo_values = ["5.0", "0.0", "26.0", "26.0"]
        all_values = room_values + valve_values + exo_values
        return pd.DataFrame({'node': self.node_strs,
                             'value': all_values})

    def publish(self, df_write: pd.DataFrame,
                log_time: bool = False,
                sleep_after: float = None) -> None:
        """Checks the written frame, nothing is actually sent."""
        assert len(df_write) == 3, f"Only one room supported! (df_write = {df_write})"
        self.assert_connected()

    def subscribe(self, df_read: pd.DataFrame,
                  sleep_after: float = None) -> None:
        """Remembers the transformed node strings of the read nodes."""
        self.assert_connected()
        self.subscribed = True
        sorted_df: pd.DataFrame = df_read.sort_index()
        self.node_strs = [opcua_empa.opcua_util._trf_node(node)
                          for node in sorted_df['node']]
        self.n_read_vals = len(self.node_strs)
        assert self.n_read_vals == 11, f"Wrong number of read nodes: {self.n_read_vals}"

    def assert_connected(self):
        """Fails if `connect` has not been called."""
        assert self._connected, "Not connected!"
nav.extract_values(r_vals) 119 | 120 | def test_valve_toggler(self): 121 | class OCToggle(OfflineClient): 122 | t_state = False 123 | op = ["1" for _ in range(3)] 124 | cl = ["0" for _ in range(3)] 125 | 126 | def read_values(self): 127 | self.t_state = not self.t_state 128 | vals = super().read_values() 129 | vals['value'][4:7] = self.op if self.t_state else self.cl 130 | return vals 131 | 132 | def publish(self, df_write: pd.DataFrame, 133 | log_time: bool = False, 134 | sleep_after: float = None) -> None: 135 | super().publish(df_write, log_time, sleep_after) 136 | temp_set = df_write['value'][0] 137 | 138 | assert (self.t_state and temp_set == MIN_TEMP) or \ 139 | (not self.t_state and temp_set == MAX_TEMP) 140 | 141 | vt = [(41, ValveToggler(n_steps_delay=0))] 142 | run_control(vt, 143 | exp_name="OfflineValveTogglerTest", 144 | verbose=0, 145 | no_data_saving=True, 146 | debug=True, 147 | _client_class=OCToggle, 148 | notify_failures=False) 149 | 150 | def test_control_client(self): 151 | with ControlClient(self.cont, 152 | exp_name="OfflineTest", 153 | verbose=0, 154 | no_data_saving=True, 155 | _client_class=OfflineClient, 156 | notify_failures=False) as cc: 157 | cc.read_publish_wait_check() 158 | pass 159 | 160 | def test_run_control(self): 161 | run_control(self.cont, 162 | exp_name="OfflineRunControlTest", 163 | verbose=0, 164 | no_data_saving=True, 165 | debug=True, 166 | _client_class=OfflineClient) 167 | 168 | def test_node_and_val_saving(self): 169 | n = 5 170 | nav = NodeAndValues(self.cont, n_max=n, 171 | exp_name="OfflineNAVSavingTest") 172 | for k in range(n): 173 | nav.compute_current_values() 174 | self.assertEqual(nav._curr_read_n, 0) 175 | self.assertEqual(nav._curr_write_n, 0) 176 | nav.compute_current_values() 177 | nav.save_cached_data(verbose=0) 178 | 179 | def test_setpoint_toggle_time(self): 180 | dt = 15 181 | delay_close, delay_open = 5.0, 3.0 182 | res1, b1 = setpoint_toggle_frac(True, dt, 0.5, delay_open, delay_close) 183 | res2, 
b2 = setpoint_toggle_frac(False, dt, 0.5, delay_open, delay_close) 184 | res3, b3 = setpoint_toggle_frac(True, dt, 1.0, delay_open, delay_close) 185 | res4, b4 = setpoint_toggle_frac(False, dt, 0.01, delay_open, delay_close) 186 | res5, b5 = setpoint_toggle_frac(True, dt, 0.1, delay_open, delay_close) 187 | self.assertAlmostEqual(res1, 0.5 - 1 / 3) 188 | self.assertAlmostEqual(res2, 0.5 - 1 / 5) 189 | self.assertTrue(res3 >= 1.0) 190 | self.assertTrue(res4 >= 1.0) 191 | self.assertAlmostEqual(res5, 0.0) 192 | self.assertTrue(not b1 and not b4) 193 | 194 | def test_compute_curr_setpoint(self): 195 | dt = 15 196 | t1 = np.datetime64('2019-12-31T00:33:29') 197 | t_start = np.datetime64('2019-12-31T00:30:00') 198 | res1 = setpoint_from_fraction(0.5, True, False, dt, 199 | start_time=t_start, curr_time=t1) 200 | res2 = setpoint_from_fraction(0.5, False, True, dt, 201 | start_time=t_start, curr_time=t1) 202 | self.assertTrue(res1 and not res2) 203 | self.assertTrue(not res2) 204 | 205 | 206 | class TestOpcuaRL(TestCase): 207 | """Tests the opcua client and related stuff. 208 | """ 209 | 210 | def __init__(self, *args, **kwargs): 211 | super().__init__(*args, **kwargs) 212 | self.c_val = 10.0 213 | self.cont = [(41, FixTimeConstController(val=self.c_val, max_n_minutes=1))] 214 | 215 | # Setup keras test agent 216 | mod = get_full_composite_model(add_battery=True, standardized=True) 217 | assert isinstance(mod, CompositeModel), "No composite model!" 
218 | p = PWProfile() 219 | self.full_env = RoomBatteryEnv(mod, p, max_eps=5) 220 | action_range = self.full_env.action_range 221 | self.test_agent = KerasDDPGTest(self.full_env, 222 | action_range=action_range, 223 | action=0.5) 224 | self.rl_cont = [(41, RLController(self.test_agent, verbose=0))] 225 | 226 | room_mod = get_full_composite_model(add_battery=False, standardized=True) 227 | self.room_env = FullRoomEnv(room_mod, max_eps=5) 228 | self.test_agent_room = KerasDDPGTest(self.room_env, 229 | action_range=self.room_env.action_range, 230 | action=0.5) 231 | 232 | self.rl_cont_room = [(41, RLController(self.test_agent_room, verbose=0))] 233 | 234 | @staticmethod 235 | def get_test_scaling(): 236 | s = np.empty((10, 2)) 237 | s.fill(1.0) 238 | s[:, 0] = 2.0 239 | return s 240 | 241 | def test_rl_controller(self): 242 | with ControlClient(self.rl_cont, 243 | exp_name="OfflineRLControllerTest", 244 | verbose=0, 245 | no_data_saving=True, 246 | _client_class=OfflineClient, 247 | notify_failures=False) as cc: 248 | cc.read_publish_wait_check() 249 | 250 | def test_rl_controller_room_only(self): 251 | with ControlClient(self.rl_cont_room, 252 | exp_name="OfflineRoomRLControllerTest", 253 | verbose=0, 254 | no_data_saving=True, 255 | _client_class=OfflineClient, 256 | notify_failures=False) as cc: 257 | cc.read_publish_wait_check() 258 | 259 | def test_controller_scaling(self): 260 | cont = self.rl_cont[0][1] 261 | self.assertTrue(cont._scaling.shape == (10, 2), "Wrong shape!!") 262 | cont._scaling = self.get_test_scaling() 263 | 264 | rand_in = np.random.normal(0.0, 1.0, (10,)) 265 | scaled_in = cont.scale_for_agent(rand_in) 266 | scaled_in_mean_added = cont.scale_for_agent(rand_in, remove_mean=False) 267 | self.assertTrue(np.allclose(rand_in, scaled_in + 2.0)) 268 | self.assertTrue(np.allclose(rand_in, scaled_in_mean_added - 2.0)) 269 | 270 | def test_time_adding(self): 271 | cont = self.rl_cont[0][1] 272 | cont._scaling = self.get_test_scaling() 273 | t_ind = 0 
274 | state = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0]) 275 | time_state = cont.add_time_to_state(state, t_ind) 276 | self.assertTrue(np.allclose(time_state[-2:], np.array([0.0, 1.0]))) 277 | self.assertEqual(len(time_state), 8) 278 | 279 | def test_rl_controller_call(self): 280 | with ControlClient(self.rl_cont_room, 281 | exp_name="OfflineRLControllerCallTest", 282 | verbose=0, 283 | no_data_saving=True, 284 | _client_class=OfflineClient, 285 | notify_failures=False, 286 | ) as cc: 287 | cont = self.rl_cont_room[0][1] 288 | cont._curr_ts_ind = 0 289 | cc.read_publish_wait_check() 290 | cont._curr_ts_ind = 5 291 | cc.read_publish_wait_check() 292 | 293 | def test_experiment_read_data(self): 294 | with ControlClient(self.rl_cont, 295 | exp_name="OfflineReadDataTest", 296 | verbose=0, 297 | _client_class=OfflineClient) as cc: 298 | cc.node_gen.n_max = 6 299 | cont = True 300 | while cont: 301 | cont = cc.read_publish_wait_check() 302 | 303 | exp_name = cc.node_gen.experiment_name + "_PT_0" 304 | 305 | dat = read_experiment_data(exp_name, verbose=0) 306 | assert len(dat) == 4 307 | d1, d2, d3, d4 = dat 308 | assert len(d1) == len(d2) 309 | assert len(d3) == len(d4) 310 | 311 | pass 312 | -------------------------------------------------------------------------------- /BatchRL/agents/keras_agents.py: -------------------------------------------------------------------------------- 1 | """A few keras RL agents. 2 | 3 | Based on the agents of the keras-rl library, the agents 4 | here are basically wrappers of those adding functionality 5 | to work with the present framework. 
# Constants, do not change!
DEF_RL_LR = 0.00001
DEF_GAMMA = 0.99

# Change values here
used_lr = 0.001
used_gamma = DEF_GAMMA


def ddpg_agent_name(n_steps: int, lr: float = DEF_RL_LR,
                    gam: float = DEF_GAMMA) -> str:
    """Builds the name of a DDPG agent from its training parameters.

    The learning rate and the discount factor only appear in the name
    if they differ from the defaults `DEF_RL_LR` and `DEF_GAMMA`.

    Args:
        n_steps: Number of training steps.
        lr: Learning rate.
        gam: Discount factor.

    Returns:
        The agent name, e.g. `DDPG_NEP100_LR0.001`.
    """
    name_parts = [f"DDPG_NEP{n_steps}"]
    if lr != DEF_RL_LR:
        name_parts.append(f"_LR{lr}")
    if gam != DEF_GAMMA:
        name_parts.append(f"_G{gam}")
    return "".join(name_parts)
class DQNBaseAgent(KerasBaseAgent):
    """Wrapper around the keras-rl `DQNAgent`.

    Deprecated: the constructor builds and compiles the agent but then
    unconditionally raises `NotImplementedError`, so no instance can
    currently be created.
    """

    def __init__(self, env: FullRoomEnv, n_train_steps: int = 10000):
        """Builds the Q-network and the DQN agent, then raises.

        Args:
            env: The environment; `env.nb_actions` and `env.m.n_pred`
                determine the output and input size of the Q-network.
            n_train_steps: Number of steps used in `fit`.

        Raises:
            NotImplementedError: Always, at the end of the constructor.
        """
        # Initialize super class
        name = "DQN"
        super().__init__(env=env, name=name)

        self.n_train_steps = n_train_steps

        # Build Q-function model.
        nb_actions = env.nb_actions
        n_state_vars = env.m.n_pred
        # Observations arrive with a leading window dimension of 1
        # (matches `window_length=1` of the memory below), flatten it.
        inputs = Input(shape=(1, n_state_vars))
        flat_inputs = Flatten()(inputs)
        model = getMLPModel(out_dim=nb_actions)
        model = Model(inputs=inputs, outputs=model(flat_inputs))
        # model.summary()

        # Configure and compile our agent.
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        self.m = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                          policy=policy,
                          gamma=0.9,
                          train_interval=100,
                          target_model_update=500)
        self.m.compile(Adam(lr=1e-5), metrics=['mae'])

        # The class is kept for reference only.
        raise NotImplementedError("Deprecated!")

    @train_decorator()
    def fit(self) -> None:
        """Trains the agent for `self.n_train_steps` steps and plots the rewards."""
        # Fit and plot rewards
        hist = self.m.fit(self.env, nb_steps=self.n_train_steps, visualize=False, verbose=1)
        train_plot = self.env.get_plt_path("test")
        plot_rewards(hist, train_plot)
        # dqn.test(env, nb_episodes=5, visualize=True)
class DDPGBaseAgent(KerasBaseAgent):
    """The wrapper of the keras-rl DDPG agent.

    Suited for continuous action and state space.
    Range of allowed actions can be specified.
    """

    def __init__(self, env: RLDynEnv,
                 n_steps: int = 50000,
                 lr: float = 0.001,
                 gamma: float = 0.9,
                 layers: Sequence[int] = (100, 100),
                 reg: float = 0.01,
                 action_range: RangeListT = None):
        """Constructor.

        Args:
            env: The underlying environment.
            n_steps: The number of steps to train.
            lr: The base learning rate.
            gamma: The discount factor.
            layers: The layer architecture of the MLP for the actor and the critic network.
            reg: The regularization factor for the networks.
            action_range: The range of the actions the actor can take.
        """
        # Find unique name based on parameters.
        param_ex_list = [("N", n_steps),
                         ("LR", lr),
                         ("GAM", gamma),
                         ("L", layers),
                         ("REG", reg),
                         ("AR", action_range)]

        # Set parameters first, `get_short_name()` below reads them.
        self.n_steps = n_steps
        self.lr = lr
        self.gamma = gamma

        # Create name
        name = f"{self.get_short_name()}_{env.name}{make_param_ext(param_ex_list)}"

        # Initialize super class.
        super().__init__(env=env, name=name)

        # Save reference to env and extract relevant dimensions.
        self.env = env
        self.nb_actions = env.nb_actions
        self.n_state_vars = env.m.n_pred

        # Network parameters
        self.layers = layers
        self.reg = reg
        if action_range is not None:
            assert len(action_range) == env.nb_actions, "Wrong amount of ranges!"
        self.action_range = action_range

        # Build the model.
        self._build_agent_model()

        self.plot_name = "DDPG"

    def __str__(self) -> str:
        return f"DDPG Agent with layers {self.layers}."

    def _build_agent_model(self) -> None:
        """Builds the Keras model of the agent.

        Creates the actor and the critic network, wraps them in a
        keras-rl `DDPGAgent` stored in `self.m` and compiles it.
        """
        # Build actor model
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, self.n_state_vars)))
        actor.add(getMLPModel(mlp_layers=self.layers,
                              out_dim=self.nb_actions,
                              ker_reg=self.reg))

        # If a range is given, squash the actions into [0, 1] with a sigmoid.
        # The range itself is not applied here, it is only exposed
        # through `get_info()`.
        if self.action_range is not None:
            actor.add(Activation('sigmoid'))

        # Build critic model
        action_input = Input(shape=(self.nb_actions,), name='action_input')
        observation_input = Input(shape=(1, self.n_state_vars), name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = Concatenate()([action_input, flattened_observation])
        x = getMLPModel(mlp_layers=self.layers, out_dim=1, ker_reg=self.reg)(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)

        # Configure and compile the agent.
        memory = SequentialMemory(limit=500000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions, theta=.15, mu=0., sigma=.05)
        self.m = DDPGAgent(nb_actions=self.nb_actions, actor=actor, critic=critic,
                           critic_action_input=action_input,
                           memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                           random_process=random_process, gamma=self.gamma,
                           target_model_update=1e-3)
        opt = Adam(lr=self.lr, clipnorm=1.0)
        self.m.compile(opt, metrics=['mae'])

    def get_info(self) -> Dict:
        """Returns a dict holding the action range under `'action_scaled_01'`."""
        return {'action_scaled_01': self.action_range}

    def load_if_exists(self, m, name: str,
                       train_data: str = DEFAULT_EVAL_SET) -> bool:
        """Loads the keras model if it exists.

        Returns true if it could be loaded, else False.
        Overrides the function in `KerasBase`, but in this
        case there are two models to load.

        Args:
            m: Keras-rl agent model to be loaded.
            name: Name of model (unused, the path is derived from `self.name`).
            train_data: Hyperparameter opt. evaluation set (unused).

        Returns:
            True if model could be loaded else False.

        Raises:
            ValueError: If no weights were found and `self.env.dummy_use` is set.
        """
        full_path = self._save_path()
        path_base = full_path[:-3]  # Strip the ".h5" extension
        path_actor = path_base + "_actor.h5"
        path_critic = path_base + "_critic.h5"

        if os.path.isfile(path_actor) and os.path.isfile(path_critic):
            m.load_weights(full_path)
            return True

        if self.env.dummy_use:
            raise ValueError(f"No trained model {path_base} found!")
        return False

    def _save_path(self):
        """Returns the path of the weights file, based on `self.name`."""
        return self.env.get_model_path(self.name + ".h5")

    def save_model(self, m, name: str, train_data: str = DEFAULT_EVAL_SET) -> None:
        """Saves a keras model.

        Needs to be overridden here since the keras-rl
        `DDPGAgent` class does not have a `save` method.

        Args:
            m: Keras-rl agent model.
            name: Name of the model (unused, the path is derived from `self.name`).
            train_data: Hyperparameter opt. evaluation set (unused).
        """
        m.save_weights(self._save_path())

    @train_decorator()
    def fit(self, verbose: int = 1, train_data: str = DEFAULT_TRAIN_SET) -> None:
        """Fit the agent using the environment.

        Makes a plot of the rewards received during the training.
        If the training stopped early, the agent is renamed according
        to the number of steps that were actually trained.

        Args:
            verbose: Verbosity, 0 is silent.
            train_data: Not used by this method.
        """
        # Fit
        if verbose:
            print("Actually fitting...")

        self.env.use_noise = True
        hist = self.m.fit(self.env, nb_steps=self.n_steps,
                          visualize=False, verbose=min(verbose, 1), nb_max_episode_steps=200)

        # Check if fully trained. The step log is missing / empty if the
        # training was stopped before the first episode finished.
        steps_log = hist.history.get('nb_steps')
        n_steps_trained = steps_log[-1] if steps_log else 0
        if n_steps_trained <= self.n_steps - self.env.n_ts_per_eps:
            if verbose:
                print(f"Training aborted after {n_steps_trained} steps, "
                      f"saving parameters anyway...")
            # Rename for parameter saving, keeping the learning rate and the
            # discount factor in the name as `get_short_name()` does.
            self.name = ddpg_agent_name(n_steps_trained, self.lr, self.gamma)

        # Plot rewards
        train_plot = self.env.get_plt_path(self.name + "_train_rewards")
        plot_rewards(hist, train_plot)

    def get_short_name(self):
        """Returns the short agent name built from step number, lr and gamma."""
        return ddpg_agent_name(self.n_steps, self.lr, self.gamma)
class IdRecurrent(Layer):
    """Dummy Layer, it passes the same values that are input into it.

    If `n` is specified, only a reduced number of features is
    returned, cannot be larger than the total number of features.
    If `return_sequences` is True, sequences are returned, else
    only the last element of the sequence.
    """

    def __init__(self, n: int = None, return_sequences: bool = True, **kwargs):
        """Initializes the layer.

        Args:
            n: Number of output features.
            return_sequences: Whether to return sequences.
            **kwargs: kwargs for super.
        """
        super().__init__(**kwargs)
        self.n = n
        self.r_s = return_sequences

    def call(self, x, **kwargs):
        """Returns the first `n` features of `x`.

        Args:
            x: The 3D input tensor.

        Returns:
            `x` restricted to the first `n` features (all if `n` is None),
            reduced to the last sequence element if sequences
            are not returned.
        """
        assert len(x.shape) == 3, "Only implemented for 3D tensor input."
        ret_val = x if self.n is None else x[:, :, :self.n]
        if self.r_s:
            return ret_val
        return ret_val[:, -1, :]

    def compute_output_shape(self, input_shape):
        """Computes the shape of the output of `call`.

        Args:
            input_shape: The shape of the 3D input.

        Returns:
            The input shape with the feature dimension set to `n` (if given)
            and without the sequence dimension if sequences are not returned.
        """
        out_shape = list(input_shape)
        if self.n is not None:
            # `call` slices the last (feature) axis, not the sequence axis.
            out_shape[2] = self.n
        if not self.r_s:
            out_shape = [out_shape[0], out_shape[2]]
        return tuple(out_shape)
class ConstrainedNoise(Layer):
    """
    Constrained noise layer.
    Note that the clipping will be active during testing.
    """

    consts: Sequence[SeriesConstraint]  #: Sequence of constraints
    input_noise_std: float  #: The std of the Gaussian noise to add
    is_input: bool  #: Whether the layer is applied to a 3D (True) or 2D (False) tensor

    def __init__(self, input_noise_std: float = 0,
                 consts: Sequence[SeriesConstraint] = None,
                 is_input: bool = True,
                 **kwargs):
        """
        Adds Gaussian noise with mean 0 and std as specified
        and then applies the constraints.

        Args:
            input_noise_std: The level of noise.
            consts: The list of constraints.
            is_input: Whether it is applied to an input tensor (3D)
                or an output tensor (2D).
            **kwargs: Layer kwargs.
        """
        super(ConstrainedNoise, self).__init__(**kwargs)
        self.input_noise_std = input_noise_std
        self.consts = consts
        self.is_input = is_input

    def _feature(self, tensor, ct: int):
        """Slices feature `ct` out of `tensor`, keeping the feature axis."""
        if self.is_input:
            return tensor[:, :, ct:(ct + 1)]
        return tensor[:, ct:(ct + 1)]

    def call(self, x, **kwargs):
        """
        Builds the layer given the input x.

        Args:
            x: The input to the layer.

        Returns:
            The output of the layer satisfying the constraints.

        Raises:
            ValueError: If a constraint has an unknown type.
        """
        x_modify = x

        # Add noise if std > 0
        if self.input_noise_std > 0:
            x_modify = GaussianNoise(self.input_noise_std)(x_modify)

        # Nothing to enforce
        if self.consts is None:
            return x_modify

        # Split into one tensor per constrained feature
        n_feats = len(self.consts)
        feature_tensors = [self._feature(x_modify, ct) for ct in range(n_feats)]

        for ct, c in enumerate(self.consts):
            c_type = c[0]
            if c_type is None:
                continue
            if c_type == 'interval':
                iv = c[1]
                feature_tensors[ct] = K.clip(feature_tensors[ct], iv[0], iv[1])
            elif c_type == 'exact':
                # Exact features must not be perturbed: undo the noise.
                # Without noise the feature is already untouched.
                if self.input_noise_std > 0:
                    feature_tensors[ct] = self._feature(x, ct)
            else:
                raise ValueError(f"Constraint type {c[0]} not supported!!")

        # Concatenate again
        return K.concatenate(feature_tensors, axis=-1)

    def compute_output_shape(self, input_shape):
        """
        The shape stays the same.

        Args:
            input_shape: The shape of the input.

        Returns:
            Same as input.
        """
        return input_shape
class ExtractInput(Layer):
    """Input extraction layer.

    Given a tensor with a large sequence length,
    this layer constructs the next input for the RNN
    with the previous output of the RNN.
    """

    slicing_indices: np.ndarray  #: The array with the indices.
    n_feats: int  #: The number of selected features that are predicted.
    seq_len: int  #: The sequence length.
    curr_ind: int  #: The current prediction index.

    def __init__(self, s_inds: np.ndarray,
                 seq_len: int,
                 curr_ind: int = 0,
                 **kwargs):
        """Initialize layer.

        Args:
            s_inds: The array with the indices.
            seq_len: The sequence length.
            curr_ind: The current offset.
            **kwargs: The kwargs for super(), e.g. `name`.
        """
        super(ExtractInput, self).__init__(**kwargs)
        self.slicing_indices = s_inds
        self.n_feats = len(s_inds)
        self.seq_len = seq_len
        self.curr_ind = curr_ind

    def call(self, x, **kwargs):
        """
        Builds the layer given the full data and the
        last prediction.

        Args:
            x: A list with the full data and the last prediction. If there is
                only one input, we will just return the input slice.

        Returns:
            The output of the layer that can be fed to the basic RNN.

        Raises:
            ValueError: If the sequence dimension of the full data is
                not larger than `curr_ind + seq_len`.
        """
        end_ind = self.curr_ind + self.seq_len
        if not isinstance(x, list):
            # No prediction from last step given.
            return x[:, self.curr_ind: end_ind, :]
        x_in, x_out = x
        if len(x_out.shape) == 2:
            # Add the sequence axis to the prediction
            x_out = K.reshape(x_out, (-1, 1, self.n_feats))

        x_s = x_in.shape
        if x_s[-2] <= end_ind:
            raise ValueError("curr_ind or seq_len too big!")
        x_prev = x_in[:, self.curr_ind: (end_ind - 1), :]
        x_next = x_in[:, (end_ind - 1): end_ind, :]

        # Mark the features that are replaced by the prediction.
        # Builtin `bool` is used since the alias `np.bool` was removed from NumPy.
        pred_ind = np.zeros((x_s[-1],), dtype=bool)
        pred_ind[self.slicing_indices] = True

        # Assemble the last step feature by feature: predicted features come
        # from `x_out` (in order), all others from the data.
        feat_tensor_list = []
        ct = 0
        for k in range(x_s[-1]):
            if pred_ind[k]:
                feat_tensor_list.append(x_out[:, :, ct: (ct + 1)])
                ct += 1
            else:
                feat_tensor_list.append(x_next[:, :, k: (k + 1)])

        # Concatenate
        out_next = K.concatenate(feat_tensor_list, axis=-1)
        return K.concatenate([x_prev, out_next], axis=-2)

    def compute_output_shape(self, input_shape):
        """
        The sequence length changes to `seq_len`.

        Args:
            input_shape: The shape of the input.

        Returns:
            Same as input with the second dimension set to sequence length.

        Raises:
            ValueError: If the tensor shapes are incompatible with the layer.
        """
        s0 = input_shape
        if isinstance(s0, list):
            if len(s0) != 2:
                raise ValueError("Need exactly two tensors if there are multiple!")
            # The prediction may be 2D or 3D (see `call`), in both cases
            # the features are in the last dimension.
            if s0[1][-1] != self.n_feats:
                raise ValueError("Invalid indices or tensor shape!")
            s0 = s0[0]
        if len(s0) != 3:
            raise ValueError("Only implemented for 3D tensors!")
        return s0[0], self.seq_len, s0[2]
def get_constrain_layer(ranges: List[Tuple[Num, Num]]):
    """Builds a Lambda layer that maps its input into the given ranges.

    The layer applies a sigmoid and rescales the result feature-wise,
    i.e. it does the same as `ConstrainOutput`, but as a Lambda layer.

    Args:
        ranges: List of intervals, one (low, high) pair per feature.

    Returns:
        The Lambda layer.
    """
    lower_bounds = np.array([iv[0] for iv in ranges], dtype=np.float32)
    widths = np.array([iv[1] - iv[0] for iv in ranges], dtype=np.float32)

    # Row vectors, used as constants for all samples
    offset = K.constant(lower_bounds.reshape((1, -1)))
    scale = K.constant(widths.reshape((1, -1)))

    def constrain_sigmoid(x):
        return activations.sigmoid(x) * scale + offset

    return Lambda(constrain_sigmoid)
class Controller(ABC):
    """Abstract interface of all controllers.

    Subclasses have to provide `__call__`, which yields the control
    input. Overriding `terminate()` is optional, by default a
    controller never asks to stop.
    """

    state: np.ndarray = None  #: Last state passed to `set_state()`.

    @abstractmethod
    def __call__(self, values):
        """Returns the current control input."""

    def set_state(self, curr_state: np.ndarray) -> None:
        """Stores `curr_state` as the current state."""
        self.state = curr_state

    def terminate(self) -> bool:
        """Tells whether the controller wants to stop, here never."""
        return False
class FixTimeConstController(FixTimeController):
    """Controller with a constant output.

    Always returns `val`, regardless of the current time or state.
    The runtime is limited if `max_n_minutes` is specified.
    """

    val: Num  #: The numerical value to be set.

    def __init__(self, val: Num = MIN_TEMP, max_n_minutes: int = None):
        """Initializes the controller.

        Args:
            val: The constant control value.
            max_n_minutes: The maximum runtime in minutes, unlimited if None.
        """
        super().__init__(max_n_minutes)
        self.val = val

    def __call__(self, values=None) -> Num:
        """Returns the constant value, `values` is ignored."""
        return self.val
class ValveToggler(FixTimeController):
    """Controller that toggles as soon as the valves have toggled."""

    n_delay: int  #: How many steps to wait with toggling back.
    TOL: float = 0.05

    _step_count: int = 0
    _curr_valve_state: bool = False

    def __init__(self, n_steps_delay: int = 10, n_steps_max: int = 60 * 60,
                 verbose: int = 0):
        """Initializes the controller.

        Args:
            n_steps_delay: Number of calls to wait after a valve switch
                before the setpoint is toggled.
            n_steps_max: Runtime limit, forwarded to the base class.
            verbose: Verbosity.
        """
        super().__init__(n_steps_max)
        self.n_delay = n_steps_delay
        self.verbose = verbose

    def __call__(self, values=None):
        """Returns the next temperature setpoint based on the valve state."""
        # Classify the valve reading, None means "in between"
        valve = self.state[4]
        if valve > 1.0 - self.TOL:
            observed = True
        elif valve < self.TOL:
            observed = False
        else:
            observed = None

        if observed is not None:
            if observed != self._curr_valve_state:
                # Valves just switched, restart counting
                self._step_count = 0
                msg = "Valves opened!!!" if observed else "Valves closed!!!"
                print_if_verb(self.verbose, msg)
            self._curr_valve_state = observed

        # During the delay after a switch, the previous request is kept
        in_delay = self._step_count < self.n_delay
        request_min = (not self._curr_valve_state) if in_delay else self._curr_valve_state

        self._step_count += 1
        return MIN_TEMP if request_min else MAX_TEMP
170 | 171 | Starts heating as soon as the temperature drops below `self.min_temp`. 172 | """ 173 | def __call__(self, values=None): 174 | t = self.state[5] # Extract room temp 175 | return MIN_TEMP if t >= self.min_temp else MAX_TEMP 176 | 177 | def __init__(self, min_temp: float = 21.0, n_steps_max: int = 60 * 60, 178 | verbose: int = 0): 179 | super().__init__(n_steps_max) 180 | self.min_temp = min_temp 181 | self.verbose = verbose 182 | 183 | 184 | def setpoint_toggle_frac(prev_state: bool, dt: int, action: Num, delay_open: Num, 185 | delay_close: Num, tol: float = 0.05) -> Tuple[float, bool]: 186 | """Computes the time the setpoint needs to toggle. 187 | 188 | Since the opening and the closing of the valves are delayed, 189 | the setpoint needs to change earlier to ensure the correct valve behavior. 190 | 191 | This has to be computed once at every beginning of a timestep 192 | of length `dt`. 193 | 194 | Args: 195 | action: The action in [0, 1] 196 | prev_state: The previous valve state, open: True, closed: False 197 | delay_open: The time needed to open the valves in minutes. 198 | delay_close: The time needed to close the valves in minutes. 199 | dt: The number of minutes in a timestep. 200 | tol: Tolerance 201 | 202 | Returns: 203 | The setpoint toggle time in [0, 2]. 204 | """ 205 | # Check input 206 | assert tol >= 0 and 0.0 <= action <= 1.0 207 | assert delay_close >= 0 and delay_open >= 0, "Delays cannot be negative!" 
class BaseRLController(FixTimeController):
    """Controller that uses a RL agent to do control.

    At the beginning of each timestep of `dt` minutes the agent is asked
    for an action, which is converted to a point in time within the step
    where the temperature setpoint is toggled.
    """

    default_val: Num = 21.0  #: Setpoint returned during the initial phase.
    agent: AbstractAgent = None  #: RL agent
    dt: int = None  #: Number of minutes in a timestep.

    #: The valve opening and closing delays in minutes.
    valve_delays: Tuple[float, float] = (0.5, 0.5)

    verbose: int
    const_debug: bool  #: Whether to output a constant value

    # Protected member variables
    _step_start_state: bool = None  #: Open: True, closed: False
    _next_start_state: bool = None
    _toggle_time_fraction: float = None
    _init_phase: bool = True

    _curr_ts_ind: int

    def __init__(self, rl_agent: AbstractAgent,
                 dt: int,
                 n_steps_max: int = 60 * 60,
                 const_debug: bool = False,
                 verbose: int = 3):
        """Initializes the controller.

        Args:
            rl_agent: The agent providing the actions.
            dt: The number of minutes in a timestep.
            n_steps_max: Runtime limit, forwarded to the base class.
            const_debug: Whether to always output the constant 21.0
                after the initial phase.
            verbose: Verbosity.
        """
        super().__init__(n_steps_max)
        self.agent = rl_agent
        self.dt = dt
        self.verbose = verbose
        self.const_debug = const_debug

        self._curr_ts_ind = self.get_dt_ind()

    def get_dt_ind(self) -> int:
        """Computes the index of the current timestep."""
        t_now = np.datetime64('now')
        return day_offset_ts(t_now, mins=self.dt, remaining=False) - 1

    def prepared_state(self, next_ts_ind: int = None) -> np.ndarray:
        """Returns the state that is fed to the agent, here unchanged."""
        return self.state

    def __call__(self, values=None) -> float:
        """Returns the current temperature setpoint.

        Until the first new timestep starts, `default_val` is returned.
        """
        if self._step_start_state is None:
            # __call__ is called for the first time,
            # set _step_start_state to valve state.
            valve_state = self.state[4]
            self._step_start_state = valve_state > 0.5

        # If next timestep started, compute next control input
        next_ts_ind = self.get_dt_ind()
        if next_ts_ind != self._curr_ts_ind:
            self._init_phase = False

            # Update start state. At the first timestep change there is no
            # planned state yet, keep the measured valve state in that case
            # instead of overwriting it with None.
            if self._next_start_state is not None:
                self._step_start_state = self._next_start_state

            prepared_state = self.prepared_state(next_ts_ind)
            ac = self.agent.get_action(prepared_state)
            if self.verbose:
                print(f"Step {next_ts_ind}, Action: {ac}")

            # Compute toggle point
            tog_frac, next_state = setpoint_toggle_frac(self._step_start_state,
                                                        self.dt, ac,
                                                        *self.valve_delays)
            self._next_start_state = next_state
            self._toggle_time_fraction = tog_frac
            self._curr_ts_ind = next_ts_ind

        # If it is still in warm-up phase return default value
        if self._init_phase:
            return self.default_val

        # Find and return the actual temperature setpoint
        tog_state = setpoint_from_fraction(self._toggle_time_fraction,
                                           self._step_start_state,
                                           self._next_start_state,
                                           self.dt)

        if self.const_debug:
            return 21.0
        return MAX_TEMP if tog_state else MIN_TEMP
356 | # Cannot directly check with isinstance because of cyclic imports. 357 | env_class_name = env.__class__.__name__ 358 | if env_class_name == "RoomBatteryEnv": 359 | self.battery = True 360 | print_if_verb(self.verbose, "Full model including battery!") 361 | elif env_class_name == "FullRoomEnv": 362 | self.battery = False 363 | print_if_verb(self.verbose, "Room only model!") 364 | else: 365 | raise NotImplementedError(f"Env: {env} is not supported!") 366 | 367 | # Save scaling info 368 | assert not self.data_ref.partially_scaled, "Fuck this!" 369 | if self.data_ref.fully_scaled: 370 | self._scaling = self.data_ref.scaling 371 | 372 | def prepared_state(self, next_ts_ind: int = None) -> np.ndarray: 373 | """Prepares the current state to be fed to the agent.""" 374 | time_state = self.add_time_to_state(self.state, next_ts_ind) 375 | if self.battery: 376 | # TODO: Implement this case 377 | raise NotImplementedError("Fuck") 378 | scaled_state = self.scale_for_agent(time_state) 379 | return scaled_state[self.data_ref.non_c_inds] 380 | 381 | def scale_for_agent(self, curr_state, remove_mean: bool = True) -> np.ndarray: 382 | """Scales the given state.""" 383 | assert len(curr_state) == 8 + 2 * self.battery, "Shape mismatch!" 384 | if remove_mean: 385 | return (curr_state - self._scaling[:, 0]) / self._scaling[:, 1] 386 | else: 387 | return self._scaling[:, 1] * curr_state + self._scaling[:, 0] 388 | 389 | def add_time_to_state(self, curr_state: np.ndarray, t_ind: int = None) -> np.ndarray: 390 | """Appends the sin and cos of the daytime to the state.""" 391 | assert len(curr_state) == 6, f"Invalid shape of state: {curr_state}" 392 | if t_ind is None: 393 | t_ind = self.get_dt_ind() 394 | n_ts_per_day = ts_per_day(self.dt) 395 | t = np.array(int_to_sin_cos(t_ind, n_ts_per_day)) 396 | return np.concatenate((curr_state, t)) 397 | 398 | 399 | class ValveTest2Controller(BaseRLController): 400 | """Testing controller. 401 | 402 | Uses the RL agent setting, i.e. 
makes a decision 403 | at the beginning of each 15 minutes interval. 404 | Assumes no valve delay, therefore can be used to 405 | measure the valve delay. 406 | """ 407 | 408 | class RandomAgent(AbstractAgent): 409 | """Helper agent class. 410 | 411 | Returns a random action in each step. 412 | """ 413 | def __init__(self, verbose): 414 | self.verbose = verbose 415 | 416 | def get_action(self, state) -> float: 417 | rand_ac = np.random.uniform(0.0, 1.0) 418 | if self.verbose: 419 | print(f"Action: {rand_ac}") 420 | return rand_ac 421 | 422 | def __init__(self, n_hours: int = 3, verbose: int = 1): 423 | super().__init__(self.RandomAgent(verbose), dt=15, n_steps_max=n_hours * 60) 424 | self.valve_delays = (0.0, 0.0) 425 | --------------------------------------------------------------------------------