├── __init__.py ├── requirements.txt ├── redd.py ├── template.yaml ├── utils.py ├── run-train.sh ├── main.py ├── run-test.sh ├── dataset_test.py ├── redd.yaml ├── settings.yaml ├── test.py ├── model.py ├── train.py ├── dataset.py ├── colab └── project-devel.ipynb └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.5 2 | scikit-learn==0.24.1 3 | scipy==1.6.0 4 | pandas==1.2.1 5 | PyYAML==5.4.1 6 | torch==1.7.1 7 | -------------------------------------------------------------------------------- /redd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | # Acquisition properties 5 | # Timezone: US/Eastern 6 | # Frequency: 1 Hz 7 | 8 | channel_name = "channel_%d.dat" 9 | 10 | timezone = "US/Eastern" 11 | 12 | 13 | def load(name, path, channels, start=None, end=None): 14 | """ 15 | REDD dataset parser. Parse REDD raw data from public 16 | available REDD dataset files 17 | 18 | Merge time series from multiple files and preprocess it 19 | - Filter out unrequired intervals 20 | - Remove duplicates 21 | - Create time serie index 22 | """ 23 | # WARNING: Time series inner join. Ignoring non-synced 24 | # datapoints from loaded channels 25 | df = pd.concat( 26 | [ 27 | pd.read_csv( 28 | os.path.join(path, channel_name % channel), 29 | sep=" ", 30 | names=["timestamp", name], 31 | ).set_index("timestamp") 32 | for channel in channels 33 | ], 34 | axis=1, 35 | join="inner", 36 | ) 37 | df = df.sum(axis=1) 38 | df.index = pd.to_datetime(df.index, unit="s", utc=True).tz_convert(timezone) 39 | df = df[~df.index.duplicated(keep="first")].sort_index() # Remove duplicates 40 | 41 | if start and end: 42 | # Filter out unrequired data from timeseries 43 | start_ = df.index[0].to_pydatetime() 44 | end_ = df.index[-1].to_pydatetime() 45 | if start < start_: 46 | start = start_ 47 | if end > end_: 48 | end = end_ 49 | df = df[start:end] 50 | df.name = name 51 | return df.sort_index() 52 | -------------------------------------------------------------------------------- /template.yaml: -------------------------------------------------------------------------------- 1 | - name: building 2 | path: house_ 3 | mains: 4 | channels: [] 5 | unknown: 6 | channels: [] 7 | appliances: 8 | - name: oven 9 | id: oven 10 | activity: cooking 11 | channels: [] 12 | - name: refrigerator 13 | id: refrigerator 14 | activity: cooking 15 | channels: [] 16 | - name: dishwasher 17 | id: dishwasher 18 | activity: cleaning 19 | channels: [] 20 | - name: kitchen_outlet 21 | id: kitchen_outlets 22 | activity: cooking 23 | channels: [] 24 | - name: lighting 25 | id: lighting 26 | activity: lighting 27 | channels: [] 28 | - name: washer_dryer 29 | id: washer_dryer 30 | activity: cleaning 31 | channels: [] 32 | - name: microwave 33 | id: microwave 34 | activity: cooking 35 | channels: [] 36 | - name: bathroom_outlet 37 | id: bathroom_gfi 38 | activity: selfcare 39 | channels: [] 40 | - name: electric_heater 41 | id: electric_heat 42 | activity: hvac 43 | channels: [] 44 | - name: stove 45 | id: stove 46 | activity: cooking 47 | channels: [] 48 | - name: electronics 49 | id: electronics 50 | activity: 51 | channels: [] 52 | - name: disposal 53 | id: disposal 54 | activity: cooking 55 | channels: [] 56 | - name: furance 57 | id: furance 58 | activity: cooking 59 | channels: [] 60 | - name: smoke_alarms 61 | id: smoke_alarms 62 | activity: security 63 | channels: [] 64 | - name: air_conditioner 65 | id: air_conditioning 66 | activity: hvac 67 | channels: [] 68 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import sys 5 | 6 | import yaml 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | import pprint 14 | import matplotlib.pyplot as plt 15 | 16 | 17 | def load_yaml(path): 18 | """ 19 | Load YAML file 20 | """ 21 | _yaml = yaml.safe_load(open(path, "r")) 22 | return _yaml if _yaml else {} 23 | 24 | 25 | def error(labels, outputs): 26 | """ 27 | Calcualte L1 error 28 | """ 29 | err = F.l1_loss(labels, outputs) 30 | return err 31 | 32 | 33 | def save_model(model, optimizer, hparams, appliance, transform, file_name_model, error): 34 | """ 35 | Save model and metadata to file 36 | """ 37 | torch.save( 38 | { 39 | "model_state_dict": model.state_dict(), 40 | "optimizer_state_dict": optimizer.state_dict(), 41 | "hparams": hparams, 42 | "appliance": appliance, 43 | "transform": transform, 44 | "error": error, 45 | }, 46 | file_name_model, 47 | ) 48 | 49 | 50 | def load_model(file_name_model, model, optimizer=None): 51 | """ 52 | Load model and metadata from file 53 | """ 54 | if torch.cuda.is_available(): 55 | state = torch.load(file_name_model) 56 | else: 57 | state = torch.load(file_name_model, map_location=torch.device("cpu")) 58 | 59 | model.load_state_dict(state["model_state_dict"]) 60 | if optimizer: 61 | optimizer.load_state_dict(state["optimizer_state_dict"]) 62 | 63 | hparams = state.get("hparams", None) 64 | appliance = state.get("appliance", None) 65 | 66 | transform = state.get("transform", None) 67 | error = state.get("error", None) 68 | 69 | print("=========== ARCHITECTURE ==========") 70 | print("Reloading appliance") 71 | pprint.pprint(appliance) 72 | print("Reloading transform") 73 | pprint.pprint(transform) 74 | print("===================================") 75 | return transform, error 76 | 77 | 78 | def save_dataset(transform, train_, test_, filename): 79 | """ 80 | Save training and testing dataset to file 81 | """ 82 | torch.save({"transform": transform, "train": train_, "test": test_}, filename) 83 | 84 | 85 | def plot_window( 86 | x, y, yhat, reg, clas, alphas, loss, err, classification_enabled, filename 87 | ): 88 | """ 89 | Plot sliding window to visualize disaggregation results, keep track 90 | of results in training or testing and debugging 91 | 92 | Plotting multipel time series 93 | - Aggregated demand 94 | - Appliance demand 95 | - Disaggregation prediction 96 | - Regression branch prediction 97 | - Classification branch prediction 98 | """ 99 | subplt_x = 4 100 | subplt_y = 4 101 | plt.figure(1, figsize=(20, 16)) 102 | plt.subplots_adjust(top=0.88) 103 | 104 | idxs = np.random.randint(len(x), size=(subplt_x * subplt_y)) 105 | for i, idx in enumerate(idxs): 106 | x_, y_, yhat_, reg_, clas_ = ( 107 | x.detach().numpy()[idx][0], 108 | y.detach().numpy()[idx], 109 | yhat.detach().numpy()[idx], 110 | reg.detach().numpy()[idx], 111 | clas.detach().numpy()[idx], 112 | ) 113 | alphas_ = alphas.detach().numpy()[idx].flatten() 114 | ax1 = plt.subplot(subplt_x, subplt_y, i + 1) 115 | ax2 = ax1.twinx() 116 | ax1.plot(range(len(x_)), x_, color="b", label="x") 117 | ax1.plot(range(len(y_)), y_, color="r", label="y") 118 | ax1.plot(range(len(reg_)), reg_, color="black", label="reg") 119 | ax1.plot(range(len(yhat_)), yhat_, alpha=0.5, color="orange", label="yhat") 120 | ax2.fill_between( 121 | range(len(alphas_)), alphas_, alpha=0.5, color="lightgrey", label="alpha" 122 | ) 123 | if classification_enabled: 124 | alphas_max = np.max(alphas_) 125 | ax2.plot( 126 | range(len(clas_)), 127 | clas_ * alphas_max, 128 | color="cyan", 129 | alpha=0.25, 130 | label="reg", 131 | ) 132 | 133 | plt.suptitle(f"loss {loss:.2f} error {err:.2f}") 134 | ax1.legend() 135 | ax2.legend() 136 | plt.legend() 137 | plt.tight_layout() 138 | plt.savefig(filename) 139 | plt.clf() 140 | -------------------------------------------------------------------------------- /run-train.sh: -------------------------------------------------------------------------------- 1 | # 2 | # TRAINING LAUNCHER 3 | # Train each of the appliances and models analyzed in the project and described in settings.yaml 4 | # See documentation describing each of the appliance analyzed 5 | # See documentation describing each of the model architectures evaluated 6 | 7 | mkdir output-train 8 | 9 | ################ DISHWASHER 10 | 11 | # Experiment 1 12 | mkdir output-train/dishwasher 13 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher --path output-train/dishwasher --train --epochs 5 --disable-random > output-train/dishwasher/results-train.log 14 | 15 | # Experiment 2 16 | mkdir output-train/dishwasher-norm 17 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-norm --path output-train/dishwasher-norm --train --epochs 5 --disable-random > output-train/dishwasher-norm/results-train.log 18 | 19 | # Experiment 4 20 | mkdir output-train/dishwasher-onlyregression 21 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression --path output-train/dishwasher-onlyregression --train --epochs 5 --disable-random > output-train/dishwasher-onlyregression/results-train.log 22 | 23 | # Experiment 5 24 | mkdir output-train/dishwasher-onlyregression-norm 25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression-norm --path output-train/dishwasher-onlyregression-norm --train --epochs 5 --disable-random > output-train/dishwasher-onlyregression-norm/results-train.log 26 | 27 | # Experiment 7 28 | mkdir output-train/dishwasher-classattention 29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-classattention --path output-train/dishwasher-classattention --train --epochs 5 --disable-random > output-train/dishwasher-classattention/results-train.log 30 | 31 | ################ FRIDGE 32 | 33 | # Experiment 1 34 | mkdir output-train/fridge 35 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge --path output-train/fridge --train --epochs 5 --disable-random > output-train/fridge/results-train.log 36 | 37 | # Experiment 2 38 | mkdir output-train/fridge-norm 39 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-norm --path output-train/fridge-norm --train --epochs 5 --disable-random > output-train/fridge-norm/results-train.log 40 | 41 | # Experiment 4 42 | mkdir output-train/fridge-onlyregression 43 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression --path output-train/fridge-onlyregression --train --epochs 5 --disable-random > output-train/fridge-onlyregression/results-train.log 44 | 45 | # Experiment 5 46 | mkdir output-train/fridge-onlyregression-norm 47 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression-norm --path output-train/fridge-onlyregression-norm --train --epochs 5 --disable-random > output-train/fridge-onlyregression-norm/results-train.log 48 | 49 | # Experiment 7 50 | mkdir output-train/fridge-classattention 51 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-classattention --path output-train/fridge-classattention --train --epochs 5 --disable-random > output-train/fridge-classattention/results-train.log 52 | 53 | ################ MICROWAVE 54 | 55 | # Experiment 1 56 | mkdir output-train/microwave 57 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave --path output-train/microwave --train --epochs 5 --disable-random > output-train/microwave/results-train.log 58 | 59 | # Experiment 2 60 | mkdir output-train/microwave-norm 61 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-norm --path output-train/microwave-norm --train --epochs 5 --disable-random > output-train/microwave-norm/results-train.log 62 | 63 | # Experiment 4 64 | mkdir output-train/microwave-onlyregression 65 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression --path output-train/microwave-onlyregression --train --epochs 5 --disable-random > output-train/microwave-onlyregression/results-train.log 66 | 67 | # Experiment 5 68 | mkdir output-train/microwave-onlyregression-norm 69 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression-norm --path output-train/microwave-onlyregression-norm --train --epochs 5 --disable-random > output-train/microwave-onlyregression-norm/results-train.log 70 | 71 | # Experiment 7 72 | mkdir output-train/microwave-classattention 73 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-classattention --path output-train/microwave-classattention --train --epochs 5 --disable-random > output-train/microwave-classattention/results-train.log 74 | 75 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | 5 | from datetime import datetime 6 | from argparse import ArgumentParser 7 | 8 | import torch 9 | from ray import tune 10 | 11 | from utils import error, load_yaml 12 | from train import train_model 13 | from test import test_model 14 | 15 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 16 | 17 | 18 | def get_arguments(): 19 | """ 20 | Command line arguments parser 21 | --settings 22 | Path to settings yaml file where all disaggregation scenarios 23 | and model hyperparameters are described 24 | --appliance 25 | Name of the appliance to train or test 26 | --path 27 | Path to output folder where resuls are saved 28 | --train 29 | Set to train or unset to test 30 | --tune 31 | Set to enable automatic architecture hyperparameters tunning 32 | --epochs 33 | Number of epochs to train 34 | --disable-plot 35 | Disable sliding window plotting during train or test 36 | --disable-random 37 | Disable randomness in processing 38 | """ 39 | parser = ArgumentParser(description="nilm-project") 40 | parser.add_argument("--settings") 41 | parser.add_argument("--appliance") 42 | parser.add_argument("--path") 43 | parser.add_argument("--train", action="store_true") 44 | parser.add_argument("--tune", action="store_true") 45 | parser.add_argument("--epochs") 46 | parser.add_argument("--disable-plot", action="store_true") 47 | parser.add_argument("--disable-random", action="store_true") 48 | return parser.parse_args() 49 | 50 | 51 | def main(): 52 | """ 53 | Main task called from command line. Command line arguments 54 | and train or test is launched 55 | """ 56 | args = get_arguments() 57 | 58 | if args.disable_random: # Disable randomness 59 | torch.manual_seed(7) 60 | 61 | train = args.train 62 | tune_enabled = args.tune 63 | output = args.path 64 | plot_disabled = args.disable_plot 65 | 66 | # Load settings from YAML file where generic and appliance 67 | # specific details and model hyperparmeters are described 68 | settings = load_yaml(args.settings) 69 | appliance = args.appliance 70 | 71 | dataset = settings["dataset"] 72 | hparams = settings["hparams"] 73 | if args.epochs: 74 | hparams["epochs"] = int(args.epochs) 75 | 76 | appliance = settings["appliances"][appliance] 77 | 78 | datapath = dataset["path"] 79 | if train: 80 | # DO TRAIN 81 | 82 | print("==========================================") 83 | print(f"Training ONGOING") 84 | print("==========================================") 85 | 86 | if not tune_enabled: 87 | # If no automatic hyperparameter tunning is enabled 88 | # use network hyperparameter from settings and train 89 | # the model 90 | model, transform = train_model( 91 | datapath, 92 | output, 93 | appliance, 94 | hparams, 95 | doplot=not plot_disabled, 96 | reload=False, # Do not reload models by default 97 | ) 98 | else: 99 | # If automatic hyperparameter tunning is enabled 100 | # specify hyperparameters grid search and tune the model 101 | config = { 102 | "datapath": datapath, 103 | "output": output, 104 | "appliance": appliance, 105 | "hparams": hparams, 106 | "doplot": not plot_disabled, 107 | "reload": False, 108 | "tune": { 109 | "F": tune.grid_search([16, 32, 64]), 110 | "K": tune.grid_search([4, 8, 16]), 111 | "H": tune.grid_search([256, 512, 1024]), 112 | }, 113 | } 114 | analysis = tune.run( 115 | train_model_wrapper, # Use wrapper to adapt training model 116 | metric="val_loss", 117 | mode="min", 118 | num_samples=5, 119 | config=config, 120 | ) 121 | print("==========================================") 122 | print(f"Best hyperparameters") 123 | print(analysis.best_config) 124 | print("==========================================") 125 | 126 | print("==========================================") 127 | print(f"Training DONE") 128 | print("==========================================") 129 | else: 130 | # DO TEST 131 | 132 | print("==========================================") 133 | print(f"Testing ONGOING") 134 | print("==========================================") 135 | test_model(datapath, output, appliance, hparams, doplot=not plot_disabled) 136 | print("==========================================") 137 | print(f"Testing DONE") 138 | print("==========================================") 139 | 140 | 141 | if __name__ == "__main__": 142 | main() 143 | -------------------------------------------------------------------------------- /run-test.sh: -------------------------------------------------------------------------------- 1 | # 2 | # TESTING LAUNCHER 3 | # Test each of the appliances and models analyzed in the project and described in settings.yaml 4 | # See documentation describing each of the appliance analyzed 5 | # See documentation describing each of the model architectures evaluated 6 | 7 | mkdir output-test 8 | 9 | ############### DISHWASHER 10 | 11 | # Experiment 1 12 | mkdir output-test/dishwasher 13 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher --path output-test/dishwasher --epochs 1 --disable-random > output-test/dishwasher/results-test.log 14 | 15 | # Experiment 2 16 | mkdir output-test/dishwasher-norm 17 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-norm --path output-test/dishwasher-norm --epochs 1 --disable-random > output-test/dishwasher-norm/results-test.log 18 | 19 | # Experiment 3 20 | mkdir output-test/dishwasher-norm-trainnorm 21 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-norm --path output-test/dishwasher-norm-trainnorm --epochs 1 --disable-random > output-test/dishwasher-norm-trainnorm/results-test.log 22 | 23 | # Experiment 4 24 | mkdir output-test/dishwasher-onlyregression 25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression --path output-test/dishwasher-onlyregression --epochs 1 --disable-random > output-test/dishwasher-onlyregression/results-test.log 26 | 27 | # Experiment 5 28 | mkdir output-test/dishwasher-onlyregression-norm 29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression-norm --path output-test/dishwasher-onlyregression-norm --epochs 1 --disable-random > output-test/dishwasher-onlyregression-norm/results-test.log 30 | 31 | # Experiment 5 32 | mkdir output-test/dishwasher-onlyregression-norm-trainnorm 33 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression-norm --path output-test/dishwasher-onlyregression-norm-trainnorm --epochs 1 --disable-random > output-test/dishwasher-onlyregression-norm-trainnorm/results-test.log 34 | 35 | # Experiment 7 36 | mkdir output-test/dishwasher-classattention 37 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-classattention --path output-test/dishwasher-classattention --epochs 1 --disable-random > output-test/dishwasher-classattention/results-test.log 38 | 39 | ################ FRIDGE 40 | 41 | # Experiment 1 42 | mkdir output-test/fridge 43 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge --path output-test/fridge --epochs 1 --disable-random > output-test/fridge/results-test.log 44 | 45 | # Experiment 2 46 | mkdir output-test/fridge-norm 47 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-norm --path output-test/fridge-norm --epochs 1 --disable-random > output-test/fridge-norm/results-test.log 48 | 49 | # Experiment 3 50 | mkdir output-test/fridge-norm-trainnorm 51 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-norm --path output-test/fridge-norm-trainnorm --epochs 1 --disable-random > output-test/fridge-norm-trainnorm/results-test.log 52 | 53 | # Experiment 4 54 | mkdir output-test/fridge-onlyregression 55 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression --path output-test/fridge-onlyregression --epochs 1 --disable-random > output-test/fridge-onlyregression/results-test.log 56 | 57 | # Experiment 5 58 | mkdir output-test/fridge-onlyregression-norm 59 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression-norm --path output-test/fridge-onlyregression-norm --epochs 1 --disable-random > output-test/fridge-onlyregression-norm/results-test.log 60 | 61 | # Experiment 6 62 | mkdir output-test/fridge-onlyregression-norm-trainnorm 63 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression-norm --path output-test/fridge-onlyregression-norm-trainnorm --epochs 1 --disable-random > output-test/fridge-onlyregression-norm-trainnorm/results-test.log 64 | 65 | # Experiment 7 66 | mkdir output-test/fridge-classattention 67 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-classattention --path output-test/fridge-classattention --epochs 1 --disable-random > output-test/fridge-classattention/results-test.log 68 | 69 | ################# MICROWAVE 70 | 71 | # Experiment 1 72 | mkdir output-test/microwave 73 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave --path output-test/microwave --epochs 1 --disable-random > output-test/microwave/results-test.log 74 | 75 | # Experiment 2 76 | mkdir output-test/microwave-norm 77 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-norm --path output-test/microwave-norm --epochs 1 --disable-random > output-test/microwave-norm/results-test.log 78 | 79 | # Experiment 3 80 | mkdir output-test/microwave-norm-trainnorm 81 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-norm --path output-test/microwave-norm-trainnorm --epochs 1 --disable-random > output-test/microwave-norm-trainnorm/results-test.log 82 | 83 | # Experiment 4 84 | mkdir output-test/microwave-onlyregression 85 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression --path output-test/microwave-onlyregression --epochs 1 --disable-random > output-test/microwave-onlyregression/results-test.log 86 | 87 | # Experiment 5 88 | mkdir output-test/microwave-onlyregression-norm 89 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression-norm --path output-test/microwave-onlyregression-norm --epochs 1 --disable-random > output-test/microwave-onlyregression-norm/results-test.log 90 | 91 | # Experiment 6 92 | mkdir output-test/microwave-onlyregression-norm-trainnorm 93 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression-norm --path output-test/microwave-onlyregression-norm-trainnorm --epochs 1 --disable-random > output-test/microwave-onlyregression-norm-trainnorm/results-test.log 94 | 95 | # Experiment 7 96 | mkdir output-test/microwave-classattention 97 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-classattention --path output-test/microwave-classattention --epochs 1 --disable-random > output-test/microwave-classattention/results-test.log 98 | -------------------------------------------------------------------------------- /dataset_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from datetime import datetime 3 | import pandas as pd 4 | 5 | import dataset 6 | 7 | 8 | def to_dt(x): 9 | return datetime.strptime(x, "%Y-%m-%d %H:%M:%S") 10 | 11 | 12 | class TestLoader(unittest.TestCase): 13 | 14 | """ 15 | Data preprocessing to impute small gaps and ignore larg gaps 16 | Ignore non 100% coverage days 17 | 18 | Extract from "Subtask Gated Networks for Non-Intrusive Load Monitoring" 19 | 20 | For REDD dataset,we preprocessed with the following procedure 21 | to handle missing values. First, we split the sequence so that the 22 | duration of missing values in subsequence is less than 20 seconds. 23 | Second,we filled the missing values in each subsequence by 24 | thebackward filling method. Finally, we only used the subsequences 25 | with more than one-day duration 26 | 27 | small gaps = ts < 20 seconds 28 | large gaps = ts > 20 seconds 29 | """ 30 | 31 | def is_equal(self, df, index, values): 32 | return self.assertTrue( 33 | (df.index == index).all() and (df.values == values).all() 34 | ) 35 | 36 | def setup_ts(self, name, start, end, gaps, freq="1S"): 37 | index = pd.date_range(start, end, freq=freq) 38 | values = range(len(index)) 39 | df = pd.DataFrame({name: values}, index) 40 | for gap_start, gap_end in gaps: 41 | mask = (df.index >= to_dt(gap_start)) & (df.index < to_dt(gap_end)) 42 | 43 | df = df[~mask] 44 | return df 45 | 46 | def setup_scenario(self, start, end, gaps={"mains": [], "appliance1": []}): 47 | freq = {"mains": "1S", "appliance1": "3S"} 48 | 49 | mains = self.setup_ts( 50 | "mains", to_dt(start), to_dt(end), gaps["mains"], freq=freq["mains"] 51 | ) 52 | appliance1 = self.setup_ts( 53 | "appliance1", 54 | to_dt(start), 55 | to_dt(end), 56 | gaps["appliance1"], 57 | freq=freq["appliance1"], 58 | ) 59 | return (mains, appliance1) 60 | 61 | def test_aligned_nomissing(self): 62 | """ 63 | Scenario: 64 | mains: 65 | coverage: 100% 66 | sampling period: 1 sec 67 | appliances: 68 | number of appliances: 1 69 | coverage: 100% 70 | sampling period: 3sec 71 | alignment: 72 | both series are aligned 73 | """ 74 | start = "2020-01-01 00:00:00" 75 | end = "2020-01-01 00:00:09" 76 | 77 | mains, appliance1 = self.setup_scenario(start, end) 78 | df = dataset.NilmDataset.align(mains, appliance1) 79 | expected_index = appliance1.index 80 | expected_values = [[0, 0], [3, 1], [6, 2], [9, 3]] 81 | self.is_equal(df, expected_index, expected_values) 82 | 83 | def test_aligned_nomissing_bfill(self): 84 | """ 85 | Scenario: 86 | mains: 87 | coverage: 100% 88 | sampling period: 1 sec 89 | appliances: 90 | number of appliances: 1 91 | coverage: 100% 92 | sampling period: 3sec 93 | alignment: 94 | both series are aligned 95 | """ 96 | start = "2020-01-01 00:00:00" 97 | end = "2020-01-01 00:00:09" 98 | 99 | mains, appliance1 = self.setup_scenario(start, end) 100 | df = dataset.NilmDataset.align(mains, appliance1, bfill=True) 101 | 102 | expected_index = mains.index 103 | expected_values = [ 104 | [0, 0], 105 | [1, 1], 106 | [2, 1], 107 | [3, 1], 108 | [4, 2], 109 | [5, 2], 110 | [6, 2], 111 | [7, 3], 112 | [8, 3], 113 | [9, 3], 114 | ] 115 | self.is_equal(df, expected_index, expected_values) 116 | 117 | def test_mains_small_missing(self): 118 | """ 119 | Scenario: 120 | mains: 121 | coverage: 2 x small gap in sequence 122 | sampling period: 1 sec 123 | appliances: 124 | number of appliances: 1 125 | coverage: 100% 126 | sampling period: 3sec 127 | alignment: 128 | both series are aligned 129 | """ 130 | 131 | start = "2020-01-01 00:00:00" 132 | end = "2020-01-01 00:00:09" 133 | gaps = { 134 | "mains": [], 135 | "appliance1": [("2020-01-01 00:00:03", "2020-01-01 00:00:04")], 136 | } 137 | mains, appliance1 = self.setup_scenario(start, end, gaps) 138 | data = dataset.NilmDataset.impute(appliance1, gapsize=3, subseqsize=1) 139 | self.assertEqual(len(data), 1) 140 | 141 | expected_index = pd.date_range( 142 | appliance1.index[0], appliance1.index[-1], freq="3S" 143 | ) 144 | expected_values = [[0], [2], [2], [3]] 145 | self.is_equal(data[0], expected_index, expected_values) 146 | 147 | def test_mains_large_missing(self): 148 | """ 149 | Scenario: 150 | mains: 151 | coverage: 2 x large gaps in sequence 152 | 1 x intraday gap 153 | 1 x interday gap 154 | sampling period: 1 sec 155 | appliances: 156 | coverage: 100% 157 | number of appliances: 1 158 | sampling period: 3sec 159 | alignment: 160 | both series are aligned 161 | """ 162 | 163 | start = "2020-01-01 00:00:00" 164 | end = "2020-01-01 00:01:00" 165 | gaps = { 166 | "mains": [], 167 | "appliance1": [("2020-01-01 00:00:25", "2020-01-01 00:00:45")], 168 | } 169 | mains, appliance1 = self.setup_scenario(start, end, gaps) 170 | data = dataset.NilmDataset.impute(appliance1, gapsize=2, subseqsize=8) 171 | self.assertEqual(len(data), 2) 172 | expected_index = pd.date_range( 173 | to_dt("2020-01-01 00:00:00"), to_dt("2020-01-01 00:00:24"), freq="3S" 174 | ) 175 | expected_values = [[0], [1], [2], [3], [4], [5], [6], [7], [8]] 176 | self.is_equal(data[0], expected_index, expected_values) 177 | 178 | expected_index = pd.date_range( 179 | to_dt("2020-01-01 00:00:36"), to_dt("2020-01-01 00:01:00"), freq="3S" 180 | ) 181 | 182 | expected_values = [ 183 | [15], # It's not the perfect imputation due non-aligned 3s (bfill) 184 | [15], # It's not the perfect imputation due non-aligned 3s (bfill) 185 | [15], 186 | [15], 187 | [16], 188 | [17], 189 | [18], 190 | [19], 191 | [20], 192 | ] 193 | self.is_equal(data[1], expected_index, expected_values) 194 | 195 | 196 | if __name__ == "__main__": 197 | unittest.main() 198 | -------------------------------------------------------------------------------- /redd.yaml: -------------------------------------------------------------------------------- 1 | name: REDD 2 | path: low_freq 3 | buildings: 4 | - name: building1 5 | path: house_1 6 | mains: 7 | channels: [1,2] 8 | unknown: 9 | channels: [20] 10 | appliances: 11 | - name: oven 12 | id: oven 13 | activity: cooking 14 | channels: [3,4] 15 | - name: refrigerator 16 | id: refrigerator 17 | activity: cooking 18 | channels: [5] 19 | - name: dishwasher 20 | id: dishwasher 21 | activity: cleaning 22 | channels: [6] 23 | - name: kitchen_outlets 24 | id: kitchen_outlets 25 | activity: cooking 26 | channels: [7,8,15,16] 27 | - name: lighting 28 | id: lighting 29 | activity: lighting 30 | channels: [9,17,18] 31 | - name: washer_dryer 32 | id: washer_dryer 33 | activity: cleaning 34 | channels: [10,20] 35 | - name: microwave 36 | id: microwave 37 | activity: cooking 38 | channels: [11] 39 | - name: bathroom_outlet 40 | id: bathroom_gfi 41 | activity: selfcare 42 | channels: [12] 43 | - name: electric_heater 44 | id: electric_heat 45 | activity: heating 46 | channels: [13] 47 | - name: stove 48 | id: stove 49 | activity: cooking 50 | channels: [14] 51 | - name: building2 52 | path: house_2 53 | mains: 54 | channels: [1,2] 55 | unknown: 56 | channels: [] 57 | appliances: 58 | - name: refrigerator 59 | id: refrigerator 60 | activity: cooking 61 | channels: [9] 62 | - name: dishwasher 63 | id: dishwasher 64 | activity: cleaning 65 | channels: [10] 66 | - name: kitchen_outlet 67 | id: kitchen_outlets 68 | activity: cooking 69 | channels: [3,8] 70 | - name: lighting 71 | id: lighting 72 | activity: lighting 73 | channels: [4] 74 | - name: washer_dryer 75 | id: washer_dryer 76 | activity: cleaning 77 | channels: [7] 78 | - name: microwave 79 | id: microwave 80 | activity: cooking 81 | channels: [6] 82 | - name: stove 83 | id: stove 84 | activity: cooking 85 | channels: [5] 86 | - name: disposal 87 | id: disposal 88 | activity: cooking 89 | channels: [11] 90 | - name: building3 91 | path: house_3 92 | mains: 93 | channels: [1,2] 94 | unknown: 95 | channels: [3,4,12] 96 | appliances: 97 | - name: refrigerator 98 | id: refrigerator 99 | activity: cooking 100 | channels: [7] 101 | - name: dishwasher 102 | id: dishwasher 103 | activity: cleaning 104 | channels: [9] 105 | - name: kitchen_outlet 106 | id: kitchen_outlets 107 | activity: cooking 108 | channels: [21,22] 109 | - name: lighting 110 | id: lighting 111 | activity: lighting 112 | channels: [5,11,15,17,19] 113 | - name: washer_dryer 114 | id: washer_dryer 115 | activity: cleaning 116 | channels: [13,14] 117 | - name: microwave 118 | id: microwave 119 | activity: cooking 120 | channels: [16] 121 | - name: bathroom_outlet 122 | id: bathroom_gfi 123 | activity: selfcare 124 | channels: [20] 125 | - name: disposal 126 | id: disposal 127 | activity: cooking 128 | channels: [8] 129 | - name: electronics 130 | id: electronics 131 | activity: consumer 132 | channels: [6] 133 | - name: furance 134 | id: furance 135 | activity: cooking 136 | channels: [10] 137 | - name: smoke_alarms 138 | id: smoke_alarms 139 | activity: security 140 | channels: [18] 141 | - name: building4 142 | path: house_4 143 | mains: 144 | channels: [1,2] 145 | unknown: 146 | channels: [6,12] 147 | appliances: 148 | - name: dishwasher 149 | id: dishwasher 150 | activity: cleaning 151 | channels: [15] 152 | - name: kitchen_outlet 153 | id: kitchen_outlets 154 | activity: cooking 155 | channels: [5,14] 156 | - name: lighting 157 | id: lighting 158 | activity: lighting 159 | channels: [3,13,18,19] 160 | - name: washer_dryer 161 | id: washer_dryer 162 | activity: cleaning 163 | channels: [7] 164 | - name: bathroom_outlet 165 | id: bathroom_gfi 166 | activity: selfcare 167 | channels: [16,17] 168 | - name: stove 169 | id: stove 170 | activity: cooking 171 | channels: [8] 172 | - name: furance 173 | id: furance 174 | activity: cooking 175 | channels: [4] 176 | - name: smoke_alarms 177 | id: smoke_alarms 178 | activity: security 179 | channels: [12] 180 | - name: air_conditioner 181 | id: air_conditioning 182 | activity: hvac 183 | channels: [9,10,20] 184 | - name: building5 185 | path: house_5 186 | mains: 187 | channels: [1,2] 188 | unknown: 189 | channels: [5,7,10,11,15,26] 190 | appliances: 191 | - name: refrigerator 192 | id: refrigerator 193 | activity: cooking 194 | channels: [18] 195 | - name: dishwasher 196 | id: dishwasher 197 | activity: cleaning 198 | channels: [20] 199 | - name: kitchen_outlet 200 | id: kitchen_outlets 201 | activity: cooking 202 | channels: [24,25] 203 | - name: lighting 204 | id: lighting 205 | activity: lighting 206 | channels: [4,14,17,19,23] 207 | - name: washer_dryer 208 | id: washer_dryer 209 | activity: cleaning 210 | channels: [8,9] 211 | - name: microwave 212 | id: microwave 213 | activity: cooking 214 | channels: [3] 215 | - name: bathroom_outlet 216 | id: bathroom_gfi 217 | activity: selfcare 218 | channels: [16] 219 | - name: electric_heater 220 | id: electric_heat 221 | activity: hvac 222 | channels: [12,13] 223 | - name: electronics 224 | id: electronics 225 | activity: 226 | channels: [22] 227 | - name: disposal 228 | id: disposal 229 | activity: cooking 230 | channels: [21] 231 | - name: furance 232 | id: furance 233 | activity: cooking 234 | channels: [6] 235 | - name: building6 236 | path: house_6 237 | mains: 238 | channels: [1,2] 239 | unknown: 240 | channels: [10,11] 241 | appliances: 242 | - name: refrigerator 243 | id: refrigerator 244 | activity: cooking 245 | channels: [8] 246 | - name: dishwasher 247 | id: dishwasher 248 | activity: cleaning 249 | channels: [9] 250 | - name: kitchen_outlet 251 | id: kitchen_outlets 252 | activity: cooking 253 | channels: [3,13] 254 | - name: lighting 255 | id: lighting 256 | activity: lighting 257 | channels: [14] 258 | - name: washer_dryer 259 | id: washer_dryer 260 | activity: cleaning 261 | channels: [4] 262 | - name: bathroom_outlet 263 | id: bathroom_gfi 264 | activity: selfcare 265 | channels: [7] 266 | - name: electric_heater 267 | id: electric_heat 268 | activity: hvac 269 | channels: [12] 270 | - name: stove 271 | id: stove 272 | activity: cooking 273 | channels: [5] 274 | - name: electronics 275 | id: electronics 276 | activity: 277 | channels: [6] 278 | - name: air_conditioner 279 | id: air_conditioning 280 | activity: hvac 281 | channels: [15,16,17] 282 | -------------------------------------------------------------------------------- /settings.yaml: -------------------------------------------------------------------------------- 1 | dataset: 2 | path: /tmp/redd 3 | hparams: 4 | lr: 0.001 5 | batch_size: 64 6 | epochs: 5 7 | train_size: 0.7 8 | test_size: 0.3 9 | 10 | appliances: 11 | dishwasher: 12 | name: dish washer 13 | filename: dishwasher.th 14 | buildings: 15 | train: 16 | - redd_house2 17 | - redd_house3 18 | - redd_house4 19 | - redd_house5 20 | - redd_house6 21 | test: 22 | - redd_house1 23 | active_threshold: 15.0 24 | active_ratio: 0.5 25 | active_oversample: 5 26 | normalization: False 27 | model: ModelPaperBackward 28 | hparams: 29 | L: 2304 30 | F: 32 31 | K: 16 32 | H: 512 33 | dishwasher-norm: 34 | name: dish washer 35 | filename: dishwasher.th 36 | buildings: 37 | train: 38 | - redd_house2 39 | - redd_house3 40 | - redd_house4 41 | - redd_house5 42 | - redd_house6 43 | test: 44 | - redd_house1 45 | active_threshold: 150.0 46 | active_ratio: 0.5 47 | active_oversample: 5 48 | normalization: True 49 | model: ModelPaper 50 | hparams: 51 | L: 1500 52 | F: 32 53 | K: 16 54 | H: 512 55 | dishwasher-onlyregression: 56 | name: dish washer 57 | filename: dishwasher.th 58 | buildings: 59 | train: 60 | - redd_house2 61 | - redd_house3 62 | - redd_house4 63 | - redd_house5 64 | - redd_house6 65 | test: 66 | - redd_house1 67 | active_threshold: 50.0 68 | active_ratio: 0.5 69 | active_oversample: 5 70 | normalization: False 71 | model: ModelOnlyRegression 72 | hparams: 73 | L: 2304 74 | F: 32 75 | K: 16 76 | H: 512 77 | dishwasher-onlyregression-norm: 78 | name: dish washer 79 | filename: dishwasher.th 80 | buildings: 81 | train: 82 | - redd_house2 83 | - redd_house3 84 | - redd_house4 85 | - redd_house5 86 | - redd_house6 87 | test: 88 | - redd_house1 89 | active_threshold: 50.0 90 | active_ratio: 0.5 91 | active_oversample: 5 92 | normalization: True 93 | model: ModelOnlyRegression 94 | hparams: 95 | L: 2304 96 | F: 32 97 | K: 16 98 | H: 512 99 | dishwasher-classattention: 100 | name: dish washer 101 | filename: dishwasher.th 102 | buildings: 103 | train: 104 | - redd_house2 105 | - redd_house3 106 | - redd_house4 107 | - redd_house5 108 | - redd_house6 109 | test: 110 | - redd_house1 111 | active_threshold: 15.0 112 | active_ratio: 0.5 113 | active_oversample: 5 114 | normalization: False 115 | model: ModelClassAttention 116 | hparams: 117 | L: 2304 118 | F: 32 119 | K: 16 120 | H: 512 121 | fridge: 122 | name: fridge 123 | filename: fridge.th 124 | 125 | buildings: 126 | train: 127 | - redd_house2 128 | - redd_house3 129 | - redd_house5 130 | - redd_house6 131 | test: 132 | - redd_house1 133 | active_threshold: 15.0 134 | active_ratio: null 135 | normalization: False 136 | model: ModelPaperBackward 137 | hparams: 138 | L: 496 139 | F: 32 140 | K: 8 141 | H: 1024 142 | fridge-norm: 143 | name: fridge 144 | filename: fridge.th 145 | 146 | buildings: 147 | train: 148 | - redd_house2 149 | - redd_house3 150 | - redd_house5 151 | - redd_house6 152 | test: 153 | - redd_house1 154 | active_threshold: 15.0 155 | active_ratio: null 156 | normalization: True 157 | model: ModelPaper 158 | hparams: 159 | L: 496 160 | F: 32 161 | K: 8 162 | H: 1024 163 | fridge-onlyregression: 164 | name: fridge 165 | filename: fridge.th 166 | 167 | buildings: 168 | train: 169 | - redd_house2 170 | - redd_house3 171 | - redd_house5 172 | - redd_house6 173 | test: 174 | - redd_house1 175 | active_threshold: 15.0 176 | active_ratio: null 177 | normalization: False 178 | onlyregression: True 179 | model: ModelOnlyRegression 180 | hparams: 181 | L: 496 182 | F: 32 183 | K: 8 184 | H: 1024 185 | fridge-onlyregression-norm: 186 | name: fridge 187 | filename: fridge.th 188 | 189 | buildings: 190 | train: 191 | - redd_house2 192 | - redd_house3 193 | - redd_house5 194 | - redd_house6 195 | test: 196 | - redd_house1 197 | active_threshold: 15.0 198 | active_ratio: null 199 | normalization: True 200 | model: ModelOnlyRegression 201 | hparams: 202 | L: 496 203 | F: 32 204 | K: 8 205 | H: 1024 206 | fridge-classattention: 207 | name: fridge 208 | filename: fridge.th 209 | 210 | buildings: 211 | train: 212 | - redd_house2 213 | - redd_house3 214 | - redd_house5 215 | - redd_house6 216 | test: 217 | - redd_house1 218 | active_threshold: 15.0 219 | active_ratio: null 220 | normalization: False 221 | model: ModelClassAttention 222 | hparams: 223 | L: 496 224 | F: 32 225 | K: 8 226 | H: 1024 227 | microwave: 228 | name: microwave 229 | filename: microwave.th 230 | buildings: 231 | train: 232 | - redd_house2 233 | - redd_house3 234 | - redd_house5 235 | test: 236 | - redd_house1 237 | active_threshold: 15.0 238 | active_ratio: 0.5 239 | active_oversample: 5 240 | normalization: False 241 | model: ModelPaperBackward 242 | hparams: 243 | L: 128 244 | F: 16 245 | K: 8 246 | H: 1024 247 | microwave-norm: 248 | name: microwave 249 | filename: microwave.th 250 | buildings: 251 | train: 252 | - redd_house2 253 | - redd_house3 254 | - redd_house5 255 | test: 256 | - redd_house1 257 | active_threshold: 100.0 258 | active_ratio: 0.5 259 | active_oversample: 5 260 | normalization: True 261 | model: ModelPaper 262 | hparams: 263 | L: 128 264 | F: 16 265 | K: 8 266 | H: 1024 267 | microwave-onlyregression: 268 | name: microwave 269 | filename: microwave.th 270 | buildings: 271 | train: 272 | - redd_house2 273 | - redd_house3 274 | - redd_house5 275 | test: 276 | - redd_house1 277 | active_threshold: 100.0 278 | active_ratio: 0.5 279 | active_oversample: 5 280 | normalization: False 281 | model: ModelOnlyRegression 282 | hparams: 283 | L: 128 284 | F: 16 285 | K: 8 286 | H: 1024 287 | microwave-onlyregression-norm: 288 | name: microwave 289 | filename: microwave.th 290 | buildings: 291 | train: 292 | - redd_house2 293 | - redd_house3 294 | - redd_house5 295 | test: 296 | - redd_house1 297 | active_threshold: 100.0 298 | active_ratio: 0.5 299 | active_oversample: 5 300 | normalization: True 301 | model: ModelOnlyRegression 302 | hparams: 303 | L: 128 304 | F: 16 305 | K: 8 306 | H: 1024 307 | microwave-classattention: 308 | name: microwave 309 | filename: microwave.th 310 | buildings: 311 | train: 312 | - redd_house2 313 | - redd_house3 314 | - redd_house5 315 | test: 316 | - redd_house1 317 | active_threshold: 15.0 318 | active_ratio: 0.5 319 | active_oversample: 5 320 | normalization: False 321 | model: ModelClassAttention 322 | hparams: 323 | L: 128 324 | F: 16 325 | K: 8 326 | H: 1024 327 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import sys 5 | 6 | import numpy as np 7 | import pandas as pd 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | import torch.optim as optim 12 | 13 | import model as nilmmodel 14 | import matplotlib.pyplot as plt 15 | 16 | from dataset import InMemoryKoreaDataset 17 | from utils import error 18 | from utils import save_model, load_model, save_dataset 19 | from utils import plot_window 20 | 21 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 22 | 23 | 24 | def test_single( 25 | model, test_loader, transform, appliance, batch_size=64, plotfilename=None 26 | ): 27 | """ 28 | Test specific pretrained model and appliance on test dataset 29 | """ 30 | 31 | errs, losses = [], [] 32 | 33 | L = appliance["hparams"]["L"] 34 | window_index = np.array(range(L)) 35 | 36 | # The disaggregation phase, also carried out with a sliding window 37 | # over the aggregated signal with hop size equal to 1 sample, 38 | # generates overlapped windows of the disaggregated signal. 39 | # reconstruct the overlapped windows by an means of a median 40 | # filter on the overlapped portions. 41 | 42 | # Use buffer to register overlapped result and apply median filter 43 | overlapped_y = {} 44 | overlapped_yhat = {} 45 | 46 | with torch.no_grad(): 47 | model.eval() 48 | for idx, (x, y, clas) in enumerate(test_loader): 49 | # model input data 50 | x = torch.unsqueeze(x, dim=1) 51 | 52 | x, y, clas = x.to(device), y.to(device), clas.to(device) 53 | yhat, reghat, alphas, clashat = model(x) 54 | 55 | # Force loss to 0 in test in order to reuse current implementation 56 | # but not used in testing analysis 57 | loss = 0.0 58 | 59 | # Calculate and use error to evaluate prediction 60 | err = error(y, yhat) 61 | 62 | err_ = err.item() 63 | losses.append(loss) 64 | errs.append(err_) 65 | 66 | x = x.cpu() 67 | y = y.cpu() 68 | yhat = yhat.cpu() 69 | if transform: 70 | # If transform enabled undo standardization in order to 71 | # prope evaluate error (paper benchmarking) and visualization 72 | if ( 73 | transform["sample_mean"] 74 | and transform["sample_std"] 75 | and transform["target_mean"] 76 | and transform["target_std"] 77 | ): 78 | 79 | # Undo standarization 80 | x = (x * transform["sample_std"]) + transform["sample_mean"] 81 | y = (y * transform["target_std"]) + transform["target_mean"] 82 | yhat = (yhat * transform["target_std"]) + transform["target_mean"] 83 | 84 | if idx % 100 == 0: 85 | # Plotting sliding window samples in order to debug or 86 | # keep track of current testing process 87 | print(f"test batch={idx+1} loss={loss:.2f} err={err:.2f}") 88 | if plotfilename: 89 | filename = plotfilename + f".{idx}.attention.png" 90 | reghat = reghat.cpu() 91 | if transform: 92 | # If transform enabled undo standardization in order 93 | # to proper visualize regression branch prediction 94 | if transform["target_std"] and transform["target_mean"]: 95 | reghat = (reghat * transform["sample_std"]) + transform[ 96 | "sample_mean" 97 | ] 98 | # Tricky workaround to rescale regression output and make 99 | # it easier to visualize and interpret results 100 | reghat = reghat / 10.0 101 | plot_window( 102 | x, 103 | y, 104 | yhat, 105 | reghat, 106 | clashat.cpu(), 107 | alphas.cpu(), 108 | loss, 109 | err_, 110 | model.classification_enabled, 111 | filename, 112 | ) 113 | 114 | y = y.numpy() 115 | yhat = yhat.numpy() 116 | 117 | # Update overlapping windows buffer to calculate median filter 118 | for offset, yy, yyhat in zip(range(batch_size), y, yhat): 119 | index = (idx * batch_size) + window_index + offset 120 | 121 | for index_, yy_, yyhat_ in zip(index, yy, yyhat): 122 | overlapped_y[index_] = yy_ 123 | overlapped_yhat.setdefault(index_, []) 124 | overlapped_yhat[index_].append(yyhat_) 125 | 126 | if len(overlapped_yhat[index_]) == L: 127 | # Calculate median if all overlapped windows in specfic 128 | # index are already available. Done prevent memory 129 | # overrun 130 | overlapped_yhat[index_] = np.median( 131 | np.array(overlapped_yhat[index_]) 132 | ) 133 | # Final buffers with sigle-point single-prediction after median filter 134 | final_y = [] 135 | final_yhat = [] 136 | index = sorted(list(overlapped_yhat.keys())) 137 | 138 | # Calculate median if all overlapped windows in specfic 139 | # index are already available. Done prevent memory 140 | # overrun 141 | for i in index: 142 | if isinstance(overlapped_yhat[i], list): 143 | overlapped_yhat[i] = np.median(np.array(overlapped_yhat[i])) 144 | 145 | # Update final prediction buffers 146 | final_yhat.append(overlapped_yhat[i]) 147 | final_y.append(overlapped_y[i]) 148 | 149 | final_y = np.array(final_y) 150 | final_yhat = np.array(final_yhat) 151 | 152 | filename = plotfilename + f".result.csv" 153 | result = pd.DataFrame({"y": final_y, "yhat": final_yhat}) 154 | result.to_csv(filename, index=None, sep=";") 155 | 156 | # Calculate MAE over single-point single-prediction time series 157 | return np.nanmean(np.abs(final_yhat - final_y)) 158 | 159 | 160 | def test_model(datapath, output, appliance, hparams, doplot=None): 161 | """ 162 | Test specific pretrained model and appliance on testing 163 | dataset 164 | """ 165 | 166 | # Load appliance specifications and model hyperparameters 167 | # from settings 168 | 169 | buildings = appliance["buildings"]["test"] 170 | name = appliance["name"] 171 | 172 | batch_size = hparams["batch_size"] 173 | params = appliance["hparams"] 174 | 175 | transform_enabled = appliance.get("normalization", False) 176 | model_type = appliance.get("model", "ModelPaper") 177 | 178 | # Initialize model network architecture using specified 179 | # hyperaparameters in settings 180 | model_type = getattr(nilmmodel, model_type) 181 | model = model_type(params["L"], params["F"], params["K"], params["H"]) 182 | model = model.to(device) 183 | 184 | # Load pretrained mofrl from file 185 | name = appliance["name"] 186 | filename = os.path.join(output, appliance["filename"]) 187 | transform, record_err = load_model(filename, model) 188 | 189 | if not transform_enabled: 190 | transform = None 191 | 192 | filename = os.path.join(output, appliance["filename"]) 193 | plotfilename = None 194 | if doplot: 195 | plotfilename = filename 196 | 197 | # Initialize active settings described in documentation. 198 | # Used to identify whether an appliance is classified as active 199 | # Used to enableoversampling to fix sliding windows active/inactive 200 | # imbalance 201 | active_threshold = appliance.get("active_threshold", 0.15) 202 | active_ratio = appliance.get("active_ratio", 0.5) 203 | active_oversample = appliance.get("active_oversample", 2) 204 | 205 | # Load test dataset 206 | my_dataset = InMemoryKoreaDataset( 207 | datapath, 208 | buildings, 209 | name, 210 | windowsize=params["L"], 211 | active_threshold=False, 212 | active_ratio=False, 213 | active_oversample=False, 214 | transform_enabled=transform_enabled, 215 | transform=None, # Using test standarization 216 | # NOTE: Enable this to use training standarization 217 | # transform=transform, 218 | ) 219 | 220 | # Load dataset transformation parameters from training 221 | transform = { 222 | "sample_mean": my_dataset.sample_mean, 223 | "sample_std": my_dataset.sample_std, 224 | "target_mean": my_dataset.target_mean, 225 | "target_std": my_dataset.target_std, 226 | } 227 | 228 | # Initialized test data loader using settings batch size 229 | test_loader = torch.utils.data.DataLoader( 230 | my_dataset, batch_size=hparams["batch_size"] 231 | ) 232 | 233 | # Launch testing on test dataset 234 | output = os.path.join(output, f"{name}") 235 | err = test_single( 236 | model, test_loader, transform, appliance, batch_size, plotfilename 237 | ) 238 | print(f"Test err={err:.2f}") 239 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | 5 | sys.path.append("/content/gdrive/MyDrive/ColabNotebooks") 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | class AdditiveAttention(torch.nn.Module): 12 | """ 13 | Attention mechanism for the models 14 | """ 15 | def __init__(self, dim=5): 16 | super().__init__() 17 | 18 | self.dim = dim 19 | 20 | # Using paper notation (W, V) 21 | self.W = nn.Linear(self.dim, self.dim) 22 | self.V = nn.Linear(self.dim, 1, bias=False) 23 | 24 | def forward(self, h): 25 | # Paper attenation mechanism 26 | # et = V*tanh(W*ht + b) 27 | # αt = softmax(et) 28 | # c = sum(αt*ht) 29 | layer_1 = self.W(h) 30 | layer_1 = torch.tanh(layer_1) 31 | layer_2 = self.V(layer_1) 32 | alphas = F.softmax(layer_2, dim=1) 33 | c = h * alphas # [batch, l, 2*h] x [batch, l, 1] = [batch, l, 2*h] 34 | output = torch.sum( 35 | c, 1 36 | ) # sum elements in dimension 1 (seq_length) [batch, 2*h] 37 | return output, alphas 38 | 39 | class AdditiveAttentionBackwards(AdditiveAttention): 40 | """ 41 | Attention mechanism for the models 42 | NOTE: Nearly same implementation as main additive attention 43 | but used in order to make it backwards compatible as some 44 | models have already been trained using this mode. Otherwise 45 | fails loading the model due non-matching architecture 46 | """ 47 | 48 | def forward(self, h): 49 | output, alphas = super().forward(h) 50 | return output 51 | 52 | class ModelPaper(nn.Module): 53 | """ 54 | Implementation of the network architecture described 55 | in the paper 56 | Both regression and classification branches enabled 57 | """ 58 | def __init__(self, l, filters, kernel, hunits): 59 | super().__init__() 60 | 61 | self.regression_enabled = True 62 | self.classification_enabled = True 63 | 64 | self.conv = nn.Sequential( 65 | nn.Conv1d(1, filters, kernel, padding=kernel // 2), 66 | nn.ReLU(), 67 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 68 | nn.ReLU(), 69 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 70 | nn.ReLU(), 71 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 72 | nn.ReLU(), 73 | ) 74 | 75 | self.lstm = nn.LSTM( 76 | input_size=filters, 77 | hidden_size=hunits, 78 | num_layers=1, 79 | bidirectional=True, 80 | batch_first=True, 81 | ) 82 | # input [batch, l-x(from convs), filters] 83 | # output [batch, l-x(de les convs), 2*hunits] 84 | 85 | self.attention = AdditiveAttention(dim=(2 * hunits)) 86 | self.regression = nn.Sequential( 87 | nn.Linear(2 * hunits, hunits), 88 | nn.ReLU(), 89 | nn.Linear(hunits, l) 90 | ) 91 | 92 | self.classification1 = nn.Sequential( 93 | nn.Conv1d(1, 10, 10, 1), 94 | nn.ReLU(), 95 | nn.Conv1d(10, 30, 8, 1), 96 | nn.ReLU(), 97 | nn.Conv1d(30, 40, 6, 1), 98 | nn.ReLU(), 99 | nn.Conv1d(40, 50, 5, 1), 100 | nn.ReLU(), 101 | nn.Conv1d(50, 50, 5, 1), 102 | nn.ReLU(), 103 | nn.Conv1d(50, 50, 5, 1), 104 | nn.ReLU(), 105 | ) # output --> [batch, 50, l-33] 106 | 107 | self.classification2 = nn.Sequential( 108 | nn.Flatten(start_dim=1) 109 | ) # flatten --> [batch, (l-33)*50] 110 | 111 | self.classification3 = nn.Sequential( 112 | nn.Linear((l - 33) * 50, 1024), 113 | nn.ReLU(), 114 | nn.Linear(1024, l), 115 | nn.Sigmoid() 116 | ) 117 | 118 | def forward(self, x): 119 | reg = self.conv(x) 120 | reg = reg.permute(0, 2, 1) 121 | output_lstm, (h_n, c_n) = self.lstm(reg) 122 | context, alphas = self.attention(output_lstm) 123 | reg = self.regression(context) 124 | 125 | clas1 = self.classification1(x) 126 | clas2 = self.classification2(clas1) 127 | clas = self.classification3(clas2) 128 | y = reg * clas 129 | return y, reg, alphas, clas 130 | 131 | class ModelPaperBackward(nn.Module): 132 | """ 133 | Implementation of the network architecture described 134 | in the paper 135 | NOTE: Nearly same implementation as ModelPaper 136 | but used in order to make it backwards compatible as some 137 | models have already been trained using this mode. Otherwise 138 | fails loading the model due non-matching architecture 139 | 140 | Both regression and classification branches enabled 141 | """ 142 | def __init__(self, l, filters, kernel, hunits): 143 | super().__init__() 144 | 145 | self.regression_enabled = True 146 | self.classification_enabled = True 147 | 148 | self.conv = nn.Sequential( 149 | nn.Conv1d(1, filters, kernel, padding=kernel // 2), 150 | nn.ReLU(), 151 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 152 | nn.ReLU(), 153 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 154 | nn.ReLU(), 155 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 156 | nn.ReLU(), 157 | ) 158 | 159 | self.lstm = nn.LSTM( 160 | input_size=filters, 161 | hidden_size=hunits, 162 | num_layers=1, 163 | bidirectional=True, 164 | batch_first=True, 165 | ) 166 | # input [batch, l-x(from convs), filters] 167 | # output [batch, l-x(from convs), 2*hunits] 168 | 169 | self.regression = nn.Sequential( 170 | AdditiveAttentionBackwards( 171 | dim=2 * hunits 172 | ), 173 | # input [batch, l (LSTM), 2*hunits] 174 | # output [batch, 2*hunits] 175 | nn.Linear(2 * hunits, hunits), 176 | nn.ReLU(), 177 | nn.Linear(hunits, l), 178 | ) 179 | 180 | self.classification1 = nn.Sequential( 181 | nn.Conv1d(1, 10, 10, 1), 182 | nn.ReLU(), 183 | nn.Conv1d(10, 30, 8, 1), 184 | nn.ReLU(), 185 | nn.Conv1d(30, 40, 6, 1), 186 | nn.ReLU(), 187 | nn.Conv1d(40, 50, 5, 1), 188 | nn.ReLU(), 189 | nn.Conv1d(50, 50, 5, 1), 190 | nn.ReLU(), 191 | nn.Conv1d(50, 50, 5, 1), 192 | nn.ReLU(), 193 | ) # output [batch, 50, l-33] 194 | 195 | self.classification2 = nn.Sequential( 196 | nn.Flatten(start_dim=1) 197 | ) # flatten --> [batch, (l-33)*50] 198 | 199 | self.classification3 = nn.Sequential( 200 | nn.Linear((l - 33) * 50, 1024), 201 | nn.ReLU(), 202 | nn.Linear(1024, l), 203 | nn.Sigmoid() 204 | ) 205 | 206 | def forward(self, x): 207 | reg = self.conv(x) 208 | reg = reg.permute(0, 2, 1) 209 | output_lstm, (h_n, c_n) = self.lstm(reg) 210 | reg = self.regression(output_lstm) 211 | 212 | clas1 = self.classification1(x) 213 | clas2 = self.classification2(clas1) 214 | clas = self.classification3(clas2) 215 | 216 | y = reg * clas 217 | alphas = torch.zeros(reg.shape) 218 | return y, reg, alphas, clas 219 | 220 | class ModelOnlyRegression(nn.Module): 221 | """ 222 | Implementation of the network architecture described 223 | in the paper but removing classification branch. 224 | Only regression branch is trained and used to predict 225 | appliance disaggregation 226 | 227 | Only regression branch enabled 228 | """ 229 | def __init__(self, l, filters, kernel, hunits): 230 | super().__init__() 231 | 232 | self.regression_enabled = True 233 | self.classification_enabled = False 234 | 235 | self.conv = nn.Sequential( 236 | nn.Conv1d(1, filters, kernel, padding=kernel // 2), 237 | nn.ReLU(), 238 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 239 | nn.ReLU(), 240 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 241 | nn.ReLU(), 242 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 243 | nn.ReLU(), 244 | ) 245 | 246 | self.lstm = nn.LSTM( 247 | input_size=filters, 248 | hidden_size=hunits, 249 | num_layers=1, 250 | bidirectional=True, 251 | batch_first=True, 252 | ) 253 | # input [batch, l-x(from convs), filters] 254 | # output [batch, l-x(from convs), 2*hunits] 255 | 256 | self.attention = AdditiveAttention(dim=(2 * hunits)) 257 | self.regression = nn.Sequential( 258 | nn.Linear(2 * hunits, hunits), 259 | nn.ReLU(), 260 | nn.Linear(hunits, l) 261 | ) 262 | 263 | def forward(self, x): 264 | reg = self.conv(x) 265 | reg = reg.permute(0, 2, 1) 266 | output_lstm, (h_n, c_n) = self.lstm(reg) 267 | context, alphas = self.attention(output_lstm) 268 | reg = self.regression(context) 269 | 270 | y = reg 271 | clas = reg # TEMPFIX to make it easy to integrate to de code (?) 272 | return y, reg, alphas, clas 273 | 274 | class ModelClassAttention(nn.Module): 275 | """ 276 | Implementation of the network architecture described 277 | in the paper but fitting classification with attention. 278 | Attention is used in both regression and classification 279 | 280 | Both regression and classification branches enabled 281 | """ 282 | def __init__(self, l, filters, kernel, hunits): 283 | super().__init__() 284 | 285 | self.regression_enabled = True 286 | self.classification_enabled = True 287 | 288 | self.conv = nn.Sequential( 289 | nn.Conv1d(1, filters, kernel, padding=kernel // 2), 290 | nn.ReLU(), 291 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 292 | nn.ReLU(), 293 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 294 | nn.ReLU(), 295 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2), 296 | nn.ReLU(), 297 | ) 298 | 299 | self.lstm = nn.LSTM( 300 | input_size=filters, 301 | hidden_size=hunits, 302 | num_layers=1, 303 | bidirectional=True, 304 | batch_first=True, 305 | ) 306 | # input [batch, l-x(from convs), filters] 307 | # output [batch, l-x(from convs), 2*hunits] 308 | 309 | self.attention = AdditiveAttention(dim=(2 * hunits)) 310 | self.regression = nn.Sequential( 311 | nn.Linear(2 * hunits, hunits), 312 | nn.ReLU(), 313 | nn.Linear(hunits, l) 314 | ) 315 | 316 | self.classification1 = nn.Sequential( 317 | nn.Conv1d(1, 10, 10, 1), 318 | nn.ReLU(), 319 | nn.Conv1d(10, 30, 8, 1), 320 | nn.ReLU(), 321 | nn.Conv1d(30, 40, 6, 1), 322 | nn.ReLU(), 323 | nn.Conv1d(40, 50, 5, 1), 324 | nn.ReLU(), 325 | nn.Conv1d(50, 50, 5, 1), 326 | nn.ReLU(), 327 | nn.Conv1d(50, 50, 5, 1), 328 | nn.ReLU(), 329 | ) # output [batch, 50, l-33] 330 | 331 | self.classification2 = nn.Sequential( 332 | nn.Flatten(start_dim=1) 333 | ) # flatten [batch, (l-33)*50] 334 | 335 | self.classification3 = nn.Sequential( 336 | nn.Linear((l - 33) * 50 + 2 * hunits, 1024), 337 | nn.ReLU(), 338 | nn.Linear(1024, l), 339 | nn.Sigmoid(), 340 | ) 341 | 342 | def forward(self, x): 343 | reg = self.conv(x) 344 | reg = reg.permute(0, 2, 1) 345 | output_lstm, (h_n, c_n) = self.lstm(reg) 346 | context, alphas = self.attention(output_lstm) 347 | reg = self.regression(context) 348 | 349 | clas1 = self.classification1(x) 350 | clas2 = self.classification2(clas1) 351 | clas3 = torch.cat((clas2, context), 1) 352 | clas = self.classification3(clas3) 353 | 354 | y = reg * clas 355 | return y, reg, alphas, clas 356 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import sys 5 | import pprint 6 | 7 | from datetime import datetime 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torch.optim as optim 15 | 16 | import model as nilmmodel 17 | import matplotlib.pyplot as plt 18 | 19 | from dataset import InMemoryKoreaDataset 20 | from utils import error 21 | from utils import save_model, load_model, save_dataset 22 | from utils import plot_window 23 | 24 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 25 | 26 | 27 | def summary(path, results): 28 | """ 29 | Helper method used to save training results 30 | Plot train vs validation loss and error to diagnose 31 | - Underfitting 32 | - Overfitting 33 | - Good fitting 34 | """ 35 | df = pd.DataFrame( 36 | [ 37 | { 38 | "epoch": x[0][0], 39 | "train_loss": x[0][1], 40 | "train_err": x[0][2], 41 | "eval_loss": x[1][1], 42 | "eval_err": x[1][2], 43 | } 44 | for x in results 45 | ] 46 | ).set_index("epoch") 47 | 48 | # Plot train vs eval loss to make diagnose 49 | columns = ["train_loss", "eval_loss"] 50 | filename = os.path.join(path, "results-loss.csv") 51 | df[columns].round(3).to_csv(filename, sep=";") 52 | filename = os.path.join(path, "results-loss.png") 53 | 54 | plt.figure(1, figsize=(10, 8)) 55 | df[columns].round(3).plot() 56 | plt.savefig(filename) 57 | plt.clf() 58 | 59 | # Plot train vs eval error to make diagnose 60 | columns = ["train_err", "eval_err"] 61 | filename = os.path.join(path, "results-error.csv") 62 | df[columns].round(3).to_csv(filename, sep=";") 63 | filename = os.path.join(path, "results-error.png") 64 | 65 | plt.figure(1, figsize=(10, 8)) 66 | df[columns].round(3).plot() 67 | plt.savefig(filename) 68 | plt.clf() 69 | 70 | 71 | def train_single_epoch( 72 | epoch, model, train_loader, transform, optimizer, eval_loader, plotfilename=None 73 | ): 74 | """ 75 | Train single epoch for specific model and appliance 76 | """ 77 | model.train() 78 | errs, losses = [], [] 79 | 80 | start = datetime.now() # setup a timer for the train 81 | for idx, (x, y, clas) in enumerate(train_loader): 82 | # Prepare model input data 83 | x = torch.unsqueeze(x, dim=1) 84 | 85 | optimizer.zero_grad() 86 | x, y, clas = x.to(device), y.to(device), clas.to(device) 87 | yhat, reghat, alphas, clashat = model(x) 88 | 89 | # Calculate prediction loss. See network architecture 90 | # and loss details in documentation 91 | loss_out = F.mse_loss(yhat, y) 92 | 93 | # Different loss functions are used depending on model_type 94 | # If classification is disabled loss function do not take 95 | # care of classification loss 96 | if model.classification_enabled: 97 | loss_clas = F.binary_cross_entropy(clashat, clas) 98 | loss = loss_out + loss_clas 99 | else: 100 | loss = loss_out 101 | 102 | loss.backward() 103 | optimizer.step() 104 | err = error(y, yhat) 105 | 106 | loss_, err_ = loss.item(), err.item() 107 | losses.append(loss_) 108 | errs.append(err_) 109 | 110 | if idx % 100 == 0: 111 | # Plotting sliding window samples in order to debug or 112 | # keep track of current testing process 113 | print(f"train epoch={epoch} batch={idx+1} loss={loss:.2f} err={err:.2f}") 114 | if plotfilename: 115 | filename = plotfilename + f".{idx}.png" 116 | x = x.cpu() 117 | y = y.cpu() 118 | yhat = yhat.cpu() 119 | reghat = reghat.cpu() 120 | if transform: 121 | # If transform enabled undo standardization in order 122 | # to proper visualize regression branch prediction 123 | x = (x * transform["sample_std"]) + transform["sample_mean"] 124 | y = (y * transform["target_std"]) + transform["target_mean"] 125 | yhat = (yhat * transform["target_std"]) + transform["target_mean"] 126 | reghat = (reghat * transform["sample_std"]) + transform[ 127 | "sample_mean" 128 | ] 129 | # Tricky workaround to rescale regression output and make 130 | # it easier to visualize and interpret results 131 | reghat = reghat / 10.0 132 | plot_window( 133 | x, 134 | y, 135 | yhat, 136 | reghat, 137 | clashat.cpu(), 138 | alphas.cpu(), 139 | loss_, 140 | err_, 141 | model.classification_enabled, 142 | filename, 143 | ) 144 | 145 | end = datetime.now() 146 | total_seconds = (end - start).seconds 147 | print("------------------------------------------") 148 | print(f"Epoch seconds: {total_seconds}") 149 | print("------------------------------------------") 150 | 151 | return np.mean(losses), np.mean(errs) 152 | 153 | 154 | def eval_single_epoch(model, eval_loader, transform, plotfilename=None): 155 | """ 156 | Eval single epoch for specific model and appliance 157 | """ 158 | 159 | errs, losses = [], [] 160 | with torch.no_grad(): 161 | model.eval() 162 | for idx, (x, y, clas) in enumerate(eval_loader): 163 | # Prepare model input data 164 | x = torch.unsqueeze(x, dim=1) 165 | 166 | x, y, clas = x.to(device), y.to(device), clas.to(device) 167 | yhat, reghat, alphas, clashat = model(x) 168 | 169 | # Calculate prediction loss. See network architecture 170 | # and loss details in documentation 171 | loss_out = F.mse_loss(yhat, y) 172 | 173 | # Different loss functions are used depending on model_type 174 | # If classification is disabled loss function do not take 175 | # care of classification loss 176 | if model.classification_enabled: 177 | loss_clas = F.binary_cross_entropy(clashat, clas) 178 | loss = loss_out + loss_clas 179 | else: 180 | loss = loss_out 181 | err = error(y, yhat) 182 | 183 | loss_, err_ = loss.item(), err.item() 184 | losses.append(loss_) 185 | errs.append(err_) 186 | 187 | if idx % 100 == 0: 188 | # Plotting sliding window samples in order to debug or 189 | # keep track of current testing process 190 | print(f"eval batch={idx+1} loss={loss:.2f} err={err:.2f}") 191 | if plotfilename: 192 | filename = plotfilename + f".{idx}.attention.png" 193 | x = x.cpu() 194 | y = y.cpu() 195 | yhat = yhat.cpu() 196 | reghat = reghat.cpu() 197 | if transform: 198 | # If transform enabled undo standardization in order 199 | # to proper visualize regression branch prediction 200 | x = (x * transform["sample_std"]) + transform["sample_mean"] 201 | y = (y * transform["target_std"]) + transform["target_mean"] 202 | yhat = (yhat * transform["target_std"]) + transform[ 203 | "target_mean" 204 | ] 205 | reghat = (reghat * transform["sample_std"]) + transform[ 206 | "sample_mean" 207 | ] 208 | # Tricky workaround to rescale regression output and make 209 | # it easier to visualize and interpret results 210 | reghat = reghat / 10.0 211 | plot_window( 212 | x, 213 | y, 214 | yhat, 215 | reghat, 216 | clashat.cpu(), 217 | alphas.cpu(), 218 | loss_, 219 | err_, 220 | model.classification_enabled, 221 | filename, 222 | ) 223 | return np.mean(losses), np.mean(errs) 224 | 225 | 226 | def train_model(datapath, output, appliance, hparams, doplot=None, reload=True): 227 | """ 228 | Train specific model and appliance 229 | """ 230 | 231 | # Load appliance specifications and hyperparameters from 232 | # settings 233 | buildings = appliance["buildings"]["train"] 234 | name = appliance["name"] 235 | params = appliance["hparams"] 236 | record_err = np.inf 237 | 238 | # Load whether data transformation is required. See details 239 | # on data normalization in documentation 240 | transform_enabled = appliance.get("normalization", False) 241 | # Load specific network architecture to train 242 | model_type = appliance.get("model", "ModelPaper") 243 | 244 | # Initialize active settings described in documentation. 245 | # Used to identify whether an appliance is classified as active 246 | # Used to enableoversampling to fix sliding windows active/inactive 247 | # imbalance 248 | active_threshold = appliance.get("active_threshold", 0.15) 249 | active_ratio = appliance.get("active_ratio", 0.5) 250 | active_oversample = appliance.get("active_oversample", 2) 251 | 252 | transform = None # Data transformation disabled by default 253 | 254 | # Load train dataset 255 | my_dataset = InMemoryKoreaDataset( 256 | datapath, 257 | buildings, 258 | name, 259 | windowsize=params["L"], 260 | active_threshold=active_threshold, 261 | active_ratio=active_ratio, 262 | active_oversample=active_oversample, 263 | transform_enabled=transform_enabled, 264 | ) 265 | 266 | if transform_enabled: 267 | # Load dataset transformation parameters from dataset 268 | transform = { 269 | "sample_mean": my_dataset.sample_mean, 270 | "sample_std": my_dataset.sample_std, 271 | "target_mean": my_dataset.target_mean, 272 | "target_std": my_dataset.target_std, 273 | } 274 | print(transform) 275 | 276 | # Size train and evaluation dataset 277 | total_size = len(my_dataset) 278 | train_size = int(hparams["train_size"] * (total_size)) 279 | eval_size = total_size - train_size 280 | 281 | print("============= DATASET =============") 282 | print(f"Total size: {total_size}".format(total_size)) 283 | print(f"Train size: {train_size}".format(train_size)) 284 | print(f"Eval size: {eval_size}".format(eval_size)) 285 | print("===================================") 286 | print("=========== ARCHITECTURE ==========") 287 | pprint.pprint(appliance) 288 | print("===================================") 289 | 290 | # Split and randomize train and evaluation dataset 291 | train_dataset, eval_dataset = torch.utils.data.random_split( 292 | my_dataset, (train_size, eval_size) 293 | ) 294 | 295 | # Save train dataset in order to use it in later 296 | # training sessions or debugging 297 | filename = os.path.join(output, "dataset.pt") 298 | save_dataset(transform, train_dataset, eval_dataset, filename) 299 | 300 | # Initialize train dataset loader 301 | train_loader = torch.utils.data.DataLoader( 302 | train_dataset, batch_size=hparams["batch_size"], shuffle=True 303 | ) 304 | # Initialize evaluation dataset loader 305 | eval_loader = torch.utils.data.DataLoader( 306 | eval_dataset, batch_size=hparams["batch_size"] 307 | ) 308 | 309 | model_type = getattr(nilmmodel, model_type) 310 | model = model_type(params["L"], params["F"], params["K"], params["H"]) 311 | model = model.to(device) 312 | 313 | # Initialize optimizer 314 | optimizer = optim.Adam(model.parameters(), hparams["lr"]) 315 | scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9) 316 | 317 | if reload: 318 | # Reload pretrained model in order to continue 319 | # previous training sessions 320 | filename = os.path.join(output, appliance["filename"]) 321 | print("====================================") 322 | print("Reloading model: ", filename) 323 | print("====================================") 324 | transform, record_err = load_model(filename, model, optimizer) 325 | 326 | results = [] 327 | 328 | start = datetime.now() 329 | for epoch in range(hparams["epochs"]): 330 | # Iterate over training epochs 331 | filename = os.path.join(output, appliance["filename"] + str(epoch)) 332 | 333 | plotfilename = None 334 | if doplot: 335 | plotfilename = filename 336 | 337 | err_ = None 338 | try: 339 | # Train single epoch 340 | loss, err = train_single_epoch( 341 | epoch, 342 | model, 343 | train_loader, 344 | transform, 345 | optimizer, 346 | eval_loader, 347 | plotfilename, 348 | ) 349 | print("==========================================") 350 | print(f"train epoch={epoch} loss={loss:.2f} err={err:.2f}") 351 | print("==========================================") 352 | 353 | loss_, err_ = eval_single_epoch(model, eval_loader, transform) 354 | print("==========================================") 355 | print(f"eval loss={loss_:.2f} err={err_:.2f}") 356 | print("==========================================") 357 | 358 | # tune.report(eval_loss=loss_) 359 | results.append([(epoch, loss, err), (epoch, loss_, err_)]) 360 | 361 | if err_ < record_err: 362 | # Compare current epoch error against previous 363 | # epochs error (minimum historic error) to check whether current 364 | # trained model is better than previous ones (best historic error) 365 | # Set and save current trained model as best historic trained 366 | # model if current error is lower than historic error 367 | filename = os.path.join(output, appliance["filename"]) 368 | save_model( 369 | model, optimizer, hparams, appliance, transform, filename, err_ 370 | ) 371 | record_err = err_ 372 | except Exception as e: 373 | print(e) 374 | 375 | scheduler.step() 376 | 377 | end = datetime.now() 378 | total_seconds = (end - start).seconds 379 | print("------------------------------------------") 380 | print(f"Total seconds: {total_seconds}") 381 | print("------------------------------------------") 382 | 383 | # Save model training results 384 | summary(output, results) 385 | 386 | return model, transform 387 | 388 | 389 | def train_model_wrapper(config): 390 | """ 391 | Wrapper to adapt model training to tune interface 392 | """ 393 | datapath = config["datapath"] 394 | output = config["output"] 395 | appliance = config["appliance"] 396 | hparams = config["hparams"] 397 | doplot = config["doplot"] 398 | reload = config["reload"] 399 | tune_hparams = config["tune"] 400 | 401 | appliance["hparams"]["F"] = tune_hparams["F"] 402 | appliance["hparams"]["K"] = tune_hparams["K"] 403 | appliance["hparams"]["H"] = tune_hparams["H"] 404 | 405 | return train_model(datapath, output, appliance, hparams, doplot, reload) 406 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | from torch.utils.data import Dataset 8 | 9 | import redd 10 | import utils 11 | 12 | 13 | class Building: 14 | """ 15 | Building consumption handler - definition of appliances and main 16 | consumption. 17 | """ 18 | 19 | def __init__(self, path, name, spec): 20 | self.path = path 21 | self.name = name 22 | 23 | self.mains = spec["mains"] 24 | self.appliances = spec["appliances"] 25 | 26 | def get_appliances(self): 27 | """ 28 | Get list of appliances 29 | """ 30 | return [x["id"] for x in self.appliances] 31 | 32 | def load_mains(self, start, end): 33 | """ 34 | Load mains consumption from start to end time interval. Using 35 | dataset specific loader. Online data loader to prevent memory overrun. 36 | Do not save whole dataset in memory 37 | """ 38 | return redd.load("mains", self.path, self.mains["channels"], start, end) 39 | 40 | def load_appliances(self, appliances=[], start=None, end=None): 41 | """ 42 | Load appliance consumption from start to end time interval. Using 43 | dataset specific loader. Online data loader prevent memory overrun. 44 | Do not save whole dataset in memory 45 | """ 46 | if not appliances: 47 | appliances = [x["id"] for x in self.appliances] 48 | 49 | # WARNING: Time series inner join. Ignoring non-synced 50 | # datapoints from loaded chanels 51 | return pd.concat( 52 | [ 53 | redd.load(x["id"], self.path, x["channels"], start, end) 54 | for x in self.appliances 55 | if x["id"] in appliances 56 | ], 57 | axis=1, 58 | join="inner", 59 | ) 60 | 61 | 62 | class NilmDataset: 63 | """ 64 | NILM dataset handler 65 | NOTE: This dataset handler is used when datset preprocessing required 66 | - Alignment 67 | - Imputation 68 | Not used in current analysis due already preprocessed available 69 | dataset (non-public available and obtained once project ongoing). 70 | """ 71 | 72 | def __init__(self, spec, path): 73 | self.path = path 74 | spec = utils.load_yaml(spec) 75 | 76 | path = os.path.join(self.path, spec["path"]) 77 | # Load all buildings in settings 78 | self.buildings = { 79 | x["name"]: Building(os.path.join(path, x["path"]), x["name"], x) 80 | for x in spec["buildings"] 81 | } 82 | 83 | def get_buildings(self): 84 | """ 85 | Get list of buildings 86 | """ 87 | return list(self.buildings.keys()) 88 | 89 | def get_appliances(self, building): 90 | """ 91 | Get list of appliances 92 | """ 93 | return self.buildings[building].get_appliances() 94 | 95 | def load_mains(self, building, start=None, end=None): 96 | """ 97 | Load mains consumption from start to endi time interval. Using 98 | dataset specific loader. Online data loader to prevent memory overrun. 99 | Do not save whole dataset in memory 100 | """ 101 | return self.buildings[building].load_mains(start, end) 102 | 103 | def load_appliances(self, building, appliances=[], start=None, end=None): 104 | """ 105 | Load appliance consumption from start to end time interval. Using 106 | dataset specific loader. Online data loader to prevent memory overrun. 107 | Do not save whole dataset in memory 108 | """ 109 | return self.buildings[building].load_appliances(appliances, start, end) 110 | 111 | @staticmethod 112 | def align(df1, df2, bfill=False): 113 | """ 114 | Align two timeseries with different acquisition frequency 115 | """ 116 | # Time alignment required due different acq frequency 117 | if bfill: 118 | # Raw backward filling done 119 | newindex = df1.index 120 | df2_ = df2.reindex(newindex, method="bfill") 121 | df = pd.concat([df1, df2_], axis=1, join="inner") 122 | else: 123 | df = pd.concat([df1, df2], axis=1, join="inner") 124 | 125 | return df[~df.isnull().any(axis=1)] 126 | 127 | @staticmethod 128 | def impute(df, gapsize=20, subseqsize=28800): 129 | """ 130 | Data preprocessing to impute small gaps and ignore larg gaps 131 | Ignore non 100% coverage days 132 | 133 | Extract from "Subtask Gated Networks for Non-Intrusive Load Monitoring" 134 | 135 | For REDD dataset,we preprocessed with the following procedure 136 | to handle missing values. First, we split the sequence so that the 137 | duration of missing values in subsequence is less than 20 seconds. 138 | Second,we filled the missing values in each subsequence by 139 | thebackward filling method. Finally, we only used the subsequences 140 | with more than one-day duration 141 | """ 142 | df = df.sort_index() 143 | 144 | start = df.index[0] 145 | end = df.index[-1] 146 | newindex = pd.date_range(start, end, freq="1S") 147 | 148 | # Appliance time series are not aligned to 3s (ie. 3,4 sec period) 149 | # Use 1sec reindex in order to align to 3sec timeserie 150 | df = df.reindex(newindex, method="bfill", limit=4) 151 | newindex = pd.date_range(start, end, freq="3S") 152 | mask = df.index.isin(newindex) 153 | df = df[mask] 154 | # WARNING 155 | # if there is a gap with more than limit number of consecutive NaNs, 156 | # it will only be partially filled. 157 | df = df.fillna(method="bfill", limit=gapsize) 158 | columns = df.columns 159 | 160 | df["rowindex"] = range(df.shape[0]) 161 | df = df[~df.iloc[:, 0].isnull()] 162 | 163 | diffseq = df["rowindex"].diff() 164 | diffsec = df.index.to_series().diff().dt.total_seconds() 165 | # Find big gaps to split data in subsequences 166 | mask = diffseq > gapsize 167 | 168 | # List of continuous data subsequences 169 | its_index = diffsec[mask].index 170 | its_offset = diffsec[mask].values 171 | 172 | data = [] 173 | if sum(mask) > 0: 174 | start = df.index[0] 175 | 176 | # Iterate over continuous data subsequences 177 | for idx, (it, offset) in enumerate(zip(its_index, its_offset)): 178 | end = it - pd.Timedelta(seconds=offset) 179 | subseq = df[start:end] 180 | 181 | # Check where subsquences in large enough. If the subsquence 182 | # is not large enough then ignore, otherwise consider it valid 183 | if subseq.shape[0] > subseqsize: 184 | data.append(subseq[columns]) 185 | start = it 186 | 187 | # Check where subsquences in large enough. If the subsquence 188 | # is not large enough then ignore, otherwise consider it valid 189 | end = df.index[-1] 190 | subseq = df[start:end] 191 | if subseq.shape[0] > subseqsize: 192 | data.append(subseq[columns]) 193 | else: 194 | # One single subsequence (valid or invalid) 195 | data.append(df[columns]) 196 | return data 197 | 198 | ## Filterout days without minimum amount of seconds 199 | # tmp = df.groupby("date").apply(lambda x: x.shape[0]) 200 | # valid_dates = tmp[tmp >= subseqsize].index 201 | # mask = df["date" ].isin(valid_dates) 202 | # return df[mask].drop(columns=["date"]) 203 | 204 | def load(self, building, appliances=[], start=None, end=None, bfill=False): 205 | return self.impute( 206 | self.align( 207 | self.load_mains(building, start, end), 208 | self.load_appliances(building, appliances, start, end), 209 | bfill, 210 | ) 211 | ) 212 | 213 | def load_raw(self, building, appliances=[], start=None, end=None, bfill=False): 214 | return self.align( 215 | self.load_mains(building, start, end), 216 | self.load_appliances(building, appliances, start, end), 217 | bfill, 218 | ) 219 | 220 | 221 | class InMemoryDataset(Dataset): 222 | """ 223 | Inmemory dataset 224 | WARNING: Not the best option due potential memory overrun but did not fail 225 | Not used in current analysis due already preprocessed available 226 | dataset (non-public available and obtained once project ongoing). 227 | """ 228 | 229 | def __init__( 230 | self, spec, path, buildings, appliance, windowsize=34459, start=None, end=None 231 | ): 232 | super().__init__() 233 | 234 | self.buildings = buildings 235 | self.appliance = appliance 236 | self.windowsize = windowsize 237 | 238 | dataset = NilmDataset(spec, path) 239 | 240 | # Dataset is structured as multiple long size windows 241 | self.data = [] 242 | # As sliding windows are used to acces data, a lookup-table 243 | # is created as sequential index to reference each sliding 244 | # window (long window + offset within long window). 245 | self.datamap = {} 246 | 247 | data_index = 0 248 | window_index = 0 249 | for building in buildings: 250 | for x in dataset.load(building, [appliance], start, end): 251 | # Calculate number of sliding windows in the long time window 252 | n_windows = x.shape[0] - windowsize + 1 253 | 254 | # Add loaded data to dataset 255 | self.data.append(x.reset_index()) 256 | # Update data index iteraring over all sliding windows in 257 | # dataset. Each of the indexes in global map corresponds 258 | # to specific long time window and offset 259 | self.datamap.update( 260 | {window_index + i: (data_index, i) for i in range(n_windows)} 261 | ) 262 | data_index += 1 263 | 264 | window_index += n_windows 265 | self.total_size = window_index 266 | 267 | def __len__(self): 268 | return self.total_size 269 | 270 | def __getitem__(self, idx): 271 | # Each of the indexes in global map corresponds 272 | # to specific long time window and offset. Obtain 273 | # long time window and offset 274 | data_index, window_index = self.datamap[idx] 275 | 276 | # Obtain start end offset in the long time window 277 | start = window_index 278 | end = self.windowsize + window_index 279 | 280 | # Access data 281 | sample = self.data[data_index].loc[start:end, "mains"] 282 | target = self.data[data_index].loc[start:end, self.appliance] 283 | 284 | return (torch.tensor(sample.values), torch.tensor(target.values)) 285 | 286 | 287 | class InMemoryKoreaDataset(Dataset): 288 | """ 289 | Inmemory dataset 290 | WARNING: Not the best option, due potential memory overrun but did not fail 291 | 292 | Arguments: 293 | windowsize: Sliding window size 294 | active_threshold: Active threshold used in classification 295 | Default value in paper 15W 296 | active_ratio: In order to prevent imbalance in data it's required 297 | to balance number of active/inactive appliance windows. In most 298 | of the cases the number of inactive windows is larger than 299 | the number of active windows. Active ratio forces the ratio 300 | between active/inactive windows by removing active/inactive 301 | windows (in most cases inactive windows) till fulfilling the ratio 302 | active_oversample: In order to prevent overfitting oversampling is done 303 | in active windows. This argument forces random oversampling 304 | active_oversample times available active windows 305 | transform_enabled: Used to enable data preprocessing transformation, 306 | in this case standardization 307 | transform: Transformation properties, in case of standardization 308 | mean and standard deviation 309 | """ 310 | 311 | sample_mean = None 312 | sample_std = None 313 | target_mean = None 314 | target_std = None 315 | 316 | def __init__( 317 | self, 318 | path, 319 | buildings, 320 | appliance, 321 | windowsize=496, 322 | active_threshold=15.0, 323 | active_ratio=None, 324 | active_oversample=None, 325 | transform_enabled=False, 326 | transform=None, 327 | ): 328 | super().__init__() 329 | 330 | self.transform_enabled = transform_enabled 331 | 332 | self.appliance = appliance 333 | self.windowsize = windowsize 334 | self.active_threshold = active_threshold 335 | 336 | # Dataset is structured as multiple long size windows 337 | self.data = [] 338 | # As sliding windows are used to acces data, a lookup-table 339 | # is created as sequential index to reference each sliding 340 | # window (long window + offset within long window). 341 | self.datamap = {} 342 | 343 | filenames = os.listdir(path) 344 | 345 | columns = ["main", self.appliance] 346 | 347 | # Using original long time windows as non-related time interval windows 348 | # in order to prevent mixing days and concatenating not continuous 349 | # data. Original data has gaps between dataset files 350 | self.data = [ 351 | pd.read_csv(os.path.join(path, filename), usecols=columns, sep=",") 352 | for filename in filenames 353 | for building in buildings 354 | if filename.startswith(building) 355 | ] 356 | 357 | df = pd.concat(self.data) 358 | # Data transformation 359 | if transform_enabled: 360 | if transform: 361 | self.sample_mean = transform["sample_mean"] 362 | self.sample_std = transform["sample_std"] 363 | self.target_mean = transform["target_mean"] 364 | self.target_std = transform["target_std"] 365 | else: 366 | self.sample_mean = df["main"].mean() 367 | self.sample_std = df["main"].std() 368 | self.target_mean = df[appliance].mean() 369 | self.target_std = df[appliance].std() 370 | 371 | data_index = 0 372 | window_index = 0 373 | 374 | for subseq in self.data: 375 | n_windows = subseq.shape[0] - windowsize + 1 # +1 why? 376 | # Update data index iteraring over all sliding windows in 377 | # dataset. Each of the indexes in global map corresponds 378 | # to specific long time window and offset 379 | self.datamap.update( 380 | {window_index + i: (data_index, i) for i in range(n_windows)} 381 | ) 382 | data_index += 1 383 | window_index += n_windows 384 | 385 | self.total_size = window_index 386 | 387 | if active_ratio: 388 | # Fix imbalance required 389 | map_indexes = list(self.datamap.keys()) 390 | # Shuffle indexes in order to prevent oversampling using same 391 | # building or continuous windows 392 | random.shuffle(map_indexes) 393 | 394 | # Active and inactive buffers are used to manage classified 395 | # sliding windows and use them later to fix imbalance 396 | active_indexes = [] 397 | inactive_indexes = [] 398 | 399 | # Classify every sliding window as active or inactive using 400 | # active_threshold as threshold 401 | for i, index in enumerate(map_indexes): 402 | data_index, window_index = self.datamap[index] 403 | start = window_index 404 | end = self.windowsize + window_index 405 | 406 | # Retreive sliding window from data 407 | subseq = self.data[data_index].loc[start : (end - 1), self.appliance] 408 | if subseq.shape[0] != self.windowsize: 409 | continue 410 | 411 | # Fill active and inactive buffers to be used later to 412 | # fix imbalance 413 | if (subseq > active_threshold).any(): # is there any active ? 414 | active_indexes.append(index) 415 | else: 416 | inactive_indexes.append(index) 417 | 418 | if (i % 1000) == 0: 419 | print( 420 | "Loading {0}: {1}/{2}".format( 421 | self.appliance, i, len(map_indexes) 422 | ) 423 | ) 424 | if active_oversample: 425 | # If oversample is required increase representation 426 | active_indexes = active_indexes * active_oversample 427 | 428 | # Identify imbalance by calculating active/inactive ratio 429 | n_active = len(active_indexes) 430 | n_inactive = len(inactive_indexes) 431 | 432 | # Update number of active/inactive windows to fulfill required 433 | # ratio and fix imbalance 434 | n_inactive_ = int((n_active * (1.0 - active_ratio)) / active_ratio) 435 | n_active_ = int((n_inactive * active_ratio) / (1.0 - active_ratio)) 436 | 437 | if n_inactive > n_inactive_: 438 | n_inactive = n_inactive_ 439 | else: 440 | n_active = n_active_ 441 | 442 | # Obtain valid indexes after imbalance analysis 443 | valid_indexes = active_indexes[:n_active] + inactive_indexes[:n_inactive] 444 | 445 | # Update datamap with fixed indexes in order to point to 446 | # proper sliding windows 447 | datamap = {} 448 | for dst_index, src_index in enumerate(valid_indexes): 449 | datamap[dst_index] = self.datamap[src_index] 450 | self.datamap = datamap 451 | self.total_size = len(self.datamap.keys()) 452 | 453 | def __len__(self): 454 | return self.total_size 455 | 456 | def __getitem__(self, idx): 457 | # Loader asking for specific sliding window in specific index 458 | # Calculate long time window and offset in order to retrieve data 459 | # Input data is obtained from mains time serie, target data is 460 | # obtained from appliance timeserie and classification is 461 | # done over mains time serie 462 | data_index, window_index = self.datamap[idx] 463 | start = window_index 464 | end = self.windowsize + window_index 465 | 466 | # Retreive mains data as sample data 467 | sample = self.data[data_index].loc[start : (end - 1), "main"] 468 | # Retreive appliance data as target data 469 | target = self.data[data_index].loc[start : (end - 1), self.appliance] 470 | 471 | # Calculate classification 472 | classification = torch.zeros(target.values.shape[0]) 473 | if self.active_threshold: 474 | classification = (target.values > self.active_threshold).astype(int) 475 | 476 | # WARNING: This is not the proper way as both train and test values 477 | # used. It's just a first approach 478 | if self.transform_enabled: 479 | # Standarization enabled 480 | sample = (sample - self.sample_mean) / self.sample_std 481 | target = (target - self.target_mean) / self.target_std 482 | 483 | return ( 484 | torch.tensor(sample.values, dtype=torch.float32), # Input 485 | torch.tensor(target.values, dtype=torch.float32), # Target 486 | torch.tensor(classification, dtype=torch.float32), # Classification 487 | ) 488 | 489 | 490 | if __name__ == "__main__": 491 | # Default dataset handler used to explore data in colab 492 | # not used in training or prediction 493 | 494 | spec = sys.argv[1] 495 | path = sys.argv[2] 496 | appliance = sys.argv[3] 497 | 498 | # NOTE: Raw dataset explorer 499 | # from datetime import datetime 500 | # import pytz 501 | # tz = pytz.timezone("US/Eastern") 502 | # start = datetime(2011, 4, 20, 0,0,0) 503 | # end = datetime(2011, 4, 22, 0,0,0) 504 | # start = tz.localize(start) 505 | # end = tz.localize(end) 506 | 507 | # building = "building1" 508 | # appliances = ["refrigerator"] 509 | # dataset = NilmDataset(spec, path) 510 | # raw_mains = dataset.load_mains(building) 511 | # raw_appliances = dataset.load_appliances(building, appliances) 512 | 513 | # raw_df = dataset.load_raw(building, appliances) 514 | # clean_df = dataset.load(building, appliances) 515 | 516 | # buildings = ["building1", "building2"] 517 | # my_dataset = InMemoryDataset(spec, path, buildings, "refrigerator") 518 | 519 | # NOTE: Korea dataset explorer 520 | buildings = ["redd_house1"] 521 | my_dataset = InMemoryKoreaDataset(path, buildings, appliance) 522 | -------------------------------------------------------------------------------- /colab/project-devel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "accelerator": "GPU", 6 | "colab": { 7 | "name": "Copia de Project 2.ipynb", 8 | "provenance": [], 9 | "collapsed_sections": [], 10 | "toc_visible": true, 11 | "machine_shape": "hm" 12 | }, 13 | "kernelspec": { 14 | "display_name": "Python 3", 15 | "name": "python3" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "code", 21 | "metadata": { 22 | "id": "pJbYXou6chZf", 23 | "colab": { 24 | "base_uri": "https://localhost:8080/" 25 | }, 26 | "outputId": "28c6f77c-eeaa-479f-c577-f3da0331c57c" 27 | }, 28 | "source": [ 29 | "!nvidia-smi" 30 | ], 31 | "execution_count": null, 32 | "outputs": [ 33 | { 34 | "output_type": "stream", 35 | "text": [ 36 | "Sat Apr 17 11:32:16 2021 \n", 37 | "+-----------------------------------------------------------------------------+\n", 38 | "| NVIDIA-SMI 460.67 Driver Version: 460.32.03 CUDA Version: 11.2 |\n", 39 | "|-------------------------------+----------------------+----------------------+\n", 40 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", 41 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", 42 | "| | | MIG M. |\n", 43 | "|===============================+======================+======================|\n", 44 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", 45 | "| N/A 76C P8 12W / 70W | 0MiB / 15109MiB | 0% Default |\n", 46 | "| | | N/A |\n", 47 | "+-------------------------------+----------------------+----------------------+\n", 48 | " \n", 49 | "+-----------------------------------------------------------------------------+\n", 50 | "| Processes: |\n", 51 | "| GPU GI CI PID Type Process name GPU Memory |\n", 52 | "| ID ID Usage |\n", 53 | "|=============================================================================|\n", 54 | "| No running processes found |\n", 55 | "+-----------------------------------------------------------------------------+\n" 56 | ], 57 | "name": "stdout" 58 | } 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "metadata": { 64 | "id": "UUElMiSlyoMu", 65 | "colab": { 66 | "base_uri": "https://localhost:8080/" 67 | }, 68 | "outputId": "77b5c9cd-9d17-41ee-8adf-0f7f1a2ef6e6" 69 | }, 70 | "source": [ 71 | "## Mount grdive unit in order to load data and import source files\n", 72 | "from google.colab import drive\n", 73 | "drive.mount('/content/gdrive/')" 74 | ], 75 | "execution_count": null, 76 | "outputs": [ 77 | { 78 | "output_type": "stream", 79 | "text": [ 80 | "Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount(\"/content/gdrive/\", force_remount=True).\n" 81 | ], 82 | "name": "stdout" 83 | } 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "metadata": { 89 | "id": "Mf5PxKCN_1kA", 90 | "colab": { 91 | "base_uri": "https://localhost:8080/" 92 | }, 93 | "outputId": "3452ce72-040b-417e-9fc0-7768e3d78337" 94 | }, 95 | "source": [ 96 | "!ls /content/gdrive/MyDrive/ColabNotebooks/" 97 | ], 98 | "execution_count": null, 99 | "outputs": [ 100 | { 101 | "output_type": "stream", 102 | "text": [ 103 | "'Copia de Project 2.ipynb' microwave_out redd.yaml\n", 104 | " dataset.py\t\t modelclassatt.py run.sh\n", 105 | " dataset_test.py\t model.py\t settings.yaml\n", 106 | " dishwasher_out\t\t 'Project 2.ipynb' template.yaml\n", 107 | " fridge_out\t\t 'Project antic' utils.py\n", 108 | " __init__.py\t\t __pycache__\n", 109 | " main.py\t\t redd.py\n" 110 | ], 111 | "name": "stdout" 112 | } 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "metadata": { 118 | "id": "q5JgFZET__SG" 119 | }, 120 | "source": [ 121 | "## Include ColabNotebooks to syspath to let python load libraries\n", 122 | "import sys\n", 123 | "sys.path.append('/content/gdrive/MyDrive/ColabNotebooks')" 124 | ], 125 | "execution_count": null, 126 | "outputs": [] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "metadata": { 131 | "id": "CGzJnN_OACQY", 132 | "colab": { 133 | "base_uri": "https://localhost:8080/" 134 | }, 135 | "outputId": "2d2a50fb-1eee-451c-916b-320d8ed1c61b" 136 | }, 137 | "source": [ 138 | "!ls /content/gdrive/MyDrive/datasetKorea/redd" 139 | ], 140 | "execution_count": null, 141 | "outputs": [ 142 | { 143 | "output_type": "stream", 144 | "text": [ 145 | "redd_house1_0.csv redd_house1_8.csv redd_house3_0.csv redd_house4_3.csv\n", 146 | "redd_house1_10.csv redd_house1_9.csv redd_house3_1.csv redd_house4_4.csv\n", 147 | "redd_house1_1.csv redd_house2_0.csv redd_house3_2.csv redd_house4_5.csv\n", 148 | "redd_house1_2.csv redd_house2_1.csv redd_house3_3.csv redd_house5_0.csv\n", 149 | "redd_house1_3.csv redd_house2_2.csv redd_house3_4.csv redd_house6_0.csv\n", 150 | "redd_house1_4.csv redd_house2_3.csv redd_house3_5.csv redd_house6_1.csv\n", 151 | "redd_house1_5.csv redd_house2_4.csv redd_house4_0.csv redd_house6_2.csv\n", 152 | "redd_house1_6.csv redd_house2_5.csv redd_house4_1.csv redd_house6_3.csv\n", 153 | "redd_house1_7.csv redd_house2_6.csv redd_house4_2.csv\n" 154 | ], 155 | "name": "stdout" 156 | } 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "metadata": { 162 | "colab": { 163 | "base_uri": "https://localhost:8080/" 164 | }, 165 | "id": "q-Lw8xnNACYG", 166 | "outputId": "fa35b159-c08f-43e8-98b3-022aa82ec596" 167 | }, 168 | "source": [ 169 | "!pip install ray" 170 | ], 171 | "execution_count": null, 172 | "outputs": [ 173 | { 174 | "output_type": "stream", 175 | "text": [ 176 | "Requirement already satisfied: ray in /usr/local/lib/python3.7/dist-packages (1.2.0)\n", 177 | "Requirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.7/dist-packages (from ray) (3.12.4)\n", 178 | "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from ray) (1.0.2)\n", 179 | "Requirement already satisfied: colorama in /usr/local/lib/python3.7/dist-packages (from ray) (0.4.4)\n", 180 | "Requirement already satisfied: redis>=3.5.0 in /usr/local/lib/python3.7/dist-packages (from ray) (3.5.3)\n", 181 | "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from ray) (2.23.0)\n", 182 | "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from ray) (3.0.12)\n", 183 | "Requirement already satisfied: prometheus-client>=0.7.1 in /usr/local/lib/python3.7/dist-packages (from ray) (0.10.1)\n", 184 | "Requirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from ray) (2.6.0)\n", 185 | "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from ray) (3.7.4.post0)\n", 186 | "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.7/dist-packages (from ray) (7.1.2)\n", 187 | "Requirement already satisfied: gpustat in /usr/local/lib/python3.7/dist-packages (from ray) (0.6.0)\n", 188 | "Requirement already satisfied: opencensus in /usr/local/lib/python3.7/dist-packages (from ray) (0.7.12)\n", 189 | "Requirement already satisfied: aioredis in /usr/local/lib/python3.7/dist-packages (from ray) (1.3.1)\n", 190 | "Requirement already satisfied: py-spy>=0.2.0 in /usr/local/lib/python3.7/dist-packages (from ray) (0.3.5)\n", 191 | "Requirement already satisfied: grpcio>=1.28.1 in /usr/local/lib/python3.7/dist-packages (from ray) (1.32.0)\n", 192 | "Requirement already satisfied: colorful in /usr/local/lib/python3.7/dist-packages (from ray) (0.5.4)\n", 193 | "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.7/dist-packages (from ray) (1.19.5)\n", 194 | "Requirement already satisfied: aiohttp-cors in /usr/local/lib/python3.7/dist-packages (from ray) (0.7.0)\n", 195 | "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from ray) (3.13)\n", 196 | "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.7/dist-packages (from protobuf>=3.8.0->ray) (1.15.0)\n", 197 | "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from protobuf>=3.8.0->ray) (54.2.0)\n", 198 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->ray) (2020.12.5)\n", 199 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->ray) (3.0.4)\n", 200 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->ray) (2.10)\n", 201 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->ray) (1.24.3)\n", 202 | "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (1.6.3)\n", 203 | "Requirement already satisfied: async-timeout<4.0,>=3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (3.0.1)\n", 204 | "Requirement already satisfied: typing-extensions>=3.6.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (3.7.4.3)\n", 205 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (5.1.0)\n", 206 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (20.3.0)\n", 207 | "Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.7/dist-packages (from gpustat->ray) (7.352.0)\n", 208 | "Requirement already satisfied: psutil in /usr/local/lib/python3.7/dist-packages (from gpustat->ray) (5.4.8)\n", 209 | "Requirement already satisfied: blessings>=1.6 in /usr/local/lib/python3.7/dist-packages (from gpustat->ray) (1.7)\n", 210 | "Requirement already satisfied: opencensus-context==0.1.2 in /usr/local/lib/python3.7/dist-packages (from opencensus->ray) (0.1.2)\n", 211 | "Requirement already satisfied: google-api-core<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from opencensus->ray) (1.26.3)\n", 212 | "Requirement already satisfied: hiredis in /usr/local/lib/python3.7/dist-packages (from aioredis->ray) (2.0.0)\n", 213 | "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray) (1.28.1)\n", 214 | "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray) (2018.9)\n", 215 | "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray) (1.53.0)\n", 216 | "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray) (20.9)\n", 217 | "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (0.2.8)\n", 218 | "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3.6\" in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (4.7.2)\n", 219 | "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (4.2.1)\n", 220 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (2.4.7)\n", 221 | "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (0.4.8)\n" 222 | ], 223 | "name": "stdout" 224 | } 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "metadata": { 230 | "id": "ZSt-KQgAA0RR" 231 | }, 232 | "source": [ 233 | "# -*- coding: utf-8 -*-\n", 234 | "import os\n", 235 | "import sys\n", 236 | "from argparse import ArgumentParser\n", 237 | "\n", 238 | "import numpy as np\n", 239 | "import pandas as pd\n", 240 | "import torch\n", 241 | "import torch.nn as nn\n", 242 | "import torch.nn.functional as F\n", 243 | "import torch.optim as optim\n", 244 | "from ray import tune\n", 245 | "\n", 246 | "import matplotlib.pyplot as plt\n", 247 | "\n", 248 | "from dataset import InMemoryKoreaDataset\n", 249 | "from model import ModelPaperBackward as Model\n", 250 | "from utils import error, load_yaml" 251 | ], 252 | "execution_count": null, 253 | "outputs": [] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": { 258 | "id": "JrdjNfu1eD5B" 259 | }, 260 | "source": [ 261 | "####Single epoch (Train, Eval, Test)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "metadata": { 267 | "id": "VGBkRyghd-Cb" 268 | }, 269 | "source": [ 270 | "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n", 271 | "\n", 272 | "def train_single_epoch(epoch, model, train_loader, optimizer, eval_loader,\n", 273 | " plotfilename=None):\n", 274 | " model.train()\n", 275 | " errs, losses = [], []\n", 276 | " x = torch.unsqueeze(x, dim=1)\n", 277 | "\n", 278 | " optimizer.zero_grad()\n", 279 | " x, y, clas = x.to(device), y.to(device), clas.to(device)\n", 280 | "\n", 281 | "def eval_single_epoch(model, eval_loader, plotfilename=None):\n", 282 | " errs, losses = [], []\n", 283 | " with torch.no_grad():\n", 284 | " model.eval()\n", 285 | " for idx, (x, y, clas) in enumerate(eval_loader):\n", 286 | " x = torch.unsqueeze(x, dim=1)\n", 287 | "\n", 288 | " x, y, clas = x.to(device), y.to(device), clas.to(device)\n", 289 | " y_, reg_, alphas_, clas_ = model(x)\n", 290 | "\n", 291 | " loss_clas = F.binary_cross_entropy(clas_, clas)\n", 292 | " loss_out = F.mse_loss(y_, y)\n", 293 | " loss = loss_out + loss_clas\n", 294 | " err = error(y, y_)\n", 295 | "\n", 296 | " loss_, err_ = loss.item(), err.item()\n", 297 | " losses.append(loss_)\n", 298 | " errs.append(err_)\n", 299 | "\n", 300 | " if idx % 50 == 0:\n", 301 | " print(f\"eval epoch={epoch} batch={idx+1} loss={loss:.2f} err={err:.2f}\")\n", 302 | " if plotfilename:\n", 303 | " filename = plotfilename + f\".{idx}eval.png\"\n", 304 | " plot_window(\n", 305 | " x.cpu(),\n", 306 | " y.cpu(),\n", 307 | " y_.cpu(),\n", 308 | " reg_.cpu(),\n", 309 | " clas_.cpu(),\n", 310 | " alphas_.cpu(),\n", 311 | " loss_,\n", 312 | " err_,\n", 313 | " filename,\n", 314 | " )\n", 315 | " return np.mean(losses), np.mean(errs)\n", 316 | "\n", 317 | "\n", 318 | "def test_single(model, test_loader, appliance, plotfilename=None):\n", 319 | " errs, losses = [], []\n", 320 | " with torch.no_grad():\n", 321 | " model.eval()\n", 322 | " for idx, (x, y, clas) in enumerate(test_loader):\n", 323 | " x = torch.unsqueeze(x, dim=1)\n", 324 | "\n", 325 | " x, y, clas = x.to(device), y.to(device), clas.to(device)\n", 326 | " y_, reg_, alphas_, clas_ = model(x)\n", 327 | "\n", 328 | " loss_clas = F.binary_cross_entropy(clas_, clas)\n", 329 | " loss_out = F.mse_loss(y_, y)\n", 330 | " loss = loss_out + loss_clas\n", 331 | " err = error(y, y_)\n", 332 | "\n", 333 | " loss_, err_ = loss.item(), err.item()\n", 334 | " losses.append(loss_)\n", 335 | " errs.append(err_)\n", 336 | "\n", 337 | " if idx % 500 == 0:\n", 338 | " print(f\"eval batch={idx+1} loss={loss:.2f} err={err:.2f}\")\n", 339 | " if plotfilename:\n", 340 | " filename = plotfilename + f\".{idx}.attention.png\"\n", 341 | " plot_window(\n", 342 | " x.cpu(),\n", 343 | " y.cpu(),\n", 344 | " y_.cpu(),\n", 345 | " reg_.cpu(),\n", 346 | " clas_.cpu(),\n", 347 | " alphas_.cpu(),\n", 348 | " loss_,\n", 349 | " err_,\n", 350 | " filename,\n", 351 | " )\n", 352 | "\n", 353 | " return np.mean(losses), np.mean(errs)\n" 354 | ], 355 | "execution_count": null, 356 | "outputs": [] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": { 361 | "id": "BU6kw9VqeKZu" 362 | }, 363 | "source": [ 364 | "###Train model" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "metadata": { 370 | "id": "E-HHDS8deRLK" 371 | }, 372 | "source": [ 373 | "def train_model(datapath, output, appliance, hparams, doplot=None, load=True):\n", 374 | " buildings = appliance[\"buildings\"][\"train\"]\n", 375 | " name = appliance[\"name\"]\n", 376 | " params = appliance[\"hparams\"]\n", 377 | " record_err = np.inf\n", 378 | "\n", 379 | " active_threshold = appliance.get(\"active_threshold\", 0.15)\n", 380 | " active_ratio = appliance.get(\"active_ratio\", 0.5)\n", 381 | " active_oversample = appliance.get(\"active_oversample\", 2)\n", 382 | "\n", 383 | " my_dataset = InMemoryKoreaDataset(\n", 384 | " datapath,\n", 385 | " buildings,\n", 386 | " name,\n", 387 | " windowsize=params[\"L\"],\n", 388 | " active_threshold=active_threshold,\n", 389 | " active_ratio=active_ratio,\n", 390 | " active_oversample=active_oversample,\n", 391 | " )\n", 392 | "\n", 393 | " total_size = len(my_dataset)\n", 394 | " train_size = int(hparams[\"train_size\"] * (total_size))\n", 395 | " eval_size = total_size - train_size\n", 396 | "\n", 397 | " print(\"============= DATASET =============\")\n", 398 | " print(f\"Total size: {total_size}\".format(total_size))\n", 399 | " print(f\"Train size: {train_size}\".format(train_size))\n", 400 | " print(f\"Eval size: {eval_size}\".format(eval_size))\n", 401 | " print(\"===================================\")\n", 402 | "\n", 403 | " train_dataset, eval_dataset = torch.utils.data.random_split(\n", 404 | " my_dataset, (train_size, eval_size)\n", 405 | " )\n", 406 | "\n", 407 | " filename = os.path.join(output, \"dataset.pt\")\n", 408 | " save_dataset(train_dataset, eval_dataset, filename)\n", 409 | "\n", 410 | " train_loader = torch.utils.data.DataLoader(\n", 411 | " train_dataset, batch_size=hparams[\"batch_size\"], shuffle=True\n", 412 | " )\n", 413 | " eval_loader = torch.utils.data.DataLoader(\n", 414 | " eval_dataset, batch_size=hparams[\"batch_size\"]\n", 415 | " )\n", 416 | "\n", 417 | " model = Model(params[\"L\"], params[\"F\"], params[\"K\"], params[\"H\"])\n", 418 | " model = model.to(device)\n", 419 | "\n", 420 | " optimizer = optim.Adam(model.parameters(), hparams[\"lr\"])\n", 421 | " # scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)\n", 422 | "\n", 423 | " if load:\n", 424 | " filename = os.path.join(output, appliance[\"filename\"])\n", 425 | " record_err = load_model(filename, model, optimizer)\n", 426 | "\n", 427 | " results = []\n", 428 | " for epoch in range(hparams[\"epochs\"]):\n", 429 | " filename = os.path.join(output, appliance[\"filename\"] + str(epoch))\n", 430 | "\n", 431 | " plotfilename = None\n", 432 | " if doplot:\n", 433 | " plotfilename = filename\n", 434 | "\n", 435 | " err_ = None\n", 436 | " try:\n", 437 | " train_single_epoch(\n", 438 | " epoch, model, train_loader, optimizer, eval_loader, plotfilename)\n", 439 | "\n", 440 | " loss_, err_ = eval_single_epoch(model, eval_loader)\n", 441 | " print(\"==========================================\")\n", 442 | " print(f\"eval loss={loss:.2f} err={err:.2f}\")\n", 443 | " print(\"==========================================\")\n", 444 | "\n", 445 | " # tune.report(eval_loss=loss_)\n", 446 | " results.append([(epoch, loss, err), (epoch, loss_, err_)])\n", 447 | "\n", 448 | " if err_ < record_err:\n", 449 | " filename = os.path.join(output, appliance[\"filename\"])\n", 450 | " save(model, optimizer, filename, err_)\n", 451 | " record_err = err_\n", 452 | " except Exception as e:\n", 453 | " print(e)\n", 454 | "\n", 455 | " # scheduler.step()\n", 456 | " summary(output, results)\n", 457 | "\n", 458 | " return model\n", 459 | "\n", 460 | "\n", 461 | "def train_model_wrapper(config):\n", 462 | " datapath = config[\"datapath\"]\n", 463 | " output = config[\"output\"]\n", 464 | " appliance = config[\"appliance\"]\n", 465 | " hparams = config[\"hparams\"]\n", 466 | " doplot = config[\"doplot\"]\n", 467 | " load = config[\"load\"]\n", 468 | " tune_hparams = config[\"tune\"]\n", 469 | "\n", 470 | " appliance[\"hparams\"][\"F\"] = tune_hparams[\"F\"]\n", 471 | " appliance[\"hparams\"][\"K\"] = tune_hparams[\"K\"]\n", 472 | " appliance[\"hparams\"][\"H\"] = tune_hparams[\"H\"]\n", 473 | "\n", 474 | " return train_model(datapath, output, appliance, hparams, doplot, load)\n" 475 | ], 476 | "execution_count": null, 477 | "outputs": [] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": { 482 | "id": "gZn2iwRCeSly" 483 | }, 484 | "source": [ 485 | "###Test model" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "metadata": { 491 | "id": "jlQkkLBZenJ3" 492 | }, 493 | "source": [ 494 | "def test_model(datapath, output, appliance, hparams, doplot=None):\n", 495 | " buildings = appliance[\"buildings\"][\"test\"]\n", 496 | " name = appliance[\"name\"]\n", 497 | " params = appliance[\"hparams\"]\n", 498 | "\n", 499 | " filename = os.path.join(output, appliance[\"filename\"])\n", 500 | " plotfilename = None\n", 501 | " if doplot:\n", 502 | " plotfilename = filename\n", 503 | "\n", 504 | " active_threshold = appliance.get(\"active_threshold\", 0.15)\n", 505 | " active_ratio = appliance.get(\"active_ratio\", 0.5)\n", 506 | " active_oversample = appliance.get(\"active_oversample\", 2)\n", 507 | "\n", 508 | " my_dataset = InMemoryKoreaDataset(\n", 509 | " datapath,\n", 510 | " buildings,\n", 511 | " name,\n", 512 | " windowsize=params[\"L\"],\n", 513 | " active_threshold=active_threshold,\n", 514 | " active_ratio=active_ratio,\n", 515 | " active_oversample=active_oversample,\n", 516 | " )\n", 517 | "\n", 518 | " my_dataset, _ = torch.utils.data.random_split(my_dataset, (len(my_dataset), 0))\n", 519 | "\n", 520 | " test_loader = torch.utils.data.DataLoader(\n", 521 | " my_dataset, batch_size=hparams[\"batch_size\"]\n", 522 | " )\n", 523 | "\n", 524 | " model = Model(params[\"L\"], params[\"F\"], params[\"K\"], params[\"H\"])\n", 525 | " model = model.to(device)\n", 526 | "\n", 527 | " name = appliance[\"name\"]\n", 528 | " filename = os.path.join(output, appliance[\"filename\"])\n", 529 | " load_model(filename, model)\n", 530 | "\n", 531 | " output = os.path.join(output, f\"{name}\")\n", 532 | " loss, err = test_single(model, test_loader, appliance, plotfilename)\n", 533 | " print(f\"Test loss={loss:.2f} err={err:.2f}\")" 534 | ], 535 | "execution_count": null, 536 | "outputs": [] 537 | }, 538 | { 539 | "cell_type": "markdown", 540 | "metadata": { 541 | "id": "oSqH0LIkevF9" 542 | }, 543 | "source": [ 544 | "###Save/load" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "metadata": { 550 | "id": "cPLte57VevoJ" 551 | }, 552 | "source": [ 553 | "def save(model, optimizer, file_name_model, error):\n", 554 | " print(\"Guardant...\")\n", 555 | " torch.save(\n", 556 | " {\n", 557 | " \"error\": error,\n", 558 | " \"model_state_dict\": model.state_dict(),\n", 559 | " \"optimizer_state_dict\": optimizer.state_dict(),\n", 560 | " },\n", 561 | " file_name_model,\n", 562 | " )\n", 563 | " print(\"Model guardat!\")\n", 564 | "\n", 565 | "\n", 566 | "def save_dataset(train_, test_, filename):\n", 567 | " torch.save({\"train\": train_, \"test\": test_}, filename)\n", 568 | "\n", 569 | "\n", 570 | "def load_model(file_name_model, model, optimizer=None):\n", 571 | " print(\"Loading model...\")\n", 572 | " if torch.cuda.is_available():\n", 573 | " state = torch.load(file_name_model)\n", 574 | " else:\n", 575 | " state = torch.load(file_name_model, map_location=torch.device(\"cpu\"))\n", 576 | "\n", 577 | " model.load_state_dict(state[\"model_state_dict\"])\n", 578 | " error = state[\"error\"]\n", 579 | " print(\"Loaded model! Error rècord: {}\".format(error))\n", 580 | " if optimizer:\n", 581 | " optimizer.load_state_dict(state[\"optimizer_state_dict\"])\n", 582 | " return error" 583 | ], 584 | "execution_count": null, 585 | "outputs": [] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": { 590 | "id": "tS6GF_m8e064" 591 | }, 592 | "source": [ 593 | "###Plot" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "metadata": { 599 | "id": "em_QoB9qACcH" 600 | }, 601 | "source": [ 602 | "def plot(x, y, yhat, loss, err, filename):\n", 603 | " subplots = [221, 222, 223, 224]\n", 604 | " plt.figure(1, figsize=(10, 8))\n", 605 | " plt.subplots_adjust(top=0.88)\n", 606 | " for i in range(4):\n", 607 | " x_, y_, yhat_ = (\n", 608 | " x.detach().numpy()[i][0],\n", 609 | " y.detach().numpy()[i],\n", 610 | " yhat.detach().numpy()[i],\n", 611 | " )\n", 612 | " plt.subplot(subplots[i])\n", 613 | " plt.plot(range(len(x_)), x_, color=\"b\", label=\"x\")\n", 614 | " plt.plot(range(len(y_)), y_, color=\"g\", label=\"y\")\n", 615 | " plt.plot(range(len(yhat_)), yhat_, color=\"r\", label=\"yhat\")\n", 616 | "\n", 617 | " plt.suptitle(f\"loss {loss:.2f} error {err:.2f}\")\n", 618 | " plt.legend()\n", 619 | " plt.tight_layout()\n", 620 | " plt.savefig(filename)\n", 621 | " plt.clf()\n", 622 | "\n", 623 | "\n", 624 | "# def plot_window(x, y, yhat, alphas, loss, err, filename):\n", 625 | "# # Naive plot window\n", 626 | "# subplt_x = 4\n", 627 | "# subplt_y = 4\n", 628 | "# plt.figure(1, figsize=(20, 16))\n", 629 | "# plt.subplots_adjust(top=0.88)\n", 630 | "#\n", 631 | "# idxs = np.random.randint(len(x), size=(subplt_x * subplt_y))\n", 632 | "# for i, idx in enumerate(idxs):\n", 633 | "# x_, y_, yhat_ = (\n", 634 | "# x.detach().numpy()[idx][0],\n", 635 | "# y.detach().numpy()[idx],\n", 636 | "# yhat.detach().numpy()[idx],\n", 637 | "# )\n", 638 | "# alphas_ = alphas.detach().numpy()[idx].flatten()\n", 639 | "# ax1 = plt.subplot(subplt_x, subplt_y, i + 1)\n", 640 | "# ax2 = ax1.twinx()\n", 641 | "# ax1.plot(range(len(x_)), x_, color=\"b\", label=\"x\")\n", 642 | "# ax1.plot(range(len(y_)), y_, color=\"r\", label=\"y\")\n", 643 | "# ax1.plot(range(len(yhat_)), yhat_, color=\"orange\", label=\"yhat\")\n", 644 | "# ax2.fill_between(\n", 645 | "# range(len(alphas_)), alphas_, alpha=0.5, color=\"lightgrey\", label=\"alpha\"\n", 646 | "# )\n", 647 | "#\n", 648 | "# plt.suptitle(f\"loss {loss:.2f} error {err:.2f}\")\n", 649 | "# ax1.legend()\n", 650 | "# ax2.legend()\n", 651 | "# plt.legend()\n", 652 | "# plt.tight_layout()\n", 653 | "# plt.savefig(filename)\n", 654 | "# plt.clf()\n", 655 | "\n", 656 | "\n", 657 | "def plot_window(x, y, yhat, reg, clas, alphas, loss, err, filename):\n", 658 | " subplt_x = 4\n", 659 | " subplt_y = 4\n", 660 | " plt.figure(1, figsize=(20, 16))\n", 661 | " plt.subplots_adjust(top=0.88)\n", 662 | "\n", 663 | " idxs = np.random.randint(len(x), size=(subplt_x * subplt_y))\n", 664 | " for i, idx in enumerate(idxs):\n", 665 | " x_, y_, yhat_, reg_, clas_ = (\n", 666 | " x.detach().numpy()[idx][0],\n", 667 | " y.detach().numpy()[idx],\n", 668 | " yhat.detach().numpy()[idx],\n", 669 | " reg.detach().numpy()[idx],\n", 670 | " clas.detach().numpy()[idx],\n", 671 | " )\n", 672 | " alphas_ = alphas.detach().numpy()[idx].flatten()\n", 673 | " ax1 = plt.subplot(subplt_x, subplt_y, i + 1)\n", 674 | " ax2 = ax1.twinx()\n", 675 | " ax1.plot(range(len(x_)), x_, color=\"b\", label=\"x\")\n", 676 | " ax1.plot(range(len(y_)), y_, color=\"r\", label=\"y\")\n", 677 | " ax1.plot(range(len(reg_)), reg_, color=\"black\", label=\"reg\")\n", 678 | " ax1.plot(range(len(yhat_)), yhat_, alpha=0.5, color=\"orange\", label=\"yhat\")\n", 679 | " ax2.fill_between(\n", 680 | " range(len(alphas_)), alphas_, alpha=0.5, color=\"lightgrey\", label=\"alpha\"\n", 681 | " )\n", 682 | " alphas_max = np.max(alphas_)\n", 683 | " ax2.plot(\n", 684 | " range(len(clas_)), clas_ * alphas_max, color=\"cyan\", alpha=0.25, label=\"reg\"\n", 685 | " )\n", 686 | "\n", 687 | " plt.suptitle(f\"loss {loss:.2f} error {err:.2f}\")\n", 688 | " ax1.legend()\n", 689 | " ax2.legend()\n", 690 | " plt.legend()\n", 691 | " plt.tight_layout()\n", 692 | " plt.savefig(filename)\n", 693 | " plt.clf()\n", 694 | "\n", 695 | "\n", 696 | "def summary(path, results):\n", 697 | " df = pd.DataFrame(\n", 698 | " [\n", 699 | " {\n", 700 | " \"epoch\": x[0][0],\n", 701 | " \"train_loss\": x[0][1],\n", 702 | " \"train_err\": x[0][2],\n", 703 | " \"eval_loss\": x[1][1],\n", 704 | " \"eval_err\": x[1][2],\n", 705 | " }\n", 706 | " for x in results\n", 707 | " ]\n", 708 | " ).set_index(\"epoch\")\n", 709 | "\n", 710 | " columns = [\"train_loss\", \"eval_loss\"]\n", 711 | " filename = os.path.join(path, \"results-loss.csv\")\n", 712 | " df[columns].round(3).to_csv(filename, sep=\";\")\n", 713 | " filename = os.path.join(path, \"results-loss.png\")\n", 714 | "\n", 715 | " plt.figure(1, figsize=(10, 8))\n", 716 | " df[columns].round(3).plot()\n", 717 | " plt.savefig(filename)\n", 718 | " plt.clf()\n", 719 | "\n", 720 | " columns = [\"train_err\", \"eval_err\"]\n", 721 | " filename = os.path.join(path, \"results-error.csv\")\n", 722 | " df[columns].round(3).to_csv(filename, sep=\";\")\n", 723 | " filename = os.path.join(path, \"results-error.png\")\n", 724 | "\n", 725 | " plt.figure(1, figsize=(10, 8))\n", 726 | " df[columns].round(3).plot()\n", 727 | " plt.savefig(filename)\n", 728 | " plt.clf()" 729 | ], 730 | "execution_count": null, 731 | "outputs": [] 732 | }, 733 | { 734 | "cell_type": "markdown", 735 | "metadata": { 736 | "id": "jl__05W0e6lQ" 737 | }, 738 | "source": [ 739 | "###Main" 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "metadata": { 745 | "id": "AEbuoiWHACfL" 746 | }, 747 | "source": [ 748 | "def main(args):\n", 749 | "\n", 750 | " if args[\"disable_random\"]:\n", 751 | " torch.manual_seed(7)\n", 752 | "\n", 753 | " train = args[\"train\"]\n", 754 | " tune_enabled = args[\"tune\"]\n", 755 | " output = args[\"path\"]\n", 756 | " plot_disabled = args[\"disable_plot\"]\n", 757 | "\n", 758 | " settings = load_yaml(args[\"settings\"])\n", 759 | " appliance = args[\"appliance\"]\n", 760 | "\n", 761 | " dataset = settings[\"dataset\"]\n", 762 | " hparams = settings[\"hparams\"]\n", 763 | " if args[\"epochs\"]:\n", 764 | " hparams[\"epochs\"] = int(args[\"epochs\"])\n", 765 | "\n", 766 | " appliance = settings[\"appliances\"][appliance]\n", 767 | "\n", 768 | " datapath = dataset[\"path\"]\n", 769 | " if train:\n", 770 | " print(\"==========================================\")\n", 771 | " print(f\"Training ONGOING\")\n", 772 | " print(\"==========================================\")\n", 773 | "\n", 774 | " if not tune_enabled:\n", 775 | " my_model = train_model(\n", 776 | " datapath,\n", 777 | " output,\n", 778 | " appliance,\n", 779 | " hparams,\n", 780 | " doplot=not plot_disabled,\n", 781 | " load=True\n", 782 | " )\n", 783 | " else:\n", 784 | " config = {\n", 785 | " \"datapath\": datapath,\n", 786 | " \"output\": output,\n", 787 | " \"appliance\": appliance,\n", 788 | " \"hparams\": hparams,\n", 789 | " \"doplot\": not plot_disabled,\n", 790 | " \"load\": False,\n", 791 | " \"tune\": {\n", 792 | " \"F\": tune.grid_search([16, 32, 64]),\n", 793 | " \"K\": tune.grid_search([4, 8, 16]),\n", 794 | " \"H\": tune.grid_search([256, 512, 1024]),\n", 795 | " },\n", 796 | " }\n", 797 | " analysis = tune.run(\n", 798 | " train_model_wrapper,\n", 799 | " metric=\"val_loss\",\n", 800 | " mode=\"min\",\n", 801 | " num_samples=5,\n", 802 | " config=config,\n", 803 | " )\n", 804 | " print(\"==========================================\")\n", 805 | " print(f\"Best hyperparameters\")\n", 806 | " print(analysis.best_config)\n", 807 | " print(\"==========================================\")\n", 808 | "\n", 809 | " print(\"==========================================\")\n", 810 | " print(f\"Training DONE\")\n", 811 | " print(\"==========================================\")\n", 812 | " else:\n", 813 | " print(\"==========================================\")\n", 814 | " print(f\"Testing ONGOING\")\n", 815 | " print(\"==========================================\")\n", 816 | " test_model(datapath, output, appliance, hparams, doplot=not plot_disabled)\n", 817 | " print(\"==========================================\")\n", 818 | " print(f\"Testing DONE\")\n", 819 | " print(\"==========================================\")" 820 | ], 821 | "execution_count": null, 822 | "outputs": [] 823 | }, 824 | { 825 | "cell_type": "code", 826 | "metadata": { 827 | "colab": { 828 | "base_uri": "https://localhost:8080/" 829 | }, 830 | "id": "G-Hn1v_tAfzc", 831 | "outputId": "ea1ff142-db1f-4ee6-d1b1-fd973330e8a9" 832 | }, 833 | "source": [ 834 | "from google.colab import drive\n", 835 | "drive.mount('/content/gdrive')" 836 | ], 837 | "execution_count": null, 838 | "outputs": [ 839 | { 840 | "output_type": "stream", 841 | "text": [ 842 | "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n" 843 | ], 844 | "name": "stdout" 845 | } 846 | ] 847 | }, 848 | { 849 | "cell_type": "markdown", 850 | "metadata": { 851 | "id": "7usI-cKSHV6o" 852 | }, 853 | "source": [ 854 | "### Args i execució" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "metadata": { 860 | "id": "LTC0PxuvACjM" 861 | }, 862 | "source": [ 863 | "args = {\"settings\": \"/content/gdrive/MyDrive/ColabNotebooks/settings.yaml\", \"appliance\": \"microwave\",\n", 864 | " \"path\": \"/content/gdrive/MyDrive/ColabNotebooks/microwave_out\", \"train\": True, \"epochs\": 1,\n", 865 | " \"disable_random\": True, \"tune\": False, \"disable_plot\": False}\n", 866 | "\n", 867 | "main(args)" 868 | ], 869 | "execution_count": null, 870 | "outputs": [] 871 | }, 872 | { 873 | "cell_type": "code", 874 | "metadata": { 875 | "id": "2350M3EyQBQE" 876 | }, 877 | "source": [ 878 | "" 879 | ], 880 | "execution_count": null, 881 | "outputs": [] 882 | } 883 | ] 884 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Non-intrusive Load Disaggregation 2 | ## Introduction 3 | 4 | - Motivation and goals 5 | 6 | Climate change is one of the greatest challenges facing humanity, and machine learning approaches are a great solution to tackle this problem. In 2019, a group of machine learning experts developed a paper called "Tackling Climate Change with Machine Learning" [[1]](#1) focused on impactful uses of machine learning in reducing and responding to climate change challenges. 7 | 8 | One of the main domains of the many propositions is "Buildings and cities" and in more deep how to "optimize buildings energy consumption". The paper states "_while the energy consumed in buildings is responsible for a quarter of global energy-related emissions, a combination of easy-to-implement fixes and state-of-the-art strategies could reduce emissions for existing buildings by up to 90%_". This statement caught our attention to start this project. Find an optimization model to control and therefore optimize energy consumption in buildings. 9 | 10 | After extensive research, we decided to focus our study on Non-Intrusive Load Monitoring (NILM). NILM is the task of estimating the power demand of different appliances in a building given an aggregate power demand signal recorded by a single electric meter monitoring multiple appliances. 11 | 12 | Neural NILM is a non-linear regression problem that consists of training a neural network for each appliance in order to predict a time window of the appliance load given the corresponding time window of aggregated data. 13 | 14 | We adopted the "Non-Intrusive Load Monitoring with an Attention-based Deep Neural Network" [[2]](#2) paper developed by University of Rome Tor Vergata researchers, to be our Reference Paper. Other approaches to Neura NILM are presented in "Non-intrusive load disaggregation solutions for very low-rate smart meter data." [[3]](#3) and "Sequence-to-point learning with neural networks for non-intrusive load monitoring" [[4]](#4). 15 | 16 | - Dataset 17 | 18 | As to the dataset used, we selected the real-world dataset "the Reference Energy Disaggregation Data Set (REDD)" [[5]](#5). This dataset is one of the reference datasets used in NILM Reference Paper and contains data for six different houses from the USA. The data is collected at 1 second sampling period for the aggregate power consumption and 3 seconds for the appliance power consumption. The appliances used are the following: 19 | oven, refrigerator, dishwasher, kitchen_outlets, microwave, bathroom_outlet, lighting, washer_dryer, electric_heater, stove, disposal, electronics, furance, smoke_alarms, air_conditioner. 20 | Thus, in our model, we consider three appliances: dishwasher (DW), microwave (MW), and refrigerator (FR). These appliances are the same as the ones used in the Reference Paper to reach the same results. 21 | 22 | Dataset split 23 | 24 | The dataset is split using houses 2,3,4,5,6 to build the training set and house 1 as the test set. 25 | 26 | Captura de pantalla 2021-04-16 a las 13 41 36 27 | 28 | The actual dataset of our model is a combination of two datasets. 29 | - We found a deep learning research team from Seoul National University that had a pre-processed dataset that cleaned the data (see pre-processing section bellow) of the REDD dataset given by the Reference Paper. This dataset is used in their "Subtask Gated Networks for Non-Intrusive Load Monitoring paper" [[6]](#6). 30 | - On the other hand, in the REDD dataset there is a high active/inactive windows imbalance. This irregularity is observed especially in the case of the dishwasher and the microwave. As it is expected, due to the use of these appliances, much of the time a dishwasher and a microwave are not being used. Therefore there is a high overrepresentation of inactive windows. We implemented an oversampling process described in the pre-processing section (see below) to solve the problem. 31 | 32 | ## System architecture 33 | 34 | ### Preprocessing 35 | 36 | Initial project implementation was done using raw REDD dataset and it was necessary to pre-process the data as described in "Subtask gated networks for non-intrusive load monitoring" [[6]](#6), see details: 37 | 38 | 1. Data alignment. Align multiple time series with different acquisition frequencies. 39 | 2. Data imputation. Split the sequence so that the duration of missing values in subsequence is less than 20 seconds. Then fill the missing values in each subsequence by a backward filling method. 40 | 3. Data filtering. Only use the subsequences with more than one day duration 41 | 4. Generate sliding windows. Using sliding window over the aggregated signal with hop size equal to 1 sample 42 | 43 | Once authors from Seoul National University provided us the same dataset as the Reference Paper we disabled our data pre-processing. The main reason was to assure the same input data as the original paper to have the same, or similar, results. 44 | 45 | Oversampling is used to solve the problem of overrepresentation of inactive windows and the irregulatity of the active/innactive windows imbalance (described in the Dataset section). The process consist in replicating randomly picked active windows in each of the appliances to obtain a 50% - 50% class balance. The ratio between active/inactive windows is configurable in settings. 46 | 47 | After implementing oversampling the number of windows used for train, eval and test are listed below: 48 | 49 | 50 | | Appliance | Nº buildings train | Nº windows train | Nº windows eval | Nº buildings test | 51 | |-----------|--------------------|------------------|-----------------|-------------------| 52 | | dishwasher| 5| 289163 | 123927 | 1| 53 | | fridge | 4| 613167 | 262787 | 1| 54 | | microwave | 3| 82922 | 35538 | 1| 55 | 56 | 57 | ### Model architectures 58 | 59 | We've implemented three different model architectures: 60 | 61 | - Regression and classification enabled 62 | - Only regression enabled. 63 | - Regression and classification using the attention results. 64 | 65 | ![image](https://user-images.githubusercontent.com/7881377/115058996-3394f980-9ee6-11eb-874f-92aceee6f2f1.png) 66 | 67 | 68 | #### Regression and classification enabled 69 | The designed architecture adopted to solve the NILM problem is based on a classical end-to-end regression network with its encoder-decoder components. Adding an attention mechanism in between the encoder and decoder. Apart from the main end-to-end regression network, an auxiliary end-to-end classification subnetwork is joined. 70 | 71 | Why an attention-based model? 72 | The attention-based model helps with the energy disaggregation task. It assigns importance, thought weights, to every position in the aggregated signal which after successful training, will correspond to a state change of the target appliance. The addition of an attention mechanism in the regression subnetwork will allow the model to focus on selected time steps or windows rather than on non-target appliances. 73 | The attention scores are the way to weigh the importance of every position in our input sequence to infer the disaggregated signal. To represent correctly these weights we made the output of the attention layer be a 1D vector with the length of a window sequence. 74 | 75 | Both subnetworks have a different objective: 76 | - Regression end-to-end network: allows the subnetwork to “implicitly detect and assign more importance to some events (e.g. turning on or off of the appliance) and to specific signal sections”. 77 | - Classification end-to-end network: helps the disaggregation process by enforcing explicitly the on/off states of the appliances. 78 | 79 | Both subnetwork outcomes are concatenated at the end to outcome the disaggregated consumption of the appliances. 80 | 81 | Captura de pantalla 2021-04-16 a las 17 51 33 82 | 83 | 84 | #### Only regression enabled 85 | This architecture consists of suppressing the classification subnetwork, that does not have an attention layer, from the model. The regression branch is kept as in the original network. 86 | 87 | #### Regression and classification using the attention results 88 | In this final model modification, the output of the attention layer is used to compute the result of the regression subnetwork (in all the models). In this architecture, we concatenate the output of the regression subnetwork with the output of the stack of convolutional layers, in the classification subnetwork. This concatenated vector is fed to the 2 fully connected layers on top of the classification branch. The expectations of this architecture's behavior are described in the Experiment 7 hypothesis. 89 | 90 | ### Train 91 | 92 | - Methodology. Model training is done using the whole pre-processed train dataset and batches of size 64 via data loader. At first, we set the epochs at 10 epochs, in most of the cases we founded enough to do an initial analysis of model response and performance. The common do_load -> do_predict -> calculate_loss -> update_optimizer train sequence is done per each of the train batches in each epoch. The common do_load -> do_predict -> calculate_loss validation sequence is done per each of the validation batches in each epoch. 93 | 94 | - Loss function. An aggregated loss function is used for the joint optimization of both regression and classification network: L=Lout+Lclas, where Lout is the Mean Squared Error (MSE) between the overall output of the network and the ground truth of a single appliance, and Lclas is the Binary Cross-Entropy (BCE) that measures the classification error of the on/off state for the classification subnetwork. 95 | 96 | 97 | ### Test 98 | 99 | - Methodology. Model testing is done over the whole preprocessed test dataset using batches of size 64 via a data loader. The common do_load -> do_predict -> calculate_error test sequence is done per each of the test batches. 100 | - Error metrics. MAE (Mean Absolute Error) is used to evaluate the performance of the neural network. MAE is calculated after applying the prediction postprocessing described in the Postprocessing section. These are the metrics used in the Reference paper and are used as benchmarking criteria between the different experiments described below. 101 | 102 | ### Postprocessing 103 | 104 | The disaggregation phase is carried out with a sliding window over the aggregated signal with a hop size equal to 1 sample. That's the reason why the model generates overlapped windows of the disaggregated signal. We reconstruct the overlapped windows employing a median filter on the overlapped portion. 105 | 106 | ## Experiments 107 | 108 | The main goals of the experiments are: 109 | 110 | - Learn how to implement and deploy a DL system on a commercial cloud computing platform 111 | - Understand and interpret the current NILM neural network described in the paper 112 | - Understand which is the task of regression branch 113 | - Understand which is the task of classification branch 114 | - Understand which is the task of attention 115 | 116 | We proposed the three main architecture modifications evaluated in the experiments during the analysis of the reference paper. The experiments were not designed 117 | sequentially after processing the results of the previous experiment. 118 | 119 | Main architecture modifications: 120 | 121 | - Paper architecture - Regression and classification enabled 122 | - Paper modification 1 - Only regression enabled 123 | - Paper modification 2 - Regression and classification using the attention results 124 | 125 | We initially explored the data to have a first picture of the type and the amount of data available. We realized there was a high active/inactive windows imbalance in the case of dishwasher and microwave (as explained in the Dataset explanation). There would be enough total amount of windows to train the model, but not enough specific active windows to prevent a biased model. If no oversample was done the model would mainly predict null demand in inactive windows, which would be correct, but would fail to predict non-null demand inactive windows. Although disaggregation is a regression problem, this would be similar to high specificity and low sensitivity in an active/inactive appliance classification problem. 126 | 127 | ### Neural network response charts 128 | 129 | We generate charts with time series describing the response of the neural network in train, eval, and test. These charts are used to visualize and interpret the response of both whole and specific parts of the network. The main parts of interest are regression, classification, and attention. In most of the charts, the available time series are: 130 | 131 | - Building consumption. Aggregated consumption of the building. Used as input of the neural network 132 | - Predicted appliance consumption. Disaggregated appliance consumption predicted by the neural network 133 | - Real appliance consumption. Real applianced consumption obtained from the meter 134 | - Classification branch output. Prediction of the classification branch 135 | - Regression branch output. Prediction of the regression branch 136 | - Attention score. Describes the zone of interest for attention to improve regression 137 | 138 | 139 | ![image](https://user-images.githubusercontent.com/7881377/115200957-141feb80-a0f5-11eb-82ca-81249810fe0b.png) 140 | 141 | 142 | All the consumption time series are referenced to the left-Y axis. Classification and attention are referenced to the right-Y-axis. In both cases, there's a rescaling in some prediction results to make all of them fit in a single chart (ie. classification prediction is scaled to nearly maximum consumption, ...). In the report, there're two train and two test sample charts per each of the experiments and appliance to visualize the response and support conclusions. 143 | 144 | Interpretation of the charts focuses in: 145 | - Performance. Comparing real vs predicted series it's possible to identify the performance of the model 146 | - Characterization of the error. Comparing real vs predicted series it's possible to identify error specific patterns (peaks, plateaus, etc) 147 | - Correlation of the error with aggregated demand. Comparing error vs aggregated building consumption it's possible to identify the response of the model to crowded scenarios (multiple appliances) and single scenario (single appliance). It's also possible to identify the response of the model with different kinds of appliances, with different consumption patterns, running simultaneously. 148 | - Contribution of each of the branches. Analyzing the output of the branches is possible to identify the contribution of each of the branches to the prediction. It's possible to identify the objective of each branch and also its performance 149 | - Focus of attention. Analyzing the attention output it's possible to identify which parts of the window are important to the regression output. The attention can be used to: 150 | - Identify whether the important parts vary in the different scenarios. Maybe there is a scenario in which there are different appliances ON or there is a scenario with just one appliance that is consuming a lot is being used ON. The attention will help differenciate this two situations. 151 | - Identify whether there're specific important parts or the importance is homogeneous along with the window 152 | - Identify whether important parts are described in the appliance itself or the neighborhood. 153 | - Identify characteristic of important parts such as peaks, plateaus, etc. 154 | 155 | 156 | ## Paper architecture - Regression and classification enabled 157 | 158 | ### Experiment 1. Paper 159 | 160 | #### Hypothesis 161 | 162 | The regression subnetwork infers the power consumption, whereas the classification subnetwork focuses on the binary classification of the appliance state (on/off). The attention mechanism improves the representational power of the network to identify the positions in the aggregated input sequence with useful information to identify appliance-specific patterns. 163 | 164 | Additional group hypothesis: 165 | 166 | Specific appliance patterns are described by state changes and state duration which are related to the operating regime of the internal electricity consumption components. The operating regime of the internal components depends on multiple factors: 167 | 168 | - Appliance operating mode. 169 | - User-selected modes of operation. There're appliances with a small number of user modes (fridge, dishwasher) and appliances with a mid number of user modes (microwave). The higher number of user modes is the higher number of different patterns that can be described by the neural network. 170 | - Cycle duration. There're appliances with small duration time cycles describing the pattern per operating mode, such as the fridge and the microwave, and appliances with high duration time cycles, such as the dishwasher. The longer the cycle duration is, the more difficult it will be to describe the behavior of the pattern as the input sequence windows are longer. 171 | - Environmental factors (temperature, etc). There're appliances with dependencies to external variables like environmental factors. In this specific model, there's a high dependency on temperature on the fridge and lower dependency on the microwave and dishwasher. Weather dependency adds stochasticity to the system and consequently, complexity to the model. 172 | - Internal components demand. The main electricity consuming components are: 173 | - Heating/cooling. There's weather dependency load demand adds stochasticity to the system, hence complexity to the model. 174 | - Motors. Load demand is mainly related to the user mode and to the component internal operating regime. 175 | 176 | #### Experiment setup 177 | 178 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods: 179 | 180 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test| 181 | |-----------|------------|----------------|----------------|-----------------------------| 182 | | dishwasher| TRUE | TRUE | FALSE | FALSE | 183 | | fridge | TRUE | TRUE | FALSE | FALSE | 184 | | microwave | TRUE | TRUE | FALSE | FALSE | 185 | 186 | #### Results 187 | 188 | See attached train vs loss curve to diagnose performance: 189 | 190 | ![image](https://user-images.githubusercontent.com/7881377/114513315-59967180-9c3a-11eb-974e-5e3d5eccacb8.png) 191 | 192 | See attached train and test samples per each of the appliances to interpret end evaluate disaggregation: 193 | 194 | ![image](https://user-images.githubusercontent.com/7881377/114427700-a4bd6f80-9bbb-11eb-9936-c94c54ea2a0e.png) 195 | ![image](https://user-images.githubusercontent.com/7881377/114305400-58e5ca00-9ad8-11eb-87e0-08fe03fc2ed2.png) 196 | 197 | Captura de pantalla 2021-04-16 a las 20 25 23 198 | 199 | See obtained error (previously introduced in error metrics section and extra training information): 200 | 201 | | Appliance | MAE | Nº Epochs| Nº Hours Train| 202 | |------------|-------|----------|---------------| 203 | |dishwasher |28.25 |4 | 15 | 204 | |fridge |26.75 |4 | 25 | 205 | |microwave |31.47 |4 | 1.23 | 206 | 207 | #### Conclusions 208 | 209 | As was described in the hypothesis the main goal of the regression branch is 210 | predicting the maximum expected demand of the appliance. As was also expected 211 | the classification branch is modulating the regression results to match 212 | the appliance load pattern. Classification has high specificity and low sensitivity. 213 | 214 | In both cases, train and eval have good results but have 215 | less accurate results in test. Our hypothesis is that 216 | model does not generalize well due to the small number and variance of appliance patterns of the different train buildings. 217 | 218 | See samples of dishwasher consumption per building: 219 | 220 | ![image](https://user-images.githubusercontent.com/7881377/114833109-862fc200-9dcf-11eb-8306-87bc1b769e80.png) 221 | 222 | The classification network is in charge of modeling the patterns. As seen in the results, it is less accurate in the steady-state sections than expected. Hence, the instability, and in some cases, the high sensitive response is also related to the overrepresentation issue. 223 | 224 | In most cases, increasing the number of acquisition samples would not be a good solution to fix the instability issue as there would be more active windows but the same pattern. That's the case of appliances with components that do not depend on environmental factors (temperatures, etc) like microwave or dishwasher. In the case of appliances with environmental factors, it would help to have also samples from different seasons. We implemented oversampling but it's similar to increasing the number of samples from the same appliance rather than new ones. 225 | 226 | There's no more data available rather than the public dataset. As a solution, data augmentation can not be easily implemented due to the lack of a database of appliance loads. In this case, it makes no sense to create synthetic aggregated scenarios mixing appliances from different buildings because they're already mixed in the training dataset and properly predicted in eval. In the classification branch, we hypothesize that in some cases adding noise would help to decrease high sensitive responses. 227 | 228 | Attention in appliances with a high simultaneity factor(\*) focus mainly on state changes in the appliance, like switch on/switch off or high consuming components of the appliance. Also, it focuses on state duration. That would be the case of dishwasher or 229 | microwave. Attention in appliance with low 230 | simultaneity factor also focus in other sections of the windows out of the 231 | active section. That would be the case of the fridge. Our hypothesis is that in the case of high simultaneity factor 232 | scenarios, attention focuses on appliance pattern, and in the case of low 233 | simultaneity factors it additionally focuses on the neighborhood. Attention would perform better to identify highly specialized and specific features in a consumption window. 234 | 235 | (\*) simultaneity factor describes the probability of an appliance 236 | to be active while other appliances are active. A large simultaneity factor 237 | means that the appliance is usually active while others are also active. 238 | 239 | Regarding the hypothesis on type of appliances: 240 | - The neural network can model the different operating modes in the appliances, even the ones with a high number of operating modes 241 | - The neural network can model both heating/cooling and motor components 242 | - There's no specific conclusion about the capacity to model weather dependency as both train and test datasets were acquired under similar environments (season, etc) 243 | 244 | ### Experiment 2 and 3. Paper with standarization 245 | 246 | Standardization can be used to rescale the testing samples to better 247 | describe relative patterns rather than absolute value consumptions. Standardization transforms features such that their mean (μ) equals 0 and standard deviation (σ) equals 1. The range of the new min and max values is determined by the standard deviation of the initial un-normalized feature. 248 | 249 | ![Suource: https://becominghuman.ai/feature-scaling-in-machine-learning-20dd93bb1bcb](https://user-images.githubusercontent.com/7881377/115056599-57a30b80-9ee3-11eb-8aee-87f5e5c96401.png) 250 | 251 | Standardization is achieved by Z-score Normalization. Z-score is given by: 252 | 253 | ![Source: https://becominghuman.ai/feature-scaling-in-machine-learning-20dd93bb1bcb](https://user-images.githubusercontent.com/7881377/115056913-aea8e080-9ee3-11eb-80bf-1e304274b3c5.png) 254 | 255 | TThe standardization process is done over the specific dataset in each specific experiment. 256 | 257 | Although the model of appliances in train and test are different in terms of absolute consumptions, relative step changes in standardized data can be similar. 258 | This is an approach to bypass overrepresentation in data. In this case, the mean and standard value used in training is calculated over the train dataset, and the mean and standard deviation in the test is calculated over test dataset. 259 | 260 | #### Experiment 2. Paper with standardization - Using calculated standardization in test 261 | 262 | ##### Hypothesis 263 | 264 | The main difference between Experiment 1 and Experiment 2 is the addition of the standardization in the dataset of the model as explained in the past paragraph. In this experiment we are standarizing the data of the train in respect to the train and the test in respect to the test. 265 | 266 | Captura de pantalla 2021-04-17 a las 12 49 29 267 | 268 | We hypothesize that we will have a better outcome than Experiment 1. 269 | 270 | ##### Experiment setup 271 | 272 | See details of the experiments below. Each of the columns describes an specific option of the previously introduced network architectures and pre/post-processing methods: 273 | 274 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test| 275 | |-----------|------------|----------------|----------------|-----------------------------| 276 | | dishwasher| TRUE | TRUE | TRUE | TRUE | 277 | | fridge | TRUE | TRUE | TRUE | TRUE | 278 | | microwave | TRUE | TRUE | TRUE | TRUE | 279 | 280 | ##### Results 281 | 282 | See attached train and test samples per each of the appliances to interpret end evaluate disaggregation: 283 | 284 | ![image](https://user-images.githubusercontent.com/7881377/114307075-54241480-9ade-11eb-9063-4cdbcef3c081.png) 285 | ![image](https://user-images.githubusercontent.com/7881377/114307086-600fd680-9ade-11eb-921e-2803c2115931.png) 286 | ![image](https://user-images.githubusercontent.com/7881377/114307171-b67d1500-9ade-11eb-9b4a-ed3653b6c354.png) 287 | 288 | Captura de pantalla 2021-04-16 a las 20 25 23 289 | 290 | See obtained error previously introduced in error metrics section and extra training information: 291 | 292 | | Appliance | MAE | Nº Epochs| Nº Hours Train| 293 | |------------|-------|----------|---------------| 294 | |dishwasher |46.98 |10 |40 | 295 | |fridge |52.17 |10 |55 | 296 | |microwave |31.16 |10 |2.25 | 297 | 298 | ##### Conclusions 299 | 300 | Our hypothesis is refuted as results are worse than without different standardization in train and test. To understand better why this happened we have calculated the standard deviation of the fridge for House 1 and House 2 to see if the values are within the same region of consumption. 301 | 302 | ![WhatsApp Image 2021-04-17 at 12 28 06](https://user-images.githubusercontent.com/71388892/115109869-ab136900-9f78-11eb-86d9-70b30a624e3e.jpeg) 303 | 304 | As it can be seen in this box diagram the consumptions of the fridge for house 1 and house 2 don't follow a similar distribution. Therefore, now it is understandable why the results of Experiment 2 are worse than Experiment 1. We cannot standardize within different values because their consumption don't follow a similar distribution. 305 | 306 | #### Experiment 3. Paper with standarization - Using training standardization in test 307 | 308 | ##### Hypothesis 309 | We wanted to do the opposite of Experiment 2 to see if the dataset with the standardization of the train and test with the train values gave a better outcome. 310 | The standarization in the Experiment 3 is done as the following: 311 | 312 | Captura de pantalla 2021-04-17 a las 12 49 21 313 | 314 | Better results than experiment 2 are expected although not necessarily better than experiment 1. 315 | 316 | ##### Experiment setup 317 | 318 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods: 319 | 320 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test| 321 | |-----------|------------|----------------|----------------|-----------------------------| 322 | | dishwasher| TRUE | TRUE | TRUE | FALSE | 323 | | fridge | TRUE | TRUE | TRUE | FALSE | 324 | | microwave | TRUE | TRUE | TRUE | FALSE | 325 | 326 | 327 | ##### Results 328 | 329 | See attached train and test samples per each of the appliances to interpret end evaluate disaggregation: 330 | 331 | ![image](https://user-images.githubusercontent.com/7881377/114313306-585c2c00-9af6-11eb-9a89-514cb3949e5c.png) 332 | ![image](https://user-images.githubusercontent.com/7881377/114313316-627e2a80-9af6-11eb-8af0-a05e28fe9b4a.png) 333 | ![image](https://user-images.githubusercontent.com/7881377/114313336-6f028300-9af6-11eb-86fc-292ea240fcd8.png) 334 | 335 | Captura de pantalla 2021-04-16 a las 20 25 23 336 | 337 | 338 | See obtained error previously introduced in error metrics section and extra training information: 339 | 340 | | Appliance | MAE | Nº Epochs| Nº Hours Train| 341 | |------------|-------|----------|---------------| 342 | |dishwasher |31.19 |10 |32 | 343 | |fridge |39.67 |10 |55 | 344 | |microwave |23.72 |10 |2.25 | 345 | 346 | ##### Conclusions 347 | 348 | Our hypothesis is supported as the results are better than with different standardization in train and test (as it is done in Experiment 2). In reference with the Experiment 1 we got also worst results, therefore we could conclude that in this situation and with this dataset standarizing the data is not recommended. 349 | 350 | ## Paper modification 1 - Only regression enabled 351 | 352 | ### Experiment 4. Only regression without standarization 353 | 354 | #### Hypothesis 355 | 356 | The main hypothesis of this experiment is whether attention can detect the consumption pattern and replace what in previous experiments was the classification branch by modulating the output of the regression branch. 357 | By extracting the classifier branch, the model prediction is expected to detect the peaks (with the help of attention) but may predict values with the biggest difference to the input consumption than with the classification branch. 358 | 359 | #### Experiment setup 360 | 361 | See details of the experiments below. Each of the columns describe an specific option of the previously introduced network architectures and pre/post processing methods: 362 | 363 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test| 364 | |-----------|------------|----------------|----------------|-----------------------------| 365 | | dishwasher| TRUE | FALSE | FALSE | FALSE | 366 | | fridge | TRUE | FALSE | FALSE | FALSE | 367 | | microwave | TRUE | FALSE | FALSE | FALSE | 368 | 369 | #### Results 370 | 371 | See attached train vs loss curve to diagnose performance: 372 | 373 | ![image](https://user-images.githubusercontent.com/7881377/114513412-70d55f00-9c3a-11eb-935d-b5218f09eec3.png) 374 | 375 | 376 | See attached train and test samples per each of the appliances to interpret end evaluate disaggregation: 377 | 378 | ![image](https://user-images.githubusercontent.com/7881377/114315323-c7d61980-9afe-11eb-9e45-1e730fc4661c.png) 379 | ![image](https://user-images.githubusercontent.com/7881377/114315334-d45a7200-9afe-11eb-8454-eeee8d1d346d.png) 380 | ![image](https://user-images.githubusercontent.com/7881377/114315342-dfad9d80-9afe-11eb-9bf0-bc9b3b33f7fb.png) 381 | 382 | Captura de pantalla 2021-04-16 a las 20 25 23 383 | 384 | See obtained error previously introduced in error metrics section and extra training information: 385 | 386 | | Appliance | MAE | Nº Epochs| Nº Hours Train| 387 | |------------|-------|----------|---------------| 388 | |dishwasher |24.79 |10 |36 | 389 | |fridge |29.86 |10 |50.9 | 390 | |microwave |22.56 |10 |1.9 | 391 | 392 | #### Conclusions 393 | 394 | The results of this experiment are worse than the original paper. Our set of experiments has the lowest mean absolute error (close to Experiment 1 and Experiment 6). 395 | The main hypothesis was that the attention would improve the performance. Hence, the results were better than expected. Attention learns how to focus on peaks of consumption (much better than in Experiment 1) and gives the model the ability to generalize better than what can be seen in Experiment 1. 396 | Without the classification branch, attention weights train better and the attention values are bigger than in Experiment 1. By not having the classification branch that modulates the regression output, the attention must learn the changes and focus on the significant changes (changes that in Experiment 1 were handled by classification). 397 | Attention in the fridge focus, on state changes (peaks and on-mode) and state duration. But in the microwave case, attention focuses mainly on the switch on. We conclude that after the peak the model expects a long-term change of consumption and in the microwave case it does not occur. That’s the main difference between microwave and fridge. This hypothesis cannot be applied in the dishwasher, because of the peaks of other appliances during the time it is on (that produces noise). 398 | Lastly, the regression is more sensitive to changes and allows to catch the pattern of the input smoothly. But only having the regression model subtracts the model from the specification. 399 | 400 | ### Experiment 5. Only regression with standardization (using calculated standardization in test) 401 | 402 | #### Hypothesis 403 | 404 | The main difference with Experiment 4 is the addition of the standardization in the dataset of the model. In this experiment, we are applying standardization in both training and test sets after splitting the data. We calculate the mean and std variables of the train and test set and apply it respectively. 405 | The hypothesis is that we will have a better outcome than Experiment 4 because both datasets will be standardized in the same way. 406 | 407 | #### Experiment setup 408 | 409 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods: 410 | 411 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test| 412 | |-----------|------------|----------------|----------------|-----------------------------| 413 | | dishwasher| TRUE | FALSE | TRUE | TRUE | 414 | | fridge | TRUE | FALSE | TRUE | TRUE | 415 | | microwave | TRUE | FALSE | TRUE | TRUE | 416 | 417 | #### Results 418 | 419 | ![image](https://user-images.githubusercontent.com/7881377/114315937-9874dc00-9b01-11eb-9d68-e672eb296853.png) 420 | ![image](https://user-images.githubusercontent.com/7881377/114315949-a3c80780-9b01-11eb-903e-e302e6412179.png) 421 | ![image](https://user-images.githubusercontent.com/7881377/114316065-2bae1180-9b02-11eb-9e51-20be9ce25466.png) 422 | 423 | Captura de pantalla 2021-04-16 a las 20 25 23 424 | 425 | See obtained error previously introduced in error metrics section and extra training information: 426 | 427 | | Appliance | MAE | Nº Epochs| Nº Hours Train| 428 | |------------|-------|----------|---------------| 429 | |dishwasher |38.78 |10 |36 | 430 | |fridge |36.38 |10 |51.1 | 431 | |microwave |23.92 |10 |2.2 | 432 | 433 | #### Conclusions 434 | 435 | Our hypothesis is not supported as results are worse (significantly in the dishwasher and fridge). 436 | These results must be produced because the properties (mean and standard deviation) are different in each dataset. So, we are applying different rescaling and making the difference bigger. 437 | 438 | ### Experiment 6. Only regression with standardization (using training standardization in test) 439 | 440 | #### Hypothesis 441 | This experiment combines the only regression architecture with the standardization technique. In this case, we are applying the standardization for the testing set in terms of the mean and standard deviation of the training set. We expect it to improve the results of Experiment 5 given the outcome of Experiments 2 and 3. 442 | 443 | #### Experiment setup 444 | 445 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods: 446 | 447 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test| 448 | |-----------|------------|----------------|----------------|-----------------------------| 449 | | dishwasher| TRUE | FALSE | TRUE | FALSE | 450 | | fridge | TRUE | FALSE | TRUE | FALSE | 451 | | microwave | TRUE | FALSE | TRUE | FALSE | 452 | 453 | #### Results 454 | 455 | See attached train and test samples per each of the appliances to interpret end evaluate disaggregation: 456 | 457 | ![image](https://user-images.githubusercontent.com/7881377/114316647-b09a2a80-9b04-11eb-8a76-6c11f0f5f88d.png) 458 | ![image](https://user-images.githubusercontent.com/7881377/114316663-bee84680-9b04-11eb-8f75-f66db4c007da.png) 459 | ![image](https://user-images.githubusercontent.com/7881377/114450835-c9731080-9bd6-11eb-97aa-9cfd69912d1b.png) 460 | 461 | Captura de pantalla 2021-04-16 a las 20 25 23 462 | 463 | 464 | See obtained error previously introduced in error metrics section and extra training information: 465 | 466 | | Appliance | MAE | Nº Epochs| Nº Hours Train| 467 | |------------|-------|----------|---------------| 468 | |dishwasher |26.37 |10 |36 | 469 | |fridge |29.96 |10 |51.1 | 470 | |microwave |20.1 |10 |2.2 | 471 | 472 | #### Conclusions 473 | In general, the results are similar to the other experiments in terms of MAE score. In comparison with Experiment 5, the results are significantly better. In Experiments 2 and 3, using training standardization in the test set gave better results as well. 474 | 475 | ## Paper modification 2 - Regression and classification using the attention results 476 | 477 | ### Experiment 7. Using attention in regression and classification 478 | 479 | #### Hypothesis 480 | Concatenating the output of the attention layers with the current input of the MLP in the classification branch will affect the prediction of this branch. As we have observed in previously ran experiments, the attention scores peak when the power consumption of the house changes. Consequently, this information can help the classifier decide whether it is a change of consumption or not. 481 | 482 | #### Experiment setup 483 | 484 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods: 485 | 486 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test| Attention Classification | 487 | |-----------|------------|----------------|----------------|-----------------------------|--------------------------| 488 | | dishwasher| TRUE | TRUE | FALSE | FALSE | TRUE | 489 | | fridge | TRUE | TRUE | FALSE | FALSE | TRUE | 490 | | microwave | TRUE | TRUE | FALSE | FALSE | TRUE | 491 | 492 | #### Results 493 | See attached train and test samples per each of the appliances to interpret end evaluate disaggregation: 494 | ![image](https://user-images.githubusercontent.com/75752252/114435689-bc4d2600-9bc4-11eb-8d4f-c2bdd0ec33c4.png) 495 | ![image](https://user-images.githubusercontent.com/75752252/114568196-38eb0d80-9c74-11eb-830d-21afdc2060bb.png) 496 | ![image](https://user-images.githubusercontent.com/75752252/114740961-9f425f80-9d4a-11eb-9419-3f17dee8e397.png) 497 | 498 | Captura de pantalla 2021-04-16 a las 20 25 23 499 | 500 | See obtained error previously introduced in error metrics section and extra training information: 501 | 502 | | Appliance | MAE | Nº Epochs| Nº Hours Train| 503 | |------------|-------|----------|---------------| 504 | |dishwasher |28.09 |3 |6 | 505 | |fridge |31.08 |4 |8 | 506 | |microwave |26.98 |10 |1 | 507 | 508 | #### Conclusions 509 | In this case, the results are similar to the other experiments, for upcoming experimentation, what we would propose to use the attention output to calculate the classification would be eliminating this branch's convolutional layers. The magnitude of the values of the concatenated vector that enters the MLP can differ between the ones coming from attention and the ones coming from CNN layers, so this can handicap the training process. 510 | 511 | One observation that can be made is that in the case of the dishwasher and the fridge, overfitting started in the 3rd and 4th epochs, as can be noted in the image below (dishwasher). 512 | 513 | ![results-error](https://user-images.githubusercontent.com/75752252/114723899-546d1b80-9d3b-11eb-969e-a7f3ba37015d.png) 514 | 515 | 516 | ### Experiment results summary 517 | 518 | ![image](https://user-images.githubusercontent.com/75752252/114727977-f4787400-9d3e-11eb-8355-df2109c81dcc.png) 519 | 520 | ## Implementation details 521 | 522 | - Files description 523 | - settings.yaml. YAML file describing each of the experiment parameters 524 | - Train, val and test dataset properties 525 | - Hyperparameters (oversampling factor, learning rate, window size, filter properties, ...) 526 | - redd.yaml YAML file describing REDD dataset files and parameters (building and channels filenames) 527 | - redd.py REDD dataset parser 528 | - dataset.py REDD dataset loader and preprocessing 529 | - run-train.sh Experiments training launcher using default arguments 530 | - run-test.sh Experiments testing launcher using default arguments 531 | - main.py Orchestrator of train and test actions. Multiple arguments supported to handle different experiments actions and scenarios 532 | - model.py Described models implementation 533 | - train.py Train handler. Manage multiple epoch training and evaluation on training dataset 534 | - test.py Test handler. Manage pre-trained model and testing on testing dataset 535 | - utils.py Data handlers, error and plot helping functions 536 | - Framework 537 | - Python3.7.9 538 | - Torch 1.7.1 539 | - Computing resources 540 | - Using pre-configured Cloud Deep Learning VM from Google Cloud Market 541 | - vCPU cores: 2 542 | - RAM: 13 GB 543 | - 1 NVIDIA Tesla K80 544 | 545 | In order to run the code follow instructions below: 546 | 547 | ``` 548 | 1) Clone the github project 549 | git clone https://github.com/abadiabosch/dlai-upc-2021-project.git 550 | 551 | 2) Install python requirements 552 | pip install -r requirements.txt 553 | 554 | 3) Download the dataset 555 | https://drive.google.com/drive/folders/1ey1UBfU41zjftiXjp6PmJ0OfXFhJYj4N?usp=sharing 556 | 557 | 4) Update settings.yaml to make dataset.path field point to the folder with *.csv downloaded in step 3 558 | dataset: 559 | path: 560 | 561 | 5) To train models run command below in source folder. Using default training settings, see main orchestrator arguments below 562 | sh run-train.sh 563 | 564 | 6) To test models rename output-train folder to output-test folder and run command below in source folder. Using default testing settings, 565 | see main orchestraro arguments below 566 | sh run-test.sh 567 | 568 | Main orchestrator command line arguments. See default settings in train and test launchers 569 | 570 | Command line arguments parser 571 | --settings 572 | Path to settings yaml file where all disaggregation scenarios and model hyperparameters are described 573 | --appliance 574 | Name of the appliance to train or test 575 | --path 576 | Path to output folder where resuls are saved 577 | --train 578 | Set to train or unset to test 579 | --tune 580 | Set to enable automatic architecture hyperparameters tunning 581 | --epochs 582 | Number of epochs to train 583 | --disable-plot 584 | Disable sliding window plotting during train or test 585 | --disable-random 586 | Disable randomness in processing 587 | 588 | ``` 589 | 590 | In order to run pre-trained models follow additional instructions below: 591 | ``` 592 | 1) Download models from 593 | https://drive.google.com/drive/folders/1gb_FmG1hs6lgSlSF9MLZ4w7rAgNEtfvC?usp=sharing 594 | 595 | 2) Copy each of the models (.th) to its path described in run-train.sh or run-test.sh 596 | ``` 597 | 598 | ## Conclusions 599 | 600 | - Conclusion 1: During the experiments; the results were significantly better in train and eval than in test. The explanation behind this outcome is that our model was trained with a dataset with a very low variation of patterns of appliances. This is because there were just 3 to 5 different types of the same appliance for training, to test the model for a totally different type of appliance. For example, the patterns of the fridge consumption in the training set were different from the pattern of the testing set, therefore, the model did not have a broad variety of load profile patterns to learn to infer from. In the next image, we can see the variation of one house from the training set and the house for the testing set for the fridge appliance. The variation is of the two houses is totally different, with this graphic our explanation is endorsed. 601 | 602 | ![WhatsApp Image 2021-04-17 at 12 45 21](https://user-images.githubusercontent.com/71388892/115111764-288fa700-9f82-11eb-845d-6bcef89c28e5.jpeg) 603 | 604 | - Conclusion 2: Classification is in charge of modeling the real consumption patterns of each window given. 605 | - Conclusion 3: Regression is in charge of infering the maximum consumption of the appliance in the input window. This applies to all the models but the "only regresion" one, as explained in conclusion 6. 606 | - Conclusion 4: Attention focuses on two scenarios related to the simultaneity of the appliances: 607 | - Scenarios with a high simultaneity factor: Attention focus on State changes of appliances (switch on/off) and state duration. Therefore, focuses on the appliance pattern. (case of the microwave and dishwasher) 608 | - Scenarios with low simultaneity factor: attention focuses on the neighborhood, outside of the active section of the appliance. (case of the fridge) 609 | - Conclusion 5: We don’t have clear conclusions of whether the standardization of the data set will produce better outcomes than the paper reference model. 610 | - Conclusion 6: Without a classification branch, the output is more smooth and therefore it does not capture adequately the peaks of consumption. This is because the regression branch is not prepared to do both tasks of inferring the maximum consumption and adapting to the exact pattern with instantaneous changes of power. 611 | - Conclusion 7: All the models took a big amount of time to be trained, the amount of data, the complexity of the forward and backward processes and the computational resources were the reason for that. 612 | 613 | ## Future work 614 | 615 | Transformers are state-of-the-art models with a high impact in deep learning. To continue developing the project we wanted to add this new attention mechanism to our model. Unfortunately, we did not have time to develop it. 616 | Therefore, as future work, we recommend applying transformers in replace to the attention layer in our model. 617 | The encoder module will use the input from the LSTM and will feed the self-attention block to reach the 1D convolution. A residual connection and layer normalization would be implemented. 618 | The encoder would follow a similar procedure as the encoder, adding a cross-attention. The cross attention would find which regions in the input consumption sequence are most relevant to constructing and therefore deserve the highest attention coefficients. 619 | Our hypothesis after applying transformers is to generate a better outcome than with the actual model. Being the model more efficient when selecting the regions in which the consumption sequence varies. 620 | 621 | ## References 622 | 623 | [1] 624 | Rolnick, D., Donti, P. L., Kaack, L. H., Kochanski, K., Lacoste, A., Sankaran, K., ... & Bengio, Y. (2019). 625 | Tackling climate change with machine learning. 626 | arXiv preprint arXiv:1906.05433. 627 | [https://arxiv.org/abs/1906.05433](https://arxiv.org/abs/1906.05433) 628 | 629 | [2] 630 | Piccialli, V., & Sudoso, A. M. (2021) 631 | Improving Non-Intrusive Load Disaggregation through an Attention-Based Deep Neural Network. 632 | Energies, 14(4), 847. 633 | [https://arxiv.org/abs/1912.00759](https://arxiv.org/abs/1912.00759) 634 | 635 | [3] 636 | Zhao, B., Ye, M., Stankovic, L., & Stankovic, V. (2020). 637 | Non-intrusive load disaggregation solutions for very low-rate smart meter data. 638 | Applied Energy, 268, 114949. 639 | [https://www.sciencedirect.com/science/article/abs/pii/S030626192030461X](https://www.sciencedirect.com/science/article/abs/pii/S030626192030461X) 640 | 641 | [4] 642 | Zhang, C., Zhong, M., Wang, Z., Goddard, N., & Sutton, C. (2018, April). 643 | Sequence-to-point learning with neural networks for non-intrusive load monitoring. 644 | In Proceedings of the AAAI Conference on Artificial Intelligence (Vol. 32, No. 1). 645 | [https://arxiv.org/abs/1612.09106]](https://arxiv.org/abs/1612.09106) 646 | 647 | [5] 648 | Kolter, J. Z., & Johnson, M. J. (2011, August) 649 | REDD: A public data set for energy disaggregation research. 650 | In Workshop on data mining applications in sustainability (SIGKDD), San Diego, CA (Vol. 25, No. Citeseer, pp. 59-62). 651 | [http://redd.csail.mit.edu/kolter-kddsust11.pdf](http://redd.csail.mit.edu/kolter-kddsust11.pdf) 652 | 653 | [6] 654 | Shin, C., Joo, S., Yim, J., Lee, H., Moon, T., & Rhee, W. (2019, July). 655 | Subtask gated networks for non-intrusive load monitoring. 656 | In Proceedings of the AAAI Conference on Artificial Intelligence (Vol. 33, No. 01, pp. 1150-1157). 657 | [https://www.mdpi.com/1996-1073/14/4/847/pdf](https://www.mdpi.com/1996-1073/14/4/847/pdf) 658 | 659 | Team Members colaborating on the project: Victor Gil, Sergi Bragos and Inés Ylla 660 | --------------------------------------------------------------------------------