├── __init__.py
├── requirements.txt
├── redd.py
├── template.yaml
├── utils.py
├── run-train.sh
├── main.py
├── run-test.sh
├── dataset_test.py
├── redd.yaml
├── settings.yaml
├── test.py
├── model.py
├── train.py
├── dataset.py
├── colab
└── project-devel.ipynb
└── README.md
/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.19.5
2 | scikit-learn==0.24.1
3 | scipy==1.6.0
4 | pandas==1.2.1
5 | PyYAML==5.4.1
6 | torch==1.7.1
7 |
--------------------------------------------------------------------------------
/redd.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 |
4 | # Acquisition properties
5 | # Timezone: US/Eastern
6 | # Frequency: 1 Hz
7 |
8 | channel_name = "channel_%d.dat"
9 |
10 | timezone = "US/Eastern"
11 |
12 |
13 | def load(name, path, channels, start=None, end=None):
14 | """
15 |     REDD dataset parser. Parses REDD raw data from the publicly
16 |     available REDD dataset files
17 | 
18 |     Merges time series from multiple files and preprocesses them:
19 |     - Filter out unwanted intervals
20 |     - Remove duplicates
21 |     - Create the time series index
22 | """
23 | # WARNING: Time series inner join. Ignoring non-synced
24 | # datapoints from loaded channels
25 | df = pd.concat(
26 | [
27 | pd.read_csv(
28 | os.path.join(path, channel_name % channel),
29 | sep=" ",
30 | names=["timestamp", name],
31 | ).set_index("timestamp")
32 | for channel in channels
33 | ],
34 | axis=1,
35 | join="inner",
36 | )
37 | df = df.sum(axis=1)
38 | df.index = pd.to_datetime(df.index, unit="s", utc=True).tz_convert(timezone)
39 | df = df[~df.index.duplicated(keep="first")].sort_index() # Remove duplicates
40 |
41 | if start and end:
42 |         # Filter out unwanted data from the time series
43 | start_ = df.index[0].to_pydatetime()
44 | end_ = df.index[-1].to_pydatetime()
45 | if start < start_:
46 | start = start_
47 | if end > end_:
48 | end = end_
49 | df = df[start:end]
50 | df.name = name
51 | return df.sort_index()
52 |
--------------------------------------------------------------------------------
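A minimal usage sketch of redd.load, assuming the REDD low_freq layout (house_1/channel_1.dat, channel_2.dat, ...) sits under the /tmp/redd root used in settings.yaml; the date range is only an example and may need adjusting to the actual recording period.

# Load the aggregated mains demand of house_1 (channels 1 and 2, per redd.yaml)
import pandas as pd

import redd

start = pd.Timestamp("2011-04-18", tz=redd.timezone)
end = pd.Timestamp("2011-05-01", tz=redd.timezone)

mains = redd.load(
    "mains",                       # name given to the resulting series
    "/tmp/redd/low_freq/house_1",  # folder containing the channel_*.dat files
    channels=[1, 2],               # mains channels, summed by redd.load
    start=start,
    end=end,
)
print(mains.head())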
/template.yaml:
--------------------------------------------------------------------------------
1 | - name: building
2 | path: house_
3 | mains:
4 | channels: []
5 | unknown:
6 | channels: []
7 | appliances:
8 | - name: oven
9 | id: oven
10 | activity: cooking
11 | channels: []
12 | - name: refrigerator
13 | id: refrigerator
14 | activity: cooking
15 | channels: []
16 | - name: dishwasher
17 | id: dishwasher
18 | activity: cleaning
19 | channels: []
20 | - name: kitchen_outlet
21 | id: kitchen_outlets
22 | activity: cooking
23 | channels: []
24 | - name: lighting
25 | id: lighting
26 | activity: lighting
27 | channels: []
28 | - name: washer_dryer
29 | id: washer_dryer
30 | activity: cleaning
31 | channels: []
32 | - name: microwave
33 | id: microwave
34 | activity: cooking
35 | channels: []
36 | - name: bathroom_outlet
37 | id: bathroom_gfi
38 | activity: selfcare
39 | channels: []
40 | - name: electric_heater
41 | id: electric_heat
42 | activity: hvac
43 | channels: []
44 | - name: stove
45 | id: stove
46 | activity: cooking
47 | channels: []
48 | - name: electronics
49 | id: electronics
50 | activity:
51 | channels: []
52 | - name: disposal
53 | id: disposal
54 | activity: cooking
55 | channels: []
56 | - name: furance
57 | id: furance
58 | activity: cooking
59 | channels: []
60 | - name: smoke_alarms
61 | id: smoke_alarms
62 | activity: security
63 | channels: []
64 | - name: air_conditioner
65 | id: air_conditioning
66 | activity: hvac
67 | channels: []
68 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import sys
5 |
6 | import yaml
7 |
8 | import numpy as np
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 |
13 | import pprint
14 | import matplotlib.pyplot as plt
15 |
16 |
17 | def load_yaml(path):
18 | """
19 | Load YAML file
20 | """
21 |     with open(path, "r") as f:
22 |         return yaml.safe_load(f) or {}
23 |
24 |
25 | def error(labels, outputs):
26 | """
27 |     Calculate L1 error
28 | """
29 | err = F.l1_loss(labels, outputs)
30 | return err
31 |
32 |
33 | def save_model(model, optimizer, hparams, appliance, transform, file_name_model, error):
34 | """
35 | Save model and metadata to file
36 | """
37 | torch.save(
38 | {
39 | "model_state_dict": model.state_dict(),
40 | "optimizer_state_dict": optimizer.state_dict(),
41 | "hparams": hparams,
42 | "appliance": appliance,
43 | "transform": transform,
44 | "error": error,
45 | },
46 | file_name_model,
47 | )
48 |
49 |
50 | def load_model(file_name_model, model, optimizer=None):
51 | """
52 | Load model and metadata from file
53 | """
54 | if torch.cuda.is_available():
55 | state = torch.load(file_name_model)
56 | else:
57 | state = torch.load(file_name_model, map_location=torch.device("cpu"))
58 |
59 | model.load_state_dict(state["model_state_dict"])
60 | if optimizer:
61 | optimizer.load_state_dict(state["optimizer_state_dict"])
62 |
63 | hparams = state.get("hparams", None)
64 | appliance = state.get("appliance", None)
65 |
66 | transform = state.get("transform", None)
67 | error = state.get("error", None)
68 |
69 | print("=========== ARCHITECTURE ==========")
70 | print("Reloading appliance")
71 | pprint.pprint(appliance)
72 | print("Reloading transform")
73 | pprint.pprint(transform)
74 | print("===================================")
75 | return transform, error
76 |
77 |
78 | def save_dataset(transform, train_, test_, filename):
79 | """
80 | Save training and testing dataset to file
81 | """
82 | torch.save({"transform": transform, "train": train_, "test": test_}, filename)
83 |
84 |
85 | def plot_window(
86 | x, y, yhat, reg, clas, alphas, loss, err, classification_enabled, filename
87 | ):
88 | """
89 |     Plot sliding windows to visualize disaggregation results and keep
90 |     track of them during training, testing and debugging
91 | 
92 |     Plots multiple time series:
93 | - Aggregated demand
94 | - Appliance demand
95 | - Disaggregation prediction
96 | - Regression branch prediction
97 | - Classification branch prediction
98 | """
99 | subplt_x = 4
100 | subplt_y = 4
101 | plt.figure(1, figsize=(20, 16))
102 | plt.subplots_adjust(top=0.88)
103 |
104 | idxs = np.random.randint(len(x), size=(subplt_x * subplt_y))
105 | for i, idx in enumerate(idxs):
106 | x_, y_, yhat_, reg_, clas_ = (
107 | x.detach().numpy()[idx][0],
108 | y.detach().numpy()[idx],
109 | yhat.detach().numpy()[idx],
110 | reg.detach().numpy()[idx],
111 | clas.detach().numpy()[idx],
112 | )
113 | alphas_ = alphas.detach().numpy()[idx].flatten()
114 | ax1 = plt.subplot(subplt_x, subplt_y, i + 1)
115 | ax2 = ax1.twinx()
116 | ax1.plot(range(len(x_)), x_, color="b", label="x")
117 | ax1.plot(range(len(y_)), y_, color="r", label="y")
118 | ax1.plot(range(len(reg_)), reg_, color="black", label="reg")
119 | ax1.plot(range(len(yhat_)), yhat_, alpha=0.5, color="orange", label="yhat")
120 | ax2.fill_between(
121 | range(len(alphas_)), alphas_, alpha=0.5, color="lightgrey", label="alpha"
122 | )
123 | if classification_enabled:
124 | alphas_max = np.max(alphas_)
125 | ax2.plot(
126 | range(len(clas_)),
127 | clas_ * alphas_max,
128 | color="cyan",
129 | alpha=0.25,
130 |                 label="clas",
131 | )
132 |
133 | plt.suptitle(f"loss {loss:.2f} error {err:.2f}")
134 | ax1.legend()
135 | ax2.legend()
136 | plt.legend()
137 | plt.tight_layout()
138 | plt.savefig(filename)
139 | plt.clf()
140 |
--------------------------------------------------------------------------------
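The save_model / load_model pair above stores the model and optimizer state dicts together with run metadata. A small round-trip sketch with a toy stand-in network; the nn.Linear model, file path and metadata are placeholders, not the project's real artifacts (see model.py for those).

import torch.nn as nn
import torch.optim as optim

from utils import save_model, load_model

model = nn.Linear(10, 1)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

save_model(
    model,
    optimizer,
    hparams={"lr": 1e-3},
    appliance={"name": "toy"},
    transform=None,
    file_name_model="/tmp/toy.th",
    error=0.0,
)

# Reload into freshly created instances; load_model restores the state dicts
# in place and returns the stored transform and error
model2 = nn.Linear(10, 1)
optimizer2 = optim.Adam(model2.parameters(), lr=1e-3)
transform, err = load_model("/tmp/toy.th", model2, optimizer2)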
/run-train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # TRAINING LAUNCHER
3 | # Train each of the appliances and models analyzed in the project and described in settings.yaml
4 | # See documentation describing each of the appliances analyzed
5 | # See documentation describing each of the model architectures evaluated
6 |
7 | mkdir output-train
8 |
9 | ################ DISHWASHER
10 |
11 | # Experiment 1
12 | mkdir output-train/dishwasher
13 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher --path output-train/dishwasher --train --epochs 5 --disable-random > output-train/dishwasher/results-train.log
14 |
15 | # Experiment 2
16 | mkdir output-train/dishwasher-norm
17 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-norm --path output-train/dishwasher-norm --train --epochs 5 --disable-random > output-train/dishwasher-norm/results-train.log
18 |
19 | # Experiment 4
20 | mkdir output-train/dishwasher-onlyregression
21 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression --path output-train/dishwasher-onlyregression --train --epochs 5 --disable-random > output-train/dishwasher-onlyregression/results-train.log
22 |
23 | # Experiment 5
24 | mkdir output-train/dishwasher-onlyregression-norm
25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression-norm --path output-train/dishwasher-onlyregression-norm --train --epochs 5 --disable-random > output-train/dishwasher-onlyregression-norm/results-train.log
26 |
27 | # Experiment 7
28 | mkdir output-train/dishwasher-classattention
29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-classattention --path output-train/dishwasher-classattention --train --epochs 5 --disable-random > output-train/dishwasher-classattention/results-train.log
30 |
31 | ################ FRIDGE
32 |
33 | # Experiment 1
34 | mkdir output-train/fridge
35 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge --path output-train/fridge --train --epochs 5 --disable-random > output-train/fridge/results-train.log
36 |
37 | # Experiment 2
38 | mkdir output-train/fridge-norm
39 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-norm --path output-train/fridge-norm --train --epochs 5 --disable-random > output-train/fridge-norm/results-train.log
40 |
41 | # Experiment 4
42 | mkdir output-train/fridge-onlyregression
43 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression --path output-train/fridge-onlyregression --train --epochs 5 --disable-random > output-train/fridge-onlyregression/results-train.log
44 |
45 | # Experiment 5
46 | mkdir output-train/fridge-onlyregression-norm
47 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression-norm --path output-train/fridge-onlyregression-norm --train --epochs 5 --disable-random > output-train/fridge-onlyregression-norm/results-train.log
48 |
49 | # Experiment 7
50 | mkdir output-train/fridge-classattention
51 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-classattention --path output-train/fridge-classattention --train --epochs 5 --disable-random > output-train/fridge-classattention/results-train.log
52 |
53 | ################ MICROWAVE
54 |
55 | # Experiment 1
56 | mkdir output-train/microwave
57 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave --path output-train/microwave --train --epochs 5 --disable-random > output-train/microwave/results-train.log
58 |
59 | # Experiment 2
60 | mkdir output-train/microwave-norm
61 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-norm --path output-train/microwave-norm --train --epochs 5 --disable-random > output-train/microwave-norm/results-train.log
62 |
63 | # Experiment 4
64 | mkdir output-train/microwave-onlyregression
65 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression --path output-train/microwave-onlyregression --train --epochs 5 --disable-random > output-train/microwave-onlyregression/results-train.log
66 |
67 | # Experiment 5
68 | mkdir output-train/microwave-onlyregression-norm
69 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression-norm --path output-train/microwave-onlyregression-norm --train --epochs 5 --disable-random > output-train/microwave-onlyregression-norm/results-train.log
70 |
71 | # Experiment 7
72 | mkdir output-train/microwave-classattention
73 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-classattention --path output-train/microwave-classattention --train --epochs 5 --disable-random > output-train/microwave-classattention/results-train.log
74 |
75 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import sys
4 |
5 | from datetime import datetime
6 | from argparse import ArgumentParser
7 |
8 | import torch
9 | from ray import tune
10 |
11 | from utils import error, load_yaml
12 | from train import train_model
13 | from test import test_model
14 |
15 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
16 |
17 |
18 | def get_arguments():
19 | """
20 | Command line arguments parser
21 | --settings
22 | Path to settings yaml file where all disaggregation scenarios
23 | and model hyperparameters are described
24 | --appliance
25 | Name of the appliance to train or test
26 | --path
27 |         Path to the output folder where results are saved
28 | --train
29 | Set to train or unset to test
30 | --tune
31 |         Set to enable automatic architecture hyperparameter tuning
32 | --epochs
33 | Number of epochs to train
34 | --disable-plot
35 | Disable sliding window plotting during train or test
36 | --disable-random
37 | Disable randomness in processing
38 | """
39 | parser = ArgumentParser(description="nilm-project")
40 | parser.add_argument("--settings")
41 | parser.add_argument("--appliance")
42 | parser.add_argument("--path")
43 | parser.add_argument("--train", action="store_true")
44 | parser.add_argument("--tune", action="store_true")
45 | parser.add_argument("--epochs")
46 | parser.add_argument("--disable-plot", action="store_true")
47 | parser.add_argument("--disable-random", action="store_true")
48 | return parser.parse_args()
49 |
50 |
51 | def main():
52 | """
53 |     Main entry point called from the command line. Parses the command
54 |     line arguments and launches training or testing
55 | """
56 | args = get_arguments()
57 |
58 | if args.disable_random: # Disable randomness
59 | torch.manual_seed(7)
60 |
61 | train = args.train
62 | tune_enabled = args.tune
63 | output = args.path
64 | plot_disabled = args.disable_plot
65 |
66 | # Load settings from YAML file where generic and appliance
67 |     # specific details and model hyperparameters are described
68 | settings = load_yaml(args.settings)
69 | appliance = args.appliance
70 |
71 | dataset = settings["dataset"]
72 | hparams = settings["hparams"]
73 | if args.epochs:
74 | hparams["epochs"] = int(args.epochs)
75 |
76 | appliance = settings["appliances"][appliance]
77 |
78 | datapath = dataset["path"]
79 | if train:
80 | # DO TRAIN
81 |
82 | print("==========================================")
83 |         print("Training ONGOING")
84 | print("==========================================")
85 |
86 | if not tune_enabled:
87 |             # If automatic hyperparameter tuning is not enabled,
88 |             # use the network hyperparameters from settings and train
89 | # the model
90 | model, transform = train_model(
91 | datapath,
92 | output,
93 | appliance,
94 | hparams,
95 | doplot=not plot_disabled,
96 | reload=False, # Do not reload models by default
97 | )
98 | else:
99 |             # If automatic hyperparameter tuning is enabled,
100 |             # specify the hyperparameter grid search and tune the model
101 | config = {
102 | "datapath": datapath,
103 | "output": output,
104 | "appliance": appliance,
105 | "hparams": hparams,
106 | "doplot": not plot_disabled,
107 | "reload": False,
108 | "tune": {
109 | "F": tune.grid_search([16, 32, 64]),
110 | "K": tune.grid_search([4, 8, 16]),
111 | "H": tune.grid_search([256, 512, 1024]),
112 | },
113 | }
114 | analysis = tune.run(
115 |                 train_model_wrapper, # Wrapper adapting train_model to the Ray Tune API
116 | metric="val_loss",
117 | mode="min",
118 | num_samples=5,
119 | config=config,
120 | )
121 | print("==========================================")
122 |             print("Best hyperparameters")
123 | print(analysis.best_config)
124 | print("==========================================")
125 |
126 | print("==========================================")
127 |         print("Training DONE")
128 | print("==========================================")
129 | else:
130 | # DO TEST
131 |
132 | print("==========================================")
133 |         print("Testing ONGOING")
134 | print("==========================================")
135 | test_model(datapath, output, appliance, hparams, doplot=not plot_disabled)
136 | print("==========================================")
137 |         print("Testing DONE")
138 | print("==========================================")
139 |
140 |
141 | if __name__ == "__main__":
142 | main()
143 |
--------------------------------------------------------------------------------
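main.py passes a train_model_wrapper to tune.run but the code shown neither defines nor imports it. A hypothetical sketch of such a wrapper, assuming the sampled values in config["tune"] are meant to override the appliance hyperparameters and that a validation loss is reported back through tune.report; the real wrapper may differ.

from ray import tune

from train import train_model


def train_model_wrapper(config):
    # Override the architecture hyperparameters with the values sampled
    # by Ray Tune for this trial
    appliance = dict(config["appliance"])
    appliance["hparams"] = {**appliance["hparams"], **config["tune"]}

    model, transform = train_model(
        config["datapath"],
        config["output"],
        appliance,
        config["hparams"],
        doplot=config["doplot"],
        reload=config["reload"],
    )

    # Placeholder metric: a real wrapper would report the validation loss
    # obtained during training so tune.run can minimize it
    tune.report(val_loss=0.0)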
/run-test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # TESTING LAUNCHER
3 | # Test each of the appliances and models analyzed in the project and described in settings.yaml
4 | # See documentation describing each of the appliances analyzed
5 | # See documentation describing each of the model architectures evaluated
6 |
7 | mkdir output-test
8 |
9 | ############### DISHWASHER
10 |
11 | # Experiment 1
12 | mkdir output-test/dishwasher
13 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher --path output-test/dishwasher --epochs 1 --disable-random > output-test/dishwasher/results-test.log
14 |
15 | # Experiment 2
16 | mkdir output-test/dishwasher-norm
17 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-norm --path output-test/dishwasher-norm --epochs 1 --disable-random > output-test/dishwasher-norm/results-test.log
18 |
19 | # Experiment 3
20 | mkdir output-test/dishwasher-norm-trainnorm
21 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-norm --path output-test/dishwasher-norm-trainnorm --epochs 1 --disable-random > output-test/dishwasher-norm-trainnorm/results-test.log
22 |
23 | # Experiment 4
24 | mkdir output-test/dishwasher-onlyregression
25 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression --path output-test/dishwasher-onlyregression --epochs 1 --disable-random > output-test/dishwasher-onlyregression/results-test.log
26 |
27 | # Experiment 5
28 | mkdir output-test/dishwasher-onlyregression-norm
29 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression-norm --path output-test/dishwasher-onlyregression-norm --epochs 1 --disable-random > output-test/dishwasher-onlyregression-norm/results-test.log
30 |
31 | # Experiment 6
32 | mkdir output-test/dishwasher-onlyregression-norm-trainnorm
33 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-onlyregression-norm --path output-test/dishwasher-onlyregression-norm-trainnorm --epochs 1 --disable-random > output-test/dishwasher-onlyregression-norm-trainnorm/results-test.log
34 |
35 | # Experiment 7
36 | mkdir output-test/dishwasher-classattention
37 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance dishwasher-classattention --path output-test/dishwasher-classattention --epochs 1 --disable-random > output-test/dishwasher-classattention/results-test.log
38 |
39 | ################ FRIDGE
40 |
41 | # Experiment 1
42 | mkdir output-test/fridge
43 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge --path output-test/fridge --epochs 1 --disable-random > output-test/fridge/results-test.log
44 |
45 | # Experiment 2
46 | mkdir output-test/fridge-norm
47 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-norm --path output-test/fridge-norm --epochs 1 --disable-random > output-test/fridge-norm/results-test.log
48 |
49 | # Experiment 3
50 | mkdir output-test/fridge-norm-trainnorm
51 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-norm --path output-test/fridge-norm-trainnorm --epochs 1 --disable-random > output-test/fridge-norm-trainnorm/results-test.log
52 |
53 | # Experiment 4
54 | mkdir output-test/fridge-onlyregression
55 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression --path output-test/fridge-onlyregression --epochs 1 --disable-random > output-test/fridge-onlyregression/results-test.log
56 |
57 | # Experiment 5
58 | mkdir output-test/fridge-onlyregression-norm
59 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression-norm --path output-test/fridge-onlyregression-norm --epochs 1 --disable-random > output-test/fridge-onlyregression-norm/results-test.log
60 |
61 | # Experiment 6
62 | mkdir output-test/fridge-onlyregression-norm-trainnorm
63 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-onlyregression-norm --path output-test/fridge-onlyregression-norm-trainnorm --epochs 1 --disable-random > output-test/fridge-onlyregression-norm-trainnorm/results-test.log
64 |
65 | # Experiment 7
66 | mkdir output-test/fridge-classattention
67 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance fridge-classattention --path output-test/fridge-classattention --epochs 1 --disable-random > output-test/fridge-classattention/results-test.log
68 |
69 | ################# MICROWAVE
70 |
71 | # Experiment 1
72 | mkdir output-test/microwave
73 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave --path output-test/microwave --epochs 1 --disable-random > output-test/microwave/results-test.log
74 |
75 | # Experiment 2
76 | mkdir output-test/microwave-norm
77 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-norm --path output-test/microwave-norm --epochs 1 --disable-random > output-test/microwave-norm/results-test.log
78 |
79 | # Experiment 3
80 | mkdir output-test/microwave-norm-trainnorm
81 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-norm --path output-test/microwave-norm-trainnorm --epochs 1 --disable-random > output-test/microwave-norm-trainnorm/results-test.log
82 |
83 | # Experiment 4
84 | mkdir output-test/microwave-onlyregression
85 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression --path output-test/microwave-onlyregression --epochs 1 --disable-random > output-test/microwave-onlyregression/results-test.log
86 |
87 | # Experiment 5
88 | mkdir output-test/microwave-onlyregression-norm
89 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression-norm --path output-test/microwave-onlyregression-norm --epochs 1 --disable-random > output-test/microwave-onlyregression-norm/results-test.log
90 |
91 | # Experiment 6
92 | mkdir output-test/microwave-onlyregression-norm-trainnorm
93 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-onlyregression-norm --path output-test/microwave-onlyregression-norm-trainnorm --epochs 1 --disable-random > output-test/microwave-onlyregression-norm-trainnorm/results-test.log
94 |
95 | # Experiment 7
96 | mkdir output-test/microwave-classattention
97 | CUDA_VISIBLE_DEVICES=0 python -u main.py --settings settings.yaml --appliance microwave-classattention --path output-test/microwave-classattention --epochs 1 --disable-random > output-test/microwave-classattention/results-test.log
98 |
--------------------------------------------------------------------------------
/dataset_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from datetime import datetime
3 | import pandas as pd
4 |
5 | import dataset
6 |
7 |
8 | def to_dt(x):
9 | return datetime.strptime(x, "%Y-%m-%d %H:%M:%S")
10 |
11 |
12 | class TestLoader(unittest.TestCase):
13 |
14 | """
15 |     Data preprocessing to impute small gaps and ignore large gaps
16 |     Ignore days without 100% coverage
17 | 
18 |     Extract from "Subtask Gated Networks for Non-Intrusive Load Monitoring":
19 | 
20 |     For REDD dataset, we preprocessed with the following procedure
21 |     to handle missing values. First, we split the sequence so that the
22 |     duration of missing values in subsequence is less than 20 seconds.
23 |     Second, we filled the missing values in each subsequence by
24 |     the backward filling method. Finally, we only used the subsequences
25 |     with more than one-day duration
26 |
27 | small gaps = ts < 20 seconds
28 | large gaps = ts > 20 seconds
29 | """
30 |
31 | def is_equal(self, df, index, values):
32 | return self.assertTrue(
33 | (df.index == index).all() and (df.values == values).all()
34 | )
35 |
36 | def setup_ts(self, name, start, end, gaps, freq="1S"):
37 | index = pd.date_range(start, end, freq=freq)
38 | values = range(len(index))
39 | df = pd.DataFrame({name: values}, index)
40 | for gap_start, gap_end in gaps:
41 | mask = (df.index >= to_dt(gap_start)) & (df.index < to_dt(gap_end))
42 |
43 | df = df[~mask]
44 | return df
45 |
46 | def setup_scenario(self, start, end, gaps={"mains": [], "appliance1": []}):
47 | freq = {"mains": "1S", "appliance1": "3S"}
48 |
49 | mains = self.setup_ts(
50 | "mains", to_dt(start), to_dt(end), gaps["mains"], freq=freq["mains"]
51 | )
52 | appliance1 = self.setup_ts(
53 | "appliance1",
54 | to_dt(start),
55 | to_dt(end),
56 | gaps["appliance1"],
57 | freq=freq["appliance1"],
58 | )
59 | return (mains, appliance1)
60 |
61 | def test_aligned_nomissing(self):
62 | """
63 | Scenario:
64 | mains:
65 | coverage: 100%
66 | sampling period: 1 sec
67 | appliances:
68 | number of appliances: 1
69 | coverage: 100%
70 | sampling period: 3sec
71 | alignment:
72 | both series are aligned
73 | """
74 | start = "2020-01-01 00:00:00"
75 | end = "2020-01-01 00:00:09"
76 |
77 | mains, appliance1 = self.setup_scenario(start, end)
78 | df = dataset.NilmDataset.align(mains, appliance1)
79 | expected_index = appliance1.index
80 | expected_values = [[0, 0], [3, 1], [6, 2], [9, 3]]
81 | self.is_equal(df, expected_index, expected_values)
82 |
83 | def test_aligned_nomissing_bfill(self):
84 | """
85 | Scenario:
86 | mains:
87 | coverage: 100%
88 | sampling period: 1 sec
89 | appliances:
90 | number of appliances: 1
91 | coverage: 100%
92 | sampling period: 3sec
93 | alignment:
94 | both series are aligned
95 | """
96 | start = "2020-01-01 00:00:00"
97 | end = "2020-01-01 00:00:09"
98 |
99 | mains, appliance1 = self.setup_scenario(start, end)
100 | df = dataset.NilmDataset.align(mains, appliance1, bfill=True)
101 |
102 | expected_index = mains.index
103 | expected_values = [
104 | [0, 0],
105 | [1, 1],
106 | [2, 1],
107 | [3, 1],
108 | [4, 2],
109 | [5, 2],
110 | [6, 2],
111 | [7, 3],
112 | [8, 3],
113 | [9, 3],
114 | ]
115 | self.is_equal(df, expected_index, expected_values)
116 |
117 | def test_mains_small_missing(self):
118 | """
119 | Scenario:
120 | mains:
121 | coverage: 2 x small gap in sequence
122 | sampling period: 1 sec
123 | appliances:
124 | number of appliances: 1
125 | coverage: 100%
126 | sampling period: 3sec
127 | alignment:
128 | both series are aligned
129 | """
130 |
131 | start = "2020-01-01 00:00:00"
132 | end = "2020-01-01 00:00:09"
133 | gaps = {
134 | "mains": [],
135 | "appliance1": [("2020-01-01 00:00:03", "2020-01-01 00:00:04")],
136 | }
137 | mains, appliance1 = self.setup_scenario(start, end, gaps)
138 | data = dataset.NilmDataset.impute(appliance1, gapsize=3, subseqsize=1)
139 | self.assertEqual(len(data), 1)
140 |
141 | expected_index = pd.date_range(
142 | appliance1.index[0], appliance1.index[-1], freq="3S"
143 | )
144 | expected_values = [[0], [2], [2], [3]]
145 | self.is_equal(data[0], expected_index, expected_values)
146 |
147 | def test_mains_large_missing(self):
148 | """
149 | Scenario:
150 | mains:
151 | coverage: 2 x large gaps in sequence
152 | 1 x intraday gap
153 | 1 x interday gap
154 | sampling period: 1 sec
155 | appliances:
156 | coverage: 100%
157 | number of appliances: 1
158 | sampling period: 3sec
159 | alignment:
160 | both series are aligned
161 | """
162 |
163 | start = "2020-01-01 00:00:00"
164 | end = "2020-01-01 00:01:00"
165 | gaps = {
166 | "mains": [],
167 | "appliance1": [("2020-01-01 00:00:25", "2020-01-01 00:00:45")],
168 | }
169 | mains, appliance1 = self.setup_scenario(start, end, gaps)
170 | data = dataset.NilmDataset.impute(appliance1, gapsize=2, subseqsize=8)
171 | self.assertEqual(len(data), 2)
172 | expected_index = pd.date_range(
173 | to_dt("2020-01-01 00:00:00"), to_dt("2020-01-01 00:00:24"), freq="3S"
174 | )
175 | expected_values = [[0], [1], [2], [3], [4], [5], [6], [7], [8]]
176 | self.is_equal(data[0], expected_index, expected_values)
177 |
178 | expected_index = pd.date_range(
179 | to_dt("2020-01-01 00:00:36"), to_dt("2020-01-01 00:01:00"), freq="3S"
180 | )
181 |
182 | expected_values = [
183 |             [15], # Not a perfect imputation due to the non-aligned 3s grid (bfill)
184 |             [15], # Not a perfect imputation due to the non-aligned 3s grid (bfill)
185 | [15],
186 | [15],
187 | [16],
188 | [17],
189 | [18],
190 | [19],
191 | [20],
192 | ]
193 | self.is_equal(data[1], expected_index, expected_values)
194 |
195 |
196 | if __name__ == "__main__":
197 | unittest.main()
198 |
--------------------------------------------------------------------------------
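The TestLoader docstring describes the preprocessing from the Subtask Gated Networks paper: split the sequence at gaps longer than 20 seconds, backward-fill the small gaps inside each subsequence, and keep only subsequences longer than one day. A standalone pandas sketch of that procedure, intended as an illustration only; the project's dataset.NilmDataset.impute and its gapsize/subseqsize semantics may differ.

import numpy as np
import pandas as pd


def split_and_bfill(series, max_gap="20S", min_length="1D", freq="1S"):
    # Start a new subsequence wherever the jump between consecutive
    # timestamps exceeds the allowed gap
    segment_id = (series.index.to_series().diff() > pd.Timedelta(max_gap)).cumsum()

    segments = []
    for _, seg in series.groupby(segment_id):
        if seg.index[-1] - seg.index[0] < pd.Timedelta(min_length):
            continue  # Drop subsequences that are too short
        # Reindex onto a regular grid and backward-fill the small gaps
        grid = pd.date_range(seg.index[0], seg.index[-1], freq=freq)
        segments.append(seg.reindex(grid).bfill())
    return segments


# Example: a 1 Hz series with one 30-second hole splits into two segments
idx = pd.date_range("2020-01-01", periods=3600, freq="1S")
series = pd.Series(np.arange(3600.0), index=idx).drop(idx[100:130])
print(len(split_and_bfill(series, max_gap="20S", min_length="1T")))  # 2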
/redd.yaml:
--------------------------------------------------------------------------------
1 | name: REDD
2 | path: low_freq
3 | buildings:
4 | - name: building1
5 | path: house_1
6 | mains:
7 | channels: [1,2]
8 | unknown:
9 | channels: [20]
10 | appliances:
11 | - name: oven
12 | id: oven
13 | activity: cooking
14 | channels: [3,4]
15 | - name: refrigerator
16 | id: refrigerator
17 | activity: cooking
18 | channels: [5]
19 | - name: dishwasher
20 | id: dishwasher
21 | activity: cleaning
22 | channels: [6]
23 | - name: kitchen_outlets
24 | id: kitchen_outlets
25 | activity: cooking
26 | channels: [7,8,15,16]
27 | - name: lighting
28 | id: lighting
29 | activity: lighting
30 | channels: [9,17,18]
31 | - name: washer_dryer
32 | id: washer_dryer
33 | activity: cleaning
34 | channels: [10,20]
35 | - name: microwave
36 | id: microwave
37 | activity: cooking
38 | channels: [11]
39 | - name: bathroom_outlet
40 | id: bathroom_gfi
41 | activity: selfcare
42 | channels: [12]
43 | - name: electric_heater
44 | id: electric_heat
45 | activity: heating
46 | channels: [13]
47 | - name: stove
48 | id: stove
49 | activity: cooking
50 | channels: [14]
51 | - name: building2
52 | path: house_2
53 | mains:
54 | channels: [1,2]
55 | unknown:
56 | channels: []
57 | appliances:
58 | - name: refrigerator
59 | id: refrigerator
60 | activity: cooking
61 | channels: [9]
62 | - name: dishwasher
63 | id: dishwasher
64 | activity: cleaning
65 | channels: [10]
66 | - name: kitchen_outlet
67 | id: kitchen_outlets
68 | activity: cooking
69 | channels: [3,8]
70 | - name: lighting
71 | id: lighting
72 | activity: lighting
73 | channels: [4]
74 | - name: washer_dryer
75 | id: washer_dryer
76 | activity: cleaning
77 | channels: [7]
78 | - name: microwave
79 | id: microwave
80 | activity: cooking
81 | channels: [6]
82 | - name: stove
83 | id: stove
84 | activity: cooking
85 | channels: [5]
86 | - name: disposal
87 | id: disposal
88 | activity: cooking
89 | channels: [11]
90 | - name: building3
91 | path: house_3
92 | mains:
93 | channels: [1,2]
94 | unknown:
95 | channels: [3,4,12]
96 | appliances:
97 | - name: refrigerator
98 | id: refrigerator
99 | activity: cooking
100 | channels: [7]
101 | - name: dishwasher
102 | id: dishwasher
103 | activity: cleaning
104 | channels: [9]
105 | - name: kitchen_outlet
106 | id: kitchen_outlets
107 | activity: cooking
108 | channels: [21,22]
109 | - name: lighting
110 | id: lighting
111 | activity: lighting
112 | channels: [5,11,15,17,19]
113 | - name: washer_dryer
114 | id: washer_dryer
115 | activity: cleaning
116 | channels: [13,14]
117 | - name: microwave
118 | id: microwave
119 | activity: cooking
120 | channels: [16]
121 | - name: bathroom_outlet
122 | id: bathroom_gfi
123 | activity: selfcare
124 | channels: [20]
125 | - name: disposal
126 | id: disposal
127 | activity: cooking
128 | channels: [8]
129 | - name: electronics
130 | id: electronics
131 | activity: consumer
132 | channels: [6]
133 | - name: furance
134 | id: furance
135 | activity: cooking
136 | channels: [10]
137 | - name: smoke_alarms
138 | id: smoke_alarms
139 | activity: security
140 | channels: [18]
141 | - name: building4
142 | path: house_4
143 | mains:
144 | channels: [1,2]
145 | unknown:
146 | channels: [6,12]
147 | appliances:
148 | - name: dishwasher
149 | id: dishwasher
150 | activity: cleaning
151 | channels: [15]
152 | - name: kitchen_outlet
153 | id: kitchen_outlets
154 | activity: cooking
155 | channels: [5,14]
156 | - name: lighting
157 | id: lighting
158 | activity: lighting
159 | channels: [3,13,18,19]
160 | - name: washer_dryer
161 | id: washer_dryer
162 | activity: cleaning
163 | channels: [7]
164 | - name: bathroom_outlet
165 | id: bathroom_gfi
166 | activity: selfcare
167 | channels: [16,17]
168 | - name: stove
169 | id: stove
170 | activity: cooking
171 | channels: [8]
172 | - name: furance
173 | id: furance
174 | activity: cooking
175 | channels: [4]
176 | - name: smoke_alarms
177 | id: smoke_alarms
178 | activity: security
179 | channels: [12]
180 | - name: air_conditioner
181 | id: air_conditioning
182 | activity: hvac
183 | channels: [9,10,20]
184 | - name: building5
185 | path: house_5
186 | mains:
187 | channels: [1,2]
188 | unknown:
189 | channels: [5,7,10,11,15,26]
190 | appliances:
191 | - name: refrigerator
192 | id: refrigerator
193 | activity: cooking
194 | channels: [18]
195 | - name: dishwasher
196 | id: dishwasher
197 | activity: cleaning
198 | channels: [20]
199 | - name: kitchen_outlet
200 | id: kitchen_outlets
201 | activity: cooking
202 | channels: [24,25]
203 | - name: lighting
204 | id: lighting
205 | activity: lighting
206 | channels: [4,14,17,19,23]
207 | - name: washer_dryer
208 | id: washer_dryer
209 | activity: cleaning
210 | channels: [8,9]
211 | - name: microwave
212 | id: microwave
213 | activity: cooking
214 | channels: [3]
215 | - name: bathroom_outlet
216 | id: bathroom_gfi
217 | activity: selfcare
218 | channels: [16]
219 | - name: electric_heater
220 | id: electric_heat
221 | activity: hvac
222 | channels: [12,13]
223 | - name: electronics
224 | id: electronics
225 | activity:
226 | channels: [22]
227 | - name: disposal
228 | id: disposal
229 | activity: cooking
230 | channels: [21]
231 | - name: furance
232 | id: furance
233 | activity: cooking
234 | channels: [6]
235 | - name: building6
236 | path: house_6
237 | mains:
238 | channels: [1,2]
239 | unknown:
240 | channels: [10,11]
241 | appliances:
242 | - name: refrigerator
243 | id: refrigerator
244 | activity: cooking
245 | channels: [8]
246 | - name: dishwasher
247 | id: dishwasher
248 | activity: cleaning
249 | channels: [9]
250 | - name: kitchen_outlet
251 | id: kitchen_outlets
252 | activity: cooking
253 | channels: [3,13]
254 | - name: lighting
255 | id: lighting
256 | activity: lighting
257 | channels: [14]
258 | - name: washer_dryer
259 | id: washer_dryer
260 | activity: cleaning
261 | channels: [4]
262 | - name: bathroom_outlet
263 | id: bathroom_gfi
264 | activity: selfcare
265 | channels: [7]
266 | - name: electric_heater
267 | id: electric_heat
268 | activity: hvac
269 | channels: [12]
270 | - name: stove
271 | id: stove
272 | activity: cooking
273 | channels: [5]
274 | - name: electronics
275 | id: electronics
276 | activity:
277 | channels: [6]
278 | - name: air_conditioner
279 | id: air_conditioning
280 | activity: hvac
281 | channels: [15,16,17]
282 |
--------------------------------------------------------------------------------
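A sketch of how an entry in this file can be combined with redd.load to read a single appliance; the dataset root below follows the /tmp/redd path from settings.yaml, and the refrigerator of building1 is just an example.

import os

import redd
from utils import load_yaml

meta = load_yaml("redd.yaml")
building = meta["buildings"][0]  # building1 / house_1
fridge = next(a for a in building["appliances"] if a["name"] == "refrigerator")

# /tmp/redd/low_freq/house_1, channel_5.dat
path = os.path.join("/tmp/redd", meta["path"], building["path"])
series = redd.load(fridge["name"], path, fridge["channels"])
print(series.describe())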
/settings.yaml:
--------------------------------------------------------------------------------
1 | dataset:
2 | path: /tmp/redd
3 | hparams:
4 | lr: 0.001
5 | batch_size: 64
6 | epochs: 5
7 | train_size: 0.7
8 | test_size: 0.3
9 |
10 | appliances:
11 | dishwasher:
12 | name: dish washer
13 | filename: dishwasher.th
14 | buildings:
15 | train:
16 | - redd_house2
17 | - redd_house3
18 | - redd_house4
19 | - redd_house5
20 | - redd_house6
21 | test:
22 | - redd_house1
23 | active_threshold: 15.0
24 | active_ratio: 0.5
25 | active_oversample: 5
26 | normalization: False
27 | model: ModelPaperBackward
28 | hparams:
29 | L: 2304
30 | F: 32
31 | K: 16
32 | H: 512
33 | dishwasher-norm:
34 | name: dish washer
35 | filename: dishwasher.th
36 | buildings:
37 | train:
38 | - redd_house2
39 | - redd_house3
40 | - redd_house4
41 | - redd_house5
42 | - redd_house6
43 | test:
44 | - redd_house1
45 | active_threshold: 150.0
46 | active_ratio: 0.5
47 | active_oversample: 5
48 | normalization: True
49 | model: ModelPaper
50 | hparams:
51 | L: 1500
52 | F: 32
53 | K: 16
54 | H: 512
55 | dishwasher-onlyregression:
56 | name: dish washer
57 | filename: dishwasher.th
58 | buildings:
59 | train:
60 | - redd_house2
61 | - redd_house3
62 | - redd_house4
63 | - redd_house5
64 | - redd_house6
65 | test:
66 | - redd_house1
67 | active_threshold: 50.0
68 | active_ratio: 0.5
69 | active_oversample: 5
70 | normalization: False
71 | model: ModelOnlyRegression
72 | hparams:
73 | L: 2304
74 | F: 32
75 | K: 16
76 | H: 512
77 | dishwasher-onlyregression-norm:
78 | name: dish washer
79 | filename: dishwasher.th
80 | buildings:
81 | train:
82 | - redd_house2
83 | - redd_house3
84 | - redd_house4
85 | - redd_house5
86 | - redd_house6
87 | test:
88 | - redd_house1
89 | active_threshold: 50.0
90 | active_ratio: 0.5
91 | active_oversample: 5
92 | normalization: True
93 | model: ModelOnlyRegression
94 | hparams:
95 | L: 2304
96 | F: 32
97 | K: 16
98 | H: 512
99 | dishwasher-classattention:
100 | name: dish washer
101 | filename: dishwasher.th
102 | buildings:
103 | train:
104 | - redd_house2
105 | - redd_house3
106 | - redd_house4
107 | - redd_house5
108 | - redd_house6
109 | test:
110 | - redd_house1
111 | active_threshold: 15.0
112 | active_ratio: 0.5
113 | active_oversample: 5
114 | normalization: False
115 | model: ModelClassAttention
116 | hparams:
117 | L: 2304
118 | F: 32
119 | K: 16
120 | H: 512
121 | fridge:
122 | name: fridge
123 | filename: fridge.th
124 |
125 | buildings:
126 | train:
127 | - redd_house2
128 | - redd_house3
129 | - redd_house5
130 | - redd_house6
131 | test:
132 | - redd_house1
133 | active_threshold: 15.0
134 | active_ratio: null
135 | normalization: False
136 | model: ModelPaperBackward
137 | hparams:
138 | L: 496
139 | F: 32
140 | K: 8
141 | H: 1024
142 | fridge-norm:
143 | name: fridge
144 | filename: fridge.th
145 |
146 | buildings:
147 | train:
148 | - redd_house2
149 | - redd_house3
150 | - redd_house5
151 | - redd_house6
152 | test:
153 | - redd_house1
154 | active_threshold: 15.0
155 | active_ratio: null
156 | normalization: True
157 | model: ModelPaper
158 | hparams:
159 | L: 496
160 | F: 32
161 | K: 8
162 | H: 1024
163 | fridge-onlyregression:
164 | name: fridge
165 | filename: fridge.th
166 |
167 | buildings:
168 | train:
169 | - redd_house2
170 | - redd_house3
171 | - redd_house5
172 | - redd_house6
173 | test:
174 | - redd_house1
175 | active_threshold: 15.0
176 | active_ratio: null
177 | normalization: False
178 | onlyregression: True
179 | model: ModelOnlyRegression
180 | hparams:
181 | L: 496
182 | F: 32
183 | K: 8
184 | H: 1024
185 | fridge-onlyregression-norm:
186 | name: fridge
187 | filename: fridge.th
188 |
189 | buildings:
190 | train:
191 | - redd_house2
192 | - redd_house3
193 | - redd_house5
194 | - redd_house6
195 | test:
196 | - redd_house1
197 | active_threshold: 15.0
198 | active_ratio: null
199 | normalization: True
200 | model: ModelOnlyRegression
201 | hparams:
202 | L: 496
203 | F: 32
204 | K: 8
205 | H: 1024
206 | fridge-classattention:
207 | name: fridge
208 | filename: fridge.th
209 |
210 | buildings:
211 | train:
212 | - redd_house2
213 | - redd_house3
214 | - redd_house5
215 | - redd_house6
216 | test:
217 | - redd_house1
218 | active_threshold: 15.0
219 | active_ratio: null
220 | normalization: False
221 | model: ModelClassAttention
222 | hparams:
223 | L: 496
224 | F: 32
225 | K: 8
226 | H: 1024
227 | microwave:
228 | name: microwave
229 | filename: microwave.th
230 | buildings:
231 | train:
232 | - redd_house2
233 | - redd_house3
234 | - redd_house5
235 | test:
236 | - redd_house1
237 | active_threshold: 15.0
238 | active_ratio: 0.5
239 | active_oversample: 5
240 | normalization: False
241 | model: ModelPaperBackward
242 | hparams:
243 | L: 128
244 | F: 16
245 | K: 8
246 | H: 1024
247 | microwave-norm:
248 | name: microwave
249 | filename: microwave.th
250 | buildings:
251 | train:
252 | - redd_house2
253 | - redd_house3
254 | - redd_house5
255 | test:
256 | - redd_house1
257 | active_threshold: 100.0
258 | active_ratio: 0.5
259 | active_oversample: 5
260 | normalization: True
261 | model: ModelPaper
262 | hparams:
263 | L: 128
264 | F: 16
265 | K: 8
266 | H: 1024
267 | microwave-onlyregression:
268 | name: microwave
269 | filename: microwave.th
270 | buildings:
271 | train:
272 | - redd_house2
273 | - redd_house3
274 | - redd_house5
275 | test:
276 | - redd_house1
277 | active_threshold: 100.0
278 | active_ratio: 0.5
279 | active_oversample: 5
280 | normalization: False
281 | model: ModelOnlyRegression
282 | hparams:
283 | L: 128
284 | F: 16
285 | K: 8
286 | H: 1024
287 | microwave-onlyregression-norm:
288 | name: microwave
289 | filename: microwave.th
290 | buildings:
291 | train:
292 | - redd_house2
293 | - redd_house3
294 | - redd_house5
295 | test:
296 | - redd_house1
297 | active_threshold: 100.0
298 | active_ratio: 0.5
299 | active_oversample: 5
300 | normalization: True
301 | model: ModelOnlyRegression
302 | hparams:
303 | L: 128
304 | F: 16
305 | K: 8
306 | H: 1024
307 | microwave-classattention:
308 | name: microwave
309 | filename: microwave.th
310 | buildings:
311 | train:
312 | - redd_house2
313 | - redd_house3
314 | - redd_house5
315 | test:
316 | - redd_house1
317 | active_threshold: 15.0
318 | active_ratio: 0.5
319 | active_oversample: 5
320 | normalization: False
321 | model: ModelClassAttention
322 | hparams:
323 | L: 128
324 | F: 16
325 | K: 8
326 | H: 1024
327 |
--------------------------------------------------------------------------------
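Each appliance entry above names a model class from model.py together with its L/F/K/H hyperparameters. A short sketch of how such an entry is resolved into a network instance, following the getattr pattern used in test.py; the dishwasher key is just an example.

import model as nilmmodel
from utils import load_yaml

settings = load_yaml("settings.yaml")
appliance = settings["appliances"]["dishwasher"]

params = appliance["hparams"]                        # {"L": 2304, "F": 32, "K": 16, "H": 512}
model_type = getattr(nilmmodel, appliance["model"])  # e.g. ModelPaperBackward
net = model_type(params["L"], params["F"], params["K"], params["H"])
print(net.classification_enabled)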
/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import sys
5 |
6 | import numpy as np
7 | import pandas as pd
8 | import torch
9 | import torch.nn as nn
10 | import torch.nn.functional as F
11 | import torch.optim as optim
12 |
13 | import model as nilmmodel
14 | import matplotlib.pyplot as plt
15 |
16 | from dataset import InMemoryKoreaDataset
17 | from utils import error
18 | from utils import save_model, load_model, save_dataset
19 | from utils import plot_window
20 |
21 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
22 |
23 |
24 | def test_single(
25 | model, test_loader, transform, appliance, batch_size=64, plotfilename=None
26 | ):
27 | """
28 | Test specific pretrained model and appliance on test dataset
29 | """
30 |
31 | errs, losses = [], []
32 |
33 | L = appliance["hparams"]["L"]
34 | window_index = np.array(range(L))
35 |
36 | # The disaggregation phase, also carried out with a sliding window
37 | # over the aggregated signal with hop size equal to 1 sample,
38 | # generates overlapped windows of the disaggregated signal.
39 |     # The overlapped windows are reconstructed by means of a median
40 |     # filter on the overlapping portions.
41 |
42 | # Use buffer to register overlapped result and apply median filter
43 | overlapped_y = {}
44 | overlapped_yhat = {}
45 |
46 | with torch.no_grad():
47 | model.eval()
48 | for idx, (x, y, clas) in enumerate(test_loader):
49 | # model input data
50 | x = torch.unsqueeze(x, dim=1)
51 |
52 | x, y, clas = x.to(device), y.to(device), clas.to(device)
53 | yhat, reghat, alphas, clashat = model(x)
54 |
55 |             # Force the loss to 0 in testing in order to reuse the current
56 |             # implementation; it is not used in the testing analysis
57 | loss = 0.0
58 |
59 | # Calculate and use error to evaluate prediction
60 | err = error(y, yhat)
61 |
62 | err_ = err.item()
63 | losses.append(loss)
64 | errs.append(err_)
65 |
66 | x = x.cpu()
67 | y = y.cpu()
68 | yhat = yhat.cpu()
69 | if transform:
70 |                 # If the transform is enabled, undo the standardization to
71 |                 # properly evaluate the error (paper benchmarking) and visualize
72 | if (
73 | transform["sample_mean"]
74 | and transform["sample_std"]
75 | and transform["target_mean"]
76 | and transform["target_std"]
77 | ):
78 |
79 |                     # Undo standardization
80 | x = (x * transform["sample_std"]) + transform["sample_mean"]
81 | y = (y * transform["target_std"]) + transform["target_mean"]
82 | yhat = (yhat * transform["target_std"]) + transform["target_mean"]
83 |
84 | if idx % 100 == 0:
85 | # Plotting sliding window samples in order to debug or
86 | # keep track of current testing process
87 | print(f"test batch={idx+1} loss={loss:.2f} err={err:.2f}")
88 | if plotfilename:
89 | filename = plotfilename + f".{idx}.attention.png"
90 | reghat = reghat.cpu()
91 | if transform:
92 |                     # If the transform is enabled, undo the standardization
93 |                     # to properly visualize the regression branch prediction
94 | if transform["target_std"] and transform["target_mean"]:
95 | reghat = (reghat * transform["sample_std"]) + transform[
96 | "sample_mean"
97 | ]
98 | # Tricky workaround to rescale regression output and make
99 | # it easier to visualize and interpret results
100 | reghat = reghat / 10.0
101 | plot_window(
102 | x,
103 | y,
104 | yhat,
105 | reghat,
106 | clashat.cpu(),
107 | alphas.cpu(),
108 | loss,
109 | err_,
110 | model.classification_enabled,
111 | filename,
112 | )
113 |
114 | y = y.numpy()
115 | yhat = yhat.numpy()
116 |
117 | # Update overlapping windows buffer to calculate median filter
118 | for offset, yy, yyhat in zip(range(batch_size), y, yhat):
119 | index = (idx * batch_size) + window_index + offset
120 |
121 | for index_, yy_, yyhat_ in zip(index, yy, yyhat):
122 | overlapped_y[index_] = yy_
123 | overlapped_yhat.setdefault(index_, [])
124 | overlapped_yhat[index_].append(yyhat_)
125 |
126 | if len(overlapped_yhat[index_]) == L:
127 |                         # Calculate the median once all overlapped windows for a
128 |                         # specific index are available. Done to prevent memory
129 |                         # overrun
130 | overlapped_yhat[index_] = np.median(
131 | np.array(overlapped_yhat[index_])
132 | )
133 |     # Final buffers with single-point, single-prediction values after the median filter
134 | final_y = []
135 | final_yhat = []
136 | index = sorted(list(overlapped_yhat.keys()))
137 |
138 |     # Calculate the median for any remaining indices whose overlapped
139 |     # windows were not already collapsed in the loop above
141 | for i in index:
142 | if isinstance(overlapped_yhat[i], list):
143 | overlapped_yhat[i] = np.median(np.array(overlapped_yhat[i]))
144 |
145 | # Update final prediction buffers
146 | final_yhat.append(overlapped_yhat[i])
147 | final_y.append(overlapped_y[i])
148 |
149 | final_y = np.array(final_y)
150 | final_yhat = np.array(final_yhat)
151 |
152 |     filename = plotfilename + ".result.csv"
153 | result = pd.DataFrame({"y": final_y, "yhat": final_yhat})
154 | result.to_csv(filename, index=None, sep=";")
155 |
156 | # Calculate MAE over single-point single-prediction time series
157 | return np.nanmean(np.abs(final_yhat - final_y))
158 |
159 |
160 | def test_model(datapath, output, appliance, hparams, doplot=None):
161 | """
162 | Test specific pretrained model and appliance on testing
163 | dataset
164 | """
165 |
166 | # Load appliance specifications and model hyperparameters
167 | # from settings
168 |
169 | buildings = appliance["buildings"]["test"]
170 | name = appliance["name"]
171 |
172 | batch_size = hparams["batch_size"]
173 | params = appliance["hparams"]
174 |
175 | transform_enabled = appliance.get("normalization", False)
176 | model_type = appliance.get("model", "ModelPaper")
177 |
178 | # Initialize model network architecture using specified
179 | # hyperaparameters in settings
180 | model_type = getattr(nilmmodel, model_type)
181 | model = model_type(params["L"], params["F"], params["K"], params["H"])
182 | model = model.to(device)
183 |
184 |     # Load the pretrained model from file
185 | name = appliance["name"]
186 | filename = os.path.join(output, appliance["filename"])
187 | transform, record_err = load_model(filename, model)
188 |
189 | if not transform_enabled:
190 | transform = None
191 |
192 | filename = os.path.join(output, appliance["filename"])
193 | plotfilename = None
194 | if doplot:
195 | plotfilename = filename
196 |
197 | # Initialize active settings described in documentation.
198 | # Used to identify whether an appliance is classified as active
199 |     # Used to enable oversampling to fix the sliding windows active/inactive
200 | # imbalance
201 | active_threshold = appliance.get("active_threshold", 0.15)
202 | active_ratio = appliance.get("active_ratio", 0.5)
203 | active_oversample = appliance.get("active_oversample", 2)
204 |
205 | # Load test dataset
206 | my_dataset = InMemoryKoreaDataset(
207 | datapath,
208 | buildings,
209 | name,
210 | windowsize=params["L"],
211 | active_threshold=False,
212 | active_ratio=False,
213 | active_oversample=False,
214 | transform_enabled=transform_enabled,
215 |         transform=None, # Using test standardization
216 |         # NOTE: Enable this to use training standardization
217 | # transform=transform,
218 | )
219 |
220 |     # Build the transform from the test dataset statistics (test standardization)
221 | transform = {
222 | "sample_mean": my_dataset.sample_mean,
223 | "sample_std": my_dataset.sample_std,
224 | "target_mean": my_dataset.target_mean,
225 | "target_std": my_dataset.target_std,
226 | }
227 |
228 |     # Initialize the test data loader using the batch size from settings
229 | test_loader = torch.utils.data.DataLoader(
230 | my_dataset, batch_size=hparams["batch_size"]
231 | )
232 |
233 | # Launch testing on test dataset
234 | output = os.path.join(output, f"{name}")
235 | err = test_single(
236 | model, test_loader, transform, appliance, batch_size, plotfilename
237 | )
238 | print(f"Test err={err:.2f}")
239 |
--------------------------------------------------------------------------------
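test_single reconstructs the hop-size-1 sliding-window predictions by collecting, for every time index, the values of all windows that cover it and collapsing them with a median. A toy numpy sketch of that reconstruction, detached from the model and data loader; the window length and values are made up.

import numpy as np

windows = np.array([          # one predicted window per row,
    [1.0, 2.0, 3.0, 4.0],     # each shifted by one sample (hop size 1)
    [2.0, 3.0, 4.0, 5.0],
    [3.0, 4.0, 5.0, 6.0],
])

overlapped = {}
for start, window in enumerate(windows):
    for offset, value in enumerate(window):
        overlapped.setdefault(start + offset, []).append(value)

# Median over however many windows cover each time index
reconstruction = np.array([np.median(overlapped[i]) for i in sorted(overlapped)])
print(reconstruction)  # [1. 2. 3. 4. 5. 6.]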
/model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import sys
4 |
5 | sys.path.append("/content/gdrive/MyDrive/ColabNotebooks")
6 |
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 |
11 | class AdditiveAttention(torch.nn.Module):
12 | """
13 | Attention mechanism for the models
14 | """
15 | def __init__(self, dim=5):
16 | super().__init__()
17 |
18 | self.dim = dim
19 |
20 | # Using paper notation (W, V)
21 | self.W = nn.Linear(self.dim, self.dim)
22 | self.V = nn.Linear(self.dim, 1, bias=False)
23 |
24 | def forward(self, h):
25 |         # Paper attention mechanism
26 | # et = V*tanh(W*ht + b)
27 | # αt = softmax(et)
28 | # c = sum(αt*ht)
29 | layer_1 = self.W(h)
30 | layer_1 = torch.tanh(layer_1)
31 | layer_2 = self.V(layer_1)
32 | alphas = F.softmax(layer_2, dim=1)
33 | c = h * alphas # [batch, l, 2*h] x [batch, l, 1] = [batch, l, 2*h]
34 | output = torch.sum(
35 | c, 1
36 | ) # sum elements in dimension 1 (seq_length) [batch, 2*h]
37 | return output, alphas
38 |
39 | class AdditiveAttentionBackwards(AdditiveAttention):
40 | """
41 | Attention mechanism for the models
42 |     NOTE: Nearly the same implementation as the main additive attention,
43 |     but kept for backwards compatibility, as some models have already
44 |     been trained using this module. Otherwise loading the model fails
45 |     due to a non-matching architecture
46 | """
47 |
48 | def forward(self, h):
49 | output, alphas = super().forward(h)
50 | return output
51 |
52 | class ModelPaper(nn.Module):
53 | """
54 | Implementation of the network architecture described
55 | in the paper
56 | Both regression and classification branches enabled
57 | """
58 | def __init__(self, l, filters, kernel, hunits):
59 | super().__init__()
60 |
61 | self.regression_enabled = True
62 | self.classification_enabled = True
63 |
64 | self.conv = nn.Sequential(
65 | nn.Conv1d(1, filters, kernel, padding=kernel // 2),
66 | nn.ReLU(),
67 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
68 | nn.ReLU(),
69 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
70 | nn.ReLU(),
71 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
72 | nn.ReLU(),
73 | )
74 |
75 | self.lstm = nn.LSTM(
76 | input_size=filters,
77 | hidden_size=hunits,
78 | num_layers=1,
79 | bidirectional=True,
80 | batch_first=True,
81 | )
82 | # input [batch, l-x(from convs), filters]
83 |         # output [batch, l-x(from the convs), 2*hunits]
84 |
85 | self.attention = AdditiveAttention(dim=(2 * hunits))
86 | self.regression = nn.Sequential(
87 | nn.Linear(2 * hunits, hunits),
88 | nn.ReLU(),
89 | nn.Linear(hunits, l)
90 | )
91 |
92 | self.classification1 = nn.Sequential(
93 | nn.Conv1d(1, 10, 10, 1),
94 | nn.ReLU(),
95 | nn.Conv1d(10, 30, 8, 1),
96 | nn.ReLU(),
97 | nn.Conv1d(30, 40, 6, 1),
98 | nn.ReLU(),
99 | nn.Conv1d(40, 50, 5, 1),
100 | nn.ReLU(),
101 | nn.Conv1d(50, 50, 5, 1),
102 | nn.ReLU(),
103 | nn.Conv1d(50, 50, 5, 1),
104 | nn.ReLU(),
105 | ) # output --> [batch, 50, l-33]
106 |
107 | self.classification2 = nn.Sequential(
108 | nn.Flatten(start_dim=1)
109 | ) # flatten --> [batch, (l-33)*50]
110 |
111 | self.classification3 = nn.Sequential(
112 | nn.Linear((l - 33) * 50, 1024),
113 | nn.ReLU(),
114 | nn.Linear(1024, l),
115 | nn.Sigmoid()
116 | )
117 |
118 | def forward(self, x):
119 | reg = self.conv(x)
120 | reg = reg.permute(0, 2, 1)
121 | output_lstm, (h_n, c_n) = self.lstm(reg)
122 | context, alphas = self.attention(output_lstm)
123 | reg = self.regression(context)
124 |
125 | clas1 = self.classification1(x)
126 | clas2 = self.classification2(clas1)
127 | clas = self.classification3(clas2)
128 | y = reg * clas
129 | return y, reg, alphas, clas
130 |
131 | class ModelPaperBackward(nn.Module):
132 | """
133 | Implementation of the network architecture described
134 | in the paper
135 |     NOTE: Nearly the same implementation as ModelPaper, but kept for
136 |     backwards compatibility, as some models have already been trained
137 |     using this module. Otherwise loading the model fails due to a
138 |     non-matching architecture
139 |
140 | Both regression and classification branches enabled
141 | """
142 | def __init__(self, l, filters, kernel, hunits):
143 | super().__init__()
144 |
145 | self.regression_enabled = True
146 | self.classification_enabled = True
147 |
148 | self.conv = nn.Sequential(
149 | nn.Conv1d(1, filters, kernel, padding=kernel // 2),
150 | nn.ReLU(),
151 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
152 | nn.ReLU(),
153 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
154 | nn.ReLU(),
155 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
156 | nn.ReLU(),
157 | )
158 |
159 | self.lstm = nn.LSTM(
160 | input_size=filters,
161 | hidden_size=hunits,
162 | num_layers=1,
163 | bidirectional=True,
164 | batch_first=True,
165 | )
166 | # input [batch, l-x(from convs), filters]
167 | # output [batch, l-x(from convs), 2*hunits]
168 |
169 | self.regression = nn.Sequential(
170 | AdditiveAttentionBackwards(
171 | dim=2 * hunits
172 | ),
173 | # input [batch, l (LSTM), 2*hunits]
174 | # output [batch, 2*hunits]
175 | nn.Linear(2 * hunits, hunits),
176 | nn.ReLU(),
177 | nn.Linear(hunits, l),
178 | )
179 |
180 | self.classification1 = nn.Sequential(
181 | nn.Conv1d(1, 10, 10, 1),
182 | nn.ReLU(),
183 | nn.Conv1d(10, 30, 8, 1),
184 | nn.ReLU(),
185 | nn.Conv1d(30, 40, 6, 1),
186 | nn.ReLU(),
187 | nn.Conv1d(40, 50, 5, 1),
188 | nn.ReLU(),
189 | nn.Conv1d(50, 50, 5, 1),
190 | nn.ReLU(),
191 | nn.Conv1d(50, 50, 5, 1),
192 | nn.ReLU(),
193 | ) # output [batch, 50, l-33]
194 |
195 | self.classification2 = nn.Sequential(
196 | nn.Flatten(start_dim=1)
197 | ) # flatten --> [batch, (l-33)*50]
198 |
199 | self.classification3 = nn.Sequential(
200 | nn.Linear((l - 33) * 50, 1024),
201 | nn.ReLU(),
202 | nn.Linear(1024, l),
203 | nn.Sigmoid()
204 | )
205 |
206 | def forward(self, x):
207 | reg = self.conv(x)
208 | reg = reg.permute(0, 2, 1)
209 | output_lstm, (h_n, c_n) = self.lstm(reg)
210 | reg = self.regression(output_lstm)
211 |
212 | clas1 = self.classification1(x)
213 | clas2 = self.classification2(clas1)
214 | clas = self.classification3(clas2)
215 |
216 | y = reg * clas
217 | alphas = torch.zeros(reg.shape)
218 | return y, reg, alphas, clas
219 |
220 | class ModelOnlyRegression(nn.Module):
221 | """
222 | Implementation of the network architecture described
223 |     in the paper but with the classification branch removed.
224 |     Only the regression branch is trained and used to predict
225 |     the appliance disaggregation
226 |
227 | Only regression branch enabled
228 | """
229 | def __init__(self, l, filters, kernel, hunits):
230 | super().__init__()
231 |
232 | self.regression_enabled = True
233 | self.classification_enabled = False
234 |
235 | self.conv = nn.Sequential(
236 | nn.Conv1d(1, filters, kernel, padding=kernel // 2),
237 | nn.ReLU(),
238 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
239 | nn.ReLU(),
240 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
241 | nn.ReLU(),
242 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
243 | nn.ReLU(),
244 | )
245 |
246 | self.lstm = nn.LSTM(
247 | input_size=filters,
248 | hidden_size=hunits,
249 | num_layers=1,
250 | bidirectional=True,
251 | batch_first=True,
252 | )
253 | # input [batch, l-x(from convs), filters]
254 | # output [batch, l-x(from convs), 2*hunits]
255 |
256 | self.attention = AdditiveAttention(dim=(2 * hunits))
257 | self.regression = nn.Sequential(
258 | nn.Linear(2 * hunits, hunits),
259 | nn.ReLU(),
260 | nn.Linear(hunits, l)
261 | )
262 |
263 | def forward(self, x):
264 | reg = self.conv(x)
265 | reg = reg.permute(0, 2, 1)
266 | output_lstm, (h_n, c_n) = self.lstm(reg)
267 | context, alphas = self.attention(output_lstm)
268 | reg = self.regression(context)
269 |
270 | y = reg
271 |         clas = reg  # TEMPFIX to keep the return signature compatible with the rest of the code
272 | return y, reg, alphas, clas
273 |
274 | class ModelClassAttention(nn.Module):
275 | """
276 | Implementation of the network architecture described
277 |     in the paper but feeding the attention context into the classification
278 |     branch, so attention is used in both regression and classification
279 |
280 | Both regression and classification branches enabled
281 | """
282 | def __init__(self, l, filters, kernel, hunits):
283 | super().__init__()
284 |
285 | self.regression_enabled = True
286 | self.classification_enabled = True
287 |
288 | self.conv = nn.Sequential(
289 | nn.Conv1d(1, filters, kernel, padding=kernel // 2),
290 | nn.ReLU(),
291 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
292 | nn.ReLU(),
293 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
294 | nn.ReLU(),
295 | nn.Conv1d(filters, filters, kernel, padding=kernel // 2),
296 | nn.ReLU(),
297 | )
298 |
299 | self.lstm = nn.LSTM(
300 | input_size=filters,
301 | hidden_size=hunits,
302 | num_layers=1,
303 | bidirectional=True,
304 | batch_first=True,
305 | )
306 | # input [batch, l-x(from convs), filters]
307 | # output [batch, l-x(from convs), 2*hunits]
308 |
309 | self.attention = AdditiveAttention(dim=(2 * hunits))
310 | self.regression = nn.Sequential(
311 | nn.Linear(2 * hunits, hunits),
312 | nn.ReLU(),
313 | nn.Linear(hunits, l)
314 | )
315 |
316 | self.classification1 = nn.Sequential(
317 | nn.Conv1d(1, 10, 10, 1),
318 | nn.ReLU(),
319 | nn.Conv1d(10, 30, 8, 1),
320 | nn.ReLU(),
321 | nn.Conv1d(30, 40, 6, 1),
322 | nn.ReLU(),
323 | nn.Conv1d(40, 50, 5, 1),
324 | nn.ReLU(),
325 | nn.Conv1d(50, 50, 5, 1),
326 | nn.ReLU(),
327 | nn.Conv1d(50, 50, 5, 1),
328 | nn.ReLU(),
329 | ) # output [batch, 50, l-33]
330 |
331 | self.classification2 = nn.Sequential(
332 | nn.Flatten(start_dim=1)
333 | ) # flatten [batch, (l-33)*50]
334 |
335 | self.classification3 = nn.Sequential(
336 | nn.Linear((l - 33) * 50 + 2 * hunits, 1024),
337 | nn.ReLU(),
338 | nn.Linear(1024, l),
339 | nn.Sigmoid(),
340 | )
341 |
342 | def forward(self, x):
343 | reg = self.conv(x)
344 | reg = reg.permute(0, 2, 1)
345 | output_lstm, (h_n, c_n) = self.lstm(reg)
346 | context, alphas = self.attention(output_lstm)
347 | reg = self.regression(context)
348 |
349 | clas1 = self.classification1(x)
350 | clas2 = self.classification2(clas1)
351 | clas3 = torch.cat((clas2, context), 1)
352 | clas = self.classification3(clas3)
353 |
354 | y = reg * clas
355 | return y, reg, alphas, clas
356 |
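357 | 
358 | if __name__ == "__main__":
359 |     # Minimal shape-check sketch for the gated models above. The hyperparameter
360 |     # values below are hypothetical placeholders, not values taken from the
361 |     # project settings.
362 |     l, filters, kernel, hunits = 496, 16, 5, 64
363 |     model = ModelClassAttention(l, filters, kernel, hunits)
364 |     x = torch.randn(8, 1, l)  # [batch, channels=1, window length L]
365 |     y, reg, alphas, clas = model(x)
366 |     # classification1 shrinks the window by 33 samples (six valid convolutions
367 |     # with kernels 10, 8, 6, 5, 5, 5), hence the (l - 33) * 50 flattened
368 |     # features, plus 2 * hunits attention-context features in classification3.
369 |     assert y.shape == reg.shape == clas.shape == (8, l)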
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os
4 | import sys
5 | import pprint
6 |
7 | from datetime import datetime
8 |
9 | import numpy as np
10 | import pandas as pd
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | import torch.optim as optim
15 |
16 | import model as nilmmodel
17 | import matplotlib.pyplot as plt
18 |
19 | from dataset import InMemoryKoreaDataset
20 | from utils import error
21 | from utils import save_model, load_model, save_dataset
22 | from utils import plot_window
23 |
24 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
25 |
26 |
27 | def summary(path, results):
28 | """
29 | Helper method used to save training results
30 | Plot train vs validation loss and error to diagnose
31 | - Underfitting
32 | - Overfitting
33 | - Good fitting
34 | """
35 | df = pd.DataFrame(
36 | [
37 | {
38 | "epoch": x[0][0],
39 | "train_loss": x[0][1],
40 | "train_err": x[0][2],
41 | "eval_loss": x[1][1],
42 | "eval_err": x[1][2],
43 | }
44 | for x in results
45 | ]
46 | ).set_index("epoch")
47 |
48 |     # Plot train vs eval loss for diagnosis
49 | columns = ["train_loss", "eval_loss"]
50 | filename = os.path.join(path, "results-loss.csv")
51 | df[columns].round(3).to_csv(filename, sep=";")
52 | filename = os.path.join(path, "results-loss.png")
53 |
54 | plt.figure(1, figsize=(10, 8))
55 | df[columns].round(3).plot()
56 | plt.savefig(filename)
57 | plt.clf()
58 |
59 |     # Plot train vs eval error for diagnosis
60 | columns = ["train_err", "eval_err"]
61 | filename = os.path.join(path, "results-error.csv")
62 | df[columns].round(3).to_csv(filename, sep=";")
63 | filename = os.path.join(path, "results-error.png")
64 |
65 | plt.figure(1, figsize=(10, 8))
66 | df[columns].round(3).plot()
67 | plt.savefig(filename)
68 | plt.clf()
69 |
70 |
71 | def train_single_epoch(
72 | epoch, model, train_loader, transform, optimizer, eval_loader, plotfilename=None
73 | ):
74 | """
75 | Train single epoch for specific model and appliance
76 | """
77 | model.train()
78 | errs, losses = [], []
79 |
80 | start = datetime.now() # setup a timer for the train
81 | for idx, (x, y, clas) in enumerate(train_loader):
82 | # Prepare model input data
83 | x = torch.unsqueeze(x, dim=1)
84 |
85 | optimizer.zero_grad()
86 | x, y, clas = x.to(device), y.to(device), clas.to(device)
87 | yhat, reghat, alphas, clashat = model(x)
88 |
89 | # Calculate prediction loss. See network architecture
90 | # and loss details in documentation
91 | loss_out = F.mse_loss(yhat, y)
92 |
93 | # Different loss functions are used depending on model_type
94 |         # If classification is disabled the loss function does not
95 |         # include the classification loss
96 | if model.classification_enabled:
97 | loss_clas = F.binary_cross_entropy(clashat, clas)
98 | loss = loss_out + loss_clas
99 | else:
100 | loss = loss_out
101 |
102 | loss.backward()
103 | optimizer.step()
104 | err = error(y, yhat)
105 |
106 | loss_, err_ = loss.item(), err.item()
107 | losses.append(loss_)
108 | errs.append(err_)
109 |
110 | if idx % 100 == 0:
111 | # Plotting sliding window samples in order to debug or
112 |             # keep track of the current training process
113 | print(f"train epoch={epoch} batch={idx+1} loss={loss:.2f} err={err:.2f}")
114 | if plotfilename:
115 | filename = plotfilename + f".{idx}.png"
116 | x = x.cpu()
117 | y = y.cpu()
118 | yhat = yhat.cpu()
119 | reghat = reghat.cpu()
120 | if transform:
121 |                     # If transform is enabled, undo standardization in order
122 |                     # to properly visualize the regression branch prediction
123 | x = (x * transform["sample_std"]) + transform["sample_mean"]
124 | y = (y * transform["target_std"]) + transform["target_mean"]
125 | yhat = (yhat * transform["target_std"]) + transform["target_mean"]
126 | reghat = (reghat * transform["sample_std"]) + transform[
127 | "sample_mean"
128 | ]
129 | # Tricky workaround to rescale regression output and make
130 | # it easier to visualize and interpret results
131 | reghat = reghat / 10.0
132 | plot_window(
133 | x,
134 | y,
135 | yhat,
136 | reghat,
137 | clashat.cpu(),
138 | alphas.cpu(),
139 | loss_,
140 | err_,
141 | model.classification_enabled,
142 | filename,
143 | )
144 |
145 | end = datetime.now()
146 | total_seconds = (end - start).seconds
147 | print("------------------------------------------")
148 | print(f"Epoch seconds: {total_seconds}")
149 | print("------------------------------------------")
150 |
151 | return np.mean(losses), np.mean(errs)
152 |
153 |
154 | def eval_single_epoch(model, eval_loader, transform, plotfilename=None):
155 | """
156 | Eval single epoch for specific model and appliance
157 | """
158 |
159 | errs, losses = [], []
160 | with torch.no_grad():
161 | model.eval()
162 | for idx, (x, y, clas) in enumerate(eval_loader):
163 | # Prepare model input data
164 | x = torch.unsqueeze(x, dim=1)
165 |
166 | x, y, clas = x.to(device), y.to(device), clas.to(device)
167 | yhat, reghat, alphas, clashat = model(x)
168 |
169 | # Calculate prediction loss. See network architecture
170 | # and loss details in documentation
171 | loss_out = F.mse_loss(yhat, y)
172 |
173 | # Different loss functions are used depending on model_type
174 |             # If classification is disabled the loss function does not
175 |             # include the classification loss
176 | if model.classification_enabled:
177 | loss_clas = F.binary_cross_entropy(clashat, clas)
178 | loss = loss_out + loss_clas
179 | else:
180 | loss = loss_out
181 | err = error(y, yhat)
182 |
183 | loss_, err_ = loss.item(), err.item()
184 | losses.append(loss_)
185 | errs.append(err_)
186 |
187 | if idx % 100 == 0:
188 | # Plotting sliding window samples in order to debug or
189 |                 # keep track of the current evaluation process
190 | print(f"eval batch={idx+1} loss={loss:.2f} err={err:.2f}")
191 | if plotfilename:
192 | filename = plotfilename + f".{idx}.attention.png"
193 | x = x.cpu()
194 | y = y.cpu()
195 | yhat = yhat.cpu()
196 | reghat = reghat.cpu()
197 | if transform:
198 |                     # If transform is enabled, undo standardization in order
199 |                     # to properly visualize the regression branch prediction
200 | x = (x * transform["sample_std"]) + transform["sample_mean"]
201 | y = (y * transform["target_std"]) + transform["target_mean"]
202 | yhat = (yhat * transform["target_std"]) + transform[
203 | "target_mean"
204 | ]
205 | reghat = (reghat * transform["sample_std"]) + transform[
206 | "sample_mean"
207 | ]
208 | # Tricky workaround to rescale regression output and make
209 | # it easier to visualize and interpret results
210 | reghat = reghat / 10.0
211 | plot_window(
212 | x,
213 | y,
214 | yhat,
215 | reghat,
216 | clashat.cpu(),
217 | alphas.cpu(),
218 | loss_,
219 | err_,
220 | model.classification_enabled,
221 | filename,
222 | )
223 | return np.mean(losses), np.mean(errs)
224 |
225 |
226 | def train_model(datapath, output, appliance, hparams, doplot=None, reload=True):
227 | """
228 | Train specific model and appliance
229 | """
230 |
231 | # Load appliance specifications and hyperparameters from
232 | # settings
233 | buildings = appliance["buildings"]["train"]
234 | name = appliance["name"]
235 | params = appliance["hparams"]
236 | record_err = np.inf
237 |
238 |     # Check whether data transformation is required. See details
239 | # on data normalization in documentation
240 | transform_enabled = appliance.get("normalization", False)
241 | # Load specific network architecture to train
242 | model_type = appliance.get("model", "ModelPaper")
243 |
244 | # Initialize active settings described in documentation.
245 | # Used to identify whether an appliance is classified as active
246 |     # Used to enable oversampling to fix sliding window active/inactive
247 | # imbalance
248 | active_threshold = appliance.get("active_threshold", 0.15)
249 | active_ratio = appliance.get("active_ratio", 0.5)
250 | active_oversample = appliance.get("active_oversample", 2)
251 |
252 | transform = None # Data transformation disabled by default
253 |
254 | # Load train dataset
255 | my_dataset = InMemoryKoreaDataset(
256 | datapath,
257 | buildings,
258 | name,
259 | windowsize=params["L"],
260 | active_threshold=active_threshold,
261 | active_ratio=active_ratio,
262 | active_oversample=active_oversample,
263 | transform_enabled=transform_enabled,
264 | )
265 |
266 | if transform_enabled:
267 | # Load dataset transformation parameters from dataset
268 | transform = {
269 | "sample_mean": my_dataset.sample_mean,
270 | "sample_std": my_dataset.sample_std,
271 | "target_mean": my_dataset.target_mean,
272 | "target_std": my_dataset.target_std,
273 | }
274 | print(transform)
275 |
276 | # Size train and evaluation dataset
277 | total_size = len(my_dataset)
278 | train_size = int(hparams["train_size"] * (total_size))
279 | eval_size = total_size - train_size
280 |
281 | print("============= DATASET =============")
282 |     print(f"Total size: {total_size}")
283 |     print(f"Train size: {train_size}")
284 |     print(f"Eval size: {eval_size}")
285 | print("===================================")
286 | print("=========== ARCHITECTURE ==========")
287 | pprint.pprint(appliance)
288 | print("===================================")
289 |
290 | # Split and randomize train and evaluation dataset
291 | train_dataset, eval_dataset = torch.utils.data.random_split(
292 | my_dataset, (train_size, eval_size)
293 | )
294 |
295 | # Save train dataset in order to use it in later
296 | # training sessions or debugging
297 | filename = os.path.join(output, "dataset.pt")
298 | save_dataset(transform, train_dataset, eval_dataset, filename)
299 |
300 | # Initialize train dataset loader
301 | train_loader = torch.utils.data.DataLoader(
302 | train_dataset, batch_size=hparams["batch_size"], shuffle=True
303 | )
304 | # Initialize evaluation dataset loader
305 | eval_loader = torch.utils.data.DataLoader(
306 | eval_dataset, batch_size=hparams["batch_size"]
307 | )
308 |
309 | model_type = getattr(nilmmodel, model_type)
310 | model = model_type(params["L"], params["F"], params["K"], params["H"])
311 | model = model.to(device)
312 |
313 | # Initialize optimizer
314 | optimizer = optim.Adam(model.parameters(), hparams["lr"])
315 | scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)
316 |
317 | if reload:
318 | # Reload pretrained model in order to continue
319 | # previous training sessions
320 | filename = os.path.join(output, appliance["filename"])
321 | print("====================================")
322 | print("Reloading model: ", filename)
323 | print("====================================")
324 | transform, record_err = load_model(filename, model, optimizer)
325 |
326 | results = []
327 |
328 | start = datetime.now()
329 | for epoch in range(hparams["epochs"]):
330 | # Iterate over training epochs
331 | filename = os.path.join(output, appliance["filename"] + str(epoch))
332 |
333 | plotfilename = None
334 | if doplot:
335 | plotfilename = filename
336 |
337 | err_ = None
338 | try:
339 | # Train single epoch
340 | loss, err = train_single_epoch(
341 | epoch,
342 | model,
343 | train_loader,
344 | transform,
345 | optimizer,
346 | eval_loader,
347 | plotfilename,
348 | )
349 | print("==========================================")
350 | print(f"train epoch={epoch} loss={loss:.2f} err={err:.2f}")
351 | print("==========================================")
352 |
353 | loss_, err_ = eval_single_epoch(model, eval_loader, transform)
354 | print("==========================================")
355 | print(f"eval loss={loss_:.2f} err={err_:.2f}")
356 | print("==========================================")
357 |
358 | # tune.report(eval_loss=loss_)
359 | results.append([(epoch, loss, err), (epoch, loss_, err_)])
360 |
361 | if err_ < record_err:
362 | # Compare current epoch error against previous
363 | # epochs error (minimum historic error) to check whether current
364 | # trained model is better than previous ones (best historic error)
365 | # Set and save current trained model as best historic trained
366 | # model if current error is lower than historic error
367 | filename = os.path.join(output, appliance["filename"])
368 | save_model(
369 | model, optimizer, hparams, appliance, transform, filename, err_
370 | )
371 | record_err = err_
372 | except Exception as e:
373 | print(e)
374 |
375 | scheduler.step()
376 |
377 | end = datetime.now()
378 | total_seconds = (end - start).seconds
379 | print("------------------------------------------")
380 | print(f"Total seconds: {total_seconds}")
381 | print("------------------------------------------")
382 |
383 | # Save model training results
384 | summary(output, results)
385 |
386 | return model, transform
387 |
388 |
389 | def train_model_wrapper(config):
390 | """
391 | Wrapper to adapt model training to tune interface
392 | """
393 | datapath = config["datapath"]
394 | output = config["output"]
395 | appliance = config["appliance"]
396 | hparams = config["hparams"]
397 | doplot = config["doplot"]
398 | reload = config["reload"]
399 | tune_hparams = config["tune"]
400 |
401 | appliance["hparams"]["F"] = tune_hparams["F"]
402 | appliance["hparams"]["K"] = tune_hparams["K"]
403 | appliance["hparams"]["H"] = tune_hparams["H"]
404 |
405 | return train_model(datapath, output, appliance, hparams, doplot, reload)
406 |
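407 | 
408 | # A hypothetical sketch of the config dict consumed by train_model_wrapper().
409 | # The keys mirror what the wrapper and train_model() read; all paths and values
410 | # below are placeholders, the real ones are expected to come from the project
411 | # settings.
412 | #
413 | # config = {
414 | #     "datapath": "/path/to/preprocessed/redd",
415 | #     "output": "/path/to/output",
416 | #     "appliance": {
417 | #         "name": "refrigerator",
418 | #         "filename": "refrigerator.pt",
419 | #         "buildings": {"train": ["redd_house1"]},
420 | #         "hparams": {"L": 496, "F": 16, "K": 5, "H": 64},
421 | #         "normalization": True,
422 | #         "model": "ModelPaper",
423 | #     },
424 | #     "hparams": {"epochs": 5, "batch_size": 64, "lr": 1e-3, "train_size": 0.8},
425 | #     "doplot": False,
426 | #     "reload": False,
427 | #     "tune": {"F": 16, "K": 5, "H": 64},
428 | # }
429 | # train_model_wrapper(config)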
--------------------------------------------------------------------------------
/dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import random
4 | import numpy as np
5 | import pandas as pd
6 | import torch
7 | from torch.utils.data import Dataset
8 |
9 | import redd
10 | import utils
11 |
12 |
13 | class Building:
14 | """
15 | Building consumption handler - definition of appliances and main
16 | consumption.
17 | """
18 |
19 | def __init__(self, path, name, spec):
20 | self.path = path
21 | self.name = name
22 |
23 | self.mains = spec["mains"]
24 | self.appliances = spec["appliances"]
25 |
26 | def get_appliances(self):
27 | """
28 | Get list of appliances
29 | """
30 | return [x["id"] for x in self.appliances]
31 |
32 | def load_mains(self, start, end):
33 | """
34 |         Load mains consumption for the start to end time interval using the
35 |         dataset-specific loader. Data is loaded on demand to prevent memory
36 |         overrun; the whole dataset is not kept in memory
37 | """
38 | return redd.load("mains", self.path, self.mains["channels"], start, end)
39 |
40 | def load_appliances(self, appliances=[], start=None, end=None):
41 | """
42 |         Load appliance consumption for the start to end time interval using
43 |         the dataset-specific loader. Data is loaded on demand to prevent
44 |         memory overrun; the whole dataset is not kept in memory
45 | """
46 | if not appliances:
47 | appliances = [x["id"] for x in self.appliances]
48 |
49 | # WARNING: Time series inner join. Ignoring non-synced
50 |         # datapoints from loaded channels
51 | return pd.concat(
52 | [
53 | redd.load(x["id"], self.path, x["channels"], start, end)
54 | for x in self.appliances
55 | if x["id"] in appliances
56 | ],
57 | axis=1,
58 | join="inner",
59 | )
60 |
61 |
62 | class NilmDataset:
63 | """
64 | NILM dataset handler
65 |     NOTE: This dataset handler is used when dataset preprocessing is required
66 |     - Alignment
67 |     - Imputation
68 |     Not used in the current analysis because an already preprocessed dataset
69 |     was available (not publicly available, obtained once the project was ongoing).
70 | """
71 |
72 | def __init__(self, spec, path):
73 | self.path = path
74 | spec = utils.load_yaml(spec)
75 |
76 | path = os.path.join(self.path, spec["path"])
77 | # Load all buildings in settings
78 | self.buildings = {
79 | x["name"]: Building(os.path.join(path, x["path"]), x["name"], x)
80 | for x in spec["buildings"]
81 | }
82 |
83 | def get_buildings(self):
84 | """
85 | Get list of buildings
86 | """
87 | return list(self.buildings.keys())
88 |
89 | def get_appliances(self, building):
90 | """
91 | Get list of appliances
92 | """
93 | return self.buildings[building].get_appliances()
94 |
95 | def load_mains(self, building, start=None, end=None):
96 | """
97 |         Load mains consumption for the start to end time interval using the
98 |         dataset-specific loader. Data is loaded on demand to prevent memory
99 |         overrun; the whole dataset is not kept in memory
100 | """
101 | return self.buildings[building].load_mains(start, end)
102 |
103 | def load_appliances(self, building, appliances=[], start=None, end=None):
104 | """
105 |         Load appliance consumption for the start to end time interval using
106 |         the dataset-specific loader. Data is loaded on demand to prevent
107 |         memory overrun; the whole dataset is not kept in memory
108 | """
109 | return self.buildings[building].load_appliances(appliances, start, end)
110 |
111 | @staticmethod
112 | def align(df1, df2, bfill=False):
113 | """
114 | Align two timeseries with different acquisition frequency
115 | """
116 |         # Time alignment required due to different acquisition frequencies
117 | if bfill:
118 |             # Raw backward filling of df2 onto df1's index
119 | newindex = df1.index
120 | df2_ = df2.reindex(newindex, method="bfill")
121 | df = pd.concat([df1, df2_], axis=1, join="inner")
122 | else:
123 | df = pd.concat([df1, df2], axis=1, join="inner")
124 |
125 | return df[~df.isnull().any(axis=1)]
126 |
127 | @staticmethod
128 | def impute(df, gapsize=20, subseqsize=28800):
129 | """
130 |         Data preprocessing to impute small gaps and ignore large gaps.
131 |         Days without 100% coverage are ignored
132 |
133 | Extract from "Subtask Gated Networks for Non-Intrusive Load Monitoring"
134 |
135 |         For the REDD dataset, we preprocessed with the following procedure
136 |         to handle missing values. First, we split the sequence so that the
137 |         duration of missing values in subsequence is less than 20 seconds.
138 |         Second, we filled the missing values in each subsequence by
139 |         the backward filling method. Finally, we only used the subsequences
140 |         with more than one-day duration (subseqsize=28800 samples at the 3 s rate)
141 | """
142 | df = df.sort_index()
143 |
144 | start = df.index[0]
145 | end = df.index[-1]
146 | newindex = pd.date_range(start, end, freq="1S")
147 |
148 |         # Appliance time series are not aligned to 3 s (i.e. 3-4 s period)
149 |         # Use a 1 s reindex in order to align to the 3 s time series
150 | df = df.reindex(newindex, method="bfill", limit=4)
151 | newindex = pd.date_range(start, end, freq="3S")
152 | mask = df.index.isin(newindex)
153 | df = df[mask]
154 | # WARNING
155 | # if there is a gap with more than limit number of consecutive NaNs,
156 | # it will only be partially filled.
157 | df = df.fillna(method="bfill", limit=gapsize)
158 | columns = df.columns
159 |
160 | df["rowindex"] = range(df.shape[0])
161 | df = df[~df.iloc[:, 0].isnull()]
162 |
163 | diffseq = df["rowindex"].diff()
164 | diffsec = df.index.to_series().diff().dt.total_seconds()
165 | # Find big gaps to split data in subsequences
166 | mask = diffseq > gapsize
167 |
168 | # List of continuous data subsequences
169 | its_index = diffsec[mask].index
170 | its_offset = diffsec[mask].values
171 |
172 | data = []
173 | if sum(mask) > 0:
174 | start = df.index[0]
175 |
176 | # Iterate over continuous data subsequences
177 | for idx, (it, offset) in enumerate(zip(its_index, its_offset)):
178 | end = it - pd.Timedelta(seconds=offset)
179 | subseq = df[start:end]
180 |
181 |                 # Check whether the subsequence is large enough. If it is not,
182 |                 # ignore it; otherwise consider it valid
183 | if subseq.shape[0] > subseqsize:
184 | data.append(subseq[columns])
185 | start = it
186 |
187 |             # Check whether the subsequence is large enough. If it is not,
188 |             # ignore it; otherwise consider it valid
189 | end = df.index[-1]
190 | subseq = df[start:end]
191 | if subseq.shape[0] > subseqsize:
192 | data.append(subseq[columns])
193 | else:
194 | # One single subsequence (valid or invalid)
195 | data.append(df[columns])
196 | return data
197 |
198 | ## Filterout days without minimum amount of seconds
199 | # tmp = df.groupby("date").apply(lambda x: x.shape[0])
200 | # valid_dates = tmp[tmp >= subseqsize].index
201 | # mask = df["date" ].isin(valid_dates)
202 | # return df[mask].drop(columns=["date"])
203 |
204 | def load(self, building, appliances=[], start=None, end=None, bfill=False):
205 | return self.impute(
206 | self.align(
207 | self.load_mains(building, start, end),
208 | self.load_appliances(building, appliances, start, end),
209 | bfill,
210 | )
211 | )
212 |
213 | def load_raw(self, building, appliances=[], start=None, end=None, bfill=False):
214 | return self.align(
215 | self.load_mains(building, start, end),
216 | self.load_appliances(building, appliances, start, end),
217 | bfill,
218 | )
219 |
220 |
221 | class InMemoryDataset(Dataset):
222 | """
223 |     In-memory dataset
224 |     WARNING: Not the best option due to potential memory overrun, but it did not fail
225 |     Not used in the current analysis because an already preprocessed dataset
226 |     was available (not publicly available, obtained once the project was ongoing).
227 | """
228 |
229 | def __init__(
230 | self, spec, path, buildings, appliance, windowsize=34459, start=None, end=None
231 | ):
232 | super().__init__()
233 |
234 | self.buildings = buildings
235 | self.appliance = appliance
236 | self.windowsize = windowsize
237 |
238 | dataset = NilmDataset(spec, path)
239 |
240 | # Dataset is structured as multiple long size windows
241 | self.data = []
242 |         # As sliding windows are used to access data, a lookup-table
243 | # is created as sequential index to reference each sliding
244 | # window (long window + offset within long window).
245 | self.datamap = {}
246 |
247 | data_index = 0
248 | window_index = 0
249 | for building in buildings:
250 | for x in dataset.load(building, [appliance], start, end):
251 | # Calculate number of sliding windows in the long time window
252 | n_windows = x.shape[0] - windowsize + 1
253 |
254 | # Add loaded data to dataset
255 | self.data.append(x.reset_index())
256 |             # Update data index iterating over all sliding windows in
257 | # dataset. Each of the indexes in global map corresponds
258 | # to specific long time window and offset
259 | self.datamap.update(
260 | {window_index + i: (data_index, i) for i in range(n_windows)}
261 | )
262 | data_index += 1
263 |
264 | window_index += n_windows
265 | self.total_size = window_index
266 |
267 | def __len__(self):
268 | return self.total_size
269 |
270 | def __getitem__(self, idx):
271 | # Each of the indexes in global map corresponds
272 | # to specific long time window and offset. Obtain
273 | # long time window and offset
274 | data_index, window_index = self.datamap[idx]
275 |
276 | # Obtain start end offset in the long time window
277 | start = window_index
278 | end = self.windowsize + window_index
279 |
280 | # Access data
281 | sample = self.data[data_index].loc[start:end, "mains"]
282 | target = self.data[data_index].loc[start:end, self.appliance]
283 |
284 | return (torch.tensor(sample.values), torch.tensor(target.values))
285 |
286 |
287 | class InMemoryKoreaDataset(Dataset):
288 | """
289 |     In-memory dataset
290 |     WARNING: Not the best option due to potential memory overrun, but it did not fail
291 |
292 | Arguments:
293 | windowsize: Sliding window size
294 | active_threshold: Active threshold used in classification
295 | Default value in paper 15W
296 |         active_ratio: In order to prevent imbalance in the data it is necessary
297 |             to balance the number of active/inactive appliance windows. In most
298 |             cases the number of inactive windows is larger than the number of
299 |             active windows. active_ratio enforces the ratio between
300 |             active/inactive windows by dropping windows (in most cases inactive
301 |             ones) until the ratio is fulfilled, e.g. with active_ratio=0.5,
302 |             400 active and 5000 inactive windows, only 400 inactive windows are kept
303 |         active_oversample: In order to prevent overfitting, oversampling is applied
304 |             to the active windows, replicating them active_oversample times
305 | transform_enabled: Used to enable data preprocessing transformation,
306 | in this case standardization
307 | transform: Transformation properties, in case of standardization
308 | mean and standard deviation
309 | """
310 |
311 | sample_mean = None
312 | sample_std = None
313 | target_mean = None
314 | target_std = None
315 |
316 | def __init__(
317 | self,
318 | path,
319 | buildings,
320 | appliance,
321 | windowsize=496,
322 | active_threshold=15.0,
323 | active_ratio=None,
324 | active_oversample=None,
325 | transform_enabled=False,
326 | transform=None,
327 | ):
328 | super().__init__()
329 |
330 | self.transform_enabled = transform_enabled
331 |
332 | self.appliance = appliance
333 | self.windowsize = windowsize
334 | self.active_threshold = active_threshold
335 |
336 | # Dataset is structured as multiple long size windows
337 | self.data = []
338 |         # As sliding windows are used to access data, a lookup-table
339 | # is created as sequential index to reference each sliding
340 | # window (long window + offset within long window).
341 | self.datamap = {}
342 |
343 | filenames = os.listdir(path)
344 |
345 | columns = ["main", self.appliance]
346 |
347 |         # Use the original long time windows as unrelated time intervals
348 |         # in order to prevent mixing days and concatenating non-continuous
349 |         # data. The original data has gaps between dataset files
350 | self.data = [
351 | pd.read_csv(os.path.join(path, filename), usecols=columns, sep=",")
352 | for filename in filenames
353 | for building in buildings
354 | if filename.startswith(building)
355 | ]
356 |
357 | df = pd.concat(self.data)
358 | # Data transformation
359 | if transform_enabled:
360 | if transform:
361 | self.sample_mean = transform["sample_mean"]
362 | self.sample_std = transform["sample_std"]
363 | self.target_mean = transform["target_mean"]
364 | self.target_std = transform["target_std"]
365 | else:
366 | self.sample_mean = df["main"].mean()
367 | self.sample_std = df["main"].std()
368 | self.target_mean = df[appliance].mean()
369 | self.target_std = df[appliance].std()
370 |
371 | data_index = 0
372 | window_index = 0
373 |
374 | for subseq in self.data:
375 |             n_windows = subseq.shape[0] - windowsize + 1  # N - L + 1 windows of length L fit in a sequence of length N
376 |             # Update data index iterating over all sliding windows in
377 | # dataset. Each of the indexes in global map corresponds
378 | # to specific long time window and offset
379 | self.datamap.update(
380 | {window_index + i: (data_index, i) for i in range(n_windows)}
381 | )
382 | data_index += 1
383 | window_index += n_windows
384 |
385 | self.total_size = window_index
386 |
387 | if active_ratio:
388 | # Fix imbalance required
389 | map_indexes = list(self.datamap.keys())
390 |             # Shuffle indexes in order to prevent oversampling from the same
391 |             # building or from contiguous windows
392 | random.shuffle(map_indexes)
393 |
394 | # Active and inactive buffers are used to manage classified
395 | # sliding windows and use them later to fix imbalance
396 | active_indexes = []
397 | inactive_indexes = []
398 |
399 | # Classify every sliding window as active or inactive using
400 | # active_threshold as threshold
401 | for i, index in enumerate(map_indexes):
402 | data_index, window_index = self.datamap[index]
403 | start = window_index
404 | end = self.windowsize + window_index
405 |
406 |                 # Retrieve sliding window from data
407 | subseq = self.data[data_index].loc[start : (end - 1), self.appliance]
408 | if subseq.shape[0] != self.windowsize:
409 | continue
410 |
411 | # Fill active and inactive buffers to be used later to
412 | # fix imbalance
413 | if (subseq > active_threshold).any(): # is there any active ?
414 | active_indexes.append(index)
415 | else:
416 | inactive_indexes.append(index)
417 |
418 | if (i % 1000) == 0:
419 | print(
420 | "Loading {0}: {1}/{2}".format(
421 | self.appliance, i, len(map_indexes)
422 | )
423 | )
424 | if active_oversample:
425 | # If oversample is required increase representation
426 | active_indexes = active_indexes * active_oversample
427 |
428 | # Identify imbalance by calculating active/inactive ratio
429 | n_active = len(active_indexes)
430 | n_inactive = len(inactive_indexes)
431 |
432 | # Update number of active/inactive windows to fulfill required
433 | # ratio and fix imbalance
434 | n_inactive_ = int((n_active * (1.0 - active_ratio)) / active_ratio)
435 | n_active_ = int((n_inactive * active_ratio) / (1.0 - active_ratio))
436 |
437 | if n_inactive > n_inactive_:
438 | n_inactive = n_inactive_
439 | else:
440 | n_active = n_active_
441 |
442 | # Obtain valid indexes after imbalance analysis
443 | valid_indexes = active_indexes[:n_active] + inactive_indexes[:n_inactive]
444 |
445 | # Update datamap with fixed indexes in order to point to
446 | # proper sliding windows
447 | datamap = {}
448 | for dst_index, src_index in enumerate(valid_indexes):
449 | datamap[dst_index] = self.datamap[src_index]
450 | self.datamap = datamap
451 | self.total_size = len(self.datamap.keys())
452 |
453 | def __len__(self):
454 | return self.total_size
455 |
456 | def __getitem__(self, idx):
457 | # Loader asking for specific sliding window in specific index
458 | # Calculate long time window and offset in order to retrieve data
459 |         # Input data is obtained from the mains time series, the target from
460 |         # the appliance time series, and the classification is derived from
461 |         # the appliance time series (target > active_threshold)
462 | data_index, window_index = self.datamap[idx]
463 | start = window_index
464 | end = self.windowsize + window_index
465 |
466 |         # Retrieve mains data as sample data
467 | sample = self.data[data_index].loc[start : (end - 1), "main"]
468 |         # Retrieve appliance data as target data
469 | target = self.data[data_index].loc[start : (end - 1), self.appliance]
470 |
471 | # Calculate classification
472 | classification = torch.zeros(target.values.shape[0])
473 | if self.active_threshold:
474 | classification = (target.values > self.active_threshold).astype(int)
475 |
476 |         # WARNING: Not the proper way, as the statistics are computed over
477 |         # both train and test values. It's just a first approach
478 | if self.transform_enabled:
479 |             # Standardization enabled
480 | sample = (sample - self.sample_mean) / self.sample_std
481 | target = (target - self.target_mean) / self.target_std
482 |
483 | return (
484 | torch.tensor(sample.values, dtype=torch.float32), # Input
485 | torch.tensor(target.values, dtype=torch.float32), # Target
486 | torch.tensor(classification, dtype=torch.float32), # Classification
487 | )
488 |
489 |
490 | if __name__ == "__main__":
491 | # Default dataset handler used to explore data in colab
492 | # not used in training or prediction
493 |
494 | spec = sys.argv[1]
495 | path = sys.argv[2]
496 | appliance = sys.argv[3]
497 |
498 | # NOTE: Raw dataset explorer
499 | # from datetime import datetime
500 | # import pytz
501 | # tz = pytz.timezone("US/Eastern")
502 | # start = datetime(2011, 4, 20, 0,0,0)
503 | # end = datetime(2011, 4, 22, 0,0,0)
504 | # start = tz.localize(start)
505 | # end = tz.localize(end)
506 |
507 | # building = "building1"
508 | # appliances = ["refrigerator"]
509 | # dataset = NilmDataset(spec, path)
510 | # raw_mains = dataset.load_mains(building)
511 | # raw_appliances = dataset.load_appliances(building, appliances)
512 |
513 | # raw_df = dataset.load_raw(building, appliances)
514 | # clean_df = dataset.load(building, appliances)
515 |
516 | # buildings = ["building1", "building2"]
517 | # my_dataset = InMemoryDataset(spec, path, buildings, "refrigerator")
518 |
519 | # NOTE: Korea dataset explorer
520 | buildings = ["redd_house1"]
521 | my_dataset = InMemoryKoreaDataset(path, buildings, appliance)
522 |
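523 |     # A quick sanity-check sketch with placeholder expectations: each dataset
524 |     # item is (mains sample, appliance target, on/off classification), all
525 |     # tensors of length windowsize (496 by default).
526 |     x, y, clas = my_dataset[0]
527 |     print(x.shape, y.shape, clas.shape)
528 |     print("active fraction in first window:", clas.mean().item())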
--------------------------------------------------------------------------------
/colab/project-devel.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "accelerator": "GPU",
6 | "colab": {
7 | "name": "Copia de Project 2.ipynb",
8 | "provenance": [],
9 | "collapsed_sections": [],
10 | "toc_visible": true,
11 | "machine_shape": "hm"
12 | },
13 | "kernelspec": {
14 | "display_name": "Python 3",
15 | "name": "python3"
16 | }
17 | },
18 | "cells": [
19 | {
20 | "cell_type": "code",
21 | "metadata": {
22 | "id": "pJbYXou6chZf",
23 | "colab": {
24 | "base_uri": "https://localhost:8080/"
25 | },
26 | "outputId": "28c6f77c-eeaa-479f-c577-f3da0331c57c"
27 | },
28 | "source": [
29 | "!nvidia-smi"
30 | ],
31 | "execution_count": null,
32 | "outputs": [
33 | {
34 | "output_type": "stream",
35 | "text": [
36 | "Sat Apr 17 11:32:16 2021 \n",
37 | "+-----------------------------------------------------------------------------+\n",
38 | "| NVIDIA-SMI 460.67 Driver Version: 460.32.03 CUDA Version: 11.2 |\n",
39 | "|-------------------------------+----------------------+----------------------+\n",
40 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
41 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
42 | "| | | MIG M. |\n",
43 | "|===============================+======================+======================|\n",
44 | "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
45 | "| N/A 76C P8 12W / 70W | 0MiB / 15109MiB | 0% Default |\n",
46 | "| | | N/A |\n",
47 | "+-------------------------------+----------------------+----------------------+\n",
48 | " \n",
49 | "+-----------------------------------------------------------------------------+\n",
50 | "| Processes: |\n",
51 | "| GPU GI CI PID Type Process name GPU Memory |\n",
52 | "| ID ID Usage |\n",
53 | "|=============================================================================|\n",
54 | "| No running processes found |\n",
55 | "+-----------------------------------------------------------------------------+\n"
56 | ],
57 | "name": "stdout"
58 | }
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "metadata": {
64 | "id": "UUElMiSlyoMu",
65 | "colab": {
66 | "base_uri": "https://localhost:8080/"
67 | },
68 | "outputId": "77b5c9cd-9d17-41ee-8adf-0f7f1a2ef6e6"
69 | },
70 | "source": [
71 |         "## Mount the gdrive unit in order to load data and import source files\n",
72 | "from google.colab import drive\n",
73 | "drive.mount('/content/gdrive/')"
74 | ],
75 | "execution_count": null,
76 | "outputs": [
77 | {
78 | "output_type": "stream",
79 | "text": [
80 | "Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount(\"/content/gdrive/\", force_remount=True).\n"
81 | ],
82 | "name": "stdout"
83 | }
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "metadata": {
89 | "id": "Mf5PxKCN_1kA",
90 | "colab": {
91 | "base_uri": "https://localhost:8080/"
92 | },
93 | "outputId": "3452ce72-040b-417e-9fc0-7768e3d78337"
94 | },
95 | "source": [
96 | "!ls /content/gdrive/MyDrive/ColabNotebooks/"
97 | ],
98 | "execution_count": null,
99 | "outputs": [
100 | {
101 | "output_type": "stream",
102 | "text": [
103 | "'Copia de Project 2.ipynb' microwave_out redd.yaml\n",
104 | " dataset.py\t\t modelclassatt.py run.sh\n",
105 | " dataset_test.py\t model.py\t settings.yaml\n",
106 | " dishwasher_out\t\t 'Project 2.ipynb' template.yaml\n",
107 | " fridge_out\t\t 'Project antic' utils.py\n",
108 | " __init__.py\t\t __pycache__\n",
109 | " main.py\t\t redd.py\n"
110 | ],
111 | "name": "stdout"
112 | }
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "metadata": {
118 | "id": "q5JgFZET__SG"
119 | },
120 | "source": [
121 | "## Include ColabNotebooks to syspath to let python load libraries\n",
122 | "import sys\n",
123 | "sys.path.append('/content/gdrive/MyDrive/ColabNotebooks')"
124 | ],
125 | "execution_count": null,
126 | "outputs": []
127 | },
128 | {
129 | "cell_type": "code",
130 | "metadata": {
131 | "id": "CGzJnN_OACQY",
132 | "colab": {
133 | "base_uri": "https://localhost:8080/"
134 | },
135 | "outputId": "2d2a50fb-1eee-451c-916b-320d8ed1c61b"
136 | },
137 | "source": [
138 | "!ls /content/gdrive/MyDrive/datasetKorea/redd"
139 | ],
140 | "execution_count": null,
141 | "outputs": [
142 | {
143 | "output_type": "stream",
144 | "text": [
145 | "redd_house1_0.csv redd_house1_8.csv redd_house3_0.csv redd_house4_3.csv\n",
146 | "redd_house1_10.csv redd_house1_9.csv redd_house3_1.csv redd_house4_4.csv\n",
147 | "redd_house1_1.csv redd_house2_0.csv redd_house3_2.csv redd_house4_5.csv\n",
148 | "redd_house1_2.csv redd_house2_1.csv redd_house3_3.csv redd_house5_0.csv\n",
149 | "redd_house1_3.csv redd_house2_2.csv redd_house3_4.csv redd_house6_0.csv\n",
150 | "redd_house1_4.csv redd_house2_3.csv redd_house3_5.csv redd_house6_1.csv\n",
151 | "redd_house1_5.csv redd_house2_4.csv redd_house4_0.csv redd_house6_2.csv\n",
152 | "redd_house1_6.csv redd_house2_5.csv redd_house4_1.csv redd_house6_3.csv\n",
153 | "redd_house1_7.csv redd_house2_6.csv redd_house4_2.csv\n"
154 | ],
155 | "name": "stdout"
156 | }
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "metadata": {
162 | "colab": {
163 | "base_uri": "https://localhost:8080/"
164 | },
165 | "id": "q-Lw8xnNACYG",
166 | "outputId": "fa35b159-c08f-43e8-98b3-022aa82ec596"
167 | },
168 | "source": [
169 | "!pip install ray"
170 | ],
171 | "execution_count": null,
172 | "outputs": [
173 | {
174 | "output_type": "stream",
175 | "text": [
176 | "Requirement already satisfied: ray in /usr/local/lib/python3.7/dist-packages (1.2.0)\n",
177 | "Requirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.7/dist-packages (from ray) (3.12.4)\n",
178 | "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from ray) (1.0.2)\n",
179 | "Requirement already satisfied: colorama in /usr/local/lib/python3.7/dist-packages (from ray) (0.4.4)\n",
180 | "Requirement already satisfied: redis>=3.5.0 in /usr/local/lib/python3.7/dist-packages (from ray) (3.5.3)\n",
181 | "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from ray) (2.23.0)\n",
182 | "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from ray) (3.0.12)\n",
183 | "Requirement already satisfied: prometheus-client>=0.7.1 in /usr/local/lib/python3.7/dist-packages (from ray) (0.10.1)\n",
184 | "Requirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from ray) (2.6.0)\n",
185 | "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from ray) (3.7.4.post0)\n",
186 | "Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.7/dist-packages (from ray) (7.1.2)\n",
187 | "Requirement already satisfied: gpustat in /usr/local/lib/python3.7/dist-packages (from ray) (0.6.0)\n",
188 | "Requirement already satisfied: opencensus in /usr/local/lib/python3.7/dist-packages (from ray) (0.7.12)\n",
189 | "Requirement already satisfied: aioredis in /usr/local/lib/python3.7/dist-packages (from ray) (1.3.1)\n",
190 | "Requirement already satisfied: py-spy>=0.2.0 in /usr/local/lib/python3.7/dist-packages (from ray) (0.3.5)\n",
191 | "Requirement already satisfied: grpcio>=1.28.1 in /usr/local/lib/python3.7/dist-packages (from ray) (1.32.0)\n",
192 | "Requirement already satisfied: colorful in /usr/local/lib/python3.7/dist-packages (from ray) (0.5.4)\n",
193 | "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.7/dist-packages (from ray) (1.19.5)\n",
194 | "Requirement already satisfied: aiohttp-cors in /usr/local/lib/python3.7/dist-packages (from ray) (0.7.0)\n",
195 | "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from ray) (3.13)\n",
196 | "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.7/dist-packages (from protobuf>=3.8.0->ray) (1.15.0)\n",
197 | "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from protobuf>=3.8.0->ray) (54.2.0)\n",
198 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->ray) (2020.12.5)\n",
199 | "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->ray) (3.0.4)\n",
200 | "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->ray) (2.10)\n",
201 | "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->ray) (1.24.3)\n",
202 | "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (1.6.3)\n",
203 | "Requirement already satisfied: async-timeout<4.0,>=3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (3.0.1)\n",
204 | "Requirement already satisfied: typing-extensions>=3.6.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (3.7.4.3)\n",
205 | "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (5.1.0)\n",
206 | "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->ray) (20.3.0)\n",
207 | "Requirement already satisfied: nvidia-ml-py3>=7.352.0 in /usr/local/lib/python3.7/dist-packages (from gpustat->ray) (7.352.0)\n",
208 | "Requirement already satisfied: psutil in /usr/local/lib/python3.7/dist-packages (from gpustat->ray) (5.4.8)\n",
209 | "Requirement already satisfied: blessings>=1.6 in /usr/local/lib/python3.7/dist-packages (from gpustat->ray) (1.7)\n",
210 | "Requirement already satisfied: opencensus-context==0.1.2 in /usr/local/lib/python3.7/dist-packages (from opencensus->ray) (0.1.2)\n",
211 | "Requirement already satisfied: google-api-core<2.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from opencensus->ray) (1.26.3)\n",
212 | "Requirement already satisfied: hiredis in /usr/local/lib/python3.7/dist-packages (from aioredis->ray) (2.0.0)\n",
213 | "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray) (1.28.1)\n",
214 | "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray) (2018.9)\n",
215 | "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray) (1.53.0)\n",
216 | "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core<2.0.0,>=1.0.0->opencensus->ray) (20.9)\n",
217 | "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (0.2.8)\n",
218 | "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3.6\" in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (4.7.2)\n",
219 | "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (4.2.1)\n",
220 | "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (2.4.7)\n",
221 | "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.21.1->google-api-core<2.0.0,>=1.0.0->opencensus->ray) (0.4.8)\n"
222 | ],
223 | "name": "stdout"
224 | }
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "metadata": {
230 | "id": "ZSt-KQgAA0RR"
231 | },
232 | "source": [
233 | "# -*- coding: utf-8 -*-\n",
234 | "import os\n",
235 | "import sys\n",
236 | "from argparse import ArgumentParser\n",
237 | "\n",
238 | "import numpy as np\n",
239 | "import pandas as pd\n",
240 | "import torch\n",
241 | "import torch.nn as nn\n",
242 | "import torch.nn.functional as F\n",
243 | "import torch.optim as optim\n",
244 | "from ray import tune\n",
245 | "\n",
246 | "import matplotlib.pyplot as plt\n",
247 | "\n",
248 | "from dataset import InMemoryKoreaDataset\n",
249 | "from model import ModelPaperBackward as Model\n",
250 | "from utils import error, load_yaml"
251 | ],
252 | "execution_count": null,
253 | "outputs": []
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {
258 | "id": "JrdjNfu1eD5B"
259 | },
260 | "source": [
261 | "####Single epoch (Train, Eval, Test)"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "metadata": {
267 | "id": "VGBkRyghd-Cb"
268 | },
269 | "source": [
270 | "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
271 | "\n",
272 | "def train_single_epoch(epoch, model, train_loader, optimizer, eval_loader,\n",
273 | "                       plotfilename=None):\n",
274 | "    model.train()\n",
275 | "    errs, losses = [], []\n",
276 | "    for x, y, clas in train_loader:\n",
277 | "        x = torch.unsqueeze(x, dim=1)\n        optimizer.zero_grad()\n",
278 | "        x, y, clas = x.to(device), y.to(device), clas.to(device)\n        y_, reg_, alphas_, clas_ = model(x)\n",
279 | "        loss = F.mse_loss(y_, y) + F.binary_cross_entropy(clas_, clas)\n        loss.backward()\n        optimizer.step()\n",
280 | "        losses.append(loss.item())\n        errs.append(error(y, y_).item())\n    return np.mean(losses), np.mean(errs)\n",
281 | "def eval_single_epoch(model, eval_loader, plotfilename=None):\n",
282 | " errs, losses = [], []\n",
283 | " with torch.no_grad():\n",
284 | " model.eval()\n",
285 | " for idx, (x, y, clas) in enumerate(eval_loader):\n",
286 | " x = torch.unsqueeze(x, dim=1)\n",
287 | "\n",
288 | " x, y, clas = x.to(device), y.to(device), clas.to(device)\n",
289 | " y_, reg_, alphas_, clas_ = model(x)\n",
290 | "\n",
291 | " loss_clas = F.binary_cross_entropy(clas_, clas)\n",
292 | " loss_out = F.mse_loss(y_, y)\n",
293 | " loss = loss_out + loss_clas\n",
294 | " err = error(y, y_)\n",
295 | "\n",
296 | " loss_, err_ = loss.item(), err.item()\n",
297 | " losses.append(loss_)\n",
298 | " errs.append(err_)\n",
299 | "\n",
300 | " if idx % 50 == 0:\n",
301 | "                print(f\"eval batch={idx+1} loss={loss:.2f} err={err:.2f}\")\n",
302 | " if plotfilename:\n",
303 | " filename = plotfilename + f\".{idx}eval.png\"\n",
304 | " plot_window(\n",
305 | " x.cpu(),\n",
306 | " y.cpu(),\n",
307 | " y_.cpu(),\n",
308 | " reg_.cpu(),\n",
309 | " clas_.cpu(),\n",
310 | " alphas_.cpu(),\n",
311 | " loss_,\n",
312 | " err_,\n",
313 | " filename,\n",
314 | " )\n",
315 | " return np.mean(losses), np.mean(errs)\n",
316 | "\n",
317 | "\n",
318 | "def test_single(model, test_loader, appliance, plotfilename=None):\n",
319 | " errs, losses = [], []\n",
320 | " with torch.no_grad():\n",
321 | " model.eval()\n",
322 | " for idx, (x, y, clas) in enumerate(test_loader):\n",
323 | " x = torch.unsqueeze(x, dim=1)\n",
324 | "\n",
325 | " x, y, clas = x.to(device), y.to(device), clas.to(device)\n",
326 | " y_, reg_, alphas_, clas_ = model(x)\n",
327 | "\n",
328 | " loss_clas = F.binary_cross_entropy(clas_, clas)\n",
329 | " loss_out = F.mse_loss(y_, y)\n",
330 | " loss = loss_out + loss_clas\n",
331 | " err = error(y, y_)\n",
332 | "\n",
333 | " loss_, err_ = loss.item(), err.item()\n",
334 | " losses.append(loss_)\n",
335 | " errs.append(err_)\n",
336 | "\n",
337 | " if idx % 500 == 0:\n",
338 | " print(f\"eval batch={idx+1} loss={loss:.2f} err={err:.2f}\")\n",
339 | " if plotfilename:\n",
340 | " filename = plotfilename + f\".{idx}.attention.png\"\n",
341 | " plot_window(\n",
342 | " x.cpu(),\n",
343 | " y.cpu(),\n",
344 | " y_.cpu(),\n",
345 | " reg_.cpu(),\n",
346 | " clas_.cpu(),\n",
347 | " alphas_.cpu(),\n",
348 | " loss_,\n",
349 | " err_,\n",
350 | " filename,\n",
351 | " )\n",
352 | "\n",
353 | " return np.mean(losses), np.mean(errs)\n"
354 | ],
355 | "execution_count": null,
356 | "outputs": []
357 | },
358 | {
359 | "cell_type": "markdown",
360 | "metadata": {
361 | "id": "BU6kw9VqeKZu"
362 | },
363 | "source": [
364 | "###Train model"
365 | ]
366 | },
367 | {
368 | "cell_type": "code",
369 | "metadata": {
370 | "id": "E-HHDS8deRLK"
371 | },
372 | "source": [
373 | "def train_model(datapath, output, appliance, hparams, doplot=None, load=True):\n",
374 | " buildings = appliance[\"buildings\"][\"train\"]\n",
375 | " name = appliance[\"name\"]\n",
376 | " params = appliance[\"hparams\"]\n",
377 | " record_err = np.inf\n",
378 | "\n",
379 | " active_threshold = appliance.get(\"active_threshold\", 0.15)\n",
380 | " active_ratio = appliance.get(\"active_ratio\", 0.5)\n",
381 | " active_oversample = appliance.get(\"active_oversample\", 2)\n",
382 | "\n",
383 | " my_dataset = InMemoryKoreaDataset(\n",
384 | " datapath,\n",
385 | " buildings,\n",
386 | " name,\n",
387 | " windowsize=params[\"L\"],\n",
388 | " active_threshold=active_threshold,\n",
389 | " active_ratio=active_ratio,\n",
390 | " active_oversample=active_oversample,\n",
391 | " )\n",
392 | "\n",
393 | " total_size = len(my_dataset)\n",
394 | " train_size = int(hparams[\"train_size\"] * (total_size))\n",
395 | " eval_size = total_size - train_size\n",
396 | "\n",
397 | " print(\"============= DATASET =============\")\n",
398 | "    print(f\"Total size: {total_size}\")\n",
399 | "    print(f\"Train size: {train_size}\")\n",
400 | "    print(f\"Eval size: {eval_size}\")\n",
401 | " print(\"===================================\")\n",
402 | "\n",
403 | " train_dataset, eval_dataset = torch.utils.data.random_split(\n",
404 | " my_dataset, (train_size, eval_size)\n",
405 | " )\n",
406 | "\n",
407 | " filename = os.path.join(output, \"dataset.pt\")\n",
408 | " save_dataset(train_dataset, eval_dataset, filename)\n",
409 | "\n",
410 | " train_loader = torch.utils.data.DataLoader(\n",
411 | " train_dataset, batch_size=hparams[\"batch_size\"], shuffle=True\n",
412 | " )\n",
413 | " eval_loader = torch.utils.data.DataLoader(\n",
414 | " eval_dataset, batch_size=hparams[\"batch_size\"]\n",
415 | " )\n",
416 | "\n",
417 | " model = Model(params[\"L\"], params[\"F\"], params[\"K\"], params[\"H\"])\n",
418 | " model = model.to(device)\n",
419 | "\n",
420 | " optimizer = optim.Adam(model.parameters(), hparams[\"lr\"])\n",
421 | " # scheduler = optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)\n",
422 | "\n",
423 | " if load:\n",
424 | " filename = os.path.join(output, appliance[\"filename\"])\n",
425 | " record_err = load_model(filename, model, optimizer)\n",
426 | "\n",
427 | " results = []\n",
428 | " for epoch in range(hparams[\"epochs\"]):\n",
429 | " filename = os.path.join(output, appliance[\"filename\"] + str(epoch))\n",
430 | "\n",
431 | " plotfilename = None\n",
432 | " if doplot:\n",
433 | " plotfilename = filename\n",
434 | "\n",
435 | " err_ = None\n",
436 | " try:\n",
437 | "            loss, err = train_single_epoch(\n",
438 | " epoch, model, train_loader, optimizer, eval_loader, plotfilename)\n",
439 | "\n",
440 | " loss_, err_ = eval_single_epoch(model, eval_loader)\n",
441 | " print(\"==========================================\")\n",
442 | "            print(f\"eval loss={loss_:.2f} err={err_:.2f}\")\n",
443 | " print(\"==========================================\")\n",
444 | "\n",
445 | " # tune.report(eval_loss=loss_)\n",
446 | " results.append([(epoch, loss, err), (epoch, loss_, err_)])\n",
447 | "\n",
448 | " if err_ < record_err:\n",
449 | " filename = os.path.join(output, appliance[\"filename\"])\n",
450 | " save(model, optimizer, filename, err_)\n",
451 | " record_err = err_\n",
452 | " except Exception as e:\n",
453 | " print(e)\n",
454 | "\n",
455 | " # scheduler.step()\n",
456 | " summary(output, results)\n",
457 | "\n",
458 | " return model\n",
459 | "\n",
460 | "\n",
461 | "def train_model_wrapper(config):\n",
462 | " datapath = config[\"datapath\"]\n",
463 | " output = config[\"output\"]\n",
464 | " appliance = config[\"appliance\"]\n",
465 | " hparams = config[\"hparams\"]\n",
466 | " doplot = config[\"doplot\"]\n",
467 | " load = config[\"load\"]\n",
468 | " tune_hparams = config[\"tune\"]\n",
469 | "\n",
470 | " appliance[\"hparams\"][\"F\"] = tune_hparams[\"F\"]\n",
471 | " appliance[\"hparams\"][\"K\"] = tune_hparams[\"K\"]\n",
472 | " appliance[\"hparams\"][\"H\"] = tune_hparams[\"H\"]\n",
473 | "\n",
474 | " return train_model(datapath, output, appliance, hparams, doplot, load)\n"
475 | ],
476 | "execution_count": null,
477 | "outputs": []
478 | },
479 | {
480 | "cell_type": "markdown",
481 | "metadata": {
482 | "id": "gZn2iwRCeSly"
483 | },
484 | "source": [
485 | "###Test model"
486 | ]
487 | },
488 | {
489 | "cell_type": "code",
490 | "metadata": {
491 | "id": "jlQkkLBZenJ3"
492 | },
493 | "source": [
494 | "def test_model(datapath, output, appliance, hparams, doplot=None):\n",
495 | " buildings = appliance[\"buildings\"][\"test\"]\n",
496 | " name = appliance[\"name\"]\n",
497 | " params = appliance[\"hparams\"]\n",
498 | "\n",
499 | " filename = os.path.join(output, appliance[\"filename\"])\n",
500 | " plotfilename = None\n",
501 | " if doplot:\n",
502 | " plotfilename = filename\n",
503 | "\n",
504 | " active_threshold = appliance.get(\"active_threshold\", 0.15)\n",
505 | " active_ratio = appliance.get(\"active_ratio\", 0.5)\n",
506 | " active_oversample = appliance.get(\"active_oversample\", 2)\n",
507 | "\n",
508 | " my_dataset = InMemoryKoreaDataset(\n",
509 | " datapath,\n",
510 | " buildings,\n",
511 | " name,\n",
512 | " windowsize=params[\"L\"],\n",
513 | " active_threshold=active_threshold,\n",
514 | " active_ratio=active_ratio,\n",
515 | " active_oversample=active_oversample,\n",
516 | " )\n",
517 | "\n",
518 | " my_dataset, _ = torch.utils.data.random_split(my_dataset, (len(my_dataset), 0))\n",
519 | "\n",
520 | " test_loader = torch.utils.data.DataLoader(\n",
521 | " my_dataset, batch_size=hparams[\"batch_size\"]\n",
522 | " )\n",
523 | "\n",
524 | " model = Model(params[\"L\"], params[\"F\"], params[\"K\"], params[\"H\"])\n",
525 | " model = model.to(device)\n",
526 | "\n",
527 | " name = appliance[\"name\"]\n",
528 | " filename = os.path.join(output, appliance[\"filename\"])\n",
529 | " load_model(filename, model)\n",
530 | "\n",
531 | " output = os.path.join(output, f\"{name}\")\n",
532 | " loss, err = test_single(model, test_loader, appliance, plotfilename)\n",
533 | " print(f\"Test loss={loss:.2f} err={err:.2f}\")"
534 | ],
535 | "execution_count": null,
536 | "outputs": []
537 | },
538 | {
539 | "cell_type": "markdown",
540 | "metadata": {
541 | "id": "oSqH0LIkevF9"
542 | },
543 | "source": [
544 | "###Save/load"
545 | ]
546 | },
547 | {
548 | "cell_type": "code",
549 | "metadata": {
550 | "id": "cPLte57VevoJ"
551 | },
552 | "source": [
553 | "def save(model, optimizer, file_name_model, error):\n",
554 | " print(\"Guardant...\")\n",
555 | " torch.save(\n",
556 | " {\n",
557 | " \"error\": error,\n",
558 | " \"model_state_dict\": model.state_dict(),\n",
559 | " \"optimizer_state_dict\": optimizer.state_dict(),\n",
560 | " },\n",
561 | " file_name_model,\n",
562 | " )\n",
563 | " print(\"Model guardat!\")\n",
564 | "\n",
565 | "\n",
566 | "def save_dataset(train_, test_, filename):\n",
567 | " torch.save({\"train\": train_, \"test\": test_}, filename)\n",
568 | "\n",
569 | "\n",
570 | "def load_model(file_name_model, model, optimizer=None):\n",
571 | " print(\"Loading model...\")\n",
572 | " if torch.cuda.is_available():\n",
573 | " state = torch.load(file_name_model)\n",
574 | " else:\n",
575 | " state = torch.load(file_name_model, map_location=torch.device(\"cpu\"))\n",
576 | "\n",
577 | " model.load_state_dict(state[\"model_state_dict\"])\n",
578 | " error = state[\"error\"]\n",
579 | " print(\"Loaded model! Error rècord: {}\".format(error))\n",
580 | " if optimizer:\n",
581 | " optimizer.load_state_dict(state[\"optimizer_state_dict\"])\n",
582 | " return error"
583 | ],
584 | "execution_count": null,
585 | "outputs": []
586 | },
587 | {
588 | "cell_type": "markdown",
589 | "metadata": {
590 | "id": "tS6GF_m8e064"
591 | },
592 | "source": [
593 | "###Plot"
594 | ]
595 | },
596 | {
597 | "cell_type": "code",
598 | "metadata": {
599 | "id": "em_QoB9qACcH"
600 | },
601 | "source": [
602 | "def plot(x, y, yhat, loss, err, filename):\n",
603 | " subplots = [221, 222, 223, 224]\n",
604 | " plt.figure(1, figsize=(10, 8))\n",
605 | " plt.subplots_adjust(top=0.88)\n",
606 | " for i in range(4):\n",
607 | " x_, y_, yhat_ = (\n",
608 | " x.detach().numpy()[i][0],\n",
609 | " y.detach().numpy()[i],\n",
610 | " yhat.detach().numpy()[i],\n",
611 | " )\n",
612 | " plt.subplot(subplots[i])\n",
613 | " plt.plot(range(len(x_)), x_, color=\"b\", label=\"x\")\n",
614 | " plt.plot(range(len(y_)), y_, color=\"g\", label=\"y\")\n",
615 | " plt.plot(range(len(yhat_)), yhat_, color=\"r\", label=\"yhat\")\n",
616 | "\n",
617 | " plt.suptitle(f\"loss {loss:.2f} error {err:.2f}\")\n",
618 | " plt.legend()\n",
619 | " plt.tight_layout()\n",
620 | " plt.savefig(filename)\n",
621 | " plt.clf()\n",
622 | "\n",
623 | "\n",
624 | "# def plot_window(x, y, yhat, alphas, loss, err, filename):\n",
625 | "# # Naive plot window\n",
626 | "# subplt_x = 4\n",
627 | "# subplt_y = 4\n",
628 | "# plt.figure(1, figsize=(20, 16))\n",
629 | "# plt.subplots_adjust(top=0.88)\n",
630 | "#\n",
631 | "# idxs = np.random.randint(len(x), size=(subplt_x * subplt_y))\n",
632 | "# for i, idx in enumerate(idxs):\n",
633 | "# x_, y_, yhat_ = (\n",
634 | "# x.detach().numpy()[idx][0],\n",
635 | "# y.detach().numpy()[idx],\n",
636 | "# yhat.detach().numpy()[idx],\n",
637 | "# )\n",
638 | "# alphas_ = alphas.detach().numpy()[idx].flatten()\n",
639 | "# ax1 = plt.subplot(subplt_x, subplt_y, i + 1)\n",
640 | "# ax2 = ax1.twinx()\n",
641 | "# ax1.plot(range(len(x_)), x_, color=\"b\", label=\"x\")\n",
642 | "# ax1.plot(range(len(y_)), y_, color=\"r\", label=\"y\")\n",
643 | "# ax1.plot(range(len(yhat_)), yhat_, color=\"orange\", label=\"yhat\")\n",
644 | "# ax2.fill_between(\n",
645 | "# range(len(alphas_)), alphas_, alpha=0.5, color=\"lightgrey\", label=\"alpha\"\n",
646 | "# )\n",
647 | "#\n",
648 | "# plt.suptitle(f\"loss {loss:.2f} error {err:.2f}\")\n",
649 | "# ax1.legend()\n",
650 | "# ax2.legend()\n",
651 | "# plt.legend()\n",
652 | "# plt.tight_layout()\n",
653 | "# plt.savefig(filename)\n",
654 | "# plt.clf()\n",
655 | "\n",
656 | "\n",
657 | "def plot_window(x, y, yhat, reg, clas, alphas, loss, err, filename):\n",
658 | " subplt_x = 4\n",
659 | " subplt_y = 4\n",
660 | " plt.figure(1, figsize=(20, 16))\n",
661 | " plt.subplots_adjust(top=0.88)\n",
662 | "\n",
663 | " idxs = np.random.randint(len(x), size=(subplt_x * subplt_y))\n",
664 | " for i, idx in enumerate(idxs):\n",
665 | " x_, y_, yhat_, reg_, clas_ = (\n",
666 | " x.detach().numpy()[idx][0],\n",
667 | " y.detach().numpy()[idx],\n",
668 | " yhat.detach().numpy()[idx],\n",
669 | " reg.detach().numpy()[idx],\n",
670 | " clas.detach().numpy()[idx],\n",
671 | " )\n",
672 | " alphas_ = alphas.detach().numpy()[idx].flatten()\n",
673 | " ax1 = plt.subplot(subplt_x, subplt_y, i + 1)\n",
674 | " ax2 = ax1.twinx()\n",
675 | " ax1.plot(range(len(x_)), x_, color=\"b\", label=\"x\")\n",
676 | " ax1.plot(range(len(y_)), y_, color=\"r\", label=\"y\")\n",
677 | " ax1.plot(range(len(reg_)), reg_, color=\"black\", label=\"reg\")\n",
678 | " ax1.plot(range(len(yhat_)), yhat_, alpha=0.5, color=\"orange\", label=\"yhat\")\n",
679 | " ax2.fill_between(\n",
680 | " range(len(alphas_)), alphas_, alpha=0.5, color=\"lightgrey\", label=\"alpha\"\n",
681 | " )\n",
682 | " alphas_max = np.max(alphas_)\n",
683 | " ax2.plot(\n",
684 | " range(len(clas_)), clas_ * alphas_max, color=\"cyan\", alpha=0.25, label=\"reg\"\n",
685 | " )\n",
686 | "\n",
687 | " plt.suptitle(f\"loss {loss:.2f} error {err:.2f}\")\n",
688 | " ax1.legend()\n",
689 | " ax2.legend()\n",
690 | " plt.legend()\n",
691 | " plt.tight_layout()\n",
692 | " plt.savefig(filename)\n",
693 | " plt.clf()\n",
694 | "\n",
695 | "\n",
696 | "def summary(path, results):\n",
697 | " df = pd.DataFrame(\n",
698 | " [\n",
699 | " {\n",
700 | " \"epoch\": x[0][0],\n",
701 | " \"train_loss\": x[0][1],\n",
702 | " \"train_err\": x[0][2],\n",
703 | " \"eval_loss\": x[1][1],\n",
704 | " \"eval_err\": x[1][2],\n",
705 | " }\n",
706 | " for x in results\n",
707 | " ]\n",
708 | " ).set_index(\"epoch\")\n",
709 | "\n",
710 | " columns = [\"train_loss\", \"eval_loss\"]\n",
711 | " filename = os.path.join(path, \"results-loss.csv\")\n",
712 | " df[columns].round(3).to_csv(filename, sep=\";\")\n",
713 | " filename = os.path.join(path, \"results-loss.png\")\n",
714 | "\n",
715 | " plt.figure(1, figsize=(10, 8))\n",
716 | " df[columns].round(3).plot()\n",
717 | " plt.savefig(filename)\n",
718 | " plt.clf()\n",
719 | "\n",
720 | " columns = [\"train_err\", \"eval_err\"]\n",
721 | " filename = os.path.join(path, \"results-error.csv\")\n",
722 | " df[columns].round(3).to_csv(filename, sep=\";\")\n",
723 | " filename = os.path.join(path, \"results-error.png\")\n",
724 | "\n",
725 | " plt.figure(1, figsize=(10, 8))\n",
726 | " df[columns].round(3).plot()\n",
727 | " plt.savefig(filename)\n",
728 | " plt.clf()"
729 | ],
730 | "execution_count": null,
731 | "outputs": []
732 | },
733 | {
734 | "cell_type": "markdown",
735 | "metadata": {
736 | "id": "jl__05W0e6lQ"
737 | },
738 | "source": [
739 | "###Main"
740 | ]
741 | },
742 | {
743 | "cell_type": "code",
744 | "metadata": {
745 | "id": "AEbuoiWHACfL"
746 | },
747 | "source": [
748 | "def main(args):\n",
749 | "\n",
750 | " if args[\"disable_random\"]:\n",
751 | " torch.manual_seed(7)\n",
752 | "\n",
753 | " train = args[\"train\"]\n",
754 | " tune_enabled = args[\"tune\"]\n",
755 | " output = args[\"path\"]\n",
756 | " plot_disabled = args[\"disable_plot\"]\n",
757 | "\n",
758 | " settings = load_yaml(args[\"settings\"])\n",
759 | " appliance = args[\"appliance\"]\n",
760 | "\n",
761 | " dataset = settings[\"dataset\"]\n",
762 | " hparams = settings[\"hparams\"]\n",
763 | " if args[\"epochs\"]:\n",
764 | " hparams[\"epochs\"] = int(args[\"epochs\"])\n",
765 | "\n",
766 | " appliance = settings[\"appliances\"][appliance]\n",
767 | "\n",
768 | " datapath = dataset[\"path\"]\n",
769 | " if train:\n",
770 | " print(\"==========================================\")\n",
771 | " print(f\"Training ONGOING\")\n",
772 | " print(\"==========================================\")\n",
773 | "\n",
774 | " if not tune_enabled:\n",
775 | " my_model = train_model(\n",
776 | " datapath,\n",
777 | " output,\n",
778 | " appliance,\n",
779 | " hparams,\n",
780 | " doplot=not plot_disabled,\n",
781 | " load=True\n",
782 | " )\n",
783 | " else:\n",
784 | " config = {\n",
785 | " \"datapath\": datapath,\n",
786 | " \"output\": output,\n",
787 | " \"appliance\": appliance,\n",
788 | " \"hparams\": hparams,\n",
789 | " \"doplot\": not plot_disabled,\n",
790 | " \"load\": False,\n",
791 | " \"tune\": {\n",
792 | " \"F\": tune.grid_search([16, 32, 64]),\n",
793 | " \"K\": tune.grid_search([4, 8, 16]),\n",
794 | " \"H\": tune.grid_search([256, 512, 1024]),\n",
795 | " },\n",
796 | " }\n",
797 | " analysis = tune.run(\n",
798 | " train_model_wrapper,\n",
799 | " metric=\"val_loss\",\n",
800 | " mode=\"min\",\n",
801 | " num_samples=5,\n",
802 | " config=config,\n",
803 | " )\n",
804 | " print(\"==========================================\")\n",
805 | " print(f\"Best hyperparameters\")\n",
806 | " print(analysis.best_config)\n",
807 | " print(\"==========================================\")\n",
808 | "\n",
809 | " print(\"==========================================\")\n",
810 | " print(f\"Training DONE\")\n",
811 | " print(\"==========================================\")\n",
812 | " else:\n",
813 | " print(\"==========================================\")\n",
814 | " print(f\"Testing ONGOING\")\n",
815 | " print(\"==========================================\")\n",
816 | " test_model(datapath, output, appliance, hparams, doplot=not plot_disabled)\n",
817 | " print(\"==========================================\")\n",
818 | " print(f\"Testing DONE\")\n",
819 | " print(\"==========================================\")"
820 | ],
821 | "execution_count": null,
822 | "outputs": []
823 | },
824 | {
825 | "cell_type": "code",
826 | "metadata": {
827 | "colab": {
828 | "base_uri": "https://localhost:8080/"
829 | },
830 | "id": "G-Hn1v_tAfzc",
831 | "outputId": "ea1ff142-db1f-4ee6-d1b1-fd973330e8a9"
832 | },
833 | "source": [
834 | "from google.colab import drive\n",
835 | "drive.mount('/content/gdrive')"
836 | ],
837 | "execution_count": null,
838 | "outputs": [
839 | {
840 | "output_type": "stream",
841 | "text": [
842 | "Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"
843 | ],
844 | "name": "stdout"
845 | }
846 | ]
847 | },
848 | {
849 | "cell_type": "markdown",
850 | "metadata": {
851 | "id": "7usI-cKSHV6o"
852 | },
853 | "source": [
854 | "### Args i execució"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "metadata": {
860 | "id": "LTC0PxuvACjM"
861 | },
862 | "source": [
863 | "args = {\"settings\": \"/content/gdrive/MyDrive/ColabNotebooks/settings.yaml\", \"appliance\": \"microwave\",\n",
864 | " \"path\": \"/content/gdrive/MyDrive/ColabNotebooks/microwave_out\", \"train\": True, \"epochs\": 1,\n",
865 | " \"disable_random\": True, \"tune\": False, \"disable_plot\": False}\n",
866 | "\n",
867 | "main(args)"
868 | ],
869 | "execution_count": null,
870 | "outputs": []
871 | },
872 | {
873 | "cell_type": "code",
874 | "metadata": {
875 | "id": "2350M3EyQBQE"
876 | },
877 | "source": [
878 | ""
879 | ],
880 | "execution_count": null,
881 | "outputs": []
882 | }
883 | ]
884 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Non-intrusive Load Disaggregation
2 | ## Introduction
3 |
4 | - Motivation and goals
5 |
6 | Climate change is one of the greatest challenges facing humanity, and machine learning approaches are a great solution to tackle this problem. In 2019, a group of machine learning experts developed a paper called "Tackling Climate Change with Machine Learning" [[1]](#1) focused on impactful uses of machine learning in reducing and responding to climate change challenges.
7 |
8 | One of the main domains among the many propositions is "Buildings and cities" and, more specifically, how to "optimize buildings energy consumption". The paper states "_while the energy consumed in buildings is responsible for a quarter of global energy-related emissions, a combination of easy-to-implement fixes and state-of-the-art strategies could reduce emissions for existing buildings by up to 90%_". This statement caught our attention and motivated us to start this project: find an optimization model to control and therefore optimize energy consumption in buildings.
9 |
10 | After extensive research, we decided to focus our study on Non-Intrusive Load Monitoring (NILM). NILM is the task of estimating the power demand of different appliances in a building given an aggregate power demand signal recorded by a single electric meter monitoring multiple appliances.
11 |
12 | Neural NILM is a non-linear regression problem that consists of training a neural network for each appliance in order to predict a time window of the appliance load given the corresponding time window of aggregated data.
13 |
14 | We adopted the "Non-Intrusive Load Monitoring with an Attention-based Deep Neural Network" [[2]](#2) paper developed by University of Rome Tor Vergata researchers, to be our Reference Paper. Other approaches to Neura NILM are presented in "Non-intrusive load disaggregation solutions for very low-rate smart meter data." [[3]](#3) and "Sequence-to-point learning with neural networks for non-intrusive load monitoring" [[4]](#4).
15 |
16 | - Dataset
17 |
18 | As to the dataset used, we selected the real-world "Reference Energy Disaggregation Data Set (REDD)" [[5]](#5). This dataset is one of the reference datasets used in the NILM Reference Paper and contains data for six different houses from the USA. The data is collected at a 1-second sampling period for the aggregate power consumption and a 3-second period for the appliance power consumption. The appliances used are the following:
19 | oven, refrigerator, dishwasher, kitchen_outlets, microwave, bathroom_outlet, lighting, washer_dryer, electric_heater, stove, disposal, electronics, furance, smoke_alarms, air_conditioner.
20 | Thus, in our model, we consider three appliances: dishwasher (DW), microwave (MW), and refrigerator (FR). These are the same appliances as used in the Reference Paper, so that our results are comparable.
21 |
22 | Dataset split
23 |
24 | The dataset is split using houses 2,3,4,5,6 to build the training set and house 1 as the test set.
25 |
26 |
27 |
28 | The actual dataset of our model is a combination of two datasets.
29 | - We found that a deep learning research team from Seoul National University had a pre-processed, cleaned version (see the pre-processing section below) of the REDD dataset used by the Reference Paper. This dataset is used in their paper "Subtask Gated Networks for Non-Intrusive Load Monitoring" [[6]](#6).
30 | - On the other hand, in the REDD dataset there is a high active/inactive window imbalance. This irregularity is observed especially in the case of the dishwasher and the microwave. As expected, given how these appliances are used, a dishwasher and a microwave are not in use most of the time, so inactive windows are heavily overrepresented. We implemented an oversampling process, described in the pre-processing section (see below), to solve the problem.
31 |
32 | ## System architecture
33 |
34 | ### Preprocessing
35 |
36 | Initial project implementation was done using raw REDD dataset and it was necessary to pre-process the data as described in "Subtask gated networks for non-intrusive load monitoring" [[6]](#6), see details:
37 |
38 | 1. Data alignment. Align multiple time series with different acquisition frequencies.
39 | 2. Data imputation. Split the sequence so that the duration of missing values in each subsequence is less than 20 seconds, then fill the missing values in each subsequence with a backward filling method.
40 | 3. Data filtering. Only use the subsequences with more than one day of duration.
41 | 4. Generate sliding windows. Use a sliding window over the aggregated signal with a hop size equal to 1 sample (see the sketch after this list).
42 |
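A minimal sketch of steps 2 and 4, assuming a pandas Series indexed by timestamps and an illustrative window length `L`; the project's actual implementation lives in dataset.py and may differ:

```
import numpy as np
import pandas as pd


def split_and_bfill(series, max_gap="20s"):
    """Split the series wherever a gap exceeds max_gap, then backward-fill
    the remaining short gaps inside each subsequence (step 2)."""
    breaks = series.index.to_series().diff() > pd.Timedelta(max_gap)
    return [seg.resample("1s").bfill() for _, seg in series.groupby(breaks.cumsum())]


def sliding_windows(values, L, hop=1):
    """Generate overlapping windows over the aggregated signal (step 4, hop size = 1 sample)."""
    return np.stack([values[i:i + L] for i in range(0, len(values) - L + 1, hop)])
```
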
43 | Once the authors from Seoul National University provided us with the same dataset as the Reference Paper, we disabled our own data pre-processing. The main reason was to ensure the same input data as the original paper, in order to obtain the same, or similar, results.
44 |
45 | Oversampling is used to solve the overrepresentation of inactive windows and the irregularity of the active/inactive window imbalance (described in the Dataset section). The process consists of replicating randomly picked active windows for each appliance to obtain a 50% - 50% class balance. The ratio between active/inactive windows is configurable in settings.
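
A minimal sketch of the oversampling idea, assuming windows have already been separated into active and inactive lists; the 50/50 target mirrors the description above, while the actual logic is implemented in dataset.py:

```
import random


def oversample_active(active_windows, inactive_windows, active_ratio=0.5):
    """Replicate randomly picked active windows until they make up
    `active_ratio` of the resulting set of windows."""
    target = int(active_ratio * len(inactive_windows) / (1 - active_ratio))
    extra = [random.choice(active_windows) for _ in range(max(0, target - len(active_windows)))]
    return active_windows + extra + inactive_windows
```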
46 |
47 | After implementing oversampling, the numbers of windows used for train, eval and test are listed below:
48 |
49 |
50 | | Appliance | Nº buildings train | Nº windows train | Nº windows eval | Nº buildings test |
51 | |-----------|--------------------|------------------|-----------------|-------------------|
52 | | dishwasher| 5| 289163 | 123927 | 1|
53 | | fridge | 4| 613167 | 262787 | 1|
54 | | microwave | 3| 82922 | 35538 | 1|
55 |
56 |
57 | ### Model architectures
58 |
59 | We've implemented three different model architectures:
60 |
61 | - Regression and classification enabled
62 | - Only regression enabled.
63 | - Regression and classification using the attention results.
64 |
65 | 
66 |
67 |
68 | #### Regression and classification enabled
69 | The designed architecture adopted to solve the NILM problem is based on a classical end-to-end regression network with its encoder-decoder components, adding an attention mechanism between the encoder and the decoder. Apart from the main end-to-end regression network, an auxiliary end-to-end classification subnetwork is attached.
70 |
71 | Why an attention-based model?
72 | The attention-based model helps with the energy disaggregation task. It assigns importance, through weights, to every position in the aggregated signal; after successful training, these positions correspond to state changes of the target appliance. The addition of an attention mechanism in the regression subnetwork allows the model to focus on selected time steps or windows rather than on non-target appliances.
73 | The attention scores are the way to weigh the importance of every position in our input sequence when inferring the disaggregated signal. To represent these weights correctly, we made the output of the attention layer a 1D vector with the length of a window sequence.
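
To make this concrete, below is a minimal sketch of an additive attention layer that produces one score per position of a window; the shapes and names are illustrative, and the project's actual layer (defined in model.py) may differ:

```
import torch
import torch.nn as nn
import torch.nn.functional as F


class AdditiveAttention(nn.Module):
    """Score every encoder time step and return a length-L weight vector."""

    def __init__(self, hidden_size):
        super().__init__()
        self.proj = nn.Linear(hidden_size, hidden_size)
        self.score = nn.Linear(hidden_size, 1)

    def forward(self, h):                              # h: (batch, L, hidden_size)
        e = self.score(torch.tanh(self.proj(h)))       # (batch, L, 1)
        alphas = F.softmax(e, dim=1)                   # one weight per position
        context = (alphas * h).sum(dim=1)              # (batch, hidden_size)
        return context, alphas.squeeze(-1)             # alphas: (batch, L)


# Toy usage on dummy encoder outputs: batch of 8 windows, L=480, H=64
context, alphas = AdditiveAttention(64)(torch.randn(8, 480, 64))
```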
74 |
75 | Both subnetworks have a different objective:
76 | - Regression end-to-end network: allows the subnetwork to “implicitly detect and assign more importance to some events (e.g. turning on or off of the appliance) and to specific signal sections”.
77 | - Classification end-to-end network: helps the disaggregation process by enforcing explicitly the on/off states of the appliances.
78 |
79 | The outcomes of both subnetworks are combined at the end to produce the disaggregated consumption of the appliance.
80 |
81 |
82 |
83 |
84 | #### Only regression enabled
85 | This architecture consists of removing the classification subnetwork, which does not have an attention layer, from the model. The regression branch is kept as in the original network.
86 |
87 | #### Regression and classification using the attention results
88 | In this final model modification, the output of the attention layer is used to compute the result of the regression subnetwork (as in all the models). In this architecture, we concatenate the output of the regression subnetwork with the output of the stack of convolutional layers in the classification subnetwork. This concatenated vector is fed to the two fully connected layers on top of the classification branch. The expected behavior of this architecture is described in the Experiment 7 hypothesis.
89 |
90 | ### Train
91 |
92 | - Methodology. Model training is done using the whole pre-processed train dataset and batches of size 64 via a data loader. At first, we set training to 10 epochs, which in most cases we found enough for an initial analysis of model response and performance. The common do_load -> do_predict -> calculate_loss -> update_optimizer train sequence is applied to each train batch in every epoch. The common do_load -> do_predict -> calculate_loss validation sequence is applied to each validation batch in every epoch.
93 |
94 | - Loss function. An aggregated loss function is used for the joint optimization of both the regression and the classification network: L = Lout + Lclas, where Lout is the Mean Squared Error (MSE) between the overall output of the network and the ground truth of a single appliance, and Lclas is the Binary Cross-Entropy (BCE) that measures the classification error of the on/off state for the classification subnetwork.
95 |
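A minimal sketch of this aggregated loss in PyTorch, assuming the network returns the overall power estimate `yhat` and the on/off classification logits `state_logits` (the names are illustrative; the notebook's train loop may differ):

```
import torch
import torch.nn.functional as F


def aggregated_loss(yhat, y, state_logits, on_off):
    """L = Lout + Lclas: MSE on the disaggregated power plus BCE on the on/off state."""
    l_out = F.mse_loss(yhat, y)                                          # regression term
    l_clas = F.binary_cross_entropy_with_logits(state_logits, on_off)    # classification term
    return l_out + l_clas


# Toy example with a batch of 64 windows of length 480
y, yhat = torch.rand(64, 480), torch.rand(64, 480)
on_off, state_logits = torch.randint(0, 2, (64, 480)).float(), torch.randn(64, 480)
loss = aggregated_loss(yhat, y, state_logits, on_off)
```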
96 |
97 | ### Test
98 |
99 | - Methodology. Model testing is done over the whole pre-processed test dataset using batches of size 64 via a data loader. The common do_load -> do_predict -> calculate_error test sequence is applied to each test batch.
100 | - Error metrics. MAE (Mean Absolute Error) is used to evaluate the performance of the neural network. MAE is calculated after applying the prediction postprocessing described in the Postprocessing section. This is the metric used in the Reference Paper and is used as the benchmarking criterion between the different experiments described below.
101 |
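For reference, a minimal sketch of the MAE computation over reconstructed predictions (the project's own helpers in utils.py/test.py may differ):

```
import torch


def mae(yhat: torch.Tensor, y: torch.Tensor) -> float:
    """Mean Absolute Error between predicted and real appliance consumption."""
    return torch.mean(torch.abs(yhat - y)).item()
```
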
102 | ### Postprocessing
103 |
104 | The disaggregation phase is carried out with a sliding window over the aggregated signal with a hop size equal to 1 sample. That is why the model generates overlapped windows of the disaggregated signal. We reconstruct the signal from the overlapped windows by applying a median filter on the overlapped portions.
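
One possible way to implement this reconstruction is sketched below (memory-heavy but simple; the project's own postprocessing may differ):

```
import numpy as np


def reconstruct_from_windows(windows, hop=1):
    """Rebuild a 1D signal from overlapping predicted windows by taking the
    median of all window values that cover each time step."""
    n_windows, window_len = windows.shape
    length = (n_windows - 1) * hop + window_len
    stacked = np.full((n_windows, length), np.nan)
    for i, w in enumerate(windows):
        stacked[i, i * hop:i * hop + window_len] = w
    return np.nanmedian(stacked, axis=0)


# Toy example: 5 overlapping windows of length 4, hop size 1
signal = reconstruct_from_windows(np.arange(20, dtype=float).reshape(5, 4))
```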
105 |
106 | ## Experiments
107 |
108 | The main goals of the experiments are:
109 |
110 | - Learn how to implement and deploy a DL system on a commercial cloud computing platform
111 | - Understand and interpret the current NILM neural network described in the paper
112 | - Understand which is the task of regression branch
113 | - Understand which is the task of classification branch
114 | - Understand which is the task of attention
115 |
116 | We proposed the three main architecture modifications evaluated in the experiments while analyzing the Reference Paper. The experiments were not designed
117 | sequentially based on the results of the previous experiment.
118 |
119 | Main architecture modifications:
120 |
121 | - Paper architecture - Regression and classification enabled
122 | - Paper modification 1 - Only regression enabled
123 | - Paper modification 2 - Regression and classification using the attention results
124 |
125 | We initially explored the data to get a first picture of the type and amount of data available. We realized there was a high active/inactive window imbalance in the case of the dishwasher and the microwave (as explained in the Dataset section). There would be a large enough total number of windows to train the model, but not enough active windows to prevent a biased model. If no oversampling were done, the model would mainly predict null demand in inactive windows, which would be correct, but would fail to predict non-null demand in active windows. Although disaggregation is a regression problem, this would be similar to high specificity and low sensitivity in an active/inactive appliance classification problem.
126 |
127 | ### Neural network response charts
128 |
129 | We generate charts with time series describing the response of the neural network in train, eval, and test. These charts are used to visualize and interpret the response of both whole and specific parts of the network. The main parts of interest are regression, classification, and attention. In most of the charts, the available time series are:
130 |
131 | - Building consumption. Aggregated consumption of the building. Used as input of the neural network
132 | - Predicted appliance consumption. Disaggregated appliance consumption predicted by the neural network
133 | - Real appliance consumption. Real appliance consumption obtained from the meter
134 | - Classification branch output. Prediction of the classification branch
135 | - Regression branch output. Prediction of the regression branch
136 | - Attention score. Describes the zone of interest for attention to improve regression
137 |
138 |
139 | 
140 |
141 |
142 | All the consumption time series are referenced to the left Y-axis. Classification and attention are referenced to the right Y-axis. In both cases, some prediction results are rescaled to make all of them fit in a single chart (i.e. the classification prediction is scaled to nearly the maximum consumption, ...). In the report, there are two train and two test sample charts per experiment and appliance to visualize the response and support the conclusions.
143 |
144 | Interpretation of the charts focuses on:
145 | - Performance. Comparing real vs predicted series it's possible to identify the performance of the model
146 | - Characterization of the error. Comparing real vs predicted series it's possible to identify error specific patterns (peaks, plateaus, etc)
147 | - Correlation of the error with aggregated demand. Comparing the error vs the aggregated building consumption, it's possible to identify the response of the model to crowded scenarios (multiple appliances) and single-appliance scenarios. It's also possible to identify the response of the model when different kinds of appliances, with different consumption patterns, run simultaneously.
148 | - Contribution of each of the branches. Analyzing the output of the branches, it is possible to identify the contribution of each branch to the prediction, as well as the objective of each branch and its performance.
149 | - Focus of attention. Analyzing the attention output it's possible to identify which parts of the window are important to the regression output. The attention can be used to:
150 |   - Identify whether the important parts vary in the different scenarios. Maybe there is a scenario in which several appliances are ON, or a scenario in which just one appliance that consumes a lot is ON. Attention will help differentiate these two situations.
151 |   - Identify whether there are specific important parts or the importance is homogeneous along the window
152 |   - Identify whether the important parts lie in the appliance activation itself or in its neighborhood.
153 |   - Identify characteristics of important parts such as peaks, plateaus, etc.
154 |
155 |
156 | ## Paper architecture - Regression and classification enabled
157 |
158 | ### Experiment 1. Paper
159 |
160 | #### Hypothesis
161 |
162 | The regression subnetwork infers the power consumption, whereas the classification subnetwork focuses on the binary classification of the appliance state (on/off). The attention mechanism improves the representational power of the network to identify the positions in the aggregated input sequence with useful information to identify appliance-specific patterns.
163 |
164 | Additional group hypothesis:
165 |
166 | Specific appliance patterns are described by state changes and state duration which are related to the operating regime of the internal electricity consumption components. The operating regime of the internal components depends on multiple factors:
167 |
168 | - Appliance operating mode.
169 | - User-selected modes of operation. There are appliances with a small number of user modes (fridge, dishwasher) and appliances with a moderate number of user modes (microwave). The higher the number of user modes, the more distinct patterns the neural network has to describe.
170 | - Cycle duration. There are appliances with short cycles describing the pattern per operating mode, such as the fridge and the microwave, and appliances with long cycles, such as the dishwasher. The longer the cycle duration, the harder it is to describe the pattern, as the input sequence windows are longer.
171 | - Environmental factors (temperature, etc.). There are appliances that depend on external variables such as environmental factors. In this specific model, there is a high dependency on temperature for the fridge and a lower dependency for the microwave and the dishwasher. Weather dependency adds stochasticity to the system and, consequently, complexity to the model.
172 | - Internal components demand. The main electricity-consuming components are:
173 |   - Heating/cooling. Weather-dependent load demand adds stochasticity to the system, hence complexity to the model.
174 |   - Motors. Load demand is mainly related to the user mode and to the component's internal operating regime.
175 |
176 | #### Experiment setup
177 |
178 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods:
179 |
180 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test|
181 | |-----------|------------|----------------|----------------|-----------------------------|
182 | | dishwasher| TRUE | TRUE | FALSE | FALSE |
183 | | fridge | TRUE | TRUE | FALSE | FALSE |
184 | | microwave | TRUE | TRUE | FALSE | FALSE |
185 |
186 | #### Results
187 |
188 | See the attached train vs eval loss curve to diagnose performance:
189 |
190 | 
191 |
192 | See the attached train and test samples for each appliance to interpret and evaluate the disaggregation:
193 |
194 | 
195 | 
196 |
197 |
198 |
199 | See the obtained error (the metric previously introduced in the error metrics section) and extra training information:
200 |
201 | | Appliance | MAE | Nº Epochs| Nº Hours Train|
202 | |------------|-------|----------|---------------|
203 | |dishwasher |28.25 |4 | 15 |
204 | |fridge |26.75 |4 | 25 |
205 | |microwave |31.47 |4 | 1.23 |
206 |
207 | #### Conclusions
208 |
209 | As described in the hypothesis, the main goal of the regression branch is
210 | predicting the maximum expected demand of the appliance. As was also expected,
211 | the classification branch modulates the regression results to match
212 | the appliance load pattern. Classification has high specificity and low sensitivity.
213 |
214 | In both cases, train and eval show good results, but the results are
215 | less accurate in test. Our hypothesis is that the
216 | model does not generalize well due to the small number and low variance of appliance patterns across the different train buildings.
217 |
218 | See samples of dishwasher consumption per building:
219 |
220 | 
221 |
222 | The classification network is in charge of modeling the patterns. As seen in the results, it is less accurate in the steady-state sections than expected. Hence, the instability and, in some cases, the highly sensitive response are also related to the overrepresentation issue.
223 |
224 | In most cases, increasing the number of acquisition samples would not be a good solution to fix the instability issue, as there would be more active windows but the same patterns. That's the case for appliances whose components do not depend on environmental factors (temperature, etc.), like the microwave or the dishwasher. For appliances with environmental dependencies, it would also help to have samples from different seasons. We implemented oversampling, but it is similar to increasing the number of samples from the same appliance rather than adding new ones.
225 |
226 | There is no more data available beyond the public dataset. Data augmentation cannot be easily implemented as a solution due to the lack of a database of appliance loads. In this case, it makes no sense to create synthetic aggregated scenarios mixing appliances from different buildings because they are already mixed in the training dataset and properly predicted in eval. For the classification branch, we hypothesize that in some cases adding noise would help to decrease highly sensitive responses.
227 |
228 | Attention in appliances with a high simultaneity factor(\*) focuses mainly on state changes of the appliance, like switch on/switch off or high-consuming components of the appliance. It also focuses on state duration. That would be the case of the dishwasher or the
229 | microwave. Attention in appliances with a low
230 | simultaneity factor also focuses on other sections of the window outside the
231 | active section. That would be the case of the fridge. Our hypothesis is that in high simultaneity factor
232 | scenarios, attention focuses on the appliance pattern, and in low
233 | simultaneity factor scenarios it additionally focuses on the neighborhood. Attention would perform better at identifying highly specialized and specific features in a consumption window.
234 |
235 | (\*) The simultaneity factor describes the probability of an appliance
236 | being active while other appliances are active. A large simultaneity factor
237 | means that the appliance is usually active while others are also active.
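
A hedged sketch of how such a factor could be estimated from aligned power series; the activity threshold and the helper function are illustrative, since the report uses the term qualitatively:

```
import pandas as pd


def simultaneity_factor(appliance: pd.Series, others: pd.DataFrame, on_threshold=15.0):
    """Estimate P(some other appliance is active | this appliance is active),
    where 'active' means power above an illustrative threshold in watts."""
    active = appliance > on_threshold
    others_active = (others > on_threshold).any(axis=1)
    return float((active & others_active).sum()) / float(active.sum())
```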
238 |
239 | Regarding the hypothesis on the type of appliances:
240 | - The neural network can model the different operating modes in the appliances, even the ones with a high number of operating modes
241 | - The neural network can model both heating/cooling and motor components
242 | - There's no specific conclusion about the capacity to model weather dependency as both train and test datasets were acquired under similar environments (season, etc)
243 |
244 | ### Experiments 2 and 3. Paper with standardization
245 |
246 | Standardization can be used to rescale the testing samples to better
247 | describe relative patterns rather than absolute consumption values. Standardization transforms features such that their mean (μ) equals 0 and their standard deviation (σ) equals 1. The range of the new min and max values is determined by the standard deviation of the initial un-normalized feature.
248 |
249 | 
250 |
251 | Standardization is achieved by Z-score Normalization. Z-score is given by:
252 |
253 | 
254 |
255 | The standardization process is done over the specific dataset in each specific experiment.
256 |
257 | Although the appliance models in train and test are different in terms of absolute consumption, relative step changes in the standardized data can be similar.
258 | This is an approach to bypass the overrepresentation in the data. In this case, the mean and standard deviation used in training are calculated over the train dataset, and the mean and standard deviation used in test are calculated over the test dataset.
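
A minimal sketch of the two standardization strategies on toy data (the arrays and magnitudes are illustrative only):

```
import numpy as np


def standardize(x, mean, std):
    """Z-score: z = (x - mean) / std."""
    return (x - mean) / std


train = np.random.rand(1000) * 500.0   # toy aggregated power (W)
test = np.random.rand(1000) * 800.0

mu, sigma = train.mean(), train.std()
train_std = standardize(train, mu, sigma)

# Experiment 2: recalculate the statistics on the test set
test_exp2 = standardize(test, test.mean(), test.std())

# Experiment 3: reuse the training statistics on the test set
test_exp3 = standardize(test, mu, sigma)
```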
259 |
260 | #### Experiment 2. Paper with standardization - Using calculated standardization in test
261 |
262 | ##### Hypothesis
263 |
264 | The main difference between Experiment 1 and Experiment 2 is the addition of standardization to the dataset of the model, as explained in the previous paragraph. In this experiment we standardize the training data with respect to the train set and the test data with respect to the test set.
265 |
266 |
267 |
268 | We hypothesize that we will have a better outcome than Experiment 1.
269 |
270 | ##### Experiment setup
271 |
272 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods:
273 |
274 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test|
275 | |-----------|------------|----------------|----------------|-----------------------------|
276 | | dishwasher| TRUE | TRUE | TRUE | TRUE |
277 | | fridge | TRUE | TRUE | TRUE | TRUE |
278 | | microwave | TRUE | TRUE | TRUE | TRUE |
279 |
280 | ##### Results
281 |
282 | See the attached train and test samples for each appliance to interpret and evaluate the disaggregation:
283 |
284 | 
285 | 
286 | 
287 |
288 |
289 |
290 | See the obtained error (previously introduced in the error metrics section) and extra training information:
291 |
292 | | Appliance | MAE | Nº Epochs| Nº Hours Train|
293 | |------------|-------|----------|---------------|
294 | |dishwasher |46.98 |10 |40 |
295 | |fridge |52.17 |10 |55 |
296 | |microwave |31.16 |10 |2.25 |
297 |
298 | ##### Conclusions
299 |
300 | Our hypothesis is refuted, as the results are worse than without standardization (Experiment 1). To understand better why this happened, we calculated the standard deviation of the fridge consumption for House 1 and House 2 to see whether the values lie within the same region of consumption.
301 |
302 | 
303 |
304 | As can be seen in this box diagram, the consumptions of the fridge for House 1 and House 2 don't follow a similar distribution. Therefore, it is now understandable why the results of Experiment 2 are worse than those of Experiment 1. We cannot standardize train and test with their own separate statistics because their consumptions don't follow a similar distribution.
305 |
306 | #### Experiment 3. Paper with standardization - Using training standardization in test
307 |
308 | ##### Hypothesis
309 | We wanted to do the opposite of Experiment 2, to see whether standardizing both the train and the test set with the training statistics gave a better outcome.
310 | The standardization in Experiment 3 is done as follows:
311 |
312 |
313 |
314 | Better results than experiment 2 are expected although not necessarily better than experiment 1.
315 |
316 | ##### Experiment setup
317 |
318 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods:
319 |
320 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test|
321 | |-----------|------------|----------------|----------------|-----------------------------|
322 | | dishwasher| TRUE | TRUE | TRUE | FALSE |
323 | | fridge | TRUE | TRUE | TRUE | FALSE |
324 | | microwave | TRUE | TRUE | TRUE | FALSE |
325 |
326 |
327 | ##### Results
328 |
329 | See the attached train and test samples for each appliance to interpret and evaluate the disaggregation:
330 |
331 | 
332 | 
333 | 
334 |
335 |
336 |
337 |
338 | See the obtained error (previously introduced in the error metrics section) and extra training information:
339 |
340 | | Appliance | MAE | Nº Epochs| Nº Hours Train|
341 | |------------|-------|----------|---------------|
342 | |dishwasher |31.19 |10 |32 |
343 | |fridge |39.67 |10 |55 |
344 | |microwave |23.72 |10 |2.25 |
345 |
346 | ##### Conclusions
347 |
348 | Our hypothesis is supported, as the results are better than with different standardization in train and test (as done in Experiment 2). Compared with Experiment 1, however, the results are still worse, so we conclude that in this situation, and with this dataset, standardizing the data is not recommended.
349 |
350 | ## Paper modification 1 - Only regression enabled
351 |
352 | ### Experiment 4. Only regression without standardization
353 |
354 | #### Hypothesis
355 |
356 | The main hypothesis of this experiment is whether attention can detect the consumption pattern and replace what in previous experiments was the classification branch by modulating the output of the regression branch.
357 | By removing the classifier branch, the model is still expected to detect the peaks (with the help of attention) but may predict values further from the target consumption than with the classification branch.
358 |
359 | #### Experiment setup
360 |
361 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods:
362 |
363 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test|
364 | |-----------|------------|----------------|----------------|-----------------------------|
365 | | dishwasher| TRUE | FALSE | FALSE | FALSE |
366 | | fridge | TRUE | FALSE | FALSE | FALSE |
367 | | microwave | TRUE | FALSE | FALSE | FALSE |
368 |
369 | #### Results
370 |
371 | See the attached train vs eval loss curve to diagnose performance:
372 |
373 | 
374 |
375 |
376 | See the attached train and test samples for each appliance to interpret and evaluate the disaggregation:
377 |
378 | 
379 | 
380 | 
381 |
382 |
383 |
384 | See the obtained error (previously introduced in the error metrics section) and extra training information:
385 |
386 | | Appliance | MAE | Nº Epochs| Nº Hours Train|
387 | |------------|-------|----------|---------------|
388 | |dishwasher |24.79 |10 |36 |
389 | |fridge |29.86 |10 |50.9 |
390 | |microwave |22.56 |10 |1.9 |
391 |
392 | #### Conclusions
393 |
394 | The results of this experiment are worse than those reported in the original paper, but within our set of experiments it has the lowest mean absolute error (close to Experiment 1 and Experiment 6).
395 | The main hypothesis was whether attention could take over the role of the classification branch, and the results were better than expected. Attention learns how to focus on consumption peaks (much better than in Experiment 1) and gives the model the ability to generalize better than in Experiment 1.
396 | Without the classification branch, the attention weights train better and the attention values are larger than in Experiment 1. Since there is no classification branch modulating the regression output, attention must learn the changes and focus on the significant ones (changes that in Experiment 1 were handled by classification).
397 | Attention in the fridge focuses on state changes (peaks and on-mode) and state duration, but in the microwave case attention focuses mainly on the switch-on. We conclude that after the peak the model expects a long-term change of consumption, which in the microwave case does not occur. That is the main difference between the microwave and the fridge. This hypothesis cannot be applied to the dishwasher because of the peaks of other appliances during the time it is on (which produce noise).
398 | Lastly, the regression is more sensitive to changes and catches the input pattern more smoothly, but having only the regression branch removes the explicit on/off specialization from the model.
399 |
400 | ### Experiment 5. Only regression with standardization (using calculated standardization in test)
401 |
402 | #### Hypothesis
403 |
404 | The main difference with Experiment 4 is the addition of standardization to the dataset of the model. In this experiment, we apply standardization to both the training and the test set after splitting the data; we calculate the mean and std of the train set and of the test set and apply them respectively.
405 | The hypothesis is that we will have a better outcome than Experiment 4 because both datasets will be standardized in the same way.
406 |
407 | #### Experiment setup
408 |
409 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods:
410 |
411 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test|
412 | |-----------|------------|----------------|----------------|-----------------------------|
413 | | dishwasher| TRUE | FALSE | TRUE | TRUE |
414 | | fridge | TRUE | FALSE | TRUE | TRUE |
415 | | microwave | TRUE | FALSE | TRUE | TRUE |
416 |
417 | #### Results
418 |
419 | 
420 | 
421 | 
422 |
423 |
424 |
425 | See the obtained error (previously introduced in the error metrics section) and extra training information:
426 |
427 | | Appliance | MAE | Nº Epochs| Nº Hours Train|
428 | |------------|-------|----------|---------------|
429 | |dishwasher |38.78 |10 |36 |
430 | |fridge |36.38 |10 |51.1 |
431 | |microwave |23.92 |10 |2.2 |
432 |
433 | #### Conclusions
434 |
435 | Our hypothesis is not supported, as the results are worse (significantly so for the dishwasher and the fridge).
436 | These results are likely caused by the properties (mean and standard deviation) being different in each dataset, so we apply a different rescaling to each set and make the difference bigger.
437 |
438 | ### Experiment 6. Only regression with standardization (using training standardization in test)
439 |
440 | #### Hypothesis
441 | This experiment combines the only-regression architecture with the standardization technique. In this case, we apply the standardization to the test set using the mean and standard deviation of the training set. We expect it to improve on the results of Experiment 5, given the outcome of Experiments 2 and 3.
442 |
443 | #### Experiment setup
444 |
445 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods:
446 |
447 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test|
448 | |-----------|------------|----------------|----------------|-----------------------------|
449 | | dishwasher| TRUE | FALSE | TRUE | FALSE |
450 | | fridge | TRUE | FALSE | TRUE | FALSE |
451 | | microwave | TRUE | FALSE | TRUE | FALSE |
452 |
453 | #### Results
454 |
455 | See the attached train and test samples for each appliance to interpret and evaluate the disaggregation:
456 |
457 | 
458 | 
459 | 
460 |
461 |
462 |
463 |
464 | See the obtained error (previously introduced in the error metrics section) and extra training information:
465 |
466 | | Appliance | MAE | Nº Epochs| Nº Hours Train|
467 | |------------|-------|----------|---------------|
468 | |dishwasher |26.37 |10 |36 |
469 | |fridge |29.96 |10 |51.1 |
470 | |microwave |20.1 |10 |2.2 |
471 |
472 | #### Conclusions
473 | In general, the results are similar to the other experiments in terms of MAE score. In comparison with Experiment 5, the results are significantly better. In Experiments 2 and 3, using training standardization in the test set gave better results as well.
474 |
475 | ## Paper modification 2 - Regression and classification using the attention results
476 |
477 | ### Experiment 7. Using attention in regression and classification
478 |
479 | #### Hypothesis
480 | Concatenating the output of the attention layer with the current input of the MLP in the classification branch will affect the prediction of this branch. As we have observed in previously run experiments, the attention scores peak when the power consumption of the house changes. Consequently, this information can help the classifier decide whether there is a change of consumption or not.
481 |
482 | #### Experiment setup
483 |
484 | See details of the experiments below. Each of the columns describes a specific option of the previously introduced network architectures and pre/post-processing methods:
485 |
486 | | Appliance | Regression | Classification | Standardization| Recalculate mean/std in test| Attention Classification |
487 | |-----------|------------|----------------|----------------|-----------------------------|--------------------------|
488 | | dishwasher| TRUE | TRUE | FALSE | FALSE | TRUE |
489 | | fridge | TRUE | TRUE | FALSE | FALSE | TRUE |
490 | | microwave | TRUE | TRUE | FALSE | FALSE | TRUE |
491 |
492 | #### Results
493 | See the attached train and test samples for each appliance to interpret and evaluate the disaggregation:
494 | 
495 | 
496 | 
497 |
498 |
499 |
500 | See the obtained error (previously introduced in the error metrics section) and extra training information:
501 |
502 | | Appliance | MAE | Nº Epochs| Nº Hours Train|
503 | |------------|-------|----------|---------------|
504 | |dishwasher |28.09 |3 |6 |
505 | |fridge |31.08 |4 |8 |
506 | |microwave |26.98 |10 |1 |
507 |
508 | #### Conclusions
509 | In this case, the results are similar to the other experiments. For upcoming experimentation, if the attention output is used to compute the classification, we would propose eliminating this branch's convolutional layers. The magnitudes of the values in the concatenated vector that enters the MLP can differ between the ones coming from attention and the ones coming from the CNN layers, which can handicap the training process.
510 |
511 | One observation that can be made is that in the case of the dishwasher and the fridge, overfitting started in the 3rd and 4th epochs, as can be noted in the image below (dishwasher).
512 |
513 | 
514 |
515 |
516 | ### Experiment results summary
517 |
518 | 
519 |
520 | ## Implementation details
521 |
522 | - Files description
523 | - settings.yaml. YAML file describing each of the experiment parameters
524 | - Train, val and test dataset properties
525 | - Hyperparameters (oversampling factor, learning rate, window size, filter properties, ...)
526 | - redd.yaml YAML file describing REDD dataset files and parameters (building and channels filenames)
527 | - redd.py REDD dataset parser
528 | - dataset.py REDD dataset loader and preprocessing
529 | - run-train.sh Experiments training launcher using default arguments
530 | - run-test.sh Experiments testing launcher using default arguments
531 | - main.py Orchestrator of train and test actions. Multiple arguments supported to handle different experiments actions and scenarios
532 | - model.py Described models implementation
533 | - train.py Train handler. Manage multiple epoch training and evaluation on training dataset
534 | - test.py Test handler. Manage pre-trained model and testing on testing dataset
535 | - utils.py Data handlers, error and plot helping functions
536 | - Framework
537 | - Python3.7.9
538 | - Torch 1.7.1
539 | - Computing resources
540 |     - Using a pre-configured Deep Learning VM from the Google Cloud Marketplace
541 | - vCPU cores: 2
542 | - RAM: 13 GB
543 | - 1 NVIDIA Tesla K80
544 |
545 | In order to run the code follow instructions below:
546 |
547 | ```
548 | 1) Clone the github project
549 | git clone https://github.com/abadiabosch/dlai-upc-2021-project.git
550 |
551 | 2) Install python requirements
552 | pip install -r requirements.txt
553 |
554 | 3) Download the dataset
555 | https://drive.google.com/drive/folders/1ey1UBfU41zjftiXjp6PmJ0OfXFhJYj4N?usp=sharing
556 |
557 | 4) Update settings.yaml to make dataset.path field point to the folder with *.csv downloaded in step 3
558 | dataset:
559 | path:
560 |
561 | 5) To train models run command below in source folder. Using default training settings, see main orchestrator arguments below
562 | sh run-train.sh
563 |
564 | 6) To test models rename output-train folder to output-test folder and run command below in source folder. Using default testing settings,
565 |    see main orchestrator arguments below
566 | sh run-test.sh
567 |
568 | Main orchestrator command line arguments. See default settings in train and test launchers
569 |
570 | Command line arguments parser
571 | --settings
572 | Path to settings yaml file where all disaggregation scenarios and model hyperparameters are described
573 | --appliance
574 | Name of the appliance to train or test
575 | --path
576 |     Path to output folder where results are saved
577 | --train
578 | Set to train or unset to test
579 | --tune
580 |     Set to enable automatic architecture hyperparameter tuning
581 | --epochs
582 | Number of epochs to train
583 | --disable-plot
584 | Disable sliding window plotting during train or test
585 | --disable-random
586 | Disable randomness in processing
587 |
588 | ```
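
For example, a training run for the microwave might look like the command below; the paths and values are illustrative, and run-train.sh / run-test.sh contain the actual defaults:

```
python main.py --settings settings.yaml --appliance microwave \
    --path output-train --train --epochs 10
```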
589 |
590 | In order to run pre-trained models follow additional instructions below:
591 | ```
592 | 1) Download models from
593 | https://drive.google.com/drive/folders/1gb_FmG1hs6lgSlSF9MLZ4w7rAgNEtfvC?usp=sharing
594 |
595 | 2) Copy each of the models (.th) to its path described in run-train.sh or run-test.sh
596 | ```
597 |
598 | ## Conclusions
599 |
600 | - Conclusion 1: During the experiments, the results were significantly better in train and eval than in test. The explanation behind this outcome is that our model was trained on a dataset with very low variation in appliance patterns: there were just 3 to 5 different units of the same appliance type for training, and the model was then tested on a totally different unit. For example, the fridge consumption patterns in the training set were different from the pattern in the test set, so the model did not have a broad variety of load profile patterns to learn to infer from. The next image shows the consumption of one house from the training set and the house used for testing for the fridge appliance; the two distributions are totally different, which supports our explanation.
601 |
602 | 
603 |
604 | - Conclusion 2: Classification is in charge of modeling the real consumption pattern of each given window.
605 | - Conclusion 3: Regression is in charge of inferring the maximum consumption of the appliance in the input window. This applies to all the models but the "only regression" one, as explained in conclusion 6.
606 | - Conclusion 4: Attention focuses on two scenarios related to the simultaneity of the appliances:
607 |   - Scenarios with a high simultaneity factor: attention focuses on state changes of appliances (switch on/off) and state duration, and therefore on the appliance pattern (case of the microwave and dishwasher).
608 |   - Scenarios with a low simultaneity factor: attention focuses on the neighborhood, outside the active section of the appliance (case of the fridge).
609 | - Conclusion 5: We don't have clear conclusions about whether the standardization of the dataset produces better outcomes than the paper reference model.
610 | - Conclusion 6: Without a classification branch, the output is smoother and therefore does not adequately capture the peaks of consumption. This is because the regression branch is not prepared to do both tasks: inferring the maximum consumption and adapting to the exact pattern with instantaneous changes of power.
611 | - Conclusion 7: All the models took a long time to train; the amount of data, the complexity of the forward and backward passes, and the limited computational resources were the reasons for that.
612 |
613 | ## Future work
614 |
615 | Transformers are state-of-the-art models with a high impact in deep learning. To continue developing the project, we wanted to add this attention mechanism to our model. Unfortunately, we did not have time to develop it.
616 | Therefore, as future work, we recommend applying a transformer in place of the attention layer in our model.
617 | The encoder module would take the input from the LSTM and feed the self-attention block before reaching the 1D convolution. A residual connection and layer normalization would be implemented.
618 | The decoder would follow a similar procedure to the encoder, adding cross-attention. The cross-attention would find which regions in the input consumption sequence are most relevant to reconstructing the appliance signal and therefore deserve the highest attention coefficients.
619 | Our hypothesis is that applying transformers would generate a better outcome than the current model, as it would be more effective at selecting the regions in which the consumption sequence varies.
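
A minimal sketch of what such a replacement block could look like, assuming LSTM outputs of shape (batch, L, H); this only illustrates the proposed direction and is not an implemented or validated part of the project:

```
import torch
import torch.nn as nn


class TransformerAttentionBlock(nn.Module):
    """Self-attention with residual connections and layer normalization,
    intended as a drop-in idea for the current attention layer."""

    def __init__(self, hidden_size, nhead=4):
        super().__init__()
        self.encoder = nn.TransformerEncoderLayer(
            d_model=hidden_size, nhead=nhead, dim_feedforward=2 * hidden_size
        )

    def forward(self, h):                       # h: (batch, L, hidden_size)
        out = self.encoder(h.transpose(0, 1))   # TransformerEncoderLayer expects (L, batch, H)
        return out.transpose(0, 1)


# Toy LSTM outputs: batch=8, L=480, H=64
z = TransformerAttentionBlock(64)(torch.randn(8, 480, 64))
```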
620 |
621 | ## References
622 |
623 | [1]
624 | Rolnick, D., Donti, P. L., Kaack, L. H., Kochanski, K., Lacoste, A., Sankaran, K., ... & Bengio, Y. (2019).
625 | Tackling climate change with machine learning.
626 | arXiv preprint arXiv:1906.05433.
627 | [https://arxiv.org/abs/1906.05433](https://arxiv.org/abs/1906.05433)
628 |
629 | [2]
630 | Piccialli, V., & Sudoso, A. M. (2021)
631 | Improving Non-Intrusive Load Disaggregation through an Attention-Based Deep Neural Network.
632 | Energies, 14(4), 847.
633 | [https://arxiv.org/abs/1912.00759](https://arxiv.org/abs/1912.00759)
634 |
635 | [3]
636 | Zhao, B., Ye, M., Stankovic, L., & Stankovic, V. (2020).
637 | Non-intrusive load disaggregation solutions for very low-rate smart meter data.
638 | Applied Energy, 268, 114949.
639 | [https://www.sciencedirect.com/science/article/abs/pii/S030626192030461X](https://www.sciencedirect.com/science/article/abs/pii/S030626192030461X)
640 |
641 | [4]
642 | Zhang, C., Zhong, M., Wang, Z., Goddard, N., & Sutton, C. (2018, April).
643 | Sequence-to-point learning with neural networks for non-intrusive load monitoring.
644 | In Proceedings of the AAAI Conference on Artificial Intelligence (Vol. 32, No. 1).
645 | [https://arxiv.org/abs/1612.09106](https://arxiv.org/abs/1612.09106)
646 |
647 | [5]
648 | Kolter, J. Z., & Johnson, M. J. (2011, August)
649 | REDD: A public data set for energy disaggregation research.
650 | In Workshop on data mining applications in sustainability (SIGKDD), San Diego, CA (Vol. 25, No. Citeseer, pp. 59-62).
651 | [http://redd.csail.mit.edu/kolter-kddsust11.pdf](http://redd.csail.mit.edu/kolter-kddsust11.pdf)
652 |
653 | [6]
654 | Shin, C., Joo, S., Yim, J., Lee, H., Moon, T., & Rhee, W. (2019, July).
655 | Subtask gated networks for non-intrusive load monitoring.
656 | In Proceedings of the AAAI Conference on Artificial Intelligence (Vol. 33, No. 01, pp. 1150-1157).
657 | [https://arxiv.org/abs/1811.06692](https://arxiv.org/abs/1811.06692)
658 |
659 | Team members collaborating on the project: Victor Gil, Sergi Bragos and Inés Ylla
660 |
--------------------------------------------------------------------------------