├── dataset_creation
    ├── refit_dataset_weak.rar
    ├── ukdale_dataset_weak.rar
    ├── noised_aggregate_creation.py
    ├── noise_extraction.py
    ├── synth_refit_creation.py
    └── synth_uk_creation.py
├── losses.py
├── refit_resampling.py
├── pooling_layer.py
├── metrics.py
├── README.md
├── utils_func.py
├── CRNN.py
├── environment.yml
├── ukdale_experiment_1_2.py
└── mixed_training_experiment.py


/dataset_creation/refit_dataset_weak.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GiuTan/Weak-NILM/HEAD/dataset_creation/refit_dataset_weak.rar


--------------------------------------------------------------------------------
/dataset_creation/ukdale_dataset_weak.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GiuTan/Weak-NILM/HEAD/dataset_creation/ukdale_dataset_weak.rar


--------------------------------------------------------------------------------
/losses.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from tensorflow.keras import models, layers
 3 | import numpy as np
 4 | from tensorflow.keras import backend as K
 5 | 
 6 | 
 7 | def binary_crossentropy(y_true, y_pred):
 8 |     y_true = tf.cast(y_true, tf.float32)
 9 | 
10 |     new_true = tf.multiply(y_true, tf.cast(tf.not_equal(y_true, -1), tf.float32))
11 |     new_pred = tf.multiply(y_pred, tf.cast(tf.not_equal(y_true, -1), tf.float32))
12 | 
13 |     loss = K.binary_crossentropy(new_true, new_pred)
14 |     return tf.reduce_mean(loss)
15 | 
16 | 
def binary_crossentropy_weak(y_true, y_pred):
    """Binary cross-entropy for the bag-level (weak) output.

    Entries of ``y_true`` equal to -1 are treated as missing: target and
    prediction are both multiplied by zero at those positions. Returns the
    mean of the element-wise loss over all elements.
    """
    y_true = tf.cast(y_true, tf.float32)
    valid = tf.cast(tf.not_equal(y_true, -1), tf.float32)

    masked_true = tf.multiply(y_true, valid)
    masked_pred = tf.multiply(y_pred, valid)

    return tf.reduce_mean(K.binary_crossentropy(masked_true, masked_pred))


--------------------------------------------------------------------------------
/refit_resampling.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from matplotlib import pyplot as plt 
 4 | 
 5 | file_agg_path = '../dataset_weak/aggregate_data_noised/'
 6 | labels_path = '/raid/users/eprincipi/clean_refit/dataset_weak/labels/'
 7 | destination_agg_resample_path = '..dataset_weak/resampled_agg_REFIT_test/'
 8 | destination_labels_resample_path = '..dataset_weak/resampled_labels_REFIT_test/'
 9 | k = 4
10 | quantity = 12000
11 | for i in range(quantity):
12 |     agg1 = np.load(file_agg_path + 'house_' + str(k) + '/aggregate_%d.npy' % i)
13 |     labels_strong = np.load(labels_path + 'house_' + str(k) + '/strong_labels_%d.npy' % i, allow_pickle=True)
14 |     
15 |     
16 | 
17 |     time = pd.date_range('2014-01-01', periods=2550, freq='8s')
18 |     arr = pd.Series(data=agg1,index=time)
19 |     new_labels = []
20 |     for a in range(5):
21 |           label = pd.Series(data=labels_strong[a], index=time)
22 |           resampled_labels = label.resample('6s').bfill()
23 |           new_labels.append(resampled_labels[:len(agg1)].to_numpy())
24 |     resampled = arr.resample('6s').bfill()
25 |     new_labels = np.array(new_labels)
26 |     np.save(destination_agg_resample_path + 'house_' + str(k) + '/aggregate_%d.npy' % i, resampled[:len(agg1)])
27 |     np.save(destination_labels_resample_path + 'house_' + str(k) + '/strong_labels_%d.npy' % i,new_labels)
28 |     


--------------------------------------------------------------------------------
/pooling_layer.py:
--------------------------------------------------------------------------------
 1 | from tensorflow.keras import backend as K
 2 | from tensorflow.python.keras.engine.base_layer import Layer
 3 | 
 4 | class LinSoftmaxPooling1D(Layer):
 5 |     '''
 6 |     Keras softmax pooling layer.
 7 |     '''
 8 | 
 9 |     def __init__(self, axis=0, **kwargs):
10 |         '''
11 |         Parameters
12 |         ----------
13 |         axis : int
14 |             Axis along which to perform the pooling. By default 0
15 |             (should be time).
16 |         kwargs
17 |         '''
18 |         super(LinSoftmaxPooling1D, self).__init__(**kwargs)
19 | 
20 |         self.axis = axis
21 | 
22 |     def get_output_shape_for(self, input_shape):
23 |         shape = list(input_shape)
24 |         del shape[self.axis]
25 |         return tuple(shape)
26 | 
27 |     def compute_output_shape(self, input_shape):
28 |         return self.get_output_shape_for(input_shape)
29 | 
30 |     def get_config(self):
31 |         config = {'axis': self.axis}
32 |         base_config = super(LinSoftmaxPooling1D, self).get_config()
33 |         return dict(list(base_config.items()) + list(config.items()))
34 | 
35 |     def call(self, x, mask=None):
36 |         square = x * x
37 |         sum_square = K.sum(square, axis=self.axis, keepdims=True)
38 |         print(sum_square.shape)
39 |         sum = K.sum(x, axis=self.axis, keepdims=True)
40 |         fin_vector = sum_square / sum
41 |         print(fin_vector)
42 |         return fin_vector


--------------------------------------------------------------------------------
/metrics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | from tensorflow.keras import backend as K
 4 | 
 5 | # The energy error is the difference between
 6 | # the total predicted energy, and the total actual energy consumed
 7 | # by each active appliance in that sample instant
 8 | 
 9 | 
def ANE(X_test_synth, Y_pred):
    """Return |true energy - predicted energy| / true energy.

    Parameters
    ----------
    X_test_synth : sequence of array-like
        Aggregate power windows; every value is summed into the true energy.
    Y_pred : sequence of array-like
        One 2-D array per window, indexed ``[sample, class]``. The first
        five columns are weighted by the fixed per-appliance power values
        below (order: kettle, microwave, fridge, washing machine, dish
        washer) and summed into the predicted energy.

    Returns
    -------
    numpy.float64
        The normalised absolute energy error.

    ``Y_pred`` is left untouched. The previous implementation scaled a
    transposed *view* of each window, which overwrote the caller's
    predictions with power values.
    """
    fridge_val = 91
    kettle_val = 1996
    micro_val = 1107
    wash_val = 487
    dish_val = 723
    mean_val = np.array([kettle_val, micro_val, fridge_val, wash_val, dish_val],
                        dtype=np.float64)
    classes = mean_val.shape[0]

    # NumPy scalars keep NumPy division semantics (warning + nan/inf rather
    # than ZeroDivisionError) when the true energy is zero, as before.
    true_energy = np.float64(0.0)
    predicted_energy = np.float64(0.0)
    for i in range(len(X_test_synth)):
        activations = np.asarray(Y_pred[i], dtype=np.float64)
        # Broadcasting builds a new array, so Y_pred[i] is not modified.
        predicted_energy += np.sum(activations[:, :classes] * mean_val)
        true_energy += np.sum(X_test_synth[i])

    print("True energy", true_energy)
    print("Predicted energy", predicted_energy)
    return np.abs(true_energy - predicted_energy) / true_energy
37 | 
def custom_f1_score(y_true, y_pred):
    """F1 score over all elements, ignoring positions labelled -1.

    Positions where ``y_true`` equals -1 are zeroed in both ``y_true`` and
    ``y_pred`` before counting, so they add nothing to the true-positive,
    positive or predicted-positive counts. Predictions are rounded, i.e.
    thresholded at 0.5.

    The mask is built once from the original labels. Previously it was
    rebuilt from the already-masked ``y_true``, which no longer held any -1,
    so ``y_pred`` was never masked.
    """
    y_true = tf.cast(y_true, tf.float32)
    mask = tf.cast(tf.not_equal(y_true, -1), tf.float32)
    y_true = tf.multiply(y_true, mask)
    y_pred = tf.multiply(y_pred, mask)

    def recall_m(y_true, y_pred):
        TP = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        Positives = K.sum(K.round(K.clip(y_true, 0, 1)))

        # epsilon keeps the ratio finite when there are no positives
        recall = TP / (Positives + K.epsilon())
        return recall

    def precision_m(y_true, y_pred):
        TP = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        Pred_Positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

        precision = TP / (Pred_Positives + K.epsilon())
        return precision

    precision, recall = precision_m(y_true, y_pred), recall_m(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
58 | 


--------------------------------------------------------------------------------
/dataset_creation/noised_aggregate_creation.py:
--------------------------------------------------------------------------------
 1 | import random as python_random
 2 | import argparse
 3 | import numpy as np
 4 | import random
 5 | 
 6 | parser = argparse.ArgumentParser(description="Noised aggregate creation")
 7 | parser.add_argument("--building", type=int, default=2, help="Desired building")
 8 | parser.add_argument("--num_of_bags",type=int, default=2000, help="Number of bags created for the desired building")
 9 | parser.add_argument("--noise_path", type=str, default='', help="Path where noise has been saved")
10 | parser.add_argument("--agg_synth_path", type=str, default='', help="Path where synth aggregate has been saved")
11 | parser.add_argument("--agg_noised_path", type=str, default='', help="Path where noised aggregate has been saved")
12 | arguments = parser.parse_args()
13 | 
def noise_segmentation(k, noise_path, num_of_bag, window=2550):
    """Cut randomly placed windows out of a building's noise signal.

    Parameters
    ----------
    k : int
        Building number; the file ``noise_<k>.npy`` is loaded.
    noise_path : str
        Prefix (folder, with trailing separator) of the noise file.
    num_of_bag : int
        Maximum number of windows to return.
    window : int, optional
        Length of each window in samples (default 2550).

    Returns
    -------
    numpy.ndarray
        Up to ``num_of_bag`` windows of ``window`` consecutive samples.

    Window starts come from Python's ``random`` module, so seed it for
    reproducible output.
    """
    vector_noise = np.load(noise_path + 'noise_' + str(k) + '.npy', allow_pickle=True)
    print("shape noise", vector_noise.shape)
    print("shape noise", vector_noise[0].shape)

    # One concatenate call instead of growing the array chunk by chunk (which
    # re-copies everything on each iteration). The leading empty float array
    # keeps the dtype promotion of the original accumulation.
    results = np.concatenate([np.array([])] + list(vector_noise), axis=0)
    print("Shape total vector:", results.shape)

    # The full permutation is still drawn (not just num_of_bag starts) so that
    # seeded runs pick the same start indices as before.
    n_starts = len(results) - window
    random_list = random.sample(range(0, n_starts), n_starts)
    print("index control 1")
    print(random_list[0])

    vector_list_1 = [results[i: (i + window)] for i in random_list[:num_of_bag]]
    return np.array(vector_list_1)
34 | 
if __name__ == '__main__':
    # Fix both random generators so the drawn noise windows are repeatable.
    np.random.seed(123)
    python_random.seed(123)

    k = arguments.building
    noise = noise_segmentation(k, arguments.noise_path, arguments.num_of_bags)
    print(len(noise))
    print(noise.shape)

    # Index of the next unused noise window; it only advances when a
    # synthetic aggregate was actually found on disk.
    used = 0
    for i in range(arguments.num_of_bags):
        try:
            agg = np.load(arguments.agg_synth_path + 'house_' + str(k) + '/aggregate_%d.npy' % i)
        except FileNotFoundError:
            # Missing bag numbers are skipped.
            continue
        # NaNs in the noise are replaced by 1 before it is added.
        noisy = np.add(np.nan_to_num(noise[used], nan=1), agg)
        used += 1
        np.save(arguments.agg_noised_path + "house_" + str(k) + "/aggregate_%d" % i, noisy)

    print("total len noise", len(noise))
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Weak-NILM
 2 | This repository contains source code to perform multi-label classification using a deep learning approach trained with weakly labeled data. This work refers to [Multilabel Appliance Classification With Weakly Labeled Data for Non-Intrusive Load Monitoring](https://ieeexplore.ieee.org/document/9831435).
 3 | The implemented network is a Convolutional Recurrent Neural Network. 
 4 | Both strongly and weakly labeled data are generated from two real-world benchmark datasets: REFIT and UK-DALE.
 5 | 
 6 | Two experiments are proposed:
 7 | - one based on UK-DALE data where it is possible to vary the percentage of bags with strong annotations as well as weak annotations
 8 | - one based on mixed dataset where the network can be trained with a percentage of strong annotations from UK-DALE and a quantity of weak annotations from REFIT, testing on both datasets the performance of the network. 
 9 | 
10 | The dataset_creation folder contains the code to create synthetic aggregates for both UK-DALE and REFIT. The modules noise_extraction.py and noised_aggregate_creation.py must be used to create noised aggregate vectors by adding noise to the synthetic vectors.
11 | Appliances taken into consideration are kettle, microwave, fridge, washing machine and dishwasher.
12 | 
13 | Data will be created with both types of label. In strong annotations, the appliance state is set to 1 when the specific appliance is ON and to 0 when it is OFF, based on the on_power_threshold parameter; weak annotations are set to 1 when the appliance is active at least once inside the window. 
14 | Quantity of strong and weak annotations to be used in the experiments can be defined in the experiment modules. In fact, in ukdale_experiment_1_2 and mixed_training_experiment can be set:
15 | 
16 | - quantity of data previously generated from UKDALE house 1
17 | - quantity of data previously generated from UKDALE house 2
18 | - quantity of data previously generated from UKDALE house 3
19 | - quantity of data previously generated from UKDALE house 4
20 | - quantity of data previously generated from UKDALE house 5
21 | - strong annotations percentage
22 | - weak annotations percentage
23 | - control of strong quantity selected 
24 | - clip smoothing post-processing. This flag refers only to fully supervised + weak supervised experiment 
25 | - the use of weakly labeled dataset
26 | - type of experiment to be performed (fully supervised or fully supervised + weak supervised) 
27 | - path to synthetic data for ANE computation
28 | - flag to perform train or inference. If train is selected also the prediction on the test set and metrics estimation will be performed while if inference is chosen weights of a trained model will be loaded from the path to perform the inference.  
29 | 
30 | To perform the mixed experiment in the proposed work, REFIT was resampled from an 8 s to a 6 s sampling period. Specifically, refit_resampling.py can be used for this purpose. 
31 | For mixed training experiment there is the possibility to set also the testing dataset desired, choosing between REFIT and UK-DALE.
32 | 
33 | The packages required to prepare the environment are listed in the environment.yml file.
34 | 
35 | Structure for the linear softmax pooling layer is inspired by https://github.com/marl/autopool.
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/utils_func.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from sklearn.metrics import hamming_loss, precision_recall_curve, classification_report, roc_curve, auc
  3 | from matplotlib import pyplot as plt
  4 | from itertools import cycle
  5 | import scipy.signal
  6 | import random
  7 | 
  8 | def standardize_data(agg, mean, std):
  9 |     agg= agg -  mean
 10 |     agg /= std
 11 |     return agg
 12 | 
 13 | def output_binarization(output,thres):
 14 |     new_output = []
 15 |     for i in range(len(output)):
 16 |         matrix = np.zeros((2550, 5))
 17 |         for l in range(len(output[i])):
 18 |             curr = output[i]
 19 |             for k in range(5):
 20 |                     if curr[l][k] >= thres:
 21 |                         curr[l][k] = 1
 22 |                     else:
 23 |                         if curr[l][k] == -1:
 24 |                             curr[l][k] = -1
 25 |                         else:
 26 |                             curr[l][k] = 0
 27 |             matrix[l] = curr[l]
 28 |         new_output.append(matrix)
 29 | 
 30 |     new_output = np.array(new_output)
 31 |     return new_output
 32 | 
 33 | def app_binarization_weak(output,thres, classes):
 34 |     new_output = []
 35 |     for i in range(classes):
 36 |             for k in range(len(output)):
 37 |                 #curr = output[k]
 38 |                 if output[k][i] >= thres[i]:
 39 |                     output[k][i] = 1
 40 |                 else:
 41 |                     output[k][i] = 0
 42 |                 #matrix[l] = curr[l]
 43 |                 #new_output.append(matrix)
 44 | 
 45 |     # new_output = np.array(new_output)
 46 |     # return new_output
 47 |     return output
 48 | 
 49 | def app_binarization_strong(output,thres, classes):
 50 |     new_output = []
 51 |     for k in range(len(output)):
 52 |              for i in range(classes):
 53 |                 #curr = output[k]
 54 |                 if output[k][i] >= thres[i]:
 55 |                     output[k][i] = 1
 56 |                 else:
 57 |                     output[k][i] = 0
 58 |                 #matrix[l] = curr[l]
 59 |              new_output.append(output[k])
 60 | 
 61 |     new_output = np.array(new_output)
 62 |     # return new_output
 63 |     return new_output
 64 | 
 65 | def thres_analysis(Y_test,new_output,classes):
 66 | 
 67 |     precision = dict()
 68 |     recall = dict()
 69 |     thres_list_strong = []
 70 | 
 71 |     for i in range(classes):
 72 | 
 73 |         precision[i], recall[i], thresh = precision_recall_curve(Y_test[:, i], new_output[:, i])
 74 | 
 75 |         plt.title('Pres-Recall-THRES curve')
 76 |         plt.plot(precision[i], recall[i])
 77 |         plt.show()
 78 |         plt.close()
 79 | 
 80 |         f1 = (2 * precision[i] * recall[i] )/ (precision[i] + recall[i])
 81 |         opt_thres_f1 = np.argmax(f1)
 82 |         optimal_threshold_f1 = thresh[opt_thres_f1]
 83 |         print("Threshold for F1-SCORE value is:", optimal_threshold_f1)
 84 |         if optimal_threshold_f1 >= 0.955:
 85 |              optimal_threshold_f1 = 0.55
 86 |         thres_list_strong.append(optimal_threshold_f1)
 87 | 
 88 |     return thres_list_strong
 89 | 
 90 | 
 91 | def weak_count(Y_train_weak):
 92 |     list_counter = [0, 0, 0, 0, 0]
 93 | 
 94 |     for i in range(len(Y_train_weak)):
 95 |         vect = Y_train_weak[i]
 96 |         for k in range(5):
 97 |             if vect[0][k] == 1:
 98 |                 list_counter[k] += 1
 99 |     print("Weak composition:",list_counter)
100 | 
101 | 


--------------------------------------------------------------------------------
/CRNN.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from tensorflow.keras import backend as K
 3 | from pooling_layer import LinSoftmaxPooling1D
 4 | from losses import binary_crossentropy,binary_crossentropy_weak
 5 | from metrics import  custom_f1_score
 6 | 
 7 | 
 8 | def CRNN_block(x, kernel,drop_out,filters):
 9 |     conv_1 = tf.keras.layers.Conv2D(filters=filters, kernel_size=(kernel, 1), strides=(1, 1), padding='same',
10 |                                     kernel_initializer='glorot_uniform')(x)
11 |     print("conv_1")
12 |     print(conv_1.shape)
13 |     batch_norm_1 = tf.keras.layers.BatchNormalization()(conv_1)
14 |     act_1 = tf.keras.layers.Activation('relu')(batch_norm_1)
15 |     pool_1 = tf.keras.layers.MaxPooling2D(pool_size=(1, 1))(act_1)
16 |     drop_1 = tf.keras.layers.Dropout(drop_out)(pool_1)
17 |     print("drop_1")
18 |     print(drop_1.shape)
19 |     return drop_1
20 | 
21 | 
def CRNN_construction(window_size, weight, lr=0.0, classes=0, drop_out = 1, kernel = 1, num_layers=1, gru_units=1, cs=False, strong_weak_flag =True):
    """Build and compile the CRNN model.

    Parameters
    ----------
    window_size : int
        Number of samples per input window; the input shape is
        ``(window_size, 1)``.
    weight : float
        Loss weight of the weak (bag-level) output; unused when
        ``strong_weak_flag`` is False.
    lr : float
        Learning rate of the Adam optimizer.
    classes : int
        Units of the frame-level sigmoid Dense layer.
    drop_out, kernel : passed to every ``CRNN_block``.
    num_layers : int
        Number of convolutional blocks; block ``i`` uses ``2 ** (i + 5)``
        filters.
    gru_units : int
        Units of the bidirectional GRU.
    cs : bool
        If True (and ``strong_weak_flag`` is True), the strong output is the
        frame-level output multiplied by the bag-level output.
    strong_weak_flag : bool
        False: single strong output. True: strong and weak outputs.

    Returns
    -------
    tf.keras.Model
        The compiled model, named "CRNN".
    """
    input_data = tf.keras.Input(shape=(window_size, 1))
    x = tf.keras.layers.Reshape((window_size, 1, 1))(input_data)

    for depth in range(num_layers):
        x = CRNN_block(x, kernel=kernel, drop_out=drop_out, filters=2 ** (depth + 5))

    # Drop the singleton axis so the GRU gets a 3-D tensor built from
    # axes 1 and 3 of x.
    spec_x = tf.keras.layers.Reshape((x.shape[1], x.shape[3]))(x)
    print("Reshape")
    print(spec_x.shape)
    recurrent = tf.keras.layers.GRU(units=gru_units, return_sequences=True)
    bi_direct = tf.keras.layers.Bidirectional(recurrent)(spec_x)
    print("bi direct")
    print(bi_direct.shape)
    frame_level = tf.keras.layers.Dense(units=classes, activation='sigmoid', name="strong_level")(bi_direct)
    print("frame level")
    print(frame_level.shape)
    pool_bag = LinSoftmaxPooling1D(axis=1)(frame_level)
    # NOTE(review): pool_bag is pooled from sigmoid outputs, so it already
    # lies in (0, 1); a second sigmoid limits weak_level to roughly
    # (0.5, 0.73). Kept as in the original model - confirm this is intended.
    bag_level = tf.keras.layers.Activation('sigmoid', name="weak_level")(pool_bag)

    if not strong_weak_flag:
        outputs = [frame_level]
        compile_kwargs = {
            "loss": {"strong_level": binary_crossentropy},
            "metrics": [custom_f1_score],
        }
    else:
        if cs:
            strong_name = "strong_level_final"
            strong_output = tf.keras.layers.Multiply(name=strong_name)([bag_level, frame_level])
            print(strong_output.shape)
        else:
            strong_name = "strong_level"
            strong_output = frame_level
        outputs = [strong_output, bag_level]
        compile_kwargs = {
            "loss": {
                strong_name: binary_crossentropy,
                "weak_level": binary_crossentropy_weak,
            },
            "metrics": [custom_f1_score],
            "loss_weights": [1, weight],
        }

    model_CRNN = tf.keras.Model(inputs=input_data, outputs=outputs, name="CRNN")
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model_CRNN.compile(optimizer=optimizer, **compile_kwargs)

    return model_CRNN
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
  1 | name: weak_labels
  2 | channels:
  3 |   - anaconda
  4 |   - conda-forge
  5 |   - defaults
  6 | dependencies:
  7 |   - _libgcc_mutex=0.1=conda_forge
  8 |   - _openmp_mutex=4.5=1_llvm
  9 |   - _tflow_select=2.1.0=gpu
 10 |   - absl-py=0.13.0=py38h06a4308_0
 11 |   - aiohttp=3.7.4=py38h27cfd23_1
 12 |   - astor=0.8.1=py38h06a4308_0
 13 |   - astunparse=1.6.3=py_0
 14 |   - async-timeout=3.0.1=py_1000
 15 |   - attrs=21.2.0=pyhd8ed1ab_0
 16 |   - backcall=0.2.0=pyh9f0ad1d_0
 17 |   - backports=1.0=py_2
 18 |   - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
 19 |   - blas=1.0=mkl
 20 |   - blinker=1.4=py_1
 21 |   - blosc=1.21.0=h9c3ff4c_0
 22 |   - brotlipy=0.7.0=py38h497a2fe_1001
 23 |   - bzip2=1.0.8=h7f98852_4
 24 |   - c-ares=1.17.1=h7f98852_1
 25 |   - ca-certificates=2020.10.14=0
 26 |   - cachetools=4.2.2=pyhd8ed1ab_0
 27 |   - certifi=2020.6.20=py38_0
 28 |   - cffi=1.14.4=py38ha312104_0
 29 |   - chardet=3.0.4=py38h924ce5b_1008
 30 |   - click=8.0.1=py38h578d9bd_0
 31 |   - coverage=5.5=py38h27cfd23_2
 32 |   - cryptography=3.4.7=py38ha5dfef3_0
 33 |   - cudatoolkit=11.0.221=h6bb024c_0
 34 |   - cudnn=8.2.1.32=h86fa8c9_0
 35 |   - cycler=0.10.0=py38_0
 36 |   - cython=0.29.23=py38h2531618_0
 37 |   - dataclasses=0.8=pyhc8e2a94_1
 38 |   - dbus=1.13.18=hb2f20db_0
 39 |   - decorator=4.4.2=py_0
 40 |   - expat=2.4.1=h9c3ff4c_0
 41 |   - fontconfig=2.13.1=he4413a7_1000
 42 |   - freetype=2.10.4=h0708190_1
 43 |   - gettext=0.19.8.1=hf34092f_1004
 44 |   - glib=2.66.3=h58526e2_0
 45 |   - google-auth=1.32.1=pyhd3eb1b0_0
 46 |   - google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
 47 |   - google-pasta=0.2.0=py_0
 48 |   - gst-plugins-base=1.14.5=h0935bb2_2
 49 |   - gstreamer=1.14.5=h36ae1b5_2
 50 |   - h5py=2.10.0=py38h7918eee_0
 51 |   - hdf5=1.10.4=nompi_h3c11f04_1106
 52 |   - icu=58.2=hf484d3e_1000
 53 |   - idna=2.10=pyh9f0ad1d_0
 54 |   - importlib-metadata=4.6.1=py38h578d9bd_0
 55 |   - intel-openmp=2021.2.0=h06a4308_610
 56 |   - ipython=7.22.0=py38hb070fc8_0
 57 |   - ipython_genutils=0.2.0=py_1
 58 |   - jedi=0.17.2=py38h578d9bd_1
 59 |   - joblib=1.0.1=pyhd8ed1ab_0
 60 |   - jpeg=9d=h36c2ea0_0
 61 |   - keras-preprocessing=1.1.2=pyhd3eb1b0_0
 62 |   - kiwisolver=1.3.1=py38h1fd1430_1
 63 |   - libedit=3.1.20191231=he28a2e2_2
 64 |   - libffi=3.2.1=he1b5a44_1007
 65 |   - libgcc-ng=9.3.0=h2828fa1_19
 66 |   - libgfortran-ng=7.5.0=h14aa051_19
 67 |   - libgfortran4=7.5.0=h14aa051_19
 68 |   - libglib=2.66.3=hbe7bbb4_0
 69 |   - libgomp=9.3.0=h2828fa1_19
 70 |   - libiconv=1.16=h516909a_0
 71 |   - libpng=1.6.37=h21135ba_2
 72 |   - libprotobuf=3.14.0=h780b84a_0
 73 |   - libstdcxx-ng=9.3.0=h6de172a_19
 74 |   - libuuid=2.32.1=h7f98852_1000
 75 |   - libxcb=1.13=h7f98852_1003
 76 |   - libxml2=2.9.9=h13577e0_2
 77 |   - llvm-openmp=11.1.0=h4bd325d_1
 78 |   - lz4-c=1.9.3=h2531618_0
 79 |   - lzo=2.10=h516909a_1000
 80 |   - markdown=3.3.4=pyhd8ed1ab_0
 81 |   - matplotlib=3.1.3=py38_0
 82 |   - matplotlib-base=3.1.3=py38hef1b27d_0
 83 |   - mkl=2021.3.0=h726a3e6_557
 84 |   - mkl-service=2.3.0=py38h27cfd23_1
 85 |   - mkl_fft=1.3.0=py38h42c9631_2
 86 |   - mkl_random=1.2.1=py38ha9443f7_2
 87 |   - mock=4.0.3=py38h578d9bd_1
 88 |   - multidict=5.1.0=py38h497a2fe_1
 89 |   - ncurses=6.2=h58526e2_4
 90 |   - networkx=2.5.1=pyhd3eb1b0_0
 91 |   - numexpr=2.7.3=py38h22e1b3c_1
 92 |   - oauthlib=3.1.1=pyhd8ed1ab_0
 93 |   - openssl=1.1.1k=h7f98852_0
 94 |   - opt_einsum=3.3.0=pyhd3eb1b0_1
 95 |   - pandas=1.0.5=py38h0573a6f_0
 96 |   - parso=0.7.1=pyh9f0ad1d_0
 97 |   - pcre=8.45=h9c3ff4c_0
 98 |   - pexpect=4.8.0=pyh9f0ad1d_2
 99 |   - pickleshare=0.7.5=py_1003
100 |   - pip=21.1.3=pyhd8ed1ab_0
101 |   - prompt-toolkit=3.0.19=pyha770c72_0
102 |   - protobuf=3.14.0=py38h2531618_1
103 |   - pthread-stubs=0.4=h36c2ea0_1001
104 |   - ptyprocess=0.7.0=pyhd3deb0d_0
105 |   - pyasn1=0.4.8=py_0
106 |   - pyasn1-modules=0.2.7=py_0
107 |   - pycparser=2.20=pyh9f0ad1d_2
108 |   - pygments=2.9.0=pyhd8ed1ab_0
109 |   - pyjwt=2.1.0=pyhd8ed1ab_0
110 |   - pyopenssl=20.0.1=pyhd3eb1b0_1
111 |   - pyparsing=2.4.7=pyh9f0ad1d_0
112 |   - pyqt=5.9.2=py38h05f1152_4
113 |   - pysocks=1.7.1=py38h578d9bd_3
114 |   - pytables=3.6.1=py38h9fd0a39_0
115 |   - python=3.8.0=h0371630_2
116 |   - python-dateutil=2.8.1=pyhd3eb1b0_0
117 |   - python_abi=3.8=2_cp38
118 |   - pytz=2021.1=pyhd8ed1ab_0
119 |   - pyyaml=5.4.1=py38h27cfd23_1
120 |   - qt=5.9.7=h52cfd70_2
121 |   - readline=7.0=hf8c457e_1001
122 |   - requests=2.25.1=pyhd3eb1b0_0
123 |   - requests-oauthlib=1.3.0=py_0
124 |   - rsa=4.7.2=pyh44b312d_0
125 |   - scikit-learn=0.24.2=py38ha9443f7_0
126 |   - setuptools=49.6.0=py38h578d9bd_3
127 |   - sip=4.19.13=py38he6710b0_0
128 |   - six=1.15.0=py_0
129 |   - sqlite=3.33.0=h62c20be_0
130 |   - tbb=2021.3.0=h4bd325d_0
131 |   - tensorboard-plugin-wit=1.8.0=pyh44b312d_0
132 |   - termcolor=1.1.0=py38h06a4308_1
133 |   - threadpoolctl=2.2.0=pyh8a188c0_0
134 |   - tk=8.6.10=h21135ba_1
135 |   - tornado=6.1=py38h497a2fe_1
136 |   - traitlets=5.0.5=py_0
137 |   - urllib3=1.26.6=pyhd3eb1b0_1
138 |   - wcwidth=0.2.5=pyh9f0ad1d_2
139 |   - werkzeug=2.0.1=pyhd8ed1ab_0
140 |   - wheel=0.36.2=pyhd3deb0d_0
141 |   - wrapt=1.12.1=py38h7b6447c_1
142 |   - xorg-libxau=1.0.9=h7f98852_0
143 |   - xorg-libxdmcp=1.1.3=h7f98852_0
144 |   - xz=5.2.5=h516909a_1
145 |   - yaml=0.2.5=h7b6447c_0
146 |   - yarl=1.6.3=py38h497a2fe_2
147 |   - zipp=3.5.0=pyhd8ed1ab_0
148 |   - zlib=1.2.11=h516909a_1010
149 |   - zstd=1.4.9=haebb681_0
150 |   - pip:
151 |     - flatbuffers==1.12
152 |     - gast==0.3.3
153 |     - grpcio==1.32.0
154 |     - keras==2.4.3
155 |     - keras-nightly==2.5.0.dev2021032900
156 |     - keras-tuner==1.0.3
157 |     - kt-legacy==1.0.3
158 |     - numpy==1.18.5
159 |     - packaging==21.0
160 |     - scipy==1.4.1
161 |     - tensorboard==2.5.0
162 |     - tensorboard-data-server==0.6.1
163 |     - tensorflow==2.4.0
164 |     - tensorflow-addons==0.13.0
165 |     - tensorflow-estimator==2.4.0
166 |     - tensorflow-gpu==2.4.0
167 |     - typeguard==2.12.1
168 |     - typing-extensions==3.7.4.3
169 | prefix: /home/eprincipi/anaconda3/envs/weak_labels
170 | 


--------------------------------------------------------------------------------
/dataset_creation/noise_extraction.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | from matplotlib import pyplot as plt
  3 | from nilmtk.nilmtk.dataset import DataSet
  4 | from utils_dataset_test import *
  5 | 
  6 | 
  7 | parser = argparse.ArgumentParser(description="Noise extraction")
  8 | parser.add_argument("--refit", type=bool, default=True, help="REFIT dataset noise extraction")
  9 | parser.add_argument("--ukdale", type=bool, default=True, help="UKDALE dataset noise extraction")
 10 | parser.add_argument("--building", type=int, default=1, help="UKDALE dataset noise extraction")
 11 | parser.add_argument("--start", type=str, default='2013-09-17', help="Start date")
 12 | parser.add_argument("--end", type=str, default='2015-07-08', help="End date")
 13 | arguments = parser.parse_args()
 14 | 
 15 | if __name__ == "__main__":
 16 |     print(arguments.building)
 17 |     refit_ = arguments.refit
 18 |     if refit_:
 19 |         print("REFIT Noise extraction")
 20 |         app_dict_refit = {'kettle': {2: 8, 3:9, 4:9,5:8, 6:7, 7:9, 8:9,9:7, 12:6,13:9,16:8,18:5,19:9},
 21 |                       'microwave': {2:5, 3:8, 4:8, 5:8, 6:6,8:8,9:6, 10:7, 12:5, 13:7, 16:7, 17:9,18:4,19:8},
 22 |                       'fridge': {2:1,3:2,4:1,5:1, 7:1,8:1, 9:1,10:4, 12:1, 15:1, 16:2, 17:1, 18:1, 19:1},
 23 |                       'washing machine': {2:2,3:6,4:5,5:3,6:2, 7:5,8:4,9:3,10:5, 13:3, 15:5,  16:4,    17:5,    18:2,  19:4},
 24 |                       'dish washer': {2:3,3:5, 5:4,6:3,  7:6, 9:4, 10:6, 13:4, 15:6, 17:6, 19:5}}
 25 | 
 26 |         appliances = {2: ['kettle', 'microwave', 'fridge', 'washing_machine', 'dish_washer'],
 27 |                      3: ['kettle', 'microwave', 'fridge', 'washing_machine', 'dish_washer'],
 28 |                      4: ['kettle', 'microwave', 'fridge', 'washing_machine',''],
 29 |                      5: ['kettle', 'microwave', 'fridge', 'washing_machine', 'dish_washer'],
 30 |                      6: ['kettle', 'microwave', '', 'washing_machine', 'dish_washer'],
 31 |                      7: ['kettle','', 'fridge', 'washing_machine', 'dish_washer'],
 32 |                      8: ['kettle','microwave', 'fridge', 'washing_machine', ''],
 33 |                      9: ['kettle', 'microwave', 'fridge', 'washing_machine', 'dish_washer'],
 34 |                      10: ['', 'microwave', 'fridge', 'washing_machine', 'dish_washer'],
 35 |                      12: ['kettle', 'microwave', 'fridge', '',''],
 36 |                      13: ['kettle', '', 'fridge', 'washing_machine', 'dish_washer'],
 37 |                      15: ['','', 'fridge', 'washing_machine', 'dish_washer'],
 38 |                      16: ['kettle', 'microwave', 'fridge', 'washing_machine', ''],
 39 |                      17: ['', 'microwave', 'fridge', 'washing_machine', 'dish_washer'],
 40 |                      18: ['kettle','microwave','fridge','washing_machine', ''],
 41 |                      19: ['kettle', 'microwave', 'fridge', 'washing_machine', 'dish_washer']}
 42 | 
 43 |         refit_path = "../REFIT.h5"
 44 |         build = arguments.building
 45 |         refit = DataSet(refit_path)
 46 |         # insert desired period
 47 |         refit.set_window(start=arguments.start, end=arguments.end)
 48 |         elec = refit.buildings[build].elec
 49 |         good = elec.good_sections(full_results=False)
 50 |         period = 8
 51 |         noised = []
 52 | 
 53 |         for i in range(len(good)):
 54 |             try:
 55 |                 refit.set_window(good[i])
 56 |                 elec = refit.buildings[build].elec
 57 | 
 58 | 
 59 |                 mains_ = next(elec.mains().load(sample_period=period))
 60 |                 mains_ = mains_['power']['active'].to_numpy()
 61 |                 if appliances[build][0] == 'kettle':
 62 |                     kettle = next(elec.meters[app_dict_refit['kettle'][build]].load(sample_period=period))
 63 |                     kettle = kettle['power']['active'].to_numpy()
 64 |                 else:
 65 |                     kettle =  np.zeros(len(mains_))
 66 |                 if appliances[build][1] == 'microwave':
 67 |                     micro = next(elec.meters[app_dict_refit['microwave'][build]].load(sample_period=period))
 68 |                     micro = micro['power']['active'].to_numpy()
 69 |                 else:
 70 |                     micro = np.zeros(len(mains_))
 71 |                 if appliances[build][3] == 'fridge':
 72 |                     fridge = next(elec.meters[app_dict_refit['fridge'][build]].load(sample_period=period))
 73 |                     fridge = fridge['power']['active'].to_numpy()
 74 |                 else:
 75 |                     fridge =  np.zeros(len(mains_))
 76 |                 if appliances[build][3] == 'washing_machine':
 77 |                     wash = next(elec.meters[app_dict_refit['washing machine'][build]].load(sample_period=period))
 78 |                     wash = wash['power']['active'].to_numpy()
 79 |                 else:
 80 |                     wash =  np.zeros(len(mains_))
 81 |                 if appliances[build][4] == 'dish_washer':
 82 |                     dish = next(elec.meters[app_dict_refit['dish washer'][build]].load(sample_period=period))
 83 |                     dish = dish['power']['active'].to_numpy()
 84 |                 else:
 85 |                     dish =  np.zeros(len(mains_))
 86 | 
 87 |                 plt.plot(mains_)
 88 |                 plt.plot(kettle)
 89 |                 plt.plot(micro)
 90 |                 plt.plot(fridge)
 91 |                 plt.plot(wash)
 92 |                 plt.plot(dish)
 93 |                 plt.show()
 94 | 
 95 |                 # noise creation
 96 |                 sum = np.zeros(len(mains_))
 97 |                 sum = np.add(kettle,sum)
 98 |                 sum = np.add(micro, sum)
 99 |                 sum = np.add(fridge, sum)
100 |                 sum = np.add(wash, sum)
101 |                 sum = np.add(dish, sum)
102 |                 noise = mains_ - sum
103 | 
104 |                 # negative values cancellation or misalignment correction
105 |                 for s in range(len(noise) - 30):
106 |                     if (noise[s] < 0 and noise[s + 1] > 0):
107 |                         for po in range(s,s + 30):
108 |                             noise[po] = 1
109 |                 for s in range(len(noise) - 30):
110 |                     if (noise[s] >= 0 and noise[s + 1] < 0):
111 |                         for po in range(s - 30, s):
112 |                             noise[po] = 1
113 | 
114 |                 noised.append(noise)
115 | 
116 |             except StopIteration:
117 |                 continue
118 |         print("Done!")
119 |         np.save('../noise_' + str(build) + '.npy', noised)
120 |     ukdale = arguments.ukdale
121 |     if ukdale:
122 | 
123 |         print("UKDALE Noise extraction")
124 |         ukdale_path = "../ukdale.h5"
125 |         build = arguments.building
126 |         ukdale = DataSet(ukdale_path)
127 |         # insert desired period
128 |         ukdale.set_window(start=arguments.start, end=arguments.end)
129 |         elec = ukdale.buildings[build].elec
130 |         good = elec.good_sections(full_results=False)
131 |         period = 6
132 |         noised = []
133 | 
134 |         for i in range(len(good)):
135 |             try:
136 |                 ukdale.set_window(good[i])
137 |                 elec = ukdale.buildings[build].elec
138 | 
139 |                 mains_ = next(elec.mains().load(sample_period=period))
140 | 
141 |                 if mains_.shape[0] == 0 and mains_.shape[1] == 0:
142 |                     continue
143 |                 else:
144 |                     Mains_ = mains_['power']['active'].to_numpy()
145 |                 if build != 3 or build != 4:
146 |                     kettle = next(elec['kettle'].load(sample_period=period))
147 |                     kettle = kettle['power']['active'].to_numpy()
148 | 
149 | 
150 |                     micro = next(elec['microwave'].load(sample_period=period))
151 |                     micro = micro['power']['active'].to_numpy()
152 | 
153 | 
154 |                     fridge = next(elec['fridge'].load(sample_period=period))
155 |                     fridge = fridge['power']['active'].to_numpy()
156 | 
157 | 
158 | 
159 |                     wash = next(elec['washing machine'].load(sample_period=period))
160 |                     wash = wash['power']['active'].to_numpy()
161 | 
162 | 
163 |                     dish = next(elec['dish washer'].load(sample_period=period))
164 |                     dish = dish['power']['active'].to_numpy()
165 | 
166 |                 else:
167 |                     if build == 4:
168 |                         fridge = next(elec['fridge'].load(sample_period=period))
169 |                         fridge = fridge['power']['active'].to_numpy()
170 |                         dish = np.zeros(len(mains_))
171 |                         kettle = np.zeros(len(mains_))
172 |                         wash = np.zeros(len(mains_))
173 |                         micro = np.zeros(len(mains_))
174 |                     else:
175 |                         kettle = next(elec['kettle'].load(sample_period=period))
176 |                         kettle = kettle['power']['active'].to_numpy()
177 |                         dish = np.zeros(len(mains_))
178 |                         fridge = np.zeros(len(mains_))
179 |                         wash = np.zeros(len(mains_))
180 |                         micro = np.zeros(len(mains_))
181 |                 plt.plot(Mains_)
182 |                 plt.plot(kettle)
183 |                 plt.plot(micro)
184 |                 plt.plot(fridge)
185 |                 plt.plot(wash)
186 |                 plt.plot(dish)
187 |                 plt.show()
188 | 
189 |                 # noise creation
190 |                 if len(kettle) == len(micro) == len(fridge) == len(wash) == len(dish) == len(Mains_):
191 |                     sum = np.zeros(len(Mains_))
192 |                     sum = np.add(kettle, sum)
193 |                     sum = np.add(micro, sum)
194 |                     sum = np.add(fridge, sum)
195 |                     sum = np.add(wash, sum)
196 |                     sum = np.add(dish, sum)
197 |                     noise = Mains_ - sum
198 | 
199 |                     # negative values cancellation or misalignment correction
200 |                     for s in range(len(noise) - 30):
201 |                         if (noise[s] < 0 and noise[s + 1] > 0):
202 |                             for po in range(s, s + 30):
203 |                                 noise[po] = 1
204 |                     for s in range(len(noise) - 30):
205 |                         if (noise[s] >= 0 and noise[s + 1] < 0):
206 |                             for po in range(s - 30, s):
207 |                                 noise[po] = 1
208 | 
209 |                     noised.append(noise)
210 |                 else:
211 |                     continue
212 | 
213 |             except StopIteration:
214 |                 continue
215 |         print("Done!")
216 |         np.save('../noise_' + str(build) + '.npy', noised)
217 | 


--------------------------------------------------------------------------------
/ukdale_experiment_1_2.py:
--------------------------------------------------------------------------------
  1 | from CRNN import *
  2 | import numpy as np
  3 | from utils_func import *
  4 | import json
  5 | import random as python_random
  6 | import tensorflow as tf
  7 | import os
  8 | from sklearn.metrics import classification_report
  9 | from metrics import *
 10 | from tensorflow.keras.callbacks import TensorBoard
 11 | from datetime import datetime
 12 | import argparse
 13 | 
 14 | 
 15 | parser = argparse.ArgumentParser(description="UKDALE experiments 1 and 2")
 16 | 
 17 | parser.add_argument("--quantity_1", type=int, default=45581, help="Number of bags in UKDALE house 1")
 18 | parser.add_argument("--quantity_2", type=int, default=3271, help="Number of bags in UKDALE house 2")
 19 | parser.add_argument("--quantity_3", type=int, default=3047, help="Number of bags in UKDALE house 3")
 20 | parser.add_argument("--quantity_4", type=int, default=553, help="Number of bags in UKDALE house 4")
 21 | parser.add_argument("--quantity_5", type=int, default=2969, help="Number of bags in UKDALE house 5")
 22 | parser.add_argument("--perc_strong", type= int, default=20, help="Percentage of UKDALE strong data")
 23 | parser.add_argument("--perc_weak", type= int, default=80, help="Percentage of UKDALE weak data")
 24 | parser.add_argument("--control_strong", type= bool, default=True, help="Flag to control if the correct quantity of strongly annotated bags have been considered")
 25 | parser.add_argument("--cs", type= bool, default=False, help="Clip smoothing post-processing")
 26 | parser.add_argument("--weak_houses", type=bool, default=True, help="The flag to load weak dataset")
 27 | parser.add_argument("--strong_weak_flag", type=bool, default=True, help="The flag to choose Strong-CRNN or Weak-CRNN")
 28 | parser.add_argument("--synth_path", type=str, default='', help="Path where test synth aggregate bags are stored")
 29 | parser.add_argument("--test", type=bool, default=False,help="Flag for inference")
 30 | arguments = parser.parse_args()
 31 | 
 32 | 
 33 | if __name__ == '__main__':
 34 | 
 35 |     # Folders holding the aggregate windows (.npy) and the label files (.json).
 36 |     file_agg_path = '../aggregate_data_noised/'
 37 |     file_labels_path = '../labels/'
 38 |     # Fixed seeds. NOTE(review): `random` is not imported by name in this file; presumably a star import provides it - confirm.
 39 |     random.seed(123)
 40 |     np.random.seed(123)
 41 |     python_random.seed(123)
 42 |     tf.random.set_seed(1234)
 43 |     tf.experimental.numpy.random.seed(1234)
 44 |     # Request deterministic TensorFlow ops and a single inter-op / intra-op thread.
 45 |     os.environ['TF_DETERMINISTIC_OPS'] = '1'
 46 |     os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
 47 |     tf.config.threading.set_inter_op_parallelism_threads(1)
 48 |     tf.config.threading.set_intra_op_parallelism_threads(1)
 49 | 
 50 |     # Number of bags to scan for each UK-DALE house (command-line options).
 51 |     quantity_1 = arguments.quantity_1
 52 |     quantity_2 = arguments.quantity_2
 53 |     quantity_5 = arguments.quantity_5
 54 |     quantity_4 = arguments.quantity_4
 55 |     quantity_3 = arguments.quantity_3
 56 |     # Percentage of UKDALE strong data (--perc_strong).
 57 |     perc_strong = arguments.perc_strong
 58 |     print("perc strong:",perc_strong)
 59 |     # Inference flag (--test).
 60 |     test = arguments.test
 61 |     # houses_id is indexed by house number; entry 0 is an unused placeholder.
 62 |     houses = [1,2,3,4,5]
 63 |     houses_id = [0, 'house_1/', 'house_2/', 'house_3/', 'house_4/', 'house_5/']
 64 | 
 65 |     # Aggregate windows (X), strong labels (Y) and weak labels (Y_*_weak) of each split.
 66 |     X_train, Y_train, Y_train_weak = [], [], []
 67 |     X_test, Y_test, Y_test_weak = [], [], []
 68 |     X_val, Y_val, Y_val_weak = [], [], []
 69 | 
 70 |     # LOADING DATA FROM .JSON FOR LABELS STRONG AND WEAK AND .NPY FOR AGGREGATE   #
 71 | 
 72 |     for k in houses:
 73 | 
 74 |         count_str = 0
 75 |         count_val = 0
 76 |         count_weak = 0
 77 | 
 78 | 
 79 |         f = open(file_labels_path + 'labels_%d.json' % k)
 80 |         labels = json.load(f)
 81 |         print("Labels Loaded")
 82 |         if k == 1:
 83 |             quantity = quantity_1
 84 |         if k == 2:
 85 |             quantity = quantity_2
 86 |         if k == 5:
 87 |             quantity = quantity_5
 88 |         if k == 3:
 89 |             quantity = quantity_3
 90 |         if k == 4:
 91 |             quantity = quantity_4
 92 | 
 93 |         b = round(quantity / 5)
 94 |         a = round(b / 5)
 95 | 
 96 | 
 97 |         for i in range(quantity):
 98 | 
 99 |             try:
100 |                 agg = np.load(file_agg_path + houses_id[k] + 'aggregate_%d.npy' %i)
101 |             except FileNotFoundError:
102 |                 continue
103 | 
104 |             key = 'labels_%d' %i
105 | 
106 |             #  STRONG  #
107 |             try:
108 |                 list_strong = labels[key]['strong']
109 |             except KeyError:
110 |                 continue
111 |             matrix = np.zeros((5, 2550))
112 | 
113 | 
114 | 
115 |             for l in range(len(list_strong)):
116 |                 matrix[l] = np.array(list_strong[l])
117 | 
118 | 
119 |             if k == 1 or k == 5 or k==3 or k==4:
120 | 
121 |                 if i < a or (i>=b and i <(a+b)) or (i>=(b*2) and i<(b*2 + a)) or (i>= (b*3) and i<(b*3 + a)) or (i>= b*4 and i<(b*4 + a)):
122 | 
123 | 
124 |                             # Validation data are always annotated with both strong and weak labels
125 |                             matrix = np.transpose(matrix)
126 |                             X_val.append(agg)
127 |                             Y_val.append(matrix)
128 | 
129 | 
130 |                 else:
131 | 
132 |                         quantity_train = round(quantity/100*80)
133 |                         num_data = round(quantity_train / 100 * (100 - perc_strong))
134 |                         print("Quantity of data without strong labels:")
135 |                         print(num_data)
136 |                         count_str += 1
137 |                         if count_str < num_data:
138 |                             matrix = np.ones((2550,5))
139 |                             matrix = np.negative(matrix)
140 |                             X_train.append(agg)
141 |                             Y_train.append(matrix)
142 | 
143 |                         else:
144 | 
145 | 
146 |                             matrix = np.transpose(matrix)
147 |                             X_train.append(agg)
148 |                             Y_train.append(matrix)
149 | 
150 | 
151 | 
152 |             if k == 2:
153 |                     matrix = np.transpose(matrix)
154 |                     X_test.append(agg)
155 |                     Y_test.append(matrix)
156 | 
157 | 
158 |             ##### WEAK #####
159 |             try:
160 |                 list_weak = labels[key]['weak']
161 |             except KeyError:
162 |                 continue
163 | 
164 | 
165 | 
166 | 
167 |             if k == 1 or k == 5 or k==3 or k==4:
168 | 
169 |                 if i < a or (i >= b and i < (a + b)) or (i >= (b * 2) and i < (b * 2 + a)) or (
170 |                             i >= (b * 3) and i < (b * 3 + a)) or (i >= b * 4 and i < (b * 4 + a)):
171 | 
172 | 
173 |                     Y_val_weak.append(np.array(list_weak).reshape(1,5))
174 | 
175 |                 else:
176 | 
177 | 
178 |                     Y_train_weak.append(np.array(list_weak).reshape(1, 5))
179 |             if k == 2:
180 | 
181 |                 Y_test_weak.append(np.array(list_weak).reshape(1,5))
182 | 
183 | 
184 |     weak_X_train, weak_Y_train,weak_Y_train_weak   = [], [], []
185 |     weak_X_train_balanced, weak_Y_train_balanced,weak_Y_train_weak_balanced   = [], [], []
186 | 
187 |     weak_houses = arguments.weak_houses
188 |     print("Weak dataset loading:",weak_houses)
189 |     if weak_houses:
190 |         n_1 = 0
191 |         f_weak = open(file_labels_path + 'labels_1_weak.json')
192 |         labels_weak = json.load(f_weak)
193 |         print("Labels weak Loaded")
194 | 
195 |         for i in range(len(labels_weak)):
196 | 
197 |             try:
198 |                 agg = np.load(file_agg_path + 'house_1_weak/' + 'aggregate_%d.npy' %i)
199 |             except FileNotFoundError:
200 |                 continue
201 | 
202 | 
203 |             key = 'labels_%d' %i
204 | 
205 |             #  STRONG  #
206 |             try:
207 |                 list_strong = labels_weak[key]['strong']
208 |             except KeyError:
209 |                 continue
210 | 
211 |             matrix = np.negative(np.ones((5, 2550)))
212 |             weak_X_train.append(agg)
213 |             weak_Y_train.append(matrix)
214 |             ##### WEAK #####
215 |             try:
216 |                 list_weak = labels_weak[key]['weak']
217 |             except KeyError:
218 |                 continue
219 | 
220 |             weak_Y_train_weak.append(np.array(list_weak).reshape(1,5))
221 | 
222 | 
223 | 
224 |         f_weak_balanced = open(file_labels_path + 'labels_1_weak_balanced.json')
225 |         labels_weak_balanced = json.load(f_weak_balanced)
226 |         print("Labels weak balanced Loaded")
227 | 
228 |         for i in range(len(labels_weak_balanced)):
229 | 
230 |             try:
231 |                 agg = np.load('../aggregate_data_noised/house_1_weak_balanced/' + 'aggregate_%d.npy' % i)
232 |             except FileNotFoundError:
233 |                 continue
234 | 
235 | 
236 |             key = 'labels_%d' % i
237 | 
238 |             #  STRONG  #
239 |             try:
240 |                 list_strong = labels_weak_balanced[key]['strong']
241 |             except KeyError:
242 |                 continue
243 | 
244 |             matrix = np.negative(np.ones((2550, 5)))
245 | 
246 |             weak_X_train_balanced.append(agg)
247 |             weak_Y_train_balanced.append(matrix)
248 | 
249 |             ##### WEAK #####
250 |             try:
251 |                 list_weak = labels_weak_balanced[key]['weak']
252 |             except KeyError:
253 |                 continue
254 | 
255 |             weak_Y_train_weak_balanced.append(np.array(list_weak).reshape(1, 5))
256 | 
257 |         print("Weak dataset loaded!")
258 | 
259 |         X_train = np.array(X_train)
260 |         Y_train = np.array(Y_train)
261 |         Y_train_weak = np.array(Y_train_weak)
262 |         X_test = np.array(X_test)
263 |         Y_test = np.array(Y_test)
264 |         Y_test_weak = np.array(Y_test_weak)
265 |         X_val = np.array(X_val)
266 |         Y_val = np.array(Y_val)
267 |         Y_val_weak = np.array(Y_val_weak)
268 |         print(Y_val_weak.shape)
269 |         print(Y_val.shape)
270 |         weak_X_train = np.array(weak_X_train)
271 |         weak_Y_train = np.array(weak_Y_train)
272 |         weak_Y_train_weak = np.array(weak_Y_train_weak)
273 |         weak_X_train_balanced = np.array(weak_X_train_balanced)
274 |         weak_Y_train_balanced = np.array(weak_Y_train_balanced)
275 |         weak_Y_train_weak_balanced = np.array(weak_Y_train_weak_balanced)
276 | 
277 |         X_train = np.concatenate([X_train,weak_X_train, weak_X_train_balanced], axis=0)
278 |         Y_train = np.concatenate([Y_train,weak_Y_train, weak_Y_train_balanced], axis = 0)
279 |         Y_train_weak = np.concatenate([Y_train_weak,weak_Y_train_weak, weak_Y_train_weak_balanced], axis = 0)
280 |         print(X_train.shape)
281 |         print(Y_train.shape)
282 |         print(Y_train_weak.shape)
283 | 
284 | 
285 |     else:
286 |         X_train = np.array(X_train)
287 |         Y_train = np.array(Y_train)
288 |         Y_train_weak = np.array(Y_train_weak)
289 |         X_test = np.array(X_test)
290 |         Y_test = np.array(Y_test)
291 |         Y_test_weak = np.array(Y_test_weak)
292 |         X_val = np.array(X_val)
293 |         Y_val = np.array(Y_val)
294 |         Y_val_weak = np.array(Y_val_weak)
295 |         print(Y_val_weak.shape)
296 |         print(Y_val.shape)
297 | 
298 | 
299 |     assert(len(Y_val)==len(Y_val_weak))
300 |     assert(len(Y_train)==len(Y_train_weak))
301 | 
302 |     control_strong = arguments.control_strong
303 |     if control_strong:
304 |         Y_val_new = []
305 |         X_val_new = []
306 |         Y_val_weak_new = []
307 |         Y_train_new = []
308 |         X_train_new = []
309 |         Y_train_weak_new = []
310 | 
311 |         for i in range(len(Y_val)):
312 |             if np.all(Y_val[i][0] != -1):
313 |                 Y_val_new.append(Y_val[i])
314 |                 X_val_new.append(X_val[i])
315 |                 Y_val_weak_new.append(Y_val_weak[i])
316 | 
317 |         for i in range(len(Y_train)):
318 |             if np.all(Y_train[i][0] != -1):
319 |                 Y_train_new.append(Y_train[i])
320 |                 X_train_new.append(X_train[i])
321 |                 Y_train_weak_new.append(Y_train_weak[i])
322 | 
323 |         Y_val_new = np.array(Y_val_new)
324 |         Y_val_weak_new = np.array(Y_val_weak_new)
325 |         X_val_new = np.array(X_val_new)
326 |         Y_train_new = np.array(Y_train_new)
327 |         Y_train_weak_new = np.array(Y_train_weak_new)
328 |         X_train_new = np.array(X_train_new)
329 |         print("Val strong shape")
330 |         print(Y_val_new.shape)
331 |         print("Train strong shape")
332 |         print(Y_train_new.shape)
333 | 
334 | 
335 | 
336 | 
337 |     x_train = X_train
338 |     y_strong_train = Y_train
339 |     y_weak_train = Y_train_weak
340 | 
341 |     print('Data shape:')
342 |     print(x_train.shape)
343 |     print(X_val.shape)
344 |     print(X_test.shape)
345 | 
346 |     # Aggregate Standardization #
347 |     if perc_strong <= 100 and weak_houses == False:
348 |         train_mean = np.mean(X_train_new)
349 |         train_std = np.std(X_train_new)
350 |         print(perc_strong)
351 |         print("Mean train")
352 |         print(train_mean)
353 |         print("Std train")
354 |         print(train_std)
355 |     else:
356 |         train_mean = np.mean(x_train)
357 |         train_std = np.std(x_train)
358 |         print(perc_strong)
359 |         print("Mean train")
360 |         print(train_mean)
361 |         print("Std train")
362 |         print(train_std)
363 | 
364 |     x_train = standardize_data(x_train,train_mean, train_std)
365 |     X_val = standardize_data(X_val, train_mean, train_std)
366 |     X_test = standardize_data(X_test,train_mean, train_std)
367 |     perc_weak = arguments.perc_weak
368 |     if weak_houses:
369 | 
370 |         len_tot_weak = len(Y_train_weak)
371 |         quant_weak = round(len_tot_weak * perc_weak / 100)
372 | 
373 |         print("Weak quantity:", quant_weak)
374 |         no_weak = np.ones((1, 5))
375 |         no_weak = np.negative(no_weak)
376 |         new_weak = []
377 |         for lun in range(len_tot_weak):
378 |             if lun <= quant_weak:
379 |                new_weak.append(Y_train_weak[lun])
380 |             else:
381 |                new_weak.append(no_weak)
382 | 
383 |         Y_train_weak = np.array(new_weak)
384 |         weak_count(Y_train_weak)
385 |     type_ = 'NOISED_BEST_'+ str(perc_weak) + 'weak_' + str(perc_strong) + 'strong_'
386 |     # Hyper-parameters of the network and of its training.
387 |     batch_size = 64
388 |     window_size = 2550
389 |     drop = 0.1
390 |     kernel = 5
391 |     num_layers = 3
392 |     gru_units = 64
393 |     lr = 0.002
394 |     drop_out = drop  # NOTE(review): never read afterwards in this script (drop itself is passed below)
395 |     weight= 1e-2
396 |     # Build the model from the hyper-parameters above and the --cs / --strong_weak_flag options.
397 |     CRNN = CRNN_construction(window_size,weight, lr=lr, classes=5, drop_out=drop, kernel = kernel, num_layers=num_layers, gru_units=gru_units, cs=arguments.cs,strong_weak_flag=arguments.strong_weak_flag)
398 |     # Early stopping (patience 15, best weights restored); the monitored metric name depends on --cs.
399 |     if arguments.cs:
400 |         early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_strong_level_final_custom_f1_score', mode='max',
401 |                                                       patience=15, restore_best_weights=True)
402 |     else:
403 |         early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_strong_level_custom_f1_score', mode='max',
404 |                                                       patience=15, restore_best_weights=True)
405 |     # TensorBoard logging; the directory name joins a timestamp, the experiment tag and the weight.
406 |     # NOTE(review): log_dir_ starts with a hard-coded absolute path - adapt it to the local machine.
407 |     log_dir_ = '/home/eprincipi/Weak_Supervision/weak_labels/models/logs/logs_CRNN'  + datetime.now().strftime("%Y%m%d-%H%M%S") + type_ + str(weight)
408 |     tensorboard = TensorBoard(log_dir=log_dir_)
409 |     file_writer = tf.summary.create_file_writer(log_dir_ + "/metrics")
410 |     file_writer.set_as_default()
411 | 
412 |     if not test:
413 |         if arguments.strong_weak_flag:
414 |             history = CRNN.fit(x=x_train, y=[y_strong_train, y_weak_train], shuffle=True, epochs=1000,
415 |                                batch_size=batch_size,
416 |                                validation_data=(X_val, [Y_val, Y_val_weak]), callbacks=[early_stop, tensorboard], verbose=1)
417 |             CRNN.save_weights(
418 |                 '')
419 |             output_strong, output_weak = CRNN.predict(x=X_val)
420 |             output_strong_test_o, output_weak_test = CRNN.predict(x=X_test)
421 |         else:
422 |             history = CRNN.fit(x=x_train, y=y_strong_train, shuffle=True, epochs=1000, batch_size=batch_size,
423 |                                validation_data=(X_val, Y_val), callbacks=[early_stop], verbose=1)
424 |             CRNN.save_weights(
425 |                 '')
426 |             output_strong = CRNN.predict(x=X_val)
427 |             output_strong_test_o = CRNN.predict(x=X_test)
428 | 
429 |     else:
430 |         CRNN.load_weights('')
431 | 
432 |     shape = output_strong.shape[0] * output_strong.shape[1]
433 |     shape_test = output_strong_test_o.shape[0] * output_strong_test_o.shape[1]
434 | 
435 | 
436 |     Y_val = Y_val.reshape(shape, 5)
437 |     Y_test = Y_test.reshape(shape_test, 5)
438 | 
439 |     output_strong = output_strong.reshape(shape, 5)
440 | 
441 |     output_strong_test = output_strong_test_o.reshape(shape_test, 5)
442 | 
443 |     thres_strong = thres_analysis(Y_val, output_strong,classes=5)
444 |     assert (Y_val.shape == output_strong.shape)
445 | 
446 | 
447 |     plt.plot(output_strong[:24000, 0])
448 |     plt.plot(Y_val[:24000, 0])
449 |     plt.legend(['output', 'ground truth'])
450 |     plt.show()
451 |     plt.plot(output_strong[:24000, 1])
452 |     plt.plot(Y_val[:24000, 1])
453 |     plt.legend(['output', 'ground truth'])
454 |     plt.show()
455 | 
456 |     plt.plot(output_strong[:24000, 2])
457 |     plt.plot(Y_val[:24000, 2])
458 |     plt.legend(['output', 'ground truth'])
459 |     plt.show()
460 | 
461 |     plt.plot(output_strong[:24000, 3])
462 |     plt.plot(Y_val[:24000, 3])
463 |     plt.legend(['output', 'ground truth'])
464 |     plt.show()
465 | 
466 |     plt.plot(output_strong[:24000, 4])
467 |     plt.plot(Y_val[:24000, 4])
468 |     plt.legend(['output', 'ground truth'])
469 |     plt.show()
470 | 
471 |     print(thres_strong)
472 | 
473 | 
474 |     output_strong_test = app_binarization_strong(output_strong_test, thres_strong, 5)
475 |     output_strong = app_binarization_strong(output_strong, thres_strong, 5)
476 | 
477 | 
478 |     print("STRONG SCORES:")
479 |     print("Validation")
480 | 
481 |     print(classification_report(Y_val, output_strong))
482 | 
483 |     print("Test")
484 | 
485 |     print(classification_report(Y_test, output_strong_test))
486 | 
487 |     houses = [2]
488 |     X_test = []
489 |     Y_test = []
490 |     synth_path = arguments.synth_path
491 |     for k in houses:
492 | 
493 |         f = open(synth_path + 'labels_%d.json' % k)
494 |         labels = json.load(f)
495 |         print("Labels Loaded")
496 | 
497 |         if k == 2:
498 |             quantity = quantity_2
499 | 
500 |         b = round(quantity / 5)
501 |         a = round(b / 5)
502 | 
503 |         for i in range(quantity):
504 | 
505 |             agg = np.load(file_agg_path + houses_id[k] + 'aggregate_%d.npy' % i)
506 | 
507 |             key = 'labels_%d' % i
508 | 
509 |             #  STRONG  #
510 |             list_strong = labels[key]['strong']
511 | 
512 |             matrix = np.zeros((5, 2550))
513 | 
514 | 
515 |             for l in range(len(list_strong)):
516 |                 matrix[l] = np.array(list_strong[l])
517 | 
518 |             matrix = np.transpose(matrix)
519 |             X_test.append(agg)
520 |             Y_test.append(matrix)
521 | 
522 |     X_test = np.array(X_test)
523 |     plt.plot(X_test[0])
524 |     plt.show()
525 |     ANE = ANE(X_test, output_strong_test_o)
526 |     print("ANE:")
527 |     print(ANE)
528 | 
529 | 
530 | 


--------------------------------------------------------------------------------
/mixed_training_experiment.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | from datetime import datetime
  4 | import argparse
  5 | import gc
  6 | from tensorflow.keras.callbacks import TensorBoard
  7 | from sklearn.metrics import precision_recall_curve, classification_report
  8 | from CRNN import *
  9 | from utils_func import *
 10 | import json
 11 | import random as python_random
 12 | import tensorflow as tf
 13 | from metrics import *
 14 | 
 15 | parser = argparse.ArgumentParser(description="Mixed experiment code")
 16 | 
 17 | parser.add_argument("--quantity_1", type=int, default=45581, help="Number of bags in UKDALE house 1")
 18 | parser.add_argument("--quantity_2", type=int, default=3271, help="Number of bags in UKDALE house 2")
 19 | parser.add_argument("--quantity_3", type=int, default=3047, help="Number of bags in UKDALE house 3")
 20 | parser.add_argument("--quantity_4", type=int, default=553, help="Number of bags in UKDALE house 4")
 21 | parser.add_argument("--quantity_5", type=int, default=2969, help="Number of bags in UKDALE house 5")
 22 | parser.add_argument("--perc_strong", type= int, default=20, help="Percentage of UKDALE strong data")
 23 | parser.add_argument("--perc_weak", type= int, default=80, help="Percentage of REFIT weak data")
 24 | parser.add_argument("--test", type= bool, default=False, help="Flag to perform inference")
 25 | parser.add_argument("--test_ukdale", type=bool, default=True, help="Flag to select which dataset has to be used for testing")
 26 | parser.add_argument("--refit_synth", type=str, default='', help="REFIT synth data path")
 27 | parser.add_argument("--ukdale_synth", type=str, default='', help="UKDALE synth data path")
 28 | arguments = parser.parse_args()
 29 | 
 30 | 
 31 | if __name__ == '__main__':
 32 |     # UK-DALE path
 33 |     path = '../weak_labels/'
 34 |     file_agg_path = path + 'aggregate_data_noised/'
 35 |     file_labels_path = path + 'labels/'
 36 | 
 37 |     # REFIT path
 38 |     refit_agg_resample_path = '../resampled_agg_REFIT/'
 39 | 
 40 |     # set seeds for reproducible results
 41 |     random.seed(123)  # NOTE(review): `random` is not imported by name in this file; presumably a star import provides it - confirm
 42 |     np.random.seed(123)
 43 |     python_random.seed(123)
 44 |     tf.random.set_seed(1234)
 45 |     tf.experimental.numpy.random.seed(1234)
 46 |     # Request deterministic TensorFlow ops and a single inter-op / intra-op thread.
 47 |     os.environ['TF_DETERMINISTIC_OPS'] = '1'
 48 |     os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
 49 |     tf.config.threading.set_inter_op_parallelism_threads(1)
 50 |     tf.config.threading.set_intra_op_parallelism_threads(1)
 51 |     # Only the CUDA device with index 7 is made visible to this process (hard-coded).
 52 |     os.environ["CUDA_VISIBLE_DEVICES"] = "7"
 53 |     # Number of bags to scan for each UK-DALE house (command-line options).
 54 |     quantity_1 = arguments.quantity_1
 55 |     quantity_2 = arguments.quantity_2
 56 |     quantity_5 = arguments.quantity_5
 57 |     quantity_4 = arguments.quantity_4
 58 |     quantity_3 = arguments.quantity_3
 59 |     houses = [1, 2, 3, 4, 5]
 60 |     houses_id = [0, 'house_1/', 'house_2/', 'house_3/', 'house_4/', 'house_5/']  # indexed by house number; entry 0 is a placeholder
 61 | 
 62 |     perc_strong = arguments.perc_strong
 63 |     print("perc strong", perc_strong)
 64 |     perc_weak = arguments.perc_weak
 65 |     print("perc weak:", perc_weak)
 66 | 
 67 |     # Flags Initialization
 68 | 
 69 |     test = arguments.test
 70 |     strong_weak = True
 71 |     strong_weak_flag = True
 72 |     test_ukdale = arguments.test_ukdale
 73 |     weak_counter = True
 74 |     # Aggregate windows (X), strong labels (Y) and weak labels (Y_*_weak) of each split.
 75 |     X_train, Y_train, Y_train_weak = [], [], []
 76 |     X_test, Y_test, Y_test_weak = [], [], []
 77 |     X_val, Y_val, Y_val_weak = [], [], []
 78 | 
 79 |     for k in houses:
 80 | 
 81 |         count_str = 0
 82 |         count_val = 0
 83 |         count_weak = 0
 84 | 
 85 | 
 86 |         f = open(file_labels_path + 'labels_%d.json' % k)
 87 |         labels = json.load(f)
 88 |         print("Labels Loaded")
 89 |         if k == 1:
 90 |             quantity = quantity_1
 91 |         if k == 2:
 92 |             quantity = quantity_2
 93 |         if k == 5:
 94 |             quantity = quantity_5
 95 |         if k == 3:
 96 |             quantity = quantity_3
 97 |         if k == 4:
 98 |             quantity = quantity_4
 99 | 
100 |         b = round(quantity / 5)
101 |         a = round(b / 5)
102 | 
103 |         for i in range(quantity):
104 | 
105 |             try:
106 |                 agg = np.load(file_agg_path + houses_id[k] + 'aggregate_%d.npy' % i)
107 |             except FileNotFoundError:
108 |                 continue
109 | 
110 |             key = 'labels_%d' % i
111 | 
112 |             #  STRONG  #
113 |             try:
114 |                 list_strong = labels[key]['strong']
115 |             except KeyError:
116 |                 continue
117 | 
118 |             matrix = np.zeros((5, 2550))
119 |             error_vectors = 0
120 | 
121 | 
122 | 
123 |             for l in range(len(list_strong)):
124 |                 matrix[l] = np.array(list_strong[l])
125 | 
126 |             if k == 1 or k == 5 or k == 3 or k == 4:
127 | 
128 |                 if i < a or (i >= b and i < (a + b)) or (i >= (b * 2) and i < (b * 2 + a)) or (
129 |                             i >= (b * 3) and i < (b * 3 + a)) or (i >= b * 4 and i < (b * 4 + a)):
130 |                         # se rientra nei dati di validation rimangono strong
131 | 
132 |                     matrix = np.transpose(matrix)
133 |                     X_val.append(agg)
134 |                     Y_val.append(matrix)
135 | 
136 | 
137 |                 else:  # se va nei dati di train
138 | 
139 |                     quantity_train = round(quantity / 100 * 80)
140 |                     num_data = round(quantity_train / 100 * (100 - perc_strong))
141 |                     print("Quantity of data without strong labels:")
142 |                     print(num_data)
143 |                     count_str += 1
144 |                     if count_str < num_data:
145 |                         matrix = np.ones((2550, 5))
146 |                         matrix = np.negative(matrix)
147 |                         X_train.append(agg)
148 |                         Y_train.append(matrix)
149 | 
150 |                     else:
151 |                         matrix = np.transpose(matrix)
152 |                         X_train.append(agg)
153 |                         Y_train.append(matrix)
154 | 
155 |             if k == 2:
156 |                 matrix = np.transpose(matrix)
157 |                 X_test.append(agg)
158 |                 Y_test.append(matrix)
159 | 
160 |             ##### WEAK #####
161 |             try:
162 |                 list_weak = labels[key]['weak']
163 |             except KeyError:
164 |                 continue
165 | 
166 |             if k == 1 or k == 5 or k == 3 or k == 4:
167 | 
168 |                 if i < a or (i >= b and i < (a + b)) or (i >= (b * 2) and i < (b * 2 + a)) or (
169 |                             i >= (b * 3) and i < (b * 3 + a)) or (i >= b * 4 and i < (b * 4 + a)):
170 | 
171 |                     Y_val_weak.append(np.array(list_weak).reshape(1, 5))
172 | 
173 |                 else:
174 | 
175 |                     if strong_weak:
176 |                         num_data = 0
177 |                         print(num_data)
178 |                         count_weak += 1
179 |                         if count_weak <= num_data:
180 |                             list_weak = [-1, -1, -1, -1,
181 |                                              -1]
182 |                             Y_train_weak.append(np.array(list_weak).reshape(1, 5))
183 | 
184 |                         else:
185 |                             Y_train_weak.append(np.array(list_weak).reshape(1, 5))
186 | 
187 |                     else:
188 |                         Y_train_weak.append(np.array(list_weak).reshape(1, 5))
189 |             if k == 2:
190 |                 Y_test_weak.append(np.array(list_weak).reshape(1, 5))
191 | 
192 |     X_train = np.array(X_train)
193 |     Y_train = np.array(Y_train)
194 |     Y_train_weak = np.array(Y_train_weak)
195 |     if not test_ukdale:
196 |         X_test, Y_test, Y_test_weak = [], [], []
197 |         refit_agg_resample_path_test = '../resampled_agg_REFIT_test/'
198 |         refit_labels_resample_path_test = '../resampled_labels_REFIT_test/'
199 |         
200 | 	houses = [4, 9, 15]
201 | 
202 | 	# number of bags in each house can change based on the dataset composition  
203 |         for k in houses:
204 | 
205 |             quant = [0, 0, 0, 0, 12000, 0, 0, 0, 0, 9000, 0, 0, 0, 0, 0, 1500, 0, 0, 0, 0, 0, 0, 0]
206 | 
207 |             error_vectors = 0
208 |             print("Aggregate Loading")
209 | 
210 |             for i in range(quant[k]):
211 | 
212 |                 agg = np.load(refit_agg_resample_path_test + 'house_' + str(k) + '/aggregate_%d.npy' % i)
213 |                 labels_weak = np.load('../labels/' + 'house_' + str(
214 |                     k) + '/weak_labels_%d.npy' % i, allow_pickle=True)
215 |                 labels_strong = np.load(
216 |                     refit_labels_resample_path_test + 'house_' + str(k) + '/strong_labels_%d.npy' % i,
217 |                     allow_pickle=True)
218 |                 val_q = round(quant[k] / 100 * 20)
219 | 
220 |                 if k == 4 or k == 9 or k == 15:
221 | 
222 |                         matrix = np.transpose(labels_strong)
223 |                         X_test.append(agg)
224 |                         Y_test.append(matrix)
225 |                         Y_test_weak.append(labels_weak.reshape(1, 5))
226 | 
227 |         X_test = np.array(X_test)
228 |         Y_test = np.array(Y_test)
229 |         Y_test_weak = np.array(Y_test_weak)
230 |     else:
231 |         X_test = np.array(X_test)
232 |         Y_test = np.array(Y_test)
233 |         Y_test_weak = np.array(Y_test_weak)
234 | 
235 | 
236 |     X_val = np.array(X_val)
237 |     Y_val = np.array(Y_val)
238 |     Y_val_weak = np.array(Y_val_weak)
239 |     Y_val = output_binarization(Y_val, 0.4)
240 | 
241 |     assert (len(X_val) == len(Y_val))
242 |     assert (len(Y_val) == len(Y_val_weak))
243 | 
244 |     houses_weak = [2, 5, 7, 10, 12, 13, 16]
245 |     weak_X_train_balanced, weak_Y_train_balanced, weak_Y_train_weak_balanced = [], [], []
246 |     
247 |     # number of bags can change based on the created dataset
248 |     for k in houses_weak:
249 |         quant = [0, 0, 20000, 0, 0, 20000, 0, 20000, 0, 0, 20000, 0, 15000, 3000, 0, 0, 5000]
250 | 
251 |         for i in range(quant[k]):
252 | 
253 |             print("Aggregate Loading")
254 |             agg = np.load(refit_agg_resample_path + 'house_' + str(k) + '/aggregate_%d.npy' % i)
255 | 
256 |             labels_weak = np.load('../labels/' + 'house_' + str(
257 |                 k) + '/weak_labels_%d.npy' % i, allow_pickle=True)
258 |             labels_strong = np.load('..labels/' + 'house_' + str(
259 |                 k) + '/strong_labels_%d.npy' % i, allow_pickle=True)
260 | 
261 | 
262 |             matrix = np.ones((5, 2550))
263 |             matrix = np.negative(matrix)
264 | 
265 | 
266 |             print("TRAIN")
267 |             matrix = np.transpose(matrix)
268 |             weak_X_train_balanced.append(agg)
269 |             weak_Y_train_balanced.append(matrix)
270 |             weak_Y_train_weak_balanced.append(labels_weak.reshape(1, 5))
271 | 
272 | 
273 |     weak_X_train_balanced = np.array(weak_X_train_balanced)
274 |     weak_Y_train_balanced = np.array(weak_Y_train_balanced)
275 |     weak_Y_train_weak_balanced = np.array(weak_Y_train_weak_balanced)
276 | 
277 |     Y_train_new = []
278 |     X_train_new = []
279 |     Y_train_weak_new = []
280 |     # it is necessary to considered only the strong labeled data so
281 |     for i in range(len(Y_train)):
282 |         if np.all(Y_train[i][0] != -1):
283 |             Y_train_new.append(Y_train[i])
284 |             X_train_new.append(X_train[i])
285 |             Y_train_weak_new.append(Y_train_weak[i])
286 | 
287 |     Y_train_new = np.array(Y_train_new)
288 |     Y_train_weak_new = np.array(Y_train_weak_new)
289 |     X_train_new = np.array(X_train_new)
290 |     print("Y train strong shape", Y_train_new.shape)
291 | 
292 |     X_train = np.concatenate([X_train_new, weak_X_train_balanced], axis=0)
293 |     Y_train = np.concatenate([Y_train_new, weak_Y_train_balanced], axis=0)
294 |     Y_train_weak = np.concatenate([Y_train_weak_new, weak_Y_train_weak_balanced], axis=0)
295 | 
296 |     num_weak = round(len(Y_train_weak) / 100 * perc_weak)
297 |     num_non_weak = round(len(Y_train_weak) / 100 * (100 - perc_weak))
298 |     Y_train_weak = Y_train_weak[:num_weak]
299 |     Y_train = Y_train[:num_weak]
300 |     X_train = X_train[:num_weak]
301 | 
302 |     # this function return the quantity of bags that contain each appliance
303 |     weak_count(Y_train_weak)
304 | 
305 |     print("Total x train", X_train.shape)
306 |     print("Total Y train", Y_train.shape)
307 |     print("Total Y train weak", Y_train_weak.shape)
308 |     assert (len(Y_val) == len(Y_val_weak))
309 |     assert (len(Y_train) == len(Y_train_weak))
310 | 
311 |     x_train = X_train
312 |     y_strong_train = Y_train
313 |     y_weak_train = Y_train_weak
314 | 
315 |     train_mean = np.mean(x_train)
316 |     train_std = np.std(x_train)
317 |     print("STRONG-WEAK")
318 |     print(perc_strong)
319 |     print("Mean train")
320 |     print(train_mean)
321 |     print("Std train")
322 |     print(train_std)
323 | 
324 |     x_train = standardize_data(x_train, train_mean, train_std)
325 |     X_val = standardize_data(X_val, train_mean, train_std)
326 |     X_test = standardize_data(X_test, train_mean, train_std)
327 | 
328 |     batch_size = 64
329 |     window_size = 2550
330 |     drop = 0.1
331 |     kernel = 5
332 |     num_layers = 3
333 |     gru_units = 64
334 |     cs = False
335 |     type_ = 'UKDALE_REFIT_NOISED_' + str(perc_weak) + 'weak_' + str(
336 |         perc_strong) + 'strong_weak_'
337 |     lr = 0.002
338 |     drop_out = drop
339 |     weight = 1e-2
340 |     classes = 5
341 | 
342 |     CRNN = CRNN_construction(window_size, weight, lr=lr, classes=5, drop_out=drop, kernel=kernel, num_layers=num_layers,
343 |                              gru_units=gru_units, cs=cs, strong_weak_flag=True)
344 | 
345 |     if cs:
346 |         early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_strong_level_final_custom_f1_score', mode='max',
347 |                                                       patience=15, restore_best_weights=True)
348 |     else:
349 |         early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_strong_level_custom_f1_score', mode='max',
350 |                                                       patience=15, restore_best_weights=True)
351 | 
352 |     log_dir_ = '../logs_CRNN' + datetime.now().strftime(
353 |         "%Y%m%d-%H%M%S") + type_ + str(weight)
354 |     tensorboard = TensorBoard(log_dir=log_dir_)
355 |     file_writer = tf.summary.create_file_writer(log_dir_ + "/metrics")
356 |     file_writer.set_as_default()
357 | 
358 |     if not test:
359 |         history = CRNN.fit(x=x_train, y=[y_strong_train, y_weak_train], shuffle=True, epochs=1000,
360 |                            batch_size=batch_size,
361 |                            validation_data=(X_val, [Y_val, Y_val_weak]), callbacks=[early_stop, tensorboard], verbose=1)
362 |         CRNN.save_weights(
363 |             '')
364 | 
365 |     else:
366 |         CRNN.load_weights(
367 |             '')
368 | 
369 |     output_strong, output_weak = CRNN.predict(x=X_val)
370 |     output_strong_test_o, output_weak_test = CRNN.predict(x=X_test)
371 |     print(Y_val.shape)
372 |     print(output_strong.shape)
373 | 
374 |     shape = output_strong.shape[0] * output_strong.shape[1]
375 |     shape_test = output_strong_test_o.shape[0] * output_strong_test_o.shape[1]
376 | 
377 |     Y_val = Y_val.reshape(shape, 5)
378 |     Y_test = Y_test.reshape(shape_test, 5)
379 |     output_strong = output_strong.reshape(shape, 5)
380 |     output_strong_test = output_strong_test_o.reshape(shape_test, 5)
381 | 
382 | 
383 |     thres_strong = thres_analysis(Y_val, output_strong, classes)
384 | 
385 |     output_weak_test = output_weak_test.reshape(output_weak_test.shape[0] * output_weak_test.shape[1], 5)
386 |     output_weak = output_weak.reshape(output_weak.shape[0] * output_weak.shape[1], 5)
387 |     print(output_weak)
388 |     thres_weak = [0.501, 0.501, 0.501, 0.501, 0.501]
389 | 
390 |     assert (Y_val.shape == output_strong.shape)
391 | 
392 |     plt.plot(output_strong[:24000, 0])
393 |     plt.plot(Y_val[:24000, 0])
394 |     plt.legend(['output', 'strong labels'])
395 |     plt.show()
396 | 
397 |     plt.plot(output_strong[:24000, 1])
398 |     plt.plot(Y_val[:24000, 1])
399 |     plt.legend(['output', 'strong labels'])
400 |     plt.show()
401 | 
402 |     plt.plot(output_strong[:24000, 2])
403 |     plt.plot(Y_val[:24000, 2])
404 |     plt.legend(['output', 'strong labels'])
405 |     plt.show()
406 | 
407 |     plt.plot(output_strong[:24000, 3])
408 |     plt.plot(Y_val[:24000, 3])
409 |     plt.legend(['output', 'strong labels'])
410 |     plt.show()
411 | 
412 |     plt.plot(output_strong[:24000, 4])
413 |     plt.plot(Y_val[:24000, 4])
414 |     plt.legend(['output', 'strong labels'])
415 |     plt.show()
416 | 
417 |     print("Estimated best thresholds:", thres_strong)
418 | 
419 |     output_strong_test = app_binarization_strong(output_strong_test, thres_strong, 5)
420 |     output_strong = app_binarization_strong(output_strong, thres_strong, 5)
421 | 
422 |     print("STRONG SCORES:")
423 |     print("Validation")
424 |     print(classification_report(Y_val, output_strong))
425 |     print("Test")
426 |     print(classification_report(Y_test, output_strong_test))
427 | 
428 |     if test_ukdale:
429 |         houses = [2]
430 |         synth_agg_path = arguments.ukdale_synth
431 |         X_test_synth = []
432 |         Y_test = []
433 |         for k in houses:
434 | 
435 |             f = open(file_labels_path + 'labels_%d.json' % k)
436 |             labels = json.load(f)
437 |             print("Labels Loaded")
438 |             if k == 1:
439 |                 quantity = quantity_1
440 |             if k == 2:
441 |                 quantity = quantity_2
442 |             if k == 5:
443 |                 quantity = quantity_5
444 |             if k == 3:
445 |                 quantity = quantity_3
446 |             if k == 4:
447 |                 quantity = quantity_4
448 | 
449 |             b = round(quantity / 5)
450 |             a = round(b / 5)
451 | 
452 |             for i in range(quantity):
453 | 
454 |                 agg = np.load(synth_agg_path + houses_id[k] + 'aggregate_%d.npy' % i)
455 | 
456 |                 key = 'labels_%d' % i
457 | 
458 |                 #  STRONG  #
459 |                 list_strong = labels[key]['strong']
460 | 
461 |                 matrix = np.zeros((5, 2550))
462 | 
463 | 
464 |                 for l in range(len(list_strong)):
465 |                     matrix[l] = np.array(list_strong[l])
466 | 
467 |                     matrix = np.transpose(matrix)
468 |                     X_test_synth.append(agg)
469 |                     Y_test.append(matrix)
470 | 
471 |             X_test_synth = np.array(X_test_synth)
472 | 
473 |             ANE = ANE(X_test_synth, output_strong_test_o)
474 |             print("ANE UKDALE:", ANE)
475 | 
476 |     else:
477 |         refit_synth_agg_resample_path = arguments.refit_synth
478 |         houses_re_test = [4, 9, 15]
479 |         quantity_9 = 9000
480 |         quantity_15 = 1500
481 |         quantity_4 = 12000
482 |         X_test = []
483 | 
484 |         for k in houses_re_test:
485 | 
486 |             quant = [0, 0, 0, 0, quantity_4, 0, 0, 0, 0, quantity_9, 0, 0, 0, 0, 0, quantity_15, 0, 0, 0, 0]
487 | 
488 |             for i in range(quant[k]):
489 | 
490 |                 agg = np.load(refit_synth_agg_resample_path + 'house_' + str(k) + '/aggregate_%d.npy' % i)
491 | 
492 | 
493 |                 X_test.append(agg)
494 | 
495 |             X_test_synth = np.array(X_test)
496 |             ANE = ANE(X_test_synth, output_strong_test_o)
497 |             print("ANE REFIT:", ANE)
498 | 
499 | 


--------------------------------------------------------------------------------
/dataset_creation/synth_refit_creation.py:
--------------------------------------------------------------------------------
   1 | import numpy as np
   2 | from os.path import join
   3 | import pandas as pd
   4 | from matplotlib import pyplot as plt
   5 | from nilmtk.nilmtk.dataset import DataSet
   6 | from nilmtk.nilmtk.electric import Electric
   7 | from nilmtk.nilmtk.dataset_converters.refit import convert_refit
   8 | from refit_appliance_info import *
   9 | 
# Per-appliance ON threshold. strong_labels_creation labels a sample 0 when its
# value is below the threshold and 1 otherwise.
# Washing machine and dish washer appear under both the spaced and the
# underscored spelling.
# NOTE(review): values are presumably watts -- confirm.
on_power_threshold = {
    'washing machine': 25,
    'washing_machine':25,
    'dish_washer':25,
    'dish washer': 25,
    'microwave': 100,
    'kettle': 2000,
    'fridge': 80
}

# Minimum OFF duration per appliance (spaced spelling only).
# NOTE(review): not referenced in the reviewed part of this file; the unit
# (seconds vs. samples) should be confirmed where it is consumed.
min_off_duration = {
    'washing machine': 300,
    'dish washer': 1800,
    'microwave': 20,
    'kettle': 20,
    'fridge': 20
}

# Minimum ON duration per appliance (spaced spelling only).
# NOTE(review): same caveat as min_off_duration -- usage and unit to be confirmed.
min_on_duration = {
    'washing machine': 600,
    'dish washer': 600,
    'microwave': 20,
    'kettle': 20,
    'fridge': 20
}
# Per-appliance lists (underscored spelling) read by repetition_counter.
# The assignments that would fill them inside indexes_annotation are
# commented out there.
indices_of_activations = {
    'washing_machine': [],
    'kettle': [],
    'fridge': [],
    'dish_washer': [],
    'microwave': []
}

# For every appliance, two lists of equal length: 'house' and 'channel'.
# NOTE(review): presumably channel[i] is the meter channel of that appliance
# in house[i] -- confirm against the REFIT metadata.
app_dict = {'kettle': {'house': [2,3,4,5,6,7,8,9,12,13,19],
                    'channel':  [8,9,9,8,7,9,9,7,6,9,9]},
              'microwave': {'house': [4,10,12,17,19],
                        'channel': [8,8,5,9,8]},
              'fridge': {'house':  [2,5,9,12,15],
                         'channel': [1,1,1,1,1]},
              'washing machine': {'house':  [2,5,7,8,9, 13, 15, 16,    17,    18],
                                  'channel':[2,3,5,4,3, 3,  5,   4,    5,     2]},
              'dish washer': {'house':    [2,5,7,9,13],
                              'channel':  [3,4,6,4,4]}
              }

# Seed NumPy's global random generator so that runs are repeatable.
np.random.seed(0)

# The two commented-out lines below are the one-off conversion of the raw
# REFIT data into the HDF5 file referenced by refit_path.
#refit_ = ''
refit_path = "../REFIT.h5"
#convert_refit.convert_refit(refit_,refit_path, 'HDF' )
# The dataset object is built at import time, so importing this module
# requires the file at refit_path to be available.
refit = DataSet(refit_path)
# House numbers handled by this script.
houses = [2,3,4,5,6,7,8,9,10,12,13,15,16,17,18,19]

# NOTE(review): the number_shift* values and phase_change are not used in the
# reviewed part of the file; presumably bounds for random shifts in samples --
# confirm where they are consumed.
number_shift = 50
number_shift1 = 600
number_shift2 = 1000
number_shift4 = 1750
# Misspelled name kept on purpose: strong_labels_creation reads
# `window_lenght` when it builds all-zero label vectors.
window_lenght = 2550
phase_change = 5
# Correctly spelled duplicate of the value above.
window_length = 2550
# Module-level accumulators that indexes_annotation appends to, one list per
# appliance type.
list_kettle = []
list_micro = []
list_fridge = []
list_wash = []
list_dish = []
  75 | 
  76 | def indexes_annotation(app1,app2,app3,app4,app5, rand_1, rand_2,rand_3, rand_4, rand_5):
  77 | 
  78 | 
  79 |     if app1 == 'kettle':
  80 |         list_kettle.append(rand_1)
  81 |         #indices_of_activations[app1] = rand_1
  82 |     elif app2 == 'kettle':
  83 |         list_kettle.append(rand_2)
  84 |         #indices_of_activations[app2] = rand_2
  85 |     elif app3 == 'kettle':
  86 |         list_kettle.append(rand_3)
  87 |         #indices_of_activations[app3] = rand_3
  88 |     elif app4 == 'kettle':
  89 |         list_kettle.append(rand_4)
  90 |         #indices_of_activations[app4] = rand_4
  91 |     elif app5 == 'kettle':
  92 |         list_kettle.append(rand_5)
  93 |         #indices_of_activations[app5] = rand_5
  94 | 
  95 |     if app1 == 'microwave':
  96 |         list_micro.append(rand_1)
  97 |         #indices_of_activations[app1] = rand_1
  98 |     elif app2 == 'microwave':
  99 |         list_micro.append(rand_2)
 100 |         #indices_of_activations[app2] = rand_2
 101 |     elif app3 == 'microwave':
 102 |         list_micro.append(rand_3)
 103 |         #indices_of_activations[app3] = rand_3
 104 |     elif app4 == 'microwave':
 105 |         list_micro.append(rand_4)
 106 |         #indices_of_activations[app4] = rand_4
 107 |     elif app5 == 'microwave':
 108 |         list_micro.append(rand_5)
 109 |         #indices_of_activations[app5] = rand_5
 110 | 
 111 |     if app1 == 'fridge':
 112 |         list_fridge.append(rand_1)
 113 |         #indices_of_activations[app1] = rand_1
 114 |     elif app2 == 'fridge':
 115 |         list_fridge.append(rand_2)
 116 |         #indices_of_activations[app2] = rand_2
 117 |     elif app3 == 'fridge':
 118 |         list_fridge.append(rand_3)
 119 |         #indices_of_activations[app3] = rand_3
 120 |     elif app4 == 'fridge':
 121 |         list_fridge.append(rand_4)
 122 |         #indices_of_activations[app4] = rand_4
 123 |     elif app5 == 'fridge':
 124 |         list_fridge.append(rand_5)
 125 |         #indices_of_activations[app5] = rand_5
 126 | 
 127 |     if app1 == 'washing_machine':
 128 |         list_wash.append(rand_1)
 129 |         #indices_of_activations[app1] = rand_1
 130 |     elif app2 == 'washing_machine':
 131 |         list_wash.append(rand_2)
 132 |         #indices_of_activations[app2] = rand_2
 133 |     elif app3 == 'washing_machine':
 134 |         list_wash.append(rand_3)
 135 |         #indices_of_activations[app3] = rand_3
 136 |     elif app4 == 'washing_machine':
 137 |         list_wash.append(rand_4)
 138 |         #indices_of_activations[app4] = rand_4
 139 |     elif app5 == 'washing_machine':
 140 |         list_wash.append(rand_5)
 141 |         #indices_of_activations[app5] = rand_5
 142 | 
 143 |     if app1 == 'dish_washer':
 144 |         list_dish.append(rand_1)
 145 |         #indices_of_activations[app1] = rand_1
 146 |     elif app2 == 'dish_washer':
 147 |         list_dish.append(rand_2)
 148 |         #indices_of_activations[app2] = rand_2
 149 |     elif app3 == 'dish_washer':
 150 |         list_dish.append(rand_3)
 151 |         #indices_of_activations[app3] = rand_3
 152 |     elif app4 == 'dish_washer':
 153 |         list_dish.append(rand_4)
 154 |         #indices_of_activations[app4] = rand_4
 155 |     elif app5 == 'dish_washer':
 156 |         list_dish.append(rand_5)
 157 |         #indices_of_activations[app5] = rand_5
 158 | 
 159 | 
 160 | def sample_counter(app1,app2,app3,app4,app5,sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d, act1,act2,act3,act4,act5):
 161 | 
 162 | 
 163 |     if app1 == 'kettle':
 164 |         sample_count_k += len(act1)
 165 |         #indices_of_activations[app1] = rand_1
 166 |     if app2 == 'kettle':
 167 |         sample_count_k += len(act2)
 168 |         #indices_of_activations[app2] = rand_2
 169 |     if app3 == 'kettle':
 170 |         sample_count_k += len(act3)
 171 |         #indices_of_activations[app3] = rand_3
 172 |     if app4 == 'kettle':
 173 |         sample_count_k += len(act4)
 174 |         #indices_of_activations[app4] = rand_4
 175 |     if app5 == 'kettle':
 176 |         sample_count_k += len(act5)
 177 |         #indices_of_activations[app5] = rand_5
 178 | 
 179 |     if app1 == 'microwave':
 180 |         sample_count_m += len(act1)
 181 |         #indices_of_activations[app1] = rand_1
 182 |     if app2 == 'microwave':
 183 |         sample_count_m += len(act2)
 184 |         #indices_of_activations[app2] = rand_2
 185 |     if app3 == 'microwave':
 186 |         sample_count_m += len(act3)
 187 |         #indices_of_activations[app3] = rand_3
 188 |     if app4 == 'microwave':
 189 |         sample_count_m += len(act4)
 190 |         #indices_of_activations[app4] = rand_4
 191 |     if app5 == 'microwave':
 192 |         sample_count_m += len(act5)
 193 |         #indices_of_activations[app5] = rand_5
 194 | 
 195 |     if app1 == 'fridge':
 196 |         sample_count_f += len(act1)
 197 |         #indices_of_activations[app1] = rand_1
 198 |     if app2 == 'fridge':
 199 |         sample_count_f += len(act2)
 200 |         #indices_of_activations[app2] = rand_2
 201 |     if app3 == 'fridge':
 202 |         sample_count_f += len(act3)
 203 |         #indices_of_activations[app3] = rand_3
 204 |     if app4 == 'fridge':
 205 |         sample_count_f += len(act4)
 206 |         #indices_of_activations[app4] = rand_4
 207 |     if app5 == 'fridge':
 208 |         sample_count_f += len(act5)
 209 |         #indices_of_activations[app5] = rand_5
 210 | 
 211 |     if app1 == 'washing_machine':
 212 |         sample_count_w += len(act1)
 213 |         #indices_of_activations[app1] = rand_1
 214 |     if app2 == 'washing_machine':
 215 |         sample_count_w += len(act2)
 216 |         #indices_of_activations[app2] = rand_2
 217 |     if app3 == 'washing_machine':
 218 |         sample_count_w += len(act3)
 219 |         #indices_of_activations[app3] = rand_3
 220 |     if app4 == 'washing_machine':
 221 |         sample_count_w += len(act4)
 222 |         #indices_of_activations[app4] = rand_4
 223 |     if app5 == 'washing_machine':
 224 |         sample_count_w += len(act5)
 225 |         #indices_of_activations[app5] = rand_5
 226 | 
 227 |     if app1 == 'dish_washer':
 228 |         sample_count_d += len(act1)
 229 |         #indices_of_activations[app1] = rand_1
 230 |     elif app2 == 'dish_washer':
 231 |         sample_count_d += len(act2)
 232 |         #indices_of_activations[app2] = rand_2
 233 |     if app3 == 'dish_washer':
 234 |         sample_count_d += len(act3)
 235 |         #indices_of_activations[app3] = rand_3
 236 |     if app4 == 'dish_washer':
 237 |         sample_count_d += len(act4)
 238 |         #indices_of_activations[app4] = rand_4
 239 |     if app5 == 'dish_washer':
 240 |         sample_count_d += len(act5)
 241 |         #indices_of_activations[app5] = rand_5
 242 | 
 243 |     return sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d
 244 | 
 245 | 
 246 | def repetition_counter(appliances):
 247 | 
 248 |     indexes_k = []
 249 |     indexes_m = []
 250 |     indexes_f = []
 251 |     indexes_w = []
 252 |     indexes_d = []
 253 |     list_indexes = [indexes_k, indexes_m, indexes_f, indexes_w, indexes_d]
 254 |     repetition = 0
 255 |     for i in range(len(appliances)):
 256 |         indexes = list_indexes[i]
 257 |         if appliances[i] != '':
 258 |             for idx in indices_of_activations[appliances[i]]:
 259 |                 repetition = indices_of_activations[appliances[i]].count(idx)
 260 |                 indexes.append(repetition)
 261 | 
 262 |     return indexes_k, indexes_m, indexes_f, indexes_w, indexes_d
 263 | 
 264 | def padd_shift(rand_shift,app):
 265 | 
 266 |     padd = pd.Series(np.zeros(rand_shift))
 267 |     app_series = app['power']['active']
 268 |     app_series = app_series.append(padd, ignore_index=True).shift(periods=rand_shift, fill_value=0)
 269 |     return app_series
 270 | 
 271 | 
 272 | def strong_labels_creation(appliance1,app1,appliance2, app2,appliance3, app3, appliance4, app4, appliance5, app5, flag = 0):
 273 | 
 274 |     list_vectors_strong = []
 275 |     vector_strong_1 = pd.Series()
 276 |     vector_strong_2 = pd.Series()
 277 |     vector_strong_3 = pd.Series()
 278 |     vector_strong_4 = pd.Series()
 279 |     vector_strong_5 = pd.Series()
 280 | 
 281 |     if flag == 4:
 282 |             vector_strong_1 = pd.Series()
 283 | 
 284 |             list_1 = []
 285 |             if appliance1 == 'kettle':
 286 |                 for l in range(len(app1)):
 287 |                     if app1[l] < on_power_threshold['kettle']:
 288 |                         list_1.append(0)
 289 |                     else:
 290 |                         list_1.append(1)
 291 |                 print("Appliance 1 vector strong Kettle")
 292 |                 vector_strong_1_a = pd.Series(list_1)
 293 |             else:
 294 |                 vector_strong_1_a = pd.Series(np.zeros(window_lenght))
 295 | 
 296 |             list_1 = []
 297 |             if appliance2 == 'kettle':
 298 |                 for l in range(len(app2)):
 299 |                     if app2[l] < on_power_threshold['kettle']:
 300 |                         list_1.append(0)
 301 |                     else:
 302 |                         list_1.append(1)
 303 | 
 304 |                 print("Appliance 2 vector strong Kettle")
 305 |                 vector_strong_1_b = pd.Series(list_1)
 306 |             else:
 307 |                 vector_strong_1_b = pd.Series(np.zeros(window_lenght))
 308 | 
 309 |             list_1 = []
 310 |             if appliance3 == 'kettle':
 311 |                 for l in range(len(app3)):
 312 |                     if app3[l] < on_power_threshold['kettle']:
 313 |                         list_1.append(0)
 314 |                     else:
 315 |                         list_1.append(1)
 316 | 
 317 |                 print("Appliance 3 vector strong Kettle")
 318 |                 vector_strong_1_c = pd.Series(list_1)
 319 |             else:
 320 |                 vector_strong_1_c = pd.Series(np.zeros(window_lenght))
 321 | 
 322 |             list_1 = []
 323 |             if appliance4 == 'kettle':
 324 |                 for l in range(len(app4)):
 325 |                     if app4[l] < on_power_threshold['kettle']:
 326 |                         list_1.append(0)
 327 |                     else:
 328 |                         list_1.append(1)
 329 | 
 330 |                 print("Appliance 4 vector strong Kettle")
 331 |                 vector_strong_1_d = pd.Series(list_1)
 332 |             else:
 333 |                 vector_strong_1_d = pd.Series(np.zeros(window_lenght))
 334 | 
 335 |             list_1 = []
 336 |             if appliance5 == 'kettle':
 337 |                 for l in range(len(app5)):
 338 |                     if app5[l] < on_power_threshold['kettle']:
 339 |                         list_1.append(0)
 340 |                     else:
 341 |                         list_1.append(1)
 342 | 
 343 |                 print("Appliance 5 vector strong Kettle")
 344 |                 vector_strong_1_e = pd.Series(list_1)
 345 |             else:
 346 |                 vector_strong_1_e = pd.Series(np.zeros(window_lenght))
 347 | 
 348 | 
 349 |             vector_strong_1 = vector_strong_1_a.add(vector_strong_1_b)
 350 |             vector_strong_1 = vector_strong_1.add(vector_strong_1_c)
 351 |             vector_strong_1 = vector_strong_1.add(vector_strong_1_d)
 352 |             vector_strong_1 = vector_strong_1.add(vector_strong_1_e)
 353 | 
 354 |             list_2 = []
 355 |             vector_strong_2 = pd.Series()
 356 |             if appliance1 == 'microwave':
 357 |                 for l in range(len(app1)):
 358 |                     if app1[l] < on_power_threshold['microwave']:
 359 |                         list_2.append(0)
 360 |                     else:
 361 |                         list_2.append(1)
 362 |                 vector_strong_2_a = pd.Series(list_2)
 363 |             else:
 364 |                 vector_strong_2_a = pd.Series(np.zeros(window_lenght))
 365 | 
 366 |             list_2 = []
 367 |             if appliance2 == 'microwave':
 368 |                 for l in range(len(app2)):
 369 |                     if app2[l] < on_power_threshold['microwave']:
 370 |                         list_2.append(0)
 371 |                     else:
 372 |                         list_2.append(1)
 373 |                 vector_strong_2_b = pd.Series(list_2)
 374 |             else:
 375 |                 vector_strong_2_b = pd.Series(np.zeros(window_lenght))
 376 | 
 377 |             list_2 = []
 378 |             if appliance3 == 'microwave':
 379 |                 for l in range(len(app3)):
 380 |                     if app3[l] < on_power_threshold['microwave']:
 381 |                         list_2.append(0)
 382 |                     else:
 383 |                         list_2.append(1)
 384 | 
 385 |                 print("Appliance 3 vector strong microwave")
 386 |                 vector_strong_2_c = pd.Series(list_2)
 387 |             else:
 388 |                 vector_strong_2_c = pd.Series(np.zeros(window_lenght))
 389 | 
 390 |             list_2 = []
 391 |             if appliance4 == 'microwave':
 392 |                 for l in range(len(app4)):
 393 |                     if app4[l] < on_power_threshold['microwave']:
 394 |                         list_2.append(0)
 395 |                     else:
 396 |                         list_2.append(1)
 397 | 
 398 |                 print("Appliance 4 vector strong microwave")
 399 |                 vector_strong_2_d = pd.Series(list_2)
 400 |             else:
 401 |                 vector_strong_2_d = pd.Series(np.zeros(window_lenght))
 402 | 
 403 |             list_2 = []
 404 |             if appliance5 == 'microwave':
 405 |                 for l in range(len(app5)):
 406 |                     if app5[l] < on_power_threshold['microwave']:
 407 |                         list_2.append(0)
 408 |                     else:
 409 |                         list_2.append(1)
 410 | 
 411 |                 print("Appliance 5 vector strong microwave")
 412 |                 vector_strong_2_e = pd.Series(list_2)
 413 |             else:
 414 |                 vector_strong_2_e = pd.Series(np.zeros(window_lenght))
 415 | 
 416 |             vector_strong_2 = vector_strong_2_a.add(vector_strong_2_b)
 417 |             vector_strong_2 = vector_strong_2.add(vector_strong_2_c)
 418 |             vector_strong_2 = vector_strong_2.add(vector_strong_2_d)
 419 |             vector_strong_2 = vector_strong_2.add(vector_strong_2_e)
 420 | 
 421 | 
 422 |             list_3 = []
 423 |             vector_strong_3 = pd.Series()
 424 |             if appliance1 == 'fridge':
 425 |                 for l in range(len(app1)):
 426 |                     if app1[l] < on_power_threshold['fridge']:
 427 |                         list_3.append(0)
 428 |                     else:
 429 |                         list_3.append(1)
 430 |                 vector_strong_3_a = pd.Series(list_3)
 431 |             else:
 432 |                 vector_strong_3_a = pd.Series(np.zeros(window_lenght))
 433 | 
 434 |             list_3 = []
 435 |             if appliance2 == 'fridge':
 436 |                 for l in range(len(app2)):
 437 |                     if app2[l] < on_power_threshold['fridge']:
 438 |                         list_3.append(0)
 439 |                     else:
 440 |                         list_3.append(1)
 441 |                 vector_strong_3_b = pd.Series(list_3)
 442 |             else:
 443 |                 vector_strong_3_b = pd.Series(np.zeros(window_lenght))
 444 | 
 445 |             list_3 = []
 446 |             if appliance3 == 'fridge':
 447 |                 for l in range(len(app3)):
 448 |                     if app3[l] < on_power_threshold['fridge']:
 449 |                         list_3.append(0)
 450 |                     else:
 451 |                         list_3.append(1)
 452 | 
 453 |                 print("Appliance 3 vector strong microwave")
 454 |                 vector_strong_3_c = pd.Series(list_3)
 455 |             else:
 456 |                 vector_strong_3_c = pd.Series(np.zeros(window_lenght))
 457 | 
 458 |             list_3 = []
 459 |             if appliance4 == 'fridge':
 460 |                 for l in range(len(app4)):
 461 |                     if app4[l] < on_power_threshold['fridge']:
 462 |                         list_3.append(0)
 463 |                     else:
 464 |                         list_3.append(1)
 465 | 
 466 |                 print("Appliance 4 vector strong microwave")
 467 |                 vector_strong_3_d = pd.Series(list_3)
 468 |             else:
 469 |                 vector_strong_3_d = pd.Series(np.zeros(window_lenght))
 470 | 
 471 |             list_3 = []
 472 |             if appliance5 == 'fridge':
 473 |                 for l in range(len(app5)):
 474 |                     if app5[l] < on_power_threshold['fridge']:
 475 |                         list_3.append(0)
 476 |                     else:
 477 |                         list_3.append(1)
 478 | 
 479 |                 print("Appliance 5 vector strong microwave")
 480 |                 vector_strong_3_e = pd.Series(list_3)
 481 |             else:
 482 |                 vector_strong_3_e = pd.Series(np.zeros(window_lenght))
 483 | 
 484 |             vector_strong_3 = vector_strong_3_a.add(vector_strong_3_b)
 485 |             vector_strong_3 = vector_strong_3.add(vector_strong_3_c)
 486 |             vector_strong_3 = vector_strong_3.add(vector_strong_3_d)
 487 |             vector_strong_3 = vector_strong_3.add(vector_strong_3_e)
 488 | 
 489 | 
 490 | 
 491 | 
 492 |             list_4 = []
 493 |             vector_strong_4 = pd.Series()
 494 |             if appliance1 == 'washing_machine':
 495 |                 for l in range(len(app1)):
 496 |                     if app1[l] < on_power_threshold['washing_machine']:
 497 |                         list_4.append(0)
 498 |                     else:
 499 |                         list_4.append(1)
 500 |                 vector_strong_4 = pd.Series(list_4)
 501 |             elif appliance2 == 'washing_machine':
 502 |                 for l in range(len(app2)):
 503 |                     if app2[l] < on_power_threshold['washing_machine']:
 504 |                         list_4.append(0)
 505 |                     else:
 506 |                         list_4.append(1)
 507 |                 vector_strong_4 = pd.Series(list_4)
 508 | 
 509 | 
 510 |             elif appliance3 == 'washing_machine':
 511 |                 for l in range(len(app3)):
 512 |                     if app3[l] < on_power_threshold['washing_machine']:
 513 |                         list_4.append(0)
 514 |                     else:
 515 |                         list_4.append(1)
 516 | 
 517 |                 print("Appliance 3 vector strong washing machine")
 518 |                 vector_strong_4 = pd.Series(list_4)
 519 | 
 520 |             elif appliance4 == 'washing_machine':
 521 |                 for l in range(len(app4)):
 522 |                     if app4[l] < on_power_threshold['washing_machine']:
 523 |                         list_4.append(0)
 524 |                     else:
 525 |                         list_4.append(1)
 526 | 
 527 |                 print("Appliance 4 vector strong washing machine")
 528 |                 vector_strong_4 = pd.Series(list_4)
 529 |             elif appliance5 == 'washing_machine':
 530 |                 for l in range(len(app5)):
 531 |                     if app5[l] < on_power_threshold['washing_machine']:
 532 |                         list_4.append(0)
 533 |                     else:
 534 |                         list_4.append(1)
 535 | 
 536 |                 print("Appliance 5 vector strong washing machine")
 537 |                 vector_strong_4 = pd.Series(list_4)
 538 |             else:
 539 |                 vector_strong_4 = pd.Series(np.zeros(window_lenght))
 540 | 
 541 |             list_5 = []
 542 |             vector_strong_5 = pd.Series()
 543 |             if appliance1 == 'dish_washer':
 544 |                 for l in range(len(app1)):
 545 |                     if app1[l] < on_power_threshold['dish_washer']:
 546 |                         list_5.append(0)
 547 |                     else:
 548 |                         list_5.append(1)
 549 |                 vector_strong_5 = pd.Series(list_5)
 550 | 
 551 |             elif appliance2 == 'dish_washer':
 552 |                 for l in range(len(app2)):
 553 |                     if app2[l] < on_power_threshold['dish_washer']:
 554 |                         list_5.append(0)
 555 |                     else:
 556 |                         list_5.append(1)
 557 |                 vector_strong_5 = pd.Series(list_5)
 558 | 
 559 |             elif appliance3 == 'dish_washer':
 560 |                 for l in range(len(app3)):
 561 |                     if app3[l] < on_power_threshold['dish_washer']:
 562 |                         list_5.append(0)
 563 |                     else:
 564 |                         list_5.append(1)
 565 | 
 566 |                 print("Appliance 3 vector strong dish washer")
 567 |                 vector_strong_5 = pd.Series(list_5)
 568 |             elif appliance4 == 'dish_washer':
 569 |                 for l in range(len(app4)):
 570 |                     if app4[l] < on_power_threshold['dish_washer']:
 571 |                         list_5.append(0)
 572 |                     else:
 573 |                         list_5.append(1)
 574 | 
 575 |                 print("Appliance 4 vector strong dish washer")
 576 |                 vector_strong_5 = pd.Series(list_5)
 577 |             elif appliance5 == 'dish_washer':
 578 |                 for l in range(len(app5)):
 579 |                     if app5[l] < on_power_threshold['dish_washer']:
 580 |                         list_5.append(0)
 581 |                     else:
 582 |                         list_5.append(1)
 583 | 
 584 |                 print("Appliance 5 vector strong dish washer")
 585 |                 vector_strong_5 = pd.Series(list_5)
 586 |             else:
 587 |                 vector_strong_5 = pd.Series(np.zeros(window_lenght))
 588 | 
 589 | 
 590 | 
 591 | 
 592 |     list_vectors_strong.append(vector_strong_1)
 593 |     list_vectors_strong.append(vector_strong_2)
 594 |     list_vectors_strong.append(vector_strong_3)
 595 |     list_vectors_strong.append(vector_strong_4)
 596 |     list_vectors_strong.append(vector_strong_5)
 597 | 
 598 |     return list_vectors_strong
 599 | 
 600 | 
 601 | def activation_appliances_nilmtk(appliances, buildings):
 602 |     for appliance in appliances:
 603 |         for i in range(len(app_dict[appliance]['house'])):
 604 | 
 605 |             elec = refit.buildings[app_dict[appliance]['house'][i]].elec
 606 |             #lista = elec.appliances
 607 |             #app = lista[1]
 608 |             #curr_appliance = elec[appliance] elec.meters[8]
 609 | 
 610 |             activation_ = Electric.get_activations(elec.meters[app_dict[appliance]['channel'][i]], min_off_duration=min_off_duration[appliance],min_on_duration=min_on_duration[appliance],on_power_threshold=on_power_threshold[appliance])
 611 | 
 612 | 
 613 |             plt.plot(activation_[0])
 614 |             # plt.title(appliance + str(app_dict[appliance]['house'][i]))
 615 |             plt.show()
 616 |             # print("len activation")
 617 |             # print(len(activation_[0]))
 618 | 
 619 |             # df = next(elec.meters[app_dict[appliance]['channel'][i]].load(ac_type='active')) #, sample_period=8))
 620 |             # df.head()
 621 |             print("Casa:", app_dict[appliance]['house'][i])
 622 |             print(appliance)
 623 |             print("N° di attivazioni:",len(activation_))
 624 |             if appliance == 'dish washer':   #app_dict[appliance]['house'][i]
 625 |                 np.save("/mnt/sda1/home/gtanoni/codice_prova/dish_washeractivations_" + str(app_dict[appliance]['house'][i]) + ".npy", activation_)
 626 |             elif appliance == 'washing machine':
 627 |                 np.save("/mnt/sda1/home/gtanoni/codice_prova/washing_machineactivations_" + str(app_dict[appliance]['house'][i]) + ".npy", activation_)
 628 |             else:
 629 |                 np.save("/mnt/sda1/home/gtanoni/codice_prova/" + appliance + "activations_" + str(app_dict[appliance]['house'][i]) + ".npy", activation_)
 630 | 
 631 | 
 632 | def data_iteration_seq3(activations_, build, appliances, num_of_bags):
 633 | 
 634 |     final_weak = []
 635 |     final_strong = []
 636 |     aggregate_ = []
 637 |     sample_count_k = 0
 638 |     sample_count_m = 0
 639 |     sample_count_f = 0
 640 |     sample_count_w = 0
 641 |     sample_count_d = 0
 642 | 
 643 | 
 644 |     for a in range(num_of_bags):
 645 | 
 646 |             indx = []
 647 |             for app in range(len(appliances)):
 648 |                 if appliances[app] == '':
 649 |                     continue
 650 |                 else:
 651 |                     indx.append(app)
 652 | 
 653 |             mu, sigma = 0, 1
 654 |             s = np.random.normal(mu, sigma, window_lenght)
 655 |             s = pd.Series(s)
 656 |             s_app = pd.Series(np.zeros(window_lenght))
 657 | 
 658 |             i = indx[0]
 659 |             print("Appliance 1")
 660 |             print(appliances[i])
 661 | 
 662 | 
 663 |             rand_1 = np.random.randint(len(activations_[appliances[i]]), size=1)
 664 |             app1 = activations_[appliances[i]][rand_1[0]]
 665 |             app1 = app1.reset_index()
 666 |             app1 = app1.drop(["Unix"], axis=1)
 667 |             #app1_series = app1['power']['active']
 668 |             rand_shift = np.random.randint(number_shift, size=1)
 669 |             app1_series = padd_shift(rand_shift[0], app1)
 670 |             app1_series = app1_series.add(s_app, fill_value=0)
 671 | 
 672 | 
 673 |             rand_app2 = indx[1]
 674 | 
 675 | 
 676 |             rand_app = [indx[2]]
 677 |             print("Appliance 2")
 678 |             print(appliances[rand_app[0]])
 679 | 
 680 |             print("Appliance 3")
 681 |             print(appliances[rand_app2])
 682 | 
 683 |             rand_2 = np.random.randint(len(activations_[appliances[rand_app[0]]]), size=1)
 684 |             app2 = activations_[appliances[rand_app[0]]][rand_2[0]]
 685 |             app2 = app2.reset_index()
 686 |             app2 = app2.drop(["Unix"], axis=1)
 687 | 
 688 |             rand_shift = np.random.randint(number_shift1,number_shift2, size=1)
 689 |             app2_series = padd_shift(rand_shift[0], app2)
 690 |             app2_series = app2_series.add(s_app, fill_value=0)
 691 | 
 692 |             rand_3 = np.random.randint(len(activations_[appliances[rand_app2]]), size=1)
 693 |             app3 = activations_[appliances[rand_app2]][rand_3[0]]
 694 |             app3 = app3.reset_index()
 695 |             app3 = app3.drop(["Unix"], axis=1)
 696 | 
 697 |             sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d = sample_counter(appliances[i], appliances[rand_app[0]], appliances[rand_app2], 0, 0, sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d, app1['power']['active'], app2['power']['active'], app3['power']['active'],0,0)
 698 | 
 699 |             rand_shift = np.random.randint(1200,number_shift4, size=1)
 700 |             app3_series = padd_shift(rand_shift[0], app3)
 701 |             app3_series = app3_series.add(s_app, fill_value=0)
 702 | 
 703 | 
 704 |             vector = app2_series.add(app1_series, fill_value=0)
 705 |             vector = vector.add(app3_series, fill_value=0)
 706 |             aggregate = vector.add(s, fill_value=0)
 707 |             aggregate = aggregate.to_numpy()
 708 |             aggregate[aggregate < 0] = 0
 709 |             aggregate = pd.Series(aggregate)
 710 |             aggregate_.append(aggregate)
 711 | 
 712 | 
 713 |             list_vectors_strong = strong_labels_creation(appliance1=appliances[i], app1=app1_series,
 714 |                                                              appliance2=appliances[rand_app[0]], app2=app2_series,
 715 |                                                              appliance3=appliances[rand_app2], app3=app3_series, appliance4=0,
 716 |                                                              app4=0, appliance5=0, app5=0, flag=4)
 717 |             final_strong.append(list_vectors_strong)
 718 |             vector_weak = [0, 0, 0, 0, 0]
 719 |             vector_weak[i] = 1
 720 |             vector_weak[rand_app[0]] = 1
 721 |             vector_weak[rand_app2] = 1
 722 |             print(vector_weak)
 723 |             final_weak.append(vector_weak)
 724 | 
 725 |             indexes_annotation(appliances[i], appliances[rand_app[0]], appliances[rand_app2], 0, 0, rand_1[0], rand_2[0], rand_3[0], 0, 0)
 726 | 
 727 | 
 728 |             print("Sequences with 3 appliances:")
 729 |             print(a)
 730 | 
 731 |     indices_of_activations['kettle'] = list_kettle
 732 |     indices_of_activations['microwave'] = list_micro
 733 |     indices_of_activations['fridge'] = list_fridge
 734 |     indices_of_activations['washing_machine'] = list_wash
 735 |     indices_of_activations['dish_washer'] = list_dish
 736 | 
 737 |     repetitions_k, repetitions_m, repetitions_f, repetitions_w, repetitions_d = repetition_counter(appliances)
 738 | 
 739 | 
 740 |     print("Total samples kettle:", sample_count_k)
 741 |     print("Total samples micro:", sample_count_m)
 742 |     print("Total samples fridge:", sample_count_f)
 743 |     print("Total samples washing:", sample_count_w)
 744 |     print("Total samples dish:", sample_count_d)
 745 |     #
 746 |     with open('/mnt/sda1/home/gtanoni/codice_prova/phase_repetition_' + str(build) +'_.txt', 'a+') as file:
 747 |          print("Repetitions Kettle:", file=file)
 748 |          print(repetitions_k, file=file)
 749 |          print("Repetitions Micro:", file=file)
 750 |          print(repetitions_m, file=file)
 751 |          print("Repetitions Fridge:", file=file)
 752 |          print(repetitions_f, file=file)
 753 |          print("Repetitions Wash:", file=file)
 754 |          print(repetitions_w, file=file)
 755 |          print("Repetitions Dish:", file=file)
 756 |          print(repetitions_d, file=file)
 757 | 
 758 |     return aggregate_, final_strong, final_weak
 759 | 
 760 | def data_iteration_seq4(activations_, build, appliances, num_of_bags):
 761 |     final_weak = []
 762 |     final_strong = []
 763 |     aggregate_ = []
 764 |     sample_count_k = 0
 765 |     sample_count_m = 0
 766 |     sample_count_f = 0
 767 |     sample_count_w = 0
 768 |     sample_count_d = 0
 769 | 
 770 |     for a in range(num_of_bags):
 771 | 
 772 | 
 773 |         mu, sigma = 0, 1
 774 |         s = np.random.normal(mu, sigma, window_lenght)
 775 |         s = pd.Series(s)
 776 |         s_app = pd.Series(np.zeros(window_lenght))
 777 | 
 778 |         indx = []
 779 |         for app in range(len(appliances)):
 780 |             if appliances[app] == '':
 781 |                 continue
 782 |             else:
 783 |                 indx.append(app)
 784 | 
 785 |         i = indx[0]
 786 |         print("Appliance 1")
 787 |         print(appliances[i])
 788 |         rand_app = [indx[2]]
 789 | 
 790 | 
 791 |         rand_1 = np.random.randint(len(activations_[appliances[i]]), size=1)
 792 |         app1 = activations_[appliances[i]][rand_1[0]]
 793 |         app1 = app1.reset_index()
 794 |         app1 = app1.drop(["Unix"], axis=1)
 795 |         app1_series = app1['power']['active']
 796 |         app1_series = app1_series.add(s_app, fill_value=0)
 797 |         rand_app2 = indx[1]
 798 |         print(appliances[rand_app2])
 799 | 
 800 |         print("Appliance 2")
 801 |         print(appliances[rand_app[0]])
 802 |         print("Appliance 3")
 803 |         rand_app3 = indx[3]
 804 |         print("Appliance 4")
 805 |         print(appliances[rand_app3])
 806 | 
 807 |         rand_2 = np.random.randint(len(activations_[appliances[rand_app[0]]]), size=1)
 808 |         app2 = activations_[appliances[rand_app[0]]][rand_2[0]]
 809 |         app2 = app2.reset_index()
 810 |         app2 = app2.drop(["Unix"], axis=1)
 811 |         rand_shift = np.random.randint(0,number_shift, size=1)
 812 |         app2_series = padd_shift(rand_shift[0], app2)
 813 |         app2_series = app2_series.add(s_app, fill_value=0)
 814 | 
 815 |         rand_3 = np.random.randint(len(activations_[appliances[rand_app2]]), size=1)
 816 |         app3 = activations_[appliances[rand_app2]][rand_3[0]]
 817 |         app3 = app3.reset_index()
 818 |         app3 = app3.drop(["Unix"], axis=1)
 819 |         rand_shift = np.random.randint(number_shift1,number_shift2, size=1)
 820 |         app3_series = padd_shift(rand_shift[0], app3)
 821 |         app3_series = app3_series.add(s_app, fill_value=0)
 822 | 
 823 |         rand_4 = np.random.randint(len(activations_[appliances[rand_app3]]), size=1)
 824 |         app4 = activations_[appliances[rand_app3]][rand_4[0]]
 825 |         app4 = app4.reset_index()
 826 |         app4 = app4.drop(["Unix"], axis=1)
 827 |         rand_shift = np.random.randint(number_shift4,2000, size=1)
 828 |         app4_series = padd_shift(rand_shift[0], app4)
 829 |         app4_series = app4_series.add(s_app, fill_value=0)
 830 | 
 831 |         vector = app2_series.add(app1['power']['active'], fill_value=0)
 832 |         vector = vector.add(app3_series, fill_value=0)
 833 |         vector = vector.add(app4_series, fill_value=0)
 834 |         aggregate = vector.add(s, fill_value=0)
 835 |         aggregate = aggregate.to_numpy()
 836 |         aggregate[aggregate<0] = 0
 837 |         aggregate = pd.Series(aggregate)
 838 |         aggregate_.append(aggregate)
 839 | 
 840 |         sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d = sample_counter(
 841 |                     appliances[i], appliances[rand_app[0]], appliances[rand_app2], appliances[rand_app3], 0, sample_count_k, sample_count_m,
 842 |                     sample_count_f, sample_count_w, sample_count_d, app1['power']['active'], app2['power']['active'],
 843 |                     app3['power']['active'], app4['power']['active'], 0)
 844 | 
 845 | 
 846 | 
 847 |         list_vectors_strong = strong_labels_creation(appliance1=appliances[i], app1=app1_series,
 848 |                                                              appliance2=appliances[rand_app[0]], app2=app2_series,
 849 |                                                              appliance3=appliances[rand_app2], app3=app3_series,
 850 |                                                              appliance4=appliances[rand_app3], app4=app4_series, appliance5=0,
 851 |                                                              app5=0, flag=4)
 852 |         final_strong.append(list_vectors_strong)
 853 |         vector_weak = [0, 0, 0, 0, 0]
 854 |         vector_weak[i] = 1
 855 |         vector_weak[rand_app[0]] = 1
 856 |         vector_weak[rand_app2] = 1
 857 |         vector_weak[rand_app3] = 1
 858 |         print(vector_weak)
 859 |         final_weak.append(vector_weak)
 860 |         indexes_annotation(appliances[i], appliances[rand_app[0]], appliances[rand_app2], appliances[rand_app3], 0, rand_1[0],
 861 |                                    rand_2[0], rand_3[0], rand_4[0], 0)
 862 | 
 863 |     indices_of_activations['kettle'] = list_kettle
 864 |     indices_of_activations['microwave'] = list_micro
 865 |     indices_of_activations['fridge'] = list_fridge
 866 |     indices_of_activations['washing_machine'] = list_wash
 867 |     indices_of_activations['dish_washer'] = list_dish
 868 | 
 869 |     repetitions_k, repetitions_m, repetitions_f, repetitions_w, repetitions_d = repetition_counter(appliances)
 870 |     print("repetitions counted!")
 871 |     print("Total samples kettle:", sample_count_k)
 872 |     print("Total samples micro:", sample_count_m)
 873 |     print("Total samples fridge:", sample_count_f)
 874 |     print("Total samples washing:", sample_count_w)
 875 |     print("Total samples dish:", sample_count_d)
 876 |     #
 877 |     with open('/mnt/sda1/home/gtanoni/codice_prova/phase_repetition_' + str(build) + '_.txt',
 878 |               'a+') as file:
 879 |         print("Repetitions Kettle:", file=file)
 880 |         print(repetitions_k, file=file)
 881 |         print("Repetitions Micro:", file=file)
 882 |         print(repetitions_m, file=file)
 883 |         print("Repetitions Fridge:", file=file)
 884 |         print(repetitions_f, file=file)
 885 |         print("Repetitions Wash:", file=file)
 886 |         print(repetitions_w, file=file)
 887 |         print("Repetitions Dish:", file=file)
 888 |         print(repetitions_d, file=file)
 889 | 
 890 |     return aggregate_, final_strong, final_weak
 891 | 
 892 | def data_iteration_seq2(activations_, build, appliances, num_of_bags):
 893 | 
 894 |     final_weak = []
 895 |     final_strong = []
 896 |     aggregate_ = []
 897 |     sample_count_k = 0
 898 |     sample_count_m = 0
 899 |     sample_count_f = 0
 900 |     sample_count_w = 0
 901 |     sample_count_d = 0
 902 | 
 903 |     for a in range(num_of_bags):
 904 | 
 905 |         indx = []
 906 |         for app in range(len(appliances)):
 907 |             if appliances[app] == '':
 908 |                 continue
 909 |             else:
 910 |                 indx.append(app)
 911 |         list_indices_app1 = []
 912 |         list_indices_app2 = []
 913 | 
 914 | 
 915 |         mu, sigma = 0, 1
 916 |         s = np.random.normal(mu, sigma,window_lenght)
 917 |         s = pd.Series(s)
 918 |         s_app = pd.Series(np.zeros(window_lenght))
 919 | 
 920 |         i = indx[0]
 921 |         print("Appliance 1")
 922 |         print(appliances[i])
 923 | 
 924 |         rand_app = [indx[1]]
 925 | 
 926 |         print("Appliance 2")
 927 |         print(appliances[rand_app[0]])
 928 | 
 929 |         rand_1 = np.random.randint(len(activations_[appliances[i]]), size=1)
 930 |         list_indices_app1.append(rand_1)
 931 |         app1 = activations_[appliances[i]][rand_1[0]]
 932 |         app1 = app1.reset_index()
 933 |         app1 = app1.drop(["Unix"], axis=1)
 934 |         app1_series = app1['power']['active']
 935 | 
 936 |         rand_2 = np.random.randint(len(activations_[appliances[rand_app[0]]]), size=1)
 937 |         list_indices_app2.append(rand_2)
 938 |         app2 = activations_[appliances[rand_app[0]]][rand_2[0]]
 939 |         app2 = app2.reset_index()
 940 |         app2 = app2.drop(["Unix"], axis=1)
 941 | 
 942 |         sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d = sample_counter(appliances[i], appliances[rand_app[0]], 0, 0, 0, sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d, app1_series, app2['power']['active'], 0,0,0)
 943 | 
 944 | 
 945 |         app1_series = app1_series.add(s_app, fill_value=0)
 946 | 
 947 |         rand_shift = np.random.randint(number_shift, size=1)
 948 |         app2_series = padd_shift(rand_shift[0], app2)
 949 |         app2_series = app2_series.add(s_app, fill_value=0)
 950 | 
 951 |         vector = app2_series.add(app1_series, fill_value=0)
 952 |         aggregate = vector.add(s, fill_value=0)
 953 |         aggregate = aggregate.to_numpy()
 954 |         aggregate[aggregate < 0] = 0
 955 |         aggregate = pd.Series(aggregate)
 956 |         aggregate_.append(aggregate)
 957 | 
 958 |             #plt.plot(aggregate)
 959 |             #plt.show()
 960 | 
 961 |         list_vectors_strong = strong_labels_creation(appliance1=appliances[i], app1=app1_series,
 962 |                                                                  appliance2=appliances[rand_app[0]], app2=app2_series,
 963 |                                                                  appliance3=0, app3=0, appliance4=0, app4=0, appliance5=0,
 964 |                                                                  app5=0, flag=4)
 965 |         final_strong.append(list_vectors_strong)
 966 |         vector_weak = [0, 0, 0, 0, 0]
 967 |         vector_weak[i] = 1
 968 |         vector_weak[rand_app[0]] = 1
 969 |         print(vector_weak)
 970 |         final_weak.append(vector_weak)
 971 |         indexes_annotation(appliances[i], appliances[rand_app[0]], 0, 0, 0, rand_1[0],
 972 |                                        rand_2[0], 0, 0, 0)
 973 | 
 974 | 
 975 | 
 976 |     indices_of_activations['kettle'] = list_kettle
 977 |     indices_of_activations['microwave'] = list_micro
 978 |     indices_of_activations['fridge'] = list_fridge
 979 |     indices_of_activations['washing_machine'] = list_wash
 980 |     indices_of_activations['dish_washer'] = list_dish
 981 | 
 982 |     repetitions_k, repetitions_m, repetitions_f, repetitions_w, repetitions_d = repetition_counter(appliances)
 983 |     print("repetitions counted!")
 984 | 
 985 | 
 986 |     print("Total samples kettle:", sample_count_k)
 987 |     print("Total samples micro:", sample_count_m)
 988 |     print("Total samples fridge:", sample_count_f)
 989 |     print("Total samples washing:", sample_count_w)
 990 |     print("Total samples dish:", sample_count_d)
 991 |     #
 992 |     with open('/mnt/sda1/home/gtanoni/codice_prova/phase_repetition_' + str(build) + '_.txt',
 993 |               'a+') as file:
 994 |         print("Repetitions Kettle:", file=file)
 995 |         print(repetitions_k, file=file)
 996 |         print("Repetitions Micro:", file=file)
 997 |         print(repetitions_m, file=file)
 998 |         print("Repetitions Fridge:", file=file)
 999 |         print(repetitions_f, file=file)
1000 |         print("Repetitions Wash:", file=file)
1001 |         print(repetitions_w, file=file)
1002 |         print("Repetitions Dish:", file=file)
1003 |         print(repetitions_d, file=file)
1004 | 
1005 |     return aggregate_, final_strong, final_weak
1006 | 
1007 | 
def data_iteration_seq1(activations_, build, appliances, num_of_bags):
    """Generate synthetic aggregate windows that each contain one appliance.

    For every bag, one activation of the first non-empty entry of
    ``appliances`` is drawn at random from ``activations_``. It is summed
    with Gaussian noise (mean 0, std 1) and negative samples are set to 0.

    Args:
        activations_: mapping from appliance name to an indexable collection
            of activation frames. Each frame must have a "Unix" column after
            ``reset_index()`` and a ``['power']['active']`` column.
        build: identifier used only in the name of the repetition report file.
        appliances: sequence of appliance names in which ``''`` marks an
            unused slot. The position of a name is also the index set to 1 in
            the 5-element weak label, so positions must be below 5.
        num_of_bags: number of windows to generate.

    Returns:
        Tuple ``(aggregate_, final_strong, final_weak)``: the aggregate series,
        the outputs of ``strong_labels_creation`` and the weak label vectors,
        one item per bag.

    Side effects:
        Prints progress to stdout, updates the module-level
        ``indices_of_activations`` mapping and appends the repetition counts
        to a text file in the parent directory.
    """
    n_kettle = n_micro = n_fridge = n_wash = n_dish = 0
    final_weak, final_strong, aggregate_ = [], [], []

    # The aggregate is the sum of a single appliance (plus noise).
    for _ in range(num_of_bags):
        noise = pd.Series(np.random.normal(0, 1, window_lenght))
        zeros = pd.Series(np.zeros(window_lenght))

        present = [pos for pos, name in enumerate(appliances) if name != '']
        first = present[0]

        print("Appliance 1")
        print(appliances[first])

        pick_1 = np.random.randint(len(activations_[appliances[first]]), size=1)
        act_1 = activations_[appliances[first]][pick_1[0]]
        act_1 = act_1.reset_index().drop(["Unix"], axis=1)
        raw_1 = act_1['power']['active']

        samples = raw_1.add(noise, fill_value=0).to_numpy()
        samples[samples < 0] = 0  # the aggregate power must not be negative
        aggregate_.append(pd.Series(samples))

        # Version of the activation aligned with the all-zero window series,
        # used for the strong labels.
        series_1 = raw_1.add(zeros, fill_value=0)

        n_kettle, n_micro, n_fridge, n_wash, n_dish = sample_counter(
            appliances[first], 0, 0, 0, 0,
            n_kettle, n_micro, n_fridge, n_wash, n_dish,
            act_1['power']['active'], 0, 0, 0, 0)

        final_strong.append(strong_labels_creation(
            appliance1=appliances[first], app1=series_1,
            appliance2=0, app2=0, appliance3=0, app3=0,
            appliance4=0, app4=0, appliance5=0, app5=0, flag=4))

        weak = [0, 0, 0, 0, 0]
        weak[first] = 1
        print(weak)
        final_weak.append(weak)

        indexes_annotation(appliances[first], 0, 0, 0, 0, pick_1[0], 0, 0, 0, 0)

    for key, picked in (('kettle', list_kettle),
                        ('microwave', list_micro),
                        ('fridge', list_fridge),
                        ('washing_machine', list_wash),
                        ('dish_washer', list_dish)):
        indices_of_activations[key] = picked

    rep_kettle, rep_micro, rep_fridge, rep_wash, rep_dish = repetition_counter(appliances)
    print("repetitions counted!")

    for label, total in (("Total samples kettle:", n_kettle),
                         ("Total samples micro:", n_micro),
                         ("Total samples fridge:", n_fridge),
                         ("Total samples washing:", n_wash),
                         ("Total samples dish:", n_dish)):
        print(label, total)

    report_path = '../phase_repetition_' + str(build) + '_.txt'
    with open(report_path, 'a+') as report:
        for title, reps in (("Repetitions Kettle:", rep_kettle),
                            ("Repetitions Micro:", rep_micro),
                            ("Repetitions Fridge:", rep_fridge),
                            ("Repetitions Wash:", rep_wash),
                            ("Repetitions Dish:", rep_dish)):
            print(title, file=report)
            print(reps, file=report)

    return aggregate_, final_strong, final_weak
1099 | 
# Appliance names handed to the activation-extraction step, in the order
# kettle, microwave, fridge, washing machine, dish washer.
appliances = [
    'kettle',
    'microwave',
    'fridge',
    'washing machine',
    'dish washer',
]

# Save activations' files (one extraction pass over `houses`).
activation_appliances_nilmtk(appliances, houses)
1103 | 
# Appliances used for each house, keyed by the house number as a string.
# Every list is positional -- kettle, microwave, fridge, washing_machine,
# dish_washer -- and an empty string marks a slot that is not used.
# NOTE(review): house '19' lists only its first two slots instead of being
# padded to five entries like the other houses -- confirm this is intended.
dict_ = {
    '2':  ['kettle', '', 'fridge', 'washing_machine', 'dish_washer'],
    '3':  ['kettle', '', '', '', ''],
    '4':  ['kettle', 'microwave', '', '', ''],
    '5':  ['kettle', '', 'fridge', 'washing_machine', 'dish_washer'],
    '6':  ['kettle', '', '', '', ''],
    '7':  ['kettle', '', '', 'washing_machine', 'dish_washer'],
    '8':  ['kettle', '', '', 'washing_machine', ''],
    '9':  ['kettle', '', '', 'washing_machine', 'dish_washer'],
    '10': ['', 'microwave', '', '', ''],
    '12': ['', 'microwave', 'fridge', '', ''],
    '13': ['kettle', '', '', '', 'dish_washer'],
    '15': ['', '', 'fridge', '', ''],
    '16': ['', '', '', 'washing_machine', 'dish_washer'],
    '17': ['', 'microwave', '', 'washing_machine', ''],
    '18': ['', '', '', 'washing_machine', 'dish_washer'],
    '19': ['kettle', 'microwave'],
    '20': ['kettle', '', '', '', 'dish_washer'],
}
1121 | 
# Bag-building routine and number of bags to generate for every house this
# script knows how to process.  A house that is missing here has no routine
# configured; it is reported explicitly below instead of failing later with a
# NameError on the (never assigned) `Aggregate_`.
_bag_builders = {
    2: (data_iteration_seq4, 8000),
    5: (data_iteration_seq4, 8000),
    7: (data_iteration_seq4, 8000),
    3: (data_iteration_seq1, 12000),
    6: (data_iteration_seq1, 12000),
    10: (data_iteration_seq1, 12000),
    16: (data_iteration_seq1, 12000),
    18: (data_iteration_seq1, 12000),
    15: (data_iteration_seq1, 12000),
    4: (data_iteration_seq2, 12000),
    8: (data_iteration_seq2, 12000),
    12: (data_iteration_seq2, 12000),
    17: (data_iteration_seq2, 12000),
    19: (data_iteration_seq2, 12000),
    9: (data_iteration_seq3, 9000),
}

# NOTE: the loop variable `id` shadows the builtin; the name is kept because it
# is a module-level variable that code outside this block may read.
for id in houses:
    list_activations = {'kettle': [], 'microwave': [], 'fridge': [], 'washing_machine': [], 'dish_washer': []}

    # Resample every stored activation of the house to an 8-second grid
    # (back-filling the gaps) and save the resampled list next to the input.
    for app in dict_[str(id)]:
        if app == '':
            continue  # appliance slot not used for this house

        # NOTE(review): allow_pickle=True executes pickled content; these files
        # are presumably written by the extraction step of this same script.
        activations = np.load('../' + app + 'activations_' + str(id) + '.npy', allow_pickle=True)
        list_new = [activation.resample('8S').bfill() for activation in activations]
        np.save('../' + app + 'activations_' + str(id) + '_resampled.npy', list_new)
        list_activations[app] = list_new

    if id not in _bag_builders:
        raise ValueError("No bag-building routine is configured for house %s" % id)
    build_bags, bag_count = _bag_builders[id]
    Aggregate_, final_strong_, final_weak_ = build_bags(list_activations, id, dict_[str(id)],
                                                        num_of_bags=bag_count)

    #    SAVE THE DATA AND LABELS   #
    for bag in range(len(Aggregate_)):
        agg = Aggregate_[bag].to_numpy()
        strong = final_strong_[bag]

        # Skip bags whose aggregate is longer than the 2550-sample window or
        # whose five strong-label vectors are not exactly 2550 samples long.
        # NOTE(review): an aggregate *shorter* than 2550 is still saved --
        # confirm whether `!= 2550` was intended for the aggregate as well.
        if len(agg) > 2550 or any(len(strong[j]) != 2550 for j in range(5)):
            continue

        np.save("../aggregate_data/house_" + str(id) + "/aggregate_%d" % bag, agg)
        # Convert the label vectors to plain lists, in place, before saving.
        for k in range(len(strong)):
            strong[k] = strong[k].tolist()
        weak = final_weak_[bag]
        np.save("../labels/house_" + str(id) + "/strong_labels_%d.npy" % bag, strong)
        np.save("../labels/house_" + str(id) + "/weak_labels_%d.npy" % bag, weak)

    print("Total number of bags:", len(final_weak_))
    # Drop the per-house results before the next house is processed.
    del final_strong_
    del final_weak_
    del Aggregate_
1171 | 
1172 | 
1173 | 
1174 | 
1175 | 
1176 | 


--------------------------------------------------------------------------------
/dataset_creation/synth_uk_creation.py:
--------------------------------------------------------------------------------
   1 | import numpy as np
   2 | from os.path import join
   3 | import pandas as pd
   4 | from nilmtk.nilmtk.dataset import DataSet
   5 | from nilmtk.nilmtk.electric import Electric
   6 | import json
   7 | import argparse
   8 | import gc
   9 | #from uk_appliance_info import *
  10 | np.random.seed(0) # for strong
  11 | #np.random.seed(3) # for weak
  12 | parser = argparse.ArgumentParser(description="UK-DALE synthetic dataset creation")
  13 | 
  14 | parser.add_argument("--tot_pos_sample_1", type=int, default=1400000, help="Number of total positive samples from house 1")
  15 | parser.add_argument("--tot_pos_sample_2", type=int, default=120000, help="Number of total positive samples from house 2")
  16 | parser.add_argument("--tot_pos_sample_3", type=int, default=200000, help="Number of total positive samples from house 3")
  17 | parser.add_argument("--tot_pos_sample_4", type=int, default=100000, help="Number of total positive samples from house 4")
  18 | parser.add_argument("--tot_pos_sample_5", type=int, default=93350, help="Number of total positive samples from house 5")
  19 | parser.add_argument("--building", type= int, default=1, help="House for bags creation")
  20 | parser.add_argument("--window_length", type= int, default=2550, help="Segments dimension")
  21 | parser.add_argument("--number_shift", type= int, default=250, help="Number of samples from which randomly select activations shifting")
  22 | arguments = parser.parse_args()
  23 | 
  24 | 
  25 | min_off_duration = {
  26 |     'washing machine': 30,
  27 |     'dish washer': 1800,
  28 |     'microwave': 30,
  29 |     'kettle': 0,
  30 |     'fridge': 12
  31 | }
  32 | 
  33 | min_on_duration = {
  34 |     'washing machine': 1800,
  35 |     'dish washer': 1800,
  36 |     'microwave': 12,
  37 |     'kettle': 12,
  38 |     'fridge': 60
  39 | }
  40 | 
  41 | on_power_threshold = {
  42 |     'washing machine': 20,
  43 |     'dish washer': 10,
  44 |     'microwave': 200,
  45 |     'kettle': 2000,
  46 |     'fridge': 50
  47 | 
  48 | }
  49 | 
  50 | activations_1 = {
  51 |     'washing machine': [],
  52 |     'kettle': [],
  53 |     'fridge': [],
  54 |     'dish washer': [],
  55 |     'microwave': []
  56 | }
  57 | 
  58 | activations_3 = {
  59 |     'washing machine': [],
  60 |     'kettle': [],
  61 |     'fridge': [],
  62 |     'dish washer': [],
  63 |     'microwave': []
  64 | }
  65 | 
  66 | activations_4 = {
  67 |     'washing machine': [],
  68 |     'kettle': [],
  69 |     'fridge': [],
  70 |     'dish washer': [],
  71 |     'microwave': []
  72 | }
  73 | 
  74 | 
  75 | activations_2 = {
  76 |     'washing machine': [],
  77 |     'kettle': [],
  78 |     'fridge': [],
  79 |     'dish washer': [],
  80 |     'microwave': []
  81 | }
  82 | 
  83 | activations_5 = {
  84 |     'washing machine': [],
  85 |     'kettle': [],
  86 |     'fridge': [],
  87 |     'dish washer': [],
  88 |     'microwave': []
  89 | }
  90 | 
  91 | indices_of_activations = {
  92 |     'washing machine': [],
  93 |     'kettle': [],
  94 |     'fridge': [],
  95 |     'dish washer': [],
  96 |     'microwave': []
  97 | }
  98 | 
  99 | 
 100 | #    SAVE THE DATA AND LABELS   #
 101 | dict_1 = {'labels_0': {
 102 |     'strong':[],
 103 |     'weak':[],
 104 | }}
 105 | 
 106 | dict_2 = {'labels_0': {
 107 |     'strong':[],
 108 |     'weak':[],
 109 | }}
 110 | 
 111 | dict_5 = {'labels_0': {
 112 |     'strong':[],
 113 |     'weak':[],
 114 | }}
 115 | 
 116 | dict_3 = {'labels_0': {
 117 |     'strong':[],
 118 |     'weak':[],
 119 | }}
 120 | 
 121 | dict_4 = {'labels_0': {
 122 |     'strong':[],
 123 |     'weak':[],
 124 | }}
 125 | 
 126 | 
 127 | destination_path = "../aggregate_data/"
 128 | ukdale_path = "../ukdale.h5"
 129 | ukdale = DataSet(ukdale_path)
 130 | if arguments.building == 3:
 131 |     appliances = ['kettle','','','','']
 132 | if arguments.building == 4:
 133 |     appliances = ['','','fridge','','']
 134 | if arguments.building == 1 or arguments.building == 2 or arguments.building == 5:
 135 |     appliances = ['kettle', 'microwave','fridge', 'washing machine', 'dish washer']
 136 | 
 137 | phase_change = 5 # number of appliances to which subdivide total positive samples
 138 | samples_per_class_1 = round((arguments.tot_pos_sample_1 / phase_change))
 139 | samples_per_class_5 = round((arguments.tot_pos_sample_5 / phase_change))
 140 | samples_per_class_2 = round((arguments.tot_pos_sample_2 / phase_change))
 141 | samples_per_class_3 = arguments.tot_pos_sample_3
 142 | samples_per_class_4 = arguments.tot_pos_sample_4
 143 | window_length = 2550
 144 | number_shift = arguments.number_shift
 145 | cases = 5
 146 | list_kettle = []
 147 | list_micro = []
 148 | list_fridge = []
 149 | list_wash = []
 150 | list_dish = []
 151 | 
 152 | def activation_appliances_nilmtk(appliances, building):
 153 |     print("Extracting activations")
 154 |     for appliance in appliances:
 155 |             elec = ukdale.buildings[building].elec
 156 |             curr_appliance = elec[appliance]
 157 |             activation_ = Electric.get_activations(curr_appliance, min_off_duration=min_off_duration[appliance],
 158 |                                                    min_on_duration=min_on_duration[appliance],
 159 |                                                    on_power_threshold=on_power_threshold[appliance])
 160 |             df = next(curr_appliance.load(ac_type='active'))
 161 |             df.head()
 162 |             print(building)
 163 |             print(appliance)
 164 |             print(len(activation_))
 165 |             if building == 1:
 166 |                 activations_1[
 167 |                     appliance] = activation_
 168 | 
 169 |             if building == 2:
 170 |                 activations_2[appliance] = activation_
 171 | 
 172 |             if building == 5:
 173 |                 activations_5[appliance] = activation_
 174 | 
 175 |             if building == 3:
 176 |                 activations_3[appliance] = activation_
 177 | 
 178 |             if building == 4:
 179 |                 activations_4[appliance] = activation_
 180 |     if building == 1:
 181 |         return activations_1
 182 |     if building == 2:
 183 |         return activations_2
 184 |     if building == 3:
 185 |         return activations_3
 186 |     if building == 4:
 187 |         return activations_4
 188 |     if building ==5:
 189 |         return activations_5
 190 | 
 191 | 
 192 | 
 193 | def indexes_annotation(app1,app2,app3,app4,app5, rand_1, rand_2,rand_3, rand_4, rand_5):
 194 | 
 195 | 
 196 |     if app1 == 'kettle':
 197 |         list_kettle.append(rand_1)
 198 | 
 199 |     elif app2 == 'kettle':
 200 |         list_kettle.append(rand_2)
 201 | 
 202 |     elif app3 == 'kettle':
 203 |         list_kettle.append(rand_3)
 204 | 
 205 |     elif app4 == 'kettle':
 206 |         list_kettle.append(rand_4)
 207 | 
 208 |     elif app5 == 'kettle':
 209 |         list_kettle.append(rand_5)
 210 | 
 211 | 
 212 |     if app1 == 'microwave':
 213 |         list_micro.append(rand_1)
 214 | 
 215 |     elif app2 == 'microwave':
 216 |         list_micro.append(rand_2)
 217 | 
 218 |     elif app3 == 'microwave':
 219 |         list_micro.append(rand_3)
 220 | 
 221 |     elif app4 == 'microwave':
 222 |         list_micro.append(rand_4)
 223 | 
 224 |     elif app5 == 'microwave':
 225 |         list_micro.append(rand_5)
 226 | 
 227 | 
 228 |     if app1 == 'fridge':
 229 |         list_fridge.append(rand_1)
 230 | 
 231 |     elif app2 == 'fridge':
 232 |         list_fridge.append(rand_2)
 233 | 
 234 |     elif app3 == 'fridge':
 235 |         list_fridge.append(rand_3)
 236 | 
 237 |     elif app4 == 'fridge':
 238 |         list_fridge.append(rand_4)
 239 | 
 240 |     elif app5 == 'fridge':
 241 |         list_fridge.append(rand_5)
 242 | 
 243 | 
 244 |     if app1 == 'washing machine':
 245 |         list_wash.append(rand_1)
 246 | 
 247 |     elif app2 == 'washing machine':
 248 |         list_wash.append(rand_2)
 249 | 
 250 |     elif app3 == 'washing machine':
 251 |         list_wash.append(rand_3)
 252 | 
 253 |     elif app4 == 'washing machine':
 254 |         list_wash.append(rand_4)
 255 | 
 256 |     elif app5 == 'washing machine':
 257 |         list_wash.append(rand_5)
 258 | 
 259 | 
 260 |     if app1 == 'dish washer':
 261 |         list_dish.append(rand_1)
 262 | 
 263 |     elif app2 == 'dish washer':
 264 |         list_dish.append(rand_2)
 265 | 
 266 |     elif app3 == 'dish washer':
 267 |         list_dish.append(rand_3)
 268 | 
 269 |     elif app4 == 'dish washer':
 270 |         list_dish.append(rand_4)
 271 | 
 272 |     elif app5 == 'dish washer':
 273 |         list_dish.append(rand_5)
 274 | 
 275 | 
 276 | def sample_counter(app1,app2,app3,app4,app5,sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d, act1,act2,act3,act4,act5):
 277 | 
 278 | 
 279 |     if app1 == 'kettle':
 280 |         sample_count_k += len(act1)
 281 | 
 282 |     if app2 == 'kettle':
 283 |         sample_count_k += len(act2)
 284 | 
 285 |     if app3 == 'kettle':
 286 |         sample_count_k += len(act3)
 287 | 
 288 |     if app4 == 'kettle':
 289 |         sample_count_k += len(act4)
 290 | 
 291 |     if app5 == 'kettle':
 292 |         sample_count_k += len(act5)
 293 | 
 294 | 
 295 |     if app1 == 'microwave':
 296 |         sample_count_m += len(act1)
 297 | 
 298 |     if app2 == 'microwave':
 299 |         sample_count_m += len(act2)
 300 | 
 301 |     if app3 == 'microwave':
 302 |         sample_count_m += len(act3)
 303 | 
 304 |     if app4 == 'microwave':
 305 |         sample_count_m += len(act4)
 306 | 
 307 |     if app5 == 'microwave':
 308 |         sample_count_m += len(act5)
 309 | 
 310 | 
 311 |     if app1 == 'fridge':
 312 |         sample_count_f += len(act1)
 313 | 
 314 |     if app2 == 'fridge':
 315 |         sample_count_f += len(act2)
 316 | 
 317 |     if app3 == 'fridge':
 318 |         sample_count_f += len(act3)
 319 | 
 320 |     if app4 == 'fridge':
 321 |         sample_count_f += len(act4)
 322 | 
 323 |     if app5 == 'fridge':
 324 |         sample_count_f += len(act5)
 325 | 
 326 | 
 327 |     if app1 == 'washing machine':
 328 |         sample_count_w += len(act1)
 329 | 
 330 |     if app2 == 'washing machine':
 331 |         sample_count_w += len(act2)
 332 | 
 333 |     if app3 == 'washing machine':
 334 |         sample_count_w += len(act3)
 335 | 
 336 |     if app4 == 'washing machine':
 337 |         sample_count_w += len(act4)
 338 | 
 339 |     if app5 == 'washing machine':
 340 |         sample_count_w += len(act5)
 341 | 
 342 |     if app1 == 'dish washer':
 343 |         sample_count_d += len(act1)
 344 | 
 345 |     elif app2 == 'dish washer':
 346 |         sample_count_d += len(act2)
 347 | 
 348 |     if app3 == 'dish washer':
 349 |         sample_count_d += len(act3)
 350 | 
 351 |     if app4 == 'dish washer':
 352 |         sample_count_d += len(act4)
 353 | 
 354 |     if app5 == 'dish washer':
 355 |         sample_count_d += len(act5)
 356 | 
 357 |     return sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d
 358 | 
 359 | def repetition_counter(appliances):
 360 | 
 361 |     indexes_k = []
 362 |     indexes_m = []
 363 |     indexes_f = []
 364 |     indexes_w = []
 365 |     indexes_d = []
 366 |     list_indexes = [indexes_k, indexes_m, indexes_f, indexes_w, indexes_d]
 367 |     for i in range(len(appliances)):
 368 |         indexes = list_indexes[i]
 369 |         if appliances[i] != '':
 370 |             for idx in indices_of_activations[appliances[i]]:
 371 |                 repetition = indices_of_activations[appliances[i]].count(idx)
 372 |                 indexes.append(repetition)
 373 | 
 374 |     return indexes_k, indexes_m, indexes_f, indexes_w, indexes_d
 375 | 
 376 | def padd_shift(rand_shift,app):
 377 | 
 378 |     padd = pd.Series(np.zeros(rand_shift))
 379 |     app_series = app['power']['active']
 380 |     app_series = app_series.append(padd, ignore_index=True).shift(periods=rand_shift, fill_value=0)
 381 |     return app_series
 382 | 
 383 | def strong_labels_creation(appliance1,app1,appliance2, app2,appliance3, app3, appliance4, app4, appliance5, app5, flag = 0):
 384 | 
 385 |     list_vectors_strong = []
 386 |     vector_strong_1 = pd.Series()
 387 |     vector_strong_2 = pd.Series()
 388 |     vector_strong_3 = pd.Series()
 389 |     vector_strong_4 = pd.Series()
 390 |     vector_strong_5 = pd.Series()
 391 | 
 392 |     if flag == 4:
 393 |             vector_strong_1 = pd.Series()
 394 | 
 395 |             list_1 = []
 396 |             if appliance1 == 'kettle':
 397 |                 for l in range(len(app1)):
 398 |                     if app1[l] < on_power_threshold['kettle']:
 399 |                         list_1.append(0)
 400 |                     else:
 401 |                         list_1.append(1)
 402 |                 print("Appliance 1 vector strong Kettle")
 403 |                 vector_strong_1_a = pd.Series(list_1)
 404 |             else:
 405 |                 vector_strong_1_a = pd.Series(np.zeros(window_length))
 406 | 
 407 |             list_1 = []
 408 |             if appliance2 == 'kettle':
 409 |                 for l in range(len(app2)):
 410 |                     if app2[l] < on_power_threshold['kettle']:
 411 |                         list_1.append(0)
 412 |                     else:
 413 |                         list_1.append(1)
 414 | 
 415 |                 print("Appliance 2 vector strong Kettle")
 416 |                 vector_strong_1_b = pd.Series(list_1)
 417 |             else:
 418 |                 vector_strong_1_b = pd.Series(np.zeros(window_length))
 419 | 
 420 |             list_1 = []
 421 |             if appliance3 == 'kettle':
 422 |                 for l in range(len(app3)):
 423 |                     if app3[l] < on_power_threshold['kettle']:
 424 |                         list_1.append(0)
 425 |                     else:
 426 |                         list_1.append(1)
 427 | 
 428 |                 print("Appliance 3 vector strong Kettle")
 429 |                 vector_strong_1_c = pd.Series(list_1)
 430 |             else:
 431 |                 vector_strong_1_c = pd.Series(np.zeros(window_length))
 432 | 
 433 |             list_1 = []
 434 |             if appliance4 == 'kettle':
 435 |                 for l in range(len(app4)):
 436 |                     if app4[l] < on_power_threshold['kettle']:
 437 |                         list_1.append(0)
 438 |                     else:
 439 |                         list_1.append(1)
 440 | 
 441 |                 print("Appliance 4 vector strong Kettle")
 442 |                 vector_strong_1_d = pd.Series(list_1)
 443 |             else:
 444 |                 vector_strong_1_d = pd.Series(np.zeros(window_length))
 445 | 
 446 |             list_1 = []
 447 |             if appliance5 == 'kettle':
 448 |                 for l in range(len(app5)):
 449 |                     if app5[l] < on_power_threshold['kettle']:
 450 |                         list_1.append(0)
 451 |                     else:
 452 |                         list_1.append(1)
 453 | 
 454 |                 print("Appliance 5 vector strong Kettle")
 455 |                 vector_strong_1_e = pd.Series(list_1)
 456 |             else:
 457 |                 vector_strong_1_e = pd.Series(np.zeros(window_length))
 458 | 
 459 | 
 460 |             vector_strong_1 = vector_strong_1_a.add(vector_strong_1_b)
 461 |             vector_strong_1 = vector_strong_1.add(vector_strong_1_c)
 462 |             vector_strong_1 = vector_strong_1.add(vector_strong_1_d)
 463 |             vector_strong_1 = vector_strong_1.add(vector_strong_1_e)
 464 | 
 465 |             list_2 = []
 466 |             vector_strong_2 = pd.Series()
 467 |             if appliance1 == 'microwave':
 468 |                 for l in range(len(app1)):
 469 |                     if app1[l] < on_power_threshold['microwave']:
 470 |                         list_2.append(0)
 471 |                     else:
 472 |                         list_2.append(1)
 473 |                 vector_strong_2_a = pd.Series(list_2)
 474 |             else:
 475 |                 vector_strong_2_a = pd.Series(np.zeros(window_length))
 476 | 
 477 |             list_2 = []
 478 |             if appliance2 == 'microwave':
 479 |                 for l in range(len(app2)):
 480 |                     if app2[l] < on_power_threshold['microwave']:
 481 |                         list_2.append(0)
 482 |                     else:
 483 |                         list_2.append(1)
 484 |                 vector_strong_2_b = pd.Series(list_2)
 485 |             else:
 486 |                 vector_strong_2_b = pd.Series(np.zeros(window_length))
 487 | 
 488 |             list_2 = []
 489 |             if appliance3 == 'microwave':
 490 |                 for l in range(len(app3)):
 491 |                     if app3[l] < on_power_threshold['microwave']:
 492 |                         list_2.append(0)
 493 |                     else:
 494 |                         list_2.append(1)
 495 | 
 496 |                 print("Appliance 3 vector strong microwave")
 497 |                 vector_strong_2_c = pd.Series(list_2)
 498 |             else:
 499 |                 vector_strong_2_c = pd.Series(np.zeros(window_length))
 500 | 
 501 |             list_2 = []
 502 |             if appliance4 == 'microwave':
 503 |                 for l in range(len(app4)):
 504 |                     if app4[l] < on_power_threshold['microwave']:
 505 |                         list_2.append(0)
 506 |                     else:
 507 |                         list_2.append(1)
 508 | 
 509 |                 print("Appliance 4 vector strong microwave")
 510 |                 vector_strong_2_d = pd.Series(list_2)
 511 |             else:
 512 |                 vector_strong_2_d = pd.Series(np.zeros(window_length))
 513 | 
 514 |             list_2 = []
 515 |             if appliance5 == 'microwave':
 516 |                 for l in range(len(app5)):
 517 |                     if app5[l] < on_power_threshold['microwave']:
 518 |                         list_2.append(0)
 519 |                     else:
 520 |                         list_2.append(1)
 521 | 
 522 |                 print("Appliance 5 vector strong microwave")
 523 |                 vector_strong_2_e = pd.Series(list_2)
 524 |             else:
 525 |                 vector_strong_2_e = pd.Series(np.zeros(window_length))
 526 | 
 527 |             vector_strong_2 = vector_strong_2_a.add(vector_strong_2_b)
 528 |             vector_strong_2 = vector_strong_2.add(vector_strong_2_c)
 529 |             vector_strong_2 = vector_strong_2.add(vector_strong_2_d)
 530 |             vector_strong_2 = vector_strong_2.add(vector_strong_2_e)
 531 | 
 532 | 
 533 |             list_3 = []
 534 |             vector_strong_3 = pd.Series()
 535 |             if appliance1 == 'fridge':
 536 |                 for l in range(len(app1)):
 537 |                     if app1[l] < on_power_threshold['fridge']:
 538 |                         list_3.append(0)
 539 |                     else:
 540 |                         list_3.append(1)
 541 |                 vector_strong_3_a = pd.Series(list_3)
 542 |             else:
 543 |                 vector_strong_3_a = pd.Series(np.zeros(window_length))
 544 | 
 545 |             list_3 = []
 546 |             if appliance2 == 'fridge':
 547 |                 for l in range(len(app2)):
 548 |                     if app2[l] < on_power_threshold['fridge']:
 549 |                         list_3.append(0)
 550 |                     else:
 551 |                         list_3.append(1)
 552 |                 vector_strong_3_b = pd.Series(list_3)
 553 |             else:
 554 |                 vector_strong_3_b = pd.Series(np.zeros(window_length))
 555 | 
 556 |             list_3 = []
 557 |             if appliance3 == 'fridge':
 558 |                 for l in range(len(app3)):
 559 |                     if app3[l] < on_power_threshold['fridge']:
 560 |                         list_3.append(0)
 561 |                     else:
 562 |                         list_3.append(1)
 563 | 
 564 |                 print("Appliance 3 vector strong microwave")
 565 |                 vector_strong_3_c = pd.Series(list_3)
 566 |             else:
 567 |                 vector_strong_3_c = pd.Series(np.zeros(window_length))
 568 | 
 569 |             list_3 = []
 570 |             if appliance4 == 'fridge':
 571 |                 for l in range(len(app4)):
 572 |                     if app4[l] < on_power_threshold['fridge']:
 573 |                         list_3.append(0)
 574 |                     else:
 575 |                         list_3.append(1)
 576 | 
 577 |                 print("Appliance 4 vector strong microwave")
 578 |                 vector_strong_3_d = pd.Series(list_3)
 579 |             else:
 580 |                 vector_strong_3_d = pd.Series(np.zeros(window_length))
 581 | 
 582 |             list_3 = []
 583 |             if appliance5 == 'fridge':
 584 |                 for l in range(len(app5)):
 585 |                     if app5[l] < on_power_threshold['fridge']:
 586 |                         list_3.append(0)
 587 |                     else:
 588 |                         list_3.append(1)
 589 | 
 590 |                 print("Appliance 5 vector strong microwave")
 591 |                 vector_strong_3_e = pd.Series(list_3)
 592 |             else:
 593 |                 vector_strong_3_e = pd.Series(np.zeros(window_length))
 594 | 
 595 |             vector_strong_3 = vector_strong_3_a.add(vector_strong_3_b)
 596 |             vector_strong_3 = vector_strong_3.add(vector_strong_3_c)
 597 |             vector_strong_3 = vector_strong_3.add(vector_strong_3_d)
 598 |             vector_strong_3 = vector_strong_3.add(vector_strong_3_e)
 599 | 
 600 | 
 601 | 
 602 | 
 603 |             list_4 = []
 604 |             vector_strong_4 = pd.Series()
 605 |             if appliance1 == 'washing machine':
 606 |                 for l in range(len(app1)):
 607 |                     if app1[l] < on_power_threshold['washing machine']:
 608 |                         list_4.append(0)
 609 |                     else:
 610 |                         list_4.append(1)
 611 |                 vector_strong_4 = pd.Series(list_4)
 612 |             elif appliance2 == 'washing machine':
 613 |                 for l in range(len(app2)):
 614 |                     if app2[l] < on_power_threshold['washing machine']:
 615 |                         list_4.append(0)
 616 |                     else:
 617 |                         list_4.append(1)
 618 |                 vector_strong_4 = pd.Series(list_4)
 619 | 
 620 | 
 621 |             elif appliance3 == 'washing machine':
 622 |                 for l in range(len(app3)):
 623 |                     if app3[l] < on_power_threshold['washing machine']:
 624 |                         list_4.append(0)
 625 |                     else:
 626 |                         list_4.append(1)
 627 | 
 628 |                 print("Appliance 3 vector strong washing machine")
 629 |                 vector_strong_4 = pd.Series(list_4)
 630 | 
 631 |             elif appliance4 == 'washing machine':
 632 |                 for l in range(len(app4)):
 633 |                     if app4[l] < on_power_threshold['washing machine']:
 634 |                         list_4.append(0)
 635 |                     else:
 636 |                         list_4.append(1)
 637 | 
 638 |                 print("Appliance 4 vector strong washing machine")
 639 |                 vector_strong_4 = pd.Series(list_4)
 640 |             elif appliance5 == 'washing machine':
 641 |                 for l in range(len(app5)):
 642 |                     if app5[l] < on_power_threshold['washing machine']:
 643 |                         list_4.append(0)
 644 |                     else:
 645 |                         list_4.append(1)
 646 | 
 647 |                 print("Appliance 5 vector strong washing machine")
 648 |                 vector_strong_4 = pd.Series(list_4)
 649 |             else:
 650 |                 vector_strong_4 = pd.Series(np.zeros(window_length))
 651 | 
 652 |             list_5 = []
 653 |             vector_strong_5 = pd.Series()
 654 |             if appliance1 == 'dish washer':
 655 |                 for l in range(len(app1)):
 656 |                     if app1[l] < on_power_threshold['dish washer']:
 657 |                         list_5.append(0)
 658 |                     else:
 659 |                         list_5.append(1)
 660 |                 vector_strong_5 = pd.Series(list_5)
 661 | 
 662 |             elif appliance2 == 'dish washer':
 663 |                 for l in range(len(app2)):
 664 |                     if app2[l] < on_power_threshold['dish washer']:
 665 |                         list_5.append(0)
 666 |                     else:
 667 |                         list_5.append(1)
 668 |                 vector_strong_5 = pd.Series(list_5)
 669 | 
 670 |             elif appliance3 == 'dish washer':
 671 |                 for l in range(len(app3)):
 672 |                     if app3[l] < on_power_threshold['dish washer']:
 673 |                         list_5.append(0)
 674 |                     else:
 675 |                         list_5.append(1)
 676 | 
 677 |                 print("Appliance 3 vector strong dish washer")
 678 |                 vector_strong_5 = pd.Series(list_5)
 679 |             elif appliance4 == 'dish washer':
 680 |                 for l in range(len(app4)):
 681 |                     if app4[l] < on_power_threshold['dish washer']:
 682 |                         list_5.append(0)
 683 |                     else:
 684 |                         list_5.append(1)
 685 | 
 686 |                 print("Appliance 4 vector strong dish washer")
 687 |                 vector_strong_5 = pd.Series(list_5)
 688 |             elif appliance5 == 'dish washer':
 689 |                 for l in range(len(app5)):
 690 |                     if app5[l] < on_power_threshold['dish washer']:
 691 |                         list_5.append(0)
 692 |                     else:
 693 |                         list_5.append(1)
 694 | 
 695 |                 print("Appliance 5 vector strong dish washer")
 696 |                 vector_strong_5 = pd.Series(list_5)
 697 |             else:
 698 |                 vector_strong_5 = pd.Series(np.zeros(window_length))
 699 | 
 700 | 
 701 | 
 702 | 
 703 |     list_vectors_strong.append(vector_strong_1)
 704 |     list_vectors_strong.append(vector_strong_2)
 705 |     list_vectors_strong.append(vector_strong_3)
 706 |     list_vectors_strong.append(vector_strong_4)
 707 |     list_vectors_strong.append(vector_strong_5)
 708 | 
 709 |     return list_vectors_strong
 710 | 
 711 | def data_iteration(activations_, samples_per_class, building):
 712 | 
 713 |     final_weak = []
 714 |     final_strong = []
 715 |     aggregate_ = []
 716 | 
 717 |     if building == 4 or building == 3:
 718 |         sample_count_k = 0
 719 |         sample_count_m = 0
 720 |         sample_count_f = 0
 721 |         sample_count_w = 0
 722 |         sample_count_d = 0
 723 | 
 724 |         for a in range(10000):  # the aggregate is the sum of 1 appliance
 725 |             if not (
 726 |                     sample_count_f > samples_per_class or sample_count_k > samples_per_class ):
 727 | 
 728 |                 if building == 4:
 729 |                     i = 2
 730 |                 else:
 731 |                     i = 0
 732 |                 mu, sigma = 0, 1
 733 |                 s = np.random.normal(mu, sigma, window_length)
 734 |                 s = pd.Series(s)
 735 |                 s_app = pd.Series(np.zeros(window_length))
 736 | 
 737 |                 print("Appliance 1")
 738 |                 print(appliances[i])
 739 | 
 740 |                 if (appliances[i] == 'kettle' and sample_count_k >= samples_per_class):
 741 |                     i = np.random.randint(len(appliances), size=1)
 742 |                     i = i[0]
 743 | 
 744 | 
 745 |                 rand_1 = np.random.randint(len(activations_[appliances[i]]), size=1)
 746 | 
 747 |                 app1 = activations_[appliances[i]][rand_1[0]]
 748 |                 app1 = app1.reset_index()
 749 |                 app1 = app1.drop(["index"], axis=1)
 750 | 
 751 |                 if appliances[i] == 'kettle':
 752 |                     rand_shift = np.random.randint((number_shift + 700), size=1)
 753 |                     rand_shift_mkf1 = np.random.randint((rand_shift[0] + 200 - 100), size=1)
 754 |                     rand_shift_mkf2 = np.random.randint((rand_shift[0] + 500), size=1)
 755 |                     rand_shift_mkf3 = np.random.randint((rand_shift[0] + 300), size=1)
 756 |                     app_mkf1 = activations_[appliances[i]][rand_1[0] - 1]
 757 |                     app_mkf1 = app_mkf1.reset_index()
 758 |                     app_mkf1 = app_mkf1.drop(["index"], axis=1)
 759 | 
 760 |                     app_mkf2 = activations_[appliances[i]][rand_1[0] - 5]
 761 |                     app_mkf2 = app_mkf2.reset_index()
 762 |                     app_mkf2 = app_mkf2.drop(["index"], axis=1)
 763 | 
 764 |                     app_mkf3 = activations_[appliances[i]][rand_1[0] - 7]
 765 |                     app_mkf3 = app_mkf3.reset_index()
 766 |                     app_mkf3 = app_mkf3.drop(["index"], axis=1)
 767 | 
 768 |                     len1 = len(app_mkf1['power']['active'])
 769 |                     len2 = len(app_mkf2['power']['active'])
 770 |                     len3 = len(app_mkf3['power']['active'])
 771 | 
 772 |                     app_mkf1 = padd_shift(rand_shift_mkf1[0], app_mkf1)
 773 | 
 774 |                     app_mkf2 = padd_shift(rand_shift_mkf2[0], app_mkf2)
 775 | 
 776 |                     app_mkf3 = padd_shift(rand_shift_mkf3[0], app_mkf3)
 777 | 
 778 |                     if appliances[i] == 'kettle':
 779 |                         sample_count_k = sample_count_k + len1 + len2 + len3
 780 |                     elif appliances[i] == 'microwave':
 781 |                         sample_count_m = sample_count_m + len1 + len2 + len3
 782 |                     elif appliances[i] == 'fridge':
 783 |                         sample_count_f = sample_count_f + len1 + len2 + len3
 784 | 
 785 |                     app1_series = padd_shift(rand_shift[0], app1)
 786 |                     app1_series = app1_series.add(app_mkf1, fill_value=0)
 787 |                     app1_series = app1_series.add(app_mkf2, fill_value=0)
 788 |                     app1_series = app1_series.add(app_mkf3, fill_value=0)
 789 | 
 790 |                 else:
 791 |                     rand_shift = np.random.randint(number_shift, size=1)
 792 |                     app1_series = padd_shift(rand_shift[0], app1)
 793 | 
 794 |                 aggregate = app1_series.add(s, fill_value=0)
 795 |                 aggregate = aggregate.to_numpy()
 796 |                 aggregate[aggregate < 0] = 0
 797 |                 aggregate = pd.Series(aggregate)
 798 |                 aggregate_.append(aggregate)
 799 |                 app1_series = app1_series.add(s_app, fill_value=0)
 800 | 
 801 |                 sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d = sample_counter(
 802 |                     appliances[i], 0, 0, 0,
 803 |                     0,
 804 |                     sample_count_k, sample_count_m,
 805 |                     sample_count_f, sample_count_w, sample_count_d, app1['power']['active'], 0,
 806 |                     0, 0, 0)
 807 | 
 808 |                 list_vectors_strong = strong_labels_creation(appliance1=appliances[i], app1=app1_series, appliance2=0,
 809 |                                                              app2=0,
 810 |                                                              appliance3=0, app3=0, appliance4=0, app4=0, appliance5=0,
 811 |                                                              app5=0, flag=4)
 812 |                 final_strong.append(list_vectors_strong)
 813 |                 vector_weak = [0, 0, 0, 0, 0]
 814 |                 vector_weak[i] = 1
 815 |                 print(vector_weak)
 816 |                 final_weak.append(vector_weak)
 817 |                 # print(list_indices)
 818 |                 indexes_annotation(appliances[i], 0, 0, 0, 0, rand_1[0], 0, 0, 0, 0)
 819 |             else:
 820 |                 break
 821 | 
 822 |     else:
 823 |         print("Aggregate with one appliance")
 824 |         sample_count_k = 0
 825 |         sample_count_m = 0
 826 |         sample_count_f = 0
 827 |         sample_count_w = 0
 828 |         sample_count_d = 0
 829 | 
 830 |         for a in range(10000):
 831 |             if not (sample_count_k > samples_per_class and sample_count_m > samples_per_class and sample_count_w > samples_per_class and
 832 |                     sample_count_f > samples_per_class and sample_count_d > samples_per_class):
 833 | 
 834 |                 i = np.random.randint(len(appliances), size=1)
 835 |                 i = i[0]
 836 |                 mu, sigma = 0, 1
 837 |                 s = np.random.normal(mu, sigma, window_length)
 838 |                 s = pd.Series(s)
 839 |                 s_app = pd.Series(np.zeros(window_length))
 840 | 
 841 | 
 842 | 
 843 |                 if (appliances[i] == 'kettle' and sample_count_k >= samples_per_class):
 844 |                     i = np.random.randint(len(appliances), size=1)
 845 |                     i = i[0]
 846 | 
 847 |                 rand_1 = np.random.randint(len(activations_[appliances[i]]), size=1)
 848 | 
 849 |                 app1 = activations_[appliances[i]][rand_1[0]]
 850 |                 app1 = app1.reset_index()
 851 |                 app1 = app1.drop(["index"], axis=1)
 852 | 
 853 |                 if appliances[i] == 'kettle':
 854 |                     rand_shift = np.random.randint((number_shift + 700), size=1)
 855 |                     rand_shift_mkf1 = np.random.randint((rand_shift[0] + 200 - 100), size=1)
 856 |                     rand_shift_mkf2 = np.random.randint((rand_shift[0] + 500), size=1)
 857 |                     rand_shift_mkf3 = np.random.randint((rand_shift[0] + 300), size=1)
 858 |                     app_mkf1 = activations_[appliances[i]][rand_1[0] - 1]
 859 |                     app_mkf1 = app_mkf1.reset_index()
 860 |                     app_mkf1 = app_mkf1.drop(["index"], axis=1)
 861 | 
 862 |                     app_mkf2 = activations_[appliances[i]][rand_1[0] - 5]
 863 |                     app_mkf2 = app_mkf2.reset_index()
 864 |                     app_mkf2 = app_mkf2.drop(["index"], axis=1)
 865 | 
 866 |                     app_mkf3 = activations_[appliances[i]][rand_1[0] - 7]
 867 |                     app_mkf3 = app_mkf3.reset_index()
 868 |                     app_mkf3 = app_mkf3.drop(["index"], axis=1)
 869 | 
 870 |                     len1 = len(app_mkf1['power']['active'])
 871 |                     len2 = len(app_mkf2['power']['active'])
 872 |                     len3 = len(app_mkf3['power']['active'])
 873 | 
 874 |                     app_mkf1 = padd_shift(rand_shift_mkf1[0], app_mkf1)
 875 | 
 876 |                     app_mkf2 = padd_shift(rand_shift_mkf2[0], app_mkf2)
 877 | 
 878 |                     app_mkf3 = padd_shift(rand_shift_mkf3[0], app_mkf3)
 879 | 
 880 |                     if appliances[i] == 'kettle':
 881 |                         sample_count_k = sample_count_k + len1 + len2 + len3
 882 |                     elif appliances[i] == 'microwave':
 883 |                         sample_count_m = sample_count_m + len1 + len2 + len3
 884 |                     elif appliances[i] == 'fridge':
 885 |                         sample_count_f = sample_count_f + len1 + len2 + len3
 886 | 
 887 |                     app1_series = padd_shift(rand_shift[0], app1)
 888 |                     app1_series = app1_series.add(app_mkf1, fill_value=0)
 889 |                     app1_series = app1_series.add(app_mkf2, fill_value=0)
 890 |                     app1_series = app1_series.add(app_mkf3, fill_value=0)
 891 | 
 892 |                 else:
 893 |                     rand_shift = np.random.randint(number_shift, size=1)
 894 |                     app1_series = padd_shift(rand_shift[0], app1)
 895 | 
 896 |                 aggregate = app1_series.add(s, fill_value=0)
 897 |                 aggregate = aggregate.to_numpy()
 898 |                 aggregate[aggregate < 0] = 0
 899 |                 aggregate = pd.Series(aggregate)
 900 |                 aggregate_.append(aggregate)
 901 |                 app1_series = app1_series.add(s_app, fill_value=0)
 902 | 
 903 |                 sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d = sample_counter(
 904 |                     appliances[i], 0, 0, 0,
 905 |                     0,
 906 |                     sample_count_k, sample_count_m,
 907 |                     sample_count_f, sample_count_w, sample_count_d, app1['power']['active'], 0,
 908 |                     0, 0, 0)
 909 | 
 910 |                 list_vectors_strong = strong_labels_creation(appliance1=appliances[i], app1=app1_series, appliance2=0,
 911 |                                                              app2=0,
 912 |                                                              appliance3=0, app3=0, appliance4=0, app4=0, appliance5=0,
 913 |                                                              app5=0, flag=4)
 914 |                 final_strong.append(list_vectors_strong)
 915 |                 vector_weak = [0, 0, 0, 0, 0]
 916 |                 vector_weak[i] = 1
 917 |                 print(vector_weak)
 918 |                 final_weak.append(vector_weak)
 919 |                 # print(list_indices)
 920 |                 indexes_annotation(appliances[i], 0, 0, 0, 0, rand_1[0], 0, 0, 0, 0)
 921 |             else:
 922 |                 break
 923 |                 # print("Sequences with 1 appliances:")
 924 |                 # print(a)
 925 |         sample_count_k = 0
 926 |         sample_count_m = 0
 927 |         sample_count_f = 0
 928 |         sample_count_w = 0
 929 |         sample_count_d = 0
 930 | 
 931 |         # Aggregate is the sum of two appliances
 932 |         print("Aggregate with two appliances")
 933 |         for a in range(125000):
 934 |             if not (sample_count_k > samples_per_class and sample_count_m > samples_per_class and sample_count_f > samples_per_class and sample_count_w > samples_per_class and sample_count_d > samples_per_class):
 935 | 
 936 | 
 937 |                 i = np.random.randint(len(appliances), size=1)
 938 |                 i = i[0]
 939 |                 list_indices_app1 = []
 940 |                 list_indices_app2 = []
 941 | 
 942 |                 rand_app = np.random.randint(len(appliances), size=1)
 943 |                 mu, sigma = 0, 1
 944 |                 s = np.random.normal(mu, sigma,window_length)
 945 |                 s = pd.Series(s)
 946 |                 s_app = pd.Series(np.zeros(window_length))
 947 | 
 948 | 
 949 | 
 950 |                 if (appliances[i] =='kettle' and sample_count_k >= samples_per_class):
 951 |                     i = np.random.randint(len(appliances), size=1)
 952 |                     i = i[0]
 953 |                 elif (appliances[i] == 'microwave' and sample_count_m >= samples_per_class):
 954 |                     i = np.random.randint(len(appliances), size=1)
 955 |                     i = i[0]
 956 |                 elif (appliances[i] == 'fridge' and sample_count_f >= samples_per_class):
 957 |                     # i = np.random.randint(len(appliances) - 3, size=1)
 958 |                     # i = i[0]
 959 |                     i = 1
 960 |                 elif (appliances[i] == 'washing machine' and sample_count_w >= samples_per_class):
 961 |                     #i = np.random.randint(len(appliances) - 3, size=1)
 962 |                     #i = i[0]
 963 |                     i = 1
 964 |                 elif (appliances[i] == 'dish washer' and sample_count_d > samples_per_class):
 965 |                     # i = np.random.randint(len(appliances) - 3, size=1)
 966 |                     # i = i[0]
 967 |                     i = 1
 968 | 
 969 |                 if rand_app[0] == i:
 970 |                         rand_app = np.random.randint(5, size=1)
 971 |                 if rand_app[0] == i:
 972 |                         rand_app = np.random.randint(5, size=1)
 973 | 
 974 | 
 975 | 
 976 |                 if (appliances[rand_app[0]] == 'kettle' and sample_count_k >= samples_per_class):
 977 |                         rand_app = np.random.randint(len(appliances) , size=1)
 978 |                 elif (appliances[rand_app[0]] == 'microwave' and sample_count_m >= samples_per_class):
 979 |                         rand_app = np.random.randint(len(appliances), size=1)
 980 |                 elif (appliances[rand_app[0]] == 'fridge' and sample_count_f >= samples_per_class):
 981 |                         rand_app = np.random.randint(len(appliances) - 3, size=1)
 982 |                 elif (appliances[rand_app[0]] == 'washing machine' and sample_count_w >= samples_per_class):
 983 |                         rand_app = np.random.randint(len(appliances) - 3, size=1)
 984 |                 elif (appliances[rand_app[0]] == 'dish washer' and sample_count_d > samples_per_class):
 985 |                         rand_app = np.random.randint(len(appliances) - 3 , size=1)
 986 | 
 987 |                 rand_1 = np.random.randint(len(activations_[appliances[i]]), size=1)
 988 |                 list_indices_app1.append(rand_1)
 989 |                 app1 = activations_[appliances[i]][rand_1[0]]
 990 |                 app1 = app1.reset_index()
 991 |                 app1 = app1.drop(["index"], axis=1)
 992 |                 app1_series = app1['power']['active']
 993 | 
 994 |                 rand_2 = np.random.randint(len(activations_[appliances[rand_app[0]]]), size=1)
 995 |                 list_indices_app2.append(rand_2)
 996 |                 app2 = activations_[appliances[rand_app[0]]][rand_2[0]]
 997 |                 app2 = app2.reset_index()
 998 |                 app2 = app2.drop(["index"], axis=1)
 999 | 
1000 |                 sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d = sample_counter(appliances[i], appliances[rand_app[0]], 0, 0, 0, sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d, app1_series, app2['power']['active'], 0,0,0)
1001 | 
1002 | 
1003 | 
1004 |                 app1_series = app1_series.add(s_app, fill_value=0)
1005 | 
1006 |                 rand_shift = np.random.randint(number_shift, size=1)
1007 |                 app2_series = padd_shift(rand_shift[0], app2)
1008 |                 app2_series = app2_series.add(s_app, fill_value=0)
1009 | 
1010 |                 vector = app2_series.add(app1_series, fill_value=0)
1011 |                 aggregate = vector.add(s, fill_value=0)
1012 |                 aggregate = aggregate.to_numpy()
1013 |                 aggregate[aggregate < 0] = 0
1014 |                 aggregate = pd.Series(aggregate)
1015 |                 aggregate_.append(aggregate)
1016 | 
1017 |                 list_vectors_strong = strong_labels_creation(appliance1=appliances[i], app1=app1_series,
1018 |                                                                      appliance2=appliances[rand_app[0]], app2=app2_series,
1019 |                                                                      appliance3=0, app3=0, appliance4=0, app4=0, appliance5=0,
1020 |                                                                      app5=0, flag=4)
1021 |                 final_strong.append(list_vectors_strong)
1022 |                 vector_weak = [0, 0, 0, 0, 0]
1023 |                 vector_weak[i] = 1
1024 |                 vector_weak[rand_app[0]] = 1
1025 |                 print(vector_weak)
1026 |                 final_weak.append(vector_weak)
1027 |                 indexes_annotation(appliances[i], appliances[rand_app[0]], 0, 0, 0, rand_1[0],
1028 |                                            rand_2[0], 0, 0, 0)
1029 | 
1030 |             else:
1031 |                 break
1032 | 
1033 |         sample_count_k = 0
1034 |         sample_count_m = 0
1035 |         sample_count_f = 0
1036 |         sample_count_w = 0
1037 |         sample_count_d = 0
1038 | 
1039 |         print("Aggregate with three appliances")
1040 |         for a in range(125000):
1041 |             if not (
1042 |                     sample_count_k > samples_per_class and sample_count_m > samples_per_class and sample_count_f > samples_per_class and sample_count_w > samples_per_class and sample_count_d > samples_per_class):
1043 | 
1044 |                 i = np.random.randint(len(appliances), size=1)
1045 |                 i = i[0]
1046 | 
1047 |                 rand_app = np.random.randint(len(appliances), size=1)
1048 |                 rand_app2 = 0
1049 |                 mu, sigma = 0, 1
1050 |                 s = np.random.normal(mu, sigma, window_length)
1051 |                 s = pd.Series(s)
1052 |                 s_app = pd.Series(np.zeros(window_length))
1053 | 
1054 | 
1055 | 
1056 |                 if (appliances[i] =='kettle' and sample_count_k >= samples_per_class):
1057 |                     i = np.random.randint(len(appliances), size=1)
1058 |                     i = i[0]
1059 |                 elif (appliances[i] == 'microwave' and sample_count_m >= samples_per_class):
1060 |                     i = np.random.randint(len(appliances), size=1)
1061 |                     i = i[0]
1062 |                 elif (appliances[i] == 'fridge' and sample_count_f >= samples_per_class):
1063 |                     i = np.random.randint(len(appliances) - 3, size=1)
1064 |                     i = i[0]
1065 |                 elif (appliances[i] == 'washing machine' and sample_count_w >= samples_per_class):
1066 |                     i = np.random.randint(len(appliances) - 3, size=1)
1067 |                     i = i[0]
1068 |                 elif (appliances[i] == 'dish washer' and sample_count_d > samples_per_class):
1069 |                     i = np.random.randint(len(appliances) - 3, size=1)
1070 |                     i = i[0]
1071 | 
1072 | 
1073 |                 rand_1 = np.random.randint(len(activations_[appliances[i]]), size=1)
1074 |                 app1 = activations_[appliances[i]][rand_1[0]]
1075 |                 app1 = app1.reset_index()
1076 |                 app1 = app1.drop(["index"], axis=1)
1077 |                 app1_series = app1['power']['active']
1078 |                 app1_series = app1_series.add(s_app, fill_value=0)
1079 |                 if rand_app[0] == i:
1080 |                         rand_app = np.random.randint(cases, size=1)
1081 |                 if rand_app[0] == i:
1082 |                         rand_app = np.random.randint(cases, size=1)
1083 |                 if i != 0 and rand_app[0] != 0:
1084 |                         rand_app2 = 0
1085 |                 if i != 1 and rand_app[0] != 1:
1086 |                         rand_app2 = 1
1087 |                 if i != 2 and rand_app[0] != 2:
1088 |                         rand_app2 = 2
1089 |                 if i != 3 and rand_app[0] != 3 and i != 4 and rand_app[0] != 4:
1090 |                         rand_app2 = 3
1091 |                 if i != 4 and rand_app[0] != 4 and i!= 3 and rand_app[0] != 3:
1092 |                         rand_app2 = 4
1093 | 
1094 | 
1095 | 
1096 |                 if (appliances[rand_app[0]] == 'kettle' and sample_count_k >= samples_per_class):
1097 |                         rand_app = np.random.randint(len(appliances) - 3, size=1)
1098 |                 elif (appliances[rand_app[0]] == 'microwave' and sample_count_m >= samples_per_class):
1099 |                         rand_app = np.random.randint(len(appliances), size=1)
1100 |                 elif (appliances[rand_app[0]] == 'fridge' and sample_count_f >= samples_per_class):
1101 |                         rand_app = np.random.randint(len(appliances) - 1, size=1)
1102 |                 elif (appliances[rand_app[0]] == 'washing machine' and sample_count_w >= samples_per_class):
1103 |                         rand_app = np.random.randint(len(appliances) - 3, size=1)
1104 |                 elif (appliances[rand_app[0]] == 'dish washer' and sample_count_d > samples_per_class):
1105 |                         rand_app = np.random.randint(len(appliances) - 2 , size=1)
1106 | 
1107 | 
1108 | 
1109 |                 if (appliances[rand_app2] == 'kettle' and sample_count_k >= samples_per_class):
1110 |                         rand_app2 = 1
1111 |                 elif (appliances[rand_app2] == 'microwave' and sample_count_m >= samples_per_class):
1112 |                         rand_app2 = 0
1113 |                 elif (appliances[rand_app2] == 'fridge' and sample_count_f >= samples_per_class):
1114 |                         rand_app2 = 0
1115 |                 elif (appliances[rand_app2] == 'washing machine' and sample_count_w >= samples_per_class):
1116 |                         rand_app2 = 1
1117 |                 elif (appliances[rand_app2] == 'dish washer' and sample_count_d > samples_per_class):
1118 |                         rand_app2 = 1
1119 | 
1120 | 
1121 |                 rand_2 = np.random.randint(len(activations_[appliances[rand_app[0]]]), size=1)
1122 |                 app2 = activations_[appliances[rand_app[0]]][rand_2[0]]
1123 |                 app2 = app2.reset_index()
1124 |                 app2 = app2.drop(["index"], axis=1)
1125 | 
1126 |                 rand_shift = np.random.randint(number_shift, size=1)
1127 |                 app2_series = padd_shift(rand_shift[0], app2)
1128 |                 app2_series = app2_series.add(s_app, fill_value=0)
1129 | 
1130 |                 rand_3 = np.random.randint(len(activations_[appliances[rand_app2]]), size=1)
1131 |                 app3 = activations_[appliances[rand_app2]][rand_3[0]]
1132 |                 app3 = app3.reset_index()
1133 |                 app3 = app3.drop(["index"], axis=1)
1134 | 
1135 |                 sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d = sample_counter(appliances[i], appliances[rand_app[0]], appliances[rand_app2], 0, 0, sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d, app1['power']['active'], app2['power']['active'], app3['power']['active'],0,0)
1136 | 
1137 |                 rand_shift = np.random.randint(number_shift, size=1)
1138 |                 app3_series = padd_shift(rand_shift[0], app3)
1139 |                 app3_series = app3_series.add(s_app, fill_value=0)
1140 | 
1141 |                 vector = app2_series.add(app1_series, fill_value=0)
1142 |                 vector = vector.add(app3_series, fill_value=0)
1143 |                 aggregate = vector.add(s, fill_value=0)
1144 |                 aggregate = aggregate.to_numpy()
1145 |                 aggregate[aggregate < 0] = 0
1146 |                 aggregate = pd.Series(aggregate)
1147 |                 aggregate_.append(aggregate)
1148 | 
1149 | 
1150 |                 list_vectors_strong = strong_labels_creation(appliance1=appliances[i], app1=app1_series,
1151 |                                                                  appliance2=appliances[rand_app[0]], app2=app2_series,
1152 |                                                                  appliance3=appliances[rand_app2], app3=app3_series, appliance4=0,
1153 |                                                                  app4=0, appliance5=0, app5=0, flag=4)
1154 |                 final_strong.append(list_vectors_strong)
1155 |                 vector_weak = [0, 0, 0, 0, 0]
1156 |                 vector_weak[i] = 1
1157 |                 vector_weak[rand_app[0]] = 1
1158 |                 vector_weak[rand_app2] = 1
1159 |                 print(vector_weak)
1160 |                 final_weak.append(vector_weak)
1161 | 
1162 |                 indexes_annotation(appliances[i], appliances[rand_app[0]], appliances[rand_app2], 0, 0, rand_1[0], rand_2[0], rand_3[0], 0, 0)
1163 | 
1164 |             else:
1165 |                 break
1166 | 
1167 |         sample_count_k = 0
1168 |         sample_count_m = 0
1169 |         sample_count_f = 0
1170 |         sample_count_w = 0
1171 |         sample_count_d = 0
1172 | 
1173 |         # Aggregate is the sum of 4 appliances
1174 | 
1175 |         for a in range(125000):
1176 |             if not (
1177 |                     sample_count_k > samples_per_class and sample_count_m > samples_per_class and sample_count_f > samples_per_class and sample_count_w > samples_per_class and sample_count_d > samples_per_class):
1178 | 
1179 |                     i = np.random.randint(len(appliances), size=1)
1180 |                     i = i[0]
1181 |                     rand_app = np.random.randint(len(appliances), size=1)
1182 |                     rand_app2 = 0
1183 |                     rand_app3 = 0
1184 |                     mu, sigma = 0, 1
1185 |                     s = np.random.normal(mu, sigma, window_length)
1186 |                     s = pd.Series(s)
1187 |                     s_app = pd.Series(np.zeros(window_length))
1188 | 
1189 |                     print("Appliance 1")
1190 |                     print(appliances[i])
1191 | 
1192 |                     if (appliances[i] == 'kettle' and sample_count_k >= samples_per_class):
1193 |                         i = np.random.randint(len(appliances) - 2 , size=1)
1194 |                         i = i[0]
1195 |                     elif (appliances[i] == 'microwave' and sample_count_m >= samples_per_class):
1196 |                         i = np.random.randint(len(appliances) - 1, size=1)
1197 |                         i = i[0]
1198 |                     elif (appliances[i] == 'fridge' and sample_count_f >= samples_per_class):
1199 |                         i = np.random.randint(len(appliances) - 3, size=1)
1200 |                         i = i[0]
1201 |                     elif (appliances[i] == 'washing machine' and sample_count_w >= samples_per_class):
1202 |                         i = np.random.randint(len(appliances) - 2, size=1)
1203 |                         i = i[0]
1204 |                     elif (appliances[i] == 'dish washer' and sample_count_d > samples_per_class):
1205 |                         i = np.random.randint(len(appliances) - 2, size=1)
1206 |                         i = i[0]
1207 | 
1208 | 
1209 |                     rand_1 = np.random.randint(len(activations_[appliances[i]]), size=1)
1210 |                     app1 = activations_[appliances[i]][rand_1[0]]
1211 |                     app1 = app1.reset_index()
1212 |                     app1 = app1.drop(["index"], axis=1)
1213 |                     app1_series = app1['power']['active']
1214 |                     app1_series = app1_series.add(s_app, fill_value=0)
1215 |                     if rand_app[0] == i:
1216 |                         rand_app = np.random.randint(cases, size=1)
1217 |                     if rand_app[0] == i:
1218 |                         rand_app = np.random.randint(cases, size=1)
1219 |                     if i != 0 and rand_app[0] != 0:
1220 |                         rand_app2 = 0
1221 |                     if i != 1 and rand_app[0] != 1:
1222 |                         rand_app2 = 1
1223 |                     if i != 2 and rand_app[0] != 2:
1224 |                         rand_app2 = 2
1225 |                     if i != 3 and rand_app[0] != 3 and i != 4 and rand_app[0] != 4:
1226 |                         rand_app2 = 3
1227 |                     if i != 4 and rand_app[0] != 4 and i != 3 and rand_app[0] != 3:
1228 |                         rand_app2 = 4
1229 |                     if i != 0 and rand_app[0] != 0 and rand_app2 != 0:
1230 |                         rand_app3 = 0
1231 |                     if i != 1 and rand_app[0] != 1 and rand_app2 != 1:
1232 |                         rand_app3 = 1
1233 |                     if i != 2 and rand_app[0] != 2 and rand_app2 != 2 and i != 4 and rand_app[0] != 4 and rand_app2 != 4:
1234 |                         rand_app3 = 2
1235 |                     if i != 3 and rand_app[0] != 3 and rand_app2 != 3 and i != 4 and rand_app[0] != 4 and rand_app2 != 4:
1236 |                         rand_app3 = 3
1237 |                     if i != 4 and rand_app[0] != 4 and rand_app2 != 4 and i!=3 and rand_app[0] != 3 and rand_app2 !=3:
1238 |                         rand_app3 = 4
1239 | 
1240 | 
1241 | 
1242 |                     if (appliances[rand_app[0]] == 'kettle' and sample_count_k >= samples_per_class):
1243 |                         rand_app = np.random.randint(len(appliances) - 2, size=1)
1244 |                     elif (appliances[rand_app[0]] == 'microwave' and sample_count_m >= samples_per_class):
1245 |                         rand_app = np.random.randint(len(appliances), size=1)
1246 |                     elif (appliances[rand_app[0]] == 'fridge' and sample_count_f >= samples_per_class):
1247 |                         rand_app = np.random.randint(len(appliances) - 3, size=1)
1248 |                     elif (appliances[rand_app[0]] == 'washing machine' and sample_count_w >= samples_per_class):
1249 |                         rand_app = np.random.randint(len(appliances) - 3, size=1)
1250 |                     elif (appliances[rand_app[0]] == 'dish washer' and sample_count_d > samples_per_class):
1251 |                         rand_app = np.random.randint(len(appliances) - 3, size=1)
1252 | 
1253 |                     if (appliances[rand_app2] == 'kettle' and sample_count_k >= samples_per_class):
1254 |                         rand_app2 = 1
1255 |                     elif (appliances[rand_app2] == 'microwave' and sample_count_m >= samples_per_class):
1256 |                         rand_app2 = 0
1257 |                     elif (appliances[rand_app2] == 'fridge' and sample_count_f >= samples_per_class):
1258 |                         rand_app2 = 1
1259 |                     elif (appliances[rand_app2] == 'washing machine' and sample_count_w >= samples_per_class):
1260 |                         rand_app2 = 0
1261 |                     elif (appliances[rand_app2] == 'dish washer' and sample_count_d > samples_per_class):
1262 |                         rand_app2 = 1
1263 | 
1264 |                     if (appliances[rand_app3] == 'kettle' and sample_count_k >= samples_per_class):
1265 |                         rand_app3 = 0
1266 |                     elif (appliances[rand_app3] == 'microwave' and sample_count_m >= samples_per_class):
1267 |                         rand_app3 = 1
1268 |                     elif (appliances[rand_app3] == 'fridge' and sample_count_f >= samples_per_class):
1269 |                         rand_app3 = 0
1270 |                     elif (appliances[rand_app3] == 'washing machine' and sample_count_w >= samples_per_class):
1271 |                         rand_app3 = 1
1272 |                     elif (appliances[rand_app3] == 'dish washer' and sample_count_d > samples_per_class):
1273 |                         rand_app3 = 0
1274 | 
1275 | 
1276 |                     rand_2 = np.random.randint(len(activations_[appliances[rand_app[0]]]), size=1)
1277 |                     app2 = activations_[appliances[rand_app[0]]][rand_2[0]]
1278 |                     app2 = app2.reset_index()
1279 |                     app2 = app2.drop(["index"], axis=1)
1280 |                     rand_shift = np.random.randint(number_shift, size=1)
1281 |                     app2_series = padd_shift(rand_shift[0], app2)
1282 |                     app2_series = app2_series.add(s_app, fill_value=0)
1283 | 
1284 |                     rand_3 = np.random.randint(len(activations_[appliances[rand_app2]]), size=1)
1285 |                     app3 = activations_[appliances[rand_app2]][rand_3[0]]
1286 |                     app3 = app3.reset_index()
1287 |                     app3 = app3.drop(["index"], axis=1)
1288 |                     rand_shift = np.random.randint(number_shift, size=1)
1289 |                     app3_series = padd_shift(rand_shift[0], app3)
1290 |                     app3_series = app3_series.add(s_app, fill_value=0)
1291 | 
1292 |                     rand_4 = np.random.randint(len(activations_[appliances[rand_app3]]), size=1)
1293 |                     app4 = activations_[appliances[rand_app3]][rand_4[0]]
1294 |                     app4 = app4.reset_index()
1295 |                     app4 = app4.drop(["index"], axis=1)
1296 |                     rand_shift = np.random.randint(number_shift, size=1)
1297 |                     app4_series = padd_shift(rand_shift[0], app4)
1298 |                     app4_series = app4_series.add(s_app, fill_value=0)
1299 | 
1300 |                     vector = app2_series.add(app1['power']['active'], fill_value=0)
1301 |                     vector = vector.add(app3_series, fill_value=0)
1302 |                     vector = vector.add(app4_series, fill_value=0)
1303 |                     aggregate = vector.add(s, fill_value=0)
1304 |                     aggregate = aggregate.to_numpy()
1305 |                     aggregate[aggregate<0] = 0
1306 |                     aggregate = pd.Series(aggregate)
1307 |                     aggregate_.append(aggregate)
1308 | 
1309 |                     sample_count_k, sample_count_m, sample_count_f, sample_count_w, sample_count_d = sample_counter(
1310 |                         appliances[i], appliances[rand_app[0]], appliances[rand_app2], appliances[rand_app3], 0, sample_count_k, sample_count_m,
1311 |                         sample_count_f, sample_count_w, sample_count_d, app1['power']['active'], app2['power']['active'],
1312 |                         app3['power']['active'], app4['power']['active'], 0)
1313 | 
1314 |                     #plt.plot(aggregate)
1315 |                     #plt.show()
1316 | 
1317 |                     list_vectors_strong = strong_labels_creation(appliance1=appliances[i], app1=app1_series,
1318 |                                                                  appliance2=appliances[rand_app[0]], app2=app2_series,
1319 |                                                                  appliance3=appliances[rand_app2], app3=app3_series,
1320 |                                                                  appliance4=appliances[rand_app3], app4=app4_series, appliance5=0,
1321 |                                                                  app5=0, flag=4)
1322 |                     final_strong.append(list_vectors_strong)
1323 |                     vector_weak = [0, 0, 0, 0, 0]
1324 |                     vector_weak[i] = 1
1325 |                     vector_weak[rand_app[0]] = 1
1326 |                     vector_weak[rand_app2] = 1
1327 |                     vector_weak[rand_app3] = 1
1328 |                     print(vector_weak)
1329 |                     final_weak.append(vector_weak)
1330 |                     indexes_annotation(appliances[i], appliances[rand_app[0]], appliances[rand_app2], appliances[rand_app3], 0, rand_1[0],
1331 |                                        rand_2[0], rand_3[0], rand_4[0], 0)
1332 | 
1333 |             else:
1334 |                 break
1335 | 
1336 |     indices_of_activations['kettle'] = list_kettle
1337 |     indices_of_activations['microwave'] = list_micro
1338 |     indices_of_activations['fridge'] = list_fridge
1339 |     indices_of_activations['washing_machine'] = list_wash
1340 |     indices_of_activations['dish_washer'] = list_dish
1341 | 
1342 |     repetitions_k, repetitions_m, repetitions_f, repetitions_w, repetitions_d = repetition_counter(appliances)
1343 |     print("repetitions counted!")
1344 | 
1345 | 
1346 |     with open(destination_path + 'phase_repetition_' + str(building) +'_.txt', 'a+') as file:
1347 |          print("Repetitions Kettle:", file=file)
1348 |          print(repetitions_k, file=file)
1349 |          print("Repetitions Micro:", file=file)
1350 |          print(repetitions_m, file=file)
1351 |          print("Repetitions Fridge:", file=file)
1352 |          print(repetitions_f, file=file)
1353 |          print("Repetitions Wash:", file=file)
1354 |          print(repetitions_w, file=file)
1355 |          print("Repetitions Dish:", file=file)
1356 |          print(repetitions_d, file=file)
1357 | 
1358 |     return aggregate_, final_strong, final_weak
1359 | 
def aggregate_creation(appliances=None, building=None):
    """Create the synthetic aggregate bags and their labels for one building.

    The activations for ``appliances`` in ``building`` are loaded with
    ``activation_appliances_nilmtk`` and handed to ``data_iteration`` together
    with the per-building sample budget.

    Args:
        appliances: appliance names, forwarded unchanged to
            ``activation_appliances_nilmtk``.
        building: building number. Values 1-5 select the matching
            ``samples_per_class_<n>`` module constant; for any other value
            the module-level ``samples_per_class`` is left as it was.

    Returns:
        The ``(aggregate, final_strong, final_weak)`` triple produced by
        ``data_iteration``.

    Side effects:
        Rebinds the module-level ``samples_per_class``.
    """
    global samples_per_class
    activations_ = activation_appliances_nilmtk(appliances, building)

    # Only the constant of the requested building is looked up, so the other
    # ``samples_per_class_<n>`` names are never evaluated.
    if building == 1:
        samples_per_class = samples_per_class_1
    elif building == 2:
        samples_per_class = samples_per_class_2
    elif building == 3:
        samples_per_class = samples_per_class_3
    elif building == 4:
        samples_per_class = samples_per_class_4
    elif building == 5:
        samples_per_class = samples_per_class_5

    # Forward the ``building`` argument itself (previously the CLI value
    # ``arguments.building`` was used here, silently ignoring the parameter),
    # so activations, sample budget and data_iteration all refer to the same
    # building.
    aggregate, final_strong, final_weak = data_iteration(
        activations_, samples_per_class=samples_per_class, building=building)

    return aggregate, final_strong, final_weak
1380 | 
if __name__ == "__main__":
    # Script entry point: build the bags for the building given on the
    # command line, store every accepted aggregate as a .npy file and dump
    # all strong/weak labels of that building into one JSON file.

    # These are module-level names assigned before aggregate_creation runs;
    # functions elsewhere in the file may read them — TODO confirm.
    window_length = arguments.window_length
    building = arguments.building
    print("Building:", building)
    aggregate, final_strong, final_weak = aggregate_creation(
        appliances=appliances, building=arguments.building)

    # Select the label dictionary belonging to the chosen building.
    if building == 1:
        dict_ = dict_1
    elif building == 2:
        dict_ = dict_2
    elif building == 3:
        dict_ = dict_3
    elif building == 4:
        dict_ = dict_4
    elif building == 5:
        dict_ = dict_5

    house_dir = "../aggregate_data/house_" + str(building)

    for bag, series in enumerate(aggregate):
        agg = series.to_numpy()
        strong = final_strong[bag]

        # Data correction for anomalous activation lengths: drop the bag when
        # the aggregate or any of its five strong-label vectors is longer
        # than 2550 elements.
        if len(agg) > 2550 or any(len(strong[k]) > 2550 for k in range(5)):
            continue

        np.save(house_dir + "/aggregate_%d" % bag, agg)

        # Convert the strong-label vectors in place to plain lists so that
        # they can be serialised by json.dump below.
        for k, vec in enumerate(strong):
            strong[k] = vec.tolist()

        weak = final_weak[bag]
        dict_['labels_%d' % bag] = {'strong': strong, 'weak': weak}

    with open('../labels_'+ str(building) +'.json', 'w') as outfile:
        json.dump(dict_, outfile)

    # Note: this counts every generated bag, including the ones skipped above.
    print("Total number of bags:", len(final_strong))

    # Drop the large containers and reclaim their memory right away.
    del final_weak, final_strong, aggregate
    gc.collect()
1432 | 
1433 | 
1434 | 
1435 | 


--------------------------------------------------------------------------------