├── utils ├── __init__.py ├── multiprocessing_tools.py ├── logger.py └── chaotic_toolkit.py ├── datasources ├── __init__.py ├── paths_manager.py ├── labels_factory.py └── datasource.py ├── experiments ├── __init__.py ├── run_model_selection_1h_cv5.py ├── run_model_selection_5mins_cv5.py ├── run_model_selection_5mins_cv5_redd.py ├── run_generic_experiment.py ├── run_state_of_the_art.py └── experiments.py ├── nilmlab ├── __init__.py ├── lab_exceptions.py ├── factories.py ├── tstransformers.py ├── lab.py └── exp_model_list.py ├── results └── __init__.py ├── data_exploration ├── __init__.py └── time_delay_embedding │ ├── redd1.pdf │ └── delay_embedding_parameterization-uk_dale.ipynb ├── pretrained_models └── clf-v1.pkl ├── requirements.txt ├── LICENSE ├── .gitignore └── README.md /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datasources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nilmlab/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /results/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_exploration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrained_models/clf-v1.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChristoferNal/multi-nilm/HEAD/pretrained_models/clf-v1.pkl -------------------------------------------------------------------------------- /nilmlab/lab_exceptions.py: -------------------------------------------------------------------------------- 1 | class NoSiteMeterException(Exception): 2 | pass 3 | 4 | 5 | class LabelNormalizationError(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /data_exploration/time_delay_embedding/redd1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChristoferNal/multi-nilm/HEAD/data_exploration/time_delay_embedding/redd1.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy~=1.18.1 2 | scikit-learn~=0.21.3 3 | pandas~=1.0.1 4 | loguru~=0.4.1 5 | nilmtk~=0.4.0 6 | pyts~=0.10.0 7 | tslearn~=0.3.0 8 | scikit-multilearn~=0.2.0 9 | psutil~=5.6.7 10 | matplotlib~=3.2.0 11 | fuzzywuzzy~=0.17.0 12 | numba~=0.48.0 13 | PyWavelets -------------------------------------------------------------------------------- /datasources/paths_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | dirname = os.path.dirname(__file__) 4 | UK_DALE = os.path.join(dirname, '../../Datasets/UKDALE/ukdale.h5') 5 | REDD = os.path.join(dirname, '../../Datasets/REDD/redd.h5') 6 | 7 | SAVED_MODEL = os.path.join(dirname, "../pretrained_models/clf-v1.pkl") 8 | PATH_SIGNAL2VEC = os.path.join(dirname, '../pretrained_models/signal2vec-v1.csv') 9 | -------------------------------------------------------------------------------- /utils/multiprocessing_tools.py: -------------------------------------------------------------------------------- 
1 | from multiprocessing.pool import Pool 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def parallelize_dataframe(df, func, num_partitions, num_cores): 8 | df_split = np.array_split(df, num_partitions) 9 | pool = Pool(num_cores) 10 | df = pd.concat(pool.map(func, df_split)) 11 | pool.close() 12 | pool.join() 13 | return df 14 | -------------------------------------------------------------------------------- /experiments/run_model_selection_1h_cv5.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from experiments.experiments import ModelSelectionExperiment 4 | from nilmlab.lab import TimeSeriesLength 5 | 6 | dirname = os.path.dirname(__file__) 7 | single_building_exp_checkpoint = os.path.join(dirname, '../results/cv1h.csv') 8 | 9 | exp = ModelSelectionExperiment(cv=5) 10 | exp.set_ts_len(TimeSeriesLength.WINDOW_1_HOUR) 11 | exp.set_checkpoint_file(single_building_exp_checkpoint) 12 | exp.run() -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | DEBUG: bool = True 4 | TIMING: bool = True 5 | TRACE_MEMORY: bool = True 6 | INFO: bool = True 7 | MB: int = 1024 * 1024 8 | 9 | 10 | def debug(d): 11 | if DEBUG: 12 | print('DEBUG: ' + d) 13 | 14 | 15 | def info(i): 16 | if INFO: 17 | print('INFO: ' + i) 18 | 19 | 20 | def timing(t): 21 | if TIMING: 22 | print('TIMING: ' + t) 23 | 24 | 25 | def debug_mem(message, obj): 26 | if TRACE_MEMORY: 27 | print('MEMORY: {}'.format(message.format(sys.getsizeof(obj) / MB))) 28 | 29 | 30 | def trace_mem(o): 31 | return sys.getsizeof(o) / MB 32 | -------------------------------------------------------------------------------- /experiments/run_model_selection_5mins_cv5.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from experiments.experiments import 
ModelSelectionExperiment 4 | from nilmlab import exp_model_list 5 | from nilmlab.lab import TimeSeriesLength 6 | 7 | dirname = os.path.dirname(__file__) 8 | single_building_exp_checkpoint = os.path.join(dirname, '../results/cv5mins_ukdale1.csv') 9 | 10 | exp = ModelSelectionExperiment() 11 | exp.set_ts_len(TimeSeriesLength.WINDOW_5_MINS) 12 | exp.set_checkpoint_file(single_building_exp_checkpoint) 13 | 14 | exp.set_transformers(exp_model_list.model_selection_delay_embeddings) 15 | exp.set_classifiers(exp_model_list.model_selection_rakel + exp_model_list.model_selection_mlknn) 16 | 17 | exp.run() 18 | -------------------------------------------------------------------------------- /experiments/run_model_selection_5mins_cv5_redd.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from experiments.experiments import REDDModelSelectionExperiment 4 | 5 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152 6 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 7 | from nilmlab import exp_model_list 8 | from nilmlab.lab import TimeSeriesLength 9 | 10 | dirname = os.path.dirname(__file__) 11 | single_building_exp_checkpoint = os.path.join(dirname, '../results/cv5mins_redd3.csv') 12 | 13 | exp = REDDModelSelectionExperiment(building=3) 14 | exp.set_ts_len(TimeSeriesLength.WINDOW_5_MINS) 15 | exp.set_checkpoint_file(single_building_exp_checkpoint) 16 | exp.set_transformers(exp_model_list.cv_signal2vec) 17 | exp.set_classifiers(exp_model_list.cv_signal2vec_clf) 18 | exp.run() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Christoforos Nalmpantis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, 
including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # PyCharm 92 | .idea/ 93 | 94 | # For mac 95 | .DS_Store 96 | .~lock* 97 | results/* 98 | !results/__init__.py 99 | -------------------------------------------------------------------------------- /experiments/run_generic_experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | 4 | from datasources.datasource import DatasourceFactory 5 | from experiments.experiments import GenericExperiment 6 | from nilmlab import exp_model_list 7 | from nilmlab.factories import EnvironmentFactory 8 | from nilmlab.lab import TimeSeriesLength 9 | from nilmlab.exp_model_list import CLF_MODELS, TRANSFORMER_MODELS, SAX 10 | from utils.logger import debug 11 | 12 | dirname = os.path.dirname(__file__) 13 | dirname = os.path.join(dirname, "../results") 14 | if not os.path.exists(dirname): 15 | os.mkdir(dirname) 16 | same_datasource_exp_checkpoint = os.path.join(dirname, 'results_from_generic_exp.csv') 17 | 18 | appliances = ['microwave', 'dish washer', 'fridge', 'kettle', 'washer dryer', 19 | 'toaster', 
'television', 'hair dryer', 'vacuum cleaner'] 20 | env = EnvironmentFactory.create_env_single_building(datasource=DatasourceFactory.create_uk_dale_datasource(), 21 | appliances=appliances) 22 | 23 | experiment = GenericExperiment(env) 24 | 25 | window = TimeSeriesLength.WINDOW_10_MINS 26 | models = {} 27 | if window == TimeSeriesLength.WINDOW_10_MINS: 28 | models = exp_model_list.selected_models_10mins 29 | elif window == TimeSeriesLength.WINDOW_1_HOUR: 30 | models = exp_model_list.selected_models_1h 31 | elif window == TimeSeriesLength.WINDOW_2_HOURS: 32 | models = exp_model_list.selected_models_2h 33 | elif window == TimeSeriesLength.WINDOW_8_HOURS: 34 | models = exp_model_list.selected_models_8h 35 | elif window == TimeSeriesLength.WINDOW_4_HOURS: 36 | models = exp_model_list.selected_models_4h 37 | elif window == TimeSeriesLength.WINDOW_1_DAY: 38 | models = exp_model_list.selected_models_24h 39 | 40 | for k in models.keys(): 41 | if k is not SAX: 42 | continue 43 | experiment.setup_running_params( 44 | transformer_models=models[k][TRANSFORMER_MODELS], 45 | classifier_models=models[k][CLF_MODELS], 46 | train_appliances=appliances, 47 | test_appliances=appliances, 48 | ts_len=window, 49 | repeat=1) 50 | experiment.set_checkpoint_file(same_datasource_exp_checkpoint) 51 | tb = "No error" 52 | try: 53 | experiment.run() 54 | except Exception as e: 55 | tb = traceback.format_exc() 56 | debug(tb) 57 | debug(f"Failed for {k}") 58 | debug(f"{e}") 59 | -------------------------------------------------------------------------------- /datasources/labels_factory.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Dict, Tuple 3 | 4 | import loguru 5 | from nilmtk import MeterGroup 6 | from numba import njit 7 | import numpy as np 8 | from pandas import DataFrame 9 | 10 | from datasources.datasource import SITE_METER 11 | from utils.logger import TIMING, debug, timing 12 | 13 | 14 | def 
create_multilabels_from_meters(meters: DataFrame, meter_group: MeterGroup, labels2id: dict) -> DataFrame: 15 | """ 16 | Creates multi labels from the given meter group using a dictionary as a lookup table. 17 | Args: 18 | meters (DataFrame): 19 | meter_group (MeterGroup): 20 | labels2id (dict): 21 | 22 | Returns: 23 | A DataFrame with the multi labels. 24 | """ 25 | start_time = time.time() if TIMING else None 26 | labels = dict() 27 | for col in meters.columns: 28 | loguru.logger.info(f"Creating multilabels from meter {col}, " 29 | f"\nlabels2id[col] {labels2id[col]}" 30 | f"\nmetergroup[labels2id[col]] {meter_group[labels2id[col]]}") 31 | meter = meter_group[labels2id[col]] 32 | threshold = meter.on_power_threshold() 33 | vals = meters[col].values.astype(float) 34 | if vals is None or col == SITE_METER: 35 | loguru.logger.debug(f"Skipping {col} - {vals}") 36 | continue 37 | loguru.logger.debug(f"meters[col].values.astype(float) {col} - {vals}") 38 | labels[col] = create_labels(vals, threshold) 39 | timing('Create multilabels from meters {}'.format(round(time.time() - start_time, 2))) 40 | return DataFrame(labels) 41 | 42 | 43 | def create_multilabels_from_many_buildings(data_per_building: Dict[int, Tuple[DataFrame, MeterGroup, Dict]]) \ 44 | -> Dict[int, DataFrame]: 45 | """ 46 | Creates multi labels given more than one buildings. 47 | Args: 48 | data_per_building (Dict[int, Tuple[DataFrame, MeterGroup, Dict]]): A dictionary with keys the numbers of the buildings and values tuples containing 49 | the necessary data to create labels by calling create_multilabels_from_meters() 50 | 51 | Returns: 52 | A dictionary with the labels for each building. 
53 | """ 54 | labels_per_building = dict() 55 | for building in data_per_building.keys(): 56 | df, metergroup, label2id = data_per_building[building] 57 | labels_df = create_multilabels_from_meters(df, metergroup, label2id) 58 | labels_per_building[building] = labels_df 59 | return labels_per_building 60 | 61 | 62 | def create_multilabels(appliances: dict, meter_group: MeterGroup) -> dict: 63 | """ 64 | Creates labels from the given meter group for the given appliances. 65 | Args: 66 | appliances (dict): dict with keys ['oven', 'microwave', 'dish washer', 'fridge freezer', 'kettle', 'washer dryer', 67 | 'toaster', 'boiler', 'television', 'hair dryer', 'vacuum cleaner', 'light'] 68 | 69 | meter_group (MeterGroup): A MeterGroup object. 70 | 71 | Returns: 72 | A dictionary with labels per meter. 73 | """ 74 | start_time = time.time() if TIMING else None 75 | labels = dict() 76 | 77 | for key in appliances.keys(): 78 | meter = meter_group.submeters()[key] 79 | threshold = meter.on_power_threshold() 80 | labels[meter.label() + str(meter.instance())] = create_labels(appliances[key], threshold) 81 | debug('{} threshold = {}'.format(meter.label(), threshold)) 82 | 83 | timing('Create multilabels {}'.format(round(time.time() - start_time, 2))) 84 | return labels 85 | 86 | 87 | @DeprecationWarning 88 | def apply_create_labels_on_df(dataframe, threshold): 89 | """This method is 300 times slower than create_labels which uses numba""" 90 | return dataframe.apply(lambda x: 1 if x >= threshold else 0) 91 | 92 | 93 | @njit(parallel=True) 94 | def create_labels(array, threshold): 95 | res = np.empty(array.shape) 96 | for i in range(len(array)): 97 | if array[i] >= threshold: 98 | res[i] = 1 99 | else: 100 | res[i] = 0 101 | return list(res) 102 | -------------------------------------------------------------------------------- /utils/chaotic_toolkit.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from 
sklearn.neighbors import NearestNeighbors 4 | 5 | from utils.logger import info 6 | 7 | """ 8 | Code reference https://www.kaggle.com/tigurius/introduction-to-taken-s-embedding 9 | """ 10 | 11 | def takens_embedding(series: np.ndarray, delay, dimension) -> np.ndarray: 12 | """ 13 | This function returns the Takens embedding of data with delay into dimension, 14 | delay*dimension must be < len(data) 15 | """ 16 | if delay * dimension > len(series): 17 | info(f'Not enough data for the given delay ({delay}) and dimension ({dimension}).' 18 | f'\ndelay * dimension > len(data): {delay * dimension} > {len(series)}') 19 | return series 20 | delay_embedding = np.array([series[0:len(series) - delay * dimension]]) 21 | for i in range(1, dimension): 22 | delay_embedding = np.append(delay_embedding, 23 | [series[i * delay:len(series) - delay * (dimension - i)]], axis=0) 24 | return delay_embedding 25 | 26 | 27 | def compute_mutual_information(series, delay, num_bins): 28 | """This function calculates the mutual information given the delay. 29 | First one calculates the minimium $x_{min}$ and maximum $x_{max}$ of the time-series. Then the interval $[x_{min}, 30 | x_{max}]$ is divided into a large number of bins. Denote by $P_k$ the probability that an element of the time-series 31 | is in the $k$th bin and by $P_{h,k}(\tau)$ the probability that $x_i$ is in the $h$th bin while $x_{i+\tau}$ is in 32 | the $k$th bin. Then the mutual information is 33 | $$ I(\tau) = - \sum_{h=1}^{nBins} \sum_{k=1}^{nBins} P_{h,k}(\tau) \log \frac{P_{h,k}(\tau)}{P_h P_k}.$$ 34 | The first minimum of $I(\tau)$ as a function of $\tau$ gives the optimal delay, since there we get largest 35 | information by adding $x_{i+\tau}$. All probabilities here are calculated as empirical probabilities. 
""" 36 | mutual_information = 0 37 | max_val = max(series) 38 | min_val = min(series) 39 | delayed_series = series[delay:len(series)] 40 | shortened_series = series[0:len(series) - delay] 41 | bin_size = abs(max_val - min_val) / num_bins 42 | prob_in_bin_dict = {} 43 | condition_to_be_in_bin = {} 44 | condition_delay_to_be_in_bin = {} 45 | 46 | for i in range(0, num_bins): 47 | memoize_prob(i, bin_size, condition_to_be_in_bin, min_val, prob_in_bin_dict, shortened_series) 48 | 49 | for j in range(0, num_bins): 50 | memoize_prob(j, bin_size, condition_to_be_in_bin, min_val, prob_in_bin_dict, shortened_series) 51 | 52 | if j not in condition_delay_to_be_in_bin: 53 | cond = compute_condition(j, bin_size, min_val, delayed_series) 54 | condition_delay_to_be_in_bin.update({j: cond}) 55 | 56 | p_ij = calculate_joint_prob(condition_delay_to_be_in_bin, condition_to_be_in_bin, i, j, shortened_series) 57 | if p_ij != 0 and prob_in_bin_dict[i] != 0 and prob_in_bin_dict[j] != 0: 58 | mutual_information -= p_ij * math.log(p_ij / (prob_in_bin_dict[i] * prob_in_bin_dict[j])) 59 | 60 | return mutual_information 61 | 62 | 63 | def calculate_joint_prob(condition_delay_to_be_in_bin, condition_to_be_in_bin, i, j, shortened_series): 64 | return len(shortened_series[condition_to_be_in_bin[i] & condition_delay_to_be_in_bin[j]]) / len(shortened_series) 65 | 66 | 67 | def memoize_prob(bin_index, bin_size, condition_to_be_in_bin, min_val, prob_in_bin_dict, shortened_series): 68 | if bin_index not in prob_in_bin_dict: 69 | compute_and_update_probability(bin_index, bin_size, condition_to_be_in_bin, 70 | min_val, prob_in_bin_dict, shortened_series) 71 | 72 | 73 | def compute_and_update_probability(bin_index, bin_size, condition_to_be_in_bin, min_val, prob_in_bin_dict, 74 | shortened_series): 75 | cond = compute_condition(bin_index, bin_size, min_val, shortened_series) 76 | condition_to_be_in_bin.update({bin_index: cond}) 77 | num_of_vals_in_bin = calculate_num_of_elements_in_bin(bin_index, 
condition_to_be_in_bin, shortened_series) 78 | prob_in_bin_dict.update({bin_index: num_of_vals_in_bin / len(shortened_series)}) 79 | 80 | 81 | def calculate_num_of_elements_in_bin(bin_index, condition_to_be_in_bin, shortened_series): 82 | return len(shortened_series[condition_to_be_in_bin[bin_index]]) 83 | 84 | 85 | def compute_condition(bin_index, bin_size, min_val, series): 86 | return (series >= (min_val + bin_index * bin_size)) & (series < (min_val + (bin_index + 1) * bin_size)) 87 | 88 | 89 | def calculate_false_nearest_neighours(data, delay, dimension) -> int: 90 | "Calculates the number of false nearest neighbours of embedding dimension" 91 | embedded_data = takens_embedding(data, delay, dimension) 92 | # the first nearest neighbour is the data point itself, so we choose the second one 93 | nearest_neighbors = NearestNeighbors(n_neighbors=2, algorithm='auto', n_jobs=-1).fit(embedded_data.transpose()) 94 | distances, indices = nearest_neighbors.kneighbors(embedded_data.transpose()) 95 | # two data points are nearest neighbours if their distance is smaller than the standard deviation 96 | epsilon = np.std(distances.flatten()) 97 | false_neighbors = 0 98 | for i in range(0, len(data) - delay * (dimension + 1)): 99 | if (0 < distances[i, 1]) and (distances[i, 1] < epsilon) and ((abs( 100 | data[i + dimension * delay] - data[indices[i, 1] + dimension * delay]) / distances[ 101 | i, 1]) > 10): 102 | false_neighbors += 1 103 | return false_neighbors 104 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MULTI-NILM: A novel framework for multi-label Non-Intrusive Load Monitoring 2 | 3 | ## Description 4 | This repository is based on our paper with title: 5 | ["On time series representations for multi-label NILM"](https://rdcu.be/b3Vh2) [1] 6 | and it can be used to replicate the experiments. 
It defines a framework for multi-label NILM systems and includes the following time series 7 | representations: **Signal2Vec, BOSS, SFA, WEASEL, DFT, SAX, 1d-SAX, PAA**; 8 | and an implementation of **delay embedding** using Takens' theorem. Feel free to reuse, modify and extend this repository. 9 | 10 | ## Multi-NILM framework 11 | Multi-NILM is a novel framework for efficient non-intrusive load monitoring systems. 12 | It has three inherent properties: 13 | - It utilizes a data representation for sufficient dimensionality reduction. 14 | - It uses lightweight disaggregation models. 15 | - It tackles the disaggregation problem as a multi-label classification problem. 16 | 17 | ## Examples 18 | Examples of experiments can be found under the directory _experiments_. 19 | The module [experiments.py](experiments/experiments.py) defines three types of experiments (_GenericExperiment, ModelSelectionExperiment_ 20 | and _REDDModelSelectionExperiment_). You can also create your own 21 | experiment by extending the abstract class _nilmlab.lab.Experiment_. 22 | 23 | After defining an experiment it requires only a few lines of code to set up and configure it. 24 | All files with names _run*.py_ are specific implementations that can be used as a reference. 25 | In order to run any of them it is as simple as: 26 | ```bash 27 | python -m experiments.run_generic_experiment 28 | ``` 29 | The results are saved under the directory _results_ as a CSV file containing information about the 30 | setup, the source of the data, the parameters, the classification models, the performance and others. 31 | 32 | ## Data 33 | 34 | Currently only **REDD** and **UK DALE** are supported, which have to be downloaded manually. 35 | The popular **NILMTK** toolkit is used for reading the energy data. 36 | 37 | ## Project structure 38 | A detailed structure of the project is presented below. The key points are: 39 | - 📂 __data\_exploration__: Contains helpful notebooks e.g.
how to define delay embedding parameters. 40 | - 📂 __datasources__: Includes modules related to data e.g. loading using nilmtk, processing labels and others. 41 | - 📂 __experiments__: Defines some experiments such as model selection and has examples on how to run the 42 | defined experiments. 43 | - 📂 __nilmlab__: This is the main code which encapsulates all the logic of the proposed framework 44 | and implements various time series representations. 45 | - 📂 __pretrained\_models__: Any pretrained models that are used for Signal2Vec [1,2]. 46 | - 📂 __results__: Results of the experiments will be saved in this directory. 47 | - 📂 __utils__: Various tools that have been developed to support the implementation of the various algorithms. 48 | 49 | 50 | - 📂 __multi\-nilm__ 51 | - 📄 [LICENSE](LICENSE) 52 | - 📄 [README.md](README.md) 53 | - 📄 [createtree.sh](createtree.sh) 54 | - 📂 __data\_exploration__: 55 | - 📄 [\_\_init\_\_.py](data_exploration/__init__.py) 56 | - 📂 __time\_delay\_embedding__ 57 | - 📄 [delay\_embedding\_parameterization\-redd.ipynb](data_exploration/time_delay_embedding/delay_embedding_parameterization-redd.ipynb) 58 | - 📄 [delay\_embedding\_parameterization\-uk\_dale.ipynb](data_exploration/time_delay_embedding/delay_embedding_parameterization-uk_dale.ipynb) 59 | - 📂 __datasources__ 60 | - 📄 [\_\_init\_\_.py](datasources/__init__.py) 61 | - 📄 [datasource.py](datasources/datasource.py) 62 | - 📄 [labels\_factory.py](datasources/labels_factory.py) 63 | - 📄 [paths\_manager.py](datasources/paths_manager.py) 64 | - 📂 __experiments__ 65 | - 📄 [\_\_init\_\_.py](experiments/__init__.py) 66 | - 📄 [experiments.py](experiments/experiments.py) 67 | - 📄 [run\_generic\_experiment.py](experiments/run_generic_experiment.py) 68 | - 📄 [run\_model\_selection\_1h\_cv5.py](experiments/run_model_selection_1h_cv5.py) 69 | - 📄 [run\_model\_selection\_5mins\_cv5.py](experiments/run_model_selection_5mins_cv5.py) 70 | - 📄 
[run\_model\_selection\_5mins\_cv5\_redd.py](experiments/run_model_selection_5mins_cv5_redd.py) 71 | - 📄 [run\_state\_of\_the\_art.py](experiments/run_state_of_the_art.py) 72 | - 📂 __nilmlab__ 73 | - 📄 [\_\_init\_\_.py](nilmlab/__init__.py) 74 | - 📄 [exp\_model\_list.py](nilmlab/exp_model_list.py) 75 | - 📄 [factories.py](nilmlab/factories.py) 76 | - 📄 [lab.py](nilmlab/lab.py) 77 | - 📄 [lab\_exceptions.py](nilmlab/lab_exceptions.py) 78 | - 📄 [tstransformers.py](nilmlab/tstransformers.py) 79 | - 📂 __pretrained\_models__ 80 | - 📄 [clf\-v1.pkl](pretrained_models/clf-v1.pkl) 81 | - 📄 [signal2vec\-v1.csv](pretrained_models/signal2vec-v1.csv) 82 | - 📄 [requirements.txt](requirements.txt) 83 | - 📂 __results__ 84 | - 📄 [\_\_init\_\_.py](results/__init__.py) 85 | - 📄 [tree.md](tree.md) 86 | - 📂 __utils__ 87 | - 📄 [\_\_init\_\_.py](utils/__init__.py) 88 | - 📄 [chaotic\_toolkit.py](utils/chaotic_toolkit.py) 89 | - 📄 [logger.py](utils/logger.py) 90 | - 📄 [multiprocessing\_tools.py](utils/multiprocessing_tools.py) 91 | 92 | ## Dependencies 93 | 94 | The code has been developed using python3.6 and the dependencies can be found in [requirements.txt](requirements.txt). 95 | - numpy~=1.18.1 96 | - scikit-learn~=0.21.3 97 | - pandas~=1.0.1 98 | - loguru~=0.4.1 99 | - nilmtk~=0.4.0 100 | - pyts~=0.10.0 https://github.com/johannfaouzi/pyts 101 | - tslearn~=0.3.0 https://github.com/tslearn-team/tslearn 102 | - scikit-multilearn~=0.2.0 http://scikit.ml/ 103 | - psutil~=5.6.7 104 | - matplotlib~=3.2.0 105 | - fuzzywuzzy~=0.17.0 106 | - numba~=0.48.0 107 | - PyWavelets https://pywavelets.readthedocs.io/en/latest/install.html 108 | 109 | ## Licence 110 | 111 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details 112 | 113 | 114 | ## References 115 | 1. Nalmpantis, C., Vrakas, D. On time series representations for multi-label NILM. Neural Comput & Applic (2020). https://doi.org/10.1007/s00521-020-04916-5 116 | 2. Nalmpantis, C., & Vrakas, D. 
(2019, May). Signal2Vec: Time Series Embedding Representation. In International Conference on Engineering Applications of Neural Networks (pp. 80-90). Springer, Cham. https://doi.org/10.1007/978-3-030-20257-6_7 117 | -------------------------------------------------------------------------------- /experiments/run_state_of_the_art.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Model selection environment parameters 4 | - UKDALE Building 1 1/1/2014 - 30/6/2014 5 | - REDD Building 1,3 1/4/2011 - 30/5/2011 6 | Train-test environment parameters 7 | - UKDALE Building 1 8 | Train: 1/3/2013 - 30/6/2014 9 | Test : 1/7/2014 - 31/12/2014 10 | - REDD Building 1 11 | Train: 18/4/2011 - 17/5/2011 12 | Test : 18/5/2011 - 25/5/2011 13 | - REDD Building 3 14 | Train: 16/4/2011 - 30/4/2011 15 | Test : 17/5/2011 - 30/5/2011 16 | 17 | redd 3 18 | 2011-04-16 01:11:24-04:00 - 2011-05-30 20:19:54-04:00 19 | 20 | redd 1 21 | 2011-04-18 09:22:06-04:00 - 2011-05-24 15:57:00-04:00 22 | """ 23 | 24 | import os 25 | import traceback 26 | 27 | from datasources.datasource import DatasourceFactory 28 | from experiments.experiments import GenericExperiment 29 | from nilmlab import exp_model_list 30 | from nilmlab.factories import EnvironmentFactory 31 | from nilmlab.lab import TimeSeriesLength 32 | from nilmlab.exp_model_list import CLF_MODELS, TRANSFORMER_MODELS, BOSS, SIGNAL2VEC, TIME_DELAY_EMBEDDING 33 | from utils.logger import debug 34 | 35 | dirname = os.path.dirname(__file__) 36 | 37 | STATE_OF_THE_ART = os.path.join(dirname, '../results/state_of_the_art_performance.csv') 38 | APPLIANCES_UK_DALE_BUILDING_1 = ['oven', 'microwave', 'dish washer', 'fridge freezer', 39 | 'kettle', 'washer dryer', 'toaster', 'boiler', 'television', 40 | 'hair dryer', 'vacuum cleaner', 'light'] 41 | APPLIANCES_REDD_BUILDING_1 = ['electric oven', 'fridge', 'microwave', 'washer dryer', 'unknown', 'sockets', 'light'] 42 | APPLIANCES_REDD_BUILDING_3 = ['electric 
furnace', 'CE appliance', 'microwave', 'washer dryer', 'unknown', 'sockets'] 43 | 44 | ukdale_train_year_start = '2013' 45 | ukdale_train_year_end = '2014' 46 | ukdale_train_month_end = '5' 47 | ukdale_train_month_start = '3' 48 | ukdale_train_end_date = "{}-30-{}".format(ukdale_train_month_end, ukdale_train_year_end) 49 | ukdale_train_start_date = "{}-1-{}".format(ukdale_train_month_start, ukdale_train_year_start) 50 | 51 | ukdale_test_year_start = '2014' 52 | ukdale_test_year_end = '2014' 53 | ukdale_test_month_end = '12' 54 | ukdale_test_month_start = '6' 55 | ukdale_test_end_date = "{}-30-{}".format(ukdale_test_month_end, ukdale_test_year_end) 56 | ukdale_test_start_date = "{}-1-{}".format(ukdale_test_month_start, ukdale_test_year_start) 57 | 58 | redd1_train_year_start = '2011' 59 | redd1_train_year_end = '2011' 60 | redd1_train_month_end = '5' 61 | redd1_train_month_start = '4' 62 | redd1_train_end_date = "{}-17-{}".format(redd1_train_month_end, redd1_train_year_end) 63 | redd1_train_start_date = "{}-18-{}".format(redd1_train_month_start, redd1_train_year_start) 64 | 65 | redd1_test_year_start = '2011' 66 | redd1_test_year_end = '2011' 67 | redd1_test_month_end = '5' 68 | redd1_test_month_start = '5' 69 | redd1_test_end_date = "{}-25-{}".format(redd1_test_month_end, redd1_test_year_end) 70 | redd1_test_start_date = "{}-18-{}".format(redd1_test_month_start, redd1_test_year_start) 71 | 72 | redd3_train_year_start = '2011' 73 | redd3_train_year_end = '2011' 74 | redd3_train_month_end = '4' 75 | redd3_train_month_start = '4' 76 | redd3_train_end_date = "{}-30-{}".format(redd3_train_month_end, redd3_train_year_end) 77 | redd3_train_start_date = "{}-16-{}".format(redd3_train_month_start, redd3_train_year_start) 78 | 79 | redd3_test_year_start = '2011' 80 | redd3_test_year_end = '2011' 81 | redd3_test_month_end = '5' 82 | redd3_test_month_start = '5' 83 | redd3_test_end_date = "{}-30-{}".format(redd1_test_month_end, redd1_test_year_end) 84 | redd3_test_start_date = 
"{}-17-{}".format(redd1_test_month_start, redd1_test_year_start) 85 | 86 | env_ukdale_building_1 = EnvironmentFactory.create_env_single_building( 87 | datasource=DatasourceFactory.create_uk_dale_datasource(), 88 | building=1, 89 | sample_period=6, 90 | train_year=ukdale_train_year_start + "-" + ukdale_train_year_end, 91 | train_start_date=ukdale_train_start_date, 92 | train_end_date=ukdale_train_end_date, 93 | test_year=ukdale_test_year_start + "-" + ukdale_test_year_end, 94 | test_start_date=ukdale_test_start_date, 95 | test_end_date=ukdale_test_end_date, 96 | appliances=APPLIANCES_UK_DALE_BUILDING_1) 97 | 98 | ukdale_building1_experiment = GenericExperiment(env_ukdale_building_1) 99 | 100 | env_redd_building_1 = EnvironmentFactory.create_env_single_building( 101 | datasource=DatasourceFactory.create_redd_datasource(), 102 | building=1, 103 | sample_period=6, 104 | train_year=redd1_train_year_start + "-" + redd1_train_year_end, 105 | train_start_date=redd1_train_start_date, 106 | train_end_date=redd1_train_end_date, 107 | test_year=redd1_test_year_start + "-" + redd1_test_year_end, 108 | test_start_date=redd1_test_start_date, 109 | test_end_date=redd1_test_end_date, 110 | appliances=APPLIANCES_REDD_BUILDING_1) 111 | 112 | redd_building1_experiment = GenericExperiment(env_redd_building_1) 113 | 114 | ukdale_building3_experiment = GenericExperiment(env_ukdale_building_1) 115 | 116 | env_redd_building_3 = EnvironmentFactory.create_env_single_building( 117 | datasource=DatasourceFactory.create_redd_datasource(), 118 | building=3, 119 | sample_period=6, 120 | train_year=redd3_train_year_start + "-" + redd3_train_year_end, 121 | train_start_date=redd3_train_start_date, 122 | train_end_date=redd3_train_end_date, 123 | test_year=redd3_test_year_start + "-" + redd3_test_year_end, 124 | test_start_date=redd3_test_start_date, 125 | test_end_date=redd3_test_end_date, 126 | appliances=APPLIANCES_REDD_BUILDING_3) 127 | 128 | redd_building3_experiment = 
GenericExperiment(env_redd_building_3) 129 | 130 | 131 | def run_experiments(experiment, appliances, window): """Run every configuration in exp_model_list.state_of_the_art on `experiment`: for each model key, set its transformers/classifiers, the appliance set, the window length and one repetition, checkpoint results to STATE_OF_THE_ART, and log (rather than re-raise) any failure so the remaining configurations still run.""" 132 | models = exp_model_list.state_of_the_art 133 | for k in models.keys(): 134 | experiment.setup_running_params( 135 | transformer_models=models[k][TRANSFORMER_MODELS], 136 | classifier_models=models[k][CLF_MODELS], 137 | train_appliances=appliances, 138 | test_appliances=appliances, 139 | ts_len=window, 140 | repeat=1) 141 | experiment.set_checkpoint_file(STATE_OF_THE_ART) 142 | tb = "No error" 143 | try: 144 | experiment.run() 145 | except Exception as e: 146 | tb = traceback.format_exc() 147 | debug(tb) 148 | debug(f"Failed for {k}") 149 | debug(f"{e}") 150 | 151 | 152 | run_experiments(ukdale_building1_experiment, APPLIANCES_UK_DALE_BUILDING_1, TimeSeriesLength.WINDOW_5_MINS) 153 | run_experiments(redd_building1_experiment, APPLIANCES_REDD_BUILDING_1, TimeSeriesLength.WINDOW_10_MINS) 154 | run_experiments(redd_building1_experiment, APPLIANCES_REDD_BUILDING_1, TimeSeriesLength.WINDOW_30_MINS) 155 | run_experiments(redd_building1_experiment, APPLIANCES_REDD_BUILDING_1, TimeSeriesLength.WINDOW_1_HOUR) 156 | run_experiments(redd_building1_experiment, APPLIANCES_REDD_BUILDING_1, TimeSeriesLength.WINDOW_2_HOURS) 157 | run_experiments(redd_building1_experiment, APPLIANCES_REDD_BUILDING_1, TimeSeriesLength.WINDOW_4_HOURS) 158 | run_experiments(redd_building1_experiment, APPLIANCES_REDD_BUILDING_1, TimeSeriesLength.WINDOW_8_HOURS) 159 | run_experiments(redd_building1_experiment, APPLIANCES_REDD_BUILDING_1, TimeSeriesLength.WINDOW_1_DAY) 160 | -------------------------------------------------------------------------------- /experiments/experiments.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Tuple, List, Any 3 | 4 | from datasources.datasource import DatasourceFactory 5 | from nilmlab import exp_model_list 6 | from nilmlab.lab import Environment, Experiment, TimeSeriesLength 7 | 
from utils.logger import timing 8 | 9 | reset_results = False 10 | 11 | 12 | class ModelSelectionExperiment(Experiment): """Cross-validated model selection over every transformer/classifier pair on UK-DALE building 1.""" 13 | appliances = ['oven', 'microwave', 'dish washer', 'fridge freezer', 'kettle', 'washer dryer', 14 | 'toaster', 'boiler', 'television', 'hair dryer', 'vacuum cleaner', 'light'] 15 | results_file: str 16 | ts_len: TimeSeriesLength 17 | 18 | def __init__(self, cv=3): 19 | super().__init__() 20 | self.transformers = exp_model_list.model_selection_transformers 21 | self.classifiers = exp_model_list.model_selection_clf_list 22 | self.cv = cv 23 | 24 | def setup_environment(self): 25 | train_year = '2014' 26 | train_month_end = '8' 27 | train_month_start = '1' 28 | train_end_date = "{}-30-{}".format(train_month_end, train_year) 29 | train_start_date = "{}-1-{}".format(train_month_start, train_year) 30 | train_sample_period = 6 31 | train_building = 1 32 | train_datasource = DatasourceFactory.create_uk_dale_datasource() 33 | 34 | test_year = '2014' 35 | test_month_end = '9' 36 | test_month_start = '7' 37 | test_end_date = "{}-30-{}".format(test_month_end, test_year) 38 | test_start_date = "{}-1-{}".format(test_month_start, test_year) 39 | test_sample_period = 6 40 | test_building = 1 41 | test_datasource = DatasourceFactory.create_uk_dale_datasource() # NOTE(review): these test_* locals are never used — the Environment below is built from train data only, and run() evaluates via cross_validate; confirm the dead block can be removed 42 | env = Environment(train_datasource, train_building, train_year, train_start_date, train_end_date, 43 | train_sample_period, self.appliances) 44 | self.populate_environment(env) 45 | self.populate_train_parameters(env) 46 | 47 | def run(self): 48 | self.setup_environment() 49 | self.env.set_ts_len(self.ts_len) 50 | 51 | for transformer in self.transformers: 52 | for clf in self.classifiers: 53 | self.env.place_multilabel_classifier(clf) 54 | self.env.place_ts_transformer(transformer) 55 | macro_scores, micro_scores = self.env.cross_validate(self.appliances, cv=self.cv, raw_data=False) 56 | 57 | description = self.create_description(type(clf).__name__, 58 | str(clf), 59 | transformer.get_name(), 60 | 
str(self.env.get_type_of_transformer()), 61 | str(transformer), 62 | str(self.cv), 63 | macro_scores.mean(), 64 | macro_scores.std(), 65 | micro_scores.mean(), 66 | micro_scores.std(), 67 | str(len(self.appliances)), 68 | str(self.appliances)) 69 | 70 | self.save_experiment(description, reset_results, self.results_file) 71 | 72 | def set_checkpoint_file(self, results_file: str = '../results/cross_val_window_4_hours.csv'): 73 | self.results_file = results_file 74 | 75 | def set_ts_len(self, ts_len: TimeSeriesLength = TimeSeriesLength.WINDOW_4_HOURS): 76 | self.ts_len = ts_len 77 | 78 | 79 | class GenericExperiment(Experiment): 80 | results_file: str 81 | ts_len: TimeSeriesLength 82 | 83 | def __init__(self, environment): 84 | super().__init__() 85 | self.env = environment 86 | self.transformers = None 87 | self.classifiers = None 88 | self.train_appliances = [] 89 | self.test_appliances = [] 90 | self.repeat = 1 91 | 92 | def setup_environment(self): 93 | self.env.set_ts_len(self.ts_len) 94 | self.populate_environment(self.env) 95 | self.populate_train_parameters(self.env) 96 | 97 | def setup_running_params(self, 98 | transformer_models: List[Tuple[Any, str]], 99 | classifier_models: List[Tuple[Any, str]], 100 | train_appliances, 101 | test_appliances=None, 102 | ts_len: TimeSeriesLength = TimeSeriesLength.WINDOW_4_HOURS, 103 | repeat: int = 1): 104 | self.set_transfomers_and_classifiers(transformer_models, classifier_models) 105 | self.set_ts_len(ts_len) 106 | self.repeat = repeat 107 | self.train_appliances = train_appliances 108 | if test_appliances: 109 | self.test_appliances = test_appliances 110 | else: 111 | self.test_appliances = train_appliances 112 | 113 | def run(self): 114 | self.setup_environment() 115 | if len(self.transformers) != len(self.classifiers): 116 | raise Exception("List of transformers doesn't have the same length with list of classifiers. 
" 117 | "It should be a 1-1 map") 118 | 119 | for model_index in range(len(self.transformers)): 120 | transformer = self.transformers[model_index] 121 | transformer_descr = str(transformer) 122 | clf = self.classifiers[model_index] 123 | clf_descr = str(clf) 124 | for i in range(self.repeat): 125 | self.env.place_multilabel_classifier(clf) 126 | self.env.place_ts_transformer(transformer) 127 | start_time = time.time() 128 | preprocess_train_time, fit_time = self.env.train(self.train_appliances) 129 | training_time = time.time() - start_time 130 | timing(f"training time {training_time}") 131 | start_time = time.time() 132 | macro, micro, report, preprocess_time, prediction_time = self.env.test(self.test_appliances) 133 | testing_time = time.time() - start_time 134 | timing(f"testing time {testing_time}") 135 | 136 | description = self.create_description(type(clf).__name__, 137 | clf_descr, 138 | transformer.get_name(), 139 | str(self.env.get_type_of_transformer()), 140 | transformer_descr, 141 | "train/test", 142 | macro, 143 | None, 144 | micro, 145 | None, 146 | str(len(self.train_appliances)), 147 | str(self.train_appliances), 148 | str(report), 149 | str(training_time), 150 | str(testing_time), 151 | str(preprocess_time), 152 | str(prediction_time), 153 | str(preprocess_train_time), 154 | str(fit_time) 155 | ) 156 | 157 | self.save_experiment(description, reset_results, self.results_file) 158 | 159 | def set_checkpoint_file(self, results_file: str = '../results/cross_val_window_4_hours.csv'): 160 | self.results_file = results_file 161 | 162 | def set_ts_len(self, ts_len: TimeSeriesLength = TimeSeriesLength.WINDOW_4_HOURS): 163 | self.ts_len = ts_len 164 | 165 | def set_transfomers_and_classifiers(self, transformer_models: List[Tuple[Any, str]], 166 | classifier_models: List[Tuple[Any, str]]): 167 | self.transformers = transformer_models 168 | self.classifiers = classifier_models 169 | 170 | def set(self, environment): 171 | self.env = environment 172 | 173 | 174 
| class REDDModelSelectionExperiment(ModelSelectionExperiment): 175 | appliances_redd3 = ['electric furnace', 'CE appliance', 'microwave', 'washer dryer', 'unknown', 'sockets'] 176 | appliances_redd1 = ['electric oven', 'fridge', 'microwave', 'washer dryer', 'unknown', 'sockets', 'light'] 177 | 178 | results_file: str 179 | ts_len: TimeSeriesLength 180 | 181 | def __init__(self, building=1, cv=2): 182 | super().__init__() 183 | self.transformers = exp_model_list.model_selection_transformers 184 | self.classifiers = exp_model_list.model_selection_clf_list 185 | self.building = building 186 | self.cv = cv 187 | 188 | def setup_environment(self): 189 | train_year = '2011' 190 | train_month_end = '5' 191 | train_month_start = '4' 192 | train_end_date = "{}-30-{}".format(train_month_end, train_year) 193 | train_start_date = "{}-1-{}".format(train_month_start, train_year) 194 | train_sample_period = 6 195 | train_building = self.building 196 | if self.building == 1: 197 | self.appliances = self.appliances_redd1 198 | elif self.building == 3: 199 | self.appliances = self.appliances_redd3 200 | 201 | train_datasource = DatasourceFactory.create_redd_datasource() 202 | 203 | env = Environment(train_datasource, train_building, train_year, train_start_date, train_end_date, 204 | train_sample_period, self.appliances) 205 | self.populate_environment(env) 206 | self.populate_train_parameters(env) 207 | -------------------------------------------------------------------------------- /nilmlab/factories.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from typing import Union 3 | 4 | from pyts import approximation, transformation 5 | from tslearn import piecewise 6 | from tslearn.piecewise import SymbolicAggregateApproximation, OneD_SymbolicAggregateApproximation 7 | 8 | from datasources.datasource import DatasourceFactory, Datasource 9 | from nilmlab.lab import Environment 10 | from nilmlab.lab import 
TransformerType 11 | from nilmlab.tstransformers import TSLearnTransformerWrapper, PytsTransformerWrapper, Signal2Vec, WaveletAdapter, \ 12 | TimeDelayEmbeddingAdapter 13 | 14 | SECONDS_PER_DAY = 60 * 60 * 24 15 | 16 | CAPACITY15GB = 1024 * 1024 * 1024 * 15 17 | 18 | reset_results = False 19 | 20 | 21 | class EnvironmentFactory: 22 | 23 | @staticmethod 24 | def create_env_single_building(datasource: Datasource, 25 | building: int = 1, 26 | sample_period: int = 6, 27 | train_year: str = "2013-2014", 28 | train_start_date: str = "3-1-2013", 29 | train_end_date: str = "5-30-2014", 30 | test_year: str = "2014", 31 | test_start_date: str = "6-1-2014", 32 | test_end_date: str = "12-30-2014", 33 | appliances: List = None): 34 | """ 35 | The specific experiment includes training and testing on house 1 of UK-DALE. 36 | The test set is defined as the year following April 2016, while the rest of the data are available 37 | for training. 38 | """ 39 | env = Environment(datasource, building, train_year, train_start_date, train_end_date, sample_period, appliances) 40 | env.setup_test_data(datasource=datasource, building=building, year=test_year, 41 | start_date=test_start_date, end_date=test_end_date, appliances=appliances) 42 | return env 43 | 44 | @staticmethod 45 | def create_env_single_building_learning_and_generalization_on_the_same_dataset( 46 | datasource: Datasource, 47 | sample_period: int = 6, 48 | train_building: int = 1, 49 | train_year: str = '2013-2014', 50 | train_start_date: str = '3-1-2013', 51 | train_end_date: str = '5-30-2014', 52 | test_building: int = 2, 53 | test_year: str = '2014', 54 | test_start_date: str = '6-1-2014', 55 | test_end_date: str = '12-30-2014', 56 | appliances: List = None): 57 | """ 58 | House 1 of UK-DALE is selected as the training set here again, while the rest of the 59 | houses where the target appliance is present compose the test sets. 60 | If an appliance is not present in the training or test building an error will be thrown. 
61 | """ 62 | env = Environment(datasource, train_building, train_year, 63 | train_start_date, train_end_date, sample_period, appliances) 64 | env.setup_test_data(datasource=datasource, building=test_building, year=test_year, 65 | start_date=test_start_date, end_date=test_end_date, appliances=appliances) 66 | return env 67 | 68 | @staticmethod 69 | def create_env_multi_building_learning_and_generalization_on_the_same_dataset( 70 | datasource: Datasource = DatasourceFactory.create_uk_dale_datasource(), 71 | sample_period: int = 6, 72 | train_building: Union[int, List[int]] = (1, 2), 73 | train_year: str = '2013-2014', 74 | train_start_date: str = '3-1-2013', 75 | train_end_date: str = '5-30-2014', 76 | test_building: int = 5, 77 | test_year: str = '2014', 78 | test_start_date: str = '6-1-2014', 79 | test_end_date: str = '12-30-2014', 80 | appliances: List = ('fridge', 'microwave')): 81 | """ 82 | The experiments used for this category are defined for the UK-DALE dataset. 83 | """ 84 | env = Environment(datasource, train_building, train_year, 85 | train_start_date, train_end_date, sample_period, appliances) 86 | env.setup_test_data(datasource=datasource, building=test_building, year=test_year, 87 | start_date=test_start_date, end_date=test_end_date, appliances=appliances) 88 | return env 89 | 90 | @staticmethod 91 | def create_env_generalization_on_different_dataset(train_datasource: Datasource, 92 | sample_period: int, 93 | train_building: int, 94 | train_year: str, 95 | train_start_date: str, 96 | train_end_date: str, 97 | test_datasource: Datasource, 98 | test_building: int, 99 | test_year: str, 100 | test_start_date: str, 101 | test_end_date: str): 102 | """ 103 | The training set is comprised of UK-DALE data, while testing is applied to REDD data. 104 | The first has buildings in the UK, while the second is for buildings in USA. 
105 | """ 106 | env = Environment(train_datasource, train_building, train_year, train_start_date, train_end_date, sample_period) 107 | env.setup_test_data(datasource=test_datasource, building=test_building, year=test_year, 108 | start_date=test_start_date, end_date=test_end_date) 109 | return env 110 | 111 | 112 | class TransformerFactory: 113 | 114 | @staticmethod 115 | def build_tslearn_paa(n_paa_segments=50, supports_approximation=True): 116 | paa = piecewise.PiecewiseAggregateApproximation(n_paa_segments) 117 | return TSLearnTransformerWrapper(paa, supports_approximation=supports_approximation) 118 | 119 | @staticmethod 120 | def build_tslearn_sax(n_paa_segments=50, n_sax_symbols=50, supports_approximation=True): 121 | sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, 122 | alphabet_size_avg=n_sax_symbols) 123 | return TSLearnTransformerWrapper(sax, supports_approximation=supports_approximation) 124 | 125 | @staticmethod 126 | def build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=50): 127 | one_d_sax = OneD_SymbolicAggregateApproximation(n_segments=n_paa_segments, 128 | alphabet_size_avg=n_sax_symbols, 129 | alphabet_size_slope=4) 130 | return TSLearnTransformerWrapper(one_d_sax, supports_approximation=False) 131 | 132 | @staticmethod 133 | def build_pyts_paa(n_paa_segments=50): 134 | paa = approximation.PiecewiseAggregateApproximation(window_size=None, output_size=n_paa_segments, 135 | overlapping=False) 136 | return PytsTransformerWrapper(paa) 137 | 138 | @staticmethod 139 | def build_pyts_sax(n_sax_symbols=50): 140 | sax = approximation.SymbolicAggregateApproximation(n_bins=n_sax_symbols, 141 | alphabet=[i for i in range(n_sax_symbols)]) 142 | return PytsTransformerWrapper(sax) 143 | 144 | @staticmethod 145 | def build_pyts_dft(n_coefs=30, norm_mean=False, norm_std=False, supports_approximation=True): 146 | dft = approximation.DiscreteFourierTransform(n_coefs=n_coefs, norm_mean=norm_mean, norm_std=norm_std) 147 | return 
PytsTransformerWrapper(dft, supports_approximation) 148 | 149 | @staticmethod 150 | def build_pyts_sfa(n_coefs=50, n_bins=5, norm_mean=False, norm_std=False): 151 | sfa = approximation.SymbolicFourierApproximation(n_coefs=n_coefs, 152 | norm_mean=norm_mean, 153 | norm_std=norm_std, 154 | n_bins=n_bins, 155 | alphabet=[i for i in range(n_bins)] 156 | ) 157 | return PytsTransformerWrapper(sfa) 158 | 159 | @staticmethod 160 | def build_pyts_boss(word_size=2, n_bins=5, window_size=10, norm_mean=False, norm_std=False): 161 | # TODO: Check other parameters 162 | boss = transformation.BOSS(word_size=word_size, 163 | window_size=window_size, 164 | norm_mean=norm_mean, 165 | norm_std=norm_std, 166 | n_bins=n_bins 167 | ) 168 | return PytsTransformerWrapper(boss) 169 | 170 | @staticmethod 171 | def build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False): 172 | # TODO: Check other parameters 173 | weasel = transformation.WEASEL(word_size=word_size, 174 | norm_mean=norm_mean, 175 | norm_std=norm_std, 176 | n_bins=n_bins 177 | ) 178 | return PytsTransformerWrapper(weasel) 179 | 180 | @staticmethod 181 | def build_signal2vec(classifier_path: str, embedding_path: str, 182 | transformer_type: TransformerType = TransformerType.transform_and_approximate, 183 | num_of_vectors: int = 1): 184 | signal2vec = Signal2Vec(classifier_path, embedding_path, num_of_representative_vectors=num_of_vectors) 185 | signal2vec.set_type(transformer_type) 186 | return signal2vec 187 | 188 | @staticmethod 189 | def build_wavelet(wavelet_name: str = 'haar', filter_bank: str = None, mode='symmetric', level=None, drop_cA=False, 190 | transformer_type: TransformerType = TransformerType.approximate): 191 | wavelet_adapter = WaveletAdapter(wavelet_name=wavelet_name, filter_bank=filter_bank, mode=mode, 192 | level=level, drop_cA=drop_cA) 193 | wavelet_adapter.set_type(transformer_type) 194 | return wavelet_adapter 195 | 196 | @staticmethod 197 | def build_delay_embedding(delay_in_seconds: 
int, dimension: int, sample_period: int = 6, 198 | transformer_type: TransformerType = TransformerType.approximate): 199 | wavelet_adapter = TimeDelayEmbeddingAdapter(delay_in_seconds=delay_in_seconds, dimension=dimension, 200 | sample_period=sample_period) 201 | wavelet_adapter.set_type(transformer_type) 202 | return wavelet_adapter 203 | -------------------------------------------------------------------------------- /datasources/datasource.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import List, Tuple 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | from fuzzywuzzy import fuzz 7 | from nilmtk import DataSet, MeterGroup 8 | from pandas import DataFrame 9 | 10 | from datasources.paths_manager import UK_DALE, REDD 11 | from nilmlab.lab_exceptions import LabelNormalizationError 12 | from utils.logger import timing, TIMING, info, debug 13 | 14 | NAME_UK_DALE = 'UK DALE' 15 | NAME_REDD = 'REDD' 16 | SITE_METER = 'Site meter' 17 | 18 | 19 | class Datasource(): 20 | 21 | def __init__(self, dataset: DataSet, name: str): 22 | self.dataset = dataset 23 | self.name = name 24 | 25 | def get_dataset(self): 26 | return self.dataset 27 | 28 | def get_name(self): 29 | return self.name 30 | 31 | def read_all_meters(self, start: str, end: str, sample_period: int = 6, building: int = 1) \ 32 | -> Tuple[DataFrame, MeterGroup]: 33 | """ 34 | Read the records during the given start and end dates, for all the meters of the given building. 35 | Args: 36 | start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012". 37 | end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012". 38 | sample_period (int): The sample period of the records. 39 | building (int): The building to read the records from. 40 | 41 | Returns: 42 | Returns a tuple containing the respective DataFrame and MeterGroup of the data that are read. 
43 | """ 44 | start_time = time.time() if TIMING else None 45 | self.dataset.set_window(start=start, end=end) 46 | elec = self.dataset.buildings[building].elec 47 | timing('NILMTK selecting all meters: {}'.format(round(time.time() - start_time, 2))) 48 | 49 | start_time = time.time() if TIMING else None 50 | df = elec.dataframe_of_meters(sample_period=sample_period) 51 | timing('NILMTK converting all meters to dataframe: {}'.format(round(time.time() - start_time, 2))) 52 | 53 | df.fillna(0, inplace=True) 54 | return df, elec 55 | 56 | def read_selected_appliances(self, appliances: List, start: str, end: str, sample_period=6, building=1, 57 | include_mains=True) -> Tuple[DataFrame, MeterGroup]: 58 | """ 59 | Loads the data of the specified appliances. 60 | Args: 61 | appliances (List): A list of appliances to read their records. 62 | start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012". 63 | end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012". 64 | sample_period (int): The sample period of the records. 65 | building (int): The building to read the records from. 66 | include_mains (bool): True if should include main meters. 67 | 68 | Returns: 69 | Returns a tuple containing the respective DataFrame and MeterGroup of the data that are read. 
70 | """ 71 | debug(f" read_selected_appliances {appliances}, {building}, {start}, {end}, {include_mains}") 72 | 73 | selected_metergroup = self.get_selected_metergroup(appliances, building, end, start, include_mains) 74 | 75 | start_time = time.time() if TIMING else None 76 | df = selected_metergroup.dataframe_of_meters(sample_period=sample_period) 77 | timing('NILMTK converting specified appliances to dataframe: {}'.format(round(time.time() - start_time, 2))) 78 | 79 | debug(f"Length of data of read_selected_appliances {len(df)}") 80 | df.fillna(0, inplace=True) 81 | return df, selected_metergroup 82 | 83 | def read_mains(self, start, end, sample_period=6, building=1) -> Tuple[DataFrame, MeterGroup]: 84 | """ 85 | Loads the data of the specified appliances. 86 | Args: 87 | start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012". 88 | end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012". 89 | sample_period (int): The sample period of the records. 90 | building (int): The building to read the records from. 91 | 92 | Returns: 93 | Returns a tuple containing the respective DataFrame and MeterGroup of the data that are read. 94 | """ 95 | self.dataset.set_window(start=start, end=end) 96 | mains_meter = self.dataset.buildings[building].elec.mains() 97 | if isinstance(mains_meter, MeterGroup): 98 | mains_metergroup = mains_meter 99 | else: 100 | mains_metergroup = MeterGroup(meters=[mains_meter]) 101 | start_time = time.time() if TIMING else None 102 | df = mains_metergroup.dataframe_of_meters(sample_period=sample_period) 103 | timing('NILMTK converting mains to dataframe: {}'.format(round(time.time() - start_time, 2))) 104 | 105 | df.fillna(0, inplace=True) 106 | return df, mains_metergroup 107 | 108 | # def read_data_of_appliance(self, start, end, sample_period=6, building=1, device=None) -> np.ndarray: 109 | # """ 110 | # Reads the data of a specific appliance. 
If no device is specified then it reads the main meter. 111 | # :param start: 112 | # :type start: 113 | # :param end: 114 | # :type end: 115 | # :param sample_period: 116 | # :type sample_period: 117 | # :param building: 118 | # :type building: 119 | # :param device: 120 | # :type device: 121 | # :return: 122 | # :rtype: 123 | # """ 124 | # start_time = time.time() if TIMING else None 125 | # 126 | # power_df = self.read_df(start, end, sample_period, building, device) 127 | # power_data = power_df.values 128 | # 129 | # debug('Power data shape {}'.format(power_data.shape)) 130 | # debug('Type of power_data {}'.format(type(power_data))) 131 | # debug('Size of power_data {}'.format(len(power_data))) 132 | # timing('NILMTK reading and getting power series: {}'.format(round(time.time() - start_time, 2))) 133 | # self.clean_nans(power_data) 134 | # 135 | # return power_data 136 | 137 | def get_selected_metergroup(self, appliances, building, end, start, include_mains) -> MeterGroup: 138 | """ 139 | Gets a MeterGroup with the specified appliances for the given building during the given dates. 140 | Args: 141 | appliances (List): A list of appliances to read their records. 142 | building (int): The building to read the records from. 143 | start (str): The starting date in the format "{month}-{day of month}-{year}" e.g. "05-30-2012". 144 | end (str): The final date in the format "{month}-{day of month}-{year}" e.g. "08-30-2012". 145 | include_mains (bool): True if should include main meters. 146 | 147 | Returns: 148 | A MeterGroup containing the specified appliances. 
149 | """ 150 | start_time = time.time() if TIMING else None 151 | self.dataset.set_window(start=start, end=end) 152 | elec = self.dataset.buildings[building].elec 153 | appliances_with_one_meter = [] 154 | appliances_with_more_meters = [] 155 | for appliance in appliances: 156 | metergroup = elec.select_using_appliances(type=appliances) 157 | if len(metergroup.meters) > 1: 158 | appliances_with_more_meters.append(appliance) 159 | else: 160 | appliances_with_one_meter.append(appliance) 161 | 162 | special_metergroup = None 163 | for appliance in appliances_with_more_meters: 164 | inst = 1 165 | if appliance == 'sockets' and building == 3: 166 | inst = 4 167 | if special_metergroup is None: 168 | special_metergroup = elec.select_using_appliances(type=appliance, instance=inst) 169 | else: 170 | special_metergroup = special_metergroup.union(elec.select_using_appliances(type=appliance, instance=1)) 171 | 172 | selected_metergroup = elec.select_using_appliances(type=appliances_with_one_meter) 173 | selected_metergroup = selected_metergroup.union(special_metergroup) 174 | if include_mains: 175 | mains_meter = self.dataset.buildings[building].elec.mains() 176 | if isinstance(mains_meter, MeterGroup): 177 | if len(mains_meter.meters) > 1: 178 | mains_meter = mains_meter.meters[0] 179 | mains_metergroup = MeterGroup(meters=[mains_meter]) 180 | else: 181 | mains_metergroup = mains_meter 182 | else: 183 | mains_metergroup = MeterGroup(meters=[mains_meter]) 184 | selected_metergroup = selected_metergroup.union(mains_metergroup) 185 | timing('NILMTK select using appliances: {}'.format(round(time.time() - start_time, 2))) 186 | return selected_metergroup 187 | 188 | @staticmethod 189 | def normalize_columns(df: DataFrame, meter_group: MeterGroup, appliance_names: List[str]) -> Tuple[DataFrame, dict]: 190 | """ 191 | It normalizes the names of the columns for compatibility. 
192 | Args: 193 | df (DataFrame): 194 | meter_group (MeterGroup): 195 | appliance_names (List[str]): 196 | 197 | Returns: 198 | A tuple with a DataFrame and a dictionary mapping labels to ids. 199 | """ 200 | labels = meter_group.get_labels(df.columns) 201 | normalized_labels = [] 202 | info(f"Df columns before normalization {df.columns}") 203 | info(f"Labels before normalization {labels}") 204 | 205 | for label in labels: 206 | if label == SITE_METER and SITE_METER not in appliance_names: 207 | normalized_labels.append(SITE_METER) 208 | continue 209 | for name in appliance_names: 210 | ratio = fuzz.ratio(label.lower().replace('electric', "").lstrip().rstrip().split()[0], 211 | name.lower().replace('electric', "").lstrip().rstrip().split()[0]) 212 | if ratio > 90: 213 | info(f"{name} ~ {label} ({ratio}%)") 214 | normalized_labels.append(name) 215 | if len(normalized_labels) != len(labels): 216 | debug(f"len(normalized_labels) {len(normalized_labels)} != len(labels) {len(labels)}") 217 | raise LabelNormalizationError() 218 | label2id = {l: i for l, i in zip(normalized_labels, df.columns)} 219 | df.columns = normalized_labels 220 | info(f"Normalized labels {normalized_labels}") 221 | return df, label2id 222 | 223 | @staticmethod 224 | def rename_columns(df: DataFrame, meter_group: MeterGroup) -> (DataFrame, dict, dict): 225 | """ 226 | Rename columns of the given DataFrame using the respective labels of each meter. 227 | Args: 228 | df (DataFrame): 229 | meter_group (MeterGroup): 230 | 231 | Returns: 232 | Returns a DataFrame with renamed columns and two dictionaries to covnert labels to ids and vice versa. 
233 | """ 234 | new_columns = [] 235 | label2id = dict() 236 | id2label = dict() 237 | for col in df.columns: 238 | try: 239 | meter = meter_group[col] 240 | label = meter.label() + str(col[0]) 241 | new_columns.append(label) 242 | label2id[label] = col 243 | id2label[col] = label 244 | except KeyError: 245 | info(f"KeyError key={col}") 246 | df.columns = new_columns 247 | return df, label2id, id2label 248 | 249 | # def read_df(self, start, end, sample_period=6, building=1, device=None): 250 | # self.dataset.set_window(start=start, end=end) 251 | # elec = self.dataset.buildings[building].elec 252 | # if device is not None: 253 | # mains = elec.submeters()[device] 254 | # debug('Reading data of {}.'.format(device)) 255 | # else: 256 | # mains = elec.mains() 257 | # debug('Reading data of mains.') 258 | # power_df = mains.power_series_all_data(sample_period=sample_period) 259 | # return power_df 260 | 261 | @staticmethod 262 | def clean_nans(data): 263 | start_time = time.time() if TIMING else None 264 | np.nan_to_num(data, False) 265 | timing('None to num: {}'.format(round(time.time() - start_time, 2))) 266 | 267 | 268 | class DatasourceFactory: 269 | """ 270 | It is responsible to create different data sources that are based on various data sets. 
271 | """ 272 | 273 | @staticmethod 274 | def create_uk_dale_datasource(): 275 | return Datasource(DatasourceFactory.get_uk_dale_dataset(), NAME_UK_DALE) 276 | 277 | @staticmethod 278 | def get_uk_dale_dataset(): 279 | return DataSet(UK_DALE) 280 | 281 | @staticmethod 282 | def create_redd_datasource(): 283 | return Datasource(DatasourceFactory.get_redd_dataset(), NAME_REDD) 284 | 285 | @staticmethod 286 | def get_redd_dataset(): 287 | return DataSet(REDD) 288 | 289 | 290 | def save_and_plot(sequence, plot=False, save_figure=False, filename=None): 291 | if plot or save_figure: 292 | plt.plot(sequence) 293 | if filename is not None and save_figure: 294 | plt.savefig(filename + '.png') 295 | if plot: 296 | plt.show() 297 | -------------------------------------------------------------------------------- /nilmlab/tstransformers.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Union, Iterable 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import psutil 7 | import pywt 8 | from loguru import logger 9 | from pyts import approximation, transformation 10 | from sklearn.base import TransformerMixin 11 | from sklearn.externals import joblib 12 | from tslearn import utils as tsutils 13 | from tslearn.piecewise import SymbolicAggregateApproximation, OneD_SymbolicAggregateApproximation 14 | from tslearn.preprocessing import TimeSeriesScalerMeanVariance 15 | 16 | from nilmlab.lab import TimeSeriesTransformer, TransformerType 17 | from utils import chaotic_toolkit 18 | from utils.logger import debug, timing, debug_mem, info 19 | 20 | SECONDS_PER_DAY = 60 * 60 * 24 21 | 22 | CAPACITY15GB = 1024 * 1024 * 1024 * 15 23 | 24 | 25 | class Signal2Vec(TimeSeriesTransformer): 26 | 27 | def __init__(self, classifier_path: str, embedding_path: str, num_of_representative_vectors: int = 1): 28 | super().__init__() 29 | self.clf = joblib.load(classifier_path) 30 | embedding = pd.read_csv(embedding_path) 31 | 
self.embedding = embedding.reset_index().to_dict('list') 32 | self.type = TransformerType.transform_and_approximate 33 | self.num_of_representative_vectors = num_of_representative_vectors 34 | 35 | def __repr__(self): 36 | return f"Signal2Vec num_of_representative_vectors: {self.num_of_representative_vectors}" 37 | 38 | def transform(self, series: np.ndarray, sample_period: int = 6) -> np.ndarray: 39 | discrete_series = self.discretize_in_chunks(series, sample_period) 40 | debug_mem('Time series {} MB', series) 41 | debug_mem('Discrete series {} MB', discrete_series) 42 | 43 | vector_representation = self.map_into_vectors(discrete_series) 44 | debug_mem('Sequence of vectors : {} MB', vector_representation) 45 | 46 | return np.array(vector_representation) 47 | 48 | def approximate(self, data_in_batches: np.ndarray, window: int = 1, should_fit: bool = True) -> list: 49 | # TODO: Window is used only by signal2vec, move it to constructor or extract it as len(segment). 50 | if self.num_of_representative_vectors > 1: 51 | window = int(window / self.num_of_representative_vectors) 52 | data_in_batches = np.reshape(data_in_batches, 53 | (len(data_in_batches), window, 300 * self.num_of_representative_vectors)) 54 | 55 | squeezed_seq = np.sum(data_in_batches, axis=1) 56 | vf = np.vectorize(lambda x: x / window) 57 | squeezed_seq = vf(squeezed_seq) 58 | return squeezed_seq 59 | 60 | def reconstruct(self, series: np.ndarray) -> list: 61 | raise Exception('Signal2Vec doesn\'t support reconstruct yet.') 62 | 63 | def get_name(self): 64 | return type(self).__name__ 65 | 66 | def get_type(self): 67 | return self.type 68 | 69 | def set_type(self, method_type: TransformerType): 70 | if method_type == TransformerType.approximate: 71 | raise Exception('Signal2vec does not support only approximation. 
The series has to be transformed firstly') 72 | self.type = method_type 73 | 74 | def discretize(self, data): 75 | debug('Length of data {}'.format(len(data))) 76 | start_time = time.time() 77 | 78 | pred = self.clf.predict(data.reshape(-1, 1)) 79 | 80 | timing('clf.predict: {}'.format(round(time.time() - start_time, 2))) 81 | debug('Length of predicted sequence {}'.format(len(pred))) 82 | debug('Type of discrete sequence {}'.format(type(pred))) 83 | 84 | return pred 85 | 86 | def map_into_vectors(self, sequence): 87 | start_time = time.time() 88 | sequence_of_vectors = [self.embedding[str(i)] for i in sequence] 89 | timing('Appending vectors to list : {}'.format(round(time.time() - start_time, 2))) 90 | return sequence_of_vectors 91 | 92 | def discretize_in_chunks(self, sequence, sample_period: int = 6): 93 | memory = psutil.virtual_memory() 94 | debug('Memory: {}'.format(memory)) 95 | chunk_size = sample_period * SECONDS_PER_DAY 96 | if memory.total >= CAPACITY15GB: 97 | chunk_size = chunk_size * 2 98 | seq = list() 99 | split_n = max(int(len(sequence) / chunk_size), 1) 100 | rem = len(sequence) % split_n 101 | if rem != 0: 102 | sequence = sequence[:-rem] 103 | 104 | debug('Spliting data into {} parts for memory efficient classification'.format(split_n)) 105 | for d in np.split(sequence, split_n): 106 | debug('Discretising time series...') 107 | s = self.discretize(d) 108 | seq.append(s) 109 | 110 | return np.concatenate(seq) 111 | 112 | 113 | class WaveletAdapter(TimeSeriesTransformer): 114 | """ 115 | http://ataspinar.com/2018/12/21/a-guide-for-using-the-wavelet-transform-in-machine-learning/ 116 | """ 117 | 118 | def __init__(self, wavelet_name: str = 'haar', filter_bank: str = None, mode='symmetric', level=None, 119 | drop_cA=False): 120 | super().__init__() 121 | self.wavelet_name = wavelet_name 122 | self.filter_bank = filter_bank 123 | self.mode = mode 124 | self.level = level 125 | self.drop_cA = drop_cA 126 | self.type = TransformerType.approximate 127 
| 128 | def __repr__(self): 129 | return str(f"Wavelet {self.wavelet_name}, level {self.level}, filter_bank {self.filter_bank}, " 130 | f"mode {self.mode}, drop_cA {self.drop_cA}") 131 | 132 | def transform(self, series: np.ndarray, sample_period: int = 6) -> list: 133 | debug('WaveletAdapter series shape {}'.format(series.shape)) 134 | coeffs = pywt.wavedec(data=series, wavelet=self.wavelet_name, level=self.level, mode=self.mode) 135 | ts_representation = pywt.waverec(coeffs, wavelet=self.wavelet_name, mode=self.mode) 136 | debug('WaveletAdapter series shape after inverse {}'.format(series.shape)) 137 | return ts_representation[0].ravel() 138 | 139 | def approximate(self, series: np.ndarray, window: int = 1, should_fit: bool = True) -> np.ndarray: 140 | ts_representation = list() 141 | debug(f'WaveletAdapter.approximate: param series \n{series} ') 142 | for segment in series: 143 | coeffs = pywt.wavedec(data=segment, wavelet=self.wavelet_name, 144 | level=self.level, mode=self.mode) 145 | if self.drop_cA: 146 | coeffs = coeffs[0] 147 | else: 148 | coeffs = np.concatenate(coeffs) 149 | 150 | ts_representation.append(coeffs) 151 | # debug('TSLearnApproximatorWrapper.approximate: ts_representation \n{}'.format(ts_representation)) 152 | debug('WaveletAdapter.approximate: ts_representation shape {}'.format(np.shape(ts_representation))) 153 | # ts_representation = np.reshape(ts_representation, ( 154 | # np.shape(ts_representation)[0], np.shape(ts_representation)[1] * np.shape(ts_representation)[2])) 155 | # debug('WaveletAdapter.approximate: ts_representation \n{}'.format(ts_representation)) 156 | return np.asarray(ts_representation) 157 | 158 | def reconstruct(self, series: np.ndarray) -> list: 159 | raise Exception('WaveletAdapter doesn\'t support reconstruct yet.') 160 | 161 | def get_type(self) -> TransformerType: 162 | return self.type 163 | 164 | def set_type(self, method_type: TransformerType): 165 | self.type = method_type 166 | 167 | def get_name(self): 168 | 
class TimeDelayEmbeddingAdapter(TimeSeriesTransformer):
    """
    Takens time-delay embedding of a time series.
    http://eprints.maths.manchester.ac.uk/175/1/embed.pdf
    """

    def __init__(self, delay_in_seconds: int, dimension: int, sample_period: int = 6):
        super().__init__()
        self.delay_in_seconds = delay_in_seconds
        self.dimension = dimension
        self.sample_period = sample_period
        self.type = TransformerType.approximate

    def __repr__(self):
        return f"TimeDelayEmbedding delay={self.delay_in_seconds} dim={self.dimension}"

    def transform(self, series: np.ndarray, sample_period: int = 6) -> list:
        """
        Segment the whole time series into windows of size delay_items * dimension
        and extract one delay embedding per window.
        """
        delay_items = int(self.delay_in_seconds / sample_period)
        window_size = delay_items * self.dimension
        num_of_segments = int(len(series) / window_size)
        return [
            chaotic_toolkit.takens_embedding(series[start:start + window_size], delay_items, self.dimension)
            for start in range(0, num_of_segments * window_size, window_size)
        ]

    def approximate(self, series_in_segments: np.ndarray, window: int = 1, should_fit: bool = True) -> np.ndarray:
        """
        The time series is given pre-segmented; extract one delay embedding per segment.
        """
        delay_items = int(self.delay_in_seconds / self.sample_period)
        window_size = delay_items * self.dimension
        segment_len = len(series_in_segments[0])

        if window_size > segment_len:
            raise Exception(
                f'Not enough data for the given delay ({self.delay_in_seconds} seconds) and dimension ({self.dimension}).'
                f'\ndelay_items * dimension > len(data): {window_size} > {segment_len}')

        if window_size == segment_len:
            info(f"TimeDelayEmbeddingAdapter is applied with delay embeddings equavalent to the length of each segment"
                 f" {window_size} == {segment_len}")

        if window_size < segment_len:
            info(f"TimeDelayEmbeddingAdapter is applied with delay embeddings covering less than the length of each "
                 f"segment. {window_size} < {segment_len}")

        embeddings = [chaotic_toolkit.takens_embedding(segment, delay_items, self.dimension)
                      for segment in series_in_segments]
        return np.asarray(embeddings)

    def reconstruct(self, series: np.ndarray) -> list:
        raise Exception('TimeDelayEmbeddingAdapter doesn\'t support reconstruct yet.')

    def get_type(self) -> TransformerType:
        return self.type

    def set_type(self, method_type: TransformerType):
        self.type = method_type

    def get_name(self):
        return type(self).__name__
class TSLearnTransformerWrapper(TimeSeriesTransformer):
    """Adapter exposing tslearn transformers through the TimeSeriesTransformer interface."""

    def __init__(self, transformer: TransformerMixin, supports_approximation: bool = True):
        super().__init__()
        if not isinstance(transformer, TransformerMixin):
            raise Exception('Invalid type of approximator. It should be an instance of TransformerMixin.')
        self.transformer = transformer
        self.supports_approximation = supports_approximation
        self.type = TransformerType.approximate if supports_approximation else TransformerType.transform

    def __repr__(self):
        return str(self.transformer)

    def transform(self, series: np.ndarray, sample_period: int = 6) -> list:
        """Fit-transform ``series`` and reconstruct it via the inverse transform."""
        debug('TSLearnApproximatorWrapper series shape {}'.format(series.shape))
        reconstructed = self.transformer.inverse_transform(self.transformer.fit_transform(series))
        return reconstructed[0].ravel()

    def approximate(self, series: np.ndarray, window: int = 1, should_fit: bool = True) -> np.ndarray:
        """Transform each pre-segmented batch; SAX variants are z-normalized first."""
        # series is already in batches
        debug('TSLearnApproximatorWrapper.approximate: series shape {}'.format(series.shape))
        debug('TSLearnApproximatorWrapper.approximate: to_time_series shape {}'.format(series.shape))
        ts_representation = []
        debug(f'TSLearnApproximatorWrapper.approximate: param series \n{series} ')
        needs_scaling = isinstance(self.transformer,
                                   (SymbolicAggregateApproximation, OneD_SymbolicAggregateApproximation))
        for segment in series:
            if needs_scaling:
                logger.info("Scaling the data so that they consist a normal distribution.")
                # Rescale time series
                segment = TimeSeriesScalerMeanVariance(mu=0., std=1.).fit_transform(segment)
            ts_representation.append(self.transformer.fit_transform(segment))
        debug('TSLearnApproximatorWrapper.approximate: ts_representation shape {}'.format(np.shape(ts_representation)))
        shape = np.shape(ts_representation)
        # Flatten the (n_segments, a, b) stack into one feature row per segment.
        ts_representation = np.reshape(ts_representation, (shape[0], shape[1] * shape[2]))
        debug('TSLearnApproximatorWrapper.approximate: ts_representation \n{}'.format(ts_representation))
        debug('TSLearnApproximatorWrapper.approximate: ts_representation shape {}'.format(ts_representation.shape))
        return ts_representation

    def reconstruct(self, series: np.ndarray) -> list:
        raise Exception('Pyts doesn\'t support reconstruct.')

    def get_type(self):
        return self.type

    def get_name(self):
        return type(self.transformer).__name__

    def set_type(self, method_type: TransformerType):
        if not self.supports_approximation and method_type == TransformerType.approximate:
            raise Exception('{} does not support approximation.'.format(type(self.transformer).__name__))
        self.type = method_type
class PytsTransformerWrapper(TimeSeriesTransformer):
    """Adapter exposing pyts transformers (DFT, BOSS, WEASEL, ...) through the
    TimeSeriesTransformer interface."""

    def __init__(self, transformer, supports_approximation: bool = True):
        super().__init__()
        if not isinstance(transformer, TransformerMixin):
            raise Exception('Invalid type of approximator. It should be an instance of TransformerMixin.')
        self.transformer = transformer
        self.supports_approximation = supports_approximation
        # A transformer that cannot approximate is only usable in "transform" mode.
        if supports_approximation:
            self.type = TransformerType.approximate
        else:
            self.type = TransformerType.transform

    def __repr__(self):
        return str(self.transformer)

    def transform(self, series: np.ndarray, sample_period: int = 6) -> Union[np.ndarray, Iterable, int, float]:
        """Approximate ``series`` with a truncated DFT and reconstruct it.

        Only supported when the wrapped transformer is a
        pyts DiscreteFourierTransform; any other transformer raises.
        """
        if isinstance(self.transformer, approximation.DiscreteFourierTransform):
            n_coefs = self.transformer.n_coefs
            series = tsutils.to_time_series(series)
            # pyts expects a 2-D (n_samples, n_timestamps) input.
            series = np.reshape(series, (1, -1))
            n_samples, n_timestamps = series.shape
            self.transformer.drop_sum = True
            X_dft = self.transformer.fit_transform(series)

            # Compute the inverse transformation
            # The DFT output interleaves real and imaginary parts; rebuild the
            # complex spectrum before inverting with irfft.
            if n_coefs % 2 == 0:
                real_idx = np.arange(1, n_coefs, 2)
                imag_idx = np.arange(2, n_coefs, 2)
                # Even n_coefs: the last real coefficient has no imaginary
                # partner, so the imaginary column is zero-padded.
                X_dft_new = np.c_[
                    X_dft[:, :1],
                    X_dft[:, real_idx] + 1j * np.c_[X_dft[:, imag_idx],
                                                    np.zeros((n_samples,))]
                ]
            else:
                real_idx = np.arange(1, n_coefs, 2)
                imag_idx = np.arange(2, n_coefs + 1, 2)
                X_dft_new = np.c_[
                    X_dft[:, :1],
                    X_dft[:, real_idx] + 1j * X_dft[:, imag_idx]
                ]
            X_irfft = np.fft.irfft(X_dft_new, n_timestamps)
            debug('PytsTransformerWrapper ts_representation shape {}'.format(np.shape(X_irfft)))

            return np.ravel(X_irfft)
        else:
            raise Exception('Pyts doesn\'t support trasform')

    def approximate(self, series: np.ndarray, window: int = 1, target=None, should_fit: bool = True) -> list:
        """Transform the pre-segmented ``series``; WEASEL additionally consumes labels.

        Args:
            series (ndarray): Time series already split into batches.
            window (int): Unused by pyts transformers; kept for interface compatibility.
            target (ndarray): Multilabel matrix, required only for WEASEL.
            should_fit (bool): Whether to fit before transforming.
        """
        # series is already in batches
        debug('PytsTransformerWrapper series shape {}'.format(series.shape))
        if isinstance(self.transformer, transformation.WEASEL):
            # WEASEL is supervised: encode each multilabel row as a digit string, e.g. "0101".
            labels = list()
            for t in target:
                l: str = ''
                for i in range(target.shape[1]):
                    l = l + str(int(t[i]))

                labels.append(l)

            if should_fit:
                ts_representation = self.transformer.fit_transform(series, labels)
            else:
                ts_representation = self.transformer.transform(series)
        else:
            if should_fit:
                ts_representation = self.transformer.fit_transform(series)
            elif isinstance(self.transformer, transformation.BOSS):
                debug("BOSS instance, only transform")
                ts_representation = self.transformer.transform(series)
            else:
                debug("Fit transform.")
                # NOTE(review): with should_fit=False a non-BOSS transformer is
                # still re-fitted here — confirm this is intentional.
                ts_representation = self.transformer.fit_transform(series)

        # debug('PytsTransformerWrapper ts_representation \n{}'.format(ts_representation))
        debug('PytsTransformerWrapper ts_representation shape {}'.format(np.shape(ts_representation)))
        return ts_representation

    def reconstruct(self, series: np.ndarray) -> list:
        raise Exception('Pyts doesn\'t support reconstruct.')

    def get_type(self):
        return self.type

    def set_type(self, method_type: TransformerType):
        if not self.supports_approximation and method_type == TransformerType.approximate:
            raise Exception('{} does not support approximation.'.format(type(self.transformer).__name__))
        self.type = method_type

    def get_name(self):
        return type(self.transformer).__name__

    def uses_labels(self):
        # Only WEASEL is supervised among the wrapped pyts transformers.
        if isinstance(self.transformer, transformation.WEASEL):
            return True
        return False
class TransformerType(Enum):
    """Which operations a TimeSeriesTransformer supports / is configured to run."""
    # TODO: More clear TransformerType is needed.
    raw = 1
    transform = 2
    approximate = 3
    transform_and_approximate = 4


class TimeSeriesTransformer(ABC):
    """Common interface for all time-series representation algorithms."""

    def __init__(self):
        super().__init__()

    @abstractmethod
    def transform(self, series: np.ndarray, sample_period: int = 6) -> list:
        """
        Transform a given time series into another representation.

        Unifies the different transformations: typically either a plain
        transformation without dimensionality reduction, or a transform followed
        by a reconstruction from the underlying representation.
        Args:
            series (ndarray): The time series to transform.
            sample_period (int): The sampling frequency.

        Returns:
            The transformed time series as a list.
        """

    @abstractmethod
    def approximate(self, series: np.ndarray, window: int = 1, should_fit: bool = True) -> np.ndarray:
        """
        Transform a pre-segmented time series, one segment at a time.

        TODO: should_fit is used only by a few transformers. Move it to their constructors.
        Args:
            series (ndarray): The segmented time series to transform.
            window (int): Size of the sub-segments; not supported by all algorithms.
            should_fit (bool): Whether the algorithm should fit to the data first.
        Returns:
            The transformed time series as ndarray.
        """

    @abstractmethod
    def reconstruct(self, series: np.ndarray) -> list:
        """
        Reconstruct a transformed time series.
        Args:
            series (ndarray): A transformed time series.

        Returns:
            The reconstructed time series as a list of values.
        """

    @abstractmethod
    def get_type(self) -> TransformerType:
        """Return the TransformerType describing the supported operations."""

    @abstractmethod
    def set_type(self, method_type: TransformerType):
        """Set the TransformerType describing the supported operations."""

    @abstractmethod
    def get_name(self):
        """Return a short display name for the transformer."""

    def uses_labels(self):
        # Supervised transformers (e.g. WEASEL wrappers) override this.
        return False
def bucketize_data(data: np.ndarray, window: int) -> np.ndarray:
    """
    Segment a 1-D or 2-D time series into consecutive batches of length ``window``.
    Args:
        data (ndarray): The given time series.
        window (int): The size of each segment.

    Returns:
        The series reshaped to (n_batches, window[, n_features]).
    """
    debug('bucketize_data: Initial shape {}'.format(data.shape))
    n_dims = len(data.shape)
    n_batches = int(len(data) / window)

    if n_dims == 1:
        seq_in_batches = np.reshape(data, (n_batches, window))
    elif n_dims == 2:
        seq_in_batches = np.reshape(data, (n_batches, window, data.shape[1]))
    else:
        raise Exception('Invalid number of dimensions {}.'.format(n_dims))
    debug('bucketize_data: Shape in batches: {}'.format(seq_in_batches.shape))
    return seq_in_batches


def bucketize_target(target: np.ndarray, window: int) -> np.ndarray:
    """
    Collapse per-sample labels into one multilabel row per window.
    An appliance is marked active for a window if it was active in ANY sample of it.
    Args:
        target (ndarray): Per-sample target data.
        window (int): Window length used for segmentation.
    Returns:
        The bucketized target data.
    """
    target_in_batches = bucketize_data(target, window)
    any_multilabel = np.any(target_in_batches, axis=1)
    debug('bucketize_target: Shape of array in windows: {}'.format(target_in_batches.shape))
    debug('bucketize_target: Shape of array after merging windows: {}'.format(any_multilabel.shape))
    return any_multilabel
144 | """ 145 | WINDOW_SAMPLE_PERIOD = 'same' 146 | WINDOW_1_MIN = '1m' 147 | WINDOW_5_MINS = '5m' 148 | WINDOW_10_MINS = '10m' 149 | WINDOW_30_MINS = '30m' 150 | WINDOW_1_HOUR = '1h' 151 | WINDOW_2_HOURS = '2h' 152 | WINDOW_4_HOURS = '4h' 153 | WINDOW_8_HOURS = '8h' 154 | WINDOW_1_DAY = '1d' 155 | WINDOW_1_WEEK = '1w' 156 | 157 | 158 | def repeat_the_same_date_for_all_buildings(buildings: List[int], end_date: List[str], start_date: List[str]): 159 | """ 160 | Creates a list of start and end dates for as many buildings as in the given list of buildings. 161 | Args: 162 | buildings (List[int]): The given buildings that will be used. 163 | end_date (List[str]): End date of the data that will be selected for each building. 164 | start_date (List[str]): Start date of the data that will be selected for each building. 165 | """ 166 | for i in range(1, len(buildings)): 167 | start_date.append(start_date[-1]) 168 | end_date.append(end_date[-1]) 169 | 170 | 171 | def dates_as_lists(end_date: Union[str, List[str]], start_date: Union[str, List[str]]): 172 | """ 173 | If the given dates are pure strings convert them to lists of strings. 174 | Args: 175 | end_date (Union[str, List[str]]): End date of the data that will be selected for each building. 176 | start_date (List[str]): Start date of the data that will be selected for each building. 177 | Returns: 178 | Returns the start and end dates as lists of dates. 179 | """ 180 | if not isinstance(start_date, list): 181 | start_date = [start_date] 182 | if not isinstance(end_date, list): 183 | end_date = [end_date] 184 | return end_date, start_date 185 | 186 | 187 | class Environment: 188 | """ 189 | This class describes all the parameters related to the data. 
190 | """ 191 | 192 | def __init__(self, datasource: Datasource, 193 | buildings: Union[int, List[int]], 194 | year: Union[str, str], 195 | start_date: Union[str, List[str]], 196 | end_date: Union[str, List[str]], 197 | sample_period: int = 6, 198 | appliances: List = None, 199 | is_deep_classifier=False): 200 | """ 201 | Constructs a new Environment with the given parameters. 202 | Args: 203 | datasource (Datasource): The data source that will be used to load energy data. 204 | buildings (Union[int, List[int]]): The given buildings that will be used. 205 | year (Union[str, str]): The year or the range of years that are used. This parameter doesn't affect the 206 | actual experiments, it is used mainly as a summary of the period of the data of the created environment. 207 | start_date (Union[str, List[str]]): Start date of the data that will be selected for each building. 208 | end_date (Union[str, List[str]]): End date of the data that will be selected for each building. 209 | sample_period (int): The sampling frequency. 210 | appliances (List): A list of appliances. 211 | is_deep_classifier (bool): This is a flag that is used in case of deep neural networks. 
212 | """ 213 | self.datasource = datasource 214 | self.buildings = buildings 215 | self.year = year 216 | self.start_date = start_date 217 | self.end_date = end_date 218 | self.sample_period = sample_period 219 | self.appliances = appliances 220 | self.is_deep_classifier = is_deep_classifier 221 | if not buildings: 222 | raise EnvironmentError("Building is not specified.") 223 | if isinstance(buildings, int) or len(buildings) == 1: 224 | if isinstance(buildings, int): 225 | building = buildings 226 | else: 227 | building = buildings[0] 228 | if isinstance(start_date, list): 229 | start_date = start_date[0] 230 | if isinstance(end_date, list): 231 | end_date = end_date[0] 232 | all_df, metergroup, label2id = self.setup_one_building(appliances, datasource, building, 233 | start_date, end_date, sample_period) 234 | labels_df = labels_factory.create_multilabels_from_meters(all_df, metergroup, label2id) 235 | else: 236 | end_date, start_date = dates_as_lists(end_date, start_date) 237 | if isinstance(start_date, list) and isinstance(end_date, list): 238 | 239 | if len(start_date) == len(end_date) and len(start_date) == 1 and len(buildings) > 1: 240 | repeat_the_same_date_for_all_buildings(buildings, end_date, start_date) 241 | 242 | if len(start_date) != len(buildings) or len(end_date) != len(buildings): 243 | raise EnvironmentError("Number of buildings not the same with number of dates") 244 | 245 | buildings_with_dates = list(zip(buildings, start_date, end_date)) 246 | data_per_building: Dict[int, Tuple[DataFrame, MeterGroup, Dict]] = \ 247 | self.setup_across_many_buildings(appliances, datasource, buildings_with_dates, sample_period) 248 | labels_per_building = labels_factory.create_multilabels_from_many_buildings(data_per_building) 249 | 250 | data_frame = [] 251 | labels_frame = [] 252 | metergroup = None 253 | for building in data_per_building.keys(): 254 | df, metergroup_of_building, label2id = data_per_building[building] 255 | if not metergroup: 256 | metergroup 
= metergroup_of_building 257 | else: 258 | metergroup = metergroup.union(metergroup_of_building) 259 | data_frame.append(df) 260 | labels_frame.append(labels_per_building[building]) 261 | all_df = pd.concat(data_frame) 262 | labels_df = pd.concat(labels_frame) 263 | 264 | self.all_df, self.metergroup, self.labels_df = all_df, metergroup, labels_df 265 | self.ts_transformer = None 266 | self.multilabel_clf = None 267 | 268 | self.train_datasource = datasource 269 | self.train_building = buildings 270 | self.train_year = year 271 | self.train_start_date = start_date 272 | self.train_end_date = end_date 273 | self.train_sample_period = sample_period 274 | self.train_df = self.all_df 275 | self.train_labels_df = self.labels_df 276 | # self.train_label2id = self.label2id 277 | 278 | self.test_datasource = None 279 | self.test_building = None 280 | self.test_year = None 281 | self.test_start_date = None 282 | self.test_end_date = None 283 | self.test_sample_period = None 284 | self.test_df = None 285 | self.test_labels_df = None 286 | self.test_label2id = None 287 | self.ts_length = TimeSeriesLength.WINDOW_SAMPLE_PERIOD 288 | 289 | def setup_train_data(self, datasource: Datasource = None, 290 | building: int = None, 291 | year: str = None, 292 | start_date: str = None, 293 | end_date: str = None, 294 | sample_period: int = 6, 295 | appliances: List = None): 296 | """ 297 | Setup training data. 298 | Args: 299 | datasource (Datasource): The Datasource that will be used for training. 300 | building (int): The building that will be used for training. 301 | year (str): The year(s) that the training data correspond to. 302 | start_date (str): Start date of the data that will be selected for each building. 303 | end_date (str): End date of the data that will be selected for each building. 304 | sample_period (int): The sampling frequency. 305 | appliances (List): A list of appliances. 
306 | """ 307 | if datasource is not None: 308 | self.train_datasource = datasource 309 | if building is not None: 310 | self.train_building = building 311 | if year is not None: 312 | self.train_year = year 313 | if start_date is not None: 314 | self.train_start_date = start_date 315 | if end_date is not None: 316 | self.train_end_date = end_date 317 | if sample_period is not None: 318 | self.train_sample_period = sample_period 319 | if appliances: 320 | self.appliances = appliances 321 | self.train_df, train_metergroup, train_label2id = self.setup_one_building(appliances, datasource, building, 322 | start_date, end_date, sample_period) 323 | self.train_labels_df = labels_factory.create_multilabels_from_meters(self.train_df, 324 | train_metergroup, 325 | train_label2id) 326 | 327 | def setup_test_data(self, datasource: Datasource = None, 328 | building: int = None, 329 | year: str = None, 330 | start_date: str = None, 331 | end_date: str = None, 332 | sample_period: int = 6, 333 | appliances: List = None): 334 | """ 335 | Setup the testing data. 336 | Args: 337 | datasource (Datasource): The Datasource that will be used for testing. 338 | building (int): The building that will be used for testing. 339 | year (str): The year(s) that the testing data correspond to. 340 | start_date (str): Start date of the data that will be selected for each building. 341 | end_date (str): End date of the data that will be selected for each building. 342 | sample_period (int): The sampling frequency. 343 | appliances (List): A list of appliances. 
344 | """ 345 | if datasource is not None: 346 | self.test_datasource = datasource 347 | if building is not None: 348 | self.test_building = building 349 | if year is not None: 350 | self.test_year = year 351 | if start_date is not None: 352 | self.test_start_date = start_date 353 | if end_date is not None: 354 | self.test_end_date = end_date 355 | if sample_period is not None: 356 | self.test_sample_period = sample_period 357 | if appliances: 358 | self.appliances = appliances 359 | self.test_df, test_metergroup, test_label2id = self.setup_one_building(appliances, datasource, building, 360 | start_date, end_date, sample_period) 361 | self.test_labels_df = labels_factory.create_multilabels_from_meters(self.test_df, 362 | test_metergroup, 363 | test_label2id) 364 | 365 | def set_deep_classifier(self, is_deep_clf: bool = True): 366 | """ 367 | Set to true if a deep neural network is used as a classifier. 368 | Args: 369 | is_deep_clf (bool): 370 | 371 | Returns: 372 | 373 | """ 374 | self.is_deep_classifier = is_deep_clf 375 | 376 | def set_ts_len(self, ts_length: TimeSeriesLength): 377 | """ 378 | Set the length of the segments of the given time series. 379 | Args: 380 | ts_length (TimeSeriesLength): The length of the segments of the time series. 381 | """ 382 | self.ts_length = ts_length 383 | 384 | def get_ts_len(self) -> TimeSeriesLength: 385 | """ 386 | It returns the length of the segments of the time series. 387 | Returns: A TimeSeriesLength that corresponds to the size of the segments of the time series. 388 | """ 389 | return self.ts_length 390 | 391 | def get_multilabels(self, labels_df: DataFrame, appliances: List = None) -> DataFrame: 392 | """ 393 | Get the labels of the specified appliances. 
394 | Args: 395 | labels_df (DataFrame): 396 | appliances (List): 397 | 398 | Returns: 399 | 400 | """ 401 | debug(f"get_multilabels labels_df.columns {labels_df.columns}") 402 | debug(f"get_multilabels appliances {appliances}") 403 | if appliances is None: 404 | return labels_df 405 | else: 406 | return labels_df[appliances] 407 | 408 | def get_site_meter_data(self, df: DataFrame) -> np.ndarray: 409 | """ 410 | Get the data of the site meter from the given DataFrame. 411 | Args: 412 | df (DataFrame): A DataFrame containing energy data with columns corresponding to different meters. 413 | 414 | Returns: 415 | The site meter data as an array (ndarray). 416 | """ 417 | for col in df.columns: 418 | if SITE_METER in col: 419 | return df[col].values 420 | raise NoSiteMeterException("Couldn' t find site meter.") 421 | 422 | def get_window(self, dt: TimeSeriesLength) -> int: 423 | """ 424 | Get the number of samples that correspond to the given TimeSeriesLength. 425 | The result may vary depending on the sampling rate that is predefined. 426 | Args: 427 | dt (TimeSeriesLength): The given TimeSeriesLength in time. 428 | Returns: 429 | The number of samples that correspond to the time length. 
430 | """ 431 | choices = {TimeSeriesLength.WINDOW_SAMPLE_PERIOD: 1, 432 | TimeSeriesLength.WINDOW_1_MIN : self.get_no_of_samples_per_min(), 433 | TimeSeriesLength.WINDOW_5_MINS : self.get_no_of_samples_per_min() * 5, 434 | TimeSeriesLength.WINDOW_10_MINS : self.get_no_of_samples_per_min() * 10, 435 | TimeSeriesLength.WINDOW_30_MINS : self.get_no_of_samples_per_min() * 30, 436 | TimeSeriesLength.WINDOW_1_HOUR : self.get_no_of_samples_per_hour(), 437 | TimeSeriesLength.WINDOW_2_HOURS : self.get_no_of_samples_per_hour() * 2, 438 | TimeSeriesLength.WINDOW_4_HOURS : self.get_no_of_samples_per_hour() * 4, 439 | TimeSeriesLength.WINDOW_8_HOURS : self.get_no_of_samples_per_hour() * 8, 440 | TimeSeriesLength.WINDOW_1_DAY : self.get_no_of_samples_per_day(), 441 | TimeSeriesLength.WINDOW_1_WEEK : self.get_no_of_samples_per_day() * 7 442 | } 443 | return int(choices.get(dt, 1)) 444 | 445 | def get_features(self, data_df: DataFrame, representation: TransformerType = TransformerType.raw) -> List: 446 | """ 447 | It transforms the given data using underlying algorithm that is wrapped by the TimeSeriesTransformer interface. 448 | Args: 449 | data_df (DataFrame): The time series that will be transformed into another time series representation. 450 | representation (TransformerType): The type of transformation that the specified TimeSeriesTransformer 451 | supports. 452 | Returns: 453 | A list containing the converted time series. 
454 | """ 455 | data = self.get_site_meter_data(data_df) 456 | if representation == TransformerType.transform or representation == TransformerType.transform_and_approximate: 457 | if self.ts_transformer is None: 458 | raise Exception('TimeSeriesTransformer has not been placed!') 459 | data = self.ts_transformer.transform(data) 460 | return data 461 | 462 | def reduce_dimensions(self, data_in_batches: np.ndarray, window: int, target: np.ndarray, should_fit: bool = True): 463 | """ 464 | It uses the method approximate of the TimeSeriesTransformer in order to achieve dimensionality reduction. 465 | Args: 466 | data_in_batches (ndarray): The data of the time series separated in batches. 467 | window (int): The size of the sub-segments of the given time series. 468 | This is not supported by all algorithms. 469 | target (ndarray): The labels that correspond to the given data in batches. 470 | should_fit (bool): True if it is supported by the algorithm of the specified time series representation. 471 | Returns: 472 | The shortened time series as an array (ndarray). 473 | 474 | """ 475 | if self.ts_transformer is None: 476 | raise Exception('TimeSeriesTransformer has not been placed!') 477 | if self.ts_transformer.uses_labels(): 478 | squeezed_seq = self.ts_transformer.approximate(data_in_batches, window, target, should_fit) 479 | else: 480 | squeezed_seq = self.ts_transformer.approximate(data_in_batches, window, should_fit=should_fit) 481 | 482 | debug('Shape of squeezed seq: {}'.format(squeezed_seq.shape)) 483 | return squeezed_seq 484 | 485 | def get_no_of_samples_per_min(self): 486 | """ 487 | It returns the number of samples per minute. This depends also on the predefined sample period. 488 | Returns: 489 | An int representing the number of samples. 490 | """ 491 | return 60 / self.sample_period 492 | 493 | def get_no_of_samples_per_hour(self): 494 | """ 495 | It returns the number of samples per hour. This depends also on the predefined sample period. 
496 | Returns: 497 | An int representing the number of samples. 498 | """ 499 | return self.get_no_of_samples_per_min() * 60 500 | 501 | def get_no_of_samples_per_day(self): 502 | """ 503 | It returns the number of samples per day. This depends also on the predefined sample period. 504 | Returns: 505 | An int representing the number of samples. 506 | """ 507 | return self.get_no_of_samples_per_hour() * 24 508 | 509 | def place_ts_transformer(self, transformer: TimeSeriesTransformer): 510 | """ 511 | Set the time series transformer that will be used. 512 | Args: 513 | transformer (TimeSeriesTransformer): The time series transformer that will be used. 514 | """ 515 | self.ts_transformer = transformer 516 | 517 | def place_multilabel_classifier(self, multilabel_clf: Union[str, ClassifierMixin]): 518 | """ 519 | Specify the multi label classifier that will be used. 520 | Args: 521 | multilabel_clf (Union[str, ClassifierMixin]): 522 | """ 523 | if isinstance(multilabel_clf, str): 524 | self.is_deep_classifier = True 525 | self.multilabel_clf = multilabel_clf 526 | else: 527 | self.multilabel_clf = multilabel_clf 528 | 529 | def setup_across_many_buildings(self, appliances, datasource, buildings_with_dates: List[Tuple[int, str, str]], 530 | sample_period: int) -> Dict[int, Tuple[DataFrame, MeterGroup, Dict]]: 531 | """ 532 | Setup using many buildings. 533 | Args: 534 | appliances (List): The appliances that will be recongized. 535 | datasource (Datasource): The Datasource that will be used to load energy data. 536 | buildings_with_dates (List[Tuple[int, str, str]]): The dates for all the buildings that will be used. 537 | sample_period (int): The sampling frequency. 538 | 539 | Returns: 540 | A dictionary containing the loaded data for each building. 
541 | """ 542 | data_per_building = dict() 543 | for building, start_date, end_date in buildings_with_dates: 544 | loguru.logger.info(f"setup across many buildings: building {building}, start {start_date}, end {end_date}") 545 | df, metergroup, label2id = self.setup_one_building(appliances, datasource, building, 546 | start_date, end_date, sample_period) 547 | data_per_building[building] = (df, metergroup, label2id) 548 | return data_per_building 549 | 550 | @staticmethod 551 | def setup_one_building(appliances, datasource, building, start_date, end_date, 552 | sample_period) -> (pd.DataFrame, MeterGroup, Dict, Dict): 553 | """ 554 | Setup and load the data using one building. 555 | Args: 556 | appliances (List): The appliances that will be recongized. 557 | datasource (Datasource): The Datasource that will be used to load energy data. 558 | building (int): The building that is used. 559 | start_date (str): Start date of the data that will be selected for each building. 560 | end_date (str): End date of the data that will be selected for each building. 561 | sample_period (int): The sampling frequency. 562 | Returns: 563 | 564 | """ 565 | if appliances: 566 | info(f'Reading data from specified meters. 
\n-Building: {building}\n-Appliances {appliances}') 567 | all_df, metergroup = datasource.read_selected_appliances(appliances=appliances, start=start_date, 568 | end=end_date, 569 | sample_period=sample_period, building=building) 570 | else: 571 | info('Reading data from all meters...') 572 | all_df, metergroup = datasource.read_all_meters(start_date, end_date, 573 | building=building, 574 | sample_period=sample_period) 575 | 576 | loguru.logger.debug(f"Length of data of all loaded meters {len(all_df)}") 577 | all_df, label2id = datasource.normalize_columns(all_df, metergroup, appliances) 578 | loguru.logger.debug(f"Length of data of all loaded meters {len(all_df)}") 579 | info('Meters that have been loaded (all_df.columns):\n' + str(all_df.columns)) 580 | 581 | return all_df, metergroup, label2id 582 | 583 | def get_type_of_transformer(self) -> TransformerType: 584 | """ 585 | Get the type of the transformer. 586 | Returns: The type of the transformer (TransformerType) 587 | """ 588 | if self.ts_transformer is None: 589 | raise Exception('TimeSeriesTransformer has not been placed!') 590 | return self.ts_transformer.get_type() 591 | 592 | def cross_validate(self, appliances: list, cv: int = 5, 593 | raw_data: bool = False): 594 | """ 595 | Execute a cross validation. 596 | Args: 597 | appliances (List): List of appliances to be recognized. 598 | cv (int): The number sets to be used for cross validation. 599 | raw_data (bool): If the experiment uses raw data without any time series representation. 600 | Returns: 601 | A tuple with macro and micro f scores. Currently micro is disabled and returns 0. 602 | scores : array of float, shape=(len(list(cv)),) 603 | Array of scores of the estimator for each run of the cross validation. 
604 | """ 605 | # TODO: Define overlap for windows 606 | # TODO: define the case window=1 607 | # TODO: Clarify TS_TRASFORMATION and TS_APPROXIMATION cases 608 | ts_length = self.get_ts_len() 609 | data, target = self._preprocess(self.all_df, self.labels_df, appliances, ts_length, raw_data) 610 | 611 | if len(data.shape) == 3: 612 | data = np.reshape(data, (data.shape[0], data.shape[1] * data.shape[2])) 613 | 614 | debug(f"Unique classes {target}") 615 | macro_scores = cross_val_score(self.multilabel_clf, data, target, cv=cv, scoring='f1_macro', n_jobs=-1) 616 | info('F1 macro: {} (+/- {})'.format(macro_scores.mean(), macro_scores.std())) 617 | 618 | # micro_scores = cross_val_score(self.multilabel_clf, data, target, cv=cv, scoring='f1_micro', n_jobs=-1) 619 | # info('F1 micro: {} (+/- {})'.format(micro_scores.mean(), micro_scores.std())) 620 | micro_scores = np.array([0, 0]) 621 | return macro_scores, micro_scores 622 | 623 | def train(self, appliances: list, raw_data: bool = False): 624 | """ 625 | Train the algorithm for the specified appliances. 626 | Args: 627 | appliances (List): List of appliances to be recognized. 628 | raw_data (bool): True if the experiment uses raw data without any time series representation. 629 | Returns: 630 | The preprocess and the fiting time. 
631 | """ 632 | info("Prepossessing before training...") 633 | start_time = time.time() 634 | data, target = self._preprocess(self.train_df, self.train_labels_df, appliances, self.get_ts_len(), raw_data) 635 | preprocess_time = time.time() - start_time 636 | timing(f"preprocess time {preprocess_time}") 637 | 638 | if len(data.shape) == 3: 639 | data = np.reshape(data, (data.shape[0], data.shape[1] * data.shape[2])) 640 | 641 | info("Training...") 642 | start_time = time.time() 643 | self.multilabel_clf.fit(data, target) 644 | fit_time = time.time() - start_time 645 | timing(f"fit time {fit_time}") 646 | return preprocess_time, fit_time 647 | 648 | def test(self, appliances: list, raw_data: bool = False): 649 | """ 650 | Runs a test using the specified appliances. 651 | Args: 652 | appliances (List): List of appliances to be recognized. 653 | raw_data (bool): True if the experiment uses raw data without any time series representation. 654 | Returns: 655 | A tuple containing macro, micro, a report, preprocess and fiting time. 
656 | """ 657 | if self.test_df is None or self.test_labels_df is None: 658 | raise (Exception('Test data or test target is None')) 659 | info("Prepossessing before testing...") 660 | start_time = time.time() 661 | data, target = self._preprocess(self.test_df, self.test_labels_df, appliances, 662 | self.get_ts_len(), raw_data, should_fit=False) 663 | preprocess_time = time.time() - start_time 664 | timing(f"preprocess time {preprocess_time}") 665 | if len(data.shape) == 3: 666 | data = np.reshape(data, (data.shape[0], data.shape[1] * data.shape[2])) 667 | info("Testing...") 668 | 669 | start_time = time.time() 670 | predictions = self.multilabel_clf.predict(data) 671 | predictions_time = time.time() - start_time 672 | timing(f"predictions time {predictions_time}") 673 | 674 | micro = f1_score(target, predictions, average='micro') 675 | macro = f1_score(target, predictions, average='macro') 676 | info('F1 macro {}'.format(macro)) 677 | info('F1 micro {}'.format(micro)) 678 | report = classification_report(target, predictions, target_names=appliances, output_dict=True) 679 | # confusion_matrix = multilabel_confusion_matrix(y_true=target, y_pred=predictions.toarray()) 680 | # confusion_matrix = None 681 | return macro, micro, report, preprocess_time, predictions_time 682 | 683 | def _preprocess(self, data_df, labels_df, appliances, ts_length, raw_data, should_fit: bool = True): 684 | if self.multilabel_clf is None: 685 | raise Exception('Multilabel classifier has not been placed!') 686 | if raw_data: 687 | representation_type = TransformerType.raw 688 | else: 689 | representation_type = self.get_type_of_transformer() 690 | debug(f"Type of transformer {representation_type}") 691 | 692 | start_time = time.time() 693 | data = self.get_features(data_df, representation_type) 694 | get_features_time = time.time() - start_time 695 | timing(f"get features time {get_features_time}") 696 | 697 | debug(f"Features \n {data[:10]}") 698 | target = self.get_multilabels(labels_df, 
appliances) 699 | target = np.array(target.values) 700 | debug(f"Target \n {target[:10]}") 701 | window = self.get_window(ts_length) 702 | rem = len(data) % window 703 | if rem > 0: 704 | data = data[:-rem] 705 | target = target[:-rem] 706 | target = bucketize_target(target, window) 707 | data = bucketize_data(data, window) 708 | # if representation_type == TransformerType.raw or representation_type == TransformerType.approximate: 709 | # pass 710 | if representation_type == TransformerType.approximate \ 711 | or representation_type == TransformerType.transform_and_approximate: 712 | start_time = time.time() 713 | data = self.reduce_dimensions(data, window, target, should_fit) 714 | reduce_dimensions_time = time.time() - start_time 715 | timing(f"reduce dimensions time {reduce_dimensions_time}") 716 | 717 | return data, target 718 | 719 | 720 | class Experiment(ABC): 721 | """ 722 | Abstract class describing an multi label disaggregation experiment. 723 | """ 724 | columns_results = [ 725 | 'train_end_date', 726 | 'train_start_date', 727 | 'train_sample_period', 728 | 'train_building', 729 | 'train_datasource', 730 | 'test_end_date', 731 | 'test_start_date', 732 | 'test_sample_period', 733 | 'test_building', 734 | 'test_datasource', 735 | 'ts length', 736 | 'classifier', 737 | 'clf properties', 738 | 'ts_representation', 739 | 'transformer_type', 740 | 'ts_repr properties', 741 | 'cross validation', 742 | 'macro avg', 743 | 'macro sd', 744 | 'micro avg', 745 | 'micro sd', 746 | 'num_of_appliances', 747 | 'appliances' 748 | ] 749 | 750 | def __init__(self): 751 | super().__init__() 752 | self.env = None 753 | self.train_end_date = None 754 | self.train_start_date = None 755 | self.train_sample_period = None 756 | self.train_building = None 757 | self.train_datasource_name = None 758 | 759 | self.test_end_date = None 760 | self.test_start_date = None 761 | self.test_sample_period = None 762 | self.test_building = None 763 | self.test_datasource_name = None 764 | 
self.ts_length = None 765 | self.transformers = None 766 | self.classifiers = None 767 | self.deep_classifiers = None 768 | 769 | @abstractmethod 770 | def run(self): 771 | pass 772 | 773 | @abstractmethod 774 | def setup_environment(self): 775 | pass 776 | 777 | def set_transformers(self, transformers: List): 778 | self.transformers = transformers 779 | 780 | def set_classifiers(self, classifiers: List): 781 | self.classifiers = classifiers 782 | 783 | def set_deep_classifiers(self, classifiers: List[str]): 784 | self.deep_classifiers = classifiers 785 | 786 | def populate_environment(self, environment: Environment): 787 | self.env = environment 788 | 789 | def populate_ts_params(self): 790 | self.ts_length = self.env.get_ts_len() 791 | 792 | def populate_train_parameters(self, env: Environment): 793 | self.train_end_date = env.train_end_date 794 | self.train_start_date = env.train_start_date 795 | self.train_sample_period = env.train_sample_period 796 | self.train_building = env.train_building 797 | self.train_datasource_name = env.train_datasource.get_name() 798 | 799 | def populate_test_parameters(self): 800 | if self.env.test_datasource is None: 801 | return 802 | self.test_end_date = self.env.test_end_date 803 | self.test_start_date = self.env.test_start_date 804 | self.test_sample_period = self.env.test_sample_period 805 | self.test_building = self.env.test_building 806 | self.test_datasource_name = self.env.test_datasource.get_name() 807 | 808 | def create_description(self, 809 | classifier: str, 810 | clf_properties: str, 811 | ts_representation: str, 812 | transformer_type: str, 813 | ts_repr_properties: str, 814 | cross_validation: str, 815 | macro_avg: str, 816 | macro_sd: str, 817 | micro_avg: str, 818 | micro_sd: str, 819 | num_of_appliances: str, 820 | appliances: str, 821 | report: str = None, 822 | training_time: str = None, 823 | testing_time: str = None, 824 | preprocess_time: str = None, 825 | prediction_time: str = None, 826 | 
preprocess_train_time: str = None, 827 | fit_time: str = None) -> dict: 828 | self.populate_ts_params() 829 | self.populate_test_parameters() 830 | debug(f"train building {self.train_building}") 831 | description = { 832 | 'train_end_date' : str(self.train_end_date), 833 | 'train_start_date' : str(self.train_start_date), 834 | 'train_sample_period' : str(self.train_sample_period), 835 | 'train_building' : str(self.train_building), 836 | 'train_datasource' : str(self.train_datasource_name), 837 | 'test_end_date' : str(self.test_end_date), 838 | 'test_start_date' : str(self.test_start_date), 839 | 'test_sample_period' : str(self.test_sample_period), 840 | 'test_building' : str(self.test_building), 841 | 'test_datasource' : str(self.test_datasource_name), 842 | 'ts length' : str(self.ts_length), 843 | 'classifier' : classifier, 844 | 'clf_properties' : clf_properties, 845 | 'ts_representation' : ts_representation, 846 | 'transformer_type' : transformer_type, 847 | 'ts_repr_properties' : ts_repr_properties, 848 | 'cross_validation' : cross_validation, 849 | 'macro_avg' : macro_avg, 850 | 'macro_sd' : macro_sd, 851 | 'micro_avg' : micro_avg, 852 | 'micro_sd' : micro_sd, 853 | 'num_of_appliances' : num_of_appliances, 854 | 'appliances' : appliances, 855 | 'report' : report, 856 | 'training_time' : training_time, 857 | 'testing_time' : testing_time, 858 | 'preprocess_time' : preprocess_time, 859 | 'prediction_time' : prediction_time, 860 | 'preprocess_train_time': preprocess_train_time, 861 | 'fit_time' : fit_time 862 | } 863 | return description 864 | 865 | def save_experiment(self, description, reset_results, results_file): 866 | new_results_df = pd.DataFrame(description, index=[0]) 867 | results_csv = Path(results_file) 868 | if reset_results and results_csv.is_file(): 869 | os.remove(results_file) 870 | if results_csv.is_file(): 871 | results_df = pd.read_csv(results_csv) 872 | results_df = results_df.append(new_results_df) 873 | else: 874 | results_df = 
new_results_df 875 | results_df.to_csv(results_csv, index=False) 876 | info(str(results_df.tail())) 877 | -------------------------------------------------------------------------------- /nilmlab/exp_model_list.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier 2 | from sklearn.naive_bayes import GaussianNB 3 | from sklearn.neural_network import MLPClassifier 4 | from sklearn.tree import ExtraTreeClassifier 5 | from skmultilearn.adapt import MLkNN 6 | from skmultilearn.ensemble import RakelD 7 | 8 | from datasources.paths_manager import SAVED_MODEL, PATH_SIGNAL2VEC 9 | from nilmlab.factories import TransformerFactory 10 | from nilmlab.lab import TransformerType 11 | 12 | SAX = 'SAX' 13 | SAX1D = 'SAX1D' 14 | SFA = 'SFA' 15 | DFT = 'DFT' 16 | PAA = 'PAA' 17 | WEASEL = 'WEASEL' 18 | SIGNAL2VEC = 'SIGNAL2VEC' 19 | TRANSFORMER_MODELS = 'TRANSFORMER_MODELS' 20 | CLF_MODELS = 'CLF_MODELS' 21 | BOSS = 'BOSS' 22 | TIME_DELAY_EMBEDDING = 'TIME_DELAY_EMBEDDING' 23 | WAVELETS = 'WAVELETS' 24 | 25 | selected_models_10mins = { 26 | BOSS : { 27 | CLF_MODELS : [ 28 | MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam') 29 | ], 30 | TRANSFORMER_MODELS: [ 31 | TransformerFactory.build_pyts_boss(word_size=4, n_bins=20, window_size=10, norm_mean=False, 32 | norm_std=False) 33 | ] 34 | }, 35 | SIGNAL2VEC: { 36 | CLF_MODELS : [ 37 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic') 38 | ], 39 | TRANSFORMER_MODELS: [ 40 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=1) 41 | ] 42 | }, 43 | PAA : { 44 | CLF_MODELS : [ 45 | ExtraTreesClassifier(n_jobs=-1, n_estimators=500) 46 | ], 47 | TRANSFORMER_MODELS: [ 48 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True) 49 | ] 50 | }, 51 | DFT : { 52 | CLF_MODELS : [ 53 | 
ExtraTreesClassifier(n_jobs=-1, n_estimators=500) 54 | ], 55 | TRANSFORMER_MODELS: [ 56 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 57 | supports_approximation=True) 58 | ] 59 | }, 60 | SFA : { 61 | CLF_MODELS : [ 62 | MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam') 63 | ], 64 | TRANSFORMER_MODELS: [ 65 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False) 66 | ] 67 | }, 68 | SAX1D : { 69 | CLF_MODELS : [ 70 | RandomForestClassifier(n_jobs=-1, n_estimators=100) 71 | ], 72 | TRANSFORMER_MODELS: [ 73 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10) 74 | ] 75 | }, 76 | SAX : { 77 | CLF_MODELS : [ 78 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam') 79 | ], 80 | TRANSFORMER_MODELS: [ 81 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=50, supports_approximation=True) 82 | ] 83 | } 84 | } 85 | 86 | selected_models_4h = { 87 | BOSS : { 88 | CLF_MODELS : [ 89 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 90 | activation='logistic'), 91 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 92 | activation='logistic'), 93 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam', activation='logistic'), 94 | MLPClassifier(hidden_layer_sizes=(100, 100), learning_rate='adaptive', solver='adam', 95 | activation='logistic') 96 | ], 97 | TRANSFORMER_MODELS: [ 98 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=26, window_size=10, norm_mean=False, 99 | norm_std=False), 100 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=25, window_size=10, norm_mean=False, 101 | norm_std=False), 102 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=26, window_size=10, norm_mean=False, 103 | norm_std=False), 104 | TransformerFactory.build_pyts_boss(word_size=2, 
n_bins=26, window_size=10, norm_mean=False, 105 | norm_std=False) 106 | ] 107 | }, 108 | SIGNAL2VEC: { 109 | CLF_MODELS : [ 110 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 111 | activation='logistic'), 112 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic'), 113 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam'), 114 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam'), 115 | ], 116 | TRANSFORMER_MODELS: [ 117 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=5), 118 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=10), 119 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=10), 120 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=10) 121 | ] 122 | }, 123 | WEASEL : { 124 | CLF_MODELS : [ 125 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam'), 126 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic'), 127 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam', activation='logistic'), 128 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam') 129 | ], 130 | TRANSFORMER_MODELS: [ 131 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 132 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 133 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 134 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False) 135 | ] 136 | }, 137 | PAA : { 138 | CLF_MODELS : [ 139 | ExtraTreesClassifier(n_jobs=-1, n_estimators=200), 140 | ExtraTreesClassifier(n_jobs=-1, 
n_estimators=1000), 141 | ExtraTreesClassifier(n_jobs=-1, n_estimators=2000), 142 | ExtraTreesClassifier(n_jobs=-1, n_estimators=500) 143 | ], 144 | TRANSFORMER_MODELS: [ 145 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 146 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 147 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 148 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True) 149 | ] 150 | }, 151 | DFT : { 152 | CLF_MODELS : [ 153 | ExtraTreesClassifier(n_jobs=-1, n_estimators=200), 154 | ExtraTreesClassifier(n_jobs=-1, n_estimators=1000), 155 | ExtraTreesClassifier(n_jobs=-1, n_estimators=2000), 156 | ExtraTreesClassifier(n_jobs=-1, n_estimators=500) 157 | ], 158 | TRANSFORMER_MODELS: [ 159 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 160 | supports_approximation=True), 161 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 162 | supports_approximation=True), 163 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 164 | supports_approximation=True), 165 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 166 | supports_approximation=True) 167 | ] 168 | }, 169 | SFA : { 170 | CLF_MODELS : [ 171 | ExtraTreesClassifier(n_jobs=-1, n_estimators=500), 172 | ExtraTreesClassifier(n_jobs=-1, n_estimators=2000), 173 | ExtraTreesClassifier(n_jobs=-1, n_estimators=1000), 174 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam'), 175 | ], 176 | TRANSFORMER_MODELS: [ 177 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=9, norm_mean=False, norm_std=False), 178 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=9, norm_mean=False, norm_std=False), 179 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=9, norm_mean=False, norm_std=False), 180 | 
TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=9, norm_mean=False, norm_std=False) 181 | ] 182 | }, 183 | SAX1D : { 184 | CLF_MODELS : [ 185 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam'), 186 | ExtraTreeClassifier(), 187 | ExtraTreeClassifier(), 188 | ExtraTreesClassifier(n_jobs=-1, n_estimators=100) 189 | ], 190 | TRANSFORMER_MODELS: [ 191 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=20), 192 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=20), 193 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10), 194 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=50) 195 | ] 196 | }, 197 | SAX : { 198 | CLF_MODELS : [ 199 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam'), 200 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam'), 201 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam'), 202 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam') 203 | ], 204 | TRANSFORMER_MODELS: [ 205 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=50, supports_approximation=True), 206 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=10, supports_approximation=True), 207 | TransformerFactory.build_tslearn_sax(n_paa_segments=20, n_sax_symbols=50, supports_approximation=True), 208 | TransformerFactory.build_tslearn_sax(n_paa_segments=20, n_sax_symbols=10, supports_approximation=True) 209 | ] 210 | } 211 | } 212 | 213 | selected_models_8h = { 214 | BOSS : { 215 | CLF_MODELS : [ 216 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam', 217 | activation='logistic'), 218 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic'), 219 | MLPClassifier(hidden_layer_sizes=(2000, 
100), learning_rate='adaptive', solver='adam', 220 | activation='logistic'), 221 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 222 | activation='logistic') 223 | ], 224 | TRANSFORMER_MODELS: [ 225 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=20, window_size=10, norm_mean=False, 226 | norm_std=False), 227 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=20, window_size=10, norm_mean=False, 228 | norm_std=False), 229 | TransformerFactory.build_pyts_boss(word_size=4, n_bins=10, window_size=10, norm_mean=False, 230 | norm_std=False), 231 | TransformerFactory.build_pyts_boss(word_size=4, n_bins=10, window_size=10, norm_mean=False, 232 | norm_std=False) 233 | ] 234 | }, 235 | SIGNAL2VEC: { 236 | CLF_MODELS : [ 237 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam'), 238 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam'), 239 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam'), 240 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic') 241 | ], 242 | TRANSFORMER_MODELS: [ 243 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=50), 244 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=4), 245 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=1), 246 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=5) 247 | ] 248 | }, 249 | WEASEL : { 250 | CLF_MODELS : [ 251 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', activation='logistic'), 252 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam'), 253 | MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam'), 254 | MLPClassifier(hidden_layer_sizes=(100, 100), 
learning_rate='adaptive', solver='adam') 255 | ], 256 | TRANSFORMER_MODELS: [ 257 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 258 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 259 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 260 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False) 261 | ] 262 | }, 263 | PAA : { 264 | CLF_MODELS : [ 265 | ExtraTreesClassifier(n_jobs=-1, n_estimators=1000), 266 | ExtraTreesClassifier(n_jobs=-1, n_estimators=500), 267 | ExtraTreesClassifier(n_jobs=-1, n_estimators=2000), 268 | ExtraTreesClassifier(n_jobs=-1, n_estimators=200) 269 | ], 270 | TRANSFORMER_MODELS: [ 271 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 272 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 273 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 274 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True) 275 | ] 276 | }, 277 | DFT : { 278 | CLF_MODELS : [ 279 | ExtraTreesClassifier(n_jobs=-1, n_estimators=500), 280 | RandomForestClassifier(n_jobs=-1, n_estimators=100), 281 | ExtraTreesClassifier(n_jobs=-1, n_estimators=100), 282 | ExtraTreesClassifier(n_jobs=-1, n_estimators=2000) 283 | ], 284 | TRANSFORMER_MODELS: [ 285 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 286 | supports_approximation=True), 287 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 288 | supports_approximation=True), 289 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 290 | supports_approximation=True), 291 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 292 | supports_approximation=True) 293 | ] 294 | }, 295 | SFA : { 296 
| CLF_MODELS : [ 297 | MLPClassifier(hidden_layer_sizes=(100, 50, 100, 50), learning_rate='adaptive', solver='adam'), 298 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam'), 299 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam'), 300 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam') 301 | ], 302 | TRANSFORMER_MODELS: [ 303 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 304 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 305 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 306 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False) 307 | ] 308 | }, 309 | SAX1D : { 310 | CLF_MODELS : [ 311 | MLPClassifier(hidden_layer_sizes=(100, 50, 100, 50), learning_rate='adaptive', solver='adam'), 312 | MLPClassifier(hidden_layer_sizes=(100, 100), learning_rate='adaptive', solver='adam'), 313 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam'), 314 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam') 315 | ], 316 | TRANSFORMER_MODELS: [ 317 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10), 318 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10), 319 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10), 320 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=20) 321 | ] 322 | }, 323 | SAX : { 324 | CLF_MODELS : [ 325 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam'), 326 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam'), 327 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam'), 328 | MLPClassifier(hidden_layer_sizes=(2000), 
learning_rate='adaptive', solver='adam') 329 | ], 330 | TRANSFORMER_MODELS: [ 331 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=100, supports_approximation=False), 332 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=10, supports_approximation=False), 333 | TransformerFactory.build_tslearn_sax(n_paa_segments=10, n_sax_symbols=20, supports_approximation=False), 334 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=20, supports_approximation=True) 335 | ] 336 | } 337 | } 338 | 339 | selected_models_1h = { 340 | BOSS : { 341 | CLF_MODELS : [ 342 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam', 343 | activation='logistic'), 344 | MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam', 345 | activation='logistic'), 346 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam', 347 | activation='logistic'), 348 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 349 | activation='logistic') 350 | ], 351 | TRANSFORMER_MODELS: [ 352 | TransformerFactory.build_pyts_boss(word_size=4, n_bins=20, window_size=10, norm_mean=False, 353 | norm_std=False), 354 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=20, window_size=10, norm_mean=False, 355 | norm_std=False), 356 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=20, window_size=10, norm_mean=False, 357 | norm_std=False), 358 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=20, window_size=10, norm_mean=False, 359 | norm_std=False) 360 | ] 361 | }, 362 | SIGNAL2VEC: { 363 | CLF_MODELS : [ 364 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 365 | activation='logistic'), 366 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 367 | activation='logistic'), 368 | MLPClassifier(hidden_layer_sizes=(2000, 100), 
learning_rate='adaptive', solver='adam', 369 | activation='logistic'), 370 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 371 | activation='logistic') 372 | ], 373 | TRANSFORMER_MODELS: [ 374 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=2), 375 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=4), 376 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=4), 377 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=1) 378 | ] 379 | }, 380 | WEASEL : { 381 | CLF_MODELS : [ 382 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic'), 383 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', activation='logistic'), 384 | MLPClassifier(hidden_layer_sizes=(100, 50, 100, 50), learning_rate='adaptive', solver='adam'), 385 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam', activation='logistic') 386 | ], 387 | TRANSFORMER_MODELS: [ 388 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 389 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 390 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 391 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False) 392 | ] 393 | }, 394 | PAA : { 395 | CLF_MODELS : [ 396 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam'), 397 | MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam'), 398 | ExtraTreesClassifier(n_jobs=-1, n_estimators=100), 399 | MLPClassifier(hidden_layer_sizes=(100, 50, 100, 50), learning_rate='adaptive', solver='adam') 400 | ], 401 | TRANSFORMER_MODELS: [ 402 | 
TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 403 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 404 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 405 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True) 406 | ] 407 | }, 408 | DFT : { 409 | CLF_MODELS : [ 410 | ExtraTreesClassifier(n_jobs=-1, n_estimators=100), 411 | RandomForestClassifier(n_jobs=-1, n_estimators=100), 412 | ExtraTreesClassifier(n_jobs=-1, n_estimators=500), 413 | ExtraTreesClassifier(n_jobs=-1, n_estimators=1000) 414 | ], 415 | TRANSFORMER_MODELS: [ 416 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 417 | supports_approximation=True), 418 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 419 | supports_approximation=True), 420 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 421 | supports_approximation=True), 422 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 423 | supports_approximation=True), 424 | ] 425 | }, 426 | SFA : { 427 | CLF_MODELS : [ 428 | MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam'), 429 | MLPClassifier(hidden_layer_sizes=(1000, 2000, 100), learning_rate='adaptive', solver='adam'), 430 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam'), 431 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam') 432 | ], 433 | TRANSFORMER_MODELS: [ 434 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 435 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 436 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 437 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, 
norm_mean=False, norm_std=False) 438 | ] 439 | }, 440 | SAX1D : { 441 | CLF_MODELS : [ 442 | ExtraTreesClassifier(n_jobs=-1, n_estimators=100), 443 | ExtraTreesClassifier(n_jobs=-1, n_estimators=200), 444 | RandomForestClassifier(n_jobs=-1, n_estimators=100), 445 | ExtraTreesClassifier(n_jobs=-1, n_estimators=200) 446 | ], 447 | TRANSFORMER_MODELS: [ 448 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=50), 449 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10), 450 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10), 451 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=50) 452 | ] 453 | }, 454 | SAX : { 455 | CLF_MODELS : [ 456 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam'), 457 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam'), 458 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam'), 459 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam'), 460 | ], 461 | TRANSFORMER_MODELS: [ 462 | TransformerFactory.build_tslearn_sax(n_paa_segments=20, n_sax_symbols=10, supports_approximation=True), 463 | TransformerFactory.build_tslearn_sax(n_paa_segments=20, n_sax_symbols=50, supports_approximation=True), 464 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=10, supports_approximation=True), 465 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=50, supports_approximation=True) 466 | ] 467 | } 468 | } 469 | 470 | selected_models_2h = { 471 | BOSS : { 472 | CLF_MODELS : [ 473 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam', 474 | activation='logistic'), 475 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 476 | activation='logistic'), 477 | MLPClassifier(hidden_layer_sizes=(1000, 2000, 100), 
learning_rate='adaptive', solver='adam', 478 | activation='logistic'), 479 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam', activation='logistic') 480 | ], 481 | TRANSFORMER_MODELS: [ 482 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=20, window_size=10, norm_mean=False, 483 | norm_std=False), 484 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=20, window_size=10, norm_mean=False, 485 | norm_std=False), 486 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=20, window_size=10, norm_mean=False, 487 | norm_std=False), 488 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=20, window_size=10, norm_mean=False, 489 | norm_std=False) 490 | ] 491 | }, 492 | SIGNAL2VEC: { 493 | CLF_MODELS : [ 494 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam', 495 | activation='logistic'), 496 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', activation='logistic'), 497 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 498 | activation='logistic'), 499 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam', 500 | activation='logistic') 501 | ], 502 | TRANSFORMER_MODELS: [ 503 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=4), 504 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=4), 505 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=4), 506 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=5) 507 | ] 508 | }, 509 | WEASEL : { 510 | CLF_MODELS : [ 511 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', activation='logistic'), 512 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic'), 513 | MLPClassifier(hidden_layer_sizes=(100, 50, 100, 50), 
learning_rate='adaptive', solver='adam'), 514 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam', activation='logistic') 515 | ], 516 | TRANSFORMER_MODELS: [ 517 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 518 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 519 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False), 520 | TransformerFactory.build_pyts_weasel(word_size=2, n_bins=4, norm_mean=False, norm_std=False) 521 | ] 522 | }, 523 | PAA : { 524 | CLF_MODELS : [ 525 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam'), 526 | MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam'), 527 | MLPClassifier(hidden_layer_sizes=(100, 50, 100, 50), learning_rate='adaptive', solver='adam'), 528 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', activation='logistic') 529 | ], 530 | TRANSFORMER_MODELS: [ 531 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 532 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 533 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 534 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True) 535 | ] 536 | }, 537 | DFT : { 538 | CLF_MODELS : [ 539 | 540 | ], 541 | TRANSFORMER_MODELS: [ 542 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 543 | supports_approximation=True), 544 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 545 | supports_approximation=True), 546 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 547 | supports_approximation=True), 548 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 
549 | supports_approximation=True) 550 | ] 551 | }, 552 | SFA : { 553 | CLF_MODELS : [ 554 | ExtraTreesClassifier(n_jobs=-1, n_estimators=500), 555 | ExtraTreesClassifier(n_jobs=-1, n_estimators=2000), 556 | RandomForestClassifier(n_jobs=-1, n_estimators=100), 557 | ExtraTreesClassifier(n_jobs=-1, n_estimators=1000) 558 | ], 559 | TRANSFORMER_MODELS: [ 560 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 561 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 562 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 563 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False) 564 | ] 565 | }, 566 | SAX1D : { 567 | CLF_MODELS : [ 568 | ExtraTreesClassifier(n_jobs=-1, n_estimators=1000), 569 | ExtraTreesClassifier(n_jobs=-1, n_estimators=2000), 570 | ExtraTreesClassifier(n_jobs=-1, n_estimators=2000), 571 | RandomForestClassifier(n_jobs=-1, n_estimators=100) 572 | ], 573 | TRANSFORMER_MODELS: [ 574 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10), 575 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10), 576 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=20), 577 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=20) 578 | ] 579 | }, 580 | SAX : { 581 | CLF_MODELS : [ 582 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam'), 583 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam'), 584 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam'), 585 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam') 586 | ], 587 | TRANSFORMER_MODELS: [ 588 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=50, supports_approximation=True), 589 | 
TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=10, supports_approximation=True), 590 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=50, supports_approximation=True), 591 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=50, supports_approximation=True) 592 | ] 593 | } 594 | } 595 | 596 | selected_models_24h = { 597 | BOSS : { 598 | CLF_MODELS : [ 599 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', activation='logistic'), 600 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam', 601 | activation='logistic'), 602 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam'), 603 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam') 604 | ], 605 | TRANSFORMER_MODELS: [ 606 | TransformerFactory.build_pyts_boss(word_size=4, n_bins=5, window_size=10, norm_mean=False, norm_std=False), 607 | TransformerFactory.build_pyts_boss(word_size=4, n_bins=5, window_size=10, norm_mean=False, norm_std=False), 608 | TransformerFactory.build_pyts_boss(word_size=4, n_bins=10, window_size=10, norm_mean=False, 609 | norm_std=False), 610 | TransformerFactory.build_pyts_boss(word_size=4, n_bins=5, window_size=10, norm_mean=False, norm_std=False) 611 | ] 612 | }, 613 | SIGNAL2VEC: { 614 | CLF_MODELS : [ 615 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam'), 616 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam'), 617 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam'), 618 | MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam') 619 | ], 620 | TRANSFORMER_MODELS: [ 621 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=4), 622 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=4), 623 | 
TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=5), 624 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=2) 625 | ] 626 | }, 627 | WEASEL : { 628 | CLF_MODELS : [ 629 | ], 630 | TRANSFORMER_MODELS: [ 631 | ] 632 | }, 633 | PAA : { 634 | CLF_MODELS : [ 635 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 636 | activation='logistic'), 637 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', activation='logistic'), 638 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic'), 639 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam', 640 | activation='logistic') 641 | ], 642 | TRANSFORMER_MODELS: [ 643 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 644 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 645 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True), 646 | TransformerFactory.build_tslearn_paa(n_paa_segments=10, supports_approximation=True) 647 | ] 648 | }, 649 | DFT : { 650 | CLF_MODELS : [ 651 | ExtraTreesClassifier(n_jobs=-1, n_estimators=100), 652 | ExtraTreesClassifier(n_jobs=-1, n_estimators=2000), 653 | ExtraTreesClassifier(n_jobs=-1, n_estimators=500), 654 | ExtraTreesClassifier(n_jobs=-1, n_estimators=200) 655 | ], 656 | TRANSFORMER_MODELS: [ 657 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 658 | supports_approximation=True), 659 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 660 | supports_approximation=True), 661 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 662 | supports_approximation=True), 663 | TransformerFactory.build_pyts_dft(n_coefs=10, norm_mean=False, norm_std=False, 664 | supports_approximation=True) 665 | 
] 666 | }, 667 | SFA : { 668 | CLF_MODELS : [ 669 | MLPClassifier(hidden_layer_sizes=(100,), learning_rate='adaptive', solver='adam'), 670 | ExtraTreesClassifier(n_jobs=-1, n_estimators=1000), 671 | RandomForestClassifier(n_jobs=-1, n_estimators=200), 672 | ExtraTreesClassifier(n_jobs=-1, n_estimators=100) 673 | ], 674 | TRANSFORMER_MODELS: [ 675 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 676 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 677 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False), 678 | TransformerFactory.build_pyts_sfa(n_coefs=10, n_bins=5, norm_mean=False, norm_std=False) 679 | ] 680 | }, 681 | SAX1D : { 682 | CLF_MODELS : [ 683 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic'), 684 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', activation='logistic'), 685 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', activation='logistic'), 686 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', activation='logistic') 687 | ], 688 | TRANSFORMER_MODELS: [ 689 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=10), 690 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=20), 691 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=50), 692 | TransformerFactory.build_tslearn_one_d_sax(n_paa_segments=50, n_sax_symbols=100) 693 | ] 694 | }, 695 | SAX : { 696 | CLF_MODELS : [ 697 | MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam'), 698 | MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam'), 699 | MLPClassifier(hidden_layer_sizes=(100, 100), learning_rate='adaptive', solver='adam'), 700 | MLPClassifier(hidden_layer_sizes=(2000), 
learning_rate='adaptive', solver='adam') 701 | ], 702 | TRANSFORMER_MODELS: [ 703 | TransformerFactory.build_tslearn_sax(n_paa_segments=50, n_sax_symbols=10, supports_approximation=True), 704 | TransformerFactory.build_tslearn_sax(n_paa_segments=20, n_sax_symbols=50, supports_approximation=True), 705 | TransformerFactory.build_tslearn_sax(n_paa_segments=20, n_sax_symbols=10, supports_approximation=True), 706 | TransformerFactory.build_tslearn_sax(n_paa_segments=20, n_sax_symbols=50, supports_approximation=True) 707 | ] 708 | } 709 | } 710 | 711 | model_selection_clf_list = [ 712 | MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam', 713 | activation='logistic'), 714 | MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam', 715 | activation='logistic'), 716 | MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam', 717 | activation='logistic') 718 | ] 719 | 720 | model_selection_transformers = [ 721 | TransformerFactory.build_pyts_boss(word_size=2, n_bins=5, window_size=10, norm_mean=False, norm_std=False), 722 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=2), 723 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=1), 724 | TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, transformer_type=TransformerType.transform) 725 | ] 726 | 727 | model_selection_mlknn = [MLkNN(k=1, s=1.0, ignore_first_neighbours=0), 728 | MLkNN(k=3, s=1.0, ignore_first_neighbours=0), 729 | MLkNN(k=10, s=1.0, ignore_first_neighbours=0), 730 | MLkNN(k=20, s=1.0, ignore_first_neighbours=0), 731 | 732 | MLkNN(k=1, s=0.5, ignore_first_neighbours=0), 733 | MLkNN(k=3, s=0.5, ignore_first_neighbours=0), 734 | MLkNN(k=10, s=0.5, ignore_first_neighbours=0), 735 | MLkNN(k=20, s=0.5, ignore_first_neighbours=0), 736 | 737 | MLkNN(k=1, s=0.7, ignore_first_neighbours=0), 738 | MLkNN(k=3, s=0.7, ignore_first_neighbours=0), 739 | MLkNN(k=10, 
s=0.7, ignore_first_neighbours=0), 740 | MLkNN(k=20, s=0.7, ignore_first_neighbours=0) 741 | ] 742 | 743 | model_selection_rakel = [ 744 | RakelD(MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam')), 745 | RakelD(MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam'), labelset_size=5), 746 | RakelD(MLPClassifier(hidden_layer_sizes=(100, 100), learning_rate='adaptive', solver='adam')), 747 | RakelD(MLPClassifier(hidden_layer_sizes=(100, 100), learning_rate='adaptive', solver='adam'), labelset_size=5), 748 | RakelD(MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam')), 749 | RakelD(MLPClassifier(hidden_layer_sizes=(2000, 100), learning_rate='adaptive', solver='adam'), labelset_size=5), 750 | RakelD(MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam')), 751 | RakelD(MLPClassifier(hidden_layer_sizes=(2000), learning_rate='adaptive', solver='adam'), labelset_size=5), 752 | RakelD(base_classifier=GaussianNB(), base_classifier_require_dense=[True, True], labelset_size=3), 753 | RakelD(base_classifier=GaussianNB(), base_classifier_require_dense=[True, True], labelset_size=5), 754 | RakelD(base_classifier=GaussianNB(), base_classifier_require_dense=[True, True], labelset_size=7) 755 | ] 756 | 757 | model_selection_wavelets = [ 758 | TransformerFactory.build_wavelet(), 759 | TransformerFactory.build_wavelet(drop_cA=True) 760 | ] 761 | 762 | model_selection_delay_embeddings = [ 763 | TransformerFactory.build_delay_embedding(delay_in_seconds=30, dimension=6), 764 | TransformerFactory.build_delay_embedding(delay_in_seconds=32, dimension=8), 765 | TransformerFactory.build_delay_embedding(delay_in_seconds=6, dimension=8), 766 | TransformerFactory.build_delay_embedding(delay_in_seconds=12, dimension=8) 767 | ] 768 | 769 | cv_signal2vec = [TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=1)] 770 | cv_signal2vec_clf 
= [MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', 771 | solver='adam', activation='logistic')] 772 | cv_boss_clf = [MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam')] 773 | cv_boss = [TransformerFactory.build_pyts_boss(word_size=2, n_bins=2, window_size=10, 774 | norm_mean=False, norm_std=False)] 775 | 776 | state_of_the_art = { 777 | SIGNAL2VEC : { 778 | CLF_MODELS : [MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', 779 | solver='adam', activation='logistic')], 780 | TRANSFORMER_MODELS: [TransformerFactory.build_signal2vec(SAVED_MODEL, PATH_SIGNAL2VEC, num_of_vectors=1)] 781 | }, 782 | WAVELETS : { 783 | CLF_MODELS : [MLkNN(ignore_first_neighbours=0, k=3, s=1.0), 784 | RakelD(MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', 785 | solver='adam'), labelset_size=5)], 786 | TRANSFORMER_MODELS: [TransformerFactory.build_wavelet(), TransformerFactory.build_wavelet()] 787 | }, 788 | TIME_DELAY_EMBEDDING: { 789 | CLF_MODELS : [ 790 | MLkNN(ignore_first_neighbours=0, k=3, s=1.0), 791 | RakelD(MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', 792 | solver='adam'), labelset_size=5) 793 | ], 794 | TRANSFORMER_MODELS: [TransformerFactory.build_delay_embedding(delay_in_seconds=30, dimension=6), 795 | TransformerFactory.build_delay_embedding(delay_in_seconds=30, dimension=6) 796 | ] 797 | }, 798 | BOSS : { 799 | CLF_MODELS : [ 800 | MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam')], 801 | TRANSFORMER_MODELS: [TransformerFactory.build_pyts_boss(word_size=2, n_bins=4, window_size=10, 802 | norm_mean=False, norm_std=False)] 803 | } 804 | } 805 | -------------------------------------------------------------------------------- /data_exploration/time_delay_embedding/delay_embedding_parameterization-uk_dale.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/home/christoforos/anaconda3/envs/nilm/lib/python3.7/site-packages/nilmtk/utils.py:502: UserWarning: Found duplicate index. Keeping first value\n", 13 | " warnings.warn(\"Found duplicate index. Keeping first value\")\n" 14 | ] 15 | }, 16 | { 17 | "name": "stdout", 18 | "output_type": "stream", 19 | "text": [ 20 | "TIMING: NILMTK converting mains to dataframe: 16.54\n", 21 | "100000\n", 22 | " (54, 1, UK-DALE)\n", 23 | "count 3.469800e+06\n", 24 | "mean 3.110080e+02\n", 25 | "std 4.187989e+02\n", 26 | "min 4.727666e+01\n", 27 | "25% 1.395367e+02\n", 28 | "50% 2.054350e+02\n", 29 | "75% 3.256867e+02\n", 30 | "max 7.910050e+03\n" 31 | ] 32 | }, 33 | { 34 | "data": { 35 | "text/plain": [ 36 | "" 37 | ] 38 | }, 39 | "execution_count": 1, 40 | "metadata": {}, 41 | "output_type": "execute_result" 42 | }, 43 | { 44 | "data": { 45 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAX0AAAEECAYAAADEVORYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAgAElEQVR4nO3deZgU1bn48e8rEEENKgpEwZshkZsENJJAFI0mrpFEE0xuvEFvFBMT1KtmNxfyMxGTEIlJjBqjBjURjLKICygCIq4sAgMMDDsDDDAsM8OwM8zCzPv7o6uhZ6aX6u6qXqrfz/PM092nlj7d0/XWqXNOnSOqijHGmMJwXLYzYIwxJnMs6BtjTAGxoG+MMQXEgr4xxhQQC/rGGFNALOgbY0wBaZ/tDCRy+umna1FRUbazYYwxeWXx4sW7VLVr6/ScD/pFRUUUFxdnOxvGGJNXRGRztHSr3jHGmAJiQd8YYwqIq6AvIj8VkZUiskJExotIRxHpIiKzRGS983hqxPojRKRMRNaKyNUR6f1FpNRZ9qiIiB8fyhhjTHQJ6/RFpAfwI6CPqh4WkUnAEKAPMFtVR4vIcGA48H8i0sdZ3hc4E3hLRP5TVZuAJ4BhwIfAG8AgYLoPn8sY00pjYyMVFRXU1dVlOyvGQx07dqRnz5506NDB1fpuG3LbA51EpBE4AdgOjAAudZaPBd4F/g8YDExQ1Xpgk4iUAeeLSDnQWVXnA4jIOOA6LOgbkxEVFRV89KMfpaioCLvIDgZVpaamhoqKCnr16uVqm4TVO6q6DfgzsAXYAexT1TeB7qq6w1lnB9DN2aQHsDViFxVOWg/neev0NkRkmIgUi0hxdXW1qw9ijImvrq6O0047zQJ+gIgIp512WlJXbwmDvlNXPxjoRai65kQR+W68TaKkaZz0tomqY1R1gKoO6Nq1TTdTYwJJVfF7qHML+MGT7P/UTUPulcAmVa1W1UbgZeAioFJEznDe9Aygylm/AjgrYvuehKqDKpznrdONKXgNR5rpNeIN/vzm2mxnxQScm6C/BRgoIic4vW2uAFYDU4GhzjpDgSnO86nAEBE5XkR6Ab2BhU4V0AERGejs5+aIbYwJrLrGJuoam+Kuc9hZPm5+1PtpAuPw4cN8+ctfpqkp9HnbtWtHv3796NevH9/4xjfarH/33Xdz0kknudr3oEGDOOWUU7j22mtdrX/LLbcwefLkFmnh9yovL+ecc845mv7UU0/x+c9/nj179rRYP5z/vn37ct555/HQQw/R3NzcYp3Bgwdz4YUXtkgbOXIkf/7zn9vkKfL76NevH6NHjwZgyJAhrF+/3tXnSiRhQ66qLhCRycAS4AiwFBgDnARMEpFbCZ0YrnfWX+n08FnlrH+n03MH4A7gWaAToQZca8Q1gffpX8/g5E4dWHbfV7Kdlaz75z//ybe+9S3atWsHQKdOnSgpKYm6bnFxMXv37nW973vuuYfa2lr+8Y9/eJLXsOeee46//e1vvP3225x66qktlkXmv6qqihtvvJF9+/Zx//33A7B3716WLFnCSSedxKZNmxI2tsb6Pu644w4efPBBnnrqqbQ/j6veO6p6H3Bfq+R6QqX+aOuPAkZFSS8Gzmm7hTHBtu9wY7az0ML9r61k1fb9nu6zz5mdue/rfeOu8/zzz/PCCy8k3FdTUxP33HMPL7zwAq+88oqr97/iiit49913Xa3r1qRJkxg9ejSzZ8/m9NNPj7tut27dGDNmDF/4whcYOXIkIsJLL73E17/+dbp3786ECRMYMWJESvm45JJLuOWWWzhy5Ajt26c3eo7dkWuMyYiGhgY2btxI5ACKdXV1DBgwgIEDB/Lqq68eTX/sscf4xje+wRlnnJGFnIZs3ryZu+66izfffJOPfexjrrb5xCc+QXNzM1VVoSbO8ePHc8MNN3DDDTcwfvz4hNsfPny4RfXOxIkTATjuuOM4++yzWbZsWeofyJHzA64Zkw+
27q5l655aLvpk/NJgrkhUIvfDrl27OOWUU1qkbdmyhTPPPJONGzdy+eWXc+6559KpUydefPFFz0vtrUXr9RKZ1rVrV7p06cKkSZP46U9/6nq/4R5YlZWVlJWVcfHFFyMitG/fnhUrVrRoK2gtXnVXt27d2L59O/3793edl2ispG+MBy558B1ufGpBtrOR0zp16tSmP/mZZ54JhErIl156KUuXLmXp0qWUlZVx9tlnU1RURG1tLWeffbbn+TnttNNaNMzu3r27RRXOCSecwPTp03nyySd5/vnn2bp169ES+JNPPhl1nxs3bqRdu3Z069aNiRMnsmfPHnr16kVRURHl5eVMmDAh5fzW1dXRqVOnlLcPs6BvjMmIU089laampqOBf8+ePdTX1wOhq4C5c+fSp08frrnmGnbu3El5eTnl5eWccMIJlJWVAbBw4UJuvvnmpN53xIgRUdsFLr30UiZOnEhDQwMAzz77LJdddlmLdbp27cqMGTP41a9+xapVqygpKaGkpITbb7+9zf6qq6u5/fbbueuuuxARxo8fz4wZM45+jsWLF6cV9NetW0ffvulfoVn1jjG5xN97s7LuK1/5CnPmzOHKK69k9erV3HbbbRx33HE0NzczfPhw+vTpE3f7LVu2xCztXnLJJaxZs4aDBw/Ss2dPnnnmGa6++mpKS0ujdge99tprWbx4Mf3796ddu3Z88pOfjFqC79WrF1OnTuVrX/saL7/8MhdccMHRZeE6+MbGRtq3b89NN93Ez372M8rLy9myZQsDBw5ssZ/OnTuzYEHoivD3v/89Dz/88NHlFRUVR/cXNmjQIEaPHk1lZSWdOnXypI3Dgr4xOaBQbpS96667eOihh7jyyiu56KKLKC0tTbjNwYMHjz5fsGABd955Z9T1Pvjgg6jpjY2NbfrJh913333cd1/rjomhyZtWrFhx9PV5553Htm3b2qwXvt8g2vbR1l+yZAkAF1xwASNHjnS9vxdeeIHbbrst6rJkWdA3xmTM5z73OS677DKampqO9tVPxp/+9Kekt5k5c2bS2+SaU045hZtuusmTfVnQN8Zk1Pe///1sZyHvfO973/NsX9aQa0wB8XtAN5N5yf5PLegbUyA6duxITU2NBf4ACY+n37FjR9fbWPWOMQWiZ8+eVFRUYHNUBEt45iy3LOgbk0P8LIN36NDB9exKJriseseYHFAgPTZNDrCgb4wxBcSCvjHGFBAL+sYYU0DcTIz+KREpifjbLyI/EZEuIjJLRNY7j6dGbDNCRMpEZK2IXB2R3l9ESp1lj4rN0myMMRmVMOir6lpV7aeq/YD+QC3wCjAcmK2qvYHZzmtEpA8wBOgLDAIeF5Hw/dZPAMMIzZvb21lujDEmQ5Kt3rkC2KCqm4HBwFgnfSxwnfN8MDBBVetVdRNQBpwvImcAnVV1vobuDhkXsY0xBrtj1vgv2aA/BAjP+dVdVXcAOI/dnPQewNaIbSqctB7O89bpxhQ8q+k0O/fVsXqHt/MWR+M66IvIR4BvAC8mWjVKmsZJj/Zew0SkWESK7e5BY0whGPjAbL76SPThob2UTEn/q8ASVa10Xlc6VTY4j1VOegVwVsR2PYHtTnrPKOltqOoYVR2gqgO6du2aRBaNMcbEk0zQv4FjVTsAU4GhzvOhwJSI9CEicryI9CLUYLvQqQI6ICIDnV47N0dsY4wxJgNcjb0jIicAVwGRU7eMBiaJyK3AFuB6AFVdKSKTgFXAEeBOVQ1PB3MH8CzQCZju/BljjMkQV0FfVWuB01ql1RDqzRNt/VHAqCjpxcA5yWfTGGOMF+yOXGOMKSAW9I3JIdZL3/jNgr4xOcB66ZtMsaBvjDEFxIK+McYUEAv6xhhTQCzoG2NMAbGgb4wxBcSCvjE5xEZWNn6zoG9MDrCRlU2mWNA3xpgCYkHfGGMKiAV9Y4wpIBb0C8iUkm3sr2vMdjaMMVlkQb9ArKs8wI8nlPCLScuynRVjTBZZ0C8QtQ2heWw
q99elva/Zqys5VH8krX3c9MwCfjqxJO28BI3aOJvGZ66CvoicIiKTRWSNiKwWkQtFpIuIzBKR9c7jqRHrjxCRMhFZKyJXR6T3F5FSZ9mjzrSJJo+UVR3k1rHFDH+5NK39fLB+F68s3eZRrvKf2DibJkPclvQfAWao6qeB84DVwHBgtqr2BmY7rxGRPsAQoC8wCHhcRNo5+3kCGEZo3tzeznKTIxZsrKFo+DTWVR6Iuc5Bp4S/peZQprJljPFQwqAvIp2BLwHPAKhqg6ruBQYDY53VxgLXOc8HAxNUtV5VNwFlwPkicgbQWVXnq6oC4yK28dSmXYcYOXUlzc3BuFRetnUvk4q3+v4+b5TuAGBe2S7f38sYkx1uSvqfAKqBf4nIUhF5WkROBLqr6g4A57Gbs34PIDJCVThpPZznrdM9d/tzi3l2Xjnrqw76sfuMG/z3ufxy8vK09qEBv7//SFMzO/el315hTNC5Cfrtgc8DT6jq54BDOFU5MUSrnNQ46W13IDJMRIpFpLi6utpFFlvvNNgBLi0BbUb53eurGPjAbPbWNmQ7K8bkNDdBvwKoUNUFzuvJhE4ClU6VDc5jVcT6Z0Vs3xPY7qT3jJLehqqOUdUBqjqga9eubj+LiSPop8HZa0I/vwN16fUqMibbXl5SkXilNCQM+qq6E9gqIp9ykq4AVgFTgaFO2lBgivN8KjBERI4XkV6EGmwXOlVAB0RkoNNr5+aIbQxQWrGPLTW1vr5HMMv5xuQPVeW1ZdtpbGqOuvxnPt9L097lencDz4vIR4CNwPcInTAmicitwBbgegBVXSkikwidGI4Ad6pqk7OfO4BngU7AdOfPU1t317KuMj/r8r/+2BwAykdfk+WcmGwJeNOLAd5aXcXd45dy12Vn84urP5V4A4+5CvqqWgIMiLLoihjrjwJGRUkvBs5JJoPJuu25xcfeL/CVGv6I960FvUE4WwLa1GKi2HMo1O7kxY2SqQjcHbkNMS6ZCp2bWJ3UvXIWpYzJS4EL+ia+dGO1lfONyW8W9E1KrJxvTH6yoG+MMQUk0EHf2hwj2ZdhCtfkxRVc/+S8bGcjJ7jtsmnyXPgEaNUyuc1Ozf74xYs2j0RYoEv6pq2gjmZddaA+21kwxpVsdyW3oG/aiFctlqtVZg1HrKuuyS/ZKn8VXND/cGMN546caXPFpimgFwzGBF6gg360UunDb63jQN0RVmzbl/kMGWNMlgU66Jtjtu7xdyA3Y0x+sKBfIH460aveC+4q9Sct2krR8GnUNng/1PG8sl3UHLSGW2NSYUG/wHhVFZ9oP39/twyAqv3eB+cbn17Ad8Z86Pl+c4E1lRi/WdAvMEFpgC0LyFSYreVo5ygTIIEO+tH6w+Zql8NcYl+RMcEV6KAfj9iFdBturgLspGlMerJ9DLkK+iJSLiKlIlIiIsVOWhcRmSUi653HUyPWHyEiZSKyVkSujkjv7+ynTEQelaDeHloAEv3rsv3DNiZXhQ+NbBU8kynpX6aq/VQ1PIPWcGC2qvYGZjuvEZE+wBCgLzAIeFxE2jnbPAEMIzRvbm9nucmgTP/Q7LRuTHT5eEfuYGCs83wscF1E+gRVrVfVTUAZcL6InAF0VtX5Gppzb1zENgXpX3M3Ma9sV7azYYwpIG6DvgJvishiERnmpHVX1R0AzmM3J70HsDVi2wonrYfzvHV62m56ZgG/eqXUi1155lD9ET4xYhozV+6Muc79r63ixqcXZDBXhW3b3sPZzoLx0MyVOykaPo3NNYeynZWUzVy5k3HzyzP6nm6D/hdV9fPAV4E7ReRLcdaNdtGicdLb7kBkmIgUi0hxdXV1wsx9sH4XLyzY0nbnUfaeqarmzTW1NCv8dda6DL2jS85/QVX5wdhi3l+X+PsNgoWbdvPF0W8zeXFF4pWzydpCXJtSsg2AFdv2e7K/vbUN7Duc2TG5bntuMb+ZsjKj7+kq6KvqduexCngFOB+odKpscB6rnNUrgLMiNu8JbHf
Se0ZJj/Z+Y1R1gKoO6Nq1q/tPkwSv6tN2H2qgMQ8nY68/0sxbqyv5wbjiNss0TitsvsWkpmalqVlZV3kAgCVb9qS1v4o9tRQNn8bK7TZ2U9D0++0szrv/zWxnw3cJg76InCgiHw0/B74CrACmAkOd1YYCU5znU4EhInK8iPQi1GC70KkCOiAiA51eOzdHbOOZeAHLD5//3Sx+MrEko+/pl2QaeROtme0xw8POHTmTi0bP9mx/s1eHyjYTF21NsKbx2tRl2xnxcm5V4+YjNyX97sAcEVkGLASmqeoMYDRwlYisB65yXqOqK4FJwCpgBnCnqjY5+7oDeJpQ4+4GYLqHnyUpXp4bpi3fwVUPvefdDn2U7gVOst9btu+HqG1oojLKUBCPzl7P4s3plfpNZv1o/FLGL2xbjWuSk3C6RFXdCJwXJb0GuCLGNqOAUVHSi4Fzks+md/wKQetzaFiA6aU7uOP5JSy77yuc3KmDL+8Rq3qs/kgTHY5LrVNY9YF69tY20Lv7R9PImTsPzVrHQ7PWUT76mpS2t/sQTKqy/dspuDty8+FYfXT2eg7Vpz465RPvbQCgfFfmezV86t4Z3DtlRUrbXvLg21z11/c9zlFL6R5w4ZNdvOqr99dVc+VD73k6m9fERVvYfajBs/2FzS3blVPtE1tqavnNlBU0Nbv/R+VKVWKy0m1XXLx5D5OKk69mLLigH5bLNw09NGsdf3kzM71+/PgaovWkcqOusWWQPFR/hH9/uNmTdppj/+8kgokqdY1NLdLcfF/3vrqCsqqD7NjnTRfRjdUH+b+XSrl7/BIA9tc1Un+kKcFWbVUdqGvxevHm3fzP0wu45tE5nuQzGXsONXAkSgeIu8cvYdz8zZS6mOQo21WH2fZfT8zjl5OXJ71dwQZ9vyU6qSQaZ/5wo/fj0EPmTnZeXML+9rVV3PvqCuaW1aS9r1QCxLj5m/n0r2dEDd6lHnUTbC1aqbXeuWKoORgq6X925Jvc+FRy93e8t66a80fNZvbqyqNp//XE/DRymrqXFlfwud/N4t5X214RHhuiwPgl74L+3toGyncdYtV2fw66ZKRaAp25cid9fjPT49y4Ew5+mapXTHSSKS7fDRC11FdzKNQAe7gx+VKtF6Yt3wGEqhyOcj7Qsq17s5Glo5JthA7ntyTL+Qb4+YuhCX1ed75f441Xl27jB2MX0dysbN0de6a8hA25uWTnvjoGPnCs+12qjXDZ9l6GbohyE9cjg3IyVwGLyr3p+fLtJ+dTPvoavvn4PE/2F8si5+SyZHP2g54xfgh3HX/y/Q08OGNtzPXyqqSfbB2p36XZn7ron7/rYENKDXrp5N1N7M61xi83dbjpCJ9o1zo3abVWVnWQDzcmrkbKRLWDqhbEdJBWheOPDzfujrs8r4K+FxZuCn0hH26sofpAPTNWxB4bJ5FXS6LeUAwcC9q7DtZz5wtLkt733A27eCkDQwbUNTZzz4vu58/N9M1vmXLlQ+8xxMUUjJloExk3fzP9f/8WZVXRT1BhryzN8SElEon4Lg/VH2HA799ieUXu9CQKqoIL+mEPv7WeL4x6i9v/vdiX+tllFcf2OWtVZZw1YUP1QQ626qK5dffho3WfydhX20jFnuSuiF5M4uTiNuTn67khWgk71keZs967EVIjv6/wVclmpy0h1nfp3WT32be28gC7Ir57L38+zXG6f85Zv4vVO1JrH6xrbOJAnT9j9fz61RVx852Ogg36kVoHXC88/Fb0LpfXPzmvTZfGK/7yHt/1aLTNK//6HjU+9OfORfM3uO/V47aA3v/3b8XdKLIX0Hef8X6E1Gi9jHKtKs4rmareCd+3Es13n1nAVx/5wNX4WVt317ZoIP3qIx9w7sjkx+oJ/z/HLzzWx/7dtVUt1nnuw82sck5GTc3K0jTHjIpkQT/DYjWAetWrovpARGkpShExXD3hx4kukdU79qf9A478TDc8lbg6xmvZvL/Di4nmXliwhbfXxL/yzJR
0P0/kvQortu2LOYx564AazS4XbSiXPPgOlzz4ztHXm1K8+THaldst/1oUc/1H3lrHNx+fF7NGwk3eIwU66IfPqA1HmikaPo2i4dN8f897XlyWkfdJVbjP9/mjWg5CFquuvmp/HYcb3HWZjFffv67yAF995AMemrXW9546mfTPOZtcNQC75XepvuZQA99/tu3IqumauXIn8zakVt3VcKQ56SpJgLdWHwvm1/5tDrc9tzil9we48IG3U97WT/e/tpJH3y4DYORr0YdgLtmyN24Xzdbyqsvm1hR+GIBv9W7RJFM/ng2x+nf/cNxiPn7aCUCoJPLhxpqjDZvn9TyZKXddHHW715dv52OdOzKgqEvc9925L3Q3aLSGuu1ZnNzkzzPX8ourP9Um/auPfHC0rjde+8RvX1/lV9aiCnc9TUZzsx6958Av4YCbTDfqcEH/3ldLmVSce8fNtr2HOaVTB048PjthcuvuWv41t/zo66VbYtcGvOPiaiYsb0r6gx+bw4/GL/Vl3340Okarm83kySdZb62ubJHjpz/YePT5shg9Kir21HLXC0v59pPp3dl50ejslbIee6csanqsxr2x88oT7jPZ0vq8sl185jczoi5ralb2HGo8mqfrU/iuJy+uiNlVNeyVpRWedmhYX3mA15bF7t0W6f11yV0hNDUrA/9w7ErVr04DXxz9Nt8Z0/b7Ltm6N6mCyjcfn8sd/07+KuSO55Pv9QewrzZ+m17elPRjBZ58MvixudnOgqf+18WPMhfHOEpU/fa32eujplftr2PNzvjBM5LboR/iTan5hzdW88ycTa7fM5pqF3W+4Z5A6d7wWDR8Gv9zwX/wvNNZ4evnndlieeTdzfG+nXhVhYs372Hn/rqYy70UbVau6/6e3HEcr4TuhdYTISWKlXlT0o9lX613pec563dRNHxayl24Etno46iXqspbrbqG+lEAijwWk7npLJ/6n/wlxhSXTUkWKRVtMf/pnPW7XN3nELnO9NLMD1XQ3Kycc99MJqQ4dv3zMQbc23e4kS/96VhD6J7aRn46sSRjATwXLNmyJ+G4W37L+6D/xT/GrhoIHztuewnMWBk6wIpTqDdtzY8SbrxW+pkro099aELS+X+k2rhaum1fi/lPv/vMgqRm3PLqN5TMdJ5rdu7n/fXVHKw/wn1TvZ27Ndpw4a8s3ebpe3hh/oYaXzpjnHf/m3zr8Xn84Y01nu87Ga6Dvoi0E5GlIvK687qLiMwSkfXO46kR644QkTIRWSsiV0ek9xeRUmfZo+JBH7TWXQ83VGd+DPlo/KjVGBDZh7wVN5fwyYn+CbLRZ3z8wi2uut3lmmhXQlv3tO1l4fc3+vBb0aurohn08Adxuw/muhVRhvNIdpyo15e7a4tIVqYnXY8lmZL+j4HVEa+HA7NVtTcw23mNiPQBhgB9gUHA4yLSztnmCWAYoXlzezvLsy6oN7/4LV5NRetqjHSGb3h7TVXagSgbdwin8p75eidzrrj2b3NYkuaNTLGqp4LCVdAXkZ7ANYTmtw0bDIx1no8FrotIn6Cq9aq6idB8uOeLyBlAZ1Wdr6EIMC5iG19loy0xn49dL06C250umq2DmBc3GOW6Qp/cI9t27C2cNoJUuC3pPwz8Eoi8Xu2uqjsAnMduTnoPILLissJJ6+E8b51ufJL0JOY+xKpUpuKbW7Yr6vj66cjXc002Cw/5XHAxsSUM+iJyLVClqm47mkY7vDROerT3HCYixSJSXF2dmbHnwdtL6zyNMUdlM0j+z9MLjt6F6J20WnJDDx78PqLtw6p02rKvxD9uSvpfBL4hIuXABOByEfk3UOlU2eA8hlvaKoCzIrbvCWx30ntGSW9DVceo6gBVHdC1a9ckPk6r/aS6oQcRrxCqMVqLW8ef5L5SHdckl3jxE7ATQmJ3+3TTZlAlDPqqOkJVe6pqEaEG2rdV9bvAVGCos9pQYIrzfCowRESOF5FehBpsFzpVQAdEZKDTa+fmiG18VYD
x1z8a+TT5iJS9f0XuRM9D9Uf45eRl7I9xh/axeWLth2u8l04//dHAVSKyHrjKeY2qrgQmAauAGcCdqhoesesOQo3BZcAGYHoa7+8ZK0355/Z/p3Yr+dqd2Z8DOVWJTob/mruJScUVjHlvY9TleizqZ5cdF4GU1DAMqvou8K7zvAa4IsZ6o4BRUdKLgXOSzWSqDtZl98637EvuqH3qg2O3+2c73tQcbOA/upyQ5VyEpBr7Yl1hti5kBLnL8KLy3ZTvOsT1A85KvLJHgvx9eiHv78iNJ9k5dXNR5FglmeLLlU+S+/Q6C/vTKABMKnZ/F20iiT5X6+X5HsCuf3I+90xenvR22S50BFmgg36ydaNeHl7J3PoeT+RYJfHszcJsWamcHLLVvpLK5PRhU+LMhRyP1cmnLp1j0b73+AId9FPlxU+m6oDXwyK0daCukfELt6Cqnp1kwmJWTaS4v+c+3AzAgo3pj2uULV4UCqz9KH3p3N1tAhb097Qq7boN3l79hLbFGWPbj2qae19dwYiXSymOMTGK39x+b3trG3h7TahH7+FGd7NwBdWsVTvbVuHEHc7C1+zEla9VSz9/sYS/x5gnweRB0C/dto+yqoOu1m3dBe64cHHVRfR/eUlFmwnLk7UzThuC22qaZNQcDJ3k6hqbOBBlBEMvA8baJMaRb21vCsNfuy3Nqapvk+v44UBE24J1JfZHXWMzf5q5NtvZyFk5H/QBHnvb/SiBqfrZpGW+v4efolXveFnFFJ5VKdeurJsVprqcoSmW8FSOXotWUlbif4eC9ZQ0/sqLoP9qig1pQdTc7D4k/O/zS6iIMpSvG24aw1qXxps9PiPMKWs7jV6saQXTce+rpZ7t68EZa9i6212vsUTfsF0IGD/kRdBPVaqXz7l82T05ysTrqrGDdOX+5Ev7qYbut1Z7O+Z9XWPbq5doabnk8Xc3HH2eSq+xtl02TarWJZgXuFDlzRy5qThapZ9kEM/lLl+R7RapnpzmRSlBR/rb7PVc3Pv0pPfr9zRwPxibXzODRaveyd1fVltZbUT24L2/8tf3099JAAW6pO/W3ARB0K1sHiSxTwBtM3Xj0wvi7utQQxO7Dma+338ib62ubJOWyfSjIHsAABaRSURBVCDqVVfBfO0Vkyvs20tPoIO+2xK7VxOhZ/PH2O64zJch/fy8mf0uM/fdRX6uxmblQIxB146un8Uf1ZEk2o8yKdc6E+SbQAX91kE+iR6beU2Bkzt1yHY2PJXJA9uvNpxEhY4n3t3AuSPftCBmMipQQT9VQR77PtWAEqsqo1CrJmpSmHg+2s8quL80ky8s6JNfB+Lvp61u0yvhSFNzxhqfU7l7NJXTRCbPw27eau6GGs9K5In2Ez7hhgojhXmSNf4JdNBPtQSfasDJ1GX68844NmG3+tCrJchXP6naUO3uzvB4lPhhPPIKq7D/A7G/pUK92vRKsIN++LGAA5gCW3cnf4NWm+od1zMcB5d/0/IF/4t8Z01VAXzK/BDooO9WkM8J1z8537MROKeUbKM+zhDFXh7UOdmQ61Oexi/0brz+XPW9ZxcxvXRHtrNhcBH0RaSjiCwUkWUislJE7nfSu4jILBFZ7zyeGrHNCBEpE5G1InJ1RHp/ESl1lj0qaRbB31kb/w5Qt3sPQsyP91nTGSwtbG7ZLn48oYTR01fHXOcXL3o3flEmh8/N5ZvxgqQ6A8ONm8TclPTrgctV9TygHzBIRAYCw4HZqtobmO28RkT6EJpAvS8wCHhcRNo5+3oCGEZosvTezvKUfe9fi1q8Hvz3OS1ehw/mZA/pVEaFhNwd5/uO55Ofp7b1+Tg8OmTksA658GkzMW8BZG8gtBz9SQGh3/uUkm2ez+Vg/JUw6GtIuAWrg/OnwGBgrJM+FrjOeT4YmKCq9aq6idAk6OeLyBlAZ1Wdr6HoOC5iG0/sSTFYtw5wf5yxxovsZJTXwSFml80ci0IDH5id7Sz4Kre+7ZamlGznxxNKGDUt9tV
fJM8+Sy5/KXnAVZ2+iLQTkRKgCpilqguA7qq6A8B57Oas3gOIrKSscNJ6OM9bp0d7v2EiUiwiaXVLyXRd/YRFmambjVYrtnn3IV/fs3xX2/3vO5zaSdaNTDa++/VWI6eujL4gx06cqZi6bDs/mVgCwLPzyl1ts22PN3NW78rC1KBB4iroq2qTqvYDehIqtZ8TZ/VY/Txc9/9Q1TGqOkBVB7jJXzIZ8dMrS7dl9P0iA+PLS7x979ZBd6MT9CNnvtrt48GX6gkllf+B26Cf7FVOtCvP6gP1PPp2/Fmd8uGckMrENdOSaMiN9x0s2pS/U27mgqR676jqXuBdQnXxlU6VDc5juFW1AjgrYrOewHYnvWeUdN8lOqj9Hh3Sa5moYok17kqs4ViKy2MfiJmsEnrMp2ny3lnj7bDRsUR+U0HuVZYOtzPpmejc9N7pKiKnOM87AVcCa4CpwFBntaHAFOf5VGCIiBwvIr0INdgudKqADojIQKfXzs0R22TVh3k2WXeqbRfJCM+U5db3n10Uc9nO/f7MTOWVN0p3JlznnbVVHGoIzvy+yUzGkw3xTnjzN9akvN/IRue6Ap2v2U1J/wzgHRFZDiwiVKf/OjAauEpE1gNXOa9R1ZXAJGAVMAO4U1XD3+4dwNOEGnc3ANM9/CxtBLWkFJ4e8P111VnOyTHxQsiNT8UfytlTPsWyTJxoW7vpmYW+VaG9uSrxiQ7gl5PzexrR1m586kM+O3Imyyv2cjhAJ/FkJJxERVWXA5+Lkl4DXBFjm1HAqCjpxUC89gBPpdMYuK+2kZNPCNbIlX6KnPDbpE41+jSRXttc4+4u7UnFFTz47fN8zk3mLCrfA8CY9zfyu8EZC0U5xe7IjeFgjtfzV+Z4lUm2NFifcVcemJ7b3ZIPN/j/f5y3IfVqonxmQT+GXOuP3toFfwh2/3QvHLETQN7600x/T0qrduznzhfc3bT4Xg5Vo3oh0EE/oFX6xqUfjsuvOXXNMX63oWysdn9fy7TlGelkmDHBDvpBbck1rryzNlgltFyxars304ua7Ah00E9HjtfuGJM1X3v0g2xnIaOCFgsCHfStnG9MfrKJUvwT7KBvUd8Yk6agnX4CHfSNMen7v8nLacrwHbxBq1LJJYEO+umU9O1HZ0zIxOKtrN6R2cZbO/78E+ygb7X6xpg0Be0EFOigbzHfFKLL//Iuw/L8HoVte70Ze9+0lXDsnUJlvQdMvtpYfSipm49MYQl0Sd8K+sZ4o5B7wgWtABjsoJ/GLzVo9XjGGAMBD/rGFLpcHzjQZF6gg346V6R2qJgg+NPMtZ7sp6B7wgUsGLiZLvEsEXlHRFaLyEoR+bGT3kVEZonIeufx1IhtRohImYisFZGrI9L7i0ips+xR8XlEtEKuhzQG4PF3N3iyn0I+lmp8mr0sW9yU9I8AP1fVzwADgTtFpA8wHJitqr2B2c5rnGVDgL6EJlB/XETaOft6AhhGaN7c3s5yY4zJWQEr6CcO+qq6Q1WXOM8PAKuBHsBgYKyz2ljgOuf5YGCCqtar6iZC8+GeLyJnAJ1Vdb6GKhrHRWyTc6wu1BgDwYsFSdXpi0gRoflyFwDdVXUHhE4MQDdntR7A1ojNKpy0Hs7z1unR3meYiBSLSFp3mBR0PaQxHirk6p2gcR30ReQk4CXgJ6oabyCOaD8PjZPeNlF1jKoOUNUBbvMXNSPpjL2TzhsbEzCFXIAKWEHfXdAXkQ6EAv7zqvqyk1zpVNngPFY56RXAWRGb9wS2O+k9o6T7xs+f6b7D/k7nZozJDQV3c5bTw+YZYLWqPhSxaCow1Hk+FJgSkT5ERI4XkV6EGmwXOlVAB0RkoLPPmyO2yTuZHmrWmGwq5OqdoJX03Yy980XgJqBUREqctF8Bo4FJInIrsAW4HkBVV4rIJGAVoZ4/d6pqk7PdHcCzQCdguvPnHx+HVg5a444xJrqgHeoJg76qziF2+LwixjajgFFR0ouBc5LJYDoKuR7SGC/ZkRQ
cgb4jNz0BO70bY1JScHX6+czPeshg/QyMia+Q6/SD1nwX7KCfxrZBq8czxhgIeND3k50UTGEp4KJ+wI71QAd9P8dzC1o9nzHxFHL1TtAEPOinvq2FdGMMBK+AF+yg7+fOg/U7MMbEELSq3EAH/bKqgylvG7R/tDHpKOTanaCFgkAH/XfWViVeKUVB+yEYE4/P8x3ltKDdfR/ooJ/OHblBq8czxqQmaJEg2EG/cAsnxniqkA+lgBX0LeinKmg/BGPiKeQCVNAO9WAH/XSqdxKNshm4n4IxJqqAlfACHfT9DMwB+x0YY2II2qEe6KCfDgvqxhxjw5QHR6CDvv1QjfFGIdfpB42b6RL/KSJVIrIiIq2LiMwSkfXO46kRy0aISJmIrBWRqyPS+4tIqbPsUclEx9+0hmGIX9S3CwFjCkM6V/252MffTUn/WWBQq7ThwGxV7Q3Mdl4jIn2AIUBfZ5vHRaSds80TwDBCc+b2jrJPz1nhxBiTrqB12kgY9FX1fWB3q+TBwFjn+Vjguoj0Capar6qbgDLgfBE5A+isqvM1dOobF7GNb/z8V+XiGdwY472gHeqp1ul3V9UdAM5jNye9B7A1Yr0KJ62H87x1elQiMkxEikWkOMX8pS3xxOiZyYcxuaCQ6/SDdqx73ZAb7aehcdKjUtUxqjpAVQd4nRljjElGwGJ+ykG/0qmywXkMj2xWAZwVsV5PYLuT3jNKuq8KeZAoY4w3UqnKPdzQxP66xpy8Skg16E8FhjrPhwJTItKHiMjxItKLUIPtQqcK6ICIDHR67dwcsY1vLOQb4w0rQCXny396h8+OfDPb2YiqfaIVRGQ8cClwuohUAPcBo4FJInIrsAW4HkBVV4rIJGAVcAS4U1WbnF3dQagnUCdguvOXt3LxDG6MXwo55KdyrFcdqPc+Ix5JGPRV9YYYi66Isf4oYFSU9GLgnKRyl0UW1I0xAGsrD2Q7C54K9h25hVw8McZDdiylJhfLjoEO+ulIfEduLv47jTEmvkAHfT+raKz6xxQSG8cqOAId9NOReDx9Y4zJP4EO+haYjfGG1emnJheHawl00DfGGNOSBf0YEp2fc/EMboxfrKAfHAn76eezdAJzXWMTdY1NdOzQrkX6zn11LCrfTd8zO6ebPWN8tXV3rXc7y0LUX7hpN2d16cS+w43s2FeX+Qx4YGLx1sQrpWD+hpqUtw100H99+Q5eXz6N41L4wQ4Z8yEAG//wNVZs38cvJy+n75kn89KS0GChN1/4cS+zajw0YeEWfvf6qmxnIyOKhk87+vyNH13C2d1OOvr6kgff8ex95m+oYUBRF9plsHL/v/8xP2Pvlch766pjLvvW43NpUph020CampWV2/cfXfb/XlkRc7tUFJfvpnvnjtzw1Icp70NyvZri+DN66xlDH852NowxJq9s/uO1i6ONVGx1+sYYU0As6BtjTAGxoG+MMQXEgr4xxhQQC/rGGFNALOj7rOQ3V2U7C8YYc1TG++mLyCDgEaAd8LSqjs50HjLh37dewMW9Twfgys9055NdT2TE1z4DwIKNNXxnTOr9bI0xprVPdD2RjdWHAJjxk0v4zB+jr5fRoC8i7YC/A1cRmix9kYhMVdW07qT5zoCz2FPbQO/uJ9GxfTtOPL49v01wc841557BtNId/EeXE9ji4Z2LL95+IV8o6tIi7emhLbvKDmi1PNOu63cmf/jWuVz4wNvsO9yY1bzkgh9e0ounPtiU7Wy0cfmnu/H2mqpsZ6NgPfnd/tz+78VRl71+98Us3bqXX7/qzc1XQ75wFhMWpXb3bvnoa5JaP6M3Z4nIhcBIVb3aeT0CQFUfiLXNgAEDtLi4OEM5DCku383qHfs51NBEU7PS2NRMU7NypFnpcUondu6r4ziBE49vz21f/mRG8wawr7aR6oP1fLLriZ5OWP3pX0/ngl6nMfb753u2T5N5qmoTmSegqmyoPsTHTu7IScfHLvuqKjv313Gw7ghdP3o8p5zwkTb
L6480A1B/pJkTP9KOw41NHN++HR9pH6o9P9zQxEfaH0e7iKEBkv0fNTcr//XkPNbtPECPUzuxrvIgl/Q+nXHfPz/mfkQk6s1ZmQ763wYGqeoPnNc3AReo6l2xtslG0DfGmHwXK+hnuiE32impzVlHRIaJSLGIFFdXxx7zwhhjTHIyHfQrgLMiXvcEtrdeSVXHqOoAVR3QtWvXjGXOGGOCLtNBfxHQW0R6ichHgCHA1AznwRhjClZGe++o6hERuQuYSajL5j9VdWUm82CMMYUs4/30VfUN4I1Mv68xxhi7I9cYYwqKBX1jjCkgFvSNMaaA5Px0iSJyAFgbY/HJwL4Eu3CzjlfrnQ7syvG8ZfI9k10vvE7k95hreQuLlUc/3zPV9fz43/vxP+hA7v3fY63X+v+fS3kL+5SqfrRNqqrm9B9QHGfZGBfbJ1zHq/Ui85qrecvke6b6v4r3P8923qL9rzP1nqmu58f/3o//QS7+32Ot1zqvuZS3WHkM/+V79c5rHq2TrfUK5T3drmd5y/57ul2vUN7T7Xq5nLcW8qF6p1ijjB+Ri3I5r7mct9byIa/5kMewfMlrvuQT8iOvsfKYDyX9MdnOQBJyOa+5nLfW8iGv+ZDHsHzJa77kE/Ijr1HzmPMlfWOMMd7Jh5K+McYYj1jQN8aYApIzQV9EDmY7D4mISJOIlET8FcVZ910RyUhDj4ioiDwX8bq9iFSLyOuZeP9UiMg3nXx/Ott5aS0fv0/Ij2MoUqL8ZvIYivLeOfv7TFfOBP08cVhV+0X8lWc7Q45DwDki0sl5fRWwLZkdiEimB9+7AZhDaHht15x5lv2W9vdp8l5Kv898kFNBX0ROEpHZIrJEREpFZLCTXiQiq0XkKRFZKSJvRhyQWSUi/UXkPRFZLCIzReSMiMXfFZF5IrJCRPyeeHY6EJ4h+QZgfEQez3fysdR5/JSTfouIvCgirwFv+py/o0TkJOCLwK04B5WIXCoi74vIKyKySkSeFJHjnGUHReS3IrIAuDBD2Uzl+/xARPpFrDdXRD6bofyG3/PSyCsSEXlMRG5xnpeLyP0Rx1fWS7Hx8pstcX6fsb7Xr4nIGhGZIyKP5voVYU4FfaAO+Kaqfh64DPiLHJv1tzfwd1XtC+wF/isL+esUUbXzioh0AP4GfFtV+wP/BEZFrH+iql4E/K+zzE8TgCEi0hH4LLAgYtka4Euq+jngN8AfIpZdCAxV1ct9zl+k64AZqroO2C0in3fSzwd+DpwLfBL4lpN+IrBCVS9Q1TkZymMq3+fTwC0AIvKfwPGqujxD+XVrl3N8PQH8ItuZyVGxfp9tOL+PfwBfVdWLgZyf6i/j4+knIMAfRORLQDPQA+juLNukqiXO88VAUeazF6reCb8QkXOAc4BZzrmpHbAjYv3xAKr6voh0FpFTVHWvHxlT1eVOG8MNtJ2v4GRgrIj0JjQncYeIZbNUdbcfeYrjBuBh5/kE5/U0YKGqbgQQkfHAxcBkoAl4KZMZTPH7fBH4tYjcA3wfeDYjmU3Oy87jYo6dVE1LsX6f0Xwa2Kiqm5zX44Fh/mYvPbkW9P+H0Jmyv6o2ikg50NFZVh+xXhOQC9U7AqxU1VhVDq1vgvD7poipwJ+BS4HTItJ/B7yjqt90Atm7EcsO+ZynFkTkNOByQnXmSuhEqYQCa6zvq05VmzKXy6OS+j5VtVZEZgGDgf8GstEIeYSWV/AdWy0PH0dN5Mbxnyi/GRXn9zmV6PkU8kyuVe+cDFQ5Af8y4OPZzlACa4GuInIhgIh0EJG+Ecu/46RfDOxTVTej5qXjn8BvVbW0VfrJHGuIvMXnPCTybWCcqn5cVYtU9SxgE6FS/fkSmj/5OELfXaaqcmJJ5ft8GngUWJSFKyiAzUAfETleRE4GrshCHpKRa/mN9fuE6PlcA3xCjvXk+05Gc5uCnAj6Ts+ReuB5YICIFBMq9a/JasYSUNUGQj+SP4rIMqAEuChilT0iMg94klCjkN/5qVD
VR6IsehB4QETmEiq5ZNMNwCut0l4CbgTmA6OBFYQOtNbrZVQq36eqLgb2A//KQBaPCh9DqroVmAQsJ3Q8Lc1kPtzK4fzG+322yaeqHibUZjdDROYAlbgbEjlrcmIYBhE5D3hKVf3u4WJylIhcCvxCVa/Ndl7SISJnEqru+bSqNmfwffPqGMq3/MYjIiep6kGn08nfgfWq+tds5yuWrJf0ReR2Qo0f92Y7L8akQ0RuJtTL5/9lOODn1TGUb/l14YciUgKsJFT1948s5yeunCjpG2OMyYyMl/RF5CwReUdCN1utFJEfO+ldRGSWiKx3Hk910k9z1j8oIo/F2OdUEVmRyc9hjDH5KBvVO0eAn6vqZ4CBwJ0i0gcYDsxW1d7AbOc1hG7Y+jUxbiQRkW8BeTXmiDHGZEvGg76q7lDVJc7zA8BqQjdhDQbGOquNJXRXHKp6yLkLs671vpzbpX8G/D4DWTfGmLyX1YZcp2/r5wg1fnVX1R0QOjEA3Vzs4nfAX4Ban7JojDGBkrWg75TSXwJ+oqr7U9i+H3C2qma1L7cxxuSTrAR9Z6Cyl4DnVTU8FkilOCNUOo9VCXZzIdDfGaphDvCfIvKuPzk2xphgyEbvHQGeAVar6kMRi6YCQ53nQ4Ep8fajqk+o6pmqWkToFv51qnqp9zk2xpjgyHg/fWccmg+AUkIjaQL8ilC9/iTgP4AtwPXhsUuc0nxn4COEhlX+iqquithnEfC6qp6TkQ9hjDF5ym7OMsaYApL1YRiMMcZkjgV9Y4wpIBb0jTGmgFjQN8aYAmJB3xhjCogFfWOMKSAW9I0xpoBY0DfGmALy/wEV13dODjjlXAAAAABJRU5ErkJggg==\n", 46 | "text/plain": [ 47 | "
" 48 | ] 49 | }, 50 | "metadata": { 51 | "needs_background": "light" 52 | }, 53 | "output_type": "display_data" 54 | } 55 | ], 56 | "source": [ 57 | "import sys\n", 58 | "sys.path.append('../..')\n", 59 | "from datasources.datasource import DatasourceFactory\n", 60 | "import datasources.datasource\n", 61 | "import utils.chaotic_toolkit as ct\n", 62 | "import nilmlab.tstransformers as ts\n", 63 | "\n", 64 | "year = '2014'\n", 65 | "month_end = '8'\n", 66 | "month_start = '1'\n", 67 | "end_date = \"{}-30-{}\".format(month_end, year)\n", 68 | "start_date = \"{}-1-{}\".format(month_start, year)\n", 69 | "sample_period = 6\n", 70 | "datasource = DatasourceFactory.create_uk_dale_datasource()\n", 71 | "df_mains, metergroup = datasource.read_mains(start=start_date, end=end_date, sample_period=6, building=1)\n", 72 | "data = df_mains[(54, 1, \"UK-DALE\")].values\n", 73 | "data = data[:100000]\n", 74 | "print(len(data))\n", 75 | "print(df_mains.describe())\n", 76 | "df_mains.plot()\n", 77 | "# data = df_mains[(1, 1, 'REDD')].values" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 2, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "[4, 9, 14]\n", 90 | "Delay 5\n" 91 | ] 92 | }, 93 | { 94 | "data": { 95 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYoAAAEGCAYAAAB7DNKzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAgAElEQVR4nO3deXhU1f3H8feXLOyLQCBskVVWRSCAUEUFVMStWqlr3WixuFut1fprq7V20WoXbQVcKaCtqNQVBATFDWSRJZCEfQmQhDVASEgyOb8/ZrARk8lAZuZOks/reebJzL1n5nxyCfnmbueYcw4REZGK1PE6gIiIxDYVChERCUqFQkREglKhEBGRoFQoREQkqHivA0RCy5YtXceOHb2OISJSbSxdunS3cy6pvHU1slB07NiRJUuWeB1DRKTaMLMtFa3ToScREQnKk0JhZmPMbLWZlZpZapB2L5lZrpmlRTOfiIj8j1d7FGnAFcCCStq9AoyKeBoREamQJ+conHPpAGZWWbsFZtYxCpFERKQCOkchIiJBRWyPwszmAsnlrHrYOfd2BPobB4wDSElJCffHi4jUWhErFM65kZH67Ar6mwRMAkhNTdWQuCIiYaJDTyIiNcDizXuZ+MmGiHy2V5fHXm5mWcAQ4H0z+zCwvK2ZfVCm3WvAl0B3M8sys7Fe5BURiVWFxT5+994afjjxS6Yt2srhopKw9+HVVU8zgBnlLN8BjC7z+ppo5hIRqU6+3rqP+6avYOOufK4/I4WHLuxJg8Tw/1qvkUN4iIjUZIXFPv46dx2TFmygTdP6TB07mDO7tYxYfyoUIiLVyMqs/dz3+grW5R7i6oEdePiinjSulxDRPlUoRESqgaKSUp6Zt45/fryBpEZ1efnmgZzbvVVU+lahEBGJcat35HHf6yvIyD7ID/q359eX9KJp/cjuRZSlQiEiEqOKfaX8c/4Gnpm3jpMaJvL8Damc16t11HOoUIiIxKDM7IPcN305adsPcNnpbXnkkt6c1DDRkywqFCIiMaTEV8rEBRv569y1NKmXwITr+zOqTxtPM6lQiIjEiPW5B7lv+kpWbNvP6FOTeeyyPrRoVNfrWCoUIiJeK/GV8tLnm/jz7LU0TIzj2Wv7cfFpbb2O9Q0VChERDxwp8fHF+j3MSstmTnoOe/OLOL9Xax6//FSSGnu/F1GWCoWISJTkHynh48xdzFqdzfyMXA4dKaFR3XiG92jFpX3bMqJnq0ondPOCCoWISATtyy9ibnoOH67OZsG63RSVlNKiYSIXn9aGC/okM7RLC+rGx3kdMygVChGRMMvOK2T2mmxmpWWzaNNefKWOds3qc93gFEb1Tia1Y3Pi6sTenkNFVChERMJg0+58PlztLw7Lt+0HoGurRvz07M6M6t2GPu2axORhpVCoUIiInKBiXynTl2Txry83k5F9EIDT2jfl5xd054LeyXRt1cjbgGGiQiEicpx8pY53V+zgL3PXsmXPYU5r35TfXNKL83sn065Zfa/jhZ0KhYhIiJxzfLg6h6fnZLI25xA92zThxRtTGd4jNq9WChcVChGRSjjnWLBuN0/NzmRlVh6dkxry7LX9GN2nDXWq0UnpE6VCISISxFeb9vLnDzP5avNe2jWrzxNXnsYV/doRH1fH62hRo0IhIlKOVVl5PDk7kwVrd5HUuC6/vaw3Vw3sEPP3PESCCoWISBlrcw7y9Oy1zFqdTbMGCTx0YQ9uGNKR+om1r0AcpUIhIgJs2ZPPX+eu47/Lt9MwMZ57RnZj7JmdIj4fdXWgQiEitdrOvAL+/tF6pi/ZRnycMW5YZ346rItnkwTFIhUKEamVnHNMXbiFx95PxznHtYNTuOPcrrRqUs/raDFHhUJEap3CYh//99803liaxbndk/jtZX3o0LyB17FilgqFiNQqWfsOM37qMlZtz+PuEd24e0S3WnEvRFWoUIhIrfHF+t3c8drXFJeU8sINqYzs1dr
rSNWCCoWI1HjOOZ7/dCN/nJlBl6RGTPzRADon1YwB+6LBk1sLzWyMma02s1IzS62gTQczm29m6YG2d0c7p4hUf4eLSrjzta/5/QcZjOqTzIzbv6cicZy82qNIA64AJgZpUwLc55xbZmaNgaVmNsc5tyYqCUUkYvIOFzMzbSd9OzSjZ5smEetn8+58bp2ylHW5B/nFqB789OzONXrwvkjxpFA459KBoP9gzrmdwM7A84Nmlg60A1QoRKqpgiIfr3yxmec+Xs+BwhIABpx8EtefkcKFfdpQLyF8dz/Pz8jl7n9/TZ06xuRbBnFWt6SwfXZtUy3OUZhZR6AfsChIm3HAOICUlJSo5BKR0BT7Snl9yTb+NncduQePMLxHK247pwvLt+1n2qKt3PufFTz2XjpjBrTn2sEpnNyi4Qn3VVrqeHb+ev4ydy09k5sw8UcDdOlrFZlzLjIfbDYXSC5n1cPOubcDbT4G7nfOLQnyOY2AT4DHnXNvhdJ3amqqW7Kkwo8UkSgpLXW8v2onT83OZPOew6SefBIPjOrBoE7Nv9Xmiw17mLpwC3PSc/CVOs4+JYnrzziZ4T1aHdfc0gcKi/nZf1YwNz2Hy/u14/eXn1qrx2g6Hma21DlX7jnjiO1ROOdGVvUzzCwBeBOYFmqREBHvHZ2/4YlZGazecYAeyY156aZUzu3+3Ql+6tQxzuzWkjO7tSQ7r5B/L97Ka19t5Sf/WkLbpvW4ZlAKVw3qQKvGwe+YXpdzkFunLGXr3sM8ckkvbhzaUecjwiRiexQhdR5kj8L8/8KTgb3OuXuO53O1RyHinWVb9/HErAwWbtxLh+b1ue+87lzSt+1x7RkU+0r5KD2HqQu38tn63cTXMS7oncx1Z6QwpHML7PO/Qbv+0GkYADNX7eT16dPoF7+Jwdf/lsGdW0Tq26uxgu1ReFIozOxy4BkgCdgPLHfOXWBmbYEXnHOjzexM4FNgFVAaeOsvnXMfVPb5KhQi0bc25yBPfpjJnDU5tGyUyF0junH1wBQS46t2Ff6m3flMW7iF6UuzyCsopmurRtzXLYcL1jyIu/Jl/ryuNcsXvMOEus/g+8HLNO9T5YMZtVLMFYpIU6EQiZ6sfYf5y5x1vPV1Fo0S47n17M7c/L1ONKwb3iPbhcU+3lu5k6kLt7B8237OTkjnb/F/Y3LxCMbWnU/iNZNJ7HpOWPusTTw5RyEiNdvuQ0f4x/z1TFu4FQx+clZnxp8dueG56yXEceWA9lw5oD1p2/OYurADb2dkcrd7Hb73AKhIRIwKhYgcl8JiHxM+2cDzCzZSUOzjh6kduGtEN9o2qx+1DH3aNeWP/ffDho9gyAOw5EXodNY35ywkvFQoRCRky7bu44E3VrI+9xAX9knmvvO707WVB8NhbFoA02+CMa/4i0Ons779WsJKhUJEKlVQ5OOp2Zm8+Pkm2jSpx+RbBnH2KR7e6bx92beLQqdh/tfbl6lQRIAKhYgE9dWmvTzwxgo27znMtYNTeOjCHt7PI31mOVfMdxqmIhEhKhQiUq78IyU8MSuDyV9uoUPz+rz648EM7drS61jiARUKEfmOz9fv5hdvrmT7/gJuGtqRn1/QPeyXu0r1oX95EfnGwcJifv9BBq99tZVOLRvy+q1DGNixeeVvlBpNhUJEAPg4M5eH3lpFzoFCxg3rzL0jT9GAegKoUIjUenmHi3ns/TW8sTSLrq0a8eb4ofRLOcnrWBJDVChEarE5a3J4eMYq9uQXcfu5XbhrRDfqxmsvQr5NhUKkFtqXX8Qj767m7eU76JHcmBdvHMip7Zt6HUtilAqFSC2Sf6SEWWnZ/GFmOvsPF3PPyG7cdk7XKo/wKjWbCoVIDbdt72HmZeQyNz2HRRv3UuQrpU+7JkwZO5iebZp4HU+qgZAKhZm1A04u2945tyBSoUTkxJX4Svl6234+Ss9lXkYOa3MOAdA5qSE3Dj2Z4T1aM7DjScTHaS9CQlNpoTCzPwFXAWsAX2CxA1Q
oRGJEXkExn6zdxbz0HD5eu4v9h4uJr2MM6tScH6Z2YETP1nRq2dDrmFJNhbJH8X2gu3PuSKTDiEhonHNs3J3PvPRcPsrIYfHmffhKHc0bJjK8RytG9GjNWae0pInXYzJJjRBKodgIJAAqFCIeS995gOlLspiXkcPmPYcB6JHcmFuHdWZEz9ac3qHZcc1NLRKKUArFYWC5mX1EmWLhnLsrYqlE5DtyDxQyZsKXFPlKGdqlBWPP7MS5PVrR/qQGXkeTGi6UQvFO4CEiHvrjzAyKSkr58N5hOt8gUVVpoXDOTTazROCUwKJM51xxZGOJSFlLt+zlra+3c/u5XVQkJOpCuerpHGAysBkwoIOZ3ajLY0Wiw1fq+PXbq2nTtB63n9vV6zhSC4Vy6Okp4HznXCaAmZ0CvAYMiGQwEfH79+KtrN5xgGeu6UeDRN0jK9EXyh03CUeLBIBzbi3+q6BEJML25Rfx5IeZnNG5ORef1sbrOFJLhfLnyRIzexGYEnh9HbA0cpFE5Kin5mRysLCERy7tjZkuexVvhFIoxgO3A3fhP0exAPhnJEOJCKzekceri7Zyw5CO9EjWmEzinVCuejoCPB14iEgUOOd45J3VNGuQyL0jT6n8DSIRVOE5CjN7PfB1lZmtPPZRlU7NbIyZrTazUjNLraBNPTP7ysxWBNo+WpU+RaqTt5fvYPHmffxiVHeaNtApQfFWsD2KuwNfL45Av2nAFcDEIG2OAMOdc4fMLAH4zMxmOucWRiCPSMw4dKSE33+QTt/2TRkzoIPXcUQq3qNwzu0MPL3NObel7AO4rSqdOufSy15JVUEb55w7FHiZEHi4qvQrUh08M28duQeP8MilvamjcZskBoRyeex55Sy7MNxBymNmcWa2HMgF5jjnFkWjX5FjFRT5yCuI/IAEG3Yd4qXPNjFmQHv6pZwU8f5EQlHhoSczG49/z6HzMeckGgOfV/bBZjYXSC5n1cPOubdDCeec8wGnm1kzYIaZ9XHOpVXQ3zhgHEBKSkooHy8SksNFJfzguS/Jzivg5ZsHcXqHZhHpxznHo++uoV58HA+M6hGRPkRORLBzFK8CM4E/AA+WWX7QObe3sg92zo2sYrayn7XfzD4GRuE/v1Fem0nAJIDU1FQdopKwcM7xizdXkZF9gFaN63Lt8wuZ+KMBnNUtKex9zVmTw4K1u/jVxb1Ialw37J8vcqKCnaPIc85tds5dEzgvUYD/HEEjM4v4n+xmlhTYk8DM6gMjgYxI9ytS1gufbuLdFTv4+QXdefeOM0lp3oBbXlnMeyt3hLWfwmIfj72/hm6tGnHDkJPD+tkiVVXpOQozu8TM1gGbgE/wDw44syqdmtnlZpYFDAHeN7MPA8vbmtkHgWZtgPmBw16L8Z+jeK8q/Yocj8/W7eYPM9MZfWoy48/uQqsm9fjPuCH0bd+MO1/7mikLt4Str0kLNrJtbwGPXtqbBM1lLTEmlDuzfwecAcx1zvUzs3OBa6rSqXNuBjCjnOU7gNGB5yuBflXpR+REbdt7mDtfW0bXVo148sq+3wyf0bRBAlPGDub2V5fxq/+msfdQEXeN6Fql4TWy9h3mnx+v56JT2zC0a8twfQsiYRPKny7Fzrk9QB0zq+Ocmw+cHuFcIp4pKPJx65SllJQ6Jv4olYZ1v/33VP3EOCb+aABX9G/HX+au5ZF3VlNaeuKnxX7/QToAv7yoZ5Vyi0RKKHsU+82sEf4xnqaZWS5QEtlYIt5wzvHQWytJzz7AizemVjhJUEJcHf58ZV9aNEzk+U83se9wMX8e05fE+OM7bPT5+t18sCqb+847hXbN6ofjWxAJu1AKxWVAIXAv/pFjmwK/jWQoEa+8/Plm/rt8B/eddwrDe7QO2rZOHeOXo3vSvGFd/jQrg/0FxUy4vn/Ic0YU+0p55J3VpDRvwE+GdQ5HfJGIqPTPH+dcfuB+hgbAu8BUdIe01EBfbtjD4x+kc36v1iHPJGdmjD+nC3/6wal8tm4
X1z6/iH35RSG9919fbmFd7iF+dXEv6iXEVSW6SESFctXTrWaWA6wEluCfi2JJpIOJRNP2/QXc8eoyOrZowFM/7HvcQ2dcNTCFf143gDU7DzBm4pfszCsI2n7XwSP8dc5azj4liZE9W1UlukjEhXJA9X6gt3Ouo3Ous3Ouk3NO+8lSYxQW+xg/dSlHSkqZdEMqjeud2Gito/okM/nmQWTnFXLlc1+yYdehCtv+aVYGhSU+fnNJL01IJDEvlEKxATgc6SAiXnDO8X//TWNlVh5/uep0uiQ1qtLnDenSgn+PO4MjJT7GTPiSlVn7v9Nm2dZ9vLE0i1vO7ETnKvYnEg2hFIqHgC/MbKKZ/f3oI9LBRKJh6sItvLE0i7tHdOO8XsFPXoeqT7umTP/pUBokxnHNpIV8tm73N+tKS/0TErVuUpc7h3cLS38ikRZKoZgIzAMW4j8/cfQhUq0t3ryXR99dw4gerbh7RHh/aXdq2ZA3xw+l/UkN+OJfv2LhR/77S19fso2VWXk8PfAAjRY/G9Y+RSIllOv4SpxzP4t4EpEoys4rZPzUZXRo3oCnrzo9IvM+tG5Sj9dvHcLTz6+m24I7effQEZ5Y2YKb22xj6NdPwJhXwt6nSCSEUijmB4bwfhf/rHMAhDKCrEgsOlLi46dTl1JQVMJrPxlM0/qRm2q0aYMEHhw/jmdejGfssvu5yTeS2wo+wa6aDJ2GRaxfkXAKpVBcG/j6UJllDtCVT1ItPfLOapZv28+E6/vTrXXjiPdXPzGOe8eNZeELq7lr58sw6AEVCalWgp6jMLM6wIOBS2LLPlQkpFp6ddFWXvtqG7ef24VRfdpErd+ErZ9xVt47MOwBWPIibFoQtb5FqipooXDOlQK3RymLSEQt3bKP37yTxtmnJPGz87pHr+NNC2D6Tf5zEsMf9n+dfpOKhVQboVz1NMfM7jezDmbW/Ogj4slEwij3QCHjpy6lTdP6/P3qfsRF4OR1hbYv8xeHo4ebOg3zv96+LHoZRKoglHMUtwS+lt2z0DkKqTaKSkoZP20ZBwtL+NfYQTRtELmT1+U6857vLus0TOcppNqotFA45zpFI4hIuG3YdYhZadm8u2IHGdkHefbafvRIbuJ1LJFqp9JCYWYJwHjg6J8/HwMTnXPFEcwlctycc2TmHOSDVdnMStvJ2hz/WEv9Uprx1Ji+XHxaW48TilRPoRx6eg5IAP4ZeP2jwLIfRyqUSKicc6zansfMtGxmpWWzaXc+dQwGdmzOI5f04oI+ybRpqgmBRKoilEIx0DnXt8zreWa2IlKBRCpTWur4etu+wJ5DNtv3FxBXxxjapQU/PqsT5/dKJqlxXa9jitQYoRQKn5l1cc5tADCzzoAvsrFEvq3EV8pXm/cyKy2bD1dnk3PgCIlxdTizW0vuGekf0K9Zg0SvY4rUSKEUip/jH8ZjI2DAycDNEU0lErBlTz4TPtnA7NU57Mkvol5CHc45pRUXnprM8B6tTnjuCBEJXYWFwszGOOemAxuBbkB3/IUiwzl3pKL3iYRL2vY8bnzpKwqKfYzo2ZoL+yRzTvekkOekFpHwCPY/7iFgOvCmc64//qlQRaLiq017GfvKYhrXi2f6T4dogh8RDwUrFHvMbD7QyczeOXalc+7SyMWS2mx+Zi7jpy6lbbP6TB07mLbNdNWSiJeCFYqLgP7AFOCp6MSR2u7dFTu49z/L6Z7cmMm3DKJlI129JOK1CguFc64IWGhmQ51zu6KYSWqp177ayi9nrCL15JN48aaBNNGJapGYEMpZwZPM7HGgY9n2zrnhkQoltc+ETzbwx5kZnNM9ieeuG0D9xDivI4lIQCiFYjowAXiBMN0/YWZjgEeAnsAg59ySIG3jgCXAdufcxeHoX2KHc44nPszkuY83cPFpbXj6h6eTGB/KoMYiEi2hzpn9XJj7TQOuACaG0PZuIB3QaG41TGmp41dvpzFt0VauHZzCY5f1ie7w3yISklD+dHvXzG4zszb
hmo/COZfunMusrJ2Ztcd/Uv2FqvQnsafYV8o9/1nOtEVbGX9OFx7/voqESKwKZY/ixsDXn5dZFq35KP4KPABUOrGxmY0DxgGkpKREOJZURWGxj9umLWNeRi6/GNWD8ed08TqSiAQRsfkozGwukFzOqoedc2+H8P6LgVzn3FIzO6ey9s65ScAkgNTUVHeccSVKDhYWM3byEhZv3svjl/fhusEnex1JRCoRbAiP4c65eWZ2RXnrnXNvBftg59zIKmb7HnCpmY0G6gFNzGyqc+76Kn6ueGTPoSPc+PJXZOw8yN+u7selfTU/hEh1EGyP4mxgHnBJOescELRQVJVz7iH8w4gQ2KO4X0Wi+tqZV8D1Lywia18Bz9+Qyrk9WnkdSURCFOyGu98EvoZ9pFgzuxx4BkgC3jez5c65C8ysLfCCc250uPsU72zanc/1LyziQEExU8YOZlCnKl0LISJR5skwnM65GcCMcpbvAL5TJJxzH+OfglWqmTU7DnDDS1/hnOO1cWfQp11TryOJyHHSeM0SMek7D3D1pC9pVDeeKT8+gy4aAVakWlKhkIhwzvGbt1eTGF+H6eOH0k4jwIpUW8Gueir3aqejKrvqSWq3j9Jz+WrzXn73/T4qEiLVXLA9ivKudjoq4lc9SfVV4ivlj7My6NyyIVcN7OB1HBGpomBXPWlebDkhbyzNYn3uISZcP4CEOA3wJ1LdhXSOwswuAnrjv/ENAOfcbyMVSqqvw0UlPD1nLQNOPokLerf2Oo6IhEGlf+6Z2QTgKuBOwIAxgMZdkHK99Nkmcg8e4aELe2CmQf5EaoJQjgsMdc7dAOxzzj0KDAF04Fm+Y8+hI0z4ZCPn92pNakfdVCdSU4RSKAoCXw8H7pwuBk5ooECp2Z6Zt56CYh8PjOrhdRQRCaNQzlG8Z2bNgCeBZfiveNL8EPItm3fnM3XhFq4a2IGurXRjnUhNEsow448Fnr5pZu8B9ZxzeZGNJdXNk7MzSYirwz0junkdRUTCrNJCYWY3lLMM59y/IhNJqpvl2/bz/sqd3DW8K62a1Kv8DSJSrYRy6Glgmef1gBH4D0GpUAjOOf7wQTotGiYy7mzNVCdSE4Vy6OnOsq/NrCkwJWKJpFqZn5nLok17+e1lvWlUV0OHidREJ3Lb7GFAB6IFX6njjzMz6NSyIdcM0jzlIjVVKOco3sV/pRP4C0svYHokQ0n18ObSLNbmHOKf1/XXUB0iNVgoxwr+XOZ5CbDFOZcVoTxSTRQU+Xh6zlpO79CMC/skex1HRCIolD8DRzvnPgk8PnfOZZnZnyKeTGLaS59vIvtAIb8c3VNDdYjUcKEUivPKWXZhuINI9bE3v4gJH29gZM9Wmv9apBYINnHReOA2oIuZrSyzqjHweaSDSex6dt568otK+IWG6hCpFYKdo3gVmAn8AXiwzPKDzrm9EU0lMWvrnsNMWbiZH6Z2oFvrxl7HEZEoCDZxUR6QZ2a/OGZVIzNr5JzbGtloEov+PDuTuDrGveed4nUUEYmSUK56eh//5bGG/87sTkAm/omMpBZZmbWfd1bs4I5zu9JaQ3WI1Bqh3Jl9atnXZtYfuDViiSQm+YfqyKB5w0RuPbuz13FEJIqO+y4p59wyvj3+k9QCH6/dxZcb93DX8K40rpfgdRwRiaJQ7sz+WZmXdYD+wK6IJZKY4yt1/GlmBie3aMC1gzULrkhtE8o5irKXtpTgP2fxZmTiSCya8fV2MrIP8uy1/UiM11AdIrVNKOcoHo1GEIlNhcU+npqdSd/2Tbno1DZexxERD1T656GZpZrZDDNbZmYrjz6q0qmZjTGz1WZWamapQdptNrNVZrbczJZUpU85Ma98sZmdeYU8eKGG6hCprUI59DQN+DmwCigNU79pwBXAxBDanuuc2x2mfmuFgiIfM9N2Uj8hjtZN65HcpB5JjeuGNsLrZ3+Fdv2h0zD25Rfxj/nrue3kHQzZOQW63BP58CISc0IpFLucc++Es1PnXDq
gv1AjYOmWvdz3+go27zn8reVm0LJRXZKb1KN1k3q0bhJ4HigkyU39y5u07YdNvwnGvMI/Vrfk1KIV/CzvOWg32ZtvSEQ8F0qh+I2ZvQB8BBw5utA591bEUv2PA2abmQMmOucmVdTQzMYB4wBSUmrfJDqFxf5hv5//dCNtm9bnlZsHktS4LjkHCsnOO0L2gUJy8grJOVhI1r7DLN2yl32Hi7/zOfUT4rig4T38dsr1NCsewaT6HxF/1VToNMyD70pEYkEoheJmoAeQwP8OPTkgaKEws7lAeRMVPOycezvEfN9zzu0ws1bAHDPLcM4tKK9hoIhMAkhNTXXltamplm/bz32vL2fDrnyuGZTCwxf1/GZa0t5tm1b4vsJiH7kH/EUk+0AhuQcKyc4rJPtAK2ZvX8Mdpa9xKPVnKhIitVwohaLvsXdnh8I5N/IE8hz7GTsCX3PNbAYwCCi3UNRGR0p8/P2jdUz4ZCNJjeoy+ZZBnH1KUsjvr5cQR0qLBqS0aPDtFZsWwPTZMOwBGi15EXqcq2IhUouFUigWmlkv59yaiKcpw8waAnWccwcDz88HfhvNDLEsbXse909fQUb2Qa4c0J5fXdyLpvXDcMf0pgUQOEdBp2HQ6axvvxaRWieUQnEmcKOZbcJ/jsIA55w77UQ7NbPLgWeAJOB9M1vunLvAzNoCLzjnRgOtgRmBE97xwKvOuVkn2mdNUewr5R/z1/PsvPWc1DCRF29MZUTP1uHrYPuybxeFTsP8r7cvU6EQqaXMueCH882s3DEbnHNbIpIoDFJTU92SJTXvtovM7IPcN305adsPcNnpbXn00t40a5DodSwRqQHMbKlzrtz72kK5MztmC0JtUeIrZeKCjfxt7joa14tnwvX9GdVHd0mLSHSEcuhJPLQ+9xD3TV/Bim37GX1qMo9d1ocWjep6HUtEahEVihjlK3W89NkmnpydSYPEOP5+TT8uOa2NblIUkahToYhBm3bn8/PpK1iyZR8je7bm91f0oVVjzSgnIt5QoYgxm3bnc/HfP6VOHeOpMX25on877UWIiKdUKGLM4+/7b1f54K6z6NC8QSWtRUQiT7PQxJBP1+1ibnoudwzvpiIhIjFDhSJGlHv6L3sAAAtbSURBVPhKeey9NaQ0b8AtZ3b0Oo6IyDdUKGLEq19tZW3OIX45uid14+O8jiMi8g0Vihiw/3ART89Zy5DOLbigdxiH4xARCQMVihjw17nrOFBQzK8v6aUrnEQk5qhQeGx97kGmLNzC1YNS6NmmiddxRES+Q4XCY797P50GiXHcd94pXkcRESmXCoWH5mfm8nHmLu4e0U3jN4lIzFKh8Eixr5TfvbeGTi0bcsOQjl7HERGpkAqFR6Z8uYUNu/J5eHRPEuP1zyAisUu/oTywN7+Iv85dy1ndWjKiZyuv44iIBKVC4YG/zFlLfpGPX12sy2FFJPapUERZZvZBpi3awnWDUzildWOv44iIVEqFIoqcczz23hoa10vg3pG6HFZEqgcViiiam57LZ+t3c8/IbpzUMNHrOCIiIVGhiJIjJT4ef38NXVs14vozTvY6johIyFQoomTyF5vZvOcw/3dRTxLitNlFpPrQb6wo2H3oCM98tJ5zuydxTnddDisi1YsKRRQ8NTuTgmIf/3dxL6+jiIgcNxWKCFu9I49/L97GDUM60iWpkddxRESOmwpFBB29HLZZ/QTuHtHN6zgiIifEk0JhZmPMbLWZlZpZapB2zczsDTPLMLN0MxsSzZxV9eHqbBZu3MvPzu9O0wYJXscRETkhXu1RpAFXAAsqafc3YJZzrgfQF0iPdLBwKSz28fgH6XRv3ZhrBnbwOo6IyAmL96JT51w6EHScIzNrAgwDbgq8pwgoikK8sHjp801s21vA1LGDidflsCJSjcXyb7DOwC7gZTP72sxeMLOGFTU2s3FmtsTMluzatSt6KcuRe6CQf8xbz8ierTmzW0tPs4iIVFXECoWZzTWztHIel4X4EfF
Af+A551w/IB94sKLGzrlJzrlU51xqUlJSGL6DE/fkh5kU+Up5+KKenuYQEQmHiB16cs6NrOJHZAFZzrlFgddvEKRQxIpVWXm8sSyLn5zVmU4tK9wBEhGpNmL20JNzLhvYZmbdA4tGAGs8jFSpYl8pj7y7muYNErljeFev44iIhIVXl8debmZZwBDgfTP7MLC8rZl9UKbpncA0M1sJnA78PvppQ3OgsJibX17M0i37+OXonjSpp8thRaRm8OqqpxnAjHKW7wBGl3m9HKjwPotYsX1/Abe8vJgNuw7x5zF9+cGA9l5HEhEJG08KRU2Stj2PW15ZTEGRj8m3DOJ7XXWVk4jULCoUVTA/I5fbX11Gs/oJvDF+KN2TNbWpiNQ8KhQnaNqiLfz67dX0SG7MSzcNpHWTel5HEhGJCBWK41Ra6njiw0wmfLKBc7sn8ey1/WlYV5tRRGou/YY7DoXFPn7+xkreXbGD6wan8OilvTU8h4jUeCoUIdqXX8S4KUtYvHkfD17Yg1uHdQ46VpWISE2hQhGCLXvyufnlxWTtL+DZa/tx8WltvY4kIhI1KhSVWLZ1Hz+ZvASfc0z78WAGdmzudSQRkahSoQhiVlo2d//7a5Kb1uPlmwbSWVOZikgtpEJRgRc/28Tv3l/D6R2a8cINqbRoVNfrSCIinlChOIav1D/P9StfbObCPsn85arTqZcQ53UsERHPqFCUUVDk465/f82cNTn8+MxO/HJ0T+rU0ZVNIlK7qVAE5BUUc8OLi1i1PY9HL+3NjUM7eh1JRCQmqFAENKobT8eWDbljeDfO69Xa6zgiIjFDhSIgro7xt6v7eR1DRCTmaPwJEREJSoVCRESCUqEQEZGgVChERCQoFQoREQlKhUJERIJSoRARkaBUKEREJChzznmdIezMbBewxescQbQEdnsdIgTVJSdUn6zKGX7VJWus5zzZOZdU3ooaWShinZktcc6lep2jMtUlJ1SfrMoZftUla3XJWR4dehIRkaBUKEREJCgVCm9M8jpAiKpLTqg+WZUz/KpL1uqS8zt0jkJERILSHoWIiASlQiEiIkGpUESImXUws/lmlm5mq83s7nLanGNmeWa2PPD4tUdZN5vZqkCGJeWsNzP7u5mtN7OVZtbfo5zdy2yr5WZ2wMzuOaaNJ9vUzF4ys1wzSyuzrLmZzTGzdYGvJ1Xw3lFmlhnYvg96kPNJM8sI/NvOMLNmFbw36M9JFHI+Ymbby/zbjq7gvVHbnkGy/qdMzs1mtryC90Ztm1aJc06PCDyANkD/wPPGwFqg1zFtzgHei4Gsm4GWQdaPBmYCBpwBLIqBzHFANv6bhDzfpsAwoD+QVmbZE8CDgecPAn+q4PvYAHQGEoEVx/6cRCHn+UB84PmfyssZys9JFHI+Atwfws9F1LZnRVmPWf8U8Guvt2lVHtqjiBDn3E7n3LLA84NAOtDO21Qn7DLgX85vIdDMzNp4nGkEsME5FxN34DvnFgB7j1l8GTA58Hwy8P1y3joIWO+c2+icKwL+HXhf1HI652Y750oCLxcC7SPVf6gq2J6hiOr2hOBZzcyAHwKvRTJDpKlQRIGZdQT6AYvKWT3EzFaY2Uwz6x3VYP/jgNlmttTMxpWzvh2wrczrLLwveldT8X++WNimAK2dczvB/4cD0KqcNrG2bW/Bv/dYnsp+TqLhjsAhspcqOJQXa9vzLCDHObeugvWxsE0rpUIRYWbWCHgTuMc5d+CY1cvwHzrpCzwD/Dfa+QK+55zrD1wI3G5mw45Zb+W8x7Prqs0sEbgUmF7O6ljZpqGKmW1rZg8DJcC0CppU9nMSac8BXYDTgZ34D+kcK2a2Z8A1BN+b8HqbhkSFIoLMLAF/kZjmnHvr2PXOuQPOuUOB5x8ACWbWMsoxcc7tCHzNBWbg330vKwvoUOZ1e2BHdNKV60JgmXMu59gVsbJNA3KOHqILfM0tp01MbFszuxG4GLjOBQ6eHyuEn5OIcs7lOOd8zrlS4Pk
K+o+J7QlgZvHAFcB/Kmrj9TYNlQpFhASOTb4IpDvnnq6gTXKgHWY2CP+/x57opQQza2hmjY8+x39iM+2YZu8ANwSufjoDyDt6SMUjFf6VFgvbtIx3gBsDz28E3i6nzWKgm5l1CuwpXR14X9SY2SjgF8ClzrnDFbQJ5eckoo45L3Z5Bf17vj3LGAlkOOeyylsZC9s0ZF6fTa+pD+BM/Lu8K4Hlgcdo4KfATwNt7gBW478yYyEw1IOcnQP9rwhkeTiwvGxOA/6B/2qSVUCqh9u1Af5f/E3LLPN8m+IvXDuBYvx/1Y4FWgAfAesCX5sH2rYFPijz3tH4r4rbcHT7RznnevzH9Y/+nE44NmdFPydRzjkl8PO3Ev8v/zZeb8+KsgaWv3L057JMW8+2aVUeGsJDRESC0qEnEREJSoVCRESCUqEQEZGgVChERCQoFQoREQlKhUIkTAKjm95/outFYpUKhYiIBKVCIVIFZvZwYO6DuUD3wLIuZjYrMNDbp2bWo5z3/cTMFgcGL3zTzBqYWWMz2xQY+gUzaxKYryAhyt+WyLeoUIicIDMbgH+IiH74x/QZGFg1CbjTOTcAuB/4Zzlvf8s5N9D5By9Mx38370HgY+CiQJurgTedc8WR+y5EKhfvdQCRauwsYIYLjI9kZu8A9YChwPTAkFMAdct5bx8z+x3QDGgEfBhY/gLwAP5Rb28GfhKx9CIhUqEQqZpjx8CpA+x3zp1eyfteAb7vnFthZjfhn5kP59znZtbRzM4G4pxzsTlInNQqOvQkcuIWAJebWf3AKKCXAIeBTWY2Br6Zb7xvOe9tDOwMnH+47ph1/8I/0NzLkYsuEjoVCpET5PxT3f4H/4irbwKfBlZdB4w1s6OjgpY3Feev8M94OAfIOGbdNOAkqvn0mVJzaPRYkRhjZlcClznnfuR1FhHQOQqRmGJmz+CfwW+011lEjtIehYiIBKVzFCIiEpQKhYiIBKVCISIiQalQiIhIUCoUIiIS1P8DzLLT6XgdgKoAAAAASUVORK5CYII=\n", 96 | "text/plain": [ 97 | "
" 98 | ] 99 | }, 100 | "metadata": { 101 | "needs_background": "light" 102 | }, 103 | "output_type": "display_data" 104 | } 105 | ], 106 | "source": [ 107 | "import numpy as np\n", 108 | "import matplotlib.pyplot as plt #plotting\n", 109 | "from mpl_toolkits.mplot3d import Axes3D #3d plots\n", 110 | "\n", 111 | "mutual_information_per_delay = []\n", 112 | "for i in range(1,20):\n", 113 | "# print(f\"iteration {i}\")\n", 114 | " mutual_information_per_delay = np.append(mutual_information_per_delay,[ct.compute_mutual_information(data, i, 50)])\n", 115 | "\n", 116 | "local_min = [i \n", 117 | " for i in range(1, len(mutual_information_per_delay) -1) \n", 118 | " if (mutual_information_per_delay[i] < mutual_information_per_delay[i-1]\n", 119 | " and mutual_information_per_delay[i] < mutual_information_per_delay[i+1])] \n", 120 | "print(local_min)\n", 121 | "local_min_idx = [i + 1 for i in local_min]\n", 122 | "plt.plot(range(1, 20), mutual_information_per_delay)\n", 123 | "plt.plot(local_min_idx, mutual_information_per_delay[local_min], 'x')\n", 124 | "plt.xlabel('delay')\n", 125 | "plt.ylabel('mutual information')\n", 126 | "delay = local_min_idx[0]\n", 127 | "print(f\"Delay {delay}\")" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 3, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "name": "stdout", 137 | "output_type": "stream", 138 | "text": [ 139 | "[0.6734, 0.62873, 0.2491, 0.11505, 0.07475, 0.05378, 0.04291, 0.03713, 0.03155, 0.02798, 0.02445, 0.02209, 0.01977, 0.01813]\n", 140 | "[6, 7, 8, 9, 10, 11, 12, 13]\n" 141 | ] 142 | }, 143 | { 144 | "data": { 145 | "text/plain": [ 146 | "Text(0, 0.5, 'Fraction of false neighbors')" 147 | ] 148 | }, 149 | "execution_count": 3, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | }, 153 | { 154 | "data": { 155 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEICAYAAABS0fM3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8GearUAAAgAElEQVR4nO3deXxddZ3/8dc7W9uENiltaGnSFcpSaKoQAZVVZFhkUdQR3EZlHgyOKI67Px1EfTijwiBuiB0GcUPGBQQUBQfZEWhBaGmhUAq0aSm0dF+zfX5/nJP2Ns1y0+bm5ua+n4/Hedyz3XPfSZt8cs73nO9XEYGZmRWvknwHMDOz/HIhMDMrci4EZmZFzoXAzKzIuRCYmRU5FwIzsyKX00Ig6TRJiyQtlvSFLrZ/VtIT6fSUpDZJ++Yyk5mZ7Uq5eo5AUinwLHAK0ATMAc6PiIXd7H8W8G8R8Zaejjt27NiYMmVKP6c1MxvaHnvssdURUdvVtrIcfu5RwOKIWAIg6UbgHKDLQgCcD/yqt4NOmTKFuXPn9ltIM7NiIOml7rbl8tJQHbAsY7kpXbcbSZXAacDvutl+oaS5kuauWrWq34OamRWzXBYCdbGuu+tQZwEPRsSarjZGxOyIaIyIxtraLs9szMxsD+WyEDQBEzOW64EV3ex7HllcFjIzs/6Xy0IwB5guaaqkCpJf9rd23klSNXACcEsOs5iZWTdy1lgcEa2SLgbuAEqB6yJigaSL0u3XpLu+A7gzIjbnKouZmXUvZ7eP5kpjY2P4riEzs76R9FhENHa1zU8Wm5kVuaIpBKs3befrf1jI2s3N+Y5iZjaoFE0heOj51/jJgy9w4hX3cP2DL9DS1p7vSGZmg0LRFIKzZ03gT5ccz+F1o7jstoWc8d37ue9ZP5xmZlY0hQDg4PEj+cUFRzP7A0fS3NbOB697lAuun8OSVZvyHc3MLG+KqhAASOIfDhvPnf92PF88/RAeeWENp151H9/440I2bGvJdzwzswFXdIWgw7CyUv7lhAO4+zMncu7r67n2gRc46fJ7uOGRpbS1F9YttWZme6NoC0GH2pHD+Na7Grjt4mOZVlvF/7t5Pmd+/wEeXvJavqOZmQ2Ioi8EHQ6vq+bX//JGfvDe17NhawvnzX6Yf/3lYyxbsyXf0czMcsqFIIMkzmyYwF2fPoFPnXIQdz+zipOvvJfL73iGzdtb8x3PzCwnXAi6MLy8lE+cPJ2/fuYE3jZzf3549/OcdMU9/O6xJtrdfmBmQ4wLQQ/2rx7Bd97zOm761zexf80IPv2bJ3nHjx7i8aVr8x3NzKzfuBBk4YhJo7n5o2/iyn+cxcvrtnLu1Q/xyRv/zsvrt+Y7mpnZXnMhyFJJiTj3iHru/syJXHzSgdz+1ErecsW9fO+u59jW0pbveGZme8yFoI+qhpXxmVMP5q5PncBJh9Ry5V+e5eT/upc/zFtBoXXpbWYGLgR7bOK+lVz9viO58cJjqB5RzsU3/J1r7l2S71hmZn3mQrCXjpk2hts+fiwz66q5e9Gr+Y5jZtZnLgT9oLREHDGphqeWr3f3FGZWcFwI+klDfQ1bmtvck6mZFRwXgn7SUF8NwLym9XlOYmbWNy4E/WRa7T5UVpQyr2ldvqOYmfVJTguBpNMkLZK0WNIXutnnRElPSFog6d5c5sml0hJxeF0185b7jMDMCkvOCoGkUuCHwOnADOB8STM67VMDXA2cHRGHAe/OVZ6B0FBXzcIVGzwespkVlFyeERwFLI6IJRHRDNwInNNpn/cCN0XEUoCIKOj7L2fWV7O9tZ1nX9mY7yhmZlnLZSGoA5ZlLDel6zIdBIyWdI+kxyR9sKsDSbpQ0lxJc1etGrwDzs+qrwFgvhuMzayA5LIQqIt1nW+yLwOOBN4GnAr8u6SDdntTxOyIaIyIxtra2v5P2k8mj6lk5PAytxOYWUEpy+Gxm4CJGcv1wIou9lkdEZuBzZLuA2Y
Bz+YwV85IoqG+2ncOmVlByeUZwRxguqSpkiqA84BbO+1zC3CcpDJJlcDRwNM5zJRzDfU1LFq50T2SmlnByFkhiIhW4GLgDpJf7r+OiAWSLpJ0UbrP08CfgXnAo8C1EfFUrjINhIa6alragkUr3WBsZoUhl5eGiIjbgds7rbum0/LlwOW5zDGQZu54wngdsybW5DmNmVnv/GRxP6urGcGYqgp3NWFmBcOFoJ9JYmZ9NfN955CZFQgXghxoqKvm2Vc2sqW5Nd9RzMx65UKQAw31NbQHLFyxId9RzMx65UKQAzPdJbWZFRAXghwYN2o440YN84NlZlYQ+lQIJJVIGpWrMENJQ32Nu5ows4LQayGQdIOkUZKqgIXAIkmfzX20wtZQV82SVZvZuK0l31HMzHqUzRnBjIjYALyd5OGwScAHcppqCOhoJ/BtpGY22GVTCMollZMUglsiooXdexG1ThrcJbWZFYhsCsE1wItAFXCfpMmA74vsxb5VFdSPHuF2AjMb9Hrsa0hSCfBKRNRlrFsKnJTrYEOBu6Q2s0LQ4xlBRLST9CCauS7SnkWtFw31NSxbs5W1m5vzHcXMrFvZXBr6i6TPSJooad+OKefJhoCGOjcYm9ngl0031B9JXz+WsS6Aaf0fZ2g5rG5nl9THHzR4h9g0s+LWayGIiKkDEWQoqh5RzrSxVe5qwswGtV4LQXrr6EeB49NV9wA/Tm8jtV7MrK/m0RfW5DuGmVm3smkj+BFwJHB1Oh2ZrrMszKyr5uX123h147Z8RzEz61I2bQRviIhZGct/lfRkrgINNR3DVc5vWs/Jhw7Pcxozs91lc0bQJumAjgVJ04C23EUaWmbsP4oSuUtqMxu8sjkj+Cxwt6QlgIDJwIdzmmoIqRpWxoH77eMHy8xs0MrmrqG7JE0HDiYpBM9ExPacJxtCGupruGfRq0QEkvIdx8xsF9l0Qz2c5BmCy4BLgY+m63ol6TRJiyQtlvSFLrafKGm9pCfS6dI+5i8IDfXVrN7UzMvr3WBsZoNPNpeGfgZsBL6fLp8P/Bx4d09vklQK/BA4BWgC5ki6NSIWdtr1/og4s0+pC8zMjAfLJtSMyHMaM7NdZVMIDu5019DdWd41dBSwOCKWAEi6ETiHZHCbonLo/qMoKxHzmtZz2uH75zuOmdkusrlr6O+SjulYkHQ08GAW76sDlmUsN6XrOnujpCcl/UnSYV0dSNKFkuZKmrtq1aosPnpwGV5eysHjR7rPITMblLo9I5A0n6RPoXLgg2n300Fy11A2f9V31SraeUCbx4HJEbFJ0hnA74Hpu70pYjYwG6CxsbEgB8VpqK/m9vkr3WBsZoNOT5eG9va6fRMwMWO5HliRuUM6BGbH/O2SrpY0NiJW7+VnDzoN9TX86tFlLF2zhcljqvIdx8xsh24vDUXESx0TyS/1jiEqO6bezAGmS5oqqQI4D7g1cwdJ45X+eSzpqDTPa3v0lQxyOxuMfXnIzAaXbDqd+zjwFeAVoD1dHUBDT++LiFZJFwN3AKXAdRGxQNJF6fZrgHeR3I7aCmwFzouIgrz005uDx4+koqyEeU3rOGvWhHzHMTPbIZu7hi4huXOoz3+pR8TtwO2d1l2TMf8D4Ad9PW4hKi8tYcb+o3xGYGaDTjZ3DS0D/NurHzTUV/PU8vW0tw/Jkx4zK1A93TX0qXR2CXCPpD8CO7qWiIgrc5xtyJlZV83P/vYSS1Zv4sD9RuY7jpkZ0POloY7fVEvTqSKdbA91dEk9r2m9C4GZDRrdFoKI+OpABikGB9Tuw4jyUuY1refcI+rzHcfMDMjurqHb2P120fXAXJIhK92TWpZKS8ThdaPcJbWZDSrZNBYvATYB/51OG0huJT0oXbY+aKivYcGKDbS2tfe+s5nZAMjm9tHXR8TxGcu3SbovIo6XtCBXwYaqhvpqtre289yrmzh0/1H5jmNmltUZQa2kSR0L6fzYdLE5J6mGsMwuqc3MBoNsCsGngQck3S3
pHuB+4LOSqoCf5jLcUDRlTBUjh5f5wTIzGzSyGary9nSoykPYOVRlRwPxVbkMNxSVlIiZddXuktrMBo1uzwgkvSV9PRd4G3AAMA04I11ne2hmfTVPv7yB7a1t+Y5iZtbjGcEJwF+Bs7rYFsBNOUlUBGbV19DSFixauZGG+pp8xzGzItfTA2VfSV8/PHBxikNml9QuBGaWb702FksaJ+l/JP0pXZ4h6YLcRxu66kePYHRlue8cMrNBIZu7hq4nGVOgoxP9Z4FP5ipQMZBEQ32N7xwys0Ehm0IwNiJ+TTooTUS0Am7l3EsN9dU89+omtjb7W2lm+ZVNIdgsaQxpf0OSjsHjE+y1mXXVtLUHC1/2t9LM8iubQvApkrGGD5D0IPAz4OM5TVUEMrukNjPLp2weKHtc0gnAwSQPlC2KiJacJxvixo0azn4jhzHfhcDM8iybTucAjgKmpPsfIYmI+FnOUhWJhvpqnvSdQ2aWZ9mMR/BzkqeKn2BnI3GQXCKyvdBQX8Ndz7zKxm0tjBxenu84ZlaksjkjaARmRESfR1yXdBrwXaAUuDYivtnNfm8AHgbeExG/7evnFKqZ9dVEwIIVGzhm2ph8xzGzIpVNY/FTwPi+HlhSKfBD4HRgBnC+pBnd7PctkmcVikqDu6Q2s0EgmzOCscBCSY8C2ztWRsTZvbzvKGBxRCwBkHQjcA6wsNN+Hwd+B7wh29BDxZh9hlFXM8J3DplZXmVTCC7bw2PXAcsylpuAozN3kFQHvAN4Cz0UAkkXAhcCTJo0qbvdClJDvbukNrP8yub20Xv38Njq6nCdlq8CPh8RbVJXu+/IMBuYDdDY2NjntorBbGZ9NX96aiXrtjRTU1mR7zhmVoSyvX10TzQBEzOW64EVnfZpBG5Mi8BYkrEOWiPi9znMNajMSnsfnb98PcdNr81zGjMrRtk0Fu+pOcB0SVMlVQDnkTyhvENETI2IKRExBfgt8K/FVAQADp+ws0tqM7N8yKoQSBoh6eC+HDjtnO5ikruBngZ+HRELJF0k6aK+Rx2aqivLmTKm0ncOmVneZPNA2VnAFUAFMFXS64CvZXHXEBFxO3B7p3XXdLPvh7IJPBQ11Ncw98U1+Y5hZkUqmzOCy0huBV0HEBFPkHQ3Yf2kob6aFeu3sWrj9t53NjPrZ9kUgtaI8AXsHOoYunL+cl8eMrOBl9WTxZLeC5RKmi7p+8BDOc5VVA6vq0Zyg7GZ5Uc2heDjwGEkTxX/CtiAh6rsV1XDyjiwdh93SW1meZHNA2VbgC8BX0r7BaqKiG05T1ZkZtZXc9+zq4kIenq4zsysv/V6RiDpBkmjJFUBC4BFkj6b+2jFZVZ9Das3bWflBtdYMxtY2VwamhERG4C3k9wKOgn4QE5TFaGZ9X6wzMzyI5tCUC6pnKQQ3JIOUzmk+vsZDGbsP4qyEvnBMjMbcNkUgh8DLwJVwH2SJpM0GFs/Gl5eykHjRvqMwMwGXK+FICK+FxF1EXFGJF4CThqAbEWno0vqPRgMzsxsj3V715CkT/Xy3iv7OUvRm1lfzY1zlrFszVYmjanMdxwzKxI93T46csBSGLCzS+p5y9e5EJjZgOm2EETEVwcyiMFB40ZSUVrC/Kb1nNkwId9xzKxIZNP76HDgApKni4d3rI+Ij+QwV1GqKCvh0P1H8qTvHDKzAZTNXUM/B8YDpwL3kow0tjGXoYpZQ30NTy3fQHu7G4zNbGBkUwgOjIh/BzZHxE+BtwEzcxureM2sr2bT9lZeeG1zvqOYWZHIphC0pK/rJB0OVOPxCHKmYccTxr48ZGYDI5tCMFvSaODLJGMOLwS+ldNURezA2n0YUV7qB8vMbMD09BzBJRHxXeDpiFgL3AdMG7BkRaqstITDJoxyl9RmNmB6OiP4cPr6/YEIYjvNrK/mqRXraW1rz3cUMysCPRWCpyW9CBwsaV7GNF/SvAHKV5Rm1dewraWdxas25TuKmRWBnh4oO1/SeOAO4Ow9Obik04D
vAqXAtRHxzU7bzwG+DrQDrcAnI+KBPfmsoSSzS+pDxo/KcxozG+p6fKAsIlYCs/bkwOloZj8ETgGagDmSbo2IhRm73QXcGhEhqQH4NXDInnzeUDJ1TBUjh5Uxr2kd/9g4Md9xzGyIy+auoT11FLA4IpZERDNwI3BO5g4RsSl2drVZhcc5AKCkRBxeV+0GYzMbELksBHXAsozlpnTdLiS9Q9IzwB+BLrutkHShpLmS5q5atSonYQebhvpqnn55I82tbjA2s9zqthBI+nn6eskeHrurEdh3+4s/Im6OiENIRkD7elcHiojZEdEYEY21tbV7GKewzKyvprmtnUUr3ZuHmeVWT2cER6ajkX1E0mhJ+2ZOWRy7Cci8wF0PrOhu54i4DzhA0tiskg9xmV1Sm5nlUk+NxdcAfyZ5iOwxdv0LP+j94bI5wHRJU4HlwHnAezN3kHQg8HzaWHwEUAG81qevYIiqHz2CmsrypJ3g6HynMbOhrKfbR78HfE/SjyLio309cES0SrqY5PbTUuC6iFgg6aJ0+zXAO4EPSmoBtgLvyWg8LmqSmFlXzZNuMDazHOt1PIKI+KikWcBx6ar7IiKrB8oi4nbg9k7rrsmY/xbut6hbs+pr+NG9z7OtpY3h5aX5jmNmQ1Svdw1J+gTwS2C/dPqlpI/nOpglDcZt7cHClzfkO4qZDWG9nhEA/wwcHRGbASR9C/gb7oMo53Z0Sb1sHUdMGp3nNGY2VGXzHIGAtozlNrq+NdT62fhRw6kdOYx5y91OYGa5k80ZwU+ARyTdnC6/Hfif3EWyDpJo8BPGZpZjvZ4RRMSVJF1SrwHWAh+OiKtyHcwSM+urWbxqE5u2t+Y7ipkNUdmcERARjwOP5ziLdWFWfQ0RsGD5eo6eNibfccxsCMplX0PWDw6vSxqM57udwMxyxIVgkKsdOYwJ1cP9YJmZ5YwLQQFoqK9hXpP7HDKz3MjmgbJzJT0nab2kDZI2SvITTgPomGn78tJrW3hw8ep8RzGzISibM4JvA2dHRHVEjIqIkRHh8RMH0HlHTaJ+9Ai+dttCD2hvZv0um0LwSkQ8nfMk1q3h5aV8+W2HsuiVjdzw6NJ8xzGzISab20fnSvpf4PfA9o6VEXFTzlLZbk49bDxvOmAM/3Xns5zVMIHRVRX5jmRmQ0Q2ZwSjgC3APwBnpdOZuQxlu5PEpWfNYOO2Fq76v2fzHcfMhpBsuqH+8EAEsd4dMn4U7zt6Mr94ZCnvPXoyB48fme9IZjYEZHPXUL2kmyW9KukVSb+TVD8Q4Wx3nzrlIPYZVsbX/rAAj+FjZv0hm0tDPwFuBSYAdcBt6TrLg9FVFXzqlIN4cPFr3LnwlXzHMbMhIJtCUBsRP4mI1nS6HqjNcS7rwfuOnsRB4/bhG398mm0tbb2/wcysB9kUgtWS3i+pNJ3ejweYz6uy0hIuPfMwlq7ZwnUPvpDvOGZW4LIpBB8B/hFYCbwMvCtdZ3l07PSxnDJjHD/462Je2bAt33HMrIBlMx7B0og4OyJqI2K/iHh7RLw0EOGsZ19+26G0tgXf+vMz+Y5iZgWs20Ig6XPp6/clfa/zlM3BJZ0maZGkxZK+0MX290mal04PSZq1519K8Zk8pooLjpvKTY8v5+9L1+Y7jpkVqJ7OCDq6lZgLPNbF1CNJpcAPgdOBGcD5kmZ02u0F4ISIaAC+DszuU3rjYycdSO3IYXz1toW0t/t2UjPru24LQUTcls5uiYifZk4kTxr35ihgcUQsiYhm4EbgnE6f8VBEdPwp+zDg5xP6aJ9hZXz+tEN4Ytk6fv/E8nzHMbMClE1j8RezXNdZHbAsY7kpXdedC4A/dbVB0oWS5kqau2rVqiw+uric+/o6Zk2s4Zt/esZjG5tZn/XURnC6pO8DdZ3aB64Hsvltoy7WdXntQtJJJIXg811tj4jZEdEYEY21tX6EobOSEvGVs2bw6sbtXH334nzHMbMC09MZwQqS9oFt7No2cCtwahbHbgI
mZizXp8fchaQG4FrgnIjw8wl76IhJozn39XVce/8LLH0tmyt3ZmaJntoInkzbA2YCv8hoH7iFjO6oezAHmC5pqqQK4DySIrKDpEnATcAHIsJdau6lz512CGWl4hu3L8x3FDMrINm0EdwJjMhYHgH8X29viohW4GLgDpI7kH4dEQskXSTponS3S4ExwNWSnpA0t0/pbRfjq4fzsZMO5I4Fr3hYSzPLmnrrwVLSExHxut7WDZTGxsaYO9f1ojvbWto45Tv3Ullexh8/cSxlpdnUejMb6iQ9FhGNXW3L5rfEZklHZBzsSGBrf4Wz/jW8vJQvnTHDw1qaWdayGaryk8BvJHU09O4PvCd3kWxvnXrYOA9raWZZy6avoTnAIcBHgX8FDo2IXp8stvzxsJZm1hfZXkA+mKSbiNeTdBXxwdxFsv5wyPhRvP+YZFjLRSs35juOmQ1i2QxV+RXg++l0EvBt4Owc57J+8G9v9bCWZta7bM4I3gWcDKxMB7KfBQzLaSrrFx7W0syykU0h2BoR7UCrpFHAq8C03May/uJhLc2sN9kUgrmSaoD/Juli4nHg0Zymsn7jYS3NrDc9FgJJAv4zItZFxDXAKcA/pZeIrEAcO30s/+BhLc2sGz0WgkhaGH+fsfxiRMzLeSrrd1/ysJZm1o1sLg09LOkNOU9iOeVhLc2sO9kUgpNIisHz6djC8yX5rKAAfeykA9nPw1qaWSc9DUwzKZ09neQuobcAZwFnpq9WYDyspZl1paczgt8DRMRLwJUR8VLmNDDxrL+9w8NamlknPRWCzKEm/dzAEOFhLc2ss54KQXQzbwXOw1qaWaaeCsEsSRskbQQa0vkNkjZK2jBQAS03Pn+6h7U0s0RPYxaXRsSoiBgZEWXpfMfyqIEMaf1v3CgPa2lmCY9jWMQuOHYqE/cdwdduW0hrW3u+45hZnrgQFLHMYS0/eN2jftDMrEi5EBS5Uw8bx2VnzeCZlRt5x9UP8c8/ncPTL7sJyKyY5LQQSDpN0iJJiyV9oYvth0j6m6Ttkj6TyyzWNUl86M1Tue9zJ/HpUw7ikRfWcPp37+fiGx7n+VWb8h3PzAaAcjVylaRS4FmSHkubgDnA+RGxMGOf/YDJwNuBtRFxRW/HbWxsjLlz5+Yks8H6LS3Mvv95fvLgi2xraeOdR9RzyVunUz+6Mt/RzGwvSHosIhq72pbLM4KjgMURsSQimoEbgXMyd4iIVyNiDtCSwxzWB9WV5Xz21EO473Mn8aE3TeWWJ1dw0hX3cOktT/Gqu7A2G5JyWQjqgGUZy03puj6TdKGkuZLmrlq1ql/CWc/G7jOMS8+awT2fOZF3HTmRGx5ZyvGX381/3v40azc35zuemfWjXBYCdbFuj65DRcTsiGiMiMba2tq9jGV9MaFmBP957kzu+vQJnH74/sy+fwnHfftuvvOXZ9m4zSdyZkNBLgtBEzAxY7keWJHDz7Mcmjymiu+853Xc8cnjOW76WL5713Mc9+27uebe59na7LGQzQpZLgvBHGC6pKmSKoDzgFtz+Hk2AA4aN5Ifvf9Ibrv4WF6X9mJ6/OV389OHXmR7qwuCWSHK2V1DAJLOAK4CSoHrIuIbki4CiIhrJI0H5gKjgHZgEzAjIrq9kd13DQ0uc15cwxV3LOKRF9ZQVzOCT5x8IO88op6yUj+iYjaY9HTXUE4LQS64EAw+EcEDi1dzxR2LeLJpPVPHVvHJt07nrIYJlJR01VRkZgMtX7ePWpGQxHHTa/n9x97M7A8cybCyEi658QnO+N793LlgJYX2x4ZZsfEZgfW79vbgtnkruOr/nuOF1Zs5oLaKEw7aj+Omj+XoaftSWVGW74hmRceXhiwvWtvauenvy7ntyRU88sIamlvbKS8VR04ezXHTazn2wLEcXldNqS8fmeWcC4Hl3baWNua8uIYHnlvN/c+tZmHasV1NZTlvOmDMjsIwcV93ZWGWCy4ENuis3rSdBxcnReGB51azMu2+YsqYSo6
dPpZjD6zljQeMoXpEeZ6Tmg0NLgQ2qEUEz6/axP3p2cLDS15jS3MbpSViVn01x06v5bjpY3ndxBrKfVuq2R5xIbCC0tzazt+XruWB9IxhXtM62gP2GVbGMdP2TS4jTR/LtLFVSG5fMMuGC4EVtPVbWnjo+dXcvzi5jLR0zRYAJlQP500HjuWgcfswad8qpoytZPK+VYyoKM1zYrPBx4XAhpS1d17OY61T+O1r03j0xTWs2dzMG0sW0KAl/LjtLMaNGsbkMVVMGVOZvlYxeUwlk8dUMnK42xysOPVUCHxDtxWc0dOP5q2/+RBvfff18IFT2PT0Xxl+y494pPFKRpYcxIuvbeGl1zZz96JVrNrYtMt7x1RVMHlMZVockrOISfsmyzWV5b7UZEXJZwRWmF64D37zIWi8AOb+D7z7eph6/G67bd7eykuvbWHpms07CsSLq5PXFet3HWhn1PAypoyt2nE2MWnfSsZXD2d0ZQU1leWMrqygsqLUxcIKks8IbOiZenxSBO77Nhz/uS6LAEDVsDJmTBjFjAmjdtu2raWNZWu28NJrW3jxtc07Xp9cto7b579MW/vufyRVlJZQU1nOvlU7i0NNZQWjd8wnr6OrytP1FVSPKPdDczaouRBYYXrhvuRM4PjPJa9Tj+u2GHRneHkp08eNZPq4kbtta2lrp2ntVlZt3M7aLc2s29LM2i0tyfzmFtak6557dRPrtjSzbksLrV0UDgAJRg0vT4pFVcUuBaNmRDk1Vclrx/rqEcl+VT77sAHiQmCFp+OyUMfloKnH7brcD8pLS5g6toqpY6uy2j8i2Li9lXWbk2KxNi0Oa9MCsqOQbG7mlQ3bWLRyI2u3NLOlh0F9yktF9YiOolG+y3xNWjRq0nU1leU7zkxGlLuAWN+4EFjhWf74rr/0px6fLC9/vN8KQV9JYtTwckYNL2fSmOy7ydje2sb6rS2s29IxJQVk3daOAtLC+q3NrN3cwvJ1W1mwYj3rtrSwtaX7AlJRWkJ1ZTkjh5VROayUyooyKitKqep4HVbGiIpSqiqSbVXDShlRUbbLcmXHfEVyDD/IN7S5sdisAG1rySwgSdFYn1E81m1pZnNzG1u2t7K5uZUtzVv8uWAAAAqRSURBVG3JtL2Vzc1tbN7e2u2lrK6UlyotDKVUDitLC0VSXEZ0FJldCkhagIZ1v09lhQvMQHJjsdkQM7y8lOHlpYwbNXyPj9Hc2s7W5ra0ULSyeXsyn6zbWTS2Nrfustyx79bmNlZu2JZxjKTYdNXI3p2K0pKkOJQnBaaqopQRFcnXVlFawrDyUoaVlTCsrISKshKGlaXL5cl8RbptWOa2jO1dva+itMQj6HXiQmBWpCrSX5LVlf33kF1EsH2XArPzTGRLV+taMgtOun17G2u3tLC9pY3m1na2t7azvbUtfW2nubV9r3OWliijSOxaVLpat8t8eQnDMopU533KSkR5WQnlJSWUl4qy0uS1POO1rLSE8pKO+Y5tJXm7u8yFwMz6jaQdZyujqypy8hnt7UFz286isKNItOyc36WAtHTs28a2jPfs2KelPT1e2875lnbWbe1cjHZ9Xy6UiJ1FoqyEspISKjKKyflHTeKfj5vW75/rQmBmBaWkRAwvSYpNvkQkxSizSHQUmNa2oKWtnZa2oLUtKSw71rUn6zq2t3Rsa2+npTVobe+0f1vG/u3B2H2G5eTrcSEwM+sjSWm7Qym7P4VSeHLaYiLpNEmLJC2W9IUutkvS99Lt8yQdkcs8Zma2u5wVAkmlwA+B04EZwPmSZnTa7XRgejpdCPwoV3nMzKxruTwjOApYHBFLIqIZuBE4p9M+5wA/i8TDQI2k/XOYyczMOsllIagDlmUsN6Xr+roPki6UNFfS3FWrVvV7UDOzYpbLQtDVDbGdnzTJZh8iYnZENEZEY21tbb+EMzOzRC4LQRMwMWO5HlixB/uYmVkO5bIQzAGmS5oqqQI4D7i10z63Ah9M7x46BlgfES/
nMJOZmXWSs+cIIqJV0sXAHUApcF1ELJB0Ubr9GuB24AxgMbAF+HCu8piZWdcKrvdRSauAl/KdoxtjgdX5DrGHCjV7oeYGZ8+XYs0+OSK6bGQtuEIwmEma2103r4NdoWYv1Nzg7Pni7LtzX6xmZkXOhcDMrMi5EPSv2fkOsBcKNXuh5gZnzxdn78RtBGZmRc5nBGZmRc6FwMysyLkQ7CVJEyXdLelpSQskXZLvTH0lqVTS3yX9Id9Z+kJSjaTfSnom/f6/Md+ZsiXp39L/L09J+pWkPR+FPsckXSfpVUlPZazbV9JfJD2Xvo7OZ8budJP98vT/zDxJN0uqyWfG7nSVPWPbZySFpLH98VkuBHuvFfh0RBwKHAN8rItxFwa7S4Cn8x1iD3wX+HNEHALMokC+Bkl1wCeAxog4nOTJ+/Pym6pH1wOndVr3BeCuiJgO3JUuD0bXs3v2vwCHR0QD8CzwxYEOlaXr2T07kiYCpwBL++uDXAj2UkS8HBGPp/MbSX4Z7daV9mAlqR54G3BtvrP0haRRwPHA/wBERHNErMtvqj4pA0ZIKgMqGcSdLUbEfcCaTqvPAX6azv8UePuAhspSV9kj4s6IaE0XHybp7HLQ6eb7DvAd4HN00VPznnIh6EeSpgCvBx7Jb5I+uYrkP1V7voP00TRgFfCT9LLWtZKq8h0qGxGxHLiC5C+6l0k6W7wzv6n6bFxHB5Hp6355zrOnPgL8Kd8hsiXpbGB5RDzZn8d1IegnkvYBfgd8MiI25DtPNiSdCbwaEY/lO8seKAOOAH4UEa8HNjN4L0/sIr2efg4wFZgAVEl6f35TFR9JXyK5tPvLfGfJhqRK4EvApf19bBeCfiCpnKQI/DIibsp3nj54M3C2pBdJhhJ9i6Rf5DdS1pqApojoOPv6LUlhKARvBV6IiFUR0QLcBLwpz5n66pWOYWXT11fznKdPJP0TcCbwviich6kOIPnj4cn0Z7YeeFzS+L09sAvBXpIkkuvUT0fElfnO0xcR8cWIqI+IKSSNlX+NiIL4yzQiVgLLJB2crjoZWJjHSH2xFDhGUmX6/+dkCqShO8OtwD+l8/8E3JLHLH0i6TTg88DZEbEl33myFRHzI2K/iJiS/sw2AUekPwt7xYVg770Z+ADJX9NPpNMZ+Q5VJD4O/FLSPOB1wH/kOU9W0rOY3wKPA/NJfg4HbbcHkn4F/A04WFKTpAuAbwKnSHqO5A6Wb+YzY3e6yf4DYCTwl/Tn9Zq8huxGN9lz81mFc1ZkZma54DMCM7Mi50JgZlbkXAjMzIqcC4GZWZFzITAzK3IuBFZQJH1I0g9y8X5Jm9LXCZJ+u6ef0cvnv9jRY6Skh3LxGVlkuLYAO0a0HCrLdwCzwSYiVgDvGoDPycvTxBHxz/n4XBu8fEZgA07S+yU9mj7M82NJpen6TZK+JekxSf8n6ShJ90hakna21WGipD9LWiTpK1kc98OSnpV0L8kDgB37T5X0N0lzJH09Y/2Ujj7g0zOIm9LPe07StzP2uyA97j2S/rurMw1JYyTdmXaM92NAGds6zkBOlHSvpF+nx/umpPelX8t8SQek+9VK+l2ad46kN6frL1PSd33H9+oT6foqSX+U9KSScQ/ek66/R1JjOn9++hlPSfpWZjZJ30jf+7CkcXvwT22FIiI8eRqwCTgUuA0oT5evBj6Yzgdwejp/M3AnUE4y1sAT6foPkfTYOQYYATwFNHZ3XGB/ki4daoEK4EHgB+k+t2Z89seATen8FOCpjM9bAlQDw4GXgIkkncW9COybZry/47idvt7vAZem829Lv8ax6XLH550IrEuzDgOWA19Nt10CXJXO3wAcm85PIunWBOAy4KH0vWOB19JM7wT+OyNLdfp6T/o9m5DxvSkD/gq8PePf4qx0/tvAl/P9f8dT7iZfGrKBdjJwJDAn6WaHEezssKwZ+HM6Px/YHhEtkuaT/HLu8JeIeA1A0k3AsSS9SHZ13KOBeyJiVbr//wIHpcd
5M8kvS4CfAzv+Iu7krohYn75/ITCZ5BfuvRGxJl3/m4zjZjoeOBcgIv4oaW03nzEn0m6dJT1PUgQ7vg8npfNvBWakXx/AKEkj0/k/RsR2YLukV4Fx6XuvSP/S/0NE3N/pM9/Art+bX6Z5f0/yb9ExYt1jJN1I2BDlQmADTcBPI6KrUaFaIqKjz5N2YDtARLQrGcClQ+d+UaK740p6exf7d35vb7ZnzLeR/Nyom3374zPaM5bb2flzWgK8MSK2Zr4xLQy7ZYyIZyUdCZwB/KekOyPia5lv7SFP5r9Fx9dsQ5TbCGyg3QW8S9J+sGPs28l9PMYp6ftGkIyM9WAPx30EODG9Vl8OvDvjOA+yc4jI9/Uxw6PACZJGp0Xqnd3sd1/HsSWdDuzN2L53Ahd3LEh6XU87S5oAbImIX5AMhNO5m+5HSL6GsWl7yvnAvXuRzwqUq7wNqIhYKOnLwJ2SSoAWkuvzL/XhMA+QXMo5ELghIuYCdHXciHhY0mUkvTi+TNLjZ2l6nEuAGyRdQjKeRF++juWS/oPkl+kKki6w13ex61eBX0l6nOSX7N6MM/sJ4IdKelstIykyF/Ww/0zgckntJN+Pj3b6Gl6W9EXgbpKzg9sjomC6k7b+495HzfaQpH0iYlN6RnAzcF1E3JzvXGZ95UtDZnvuMklPkNy59AJJI6tZwfEZgZlZkfMZgZlZkXMhMDMrci4EZmZFzoXAzKzIuRCYmRW5/w9w/ejp5W4sYwAAAABJRU5ErkJggg==\n", 156 | "text/plain": [ 157 | "
" 158 | ] 159 | }, 160 | "metadata": { 161 | "needs_background": "light" 162 | }, 163 | "output_type": "display_data" 164 | } 165 | ], 166 | "source": [ 167 | "false_neighbors = []\n", 168 | "\n", 169 | "for i in range(1,15):\n", 170 | " false_neighbors.append(ct.calculate_false_nearest_neighours(data, delay, i) / len(data))\n", 171 | " \n", 172 | "zeros = [i for i in range(0, len(false_neighbors)) if false_neighbors[i] < 0.05]\n", 173 | "print(false_neighbors)\n", 174 | "print(zeros)\n", 175 | "plt.plot(range(1,15),false_neighbors)\n", 176 | "plt.plot(zeros[0], false_neighbors[zeros[0]], 'x')\n", 177 | "\n", 178 | "plt.xlabel('embedding dimension')\n", 179 | "plt.ylabel('Fraction of false neighbors')" 180 | ] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.7.6" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 4 204 | } 205 | --------------------------------------------------------------------------------