├── NILM
│   ├── utils
│   │   ├── __init__.py
│   │   ├── tools.py
│   │   ├── utils_nilmtk.py
│   │   └── load_data.py
│   ├── preprocessing
│   │   ├── __init__.py
│   │   └── sampling.py
│   ├── converter
│   │   ├── __init__.py
│   │   ├── metadata
│   │   │   ├── blued.json
│   │   │   └── metadata_blued_to_json.py
│   │   ├── convert_dataframe_to_meter.py
│   │   └── convert_blued_to_user.py
│   ├── tracking
│   │   ├── __init__.py
│   │   └── simple_tracking.py
│   ├── __init__.py
│   ├── clustering
│   │   ├── __init__.py
│   │   ├── DBSCAN.py
│   │   └── mean_shift.py
│   ├── detection
│   │   ├── __init__.py
│   │   ├── simple_edge.py
│   │   └── steady_states.py
│   ├── modeling
│   │   ├── __init__.py
│   │   └── association_two_states.py
│   ├── user.py
│   ├── measurements.py
│   ├── events.py
│   ├── clusters.py
│   ├── appliance_consumptions.py
│   ├── appliance_models.py
│   └── meter.py
├── .gitignore
├── test_converters.py
├── test_redd.py
├── README.rst
└── test.py

--------------------------------------------------------------------------------
/NILM/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
*.pyc

--------------------------------------------------------------------------------
/NILM/preprocessing/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from sampling import resample

--------------------------------------------------------------------------------
/NILM/converter/__init__.py:
--------------------------------------------------------------------------------
from convert_dataframe_to_meter import dataframe_to_meter
from convert_blued_to_user import blued_to_user

--------------------------------------------------------------------------------
/NILM/tracking/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from simple_tracking import simple_tracking

__all__ = {
    "simple": {
        "model": simple_tracking,
        "parameters": {
        }
    }
}

--------------------------------------------------------------------------------
/NILM/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from user import User
from meter import Meter
from meter import Store
from appliance_models import ApplianceModels
from appliance_consumptions import ApplianceConsumptions
from clusters import Clusters
from events import Events

--------------------------------------------------------------------------------
/NILM/converter/metadata/blued.json:
--------------------------------------------------------------------------------
{"number_users": 1, "users": {"user_blued": {"meters": {"meter_blued": {"tz": "US/Eastern", "measurements": [["A", "P"], ["A", "Q"], ["B", "P"], ["B", "Q"]], "user_id": "user_blued", "meter_id": "meter_blued", "number_datasets": 2}}, "user_id": "user_blued", "number_meters": 1}}}

--------------------------------------------------------------------------------
/NILM/clustering/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 1 18:21:12 2014

@author: thibaut
"""

from DBSCAN import DBSCAN
from mean_shift import MeanShift

__all__ = {
    "DBSCAN": {
        "model": DBSCAN,
        "parameters": {
            "eps": 35,
            "min_samples": 1}},
    "MeanShift": {
        "model": MeanShift,
        "parameters": {}}}

--------------------------------------------------------------------------------
/NILM/detection/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 8 15:03:12 2015

@author: thibaut
"""

from simple_edge import simple_edge
from steady_states import steady_states


__all__ = {
    "simple_edge": {
        "model": simple_edge,
        "parameters": {
            "edge_threshold": 70}},
    "steady_states": {
        "model": steady_states,
        "parameters": {
            "edge_threshold": 70,
            "state_threshold": 15}}}

--------------------------------------------------------------------------------
/NILM/utils/tools.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 18 18:24:21 2014

@author: thibaut
"""

from user import User


def create_user(hdf_filename=
                '/Volumes/Stockage/DATA/DATA_BLUED/CONVERTED/user1.h5'):
    user = User(hdf_filename)
    return user


def create_meter(hdf_filename=
                 '/Volumes/Stockage/DATA/DATA_BLUED/CONVERTED/user1.h5'):
    user = User(hdf_filename)
    # user.meters is a list of NILM.Meter objects (see NILM/user.py)
    meter = user.meters[0]
    return meter

--------------------------------------------------------------------------------
/NILM/converter/metadata/metadata_blued_to_json.py:
--------------------------------------------------------------------------------
import json

metadata_Blued = {
    "number_users": 1,
    "users": {
        'user_blued': {
            "user_id": 'user_blued',
            "number_meters": 1,
            "meters": {
                'meter_blued': {
                    "user_id": 'user_blued',
                    "meter_id": 'meter_blued',
                    "number_datasets": 2,
                    "measurements": [('A', 'P'), ('A', 'Q'),
                                     ('B', 'P'), ('B', 'Q')],
                    "tz": "US/Eastern"
                }
            }
        }
    }
}

outputfilename = 'blued.json'
with open(outputfilename, 'wb') as outfile:
    json.dump(metadata_Blued, outfile)

--------------------------------------------------------------------------------
/NILM/utils/utils_nilmtk.py:
--------------------------------------------------------------------------------
from __future__ import print_function, division
from os.path import isdir, dirname, abspath
from os import getcwd
from inspect import currentframe, getfile, getsourcefile
from sys import getfilesystemencoding


def get_module_directory():
    # Taken from http://stackoverflow.com/a/6098238/732596
    path_to_this_file = dirname(getfile(currentframe()))
    if not isdir(path_to_this_file):
        encoding = getfilesystemencoding()
        path_to_this_file = dirname(unicode(__file__, encoding))
    if not isdir(path_to_this_file):
        # keep the result of this fallback (the original discarded it)
        path_to_this_file = dirname(abspath(getsourcefile(lambda _: None)))
    if not isdir(path_to_this_file):
        path_to_this_file = getcwd()
    assert isdir(path_to_this_file), path_to_this_file + ' is not a directory'
    return path_to_this_file

--------------------------------------------------------------------------------
/NILM/modeling/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 26 12:30:14 2015

@author: thibaut
"""

from association_two_states import euclidian_cluster_metric
from association_two_states import simple_association_two_states
from association_two_states import dbscan_association_two_states
from association_two_states import affinity_propagation_association_two_states


__all__ = {
    "simple": {
        "model": simple_association_two_states,
        "parameters": {
            'distance_threshold': 35,
            'metric': euclidian_cluster_metric}},
    "dbscan": {
        "model": dbscan_association_two_states,
        "parameters": {
            'eps': 35,
            'min_samples': 1,
            'metric': euclidian_cluster_metric}}
}

--------------------------------------------------------------------------------
/test_converters.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

from NILM import converter
from NILM import Meter

import pandas as pd

blued_path = '/Volumes/Stockage/DATA/DATA_BLUED/RAW'
store_path = '/Volumes/Stockage/DATA/DATA_BLUED/CONVERTED'

# converter.blued_to_user(blued_path, store_path)

user_filename = 'user_blued.h5'
hdf_filename = "/".join((store_path, user_filename))

key = "/".join(("meter_blued", "measurements"))
with pd.get_store(hdf_filename) as store:
    df = store[key]

meter_filename = 'meter_blued.h5'
hdf_filename = "/".join((store_path, meter_filename))

converter.dataframe_to_meter(df, hdf_filename)

meter1 = Meter.from_meter_hdf(hdf_filename)

meter_filename = 'meter_blued_2.h5'
hdf_filename = "/".join((store_path, meter_filename))
meter2 = Meter.from_dataframe(df, hdf_filename)

--------------------------------------------------------------------------------
/NILM/preprocessing/sampling.py:
--------------------------------------------------------------------------------
import pandas as pd
from pandas.tseries.offsets import Micro, Second


def resample(df, sampling_period=1):
    """Resample the data

    Warning: does not handle missing values

    Parameters
    ----------
    df: pandas.DataFrame,
        index: pandas.DatetimeIndex
        values: power measured

    sampling_period: float or int, optional
        Elapsed time between two measures, in seconds

    Returns
    -------
    df: pandas.DataFrame,
        index: pandas.DatetimeIndex with sampling_period seconds between
        two timestamps
        values: power measured
    """
    assert isinstance(df, pd.DataFrame)
    assert isinstance(df.index, pd.DatetimeIndex)

    if isinstance(sampling_period, int):
        df = df.resample(Second(sampling_period), how='last', label='right',
                         closed='right')
    else:
        period = sampling_period*(10**6)
        df = df.resample(Micro(period), how='last',
                         label='right', closed='right')
    return df

--------------------------------------------------------------------------------
/NILM/utils/load_data.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 18 19:21:42 2014

@author: thibaut
"""
from os.path import isfile, join

import pandas as pd


def load_data_window(hdf_filename, location, phase, name,
                     start=None, end=None):
    """
    Load only one window of data.

    start and end need to be pd.Timestamp or datetime.datetime objects.
    """
    assert isfile(hdf_filename)
    key = join('/location{:d}'.format(location), 'phase{:s}'.format(phase))
    key_meter = join(key, name)

    if start is None:
        if end is None:
            with pd.get_store(hdf_filename) as store:
                meter = store.select(key_meter)
            return meter
        else:
            end = pd.Timestamp(end)
            with pd.get_store(hdf_filename) as store:
                meter = store.select(key_meter, 'index < end')
            return meter
    else:
        start = pd.Timestamp(start)
        if end is None:
            with pd.get_store(hdf_filename) as store:
                meter = store.select(key_meter, 'index > start')
        else:
            end = pd.Timestamp(end)
            with pd.get_store(hdf_filename) as store:
                meter = store.select(key_meter, 'index > start & index < end')
        return meter

--------------------------------------------------------------------------------
/NILM/detection/simple_edge.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 8 15:04:36 2015

@author: thibaut
"""
from __future__ import print_function, division
import pandas as pd
import numpy as np


def simple_edge(df, edge_threshold=70):
    """Implements simple edge detection

    Parameters
    ----------
    df: pandas.DataFrame
        DataFrame containing the measurements
        index: DateTimeIndex, timestamps
        attributes: power measured (depending on powers available)

    edge_threshold: int or float
        Threshold on the active power (the first power) used to detect an
        event between two consecutive measurements.

    Returns
    -------
    events: pandas.DataFrame
        DataFrame containing the events
        index: DateTimeIndex, timestamps
        attributes: power types available
    """
    # PART I: t to delta t
    columns = df.columns

    df_t1 = df.values[1:]
    df_t0 = df.values[:-1]
    df_dt = df_t1 - df_t0
    index_dt = df.index[1:]
    events = pd.DataFrame(df_dt, columns=columns, index=index_dt)
    # PART II: Application of edge_threshold
    events = events[np.abs(events[columns[0]]) > edge_threshold]
    events.index.name = df.index.name
    return events

--------------------------------------------------------------------------------
/NILM/clustering/DBSCAN.py:
--------------------------------------------------------------------------------
from sklearn.cluster import DBSCAN



# def dbscan(events, eps, min_samples,
#            features=['delta P', 'delta Q'], viz=False,
#            standard_scaler=False):
#     try:
#         X = events[features].values
#     except TypeError:
#         features = pd.MultiIndex.from_tuples(features)
#         X = events[features].values
#     if standard_scaler is False:
#         Y = X
#     else:
#         Y = StandardScaler().fit_transform(X)
#     db = DBSCAN(eps=eps, min_samples=min_samples).fit(Y)
#     # events['cluster'] = db.labels_
#     labels = db.labels_
#     n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
#     print('Estimated number of clusters: %d' % n_clusters_)
#     if viz is True:
#         _viz_dbscan(db, X)
#     return db.labels_


# def _viz_dbscan(db, X):
#     labels = db.labels_
#     core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
#     core_samples_mask[db.core_sample_indices_] = True
#     # n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
#     unique_labels = set(labels)
#     colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
#     for k, col in zip(unique_labels, colors):
#         class_member_mask = (labels == k)
#         xy = X[class_member_mask & core_samples_mask]
#         plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
#                  markeredgecolor='k', markersize=14)
#         xy = X[class_member_mask & ~core_samples_mask]
#         plt.plot(xy[:, 0], xy[:, 1], 'o',
markerfacecolor=col, 41 | # markeredgecolor='k', markersize=6) 42 | # plt.show() 43 | -------------------------------------------------------------------------------- /NILM/clustering/mean_shift.py: -------------------------------------------------------------------------------- 1 | # import pandas as pd 2 | from sklearn.cluster import MeanShift 3 | # import numpy as np 4 | 5 | # Remark continue to correct it 6 | 7 | 8 | 9 | # def mean_shift_clustering(pair_buffer_df, features): 10 | # # Creating feature vector 11 | # cluster_df = pd.DataFrame() 12 | # if 'active' in features: 13 | # fmean = lambda row: (np.fabs(row['T1 Active']) + 14 | # np.fabs(row['T2 Active'])) / 2 15 | # cluster_df['active'] = pd.Series(pair_buffer_df.apply(fmean, axis=1), 16 | # index=pair_buffer_df.index) 17 | # if 'reactive' in features: 18 | # cluster_df['reactive'] = pd.Series(pair_buffer_df.apply(lambda row: 19 | # ((np.fabs(row['T1 Reactive']) + np.fabs(row['T2 Reactive'])) / 2), axis=1), index=pair_buffer_df.index) 20 | # if 'delta' in features: 21 | # cluster_df['delta'] = pd.Series(pair_buffer_df.apply(lambda row: 22 | # (row['T2 Time'] - row['T1 Time']), axis=1), index=pair_buffer_df.index) 23 | # cluster_df['delta'] = cluster_df['delta'].apply(lambda x: int(x) / 6e10) 24 | # if 'hour_of_use' in features: 25 | # cluster_df['hour_of_use'] = pd.DatetimeIndex(pair_buffer_df['T1 Time']).hour 26 | 27 | # """ 28 | # if 'sd_event' in features: 29 | # cluster_df['sd_event'] = pd.Series(pair_buffer_df.apply(lambda row: 30 | # (df.power[row['T1 Time']:row['T2 Time']]).std(), axis=1), index=pair_buffer_df.index) 31 | # """ 32 | 33 | # X = cluster_df.values.reshape((len(cluster_df.index), len(features))) 34 | # ms = MeanShift(bin_seeding=True) 35 | # ms.fit(X) 36 | # labels = ms.labels_ 37 | # cluster_centers = ms.cluster_centers_ 38 | # labels_unique = np.unique(labels) 39 | # n_clusters_ = len(labels_unique) 40 | # return pd.DataFrame(cluster_centers, columns=features) -------------------------------------------------------------------------------- /test_redd.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import pandas as pd 4 | from os.path import isfile 5 | import NILM as nilm 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | redd_file = '/Volumes/Stockage/DATA/DATA_REDD/RAW/low_freq/house_1/channel_1.dat' 10 | assert isfile(redd_file) 11 | 12 | col = pd.MultiIndex.from_tuples([('A', 'P')]) 13 | df = pd.read_csv(redd_file, names=col, header=None, index_col=0, sep=' ', nrows=50000) 14 | df.index = pd.to_datetime(df.index, unit='s', utc=True) 15 | 16 | hdf_filename = '/Volumes/Stockage/DATA/Meters/meter_redd_1.h5' 17 | meter = nilm.Meter.from_dataframe(df, hdf_filename) 18 | 19 | meter.load_measurements(sampling_period=10) 20 | meter.detect_events(detection_type='simple_edge') 21 | #meter.detect_events(detection_type='steady_states', edge_threshold=30, state_threshold=10) 22 | measures = meter.measurements 23 | events = meter.events 24 | plt.plot(measures.index, measures.values) 25 | plt.plot(events.timestamps.values, events.P.values, 'ro') 26 | plt.show() 27 | meter.cluster_events('DBSCAN', eps=30) 28 | meter.model_appliances('simple', distance_threshold = 100) 29 | meter.track_consumptions('simple') 30 | 31 | print len(meter.events) 32 | 33 | """ 34 | # Plot appliances 35 | for phase, appliance in meter.appliance_consumptions.columns: 36 | print appliance 37 | #meter.measurements[phase][meter.power_types[0]].plot() 38 | 
    #meter.appliance_consumptions[phase][appliance].plot(color='r')
    df0 = meter.measurements[phase][meter.power_types[0]]
    df = meter.appliance_consumptions[phase][appliance]
    plt.plot(df0.index, df0.values)
    plt.plot(df.index, df.values, 'r')
    plt.show()


phases = meter.phases
for phase in phases:
    print 'phase :', phase
    #meter.measurements[phase][meter.power_types[0]].plot()
    meter.appliance_consumptions[phase].sum(axis=1).plot(color='r')
    plt.show()

"""

--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
***************************************
Non Intrusive Load Monitoring Framework
***************************************

:Date: April 2015
:Authors: Thibaut Lavril
:Version: 2.0


Purpose
=======

Framework to implement Non Intrusive Load Monitoring (NILM) based on event
detection. This framework implements algorithms and methods developed by
Hart in [1].

The goal of NILM is to break down the total consumption of a household
into individual appliance consumptions (fridge, air conditioner, etc.). To
do so, machine learning techniques (mainly unsupervised learning) are used.

Algorithm Overview
==================

The algorithm is composed of different steps:

- **Data Loading and Preprocessing**: the meter data, i.e. the powers measured by a smart meter on the different phases, are loaded into memory and preprocessed (sampling rate, missing values, outliers).

- **Detection of events**: events are variations of the total consumption which can be caused by a change of state of an appliance. Events can be detected by different signal processing algorithms.

- **Clustering of events**: the detected events are clustered, i.e. we try to group together events which are likely to come from the same appliance's change of state. Unsupervised machine learning algorithms are employed here.

- **Modeling of appliances**: with the clusters obtained and time series analysis, appliance models are built. An appliance model groups clusters, each cluster representing one change of state of the appliance. Unsupervised machine learning algorithms can also be employed here.

- **Tracking of appliances' consumptions**: once the appliance models are built, it is possible to track the behaviour of each appliance and compute its consumption at each timestamp.

The total consumption is thereby broken down into individual appliances. These appliances are not labelled; the final step would consist of using a supervised machine learning algorithm to label each appliance after disaggregation (not implemented here).

References
==========

[1] Hart, G. W. "Prototype nonintrusive appliance load monitor." (1985).

--------------------------------------------------------------------------------
/NILM/user.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import os
import pandas as pd
from meter import Meter


class User(object):
    """ Represents a user.

    Each user is composed of user metadata and a list of NILM.Meter
    objects. NILM.User is useful for storing a dataset with several meters.
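    A typical construction, mirroring the '__main__' block at the bottom
    of this module (sketch; the path is the author's local example):

    >>> user = User('/Volumes/Stockage/DATA/DATA_BLUED/CONVERTED/'
    ...             'user_blued.h5')  # doctest: +SKIP
    >>> user.meters  # doctest: +SKIP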
12 | 13 | Parameters 14 | ---------- 15 | hdf_filename: str 16 | Name (with relative or absolute path) of the HDFS file "*.h5" where 17 | the user is store. To create such files see NILM.converter module. 18 | 19 | ID: str, optional 20 | Name of the user. It is the name of the hdf_filename by default. 21 | 22 | 23 | Attributes 24 | ---------- 25 | ID: str 26 | Name of the user. It is the name of the hdf_filename by default. 27 | 28 | metadata: dict of str 29 | Metadata stored in the HDFS file. It has informations on the user, 30 | the meters belonging to the user: meters ID, measurements of the meters 31 | etc ... 32 | 33 | filename: str 34 | Name of the file (with path) where the user is stored. 35 | 36 | meters_ID: list of str 37 | List with the meters_ID of the NILM.Meter belonging to the user. 38 | 39 | meters: list of NILM.Meter 40 | List containing the NILM.Meter objects. 41 | """ 42 | 43 | def __init__(self, hdf_filename, ID=None): 44 | assert os.path.isfile(hdf_filename) 45 | with pd.get_store(hdf_filename) as store: 46 | metadata = store.root._v_attrs.metadata 47 | 48 | if ID is None: 49 | self.ID = hdf_filename.split('/')[-1] 50 | else: 51 | self.ID = ID 52 | self.metadata = metadata 53 | self.filename = hdf_filename 54 | 55 | meters_ID = [] 56 | for meter_ID in metadata['meters'].keys(): 57 | meters_ID.append(meter_ID) 58 | self.meters_ID = meters_ID 59 | 60 | meters = [] 61 | for meter_ID in metadata['meters'].keys(): 62 | meter = Meter.from_user(self, meter_ID) 63 | meters.append(meter) 64 | self.meters = meters 65 | 66 | def __repr__(self): 67 | return str(self.ID) 68 | 69 | 70 | if __name__ == "__main__": 71 | hdf_filename = '/Volumes/Stockage/DATA/DATA_BLUED/CONVERTED/user_blued.h5' 72 | user = User(hdf_filename) 73 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import NILM as nilm 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | hdf_filename = '/Volumes/Stockage/DATA/DATA_BLUED/CONVERTED/user_blued.h5' 8 | user = nilm.User(hdf_filename) 9 | meter = user.meters[0] 10 | 11 | 12 | # meter.load_measurements? 13 | meter.load_measurements(sampling_period=1) 14 | phases = meter.phases 15 | for phase in phases: 16 | print 'phase :', phase 17 | meter.measurements[phase].plot() 18 | 19 | # meter.detect_events? 20 | # nilm.Events? 21 | # nilm.Events.detection_types 22 | meter.detect_events(detection_type='steady_states', edge_threshold=70, 23 | state_threshold=15) 24 | #meter.detect_events(detection_type='simple_edge') 25 | indexed_events = meter.events.set_index(['timestamps']) 26 | for phase in phases: 27 | print 'phase :', phase 28 | meter.measurements[phase].plot() 29 | indexed_events.P[indexed_events.phase == phase].plot(style='o') 30 | plt.show() 31 | 32 | 33 | # meter.cluster_events? 34 | # nilm.Clusters? 
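# Aside (editor's sketch): every NILM submodule exposes a registry dict
# mapping an algorithm name to its model and default parameters, so the
# available choices can be inspected at runtime before picking one:
for name, spec in nilm.Clusters.clustering_types.items():
    print name, '->', spec['parameters']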
35 | # nilm.Clusters.clustering_types 36 | meter.cluster_events('DBSCAN', eps=100, min_samples=1) 37 | for phase in phases: 38 | ev = meter.events[meter.events.phase == phase] 39 | labels = ev.cluster.values 40 | n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) 41 | unique_labels = set(labels) 42 | colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels))) 43 | for k, col in zip(unique_labels, colors): 44 | class_member_mask = (labels == k) 45 | xy = ev[meter.power_types].values[class_member_mask] 46 | plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, 47 | markeredgecolor='k', markersize=10) 48 | plt.show() 49 | 50 | 51 | 52 | # meter.model_appliances? 53 | # nilm.ApplianceModels? 54 | # nilm.ApplianceModels.association_two_states_types 55 | meter.model_appliances('simple', distance_threshold=80) 56 | 57 | # meter.track_consumptions? 58 | # nilm.ApplianceConsumptions 59 | # nilm.ApplianceConsumptions.tracking_types 60 | meter.track_consumptions('simple') 61 | 62 | # Plot appliances 63 | #for phase, appliance in meter.appliance_consumptions.columns: 64 | # print appliance 65 | # meter.measurements[phase][meter.power_types[0]].plot() 66 | # meter.appliance_consumptions[phase][appliance].plot(color='r') 67 | # plt.show() 68 | 69 | phases = meter.phases 70 | for phase in phases: 71 | print 'phase :', phase 72 | meter.measurements[phase][meter.power_types[0]].plot() 73 | meter.appliance_consumptions[phase].sum(axis=1).plot(color='r') 74 | plt.show() 75 | -------------------------------------------------------------------------------- /NILM/measurements.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Dec 18 17:36:44 2014 4 | 5 | @author: thibaut 6 | """ 7 | import pandas as pd 8 | import preprocessing 9 | 10 | 11 | class Measurements(pd.DataFrame): 12 | """ 13 | Measurements object inherits from pandas.DataFrame. The 14 | DataFrame is constructed by the method 'load_data'. 15 | 16 | 'load_data' method load the data referenced in the NILM.meter.Store object. 17 | The measurements are resampled with the sampling function implemented 18 | in preprocessing. Missing measurements are not handled. 19 | The result is a pandas.DataFrame: 20 | index: timestamp of the measurement. 21 | attributes: 22 | timestamp: timestamp of when the event was detect. 23 | ['A', 'B', ...]: phase where measurements is done 24 | (for all phases available) 25 | ['P', 'Q']: value of each measured power (for all powers 26 | available). 27 | 28 | TODO: Functions to detect/handle missing measurements. 29 | 30 | Parameters 31 | ---------- 32 | sampling_period: int of float 33 | Elapse time between two measurements in second. 34 | 35 | Attributes 36 | ---------- 37 | sampling_period: float 38 | Elapse time between two measurements in second. 39 | """ 40 | 41 | def __init__(self, sampling_period): 42 | super(Measurements, self).__init__() 43 | self.sampling_period = float(sampling_period) 44 | 45 | def load_data(self, meter): 46 | """ 47 | 'load_data' method load the data referenced in the NILM.meter.Store 48 | object. 49 | The measurements are resampled with the sampling function implemented 50 | in preprocessing. Missing measurements are not handled. 51 | The result is a pandas.DataFrame: 52 | index: timestamp of the measurement. 53 | attributes: 54 | timestamp: timestamp of when the event was detect. 
            ['A', 'B', ...]: phase where measurements is done
                (for all phases available)
            ['P', 'Q']: value of each measured power (for all powers
                available).

        TODO: Functions to detect/handle missing measurements.

        Parameters
        ----------
        meter: NILM.Meter
            Meter object whose Store references the metering data.
        """
        sampling_period = self.sampling_period

        hdf_filename = meter.store.filename
        key = meter.store.key

        with pd.get_store(hdf_filename) as store:
            df = store[key]

        if sampling_period is not None:
            df = preprocessing.resample(df, sampling_period)

        df = df.sort_index()
        df.index.name = 'timestamps'
        super(Measurements, self).__init__(df)

--------------------------------------------------------------------------------
/NILM/converter/convert_dataframe_to_meter.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 21 11:13:13 2015

@author: thibaut
"""

import pandas as pd
import pprint
import os


def dataframe_to_meter(df, hdf_filename):
    """Converts a pandas.DataFrame into a meter HDF5 file.

    The DataFrame needs to have a well-determined structure, see Parameters.
    The meter is stored into an HDF5 file, the so-called meter HDF5 file,
    which can be used to create a NILM.Meter object.

    Parameters
    ----------
    df: pandas.DataFrame, (n_timestamps, n_powers*n_phases)
        index: pandas.DatetimeIndex,
            All timestamps of measurements, timezone aware.
        columns: pandas.MultiIndex
            The columns need to have two levels.
            level 0: phases measured by the meter
            level 1: powers measured by the meter
        values: float
            Value for the power and phase considered at the timestamp.

    hdf_filename: str
        Path and name of the HDF5 file in which to store the meter.
    """

    if os.path.exists(hdf_filename):
        os.remove(hdf_filename)

    try:
        assert isinstance(df.index, pd.DatetimeIndex)
    except AssertionError:
        raise AssertionError('Conversion: the dataframe index needs to be '
                             'timestamps')

    # Check that there are two levels of columns:
    try:
        assert len(df.columns.levels) == 2
    except AssertionError:
        raise AssertionError('Conversion: the dataframe columns need to have '
                             'two levels: phases and power_types')

    phases = list(df.columns.levels[0])
    print 'Conversion: the phases are:', phases

    power_types = list(df.columns.levels[1])
    print 'Conversion: the power types measured are:', power_types

    for phase in phases:
        try:
            assert (list(df[phase].columns) == power_types)
        except AssertionError:
            raise AssertionError('Conversion: the phase {:s} does not have '
                                 'all the power types'.format(phase))

    measurements = {}
    measurements['phases'] = phases
    measurements['power_types'] = power_types

    timestamps = {}
    timestamps['tz'] = str(df.index.tz)
    timestamps['start'] = str(pd.Timestamp(df.index[0]))
    timestamps['end'] = str(pd.Timestamp(df.index[-1]))
    # use total_seconds(): the timedelta .seconds attribute drops whole days
    timestamps['duration_hours'] = str((pd.Timestamp(df.index[-1]) -
                                        pd.Timestamp(df.index[0]))
                                       .total_seconds()//3600.)

    metadata = {}
    metadata['measurements'] = measurements
    metadata['timestamps'] = timestamps
    print "Conversion: metadata stored is:"
    pprint.PrettyPrinter(0).pprint(metadata)

    with pd.get_store(hdf_filename) as store:
        store['measurements'] = df
        store.root._v_attrs.metadata = metadata
    print 'Conversion: dataframe stored as meter in {:s}!'.format(hdf_filename)

if __name__ == '__main__':
    hdf_filename = 'meter_blued.h5'
    dataframe_to_meter(df, hdf_filename)

--------------------------------------------------------------------------------
/NILM/modeling/association_two_states.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 26 12:30:24 2015

@author: thibaut

Algorithms which associate clusters to model two-state appliances
"""
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.cluster import AffinityPropagation, DBSCAN


def euclidian_cluster_metric(x1, x2):
    """Distance measuring the similarity between two clusters
    with OPPOSITE SIGN (to detect 'on' and 'off')
    """
    assert x1.shape == x2.shape
    return np.linalg.norm(x1 + x2)


def simple_association_two_states(X, distance_threshold, metric):
    """Associate clusters 2 by 2.

    Computes the distance between clusters of different signs.
    Associates the clusters 2 by 2, starting with the nearest clusters
    (with opposite sign), and continues until the distance (with opposite
    sign) between the nearest clusters is more than the threshold.

    Parameters
    ----------
    X: np.array of float (n_clusters, n_powers)
        matrix containing the different mean powers of the clusters
        (positions of the centroids of the clusters)

    distance_threshold: float
        maximum distance between clusters tolerated to associate two clusters

    metric: function
        metric used to measure the distance between two clusters. If clusters
        have the same means with opposite signs, the distance should be
        near 0.

    Returns
    -------
    appliances: numpy.array of int (n_clusters, )
        array containing the appliance chosen for each cluster.
    """
    # Construct the distance matrix D
    D = pairwise_distances(X, metric=metric)

    # Initialization
    d = 0  # Distance between two clusters
    n_appli = 0
    appliances = -1*np.ones_like(D[0])  # appliances set at -1

    while d < distance_threshold:

        # Find the new min > d
        D_masked = D[D > d]
        d = D_masked.min()  # Update d

        # Find coordinates of min
        xx, yy = np.indices(D.shape)
        xxx = xx[D == d]  # array of x
        yyy = yy[D == d]  # array of y

        for (x, y) in zip(xxx, yyy):
            if (appliances[x] == -1) & (appliances[y] == -1):
                # if x and y don't have an appliance yet, set both to n_appli
                appliances[x] = n_appli
                appliances[y] = n_appli
                n_appli += 1

    return appliances


def dbscan_association_two_states(X, metric, **dbscan_parameters):
    """Associate clusters by density

    Runs the DBSCAN algorithm on the pairwise distance matrix between
    clusters. This matrix is computed with the given metric.
    Therefore more than 2 clusters can define an appliance if they
    are very close.
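    A direct call looks like this sketch (X as described under Parameters;
    the keyword values are the defaults registered in
    'modeling/__init__.py'):

    >>> appliances = dbscan_association_two_states(
    ...     X, euclidian_cluster_metric,
    ...     eps=35, min_samples=1)  # doctest: +SKIP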

    Parameters
    ----------
    X: np.array of float (n_clusters, n_powers)
        matrix containing the different mean powers of the clusters
        (positions of the centroids of the clusters)

    metric: function
        metric used to measure the distance between two clusters. If clusters
        have the same means with opposite signs, the distance should be
        near 0.

    dbscan_parameters: dict, optional
        Arguments to pass to the sklearn DBSCAN model.

    Returns
    -------
    appliances: numpy.array of int (n_clusters, )
        array containing the appliance chosen for each cluster.
    """
    # Construct the distance matrix D (metric must be passed as a keyword
    # argument; positionally it would be interpreted as the second data
    # matrix Y of pairwise_distances)
    D = pairwise_distances(X, metric=metric)
    # Compute a DBSCAN clustering for a distance matrix
    model = DBSCAN(metric='precomputed', **dbscan_parameters)
    appliances = model.fit_predict(D)
    return appliances


def affinity_propagation_association_two_states(X, **dbscan_parameters):
    pass

--------------------------------------------------------------------------------
/NILM/events.py:
--------------------------------------------------------------------------------
import pandas as pd
import detection


class Events(pd.DataFrame):

    """
    This class inherits from pandas.DataFrame. The
    DataFrame is constructed by the method 'detection'.

    The detection method uses event detection methods to detect
    events in the measurements. The result is a DataFrame:
        index: ID of the event
        attributes:
            timestamp: timestamp of when the event was detected.
            phase: phase where the event is detected.
            ['P', 'Q']: value of the transition for each power (if available).
    The method used to detect events is chosen in the '__init__' of the
    Events object. It needs to be one of the functions implemented in the
    submodule 'detection'.

    Parameters
    ----------
    detection_type: string
        Name of a detection function. This function will be used to detect
        events. Needs to be one of the keys of the dictionary
        'detection_types'.

    detection_parameters: dict (optional)
        Arguments to be passed as arguments of the function which will be
        used to detect the events. Arguments not informed will take the
        default value defined in the dictionary 'detection_types'.

    Attributes
    ----------
    detection_types: dict, (class variable)
        Dictionary which lists all the functions to detect
        events which are implemented in the submodule 'detection'.
        Keys: str,
            Name of the detection function implemented.
        Values: dict,
            'model': detection function from the 'detection' submodule.
            'parameters': dictionary of default parameters of the
                detection function.
        NOTE: When a new detection function is implemented in the 'detection'
        submodule, the function and default parameters need to be entered
        into this dict.

    detection_type: str
        Name of the detection function used to detect events.
        Needs to be one key of the dictionary 'detection_types'.

    detection_model: function
        Function which will be used to detect events. This function is
        implemented in the submodule 'detection'.

    detection_parameters: dict
        Arguments passed to the detection_model function.
Its the dict 59 | 'detection_parameters' passed into the '__init__' function completed 60 | by default parameters (if not informed by detection_parameters) 61 | """ 62 | 63 | detection_types = detection.__all__ 64 | 65 | def __init__(self, detection_type, **detection_parameters): 66 | super(Events, self).__init__() 67 | # Check name of method for association is valid 68 | assert detection_type in Events.detection_types 69 | 70 | # define model and default parameters from the dict 71 | detection_dict = Events.detection_types[detection_type] 72 | model = detection_dict['model'] 73 | parameters = detection_dict['parameters'] 74 | # Add the parameters from **parameters in the dict 75 | for k, v in detection_parameters.iteritems(): 76 | parameters[k] = v 77 | 78 | self.detection_type = detection_type 79 | self.detection_model = model 80 | self.detection_parameters = parameters 81 | 82 | def detection(self, meter): 83 | """ 84 | Detection method uses event detection methods to detect 85 | events in the measurements. The result is a DataFrame: 86 | index: ID of the event 87 | attributes: 88 | timestamp: timestamp of when the event was detect. 89 | phase: phase where the event is detected. 90 | ['P', 'Q']: values of the transition for each power 91 | (if available). 92 | The method used to detect events is choosed in the '__init__' of 93 | Events object. This needs to be one of the functions implemented in the 94 | submodule 'detection'. 95 | 96 | Parameters 97 | ---------- 98 | meter: NILM.Meter 99 | Meter where the measurements are already loaded. 100 | 101 | References 102 | ---------- 103 | Hart, G. W. "Prototype nonintrusive appliance load monitor." (1985). 104 | """ 105 | 106 | phases = meter.phases 107 | model = self.detection_model 108 | parameters = self.detection_parameters 109 | df = pd.DataFrame() 110 | for phase in phases: 111 | measurements = meter.measurements[phase] 112 | dff = model(measurements, **parameters) 113 | dff['phase'] = phase 114 | df = df.append(dff) 115 | df = df.reset_index() 116 | super(Events, self).__init__(df) 117 | -------------------------------------------------------------------------------- /NILM/tracking/simple_tracking.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jan 26 18:46:22 2015 4 | 5 | @author: thibaut 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | 12 | def simple_tracking(timestamps, time_events, transitions, power_events): 13 | """Compute the consumption of one appliance for all timestamps 14 | 15 | Use a simple tracking for two-states appliances. Only events 'on' 16 | matched with an event 'off' are considered. Events mismatched are just 17 | labelled as mismatched. 18 | 19 | Parameters 20 | ---------- 21 | timestamps: pandas.DateTimeIndex (n_measures, ) 22 | timestamps given by the meter's measurements 23 | 24 | time_events: pandas.DateTimeIndex (n_events, ) 25 | timestamps where all the events of the appliance considered occur. 
26 | 27 | transitions: numpy.array of int (n_events, ) 28 | class of transitions (-1 for 'off', 1 for 'on' for 2 states appliances) 29 | 30 | power_events: numpy.array of float (n_events, ) 31 | power value of events (only main power now) 32 | 33 | Returns 34 | ------- 35 | consumptions: np.array of float (n_measures, ) 36 | consumption of the appliance for each timestamps of the meter 37 | 38 | event_matched: np.array of bools (n_events, ) 39 | True if the corresponding event was match with another event, False 40 | otherwise. 41 | """ 42 | appliance_transitions, event_matched = deleting_anomalies(transitions) 43 | # Initialization of dataframe with the appliance state and consumption 44 | df = pd.DataFrame(index=timestamps) 45 | appliance_state = np.zeros(len(timestamps)) 46 | consumption = np.zeros(len(timestamps)) 47 | df['state'] = appliance_state 48 | df['consumption'] = consumption 49 | 50 | time_on = timestamps[0] 51 | state_consumption = consumption[0] 52 | for transition, time, value in zip(appliance_transitions, time_events, 53 | power_events): 54 | if transition == 1: 55 | time_on = time # We store the 'on' time 56 | # The consumption of the state is defined as the 'on' transition 57 | state_consumption = value 58 | if transition == -1: 59 | time_off = time 60 | # We select all the timestamps between time_on and time_off 61 | cond = (timestamps >= time_on) & (timestamps < time_off) 62 | # We set to 1 the state for the selected timestamps 63 | np.place(appliance_state, cond, 1) 64 | # We set to state_consumption the consumption for the selected ... 65 | # timestamps. 66 | np.place(consumption, cond, state_consumption) 67 | return consumption, event_matched 68 | 69 | 70 | def deleting_anomalies(transitions): 71 | """Change transitions to delete anomalies 72 | 73 | Delete anomalies two-states appliances. Deletes transitons 'on' which 74 | are not followed by transitions 'off'. A new transition array is construct 75 | Transitions non matched are labelled as mismatched. 76 | 77 | Parameters 78 | ---------- 79 | transitions: numpy.array of int (n_events, ) 80 | class of transitions (-1 for 'off', 1 for 'on' for 2 states appliances) 81 | with anomalies: ('on','on') for example. 82 | 83 | Returns 84 | ------- 85 | appliances_transitions: numpy.array of int (n_events, ) 86 | class of transitions (-1 for 'off', 1 for 'on' for 2 states appliances) 87 | without anomalies. Non matched transitions have label 0. 88 | 89 | transition_matched: np.array of bools (n_events, ) 90 | True if the corresponding transition was match with another 91 | transition, False otherwise. 92 | """ 93 | # Initilization, we consider that the previous transition ... 94 | # was a off, and therefore the appliance is in state off 95 | previous_transition = -1 96 | index_previous = None 97 | 98 | transition_matched = np.full_like(transitions, False, dtype=bool) 99 | appliance_transitions = np.zeros_like(transitions) 100 | 101 | for index, transition in enumerate(transitions): 102 | # if transition is a 'turning on' we do nothing 103 | if transition == 1: 104 | pass 105 | 106 | # if transition is a 'turning off' ... 107 | elif transition == -1: 108 | if previous_transition == 1: 109 | # and if the previous one is a 'turning on' 110 | # We can match the two transitions! 
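                # An 'on' (+1) directly followed by this 'off' (-1) forms one
                # complete cycle of a two-state appliance: both edges are
                # flagged as matched and relabelled in the output arrays.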
111 | transition_matched[index_previous] = True 112 | appliance_transitions[index_previous] = 1 113 | transition_matched[index] = True 114 | appliance_transitions[index] = -1 115 | if previous_transition == -1: 116 | pass 117 | 118 | # Update 119 | previous_transition = transition 120 | index_previous = index 121 | return appliance_transitions, transition_matched 122 | -------------------------------------------------------------------------------- /NILM/clusters.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import clustering 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | class Clusters(pd.DataFrame): 8 | """ 9 | This class inherits from pandas.DataFrame. The 10 | DataFrame is constructed by the method 'clustering'. 11 | 12 | Clustering method uses unsupervised learning to clusters 13 | events. The result is a DataFrame. The index is the 14 | ID of the cluster. The attributes of each cluster are 15 | the phase, the average powers, the number of events. 16 | The method used to cluster is choosed in the __init__ of Clusters. 17 | It needs to be one of the functions implemented in submodule 18 | clustering. 19 | 20 | Parameters 21 | ---------- 22 | clustering_type: string 23 | Name of a clustering function. This function will be used to cluster 24 | the events. Needs to be one of the keys of the dictionnary 25 | 'clustering_types'. 26 | 27 | clustering_parameters: dict (optional) 28 | Arguments to be passed as argument of the function which will be used 29 | to cluster the events. Arguments not informed will take the default 30 | value defined in the dictionnary 'clustering_types'. 31 | 32 | Attributes 33 | ---------- 34 | clustering_types: dict, (class variable) 35 | Dictionnary wich lists all the functions to cluster 36 | events which are implementend in the submodule 'clustering'. 37 | Keys: str, 38 | Name of the clustering function implemented. 39 | Values: dict, 40 | 'model': clustering function from 'clustering' submodule. 41 | 'parameters': dictionary of default parameters of the 42 | clustering function. 43 | NOTE: When a new clustering function is implemented in 'clustering' 44 | submodule, the function and default parameters need to be entered into 45 | this dict. 46 | 47 | clustering_type: str 48 | Name of the clustering function used to cluster events. 49 | Needs to belong to be one key of the dictionnary 'clustering_types'. 50 | 51 | clustering_model: function 52 | Function which will be use to cluster events. This function is 53 | implemented in the submodule 'clustering'. 54 | 55 | clustering_parameters: dict 56 | Arguments passed to the clustering_model function. 
Its the dict 57 | 'clustering_parameters' passed into the '__init__' function completed 58 | by default parameters (if not informed by clustering_parameters) 59 | """ 60 | 61 | clustering_types = clustering.__all__ 62 | 63 | def __init__(self, clustering_type, **clustering_parameters): 64 | super(Clusters, self).__init__() 65 | # Check name of method for association is valid 66 | assert clustering_type in Clusters.clustering_types 67 | # define model and default parameters from the dict 68 | clustering_dict = Clusters.clustering_types[clustering_type] 69 | model = clustering_dict['model'] 70 | parameters = clustering_dict['parameters'] 71 | # Add the parameters from **parameters in the dict 72 | for k, v in clustering_parameters.iteritems(): 73 | parameters[k] = v 74 | 75 | self.clustering_type = clustering_type 76 | self.clustering_model = model 77 | self.clustering_parameters = parameters 78 | 79 | def clustering(self, meter, features=None): 80 | """ 81 | Clustering method uses unsupervised learning to clusters 82 | events. The result is a DataFrame. The index is the 83 | ID of the cluster. The attributes of each cluster are 84 | the phase, the average powers, the number of events. 85 | The method used to cluster is choosed in the '__init__' of Clusters 86 | by the parameter 'clustering_type'. 87 | It needs to be one of the functions implemented in submodule 88 | 'clustering'. 89 | 90 | Parameters 91 | ---------- 92 | meter: NILM.Meter 93 | Meter where the events are already detected. 94 | """ 95 | 96 | phases = meter.phases 97 | clustering_model = self.clustering_model 98 | parameters = self.clustering_parameters 99 | n_events = len(meter.events.index) 100 | if features is None: 101 | features = meter.power_types 102 | else: 103 | # Check that features are in powers measured by meter 104 | assert np.in1d(features, meter.power_types).all() 105 | 106 | n_labels = 0 107 | labels_arr = -10*np.ones(n_events) 108 | 109 | for phase in phases: 110 | mask = (meter.events.phase == phase).values 111 | X = meter.events[features][mask].values 112 | model = clustering_model(**parameters) 113 | model.fit(X) 114 | # Different labels for each phase, but -1 when not clustered 115 | labels = model.labels_ + n_labels 116 | # but label -1 when not clustered 117 | np.place(labels, labels == n_labels-1, -1) 118 | labels_arr[mask] = labels 119 | n_labels = n_labels + model.labels_.max() + 1 120 | meter.events['cluster'] = labels_arr 121 | 122 | self.phases_ = meter.phases 123 | self.power_types_ = meter.power_types 124 | 125 | df = meter.events.groupby(['phase', 'cluster']).mean() 126 | serie_count = meter.events.groupby(['phase', 'cluster']).count() 127 | df['n_events'] = serie_count[meter.power_types[0]].values 128 | df = df.reset_index() 129 | super(Clusters, self).__init__(df) 130 | 131 | def plot_clusters_2D(self, meter): 132 | phases = meter.phases 133 | power_types = meter.power_types 134 | assert len(power_types) == 2 135 | for phase in phases: 136 | mask = (meter.events.phase == phase) 137 | X = meter.events[power_types][mask].values 138 | labels = meter.events['cluster'][mask].values 139 | unique_labels = set(labels) 140 | colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels))) 141 | for k, col in zip(unique_labels, colors): 142 | if k == -1: 143 | # Black used for noise. 
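                    # (labels of -1 come from clustering algorithms such as
                    # DBSCAN that leave outlier events unassigned; see the
                    # clustering() method above)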
144 | col = 'k' 145 | class_member_mask = (labels == k) 146 | xy = X[class_member_mask] 147 | plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col) 148 | plt.show() 149 | -------------------------------------------------------------------------------- /NILM/appliance_consumptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jan 19 09:30:35 2015 4 | 5 | @author: thibaut 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | import tracking 10 | 11 | 12 | class ApplianceConsumptions(pd.DataFrame): 13 | """ 14 | This class inherits from a pandas.DataFrame. 15 | 16 | The main method is the tracking of appliances. The tracking 17 | of appliances method take as input a meter 18 | (where the appliance models are already constructed). It 19 | will track the behaviour appliances defined in ApplianceModels 20 | class. The tracking construct a pd.dataFrame where the columns 21 | are the appliance detected and the rows consumption tracked for each 22 | timestamps. This method can diffent tracking functions. These functions 23 | are implemented in the submodule 'tracking'. The function used is choosed 24 | in the __init__ of ApplianceConsumptions. 25 | 26 | The pandas.DataFrame constructed allows therefore to know all the 27 | consumptions of all appliances detected. 28 | 29 | Parameters 30 | ---------- 31 | tracking_type:: string 32 | Name of a function which track appliances consumptions of available 33 | appliances. This function will be used to compute the appliance 34 | mconsumptions. Needs to be one of the keys of the dictionnary 35 | 'tracking_types' of NILM.ApplianceConsumptions object. 36 | 37 | tracking_parameters: dict (optional) 38 | Parameters to be passed as arguments of the function which will be 39 | used to track the appliances consumptions. Arguments not informed 40 | will take the default value defined in the dictionnary 41 | 'tracking_types' of NILM.Consumptions object. 42 | 43 | Attributes 44 | ---------- 45 | tracking_types: dict, (class variable) 46 | Dictionnary with all the tracking methods implemented. The keys are 47 | the name of the tracking methods implemented. The values are 48 | dictionnary with two keys: 'model' and 'parameters'. The value 49 | associated to 'model' is a function for tracking. This function 50 | is implemented into the submodel 'tracking'. The values associated 51 | to 'parameters' is a dictionnary name:value of default parameters of 52 | the function in 'model'. NOTE: When a new tracking function is 53 | implemented in 'tracking' submodule, the function and default 54 | parameters need to be entered into this dict. 55 | 56 | tracking_type: string 57 | Name of the tracking model used. Needs to belong to be one 58 | key of the dictionnary tracking_types. 59 | 60 | tracking_model: function 61 | Function used to do the tracking. Function are in the submodule 62 | 'tracking'. 63 | 64 | References 65 | ---------- 66 | Hart, G. W. "Prototype nonintrusive appliance load monitor." (1985). 
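
    Examples
    --------
    A sketch of the intended call sequence (it assumes a NILM.Meter whose
    appliance models are already built; 'simple' is the only tracking type
    registered in the 'tracking' submodule):

    >>> consumptions = ApplianceConsumptions('simple')
    >>> consumptions.tracking(meter)  # doctest: +SKIP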
67 | """ 68 | 69 | tracking_types = tracking.__all__ 70 | 71 | 72 | def __init__(self, tracking_type, **tracking_parameters): 73 | super(ApplianceConsumptions, self).__init__() 74 | 75 | # Check name of method for tracking is valid 76 | assert tracking_type in ApplianceConsumptions.tracking_types 77 | # define model and default parameters from the dict 78 | tracking_dict = ApplianceConsumptions.tracking_types[tracking_type] 79 | model = tracking_dict['model'] 80 | parameters = tracking_dict['parameters'] 81 | # Add the parameters from **parameters in the dict 82 | for k, v in tracking_parameters.iteritems(): 83 | parameters[k] = v 84 | 85 | self.tracking_type = tracking_type 86 | self.tracking_model = model 87 | self.tracking_parameters = parameters 88 | 89 | def tracking(self, meter): 90 | """ 91 | The tracking of appliances method take as input a 92 | meter (where the appliance models are already constructed). It 93 | will track the behaviour appliances defined in ApplianceModels 94 | class. The tracking construct a pd.dataFrame where the columns 95 | are the appliance detected and the rows consumption tracked for each 96 | timestamps. This method can diffent tracking functions. These functions 97 | are implemented in the submodule 'tracking'. The function used is 98 | choosen in the __init__ of ApplianceConsumptions. 99 | 100 | The pandas.DataFrame constructed allows therefore to know all the 101 | consumptions of all appliances detected. 102 | 103 | Parameters 104 | ---------- 105 | meter: NILM.Meter 106 | Meter where the appliance models are already built. 107 | """ 108 | 109 | tracking_model = self.tracking_model 110 | tracking_parameters = self.tracking_parameters 111 | 112 | phases = meter.phases 113 | powers = meter.power_types 114 | P = powers[0] 115 | 116 | # Check if appliance models are built 117 | meter.appliance_models 118 | 119 | clusters = meter.clusters 120 | events = meter.events 121 | 122 | df = pd.merge(events, clusters[['phase', 'cluster', 'appliance', 123 | 'transition']], on=['phase', 'cluster']) 124 | 125 | df = df.sort_index(by=['phase', 'appliance', 'timestamps']) 126 | 127 | event_matched_arr = np.zeros(len(df.index), dtype='bool') 128 | 129 | timestamps = meter.measurements.index 130 | 131 | consumptions = pd.DataFrame(index=timestamps) 132 | 133 | for phase in phases: 134 | mask = (df['phase'] == phase) 135 | appliances = set(df[mask]['appliance'].tolist()) 136 | appliances = [app for app in appliances if app >= 0] 137 | for appliance in appliances: 138 | # Extract transitions, values of transitions ... 
139 | mask = (df['phase'] == phase) & (df['appliance'] == appliance) 140 | mask = mask.values 141 | transitions = df[mask]['transition'].values 142 | power_events = df[mask][P].values 143 | time_events = df[mask]['timestamps'].values 144 | 145 | conso_appliance, event_matched = \ 146 | tracking_model(timestamps, time_events, transitions, 147 | power_events, **tracking_parameters) 148 | name = 'appliance_' + str(int(appliance)) 149 | cols = (phase, name) 150 | consumptions[cols] = conso_appliance 151 | event_matched_arr[mask] = event_matched 152 | consumptions.columns = pd.MultiIndex.from_tuples(consumptions.columns) 153 | df['event_matched'] = event_matched_arr 154 | 155 | # We sort df like events are sorted 156 | df = df.sort_index(by=['phase', 'timestamps']) 157 | meter.events['event_matched'] = df['event_matched'].values 158 | super(ApplianceConsumptions, self).__init__(consumptions) 159 | -------------------------------------------------------------------------------- /NILM/detection/steady_states.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import pandas as pd 3 | import numpy as np 4 | import sys 5 | 6 | 7 | def steady_states(dataframe, state_threshold=15, 8 | edge_threshold=70): 9 | """ 10 | Finds steady states given a datafram of power 11 | 12 | Taken from nilmtk. Quote while using 13 | 14 | 15 | Parameters 16 | ---------- 17 | 18 | dataframe: pd.DataFrame with DateTimeIndex 19 | 20 | min_n_samples(int): number of samples to consider constituting a 21 | steady state. 22 | 23 | state_threshold: maximum difference between highest and lowest 24 | value in steady state. 25 | 26 | edge_threshold: the level used to define significant 27 | appliances, transitions below this level will be ignored. 28 | See Hart 1985. p27. 29 | 30 | 31 | Returns 32 | ------- 33 | transitions: pandas.Dataframe 34 | DataFrame containing the events 35 | index: DateTimeIndex, timestamps 36 | attributes: power types availabes 37 | 38 | References 39 | ---------- 40 | Hart, G. W. "Prototype nonintrusive appliance load monitor." (1985). 
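
    Examples
    --------
    A hedged sketch (df stands for any measurements DataFrame with a
    DatetimeIndex; the thresholds shown are the defaults registered in
    'detection/__init__.py'):

    >>> transitions = steady_states(df, state_threshold=15,
    ...                             edge_threshold=70)  # doctest: +SKIP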
41 | """ 42 | 43 | 44 | # Tells whether we have both real and reactive power or only real power 45 | num_measurements = len(dataframe.columns) 46 | estimatedSteadyPower = np.array([0] * num_measurements) 47 | lastSteadyPower = np.array([0] * num_measurements) 48 | previousMeasurement = np.array([0] * num_measurements) 49 | 50 | # These flags store state of power 51 | 52 | instantaneousChange = False # power changing this second 53 | ongoingChange = False # power change in progress over multiple seconds 54 | 55 | index_transitions = [] # Indices to use in returned Dataframe 56 | index_steadystates = [] 57 | transitions = [] # holds information on transitions 58 | steadyStates = [] # steadyStates to store in returned Dataframe 59 | N = 0 # N stores the number of samples in state 60 | time = dataframe.iloc[0].name # first state starts at beginning 61 | 62 | # Iterate over the rows performing algorithm 63 | # print ("Finding Edges, please wait ...", end="\n") 64 | # sys.stdout.flush() 65 | 66 | for row in dataframe.itertuples(): 67 | 68 | # test if either active or reactive moved more than threshold 69 | # http://stackoverflow.com/questions/17418108/elegant-way-to-perform-tuple-arithmetic 70 | # http://stackoverflow.com/questions/13168943/expression-for-elements-greater-than-x-and-less-than-y-in-python-all-in-one-ret 71 | 72 | # Step 2: this does the threshold test and then we sum the boolean 73 | # array. 74 | thisMeasurement = row[1:] 75 | # logging.debug('The current measurement is: %s' % (thisMeasurement,)) 76 | # logging.debug('The previous measurement is: %s' % 77 | # (previousMeasurement,)) 78 | 79 | stateChange = np.fabs( 80 | np.subtract(thisMeasurement, previousMeasurement)) 81 | # logging.debug('The State Change is: %s' % (stateChange,)) 82 | 83 | if np.sum(stateChange > state_threshold): 84 | instantaneousChange = True 85 | else: 86 | instantaneousChange = False 87 | 88 | # Step 3: Identify if transition is just starting, if so, process it 89 | if (instantaneousChange and (not ongoingChange)): 90 | 91 | # Calculate transition size 92 | lastTransition = np.subtract(estimatedSteadyPower, lastSteadyPower) 93 | # logging.debug('The steady state transition is: %s' % 94 | # (lastTransition,)) 95 | 96 | # Sum Boolean array to verify if transition is above noise level 97 | if np.sum(np.fabs(lastTransition) > edge_threshold): 98 | if not time == dataframe.iloc[0].name: 99 | 100 | # 3A, C: if so add the index of the transition start and the 101 | # power information 102 | 103 | # Avoid outputting first transition from zero 104 | index_transitions.append(time) 105 | # logging.debug('The current row time is: %s' % (time)) 106 | transitions.append(lastTransition) 107 | 108 | # I think we want this, though not specifically in Hart's algo notes 109 | # We don't want to append a steady state if it's less than min samples in length. 
110 | # if N > min_n_samples: 111 | index_steadystates.append(time) 112 | # logging.debug('The ''time'' stored is: %s' % (time)) 113 | # last states steady power 114 | steadyStates.append(estimatedSteadyPower) 115 | 116 | 117 | 118 | # 3B 119 | lastSteadyPower = estimatedSteadyPower 120 | # 3C 121 | time = row[0] 122 | 123 | # Step 4: if a new steady state is starting, zero counter 124 | if instantaneousChange: 125 | N = 0 126 | 127 | # Hart step 5: update our estimate for steady state's energy 128 | estimatedSteadyPower = np.divide( 129 | np.add(np.multiply(N, estimatedSteadyPower), 130 | thisMeasurement), (N + 1)) 131 | # logging.debug('The steady power estimate is: %s' % 132 | # (estimatedSteadyPower,)) 133 | # Step 6: increment counter 134 | N = N + 1 135 | 136 | # Step 7 137 | ongoingChange = instantaneousChange 138 | 139 | # Step 8 140 | previousMeasurement = thisMeasurement 141 | 142 | lastTransition = np.subtract(estimatedSteadyPower, lastSteadyPower) 143 | if np.sum(np.fabs(lastTransition) > edge_threshold): 144 | index_transitions.append(time) 145 | # logging.debug('The current row time is: %s' % (time)) 146 | transitions.append(lastTransition) 147 | 148 | # I think we want this, though not specifically in Hart's algo notes 149 | # We don't want to append a steady state if it's less than min samples in length. 150 | # if N > min_n_samples: 151 | index_steadystates.append(time) 152 | # logging.debug('The ''time'' stored is: %s' % (time)) 153 | # last states steady power 154 | steadyStates.append(estimatedSteadyPower) 155 | 156 | 157 | 158 | # print("Edge detection complete.") 159 | 160 | # print("Creating transition frame ...") 161 | # sys.stdout.flush() 162 | 163 | columns = dataframe.columns 164 | 165 | 166 | if len(index_transitions)==0: 167 | # No events 168 | return pd.DataFrame() 169 | else: 170 | transitions = pd.DataFrame(data=transitions, index=index_transitions, 171 | columns=columns) 172 | transitions.index.name = dataframe.index.name 173 | # print("Transition frame created.") 174 | 175 | # print("Creating states frame ...") 176 | # sys.stdout.flush() 177 | steadyStates = pd.DataFrame(data=steadyStates, index=index_steadystates, 178 | columns=columns) 179 | # print("States frame created.") 180 | # print("Finished.") 181 | return transitions -------------------------------------------------------------------------------- /NILM/appliance_models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Jan 15 09:38:02 2015 4 | 5 | @author: thibaut 6 | """ 7 | 8 | import pandas as pd 9 | import numpy as np 10 | import modeling 11 | 12 | 13 | class ApplianceModels(pd.DataFrame): 14 | 15 | """ 16 | This class inherits from pandas.DataFrame. 17 | 18 | The main method is the modeling of appliances. The modeling 19 | of appliances method take as input a meter 20 | (where the events are already clustered). It 21 | will model the appliances by grouping some clusters of events. 22 | The current algorithm allows only to model two-states-appliances. 23 | The clusters are defined in Clusters class. The modeling construct 24 | a pd.dataFrame where the columns are ['phase', 'appliance', 'transition']. 25 | Each row is a cluster. Therefore for each cluster an appliance number 26 | will be defined. This method can use diffent modeling functions. 27 | These functions are implemented in the submodule 'modeling'. 28 | The function used is choosed in the __init__ of ApplianceModels. 
29 | 
30 |     The pandas.DataFrame constructed therefore gives, for each
31 |     cluster, the appliance it is associated with.
32 | 
33 |     Parameters
34 |     ----------
35 |     association_two_states_type: string
36 |         Name of a function which models two-state appliances from the
37 |         available clusters. This function will be used to build the
38 |         appliance models. Needs to be one of the keys of the dictionary
39 |         'association_two_states_types' of the NILM.ApplianceModels object.
40 | 
41 |     association_two_states_parameters: dict (optional)
42 |         Parameters to be passed as arguments of the function which will be
43 |         used to model the two-state appliances. Arguments not provided
44 |         will take the default value defined in the dictionary
45 |         'association_two_states_types' of the NILM.ApplianceModels object.
46 | 
47 |     Attributes
48 |     ----------
49 |     association_two_states_types: dict, (class variable)
50 |         Dictionary which lists all the implemented methods to create
51 |         two-state appliances. The keys are the names of the implemented
52 |         methods. The values are dictionaries with two keys:
53 |         'model' and 'parameters'. The value associated with 'model' is a
54 |         function which associates clusters to create two-state appliances.
55 |         This function is implemented in the submodule 'modeling'.
56 |         The value associated with 'parameters' is a dictionary which maps
57 |         the names of the arguments of the model to the default
58 |         values of these parameters. NOTE: When a new modeling function is
59 |         implemented in the 'modeling' submodule, the function and its default
60 |         parameters need to be entered into this dict.
61 | 
62 |     association_two_states_type: string
63 |         Name of the association model used. Needs to be one of the keys
64 |         of the dictionary association_two_states_types.
65 | 
66 |     model_2states: function
67 |         Function used to do the two-state association. Such functions are
68 |         implemented in the submodule 'modeling'.
69 | 
70 |     parameters_2states: dict
71 |         Dict of parameter/value pairs to be passed as arguments of the
72 |         function model_2states.
73 | 
74 |     References
75 |     ----------
76 |     Hart, G. W. "Prototype nonintrusive appliance load monitor." (1985).
77 |     """
78 | 
79 |     association_two_states_types = modeling.__all__
80 | 
81 | 
82 |     def __init__(self, association_two_states_type,
83 |                  **association_two_states_parameters):
84 |         super(ApplianceModels, self).__init__()
85 |         assert association_two_states_type in\
86 |             ApplianceModels.association_two_states_types
87 |         association_two_states_dict = ApplianceModels.\
88 |             association_two_states_types[association_two_states_type]
89 |         model_2states = association_two_states_dict['model']
90 |         parameters_2states = association_two_states_dict['parameters']
91 |         # Override the defaults with the parameters passed as keyword arguments
92 |         for k, v in association_two_states_parameters.iteritems():
93 |             parameters_2states[k] = v
94 | 
95 |         self.model_2states = model_2states
96 |         self.parameters_2states = parameters_2states
97 | 
98 |     def modeling(self, meter):
99 |         """
100 |         The modeling method takes as input a meter
101 |         (where the events are already clustered). It
102 |         models the appliances by grouping some clusters of events.
103 |         The current algorithm can only model two-state appliances.
104 |         The clusters are defined in the Clusters class. The modeling
105 |         constructs a pd.DataFrame whose columns are:
106 |         ['phase', 'appliance', 'transition'].
107 |         Each row is a cluster, so an appliance number is assigned to
108 |         each cluster. 'transition' indicates whether it is an 'on' or an 'off'.
109 |         This method can use different modeling functions.
110 |         These functions are implemented in the submodule 'modeling'.
111 |         The function used is chosen in the __init__ of ApplianceModels.
112 | 
113 |         Parameters
114 |         ----------
115 |         meter: NILM.Meter
116 |             Meter where the events are already clustered.
117 |         """
118 | 
119 |         # Check that the clustering on meter was done
120 |         clusters = meter.clusters
121 | 
122 |         # Take the list of phases and powers measured by meter
123 |         powers = meter.power_types
124 |         phases = meter.phases
125 | 
126 |         # I. Modeling two-state appliances
127 | 
128 |         # Initialization
129 |         appliances = -10*np.ones(len(clusters.index))
130 |         transitions = np.zeros(len(clusters.index))
131 |         n_appliances = 0
132 |         model_2states = self.model_2states
133 |         parameters_2states = self.parameters_2states
134 | 
135 |         for phase in phases:
136 |             # Select the clusters of this phase and with label != -1
137 |             mask = ((clusters.phase == phase) & (clusters.cluster != -1)).values
138 |             df = clusters[mask]
139 |             # Select the powers
140 |             X = df[powers].values
141 | 
142 |             # Associate the clusters to make appliances.
143 |             # The model and the parameters used
144 |             # are stored in the building_model object
145 |             a = model_2states(X, **parameters_2states)
146 |             a = np.where(a == -1, -1, a + n_appliances)
147 |             n_appliances = max(a) + 1
148 | 
149 |             # Add if transition is 'on' or 'off'
150 |             t = np.zeros_like(a)
151 |             mask_2 = (a != -1)
152 |             P = X[mask_2][:, 0]
153 |             t[mask_2] = np.where(P > 0, 1, -1)
154 |             appliances[mask] = a
155 |             transitions[mask] = t
156 | 
157 |         # Add appliance labels to meter.clusters
158 |         meter.clusters['appliance'] = appliances
159 |         meter.clusters['transition'] = transitions
160 | 
161 |         # Construct the pd.DataFrame Appliances Model
162 |         df = meter.clusters.sort_index(by=['phase', 'appliance', 'transition'])
163 |         df = df.set_index(['phase', 'appliance', 'transition'])
164 |         df = df.reset_index()
165 |         super(ApplianceModels, self).__init__(df)
166 | 
167 | if __name__ == '__main__':
168 |     # NOTE: assumes a NILM.Meter object `meter` with clustered events exists
169 |     am = ApplianceModels('simple')
170 |     am.modeling(meter)
--------------------------------------------------------------------------------
/NILM/converter/convert_blued_to_user.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import pandas as pd
3 | import numpy as np
4 | import sys
5 | import scipy.io
6 | import dateutil.tz
7 | import datetime
8 | import os
9 | from os.path import isdir, isfile, join
10 | from dateutil.parser import parse
11 | import json
12 | 
13 | 
14 | metadata_BLUED = {
15 |     "number_users": 1,
16 |     "users": {
17 |         'user_blued': {
18 |             "number_meters": 1,
19 |             "meters": {
20 |                 'meter_blued': {
21 |                     "number_datasets": 2,
22 |                     "measurements": {"phases": ['A', 'B'],
23 |                                      "power_types": ['P', 'Q']},
24 |                     "tz": "US/Eastern"
25 |                 }
26 |             }
27 |         }
28 |     }
29 | }
30 | 
31 | 
32 | def blued_to_user(input_path, hdf_path):
33 |     """
34 |     Convert the MATLAB files of BLUED into a pd.DataFrame. The dataframe is
35 |     stored in an HDF5 file (one file per user). IMPORTANT: the MATLAB files
36 |     need to be unzipped before conversion and kept in their original folder.
37 | 
38 |     Parameters
39 |     ----------
40 |     input_path: string
41 |         The path where the "location001_dataset00x" folders are stored.
42 | 
43 |     hdf_path: string
44 |         The path of the folder where the HDF5 files are stored.
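    Examples
    --------
    A minimal sketch; both paths are placeholders for wherever the raw
    BLUED folders and the output folder live on your machine:

    >>> blued_to_user('/data/BLUED/RAW', '/data/BLUED/CONVERTED')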
45 | """ 46 | assert isdir(input_path) 47 | assert isdir(hdf_path) 48 | 49 | _convert_data(input_path, hdf_path, metadata_BLUED) 50 | print("Convertion finished!") 51 | 52 | 53 | def _load_metadata_BLUED(): 54 | metadata_file = join(_give_path_script(), 'metadata', 'blued.json') 55 | assert isfile(metadata_file) 56 | with open(metadata_file) as fh: 57 | metadata_BLUED = json.load(fh) 58 | return metadata_BLUED 59 | 60 | 61 | def _give_path_script(): 62 | try: 63 | path = os.path.abspath(__file__) 64 | dir_path = os.path.dirname(path) 65 | except NameError: 66 | dir_path = os.getcwd() 67 | return dir_path 68 | 69 | 70 | def _convert_data(input_path, hdf_path, metadata_BLUED): 71 | for user in metadata_BLUED['users'].keys(): 72 | metadata_user = metadata_BLUED["users"][user] 73 | print("Loading", user, end=": ") 74 | sys.stdout.flush() 75 | _convert_user(input_path, hdf_path, metadata_user, user) 76 | 77 | 78 | def _convert_metadata_user(hdf_filename, metadata_user): 79 | with pd.get_store(hdf_filename) as store: 80 | store.root._v_attrs.metadata = metadata_user 81 | 82 | 83 | def _convert_user(input_path, hdf_path, metadata_user, user): 84 | hdf_filename = _make_hdf_file(user, hdf_path) 85 | if os.path.exists(hdf_filename): 86 | os.remove(hdf_filename) 87 | _convert_metadata_user(hdf_filename, metadata_user) 88 | for meter in metadata_user['meters'].keys(): 89 | metadata_meter = metadata_user["meters"][meter] 90 | print(meter, end="... ") 91 | sys.stdout.flush() 92 | _convert_meter(input_path, hdf_path, metadata_meter, meter, user) 93 | 94 | 95 | def _convert_meter(input_path, hdf_path, metadata_meter, meter, user): 96 | number_datasets = metadata_meter["number_datasets"] 97 | hdf_filename = _make_hdf_file(user, hdf_path) 98 | key_measurements = _make_key_measurements(meter) 99 | tz = metadata_meter["tz"] 100 | start = _find_start(meter, input_path, tz) 101 | with pd.get_store(hdf_filename) as store: 102 | for dataset in np.arange(1, number_datasets + 1): 103 | print(dataset, end=" ") 104 | sys.stdout.flush() 105 | df = _load_dataset(dataset, input_path, metadata_meter, start, 106 | meter) 107 | store.append(str(key_measurements), df, format='table') 108 | store.flush() 109 | print() 110 | 111 | 112 | def _load_dataset(dataset, input_path, metadata_meter, start, meter): 113 | measurements = metadata_meter["measurements"] 114 | phases = measurements['phases'] 115 | power_types = measurements['power_types'] 116 | path = _make_input_path_blued(meter, dataset, input_path) 117 | assert isdir(path) 118 | index = None 119 | data = None 120 | sub_files = _make_list_subfiles_blued(dataset) 121 | for sub_file in sub_files: 122 | measures, timestamps = _load_subfile(sub_file, dataset, 123 | meter, path) 124 | if index is None: 125 | data = measures 126 | index = timestamps 127 | else: 128 | data = np.concatenate((data, measures), axis=0) 129 | index = np.concatenate((index, timestamps), axis=0) 130 | tz = metadata_meter["tz"] 131 | index = _sec_since_start_to_Datetime(index, start, tz) 132 | cols = pd.MultiIndex.from_product([phases, power_types]) 133 | df = pd.DataFrame(data, columns=cols, index=index, dtype='float32') 134 | return df 135 | 136 | 137 | def _load_subfile(sub_file, dataset, meter, path): 138 | input_file = _make_input_file_blued(sub_file, dataset, meter, path) 139 | assert isfile(input_file) 140 | mat = scipy.io.loadmat(input_file) 141 | t = mat['data'][0][0][2] 142 | t = t.reshape(len(t)) 143 | tt = mat['data'][0][0][3].reshape(len(t)) 144 | Qa = 
mat['data'][0][0][4][0].reshape(len(t), 1) 145 | Qb = mat['data'][0][0][5][0].reshape(len(t), 1) 146 | Pa = mat['data'][0][0][6][0].reshape(len(t), 1) 147 | Pb = mat['data'][0][0][7][0].reshape(len(t), 1) 148 | measures = np.concatenate((Pa, Qa, Pb, Qb), axis=1) 149 | timestamps = tt 150 | return measures, timestamps 151 | 152 | 153 | def _sec_since_start_to_Datetime(index, start, tz): 154 | """ 155 | Compute the pd.DatetimeIndex in timezone tz. The index is 156 | the time elapsed since start. start is given in the timezone 157 | tz. 158 | """ 159 | zero = datetime.datetime(1970, 1, 1) 160 | zero = zero.replace(tzinfo=dateutil.tz.gettz('UTC')) 161 | start_int = (start-zero).total_seconds() 162 | index = index + start_int 163 | index = pd.to_datetime(index, unit='s', utc=True) 164 | index = index.tz_convert(tz) 165 | return index 166 | 167 | 168 | def _make_input_file_blued(sub_file, dataset, meter, path): 169 | meter_path = 'location_00{:d}'.format(1) 170 | filename = "_".join((meter_path, 171 | 'matlab_{:d}.mat'.format(sub_file))) 172 | filename = "/".join((path, filename)) 173 | return filename 174 | 175 | 176 | def _make_input_path_blued(meter, dataset, input_path): 177 | meter_path = 'location_00{:d}'.format(1) 178 | dataset_path = "_".join((meter_path, 'dataset_00{:d}'.format(dataset))) 179 | path = "/".join((input_path, dataset_path)) 180 | return path 181 | 182 | 183 | def _make_list_subfiles_blued(dataset): 184 | first_subfile = (dataset-1)*4 + 1 185 | last_subfile = first_subfile + 4 186 | return range(first_subfile, last_subfile) 187 | 188 | 189 | def _make_hdf_file(user, hdf_path): 190 | filename = "{:s}.h5".format(user) 191 | hdf_filename = "/".join((hdf_path, filename)) 192 | return hdf_filename 193 | 194 | 195 | def _make_key_measurements(meter): 196 | return "/{:s}/measurements".format(meter) 197 | 198 | 199 | def _find_start(meter, input_path, tz): 200 | name_dir = "_".join(('/location_00{:d}'.format(1), 'dataset_001/')) 201 | start_end_path = "".join((input_path, name_dir)) 202 | start_end_file = "".join((start_end_path, 'start_end.txt')) 203 | with open(start_end_file) as f: 204 | l = [line.strip() for line in f] 205 | start_date = l[1].split(",")[1] 206 | start_time = l[2].split(",")[1] 207 | start = parse(start_date+' '+start_time) 208 | start = start.replace(tzinfo=dateutil.tz.gettz(tz)) 209 | return start 210 | 211 | 212 | if __name__ == "__main__": 213 | hdf_path = '/Volumes/Stockage/DATA/DATA_BLUED/CONVERTED' 214 | input_path = '/Volumes/Stockage/DATA/DATA_BLUED/RAW' 215 | blued_to_user(input_path, hdf_path) 216 | hdf_filename = _make_hdf_file("user_blued", hdf_path) 217 | print(hdf_filename) 218 | key = _make_key_measurements("meter_blued") 219 | with pd.get_store(hdf_filename) as store: 220 | df = store[key] 221 | metadata_dict = store.root._v_attrs.metadata 222 | import matplotlib.pyplot as plt 223 | plt.plot(df.index) 224 | -------------------------------------------------------------------------------- /NILM/meter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pandas as pd 3 | import os 4 | 5 | import converter 6 | from measurements import Measurements 7 | from events import Events 8 | from clusters import Clusters 9 | from appliance_models import ApplianceModels 10 | from appliance_consumptions import ApplianceConsumptions 11 | 12 | 13 | class Store(object): 14 | """ Reference where the data of a meter is stored. 
15 | 
16 |     Parameters
17 |     ----------
18 |     filename: str
19 |         Path + filename of the HDF5 file where the data is stored.
20 |         It can be a user or a meter HDF5 file. For more information
21 |         on meter HDF5 files, see the NILM.converter.dataframe_to_meter function.
22 | 
23 |     key: str
24 |         Key used to index the data of the meter inside the HDF5 file.
25 |         If the file is a user file the key is "meter_ID/measurements".
26 |         If the file is a meter file the key is "measurements".
27 | 
28 |     Attributes
29 |     ----------
30 |     filename: str
31 |         Path + filename of the HDF5 file where the data is stored.
32 |         It can be a user or a meter file.
33 |     key: str
34 |         Key used to index the data of the meter inside the HDF5 file.
35 |         If the file is a user file the key is "meter_ID/measurements".
36 |         If the file is a meter file the key is "measurements".
37 |     """
38 | 
39 |     def __init__(self, filename=None, key=None):
40 |         self.filename = str(filename)
41 |         self.key = str(key)
42 | 
43 |     def __repr__(self):
44 |         return self.__dict__.__repr__()
45 | 
46 | 
47 | class Meter(object):
48 |     """Main object of this disaggregation algorithm.
49 | 
50 |     Models a real meter. The data lives in an HDF5 store and can be loaded
51 |     into the Measurements attribute. The data is referenced by the
52 |     NILM.meter.Store object. All the actions to disaggregate a meter
53 |     are performed on the Meter object. A Meter can be created from
54 |     a NILM.User object, loaded from a meter HDF5 file, or created from
55 |     a pandas.DataFrame with well-formed measurement data.
56 | 
57 | 
58 |     Parameters
59 |     ----------
60 |     metadata: dict, optional
61 |         Contains additional information on the meter.
62 |         Example: timezone, start of the measurements, etc.
63 | 
64 |     phases: list of str, optional
65 |         List of phases measured by the meter.
66 | 
67 |     power_types: list of str, optional
68 |         List of power types measured by the meter.
69 |         Example: ['P', 'Q'], ['apparent', 'reactive']
70 | 
71 |     store: NILM.Store object, optional
72 |         Where the data of the meter is stored.
73 | 
74 |     Attributes
75 |     ----------
76 |     metadata: dict
77 |         Contains additional information on the meter.
78 |         Example: timezone, start of the measurements, etc.
79 | 
80 |     phases: list of str
81 |         List of phases measured by the meter.
82 | 
83 |     power_types: list of str
84 |         List of power types measured by the meter.
85 |         Example: ['P', 'Q'], ['apparent', 'reactive']
86 | 
87 |     store: NILM.Store object
88 |         Where the data of the meter is stored.
89 | 
90 |     measurements: NILM.Measurements object
91 |         Data of the meter: power measurements for each timestamp. Exists
92 |         only after the measurements of the meter have been loaded.
93 | 
94 |     events: NILM.Events object
95 |         Events detected in the measurements. Exists only after the events
96 |         of the meter have been detected.
97 | 
98 |     clusters: NILM.Clusters object
99 |         Clusters of detected events. Exists only after the events of the
100 |         meter have been clustered.
101 | 
102 |     appliance_models: NILM.ApplianceModels object
103 |         Appliance models constructed from the clusters. Exists only after
104 |         the appliances of the meter have been modeled.
105 | 
106 |     appliance_consumptions: NILM.ApplianceConsumptions object
107 |         Consumptions of the disaggregated appliances. Exists only after
108 |         the consumptions of the meter have been tracked.
109 | 
110 |     References
111 |     ----------
112 |     Hart, G. W. "Prototype nonintrusive appliance load monitor." (1985).
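    Examples
    --------
    A sketch of the full pipeline (the HDF5 path is a placeholder; the
    calls mirror the __main__ block at the bottom of this file):

    >>> user = User('/path/to/user_blued.h5')
    >>> meter = user.meters[0]
    >>> meter.load_measurements(sampling_period=10)
    >>> meter.detect_events('steady_states')
    >>> meter.cluster_events('DBSCAN', eps=35)
    >>> meter.model_appliances('simple')
    >>> meter.track_consumptions('simple')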
113 | """ 114 | 115 | def __init__(self, metadata=None, phases=None, power_types=None, 116 | store=None, ID=None): 117 | self.phases = phases 118 | self.metadata = metadata 119 | self.power_types = power_types 120 | self.store = store 121 | self.ID = ID 122 | 123 | @staticmethod 124 | def from_user(user, meter_ID): 125 | """Returns a Meter created from a User object. 126 | 127 | Parameters 128 | ---------- 129 | user: NILM.User object 130 | User which own the meter. 131 | 132 | meter_ID: str 133 | Name of the meter. Needs to a a ID in the User.meters_ID list. 134 | """ 135 | assert meter_ID in user.meters_ID 136 | metadata = user.metadata['meters'][meter_ID] 137 | 138 | measurements = metadata['measurements'] 139 | phases = measurements['phases'] 140 | power_types = measurements['power_types'] 141 | 142 | key = "/".join((meter_ID, 'measurements')) 143 | store = Store(user.filename, key) 144 | 145 | meter = Meter(metadata, phases, power_types, store, meter_ID) 146 | return meter 147 | 148 | @staticmethod 149 | def from_meter_hdf(hdf_filename): 150 | """Returns a Meter created from a meter HDFS file. 151 | 152 | For more informations on meter HDFS file, 153 | see NILM.converter.dataframe_to_meter function. 154 | 155 | Parameters 156 | ---------- 157 | hdf_filename: str 158 | Path + filename of the meter HDFS file. For more informations on 159 | meter HDFS file, see NILM.converter.dataframe_to_meter function. 160 | """ 161 | assert os.path.isfile(hdf_filename) 162 | with pd.get_store(hdf_filename) as store: 163 | metadata = store.root._v_attrs.metadata 164 | 165 | meter_ID = hdf_filename.split('/')[-1] 166 | phases = list(metadata['measurements']['phases']) 167 | power_types = list(metadata['measurements']['power_types']) 168 | 169 | key = 'measurements' 170 | store = Store(hdf_filename, key) 171 | meter = Meter(metadata, phases, power_types, store, meter_ID) 172 | return meter 173 | 174 | @staticmethod 175 | def from_dataframe(df, hdf_filename): 176 | """Create a Meter from a dataframe. 177 | 178 | The meter is store in a meter HDFS file. For more informations on 179 | meter HDFS file, see NILM.converter.dataframe_to_meter function. 180 | 181 | Parameters 182 | ---------- 183 | df: pandas.DataFrame 184 | Dataframe containing the measurements of the powers for the 185 | phases and power types measured by a meter. The format of the 186 | DataFrame is specified in converter.dataframe_to_meter function. 187 | 188 | hdf_filename: str 189 | Path + filename of the meter HDFS file. To create a meter 190 | HDFS file, see NILM.converter.dataframe_to_meter function. 191 | """ 192 | 193 | converter.dataframe_to_meter(df, hdf_filename) 194 | return Meter.from_meter_hdf(hdf_filename) 195 | 196 | def __repr__(self): 197 | return str(self.ID) 198 | 199 | def load_measurements(self, sampling_period): 200 | """ Create a NILM.Measurements object as Meter attribute. 201 | 202 | Parameters 203 | ---------- 204 | sampling_period: int of float 205 | Elapse time between two measurements in second. 206 | """ 207 | measurements = Measurements(sampling_period) 208 | measurements.load_data(self) 209 | self.measurements_ = measurements 210 | print "Meter: measurements loaded!" 211 | 212 | @property 213 | def measurements(self): 214 | try: 215 | return self.measurements_ 216 | except AttributeError: 217 | return AttributeError('Meter: load measurements before!') 218 | 219 | def detect_events(self, detection_type, **detection_parameters): 220 | """Create a NILM.Events object as Meter attribute. 
221 | 
222 |         Parameters
223 |         ----------
224 |         detection_type: string
225 |             Name of a detection function. This function will be used to detect
226 |             events. Needs to be one of the keys of the dictionary
227 |             'detection_types' of the NILM.Events object.
228 | 
229 |         detection_parameters: dict (optional)
230 |             Arguments to be passed to the function which will be
231 |             used to detect the events. Arguments not provided will take the
232 |             default value defined in the dictionary 'detection_types'
233 |             of the NILM.Events object.
234 |         """
235 |         events = Events(detection_type, **detection_parameters)
236 |         events.detection(self)
237 |         self.events_ = events
238 |         print "Meter: events detected!"
239 | 
240 |     @property
241 |     def events(self):
242 |         try:
243 |             return self.events_
244 |         except AttributeError:
245 |             raise AttributeError('Meter: detect events before!')
246 | 
247 |     def cluster_events(self, clustering_type, features=None,
248 |                        **clustering_parameters):
249 |         """Create a NILM.Clusters object as Meter attribute.
250 | 
251 |         Parameters
252 |         ----------
253 |         clustering_type: string
254 |             Name of a clustering function. This function will be used to
255 |             cluster the events. Needs to be one of the keys of the dictionary
256 |             'clustering_types' of the NILM.Clusters object.
257 | 
258 |         clustering_parameters: dict (optional)
259 |             Arguments to be passed to the function which will be
260 |             used to cluster the events. Arguments not provided will take the
261 |             default value defined in the dictionary 'clustering_types' of
262 |             the NILM.Clusters object.
263 |         """
264 |         clusters = Clusters(clustering_type, **clustering_parameters)
265 |         clusters.clustering(self, features)
266 |         self.clusters_ = clusters
267 |         print "Meter: events clustered!"
268 | 
269 |     @property
270 |     def clusters(self):
271 |         try:
272 |             return self.clusters_
273 |         except AttributeError:
274 |             raise AttributeError('Meter: cluster events before!')
275 | 
276 |     def model_appliances(self, association_two_states_type,
277 |                          **association_two_states_parameters):
278 |         """Create a NILM.ApplianceModels object as Meter attribute.
279 | 
280 |         Parameters
281 |         ----------
282 |         association_two_states_type: string
283 |             Name of a function which models two-state appliances from the
284 |             available clusters. This function will be used to build the
285 |             appliance models. Needs to be one of the keys of the dictionary
286 |             'association_two_states_types' of the NILM.ApplianceModels object.
287 | 
288 |         association_two_states_parameters: dict (optional)
289 |             Parameters to be passed as arguments of the function which will be
290 |             used to model the two-state appliances. Arguments not provided
291 |             will take the default value defined in the dictionary
292 |             'association_two_states_types' of the NILM.ApplianceModels object.
293 |         """
294 |         appliance_models = ApplianceModels(association_two_states_type,
295 |                                            **association_two_states_parameters)
296 |         appliance_models.modeling(self)
297 |         self.appliance_models_ = appliance_models
298 |         print "Meter: appliances modeled!"
299 | 
300 |     @property
301 |     def appliance_models(self):
302 |         try:
303 |             return self.appliance_models_
304 |         except AttributeError:
305 |             raise AttributeError('Meter: model appliances before!')
306 | 
307 |     def track_consumptions(self, tracking_type, **tracking_parameters):
308 |         """Create a NILM.ApplianceConsumptions object as Meter attribute.
309 | 
310 |         Parameters
311 |         ----------
312 |         tracking_type: string
313 |             Name of a function which tracks the consumptions of the available
314 |             appliances. This function will be used to compute the appliance
315 |             consumptions. Needs to be one of the keys of the dictionary
316 |             'tracking_types' of the NILM.ApplianceConsumptions object.
317 | 
318 |         tracking_parameters: dict (optional)
319 |             Parameters to be passed as arguments of the function which will be
320 |             used to track the appliance consumptions. Arguments not provided
321 |             will take the default value defined in the dictionary
322 |             'tracking_types' of the NILM.ApplianceConsumptions object.
323 |         """
324 |         appliance_consumptions = ApplianceConsumptions(tracking_type,
325 |                                                        **tracking_parameters)
326 |         appliance_consumptions.tracking(self)
327 |         self.appliance_consumptions_ = appliance_consumptions
328 |         print "Meter: appliance consumptions tracked!"
329 | 
330 |     @property
331 |     def appliance_consumptions(self):
332 |         try:
333 |             return self.appliance_consumptions_
334 |         except AttributeError:
335 |             raise AttributeError('Meter: track appliance consumptions '
336 |                                  'before!')
337 | 
338 | 
339 | if __name__ == '__main__':
340 |     from user import User
341 |     hdf_filename = '/Volumes/Stockage/DATA/DATA_BLUED/CONVERTED/user_blued.h5'
342 |     user = User(hdf_filename)
343 |     meter = user.meters[0]
344 | 
345 |     meter.load_measurements(sampling_period=10)
346 |     meter.detect_events(detection_type='steady_states')
347 |     meter.cluster_events('DBSCAN', eps=35)
348 |     meter.model_appliances('simple')
349 |     meter.track_consumptions('simple')
350 | 
--------------------------------------------------------------------------------
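Once the pipeline in the __main__ block above has run, the disaggregated consumptions live in meter.appliance_consumptions, a pandas.DataFrame whose columns carry a (phase, appliance) MultiIndex. A minimal sketch of how one might inspect it; the (phase, 'appliance_<n>') column naming comes from ApplianceConsumptions.tracking, and the factor of 10 assumes the sampling_period of 10 s chosen above:

# Inspect the tracked consumptions (sketch; assumes `meter` has been
# disaggregated as in the __main__ block above)
consumptions = meter.appliance_consumptions
print consumptions.columns.tolist()   # e.g. [('A', 'appliance_0'), ...]
# Energy per appliance in kWh: watts * 10 s per sample -> joules -> kWh
energy_kwh = consumptions.sum() * 10 / 3600.0 / 1000.0
print energy_kwh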