├── DataSeperation.py ├── README.md ├── main.py ├── procfunc.py ├── screenshots ├── current_work.PNG ├── dataset_structure.PNG ├── results_1.PNG └── results_2.PNG └── utils_laj.py /DataSeperation.py: -------------------------------------------------------------------------------- 1 | import os, os.path 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import math 6 | 7 | 8 | def movingavg(data, window): 9 | w = np.repeat(1.0, window) / window 10 | smas = np.convolve(data, w, 'valid') 11 | return smas 12 | 13 | 14 | root = r'D:\LAHIRU\Work\KeySight\OriginalData' 15 | 16 | months = ['Sep'] 17 | machines = ['192_168_28_28', 'hp36'] 18 | 19 | for month in months: 20 | sensor_file = 'sensorData' + month + '.csv' 21 | event_file = 'machineEvent' + month + '.csv' 22 | 23 | SensorData_all = pd.read_csv(os.path.join(root, sensor_file), index_col='cf:timestamp', 24 | usecols=['cf:sensorType', 'cf:timestamp', 'cf:value', 'cf:equipmentId', 'cf:partId', 25 | 'cf:moduleId'], 26 | dtype={'cf:sensorType': str, 'cf:timestamp': str, 'cf:value': str, 27 | 'cf:equipmentId': str, 'cf:partId': str, 'cf:moduleId': str}) 28 | 29 | SensorData_all.rename(columns={'cf:sensorType': 'sensorType', 'cf:timestamp': 'timestamp', 'cf:value': 'value', 30 | 'cf:equipmentId': 'equipmentId', 'cf:partId': 'partId', 'cf:moduleId': 'moduleId'}, 31 | inplace=True) 32 | 33 | SensorData_all = SensorData_all.drop(SensorData_all[SensorData_all.sensorType == 'cf:sensorType'].index) 34 | 35 | SensorData_all.index = pd.to_datetime((SensorData_all.index).str[0:-6], format='%Y-%m-%dT%H:%M:%S.%f', 36 | errors='coerce') 37 | 38 | for machine in machines: 39 | eq_id = machine 40 | save_file = 'filtered_events_' + month + '_' + machine + '.csv' 41 | print(save_file) 42 | 43 | if eq_id == 'hp36': 44 | df_filterd = SensorData_all.loc[ 45 | (SensorData_all['equipmentId'] == '192.168.28.252') | (SensorData_all['equipmentId'] == 'hp36')] 46 | elif eq_id == '192_168_28_28': 47 | df_filterd = SensorData_all.loc[SensorData_all['equipmentId'] == '192.168.28.28'] # filter by equipment ID 48 | else: 49 | df_filterd = SensorData_all # fallback: keep every equipment when the machine is not listed above 50 | 51 | df_filterd = df_filterd.loc[df_filterd.index.notnull()] 52 | df_filterd = df_filterd.loc[df_filterd.value.notnull()] 53 | df_filterd.fillna('Blank', inplace=True) 54 | 55 | print(df_filterd.equipmentId.unique()) 56 | 57 | df_filterd.replace('BF1', 'BF', inplace=True) 58 | df_filterd.replace('VV1', 'VV', inplace=True) 59 | module_list = df_filterd.moduleId.unique() 60 | tmp = [] 61 | for _module in module_list: 62 | df_moduled = df_filterd.loc[df_filterd['moduleId'] == _module] # filter by module ID 63 | partId_list = df_moduled.partId.unique() 64 | print(_module, partId_list) 65 | for _part in partId_list: 66 | df_parted = df_moduled.loc[df_moduled['partId'] == _part] # filter by part ID 67 | sensor_list = df_parted.sensorType.unique() 68 | # print(_part, sensor_list) 69 | for _sensor in sensor_list: 70 | # if _sensor == 'Orientation X' or _sensor == 'Orientation Y' or _sensor == 'Orientation Z' or _sensor == '3.3V' or _sensor == '5.0V': 71 | # continue 72 | # print(_sensor) 73 | df_sensor = df_parted.loc[df_parted['sensorType'] == _sensor] # filter by sensor type 74 | # print(df_sensor.shape) 75 | sensor_values = df_sensor[['value']] 76 | sensor_values = sensor_values.loc[sensor_values.value.notnull()] 77 | sensor_values['value'] = pd.to_numeric(sensor_values['value'], errors='coerce') 78 | sensor_values = sensor_values[~sensor_values.index.duplicated(keep='first')] 79 | 
sensor_values.sort_index(inplace=True) 80 | sensor_values.rename(columns={'value': _module + "_" + _part + "_" + _sensor}, inplace=True) 81 | # print(_sensor,'start',sensor_values.index[0],'end',sensor_values.index[-1]) 82 | tmp.append(sensor_values) 83 | print("***********") 84 | # print(len(tmp)) 85 | 86 | ########################### Machine Evens ############################# 87 | machine_event = pd.read_csv(os.path.join(root, event_file), index_col='cf:timestamp', 88 | dtype={'cf:eventType': str}) 89 | 90 | machine_event.rename(columns={'Unnamed: 0': 'hashId', 'cf:eventType': 'event'}, inplace=True) 91 | machine_event.index = pd.to_datetime((machine_event.index).str[0:-6], format='%Y-%m-%dT%H:%M:%S.%f', 92 | errors='coerce') 93 | 94 | machine_event = machine_event.loc[machine_event.index.notnull()] 95 | 96 | hash_split = machine_event['hashId'].str.split(':', expand=True) 97 | _tmp = pd.concat([machine_event, hash_split], axis=1) 98 | event_data = _tmp.loc[:, ['event', 2]] 99 | event_data.rename(columns={2: 'equipmentId'}, inplace=True) 100 | 101 | # print("equipments : ",event_data.equipmentId.unique()) 102 | 103 | print('before unique machine events : ', machine_event.event.unique()) 104 | 105 | clearnup_nums = {'event': {'User input received': 1, 'Machine ready': 2, 'Machine alarm': 3, 106 | 'Wait for user input': 4, 'Log out': 5, 'DUT all tests completed': 6, 107 | 'DUT test time': 7, 108 | 'DUT overall test result': 8, 'DUT serial number': 9, 'Board file loaded': 10, 109 | 'Start running testplan': 11, 'Fixture locked': 12, 'Testplan loaded': 13, 110 | 'Fixture unlocked': 14, 'Finish running diagnostic': 15, 'Machine busy/idle': 16, 111 | 'Stop running testplan': 17, 'Start running diagnostic': 18}} # 112 | event_data.replace(clearnup_nums, inplace=True) 113 | 114 | if eq_id == 'hp36': 115 | event_data_filtered = event_data.loc[(event_data['equipmentId'] == '192.168.28.252') | ( 116 | event_data['equipmentId'] == 'hp36')] # filter by equipment ID 117 | elif eq_id == '192_168_28_28': 118 | event_data_filtered = event_data.loc[event_data['equipmentId'] == '192.168.28.28'] # filter by equipment ID 119 | 120 | event_data_filtered = event_data_filtered[~event_data_filtered.index.duplicated(keep='first')] 121 | event_data_filtered.sort_index(inplace=True) 122 | event_values = event_data_filtered[['event']] 123 | # print('after unique machine events : ', event_values.event.unique()) 124 | ###################################################################### 125 | 126 | newindex = tmp[0].index.union(event_values.index) 127 | for i in range(1, len(tmp)): 128 | newindex = tmp[i].index.union(newindex) 129 | 130 | for i in range(0, len(tmp)): 131 | # print(tmp[i].shape) 132 | tmp[i] = tmp[i].reindex(newindex, method='nearest') 133 | event_values = event_values.reindex(newindex, method='nearest') 134 | print(tmp[0].shape) 135 | 136 | sync_results = pd.concat([tmp[0], tmp[1]], axis=1) 137 | for i in range(2, len(tmp)): 138 | sync_results = pd.concat([sync_results, tmp[i]], axis=1) 139 | sync_results = pd.concat([sync_results, event_values], axis=1) 140 | # print(list(sync_results)) 141 | print('after unique machine events : ', sync_results.event.unique()) 142 | unique, counts = np.unique(sync_results.event.values, return_counts=True) 143 | event_count = np.asarray((unique, counts)).T 144 | print(event_count) 145 | # show_col = ['Module 3_BF_Temperature Blower'] 146 | # sync_results.plot(y=show_col) 147 | 148 | # print(sync_results.event.value_counts()) 149 | # sync_results.to_csv(save_file) 
150 | sync_results.plot() 151 | plt.show() 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Predictive-Maintenance-Model 2 | 3 | The goal is to implement a degradation model for an industrial machine and predict failures before they occur. 4 | Machine malfunctions are captured here as anomalies, and failures and their related data are captured here as outliers. 5 | 6 | ### Dependencies 7 | 8 | - numpy 9 | - scikit-learn > 0.19.1 10 | - pandas > 0.20.3 11 | 12 | ### Dataset 13 | 14 | Pickle files of the dataset: https://www.dropbox.com/s/jt0nsqsmqxm8wz4/pickle.rar?dl=0 15 | 16 | ### Dataset structure (Time Synchronized) 17 | 18 | ![Screenshot](screenshots/dataset_structure.PNG) 19 | 20 | ### Architecture 21 | 22 | ![Screenshot](screenshots/current_work.PNG) 23 | 24 | ### Results of anomaly and outlier detection 25 | 26 | ![Screenshot](screenshots/results_1.PNG) 27 | ![Screenshot](screenshots/results_2.PNG) 28 | 29 | ### Degradation Model 30 | 31 | The degradation model for remaining useful life estimation can be found [here](https://github.com/LahiruJayasinghe/RUL-Net) 32 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from sklearn.preprocessing import StandardScaler 6 | from utils_laj import cache 7 | 8 | from procfunc import plot_data 9 | from procfunc import clustering 10 | from procfunc import get_outlier_data 11 | from procfunc import get_pca_components 12 | from procfunc import eigenvalue_analysis 13 | from procfunc import show_plots 14 | 15 | if __name__ == "__main__": 16 | 17 | # TODO: add a module-level description 18 | 19 | #### folder structure ############################################################################################## 20 | root = r'D:\LAHIRU\Work\KeySight\DataVisualization' 21 | data_folder = 'synchronized_data' 22 | anomaly_folder = 'first_round_anomaly_detection' 23 | #################################################################################################################### 24 | 25 | #### data parameters ############################################################################################### 26 | months = ['Nov'] 27 | 28 | machines = ['192_168_28_28'] 29 | # machines = ['hp36'] 30 | 31 | discrete_sensors = True 32 | # discrete_sensors = True 33 | #################################################################################################################### 34 | 35 | #### clustering parameters ######################################################################################### 36 | # cluster = 'dbscan' 37 | distance = 0.6 38 | min_samples = 20 39 | 40 | cluster = 'kmeans' 41 | n_clusters = 6 42 | #################################################################################################################### 43 | 44 | remove_outliers = False 45 | show_anomalies = False 46 | show_noise = False 47 | 48 | plot_eig_vals = False 49 | plot_events_based = False 50 | plot_cluster_based = True 51 | save_fig = False 52 | 53 | if discrete_sensors: 54 | file_tag = 'filtered_events_' 55 | pickle_tag = '' 56 | else: 57 | file_tag = 'filtered_events_sensors_' 58 | pickle_tag = 'SensorFiltered_' 59 | 60 | for month in months: 61 | for machine in machines: 62 | data_file = file_tag + month + '_' + 
machine + '.csv' 63 | data_fpath = os.path.join(data_folder, data_file) 64 | ftag = pickle_tag + month + '_' + machine 65 | path_half = os.path.join('pickle', ftag) 66 | print('data file: ', data_file, '\nfile tag: ', ftag) 67 | 68 | #### pre processing ######################################################################################## 69 | # data_all = pd.read_csv(data_fpath, index_col='cf:timestamp') 70 | # data_all = cache(path_half+'_DataAll.pkl',data_all.values) 71 | # data_all = cache(path_half+'_DataAll.pkl') 72 | ############################################################################################################ 73 | 74 | #### make events as classes ################################################################################ 75 | # col = data_all.shape[1] 76 | # cls = data_all[:,col-1].astype(np.int32) # take events as classes 77 | # cls = cache(path_half + '_EventCls.pkl',cls) 78 | ############################################################################################################ 79 | cls = cache(path_half + '_EventCls.pkl') 80 | 81 | #### evaluating number of events ########################################################################### 82 | unique, counts = np.unique(cls, return_counts=True) 83 | event_count = np.asarray((unique, counts)).T 84 | num_events = event_count[-1, 0] 85 | # print(event_count,num_events) 86 | ############################################################################################################ 87 | 88 | #### pre processing ######################################################################################## 89 | # data_all = data_all[:,0:col-1] # exclude events for PCA 90 | # print("excluding events", data_all.shape) 91 | 92 | # data_all = cache(path_half+'_ExcludeEvent.pkl',data_all) 93 | # data_all = cache(path_half+'_ExcludeEvent.pkl') 94 | 95 | # data_ma = movingavg(data_all,window=250) 96 | # data_ma = movingavg(data_all,window=1) 97 | # data_ma=data_all 98 | # print("data_ma shape : ",data_ma.shape) 99 | ############################################################################################################ 100 | 101 | #### standerdization ####################################################################################### 102 | # mean = np.mean(data_ma,axis=0) 103 | # data_ma_std = data_ma - mean 104 | # data_ma_std = StandardScaler().fit_transform(data_ma) #[n_samples, n_features] 105 | data_ma_std = cache(path_half + '_ExcludeEvent_normalized.pkl') 106 | ############################################################################################################ 107 | 108 | if plot_eig_vals: 109 | eigenvalue_analysis(data_ma_std, save_fig, ftag + '_eigenvalues') 110 | 111 | #### show anomalies ######################################################################################## 112 | if show_anomalies: 113 | for_anomaly = pd.read_csv(data_fpath, index_col='cf:timestamp') 114 | for_anomaly.plot().legend(loc='upper right') 115 | anomaly_fpath = os.path.join(anomaly_folder, ftag + '_outliers.csv') 116 | print(anomaly_fpath) 117 | anomaly_data = pd.read_csv(anomaly_fpath) 118 | anomaly_indexes = anomaly_data['index'] 119 | for i in anomaly_indexes: 120 | plt.axvspan(i, i + 1, color='green', alpha=0.3) 121 | # plt.axvline(i, color='green', alpha=0.5) 122 | if show_noise: 123 | noise_i = pd.read_csv(os.path.join(anomaly_folder, ftag + '_noise.csv'))['index'] 124 | for i in noise_i: 125 | plt.axvspan(i, i + 1, color='blue', alpha=0.5) 126 | plt.show() 127 | 
############################################################################################################ 128 | 129 | data_all = pd.read_csv(data_fpath).reset_index() # for anomaly analysis 130 | 131 | #### remove_outliers ####################################################################################### 132 | if remove_outliers: 133 | if pickle_tag == 'SensorFiltered_': 134 | max_index = 181759 135 | min_index = 181435 136 | else: 137 | max_index = 115694 138 | min_index = 114428 139 | 140 | data_ma_std = np.delete(data_ma_std, slice(min_index, max_index), axis=0) 141 | cls = np.delete(cls, slice(min_index, max_index)) 142 | data_all.drop(data_all.index[min_index:max_index], inplace=True) 143 | ############################################################################################################ 144 | 145 | Z, x, y, z = get_pca_components(data_ma_std) # perform pca on standardized data 146 | 147 | if cluster == 'kmeans': 148 | labels, event_count, k_cls = clustering(Z, cluster, n_clusters) 149 | outlier_data = get_outlier_data(data_all, labels, cluster, event_count) 150 | if save_fig: 151 | outlier_data.to_csv(ftag + '_outliers.csv') 152 | elif cluster == 'dbscan': 153 | Z = Z[(Z[:, 0] > 5.5)] 154 | x = Z[:, 0] 155 | y = Z[:, 1] 156 | z = Z[:, 2] 157 | labels, event_count, k_cls = clustering(Z, cluster, distance=distance, min_samples=min_samples) 158 | outlier_data, noise_data = get_outlier_data(data_all, labels, cluster, event_count) 159 | if save_fig: 160 | noise_data.to_csv(ftag + '_noise.csv') 161 | outlier_data.to_csv(ftag + '_outliers.csv') 162 | print(event_count.T) 163 | 164 | if plot_events_based: 165 | plot_data(x, y, None, 3, cls, num_events, save_fig, 'first three Eigenvalues' + '_' + ftag) 166 | plot_data(x, y, None, 2, cls, num_events, save_fig, 'x-y' + '_' + ftag) 167 | plot_data(x, z, None, 2, cls, num_events, save_fig, 'x-z' + '_' + ftag) 168 | plot_data(y, z, None, 2, cls, num_events, save_fig, 'y-z' + '_' + ftag) 169 | 170 | if plot_cluster_based: 171 | plot_data(x, y, z, 3, labels, k_cls, save_fig, ftag + '_outliers_3d') 172 | plot_data(x, y, None, 2, labels, k_cls, save_fig, ftag + '_outliers_XY') 173 | plot_data(y, z, None, 2, labels, k_cls, save_fig, ftag + '_outliers_YZ') 174 | plot_data(x, z, None, 2, labels, k_cls, save_fig, ftag + '_outliers_XZ') 175 | 176 | show_plots(save_fig) 177 | -------------------------------------------------------------------------------- /procfunc.py: -------------------------------------------------------------------------------- 1 | from sklearn.decomposition import PCA 2 | import os, os.path 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import matplotlib.cm as cm 7 | from mpl_toolkits.mplot3d import Axes3D 8 | from sklearn.preprocessing import StandardScaler 9 | from utils_laj import movingavg 10 | from utils_laj import cache 11 | from sklearn.cluster import KMeans 12 | from sklearn.cluster import DBSCAN 13 | 14 | 15 | def plot_data(x, y, z, dim, labels_array, num_classes, save_fig, image_name): 16 | """ 17 | Scatter plot for data distribution 18 | 19 | :param z: 20 | :param y: 21 | :param x: 22 | :param dim: dim=3 or dim=2, indicate the dimentions of the plot / type(dim)=int 23 | :param labels_array: an numpy array, which indicate the label for each and every data eg: [0 0 1 ... 
2 2 2] / type(labels_array)=np.array 24 | :param num_classes: number of classes or clusters that the data belong to / type(num_classes)=int 25 | :param save_fig: boolean value, save figure if True / type(save_fig)=bool 26 | :param image_name: string variable for the plot title and the file name in case it is saved / type(image_name)=string 27 | 28 | """ 29 | 30 | cmap = cm.get_cmap('gist_ncar', num_classes) 31 | if dim == 3: 32 | fig = plt.figure() 33 | ax = Axes3D(fig) 34 | sc = ax.scatter(x, y, z, c=labels_array.astype(float), edgecolor='k', cmap=cmap) 35 | plt.ylabel('y') 36 | plt.xlabel('x') 37 | plt.colorbar(sc, ticks=range(0, num_classes)) 38 | plt.title(image_name) 39 | if save_fig: 40 | fig.savefig(image_name + '.png', bbox_inches='tight') 41 | plt.close(fig) 42 | elif dim == 2: 43 | fig = plt.figure() 44 | sc = plt.scatter(x, y, c=labels_array.astype(float), edgecolor='k', cmap=cmap) 45 | plt.ylabel('y') 46 | plt.xlabel('x') 47 | plt.colorbar(sc, ticks=range(0, num_classes)) 48 | plt.title(image_name) 49 | if save_fig: 50 | fig.savefig(image_name + '.png', bbox_inches='tight') 51 | plt.close(fig) 52 | else: 53 | raise ValueError("invalid dimension, valid values are dim=2 or dim=3") 54 | 55 | 56 | def clustering(Z, cluster_method, n_clusters=0, distance=0, min_samples=0): 57 | """ 58 | Clustering function; supports only kmeans and dbscan 59 | Eg: 60 | labels : [0 0 1 ... 2 2 2] 61 | event_count : [[ -1 72] 62 | [ 0 398] 63 | [ 1 26] 64 | [ 2 2520]] 65 | k_cls : 4 66 | 67 | :param Z: the data to be clustered 68 | :param cluster_method: should be either 'kmeans' or 'dbscan' type(cluster_method)=string 69 | :param n_clusters: only for kmeans, type(n_clusters)=int 70 | :param distance: only for dbscan type(distance)=float 71 | :param min_samples: only for dbscan type(min_samples)=int 72 | :return: 'labels' represents the cluster label for each point in 'Z' based on the 'cluster_method' 73 | 'event_count[:,0]' column represents the different cluster labels calculated by 'cluster_method' 74 | 'k_cls' total number of different clusters that exist in 'Z' 75 | """ 76 | 77 | if cluster_method == 'kmeans': 78 | if isinstance(n_clusters, int) and n_clusters > 0: 79 | # kmeans = KMeans(n_clusters=k_cls,init='k-means++',n_init=k_cls) 80 | kmeans = KMeans(n_clusters=n_clusters) 81 | kmeans.fit(Z) 82 | labels = kmeans.labels_ 83 | else: 84 | raise ValueError("invalid 'n_clusters'") 85 | elif cluster_method == 'dbscan': 86 | if isinstance(min_samples, int) and min_samples > 0 and distance > 0: 87 | np.random.seed(42) 88 | db = DBSCAN(eps=distance, min_samples=min_samples).fit(Z) 89 | labels = db.labels_ 90 | else: 91 | raise ValueError("invalid 'distance' or 'min_samples'") 92 | else: 93 | raise ValueError("Undefined cluster method, valid values are cluster_method='kmeans' or cluster_method='dbscan'") 94 | 95 | unique, counts = np.unique(labels, return_counts=True) 96 | event_count = np.asarray((unique, counts)).T 97 | k_cls = len(unique) 98 | return labels, event_count, k_cls 99 | 100 | 101 | def get_outlier_data(data_all, labels, cluster_method, event_count): 102 | """ 103 | Hypothesis: the outliers form the cluster that contains the smallest number of data points 104 | Since this implementation focuses on removing anomalies, and considering our data distribution, 105 | this hypothesis has been almost accurate so far 106 | 107 | for dbscan, the noise label (-1) and the label which belongs to the smallest cluster are identified separately 108 | 109 | :param data_all: raw data 110 | :param labels: cluster labels for each point in the raw data, based on the 'cluster_method' 111 | :param cluster_method: should be either 'kmeans' or 'dbscan' type(cluster_method)=string 112 | :param event_count: an array containing the number of data points for each cluster label type(event_count)=np.array, [None,2] 113 | :return: outlier data (and, for dbscan, noise data) as slices of 'data_all' 114 | """ 115 | 116 | if cluster_method == 'kmeans': 117 | smallest_cluster_label = np.argmin(event_count[:, 1]) 118 | outliers_indexes = np.where(labels == smallest_cluster_label)[0] 119 | print('number of outlier datapoints : ', len(outliers_indexes), '\noutlier label : ', smallest_cluster_label) 120 | return data_all.iloc[outliers_indexes] 121 | elif cluster_method == 'dbscan': 122 | smallest_cluster_label = np.argmin(event_count[1:, 1]) 123 | outliers_indexes = np.where(labels == smallest_cluster_label)[0] 124 | print('number of outlier datapoints : ', len(outliers_indexes), '\noutlier label : ', smallest_cluster_label) 125 | noise_indexes = np.where(labels == -1)[0] 126 | # print('number of noise indexes : ', len(noise_indexes)) 127 | return data_all.iloc[outliers_indexes], data_all.iloc[noise_indexes] 128 | else: 129 | raise ValueError("Undefined cluster method, valid values are cluster_method='kmeans' or cluster_method='dbscan'") 130 | 131 | 132 | def get_pca_components(Z): 133 | """ 134 | 135 | :param Z: accepts normalized data, type(Z)=np.array shape(Z)=[n_samples, n_features] 136 | :return: the PCA-transformed data and its first three components (x, y, and z) 137 | """ 138 | pca = PCA(n_components=3) 139 | Z = pca.fit_transform(Z) 140 | x = Z[:, 0] 141 | y = Z[:, 1] 142 | z = Z[:, 2] 143 | return Z, x, y, z 144 | 145 | 146 | def eigenvalue_analysis(data_std, save_fig, image_name): 147 | """ 148 | plot covariance and correlation graphs 149 | plot cumulative explained variance and individual explained variance of the eigenvalues 150 | 151 | :param data_std: data should be standardized type(data_std)=np.array, shape(data_std)=[n_samples, n_features] 152 | :param save_fig: whether to save the plot type(save_fig)=bool 153 | :param image_name: plot title and the file name used in case save_fig=True 154 | :return: 155 | """ 156 | ############### covariance/correlation ######## 157 | cov_mat = np.cov(data_std.T) 158 | # mean_vec = np.mean(data_ma_std, axis=0) 159 | # cov_mat = (data_ma_std - mean_vec).T.dot((data_ma_std - mean_vec)) / (data_ma_std.shape[0]-1) 160 | plt.figure() 161 | plt.imshow(cov_mat, label='covariance of data') 162 | plt.title('covariance of data') 163 | corr_mat = np.corrcoef(data_std.T) 164 | plt.figure() 165 | plt.imshow(corr_mat, label='correlation of data') 166 | plt.title('correlation of data') 167 | ############################################## 168 | 169 | ############### eigenvalues ################## 170 | eig_vals, eig_vecs = np.linalg.eig(cov_mat) 171 | for ev in eig_vecs: 172 | np.testing.assert_array_almost_equal(1.0, np.linalg.norm(ev)) 173 | print('\nEigenvalues \n%s' % eig_vals) 174 | 175 | tot = sum(eig_vals) 176 | var_exp = [(i / tot) * 100 for i in sorted(eig_vals, reverse=True)] 177 | cum_var_exp = np.cumsum(var_exp) 178 | with plt.style.context('seaborn-whitegrid'): 179 | fig = plt.figure() 180 | plt.bar(range(data_std.shape[1]), var_exp, alpha=0.5, align='center', 181 | label='individual explained variance') 182 | plt.step(range(data_std.shape[1]), cum_var_exp, where='mid', 183 | label='cumulative explained variance') 184 | plt.ylabel('Explained variance ratio') 185 | plt.xlabel('Principal components') 186 | 
plt.legend(loc='best') 187 | fig.savefig(image_name + '.png', bbox_inches='tight') 188 | plt.tight_layout() 189 | plt.show() 190 | ############################################## 191 | 192 | 193 | def show_plots(save_fig): 194 | if not save_fig: 195 | plt.show() 196 | -------------------------------------------------------------------------------- /screenshots/current_work.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LahiruJayasinghe/machine-failure-detection/e982c55bf675b5d80f715ab3105fb0a4ab445d9f/screenshots/current_work.PNG -------------------------------------------------------------------------------- /screenshots/dataset_structure.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LahiruJayasinghe/machine-failure-detection/e982c55bf675b5d80f715ab3105fb0a4ab445d9f/screenshots/dataset_structure.PNG -------------------------------------------------------------------------------- /screenshots/results_1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LahiruJayasinghe/machine-failure-detection/e982c55bf675b5d80f715ab3105fb0a4ab445d9f/screenshots/results_1.PNG -------------------------------------------------------------------------------- /screenshots/results_2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LahiruJayasinghe/machine-failure-detection/e982c55bf675b5d80f715ab3105fb0a4ab445d9f/screenshots/results_2.PNG -------------------------------------------------------------------------------- /utils_laj.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import numpy as np 4 | 5 | def cache(cache_path,obj=0): 6 | if os.path.exists(cache_path) and obj==0: 7 | with open(cache_path, mode='rb') as file: 8 | obj = pickle.load(file) 9 | print("- Data loaded from cache-file: " + cache_path) 10 | else: 11 | with open(cache_path, mode='wb') as file: 12 | pickle.dump(obj, file) 13 | print("- Data saved to cache-file: " + cache_path) 14 | 15 | return obj 16 | 17 | def movingavg(data,window): #[n_samples, n_features] 18 | data = np.transpose(data) 19 | if data.ndim > 1 : 20 | tmp = [] 21 | for i in range(data.shape[0]): 22 | ma = movingavg(np.squeeze(data[i]), window) 23 | tmp.append(ma) 24 | smas = np.array(tmp) 25 | else : 26 | w = np.repeat(1.0,window)/window 27 | smas = np.convolve(data,w,'valid') 28 | smas = np.transpose(smas) 29 | return smas #[n_samples, n_features] --------------------------------------------------------------------------------
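
The commented-out preprocessing steps in main.py rely on the two helpers above, cache and movingavg. As a rough orientation, the sketch below shows how they are typically chained on one synchronized CSV produced by DataSeperation.py. The CSV and pickle names are assumptions derived from the settings in main.py (month='Nov', machine='192_168_28_28', discrete sensors), so adjust them to your own folder layout; cache() writes the object on the first call and can later be called without the object argument to reload it from disk.

# Minimal preprocessing sketch (assumed file names, mirroring the commented-out steps in main.py)
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from utils_laj import cache, movingavg

data_all = pd.read_csv('synchronized_data/filtered_events_Nov_192_168_28_28.csv',
                       index_col='cf:timestamp')
values = data_all.values
cls = values[:, -1].astype(np.int32)          # last column is the machine-event class (used later for event-based plots)
features = values[:, :-1].astype(np.float64)  # remaining columns are the sensor readings

data_ma = movingavg(features, window=250)               # smooth each sensor column with a moving average
data_ma_std = StandardScaler().fit_transform(data_ma)   # standardize to zero mean and unit variance

# First run writes the pickle; later runs reload it with cache('pickle/Nov_192_168_28_28_ExcludeEvent_normalized.pkl')
data_ma_std = cache('pickle/Nov_192_168_28_28_ExcludeEvent_normalized.pkl', data_ma_std)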