├── Chapter10_MSPM_Nonlinear ├── KPCA_NOC_data.txt ├── KPCA_faultDetection.ipynb ├── KPCA_faultDetection.py ├── KPCA_test_data.txt ├── KPLS-FaultDetection-numerical-example.ipynb ├── KPLS-regression-numerical-example.ipynb ├── KPLS-regression-numerical-example.py ├── KPLS_FaultDetection-numericalExample.py ├── KerNIPALS.py ├── info.txt └── kernel_utils.py ├── Chapter11_MSPM_Multimode ├── Etch_data_explore.ipynb ├── Etch_data_explore.py ├── GMM_clustering.ipynb ├── GMM_clustering.py ├── GMM_illustration.ipynb ├── GMM_illustration.py ├── MACHINE_Data.mat ├── Metal_etch_complete_data_visualize.ipynb ├── Metal_etch_complete_data_visualize.py ├── ProcessMonitoring_GMM.ipynb ├── ProcessMonitoring_GMM.py ├── info.txt ├── k_means_clustering.ipynb ├── k_means_clustering.py ├── k_means_failure.ipynb └── k_means_failure.py ├── Chapter12_SVM ├── Metal_etch_2DPCA_testData.csv ├── Metal_etch_2DPCA_trainingData.csv ├── SVDD_FaultDetection.ipynb ├── SVDD_FaultDetection.py ├── SVDD_OneClassClassification.ipynb ├── SVDD_OneClassClassification.py ├── SVDD_toyDataset.csv ├── SVM_BinaryClassification.ipynb ├── SVM_BinaryClassification.py ├── SVM_Kernel_BinaryClassification.ipynb ├── SVM_Kernel_BinaryClassification.py ├── SVM_SoftMarginClassification.ipynb ├── SVM_SoftMarginClassification.py ├── info.txt ├── toyDataset.csv └── toyDataset2.csv ├── Chapter13_DT_RF_Ensemble ├── Boiler_emulator_dataset.txt ├── RandomForest_FaultClassification_Boilers.ipynb ├── RandomForest_FaultClassification_Boilers.py ├── XGBoostFaultClassification_Boilers.ipynb ├── XGBoostFaultClassification_Boilers.py └── info.txt ├── Chapter14_ProximityTechniques ├── FD-IF.py ├── FD-KNN.py ├── FD-LOF.py ├── FD_IF.ipynb ├── FD_KNN.ipynb ├── FD_LOF.ipynb ├── MACHINE_Data.mat └── info.txt ├── Chapter15_FDD_Supervised_ANN ├── CCPP_FFNN.ipynb ├── CCPP_FFNN.py ├── Folds5x2_pp.xlsx ├── debutanizer_FaultDetection_FFNN.ipynb ├── debutanizer_FaultDetection_FFNN.py ├── debutanizer_data_withFault.txt └── info.txt ├── Chapter16_FDD_Unsupervised_ANN ├── Autoencoder_DimensionalityReduction_FCCU.ipynb ├── Autoencoder_DimensionalityReduction_FCCU.py ├── Autoencoder_FaultDetection_FCCU.ipynb ├── Autoencoder_FaultDetection_FCCU.py ├── MACHINE_Data.mat ├── NOC_varyingFeedFlow_outputs.csv ├── SOM_fault_detection.ipynb ├── SOM_fault_detection.py ├── SOM_visualization.ipynb ├── SOM_visualization.py ├── UAf_decrease_outputs.csv └── info.txt ├── Chapter17_VCM_SignalProcessing ├── Spectrogram_introduction.ipynb ├── Spectrogram_introduction.py ├── WindTurbineVibration_TimeDomainFeatureExtraction.ipynb ├── WindTurbineVibration_TimeDomainFeatureExtraction.py ├── info.txt ├── spectrum_introduction.ipynb └── spectrum_introduction.py ├── Chapter18_VCM_FaultDetectionDiagnosis ├── CWRU_SVM_FaultClassification.ipynb ├── CWRU_SVM_FaultClassification.py ├── feature_timeDomain_48k_2048_load_1HP.csv └── info.txt ├── Chapter19_PrognosisConcepts ├── WindTurbine_HI.ipynb ├── WindTurbine_HI.py └── info.txt ├── Chapter20_RULEstimation ├── PM_test.txt ├── PM_train.txt ├── PM_truth.txt ├── WindTurbine_RUL.ipynb ├── WindTurbine_RUL.py ├── gasTurbine_RULviaANN.ipynb ├── gasTurbine_RULviaANN.py └── info.txt ├── Chapter2_ScriptingEnvironment ├── NumpyBasics.ipynb ├── NumpyBasics.py ├── PandasBasics.ipynb ├── PandasBasics.py ├── PythonBasics.ipynb ├── PythonBasics.py └── info.txt ├── Chapter3_EDA ├── Dynamics_assessment.ipynb ├── Dynamics_assessment.py ├── Multimodality_assessment.ipynb ├── Multimodality_assessment.py ├── NonGaussianity_assessment.ipynb ├── NonGaussianity_assessment.py ├── 
Nonlinearity_assessment.ipynb ├── Nonlinearity_assessment.py ├── TEP_dataset_assessment.ipynb ├── TEP_dataset_assessment.py ├── d00.dat └── info.txt ├── Chapter4_BestPractices ├── DataBalancing.ipynb ├── DataBalancing.py └── info.txt ├── Chapter5_UnivariateSPC ├── CUSUM_ControlChart.ipynb ├── CUSUM_ControlChart.py ├── CUSUM_ControlChart_AerationTank.ipynb ├── CUSUM_ControlChart_AerationTank.py ├── CUSUM_intro.ipynb ├── CUSUM_intro.py ├── EWMA_ControlChart.ipynb ├── EWMA_ControlChart.py ├── ShewhartControlChart.ipynb ├── ShewhartControlChart.py ├── aeration-rate.csv └── info.txt ├── Chapter6_PatternMatching ├── SteamGenData_DiscordDiscovery.ipynb ├── SteamGenData_HistoricalPatternSearch.ipynb ├── current_steamFlow.txt ├── historical_steamFlow.txt ├── info.txt └── steamgen.dat ├── Chapter7_MSPM_SteadyState1 ├── DimensionalityReduction.ipynb ├── DimensionalityReduction.py ├── LDPE.csv ├── ProcessMonitoring_PCA.ipynb ├── ProcessMonitoring_PCA.py ├── ProcessMonitoring_PLS.ipynb ├── ProcessMonitoring_PLS.py ├── info.txt └── proc1a.xls ├── Chapter8_MSPM_SteadyState2 ├── DimensionalityReduction_FDA.ipynb ├── DimensionalityReduction_FDA.py ├── DimensionalityReduction_ICA.ipynb ├── DimensionalityReduction_ICA.py ├── FDA_illustration.ipynb ├── FDA_illustration.py ├── FaultClassification_FDA.ipynb ├── FaultClassification_FDA.py ├── ICA_illustration.ipynb ├── ICA_illustration.py ├── ProcessMonitoring_ICA.ipynb ├── ProcessMonitoring_ICA.py ├── ProcessMonitoring_PCA.ipynb ├── ProcessMonitoring_PCA.py ├── TEP_data_explore.ipynb ├── TE_processData_explore.py ├── d00.dat ├── d05.dat ├── d05_te.dat ├── d10.dat ├── d10_te.dat ├── d19.dat ├── d19_te.dat └── info.txt ├── Chapter9_MSPM_Dynamic ├── DPCA_FaultDetection.ipynb ├── DPCA_FaultDetection.py ├── LinearRelationshipExtraction_DPCA_versus_PCA.ipynb ├── LinearRelationshipExtraction_DPCA_versus_PCA.py ├── Monitoring_via_CVA_TennesseeEastmanProcess.ipynb ├── Monitoring_via_CVA_TennesseeEastmanProcess.py ├── d00.dat ├── d05_te.dat ├── info.txt ├── multivariate_NOC_data.txt └── multivariate_test_data.txt ├── Images ├── Book3_CoverPage.JPG └── info.txt ├── LICENSE └── README.md /Chapter10_MSPM_Nonlinear/KPCA_faultDetection.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## KPCA-based fault detection 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.decomposition import KernelPCA 10 | 11 | plt.rcParams.update({'font.size': 14}) 12 | 13 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 14 | ## read data 15 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 16 | X_train = np.loadtxt('KPCA_NOC_data.txt') 17 | X_test = np.loadtxt('KPCA_test_data.txt') 18 | 19 | # training data plots 20 | plt.figure(figsize=(15,4)) 21 | plt.plot(X_train[:,0],'.-', color='teal') 22 | plt.ylabel('x1', fontsize=20), plt.xlabel('sample #', fontsize=20) 23 | plt.grid(), plt.title('Training data') 24 | 25 | plt.figure(figsize=(15,4)) 26 | plt.plot(X_train[:,1],'.-', color='teal') 27 | plt.ylabel('x2', fontsize=20), plt.xlabel('sample #', fontsize=20) 28 | plt.grid(), plt.title('Training data') 29 | 30 | plt.figure(figsize=(15,4)) 31 | plt.plot(X_train[:,2],'.-', color='teal') 32 | plt.ylabel('x3', fontsize=20), 
plt.xlabel('sample #', fontsize=20) 33 | plt.grid(), plt.title('Training data') 34 | 35 | # visualize in 3D 36 | # %matplotlib auto 37 | # from mpl_toolkits.mplot3d import Axes3D 38 | # fig = plt.figure() 39 | # ax = Axes3D(fig) 40 | # ax.scatter(X_train[:,0], X_train[:,1], X_train[:,2]) 41 | # ax.set_xlabel('x1') 42 | # ax.set_ylabel('x2') 43 | # ax.set_zlabel('x3') 44 | 45 | # %matplotlib inline 46 | 47 | # test data plots 48 | plt.figure(figsize=(15,4)) 49 | plt.plot(X_test[:,0],'.-', color='teal') 50 | plt.ylabel('x1', fontsize=20), plt.xlabel('sample #', fontsize=20) 51 | plt.grid(), plt.title('Test data') 52 | 53 | plt.figure(figsize=(15,4)) 54 | plt.plot(X_test[:,1],'.-', color='teal') 55 | plt.ylabel('x2', fontsize=20), plt.xlabel('sample #', fontsize=20) 56 | plt.grid(), plt.title('Test data') 57 | 58 | plt.figure(figsize=(15,4)) 59 | plt.plot(X_test[:,2],'.-', color='teal') 60 | plt.ylabel('x3', fontsize=20), plt.xlabel('sample #', fontsize=20) 61 | plt.grid(), plt.title('Test data') 62 | 63 | # visualize in 3D 64 | # %matplotlib auto 65 | # from mpl_toolkits.mplot3d import Axes3D 66 | # fig = plt.figure() 67 | # ax = Axes3D(fig) 68 | # ax.scatter(X_test[:,0], X_test[:,1], X_test[:,2]) 69 | # ax.set_xlabel('x1') 70 | # ax.set_ylabel('x2') 71 | # ax.set_zlabel('x3') 72 | 73 | # %matplotlib inline 74 | 75 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 76 | ## kpca model training 77 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 78 | # scale data 79 | X_scaler = StandardScaler() 80 | X_train_scaled = X_scaler.fit_transform(X_train) 81 | 82 | # fit kPCA model 83 | gamma = 1/((5*3)**2) 84 | kpca = KernelPCA(kernel='rbf', gamma=gamma) 85 | kpca.fit(X_train_scaled) 86 | 87 | # find number of components to retain 88 | eigVals = kpca.eigenvalues_ 89 | eigVals_normalized = eigVals / np.sum(eigVals) 90 | cum_eigVals = 100*np.cumsum(eigVals_normalized) 91 | n_comp = np.argmax(cum_eigVals >= 95) + 1 92 | print('Number of components cumulatively explaining atleast 95% variance: ', n_comp) 93 | 94 | # compute scores for training data 95 | scores_train = kpca.transform(X_train_scaled) # one column for each of the 38 columns 96 | 97 | # compute Q statistics for training data 98 | N = X_train.shape[0] 99 | Q_train = np.zeros((N,1)) 100 | for i in range(N): 101 | Q_train[i] = np.dot(scores_train[i,:], scores_train[i,:]) - np.dot(scores_train[i,:n_comp], scores_train[i,:n_comp]) 102 | Q_CL = np.percentile(Q_train, 95) 103 | 104 | # monitoring chart for training data 105 | plt.figure(), plt.plot(Q_train), plt.plot([1,len(Q_train)],[Q_CL,Q_CL], color='red') 106 | plt.xlabel('Sample #', fontsize=20), plt.ylabel('Q (training data)', fontsize=20), plt.title('KPCA', fontsize=20), plt.grid(), plt.show() 107 | 108 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 109 | ## kpca model test 110 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 111 | X_test_scaled = X_scaler.transform(X_test) 112 | 113 | # compute scores for test data 114 | scores_test = kpca.transform(X_test_scaled) 115 | 116 | # compute Q statistics for test data 117 | N_test = X_test.shape[0] 118 | Q_test = np.zeros((N_test,1)) 119 | for i in range(N_test): 120 | Q_test[i] = np.dot(scores_test[i,:], scores_test[i,:]) - np.dot(scores_test[i,:n_comp], scores_test[i,:n_comp]) 121 | 122 | # monitoring chart for test data 123 | plt.figure(), plt.plot(Q_test), plt.plot([1,len(Q_test)],[Q_CL,Q_CL], color='red') 124 | 
plt.xlabel('Sample #', fontsize=20), plt.ylabel('Q (test data)', fontsize=20), plt.title('KPCA', fontsize=20), plt.grid(), plt.show() 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /Chapter10_MSPM_Nonlinear/KPLS-regression-numerical-example.py: -------------------------------------------------------------------------------- 1 | """ 2 | Application of KPLS for nonlinear regression. 3 | Numerical system taken from "Nonlinear Partial Least Squares: An Overview" by Roman Rosipal (https://www.researchgate.net/publication/266488967_Nonlinear_Partial_Least_Squares_An_Overview) 4 | 5 | 6 | @author: Ankur Kumar @ MLforPSE.com 7 | """ 8 | 9 | #%% packages 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | 13 | from KerNIPALS import KerNIPALS 14 | from kernel_utils import Kernel, Kernel_test 15 | 16 | #%% generate data 17 | def z(x): 18 | return 4.26 * (np.exp(-x) - 4 * np.exp(-2.0*x) + 3 * np.exp(-3.0*x)) 19 | 20 | X = np.linspace(0.0, 3.5, 100)[:,None] # for training 21 | Xt = np.linspace(0, 3.5, 30)[:,None] # for test 22 | 23 | Y_noiseFree = z(X) 24 | Y = Y_noiseFree + np.random.normal(loc=0.0, scale=0.2, size=100)[:,None] 25 | Yt = z(Xt) 26 | 27 | plt.figure() 28 | plt.plot(X, Y_noiseFree, linewidth=2, label='noise-free y') 29 | plt.plot(X, Y, 'r', label='y training') 30 | plt.plot(Xt, Yt, '*m', label='y test') 31 | plt.xlabel('x'), plt.ylabel('y') 32 | plt.grid() 33 | plt.legend() 34 | 35 | #%% center data 36 | Y_center = Y - Y.mean() 37 | Yt_center = Yt - Y.mean() 38 | 39 | #%% KPLS fitting 40 | N = X.shape[0] 41 | Nt = Xt.shape[0] 42 | n_latents = 8 43 | 44 | # kernel matrices 45 | K = Kernel(X, 1.8) 46 | K_t = Kernel_test(X, Xt, 1.8) 47 | 48 | # centralization of kernel matrices 49 | M = np.eye(N) - np.ones((N,N))/N 50 | Mt = np.ones((Nt, N))/N 51 | K_t_bar = np.dot((K_t - np.dot(Mt, K)), M) 52 | K_bar = np.dot(np.dot(M, K), M) 53 | 54 | # fit and predict 55 | [Bf, T, U] = KerNIPALS(K_bar, Y_center, n_latents) 56 | 57 | #%% KPLS predictions 58 | Y_pred = np.dot(K_bar, Bf) + Y.mean() 59 | Yt_pred = np.dot(K_t_bar, Bf) + Y.mean() 60 | 61 | plt.figure() 62 | plt.plot(X, Y, 'r', label='actual y') 63 | plt.plot(X, Y_pred, '*', label='predicted y') 64 | plt.xlabel('x'), plt.ylabel('y') 65 | plt.title('training data') 66 | plt.grid() 67 | plt.legend() 68 | 69 | plt.figure() 70 | plt.plot(Xt, Yt, 'r', label='actual y') 71 | plt.plot(Xt, Yt_pred, '*', label='predicted y') 72 | plt.xlabel('x'), plt.ylabel('y') 73 | plt.title('test data') 74 | plt.grid() 75 | plt.legend() -------------------------------------------------------------------------------- /Chapter10_MSPM_Nonlinear/KerNIPALS.py: -------------------------------------------------------------------------------- 1 | """ 2 | NIPALS implementation for kernel partial least squares. 3 | 4 | @author: Ankur Kumar @ MLforPSE.com 5 | """ 6 | 7 | import numpy as np 8 | import random 9 | 10 | def KerNIPALS(K, Y, nlatents): 11 | """ 12 | NIPALS implementation for kernel partial least squares. 
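Each latent variable is extracted by iterating t = K_res*u (t normalized), c = Y_res'*t, u = Y_res*c (u normalized) until convergence, after which K_res and Y_res are deflated.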
13 | 14 | Args: 15 | K : kernel (Gram) matrix (number of samples X number of samples) 16 | Y : training outputs (number of samples X dimY) 17 | nlatents : number of score vectors to extract 18 | 19 | Returns: 20 | Bf : matrix of dual-form regression coefficients (number of samples X dimY) 21 | T, U : matrix of latent vectors (number of samples X nlatents) 22 | 23 | """ 24 | 25 | max_iterations = 50 26 | crit = 1e-8 27 | 28 | N = K.shape[0] 29 | m = Y.shape[1] 30 | 31 | T = np.empty((N, nlatents)) 32 | U = np.empty((N, nlatents)) 33 | 34 | Kres = np.copy(K) 35 | Yres = np.copy(Y) 36 | 37 | for num_lv in range(nlatents): 38 | print('finding latent #: {}'.format(num_lv+1)) 39 | 40 | #initialization 41 | u = Yres[:, random.randint(0, m-1)][:, None] 42 | iteration_count = 0 43 | convergence_metric = crit * 10.0 44 | 45 | # inner iterations 46 | while iteration_count < max_iterations and convergence_metric > crit: 47 | u_old = np.copy(u) 48 | 49 | t = np.dot(Kres, u) 50 | t = t/np.linalg.norm(t) 51 | c = np.dot(Yres.T, t) 52 | u = np.dot(Yres, c) 53 | u = u/np.linalg.norm(u) 54 | 55 | convergence_metric = np.linalg.norm(u-u_old)/np.linalg.norm(u) 56 | iteration_count += 1 57 | 58 | if iteration_count >= max_iterations: 59 | raise Exception('KPLS failed to converge for component: {}'.format(num_lv+1)) 60 | 61 | # store component 62 | T[:, num_lv] = t[:,0] 63 | U[:, num_lv] = u[:,0] 64 | 65 | # deflate 66 | Ktt = np.dot(np.dot(Kres, t), t.T) 67 | Kres = Kres - Ktt.T - Ktt + np.dot(t, np.dot(t.T, Ktt)) 68 | Yres = Yres - np.dot(t, np.dot(t.T, Yres)) 69 | 70 | # matrix for regression 71 | temp = np.linalg.inv(np.dot(T.T, np.dot(K, U))) 72 | Bf = np.dot(np.dot(np.dot(U, temp), T.T), Y) 73 | 74 | return Bf, T, U -------------------------------------------------------------------------------- /Chapter10_MSPM_Nonlinear/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter10_MSPM_Nonlinear/kernel_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions used to generate Kernel matrices for training and test datasets. 
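The Gaussian (RBF) kernel used is k(x, z) = exp(-||x - z||^2 / width); the squared Euclidean distance is divided directly by the width parameter.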
3 | 4 | @author: Ankur Kumar @ MLforPSE.com 5 | """ 6 | 7 | import numpy as np 8 | 9 | def Kernel(X, width): 10 | """ 11 | 12 | Args: 13 | X - N x dim matrix of input data (number of samples x dimension) 14 | width (float) : width of the Gaussian Kernel 15 | 16 | Returns: 17 | K - N x N kernel matrix 18 | 19 | """ 20 | 21 | N, m = X.shape 22 | K = np.zeros((N, N)) 23 | 24 | for i in range(N): 25 | K[i,i] = 0 26 | for j in range(i+1, N): 27 | vec_diff = X[i,:]-X[j,:] 28 | K[i, j] = np.sum(vec_diff**2) 29 | K[j,i] = K[i,j] 30 | K = np.exp(-K/width) 31 | 32 | return K 33 | 34 | 35 | def Kernel_test(X, Xt, width): 36 | """ 37 | 38 | Args: 39 | X - N x dim matrix of training input data (number of samples x dimension) 40 | Xt - Nt x dim matrix of testing input data (number of samples x dimension) 41 | width (float) : width of the Gaussian Kernel 42 | 43 | Returns: 44 | K_tst - Nt x N kernel matrix 45 | 46 | """ 47 | 48 | N, m = X.shape 49 | Nt, m = Xt.shape 50 | K_tst = np.zeros((Nt, N)) 51 | 52 | for i in range(Nt): 53 | for j in range(N): 54 | vec_diff = Xt[i,:]-X[j,:] 55 | K_tst[i, j] = np.sum(vec_diff**2) 56 | K_tst = np.exp(-K_tst/width) 57 | 58 | return K_tst -------------------------------------------------------------------------------- /Chapter11_MSPM_Multimode/Etch_data_explore.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## exploration of Etch data 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | #%% fetch data 10 | import scipy.io 11 | 12 | matlab_data = scipy.io.loadmat('MACHINE_Data.mat', struct_as_record = False) 13 | Etch_data = matlab_data['LAMDATA'] 14 | calibration_dataAll = Etch_data[0,0].calibration # calibration_dataAll[i,0] corresponds to a 2D data from ith batch where columns correspond to different variables 15 | 16 | variable_names = Etch_data[0,0].variables 17 | 18 | #%% plot data of a particular variable for all calibration experiment 19 | plt.figure() 20 | _ = [plt.plot(calibration_dataAll[expt,0][:,6]) for expt in range(calibration_dataAll.size)] 21 | plt.xlabel('Time (s)') 22 | plt.ylabel(variable_names[6]) 23 | 24 | plt.figure() 25 | _ = [plt.plot(calibration_dataAll[expt,0][:,19]) for expt in range(calibration_dataAll.size)] 26 | plt.xlabel('Time (s)') 27 | plt.ylabel(variable_names[19]) 28 | 29 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 30 | ## perform Multiway PCA 31 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 32 | 33 | #%% generate unfolded data matrix 34 | n_vars = variable_names.size - 2 # first 2 columns are not process variables 35 | n_samples = 85 # following the work of He et al. 36 | 37 | unfolded_dataMatrix = np.empty((1,n_vars*n_samples)) 38 | for expt in range(calibration_dataAll.size): 39 | calibration_expt = calibration_dataAll[expt,0][5:90,2:] # removing first 5 measurements as done in He et al. 
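# batches left with fewer than 85 samples after trimming are skipped below so that every unfolded row has the same length (n_vars*n_samples)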
40 | 41 | if calibration_expt.shape[0] < 85: 42 | continue 43 | 44 | unfolded_row = np.ravel(calibration_expt, order='F')[np.newaxis,:] 45 | unfolded_dataMatrix = np.vstack((unfolded_dataMatrix, unfolded_row)) 46 | 47 | unfolded_dataMatrix = unfolded_dataMatrix[1:,:] 48 | 49 | #%% scale data 50 | from sklearn.preprocessing import StandardScaler 51 | 52 | scaler = StandardScaler() 53 | data_train_normal = scaler.fit_transform(unfolded_dataMatrix) 54 | 55 | #%% PCA 56 | from sklearn.decomposition import PCA 57 | 58 | pca = PCA(n_components = 3) # following the work of He et al. 59 | score_train = pca.fit_transform(data_train_normal) 60 | 61 | #%% visualize in 2D 62 | plt.figure() 63 | plt.scatter(score_train[:,0],score_train[:,1]) 64 | plt.xlabel('PC1 scores') 65 | plt.ylabel('PC2 scores') -------------------------------------------------------------------------------- /Chapter11_MSPM_Multimode/GMM_clustering.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## GMM clustering of Etch data 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.mixture import GaussianMixture 9 | 10 | #%% fetch data 11 | import scipy.io 12 | 13 | matlab_data = scipy.io.loadmat('MACHINE_Data.mat', struct_as_record = False) 14 | Etch_data = matlab_data['LAMDATA'] 15 | calibration_dataAll = Etch_data[0,0].calibration # calibration_dataAll[i,0] corresponds to a 2D data from ith batch where columns correspond to different variables 16 | 17 | variable_names = Etch_data[0,0].variables 18 | 19 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 20 | ## perform Multiway PCA 21 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | 23 | #%% generate unfolded data matrix 24 | n_vars = variable_names.size - 2 # first 2 columns are not process variables 25 | n_samples = 85 # following the work of He et al. 26 | 27 | unfolded_dataMatrix = np.empty((1,n_vars*n_samples)) 28 | for expt in range(calibration_dataAll.size): 29 | calibration_expt = calibration_dataAll[expt,0][5:90,2:] # removing first 5 measurements as done in He et al. 30 | 31 | if calibration_expt.shape[0] < 85: 32 | continue 33 | 34 | unfolded_row = np.ravel(calibration_expt, order='F')[np.newaxis,:] 35 | unfolded_dataMatrix = np.vstack((unfolded_dataMatrix, unfolded_row)) 36 | 37 | unfolded_dataMatrix = unfolded_dataMatrix[1:,:] 38 | 39 | #%% scale data 40 | from sklearn.preprocessing import StandardScaler 41 | 42 | scaler = StandardScaler() 43 | data_train_normal = scaler.fit_transform(unfolded_dataMatrix) 44 | 45 | #%% PCA 46 | from sklearn.decomposition import PCA 47 | 48 | pca = PCA(n_components = 3) # following the work of He et al. 
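# the PCA fit on the next line compresses each unfolded batch into 3 score values; the GMM further below is fitted on these scores rather than on the raw unfolded matrix
# (optional check, not part of the original script: pca.explained_variance_ratio_ can be printed after the fit to see the variance captured by the 3 components)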
49 | score_train = pca.fit_transform(data_train_normal) 50 | 51 | #%% visualize in 2D 52 | plt.figure() 53 | plt.scatter(score_train[:,0],score_train[:,1]) 54 | plt.xlabel('PC1 scores') 55 | plt.ylabel('PC2 scores') 56 | 57 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 58 | ## GMM on PCA scores 59 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 60 | #%% finding # of components via BIC 61 | BICs = [] 62 | lowestBIC = np.inf 63 | for n_cluster in range(1, 10): 64 | gmm = GaussianMixture(n_components = n_cluster, random_state = 100) 65 | gmm.fit(score_train) 66 | BIC = gmm.bic(score_train) 67 | BICs.append(BIC) 68 | 69 | if BIC < lowestBIC: 70 | optimal_n_cluster = n_cluster 71 | lowestBIC = BIC 72 | 73 | plt.figure() 74 | plt.plot(range(1,10), BICs, marker='o') 75 | plt.xlabel('Number of components') 76 | plt.ylabel('BIC') 77 | plt.show() 78 | 79 | #%% fit GMM model to metal-etch data 80 | gmm = GaussianMixture(n_components = optimal_n_cluster, random_state = 100) 81 | cluster_label = gmm.fit_predict(score_train) 82 | 83 | plt.figure() 84 | plt.scatter(score_train[:, 0], score_train[:, 1], c = cluster_label, s=20, cmap='viridis') 85 | plt.xlabel('PC1 scores') 86 | plt.ylabel('PC2 scores') 87 | 88 | cluster_centers = gmm.means_ 89 | cluster_plot_labels = ['Cluster ' + str(i+1) for i in range(optimal_n_cluster)] 90 | for i in range(optimal_n_cluster): 91 | plt.scatter(cluster_centers[i, 0], cluster_centers[i, 1], c='red', s=20, marker = '*', alpha=0.5) 92 | plt.annotate(cluster_plot_labels[i], (cluster_centers[i,0], cluster_centers[i,1])) -------------------------------------------------------------------------------- /Chapter11_MSPM_Multimode/GMM_illustration.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## GMM algorithm for the illustration example 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.datasets import make_blobs 9 | 10 | #%% generate data 11 | n_samples = 1500 12 | X, _ = make_blobs(n_samples=n_samples, random_state=100) 13 | 14 | plt.figure() 15 | plt.scatter(X[:,0], X[:,1]) 16 | 17 | rotation_matrix = [[0.60, -0.70], [-0.5, 0.7]] 18 | X_transformed = np.dot(X, rotation_matrix) 19 | 20 | plt.figure() 21 | plt.scatter(X_transformed[:,0], X_transformed[:,1]) 22 | 23 | #%% fit GMM model 24 | from sklearn.mixture import GaussianMixture 25 | 26 | gmm = GaussianMixture(n_components = 3, random_state = 100) 27 | cluster_label = gmm.fit_predict(X_transformed) 28 | 29 | plt.figure() 30 | plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c = cluster_label, s=20, cmap='viridis') 31 | plt.xlabel('Variable 1') 32 | plt.ylabel('Variable 2') 33 | 34 | cluster_centers = gmm.means_ # cluster centers 35 | cluster_plot_labels = ['Cluster ' + str(i+1) for i in range(gmm.n_components)] 36 | for i in range(gmm.n_components): 37 | plt.scatter(cluster_centers[i, 0], cluster_centers[i, 1], c='red', s=20, marker = '*', alpha=0.5) 38 | plt.annotate(cluster_plot_labels[i], (cluster_centers[i,0], cluster_centers[i,1])) 39 | 40 | #%% membership probabilities 41 | probs = gmm.predict_proba(X_transformed[1069,np.newaxis]) # predict_proba requires 2D array 42 | print('Posterior probablities of clusters 1, 2, 3 for the data-point: ', probs[-1,:]) 43 | 44 | #%% posterior probability 
calculation 45 | x = X_transformed[1069,np.newaxis] 46 | 47 | import scipy.stats 48 | g1 = scipy.stats.multivariate_normal(gmm.means_[0,:], gmm.covariances_[0,:]).pdf(x) 49 | g2 = scipy.stats.multivariate_normal(gmm.means_[1,:], gmm.covariances_[1,:]).pdf(x) 50 | g3 = scipy.stats.multivariate_normal(gmm.means_[2,:], gmm.covariances_[2,:]).pdf(x) 51 | print('Local component densities: ', g1, g2, g3) 52 | 53 | den = gmm.weights_[0]*g1 + gmm.weights_[1]*g2 + gmm.weights_[2]*g3 54 | posterior_prob_cluster1 = gmm.weights_[0]*g1/den 55 | posterior_prob_cluster2 = gmm.weights_[1]*g2/den 56 | posterior_prob_cluster3 = gmm.weights_[2]*g3/den 57 | print('Posterior probabilities: ', posterior_prob_cluster1, posterior_prob_cluster2, posterior_prob_cluster3) 58 | 59 | #%% finding # of components via BIC method 60 | BICs = [] 61 | lowestBIC = np.inf 62 | for n_cluster in range(1, 10): 63 | gmm = GaussianMixture(n_components = n_cluster, random_state = 100) 64 | gmm.fit(X_transformed) 65 | BIC = gmm.bic(X_transformed) 66 | BICs.append(BIC) 67 | 68 | if BIC < lowestBIC: 69 | optimal_n_cluster = n_cluster 70 | lowestBIC = BIC 71 | 72 | plt.figure() 73 | plt.plot(range(1,10), BICs, marker='o') 74 | plt.scatter(optimal_n_cluster, lowestBIC, c='red', marker='*', s=1000) 75 | plt.xlabel('Number of clusters') 76 | plt.ylabel('BIC') 77 | plt.show() 78 | 79 | #%% finding # of components via FJ algorithm 80 | from gmm_mml import GmmMml 81 | gmmFJ = GmmMml(plots=False) 82 | gmmFJ.fit(X_transformed) 83 | cluster_label = gmmFJ.predict(X_transformed) 84 | 85 | plt.figure() 86 | plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c = cluster_label, s=20, cmap='viridis') 87 | plt.xlabel('Variable 1') 88 | plt.ylabel('Variable 2') 89 | 90 | clusters = np.unique(cluster_label) 91 | print(clusters) -------------------------------------------------------------------------------- /Chapter11_MSPM_Multimode/MACHINE_Data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ML-PSE/Machine_Learning_for_PM_and_PdM/96c21a8aeb4177541ea79e13474e099cc5ea00dd/Chapter11_MSPM_Multimode/MACHINE_Data.mat -------------------------------------------------------------------------------- /Chapter11_MSPM_Multimode/Metal_etch_complete_data_visualize.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Training & Test data of Etch dataset 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | #%% fetch data 10 | import scipy.io 11 | 12 | matlab_data = scipy.io.loadmat('MACHINE_Data.mat', struct_as_record = False) 13 | Etch_data = matlab_data['LAMDATA'] 14 | calibration_dataAll = Etch_data[0,0].calibration # calibration_dataAll[i,0] corresponds to a 2D data from ith batch where columns correspond to different variables 15 | test_dataAll = Etch_data[0,0].test 16 | 17 | variable_names = Etch_data[0,0].variables 18 | 19 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 20 | ## perform Multiway PCA 21 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | 23 | #%% generate unfolded data matrix 24 | n_vars = variable_names.size - 2 # first 2 columns are not process variables 25 | n_samples = 85 # following the work of He et al. 
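# batch-wise unfolding below: np.ravel(batch, order='F') stacks the columns (variables) of each 85-sample batch one after another, e.g., np.ravel([[1, 2], [3, 4]], order='F') gives [1, 3, 2, 4], so every batch becomes a single row of length n_vars*n_samples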
26 | 27 | unfolded_dataMatrix = np.empty((1,n_vars*n_samples)) 28 | for expt in range(calibration_dataAll.size): 29 | calibration_expt = calibration_dataAll[expt,0][5:90,2:] # removing first 5 measurements as done in He et al. 30 | 31 | if calibration_expt.shape[0] < 85: 32 | continue 33 | 34 | unfolded_row = np.ravel(calibration_expt, order='F')[np.newaxis,:] 35 | unfolded_dataMatrix = np.vstack((unfolded_dataMatrix, unfolded_row)) 36 | 37 | unfolded_dataMatrix = unfolded_dataMatrix[1:,:] 38 | 39 | #%% generate unfolded test data matrix (with only 85 samples) 40 | unfolded_TestdataMatrix = np.empty((1,n_vars*n_samples)) 41 | for expt in range(test_dataAll.size): 42 | test_expt = test_dataAll[expt,0][5:90,2:] 43 | 44 | if test_expt.shape[0] < 85: 45 | continue 46 | 47 | unfolded_row = np.ravel(test_expt, order='F')[np.newaxis,:] 48 | unfolded_TestdataMatrix = np.vstack((unfolded_TestdataMatrix, unfolded_row)) 49 | 50 | unfolded_TestdataMatrix = unfolded_TestdataMatrix[1:,:] 51 | 52 | #%% scale data 53 | from sklearn.preprocessing import StandardScaler 54 | 55 | scaler = StandardScaler() 56 | data_train_normal = scaler.fit_transform(unfolded_dataMatrix) 57 | data_test_normal = scaler.transform(unfolded_TestdataMatrix) 58 | 59 | #%% PCA 60 | from sklearn.decomposition import PCA 61 | 62 | pca = PCA(n_components = 3) # following the work of He et al. 63 | score_train = pca.fit_transform(data_train_normal) 64 | score_test = pca.transform(data_test_normal) 65 | 66 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 67 | ## visualize in PCA score space 68 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 69 | 70 | #%% visualize in 2D 71 | plt.figure() 72 | plt.scatter(score_train[:,0],score_train[:,1], c='blue', alpha=0.1) 73 | plt.scatter(score_test[:,0],score_test[:,1], c='red', marker = '*') 74 | plt.xlabel('PC1 scores') 75 | plt.ylabel('PC2 scores') 76 | 77 | #%% visualize in 3D 78 | from mpl_toolkits.mplot3d import Axes3D 79 | fig = plt.figure() 80 | ax = Axes3D(fig) 81 | ax.scatter(score_train[:,0],score_train[:,1],score_train[:,2], c='blue', alpha=0.1) 82 | ax.scatter(score_test[:,0],score_test[:,1],score_test[:,2], c='red', marker = '*') 83 | ax.set_xlabel('PC1 scores') 84 | ax.set_ylabel('PC2 scores') 85 | ax.set_zlabel('PC3 scores') -------------------------------------------------------------------------------- /Chapter11_MSPM_Multimode/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter11_MSPM_Multimode/k_means_clustering.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## K-Means clustering of Etch data 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.cluster import KMeans 9 | 10 | #%% fetch data 11 | import scipy.io 12 | 13 | matlab_data = scipy.io.loadmat('MACHINE_Data.mat', struct_as_record = False) 14 | Etch_data = matlab_data['LAMDATA'] 15 | calibration_dataAll = Etch_data[0,0].calibration # calibration_dataAll[i,0] corresponds to a 2D data from ith batch where columns correspond to different variables 16 | 17 | variable_names = Etch_data[0,0].variables 18 | 19 | 
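# the unfolding and PCA steps below repeat the preprocessing used in Etch_data_explore.py; K-Means is then applied to the resulting PCA scores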
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 20 | ## perform Multiway PCA 21 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | 23 | #%% generate unfolded data matrix 24 | n_vars = variable_names.size - 2 # first 2 columns are not process variables 25 | n_samples = 85 # following the work of He et al. 26 | 27 | unfolded_dataMatrix = np.empty((1,n_vars*n_samples)) 28 | for expt in range(calibration_dataAll.size): 29 | calibration_expt = calibration_dataAll[expt,0][5:90,2:] # removing first 5 measurements as done in He et al. 30 | 31 | if calibration_expt.shape[0] < 85: 32 | continue 33 | 34 | unfolded_row = np.ravel(calibration_expt, order='F')[np.newaxis,:] 35 | unfolded_dataMatrix = np.vstack((unfolded_dataMatrix, unfolded_row)) 36 | 37 | unfolded_dataMatrix = unfolded_dataMatrix[1:,:] 38 | 39 | #%% scale data 40 | from sklearn.preprocessing import StandardScaler 41 | 42 | scaler = StandardScaler() 43 | data_train_normal = scaler.fit_transform(unfolded_dataMatrix) 44 | 45 | #%% PCA 46 | from sklearn.decomposition import PCA 47 | 48 | pca = PCA(n_components = 3) # following the work of He et al. 49 | score_train = pca.fit_transform(data_train_normal) 50 | 51 | #%% visualize in 2D 52 | plt.figure() 53 | plt.scatter(score_train[:,0],score_train[:,1]) 54 | plt.xlabel('PC1 scores') 55 | plt.ylabel('PC2 scores') 56 | 57 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 58 | ## K-Means on PCA scores 59 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 60 | 61 | #%% determining number of clusters via elbow method 62 | SSEs = [] 63 | for n_cluster in range(1, 10): 64 | kmeans = KMeans(n_clusters = n_cluster, random_state = 100).fit(score_train) 65 | SSEs.append(kmeans.inertia_) 66 | 67 | plt.figure() 68 | plt.plot(range(1,10), SSEs, marker='o') 69 | plt.xlabel('Number of clusters') 70 | plt.ylabel('SSEs') 71 | plt.show() 72 | 73 | #%% fit k-means model 74 | n_cluster = 3 75 | kmeans = KMeans(n_clusters = n_cluster, random_state = 100).fit(score_train) 76 | cluster_label = kmeans.predict(score_train) # can also use kmeans.labels_ 77 | 78 | plt.figure() 79 | plt.scatter(score_train[:, 0], score_train[:, 1], c = cluster_label, s = 20, cmap = 'viridis') 80 | plt.xlabel('PC1 scores') 81 | plt.ylabel('PC2 scores') 82 | 83 | cluster_centers = kmeans.cluster_centers_ 84 | cluster_plot_labels = ['Cluster ' + str(i+1) for i in range(n_cluster)] 85 | for i in range(n_cluster): 86 | plt.scatter(cluster_centers[i,0], cluster_centers[i,1], c = 'red', s = 40, marker = '*', alpha = 0.9) 87 | plt.annotate(cluster_plot_labels[i], (cluster_centers[i,0], cluster_centers[i,1])) 88 | 89 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 90 | ## Analyzing cluster quality via silhouette plots 91 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 92 | 93 | #%% average silhouette score 94 | from sklearn.metrics import silhouette_samples, silhouette_score 95 | 96 | silhouette_avgValue = silhouette_score(score_train, cluster_label) 97 | print('Average silhouette score is :', silhouette_avgValue) 98 | 99 | #%% silhouette plot 100 | from matplotlib import cm 101 | 102 | plt.figure() 103 | silhouette_values = silhouette_samples(score_train, cluster_label) 104 | y_lower, y_upper = 0, 0 105 | yticks = [] 106 | for i in range(n_cluster): 107 | cluster_silhouette_vals = silhouette_values[cluster_label == i] 108 | 
cluster_silhouette_vals.sort() 109 | 110 | y_upper += len(cluster_silhouette_vals) 111 | color = cm.nipy_spectral(i / n_cluster) 112 | plt.barh(range(y_lower, y_upper),cluster_silhouette_vals,height=1.0,edgecolor='none',color=color) 113 | 114 | yticks.append((y_lower + y_upper) / 2) 115 | y_lower += len(cluster_silhouette_vals) 116 | 117 | plt.axvline(silhouette_avgValue, color="red", linestyle="--") 118 | plt.yticks(yticks, np.arange(n_cluster)+1) 119 | plt.xlabel('Silhouette coefficient values') 120 | plt.ylabel('Cluster') -------------------------------------------------------------------------------- /Chapter11_MSPM_Multimode/k_means_failure.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## An illustration where K-Means clustering fails 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.cluster import KMeans 9 | 10 | #%% generate ellipsoidal shaped data 11 | from sklearn.datasets import make_blobs 12 | 13 | n_samples = 1500 14 | X, y = make_blobs(n_samples=n_samples, random_state=100) 15 | 16 | plt.figure() 17 | plt.scatter(X[:,0], X[:,1]) 18 | 19 | rotation_matrix = [[0.60, -0.70], [-0.5, 0.7]] 20 | X_transformed = np.dot(X, rotation_matrix) 21 | 22 | plt.figure() 23 | plt.scatter(X_transformed[:,0], X_transformed[:,1]) 24 | 25 | #%% determining number of clusters via elbow method 26 | SSEs = [] 27 | for n_cluster in range(1, 10): 28 | kmeans = KMeans(n_clusters = n_cluster, random_state = 100).fit(X_transformed) 29 | SSEs.append(kmeans.inertia_) 30 | 31 | plt.figure() 32 | plt.plot(range(1,10), SSEs, marker='o') 33 | plt.xlabel('Number of clusters') 34 | plt.ylabel('SSEs') 35 | plt.show() 36 | 37 | #%% fit k-means model 38 | n_cluster = 3 39 | kmeans = KMeans(n_clusters = n_cluster, random_state = 100).fit(X_transformed) 40 | cluster_label = kmeans.predict(X_transformed) # can also use kmeans.labels_ 41 | 42 | plt.figure() 43 | plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c = cluster_label, s=20, cmap='viridis') 44 | plt.xlabel('Variable 1') 45 | plt.ylabel('Variable 2') 46 | 47 | cluster_centers = kmeans.cluster_centers_ 48 | cluster_plot_labels = ['Cluster ' + str(i+1) for i in range(n_cluster)] 49 | for i in range(n_cluster): 50 | plt.scatter(cluster_centers[i, 0], cluster_centers[i, 1], c='red', s=20, marker = '*', alpha=0.5) 51 | plt.annotate(cluster_plot_labels[i], (cluster_centers[i,0], cluster_centers[i,1])) -------------------------------------------------------------------------------- /Chapter12_SVM/Metal_etch_2DPCA_testData.csv: -------------------------------------------------------------------------------- 1 | -1.590060264426986514e+01,1.959766933190950411e+01 2 | -1.547209007164751782e+01,1.924510277047016871e+01 3 | -1.572175965451248203e+01,1.812024393789404186e+01 4 | -1.491358767453154854e+01,1.743093464871702736e+01 5 | -1.456929132146523287e+01,1.677789603576111688e+01 6 | -1.444835534419145873e+01,1.695678572983727861e+01 7 | -1.408454239087918403e+01,1.760958541734841987e+01 8 | -1.526897706994721204e+01,1.720419904104799258e+01 9 | -1.404317902280828712e+01,1.492823556451696021e+01 10 | -1.444869488334333596e+01,1.350007524485155308e+01 11 | -1.538193166683978674e+01,1.649092300547190604e+01 12 | -1.440788870600578520e+01,1.489717374688225426e+01 13 | 
-1.465195399885972449e+01,1.503053334574198274e+01 14 | -1.523574610361107240e+01,1.437237445913324407e+01 15 | -1.480756533188320745e+01,1.394324735613628263e+01 16 | -1.473682853600041653e+01,1.363135119134676465e+01 17 | -1.388771542072477594e+01,1.530117335427214798e+01 18 | -1.372026149411241747e+01,1.367799826914744443e+01 19 | -1.385215377585273799e+01,1.389414397734873319e+01 20 | -1.454891809260755053e+01,1.247368480749638486e+01 21 | -1.339630817672150975e+01,1.134255947780724760e+01 22 | -1.375942953532387847e+01,1.348794314870175981e+01 23 | -1.379974406716709012e+01,1.090180744409004454e+01 24 | -1.253032768611273795e+01,1.350006163340349197e+01 25 | -1.311352603997067057e+01,1.177976171907386949e+01 26 | -1.291244858566835951e+01,1.228225699018055650e+01 27 | -1.282437159013584704e+01,1.186396260587084761e+01 28 | -1.265952865712230135e+01,1.039147848533183804e+01 29 | -1.313141596171455383e+01,1.178849730772767224e+01 30 | -1.289663350311743883e+01,1.435611979807014471e+01 31 | -1.306352043972883870e+01,1.252944062896454547e+01 32 | -1.321191935853068422e+01,1.019836619965118629e+01 33 | -1.097721961451119022e+01,9.049979573497141772e+00 34 | -1.112686086462622193e+01,7.644722169015463642e+00 35 | -1.713955935401163089e+01,-5.578327060160934714e+00 36 | -1.592480916209403752e+01,-8.508265369385002330e+00 37 | -1.471336363000017577e+01,-9.490933648576531212e+00 38 | -1.412443410579964187e+01,-7.265822627898478991e+00 39 | -1.428621258675190830e+01,-1.046416172884392815e+01 40 | -1.434939051872324711e+01,-1.066675168384127304e+01 41 | -1.318427628757119940e+01,-1.317179202987081688e+01 42 | -1.399002077321009452e+01,-1.597314348991336175e+01 43 | -1.264027961705749448e+01,-1.311115581850450873e+01 44 | -1.167927667779478185e+01,-1.548375029057391572e+01 45 | -1.318247005838433950e+01,-1.825629474455989509e+01 46 | -1.209726233142010976e+01,-1.634187190569486958e+01 47 | -1.362264645493182869e+01,-1.361133566942143069e+01 48 | -1.272135838153200460e+01,-1.753879566898303821e+01 49 | -1.257616012113411230e+01,-1.326542907798231674e+01 50 | -1.196867099422467007e+01,-1.511423084952975238e+01 51 | -1.177892808109381839e+01,-1.877335605902641902e+01 52 | -1.149040204836195578e+01,-1.421199622215757330e+01 53 | -1.326989198966397332e+01,-1.649822623080184769e+01 54 | -1.267251862860992695e+01,-1.244267901794849251e+01 55 | -1.233690606095980868e+01,-1.292273546574902809e+01 56 | -1.198396769156340902e+01,-1.255460107767917499e+01 57 | -1.457189230373988664e+01,-1.746780674466677041e+01 58 | -1.282400686682571234e+01,-1.667625655239982763e+01 59 | -1.236772533340117874e+01,-1.395597883448890819e+01 60 | -1.106485330279897283e+01,-1.769380792570366268e+01 61 | -1.188454558254445814e+01,-1.272441365623174470e+01 62 | -1.307046469317520376e+01,-1.059438152709463488e+01 63 | -1.324191789690385512e+01,-1.685020976613154176e+01 64 | -1.081773207258006408e+01,-1.301146127447871415e+01 65 | -1.163313829429259805e+01,-1.332801681904053126e+01 66 | -1.089699194013976680e+01,-1.431002296554136244e+01 67 | -9.998364341872099104e+00,-1.445167552353228047e+01 68 | -1.012063687374656418e+01,-1.465249564077271849e+01 69 | -1.129115140083126967e+01,-1.523611644720196523e+01 70 | -1.227310939836927517e+01,-1.705765974945707342e+01 71 | 2.711764588684072663e+01,2.434701313366004705e+00 72 | 2.492495930045946650e+01,1.639960738169884413e+00 73 | 2.426941596462805606e+01,1.789157086327504809e+00 74 | 2.302252131973602189e+01,4.670294689111417874e+00 75 | 2.430016884058502669e+01,7.000373847842803476e-01 
76 | 2.325421067446470857e+01,1.374405973782110824e+00 77 | 2.394162895524445389e+01,1.590654444987497085e+00 78 | 2.243544068599467067e+01,7.084884716643469194e-01 79 | 2.320155943704745383e+01,1.621145130468390372e+00 80 | 2.219829811702116373e+01,2.979719539352659652e+00 81 | 2.385444875050709612e+01,1.055344306782977526e+00 82 | 2.379140796780362876e+01,1.979767029457547789e+00 83 | 2.354037730005953222e+01,2.249025394497004893e+00 84 | 2.316393953191440502e+01,-3.522178580036868234e-01 85 | 2.234564478714399627e+01,2.087072173607444014e+00 86 | 2.350611634937974515e+01,3.009487178965839060e+00 87 | 2.235786318301708064e+01,3.390229423734798075e+00 88 | 2.260682729269416313e+01,1.470139858457315096e+00 89 | 2.280940276560387758e+01,3.481589318628476093e+00 90 | 2.480547493603340570e+01,3.577622759131037888e+00 91 | 2.721710729624222225e+01,-1.127524601548393290e+00 92 | 2.419269282598034465e+01,2.457877826750805017e+00 93 | 2.632500091457416858e+01,-4.269781028722355209e+00 94 | 2.711796923963038708e+01,-4.144848233506450152e-01 95 | 2.682036697016721405e+01,6.401670610583477528e-01 96 | 2.613154867476419341e+01,1.468532200569654458e-02 97 | 2.675858719774519656e+01,-1.470567358353822796e+00 98 | 2.743497861954157102e+01,1.940827016079831602e-01 99 | 2.712352658167176500e+01,-1.047604743073245359e+00 100 | 2.764490604918904282e+01,-6.345610637026291023e-01 101 | 2.660992439681393051e+01,-2.392968975197947046e+00 102 | 2.757612357903460421e+01,-2.546628304632092110e+00 103 | 2.673582846881784647e+01,-1.389296149867934815e+00 104 | 2.714521821597164930e+01,-1.027199248376986784e-01 105 | 2.707042584472434754e+01,-4.092589228500159160e-01 106 | 2.828955505575184048e+01,-3.724282735181374093e+00 107 | 2.965352116386102921e+01,-2.178087890250238079e+00 108 | -------------------------------------------------------------------------------- /Chapter12_SVM/Metal_etch_2DPCA_trainingData.csv: -------------------------------------------------------------------------------- 1 | -1.590060264426986514e+01,1.959766933190950411e+01 2 | -1.547209007164751782e+01,1.924510277047016871e+01 3 | -1.572175965451248203e+01,1.812024393789404186e+01 4 | -1.491358767453154854e+01,1.743093464871702736e+01 5 | -1.456929132146523287e+01,1.677789603576111688e+01 6 | -1.444835534419145873e+01,1.695678572983727861e+01 7 | -1.408454239087918403e+01,1.760958541734841987e+01 8 | -1.526897706994721204e+01,1.720419904104799258e+01 9 | -1.404317902280828712e+01,1.492823556451696021e+01 10 | -1.444869488334333596e+01,1.350007524485155308e+01 11 | -1.538193166683978674e+01,1.649092300547190604e+01 12 | -1.440788870600578520e+01,1.489717374688225426e+01 13 | -1.465195399885972449e+01,1.503053334574198274e+01 14 | -1.523574610361107240e+01,1.437237445913324407e+01 15 | -1.480756533188320745e+01,1.394324735613628263e+01 16 | -1.473682853600041653e+01,1.363135119134676465e+01 17 | -1.388771542072477594e+01,1.530117335427214798e+01 18 | -1.372026149411241747e+01,1.367799826914744443e+01 19 | -1.385215377585273799e+01,1.389414397734873319e+01 20 | -1.454891809260755053e+01,1.247368480749638486e+01 21 | -1.339630817672150975e+01,1.134255947780724760e+01 22 | -1.375942953532387847e+01,1.348794314870175981e+01 23 | -1.379974406716709012e+01,1.090180744409004454e+01 24 | -1.253032768611273795e+01,1.350006163340349197e+01 25 | -1.311352603997067057e+01,1.177976171907386949e+01 26 | -1.291244858566835951e+01,1.228225699018055650e+01 27 | -1.282437159013584704e+01,1.186396260587084761e+01 28 | 
-1.265952865712230135e+01,1.039147848533183804e+01 29 | -1.313141596171455383e+01,1.178849730772767224e+01 30 | -1.289663350311743883e+01,1.435611979807014471e+01 31 | -1.306352043972883870e+01,1.252944062896454547e+01 32 | -1.321191935853068422e+01,1.019836619965118629e+01 33 | -1.097721961451119022e+01,9.049979573497141772e+00 34 | -1.112686086462622193e+01,7.644722169015463642e+00 35 | -1.713955935401163089e+01,-5.578327060160934714e+00 36 | -1.592480916209403752e+01,-8.508265369385002330e+00 37 | -1.471336363000017577e+01,-9.490933648576531212e+00 38 | -1.412443410579964187e+01,-7.265822627898478991e+00 39 | -1.428621258675190830e+01,-1.046416172884392815e+01 40 | -1.434939051872324711e+01,-1.066675168384127304e+01 41 | -1.318427628757119940e+01,-1.317179202987081688e+01 42 | -1.399002077321009452e+01,-1.597314348991336175e+01 43 | -1.264027961705749448e+01,-1.311115581850450873e+01 44 | -1.167927667779478185e+01,-1.548375029057391572e+01 45 | -1.318247005838433950e+01,-1.825629474455989509e+01 46 | -1.209726233142010976e+01,-1.634187190569486958e+01 47 | -1.362264645493182869e+01,-1.361133566942143069e+01 48 | -1.272135838153200460e+01,-1.753879566898303821e+01 49 | -1.257616012113411230e+01,-1.326542907798231674e+01 50 | -1.196867099422467007e+01,-1.511423084952975238e+01 51 | -1.177892808109381839e+01,-1.877335605902641902e+01 52 | -1.149040204836195578e+01,-1.421199622215757330e+01 53 | -1.326989198966397332e+01,-1.649822623080184769e+01 54 | -1.267251862860992695e+01,-1.244267901794849251e+01 55 | -1.233690606095980868e+01,-1.292273546574902809e+01 56 | -1.198396769156340902e+01,-1.255460107767917499e+01 57 | -1.457189230373988664e+01,-1.746780674466677041e+01 58 | -1.282400686682571234e+01,-1.667625655239982763e+01 59 | -1.236772533340117874e+01,-1.395597883448890819e+01 60 | -1.106485330279897283e+01,-1.769380792570366268e+01 61 | -1.188454558254445814e+01,-1.272441365623174470e+01 62 | -1.307046469317520376e+01,-1.059438152709463488e+01 63 | -1.324191789690385512e+01,-1.685020976613154176e+01 64 | -1.081773207258006408e+01,-1.301146127447871415e+01 65 | -1.163313829429259805e+01,-1.332801681904053126e+01 66 | -1.089699194013976680e+01,-1.431002296554136244e+01 67 | -9.998364341872099104e+00,-1.445167552353228047e+01 68 | -1.012063687374656418e+01,-1.465249564077271849e+01 69 | -1.129115140083126967e+01,-1.523611644720196523e+01 70 | -1.227310939836927517e+01,-1.705765974945707342e+01 71 | 2.711764588684072663e+01,2.434701313366004705e+00 72 | 2.492495930045946650e+01,1.639960738169884413e+00 73 | 2.426941596462805606e+01,1.789157086327504809e+00 74 | 2.302252131973602189e+01,4.670294689111417874e+00 75 | 2.430016884058502669e+01,7.000373847842803476e-01 76 | 2.325421067446470857e+01,1.374405973782110824e+00 77 | 2.394162895524445389e+01,1.590654444987497085e+00 78 | 2.243544068599467067e+01,7.084884716643469194e-01 79 | 2.320155943704745383e+01,1.621145130468390372e+00 80 | 2.219829811702116373e+01,2.979719539352659652e+00 81 | 2.385444875050709612e+01,1.055344306782977526e+00 82 | 2.379140796780362876e+01,1.979767029457547789e+00 83 | 2.354037730005953222e+01,2.249025394497004893e+00 84 | 2.316393953191440502e+01,-3.522178580036868234e-01 85 | 2.234564478714399627e+01,2.087072173607444014e+00 86 | 2.350611634937974515e+01,3.009487178965839060e+00 87 | 2.235786318301708064e+01,3.390229423734798075e+00 88 | 2.260682729269416313e+01,1.470139858457315096e+00 89 | 2.280940276560387758e+01,3.481589318628476093e+00 90 | 2.480547493603340570e+01,3.577622759131037888e+00 91 | 
2.721710729624222225e+01,-1.127524601548393290e+00 92 | 2.419269282598034465e+01,2.457877826750805017e+00 93 | 2.632500091457416858e+01,-4.269781028722355209e+00 94 | 2.711796923963038708e+01,-4.144848233506450152e-01 95 | 2.682036697016721405e+01,6.401670610583477528e-01 96 | 2.613154867476419341e+01,1.468532200569654458e-02 97 | 2.675858719774519656e+01,-1.470567358353822796e+00 98 | 2.743497861954157102e+01,1.940827016079831602e-01 99 | 2.712352658167176500e+01,-1.047604743073245359e+00 100 | 2.764490604918904282e+01,-6.345610637026291023e-01 101 | 2.660992439681393051e+01,-2.392968975197947046e+00 102 | 2.757612357903460421e+01,-2.546628304632092110e+00 103 | 2.673582846881784647e+01,-1.389296149867934815e+00 104 | 2.714521821597164930e+01,-1.027199248376986784e-01 105 | 2.707042584472434754e+01,-4.092589228500159160e-01 106 | 2.828955505575184048e+01,-3.724282735181374093e+00 107 | 2.965352116386102921e+01,-2.178087890250238079e+00 108 | -------------------------------------------------------------------------------- /Chapter12_SVM/SVDD_FaultDetection.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Process Fault Detection via SVDD in metal etch dataset 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% read data 6 | import numpy as np 7 | 8 | X_train = np.loadtxt('Metal_etch_2DPCA_trainingData.csv', delimiter=',') 9 | 10 | #%% bandwidth via modified mean criteria 11 | import scipy.spatial 12 | 13 | N = X_train.shape[0] 14 | phi = 1/np.log(N-1) 15 | delta = -0.14818008*np.power(phi,4) + 0.2846623624*np.power(phi,3) - 0.252853808*np.power(phi,2) + 0.159059498*phi - 0.001381145 16 | D2 = np.sum(scipy.spatial.distance.pdist(X_train, 'sqeuclidean'))/(N*(N-1)/2) # pdist computes pairwise distances between observations 17 | sigma = np.sqrt(D2/np.log((N-1)/delta*delta)) 18 | gamma = 1/(2*sigma*sigma) 19 | 20 | #%% SVM fit 21 | from sklearn.svm import OneClassSVM 22 | 23 | model = OneClassSVM(nu=0.01, gamma=0.025).fit(X_train) # nu corresponds to f 24 | 25 | #%% predict for test data 26 | X_test = np.loadtxt('Metal_etch_2DPCA_testData.csv', delimiter=',') 27 | y_test = model.predict(X_test) # y=-1 for outliers 28 | 29 | print('Number of faults identified: ', np.sum(y_test == -1), ' out of ', len(y_test)) 30 | 31 | #%% plot SVDD boundaries 32 | import matplotlib.pyplot as plt 33 | 34 | plt.figure() 35 | plt.scatter(X_train[:, 0], X_train[:, 1], edgecolors='k', alpha=0.8) 36 | plt.xlabel('PC1 scores') 37 | plt.ylabel('PC2 scores') 38 | 39 | # get axis limits 40 | ax = plt.gca() 41 | xlim = ax.get_xlim() 42 | ylim = ax.get_ylim() 43 | 44 | # create grid to evaluate model 45 | xx = np.linspace(xlim[0], xlim[1], 100) 46 | yy = np.linspace(ylim[0], ylim[1], 100) 47 | YY, XX = np.meshgrid(yy, xx) 48 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 49 | Z = model.decision_function(xy).reshape(XX.shape) 50 | 51 | # plot decision boundary and supporting planes 52 | ax.contour(XX, YY, Z, levels=[0], alpha=0.9, linestyles=['-'], colors=['red']) 53 | 54 | #%% plot test data 55 | plt.scatter(X_test[y_test==-1, 0],X_test[y_test==-1,1], c='red', marker = '*', label='True Positive') 56 | plt.scatter(X_test[y_test==1, 0],X_test[y_test==1,1], c='magenta', marker = '*', label='False Negative') 57 | plt.legend() 58 | -------------------------------------------------------------------------------- /Chapter12_SVM/SVDD_OneClassClassification.py: 
-------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Nonlinear boundary generation via One Class SVM / SVDD 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% generate data 6 | import numpy as np 7 | 8 | X = np.loadtxt('SVDD_toyDataset.csv', delimiter=',') 9 | 10 | #%% bandwidth via modified mean criteria 11 | import scipy.spatial 12 | 13 | N = X.shape[0] 14 | phi = 1/np.log(N-1) 15 | delta = -0.14818008*np.power(phi,4) + 0.2846623624*np.power(phi,3) - 0.252853808*np.power(phi,2) + 0.159059498*phi - 0.001381145 16 | D2 = np.sum(scipy.spatial.distance.pdist(X, 'sqeuclidean'))/(N*(N-1)/2) # pdist computes pairwise distances between observations 17 | sigma = np.sqrt(D2/np.log((N-1)/delta*delta)) 18 | gamma = 1/(2*sigma*sigma) 19 | 20 | #%% SVM fit 21 | from sklearn.svm import OneClassSVM 22 | 23 | model = OneClassSVM(nu=0.01, gamma=5) 24 | model.fit(X) 25 | 26 | #%% plot SVM boundaries 27 | import matplotlib.pyplot as plt 28 | 29 | plt.figure() 30 | plt.scatter(X[:, 0], X[:, 1], edgecolors='k', alpha=0.8) 31 | plt.xlabel('x1') 32 | plt.ylabel('x2') 33 | 34 | # get axis limits 35 | ax = plt.gca() 36 | xlim = ax.get_xlim() 37 | ylim = ax.get_ylim() 38 | 39 | # create grid to evaluate model 40 | xx = np.linspace(xlim[0], xlim[1], 100) 41 | yy = np.linspace(ylim[0], ylim[1], 100) 42 | YY, XX = np.meshgrid(yy, xx) 43 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 44 | Z = model.decision_function(xy).reshape(XX.shape) 45 | 46 | # plot decision boundary and supporting planes 47 | ax.contour(XX, YY, Z, levels=[0], alpha=0.9, linestyles=['-'], colors=['red']) -------------------------------------------------------------------------------- /Chapter12_SVM/SVM_BinaryClassification.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Binary classification via SVM on toy dataset 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% read data 6 | import numpy as np 7 | 8 | data = np.loadtxt('toyDataset.csv', delimiter=',') 9 | X = data[:, [0, 1]]; y = data[:, 2] 10 | 11 | #%% scale model inputs 12 | from sklearn.preprocessing import StandardScaler 13 | 14 | scaler = StandardScaler() 15 | X_scaled = scaler.fit_transform(X) 16 | 17 | #%% fit SVM model 18 | from sklearn.svm import SVC # for large datasets LinearSVC class is preferable 19 | 20 | model = SVC(kernel='linear', C=100) 21 | model.fit(X_scaled, y) # note that 22 | 23 | #%% get details of support vectors 24 | print('# of support vectors:', len(model.support_)) 25 | 26 | #%% plot SVM boundaries 27 | import matplotlib.pyplot as plt 28 | 29 | plt.figure() 30 | plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k') 31 | 32 | # get axis limits 33 | ax = plt.gca() 34 | xlim = ax.get_xlim() 35 | ylim = ax.get_ylim() 36 | 37 | # create grid to evaluate model 38 | xx = np.linspace(xlim[0], xlim[1], 100) 39 | yy = np.linspace(ylim[0], ylim[1], 100) 40 | YY, XX = np.meshgrid(yy, xx) 41 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 42 | Z = model.decision_function(xy).reshape(XX.shape) 43 | 44 | # plot decision boundary and supporting planes 45 | ax.contour(XX, YY, Z, levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'], colors=['green', 'red', 'green']) 46 | 47 | # highlight support vectors 48 | 
ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=200, linewidth=2, alpha=0.25) 49 | 50 | -------------------------------------------------------------------------------- /Chapter12_SVM/SVM_Kernel_BinaryClassification.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Nonlinear binary classification via kernel SVM on toy dataset 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% generate data 6 | import matplotlib.pyplot as plt 7 | from sklearn.datasets import make_circles 8 | 9 | X, y = make_circles(500, factor=.08, noise=.1, random_state=1) 10 | # note that y = 0,1 here and need not be +-1; SVM does internal transformation accordingly 11 | 12 | # plot 13 | plt.figure() 14 | plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k') 15 | plt.xlabel('x1') 16 | plt.ylabel('x2') 17 | plt.title('raw data') 18 | 19 | #%% find optimal hyperparameter via GridSearchCV 20 | from sklearn.svm import SVC 21 | from sklearn.model_selection import GridSearchCV 22 | 23 | param_grid = {'C':[0.1, 1, 10, 100, 1000], 'gamma':[0.01, 0.1, 1, 10, 100]} 24 | gs = GridSearchCV(SVC(), param_grid, cv=5).fit(X, y) # no scaling required as input variables are already scaled 25 | 26 | print('Optimal hyperparameter:', gs.best_params_) 27 | 28 | #%% plot model predictions 29 | y_predicted = gs.predict(X) 30 | 31 | # plot 32 | plt.figure() 33 | plt.scatter(X[:, 0], X[:, 1], c=y_predicted, cmap=plt.cm.Paired, edgecolors='k') 34 | plt.xlabel('x1') 35 | plt.ylabel('x2') 36 | plt.title('predictions') 37 | 38 | #%% plot SVM boundaries 39 | plt.figure() 40 | plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k') 41 | 42 | # get axis limits 43 | ax = plt.gca() 44 | xlim = ax.get_xlim() 45 | ylim = ax.get_ylim() 46 | 47 | # create grid to evaluate model 48 | import numpy as np 49 | xx = np.linspace(xlim[0], xlim[1], 100) 50 | yy = np.linspace(ylim[0], ylim[1], 100) 51 | YY, XX = np.meshgrid(yy, xx) 52 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 53 | Z = gs.decision_function(xy).reshape(XX.shape) 54 | 55 | # plot decision boundary and supporting planes 56 | ax.contour(XX, YY, Z, levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'], colors=['green', 'red', 'green']) -------------------------------------------------------------------------------- /Chapter12_SVM/SVM_SoftMarginClassification.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Binary classification via SVM on toy dataset 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% read data 6 | import numpy as np 7 | 8 | data = np.loadtxt('toyDataset2.csv', delimiter=',') 9 | X = data[:,0:2]; y = data[:,2] 10 | 11 | #%% scale model inputs 12 | from sklearn.preprocessing import StandardScaler 13 | 14 | scaler = StandardScaler() 15 | X_scaled = scaler.fit_transform(X) 16 | 17 | #%% SVM fit 18 | from sklearn.svm import SVC 19 | 20 | model = SVC(kernel='linear', C=100) 21 | model.fit(X_scaled, y) 22 | 23 | #%% get details of support vectors 24 | print('# of support vectors:', len(model.support_)) 25 | # The BAD sample lying on the wrong side of the support plane is also a support vector 26 | 27 | #%% plot SVM boundaries 28 | import matplotlib.pyplot as plt 29 | 30 | plt.figure() 31 | 
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k') 32 | 33 | # get axis limits 34 | ax = plt.gca() 35 | xlim = ax.get_xlim() 36 | ylim = ax.get_ylim() 37 | 38 | # create grid to evaluate model 39 | xx = np.linspace(xlim[0], xlim[1], 100) 40 | yy = np.linspace(ylim[0], ylim[1], 100) 41 | YY, XX = np.meshgrid(yy, xx) 42 | xy = np.vstack([XX.ravel(), YY.ravel()]).T 43 | Z = model.decision_function(xy).reshape(XX.shape) 44 | 45 | # plot decision boundary and supporting planes 46 | ax.contour(XX, YY, Z, levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'], colors=['green', 'red', 'green']) 47 | 48 | # highlight support vectors 49 | ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=200, linewidth=2, alpha=0.25) 50 | -------------------------------------------------------------------------------- /Chapter12_SVM/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter12_SVM/toyDataset.csv: -------------------------------------------------------------------------------- 1 | 1.178862847343031817e+00,1.043650985051199021e+00,-1.000000000000000000e+00 2 | 1.009649746807200765e+00,8.136507296635508979e-01,-1.000000000000000000e+00 3 | 9.722611797485600782e-01,9.645241020731013526e-01,-1.000000000000000000e+00 4 | 9.917258518517539922e-01,9.372999323176152142e-01,-1.000000000000000000e+00 5 | 9.956181831024071283e-01,9.522781969640496946e-01,-1.000000000000000000e+00 6 | 8.686135246637317620e-01,1.088462238049958453e+00,-1.000000000000000000e+00 7 | 1.088131804220753063e+00,1.170957306365294937e+00,-1.000000000000000000e+00 8 | 1.005003364217686102e+00,9.595322585399108650e-01,-1.000000000000000000e+00 9 | 9.454640052380469672e-01,8.453522684417031918e-01,-1.000000000000000000e+00 10 | 1.098236743425816009e+00,8.898932369888523652e-01,-1.000000000000000000e+00 11 | 8.814953472979827342e-01,9.794350100577459139e-01,-1.000000000000000000e+00 12 | 1.148614835507459020e+00,1.023671626722691297e+00,-1.000000000000000000e+00 13 | 8.976214860073531421e-01,9.287006799887950192e-01,-1.000000000000000000e+00 14 | 1.062524496616283010e+00,9.839486636813076226e-01,-1.000000000000000000e+00 15 | 9.231163649680770300e-01,9.769969277722061474e-01,-1.000000000000000000e+00 16 | 1.789406751968644516e+00,1.487133293975156256e+00,1.000000000000000000e+00 17 | 1.550705200525287486e+00,1.174829970657395695e+00,1.000000000000000000e+00 18 | 1.603548068650810787e+00,9.597100192185596956e-01,1.000000000000000000e+00 19 | 1.589144957396505298e+00,1.127134908698859572e+00,1.000000000000000000e+00 20 | 1.834877355074896244e+00,1.234170292063891949e+00,1.000000000000000000e+00 21 | 1.505205746499770347e+00,1.327601054272420589e+00,1.000000000000000000e+00 22 | 1.657247508866390495e+00,1.040823075565584954e+00,1.000000000000000000e+00 23 | 1.628402042997896038e+00,1.179368674437410780e+00,1.000000000000000000e+00 24 | 1.595134124268523967e+00,1.253565657843322079e+00,1.000000000000000000e+00 25 | 1.430209067890807262e+00,1.217868576218473331e+00,1.000000000000000000e+00 26 | 1.821582013026379343e+00,1.352335740914497819e+00,1.000000000000000000e+00 27 | 1.832982499992199088e+00,1.384326878638266978e+00,1.000000000000000000e+00 28 | 1.878505175839104702e+00,1.115803917871956319e+00,1.000000000000000000e+00 29 | 1.801500008846861789e+00,1.026693256526946429e+00,1.000000000000000000e+00 30 | 
1.627653787519133699e+00,1.020263354791302257e+00,1.000000000000000000e+00 31 | -------------------------------------------------------------------------------- /Chapter12_SVM/toyDataset2.csv: -------------------------------------------------------------------------------- 1 | 1.178862847343031817e+00,1.043650985051199021e+00,-1.000000000000000000e+00 2 | 1.009649746807200765e+00,8.136507296635508979e-01,-1.000000000000000000e+00 3 | 9.722611797485600782e-01,9.645241020731013526e-01,-1.000000000000000000e+00 4 | 9.917258518517539922e-01,9.372999323176152142e-01,-1.000000000000000000e+00 5 | 9.956181831024071283e-01,9.522781969640496946e-01,-1.000000000000000000e+00 6 | 8.686135246637317620e-01,1.088462238049958453e+00,-1.000000000000000000e+00 7 | 1.088131804220753063e+00,1.170957306365294937e+00,-1.000000000000000000e+00 8 | 1.005003364217686102e+00,9.595322585399108650e-01,-1.000000000000000000e+00 9 | 9.454640052380469672e-01,8.453522684417031918e-01,-1.000000000000000000e+00 10 | 1.098236743425816009e+00,8.898932369888523652e-01,-1.000000000000000000e+00 11 | 8.814953472979827342e-01,9.794350100577459139e-01,-1.000000000000000000e+00 12 | 1.148614835507459020e+00,1.023671626722691297e+00,-1.000000000000000000e+00 13 | 8.976214860073531421e-01,9.287006799887950192e-01,-1.000000000000000000e+00 14 | 1.062524496616283010e+00,9.839486636813076226e-01,-1.000000000000000000e+00 15 | 9.231163649680770300e-01,9.769969277722061474e-01,-1.000000000000000000e+00 16 | 1.789406751968644516e+00,1.487133293975156256e+00,1.000000000000000000e+00 17 | 1.550705200525287486e+00,1.174829970657395695e+00,1.000000000000000000e+00 18 | 1.603548068650810787e+00,9.597100192185596956e-01,1.000000000000000000e+00 19 | 1.589144957396505298e+00,1.127134908698859572e+00,1.000000000000000000e+00 20 | 1.834877355074896244e+00,1.234170292063891949e+00,1.000000000000000000e+00 21 | 1.505205746499770347e+00,1.327601054272420589e+00,1.000000000000000000e+00 22 | 1.657247508866390495e+00,1.040823075565584954e+00,1.000000000000000000e+00 23 | 1.628402042997896038e+00,1.179368674437410780e+00,1.000000000000000000e+00 24 | 1.595134124268523967e+00,1.253565657843322079e+00,1.000000000000000000e+00 25 | 1.430209067890807262e+00,1.217868576218473331e+00,1.000000000000000000e+00 26 | 1.821582013026379343e+00,1.352335740914497819e+00,1.000000000000000000e+00 27 | 1.832982499992199088e+00,1.384326878638266978e+00,1.000000000000000000e+00 28 | 1.878505175839104702e+00,1.115803917871956319e+00,1.000000000000000000e+00 29 | 1.801500008846861789e+00,1.026693256526946429e+00,1.000000000000000000e+00 30 | 1.627653787519133699e+00,1.020263354791302257e+00,1.000000000000000000e+00 31 | 1.699999999999999956e+00,1.100000000000000089e+00,-1.000000000000000000e+00 32 | -------------------------------------------------------------------------------- /Chapter13_DT_RF_Ensemble/RandomForest_FaultClassification_Boilers.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Fault classification via Random Forests for gas boilers 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import packages 6 | import numpy as np, pandas as pd 7 | import matplotlib.pyplot as plt 8 | from sklearn.preprocessing import LabelEncoder 9 | from sklearn.model_selection import train_test_split 10 | 11 | plt.rcParams.update({'font.size': 12}) 12 | 13 | #%% read data 14 | data = 
pd.read_csv('Boiler_emulator_dataset.txt', delimiter=',') 15 | 16 | data.drop(data[data.Class == 'Lean'].index, inplace=True) # remove rows where Class is Lean 17 | input_data = data.iloc[:,[0,1,3,4]].values # dropping column Treturn 18 | output_label_text = data.iloc[:,-1] 19 | 20 | # convert text labels to numeric labels 21 | le = LabelEncoder() 22 | le.fit(output_label_text) 23 | print(le.classes_) 24 | 25 | output_labels = le.transform(output_label_text) 26 | 27 | #check number of samples for each class 28 | unique_labels, counts = np.unique(output_labels, return_counts=True) 29 | 30 | plt.figure() 31 | plt.bar(unique_labels, counts) 32 | plt.xlabel('Class'), plt.ylabel('Number of samples') 33 | plt.xticks(range(4) ,labels=le.classes_) 34 | plt.show() 35 | 36 | #%% separate training and test data 37 | X_train, X_test, y_train, y_test = train_test_split(input_data, output_labels, test_size=0.3, stratify=output_labels, random_state=1) 38 | 39 | #%% scale data 40 | from sklearn.preprocessing import StandardScaler 41 | 42 | scaler = StandardScaler().fit(X_train) 43 | X_train_scaled = scaler.transform(X_train) 44 | X_test_scaled = scaler.transform(X_test) 45 | 46 | #%% fit random forest model 47 | from sklearn.ensemble import RandomForestClassifier 48 | 49 | clf = RandomForestClassifier() 50 | clf.fit(X_train_scaled, y_train) 51 | 52 | y_train_pred = clf.predict(X_train_scaled) 53 | y_test_pred = clf.predict(X_test_scaled) 54 | 55 | #%% generate and plot confusion matrix 56 | from sklearn.metrics import confusion_matrix 57 | import seaborn as sn 58 | 59 | conf_mat = confusion_matrix(y_test, y_test_pred) 60 | 61 | plt.figure(figsize=(12,8)) 62 | sn.set(font_scale=2) # for label size 63 | sn.heatmap(conf_mat, fmt='.0f', annot=True, cmap='Blues', xticklabels=le.classes_, yticklabels=le.classes_) 64 | plt.ylabel('True Fault Class', fontsize=30, color='maroon') 65 | plt.xlabel('Predicted Fault Class', fontsize=30, color='green') 66 | 67 | 68 | -------------------------------------------------------------------------------- /Chapter13_DT_RF_Ensemble/XGBoostFaultClassification_Boilers.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Fault classification via XGBoost for gas boilers 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import packages 6 | import numpy as np, pandas as pd 7 | import matplotlib.pyplot as plt 8 | from sklearn.preprocessing import LabelEncoder 9 | from sklearn.model_selection import train_test_split 10 | 11 | plt.rcParams.update({'font.size': 12}) 12 | 13 | #%% read data 14 | data = pd.read_csv('Boiler_emulator_dataset.txt', delimiter=',') 15 | 16 | data.drop(data[data.Class == 'Lean'].index, inplace=True) # remove rows where Class is Lean 17 | input_data = data.iloc[:,[0,1,3,4]].values # dropping column Treturn 18 | output_label_text = data.iloc[:,-1] 19 | 20 | # convert text labels to numeric labels 21 | le = LabelEncoder() 22 | le.fit(output_label_text) 23 | print(le.classes_) 24 | 25 | output_labels = le.transform(output_label_text) 26 | 27 | #check number of samples for each class 28 | unique_labels, counts = np.unique(output_labels, return_counts=True) 29 | 30 | plt.figure() 31 | plt.bar(unique_labels, counts) 32 | plt.xlabel('Class'), plt.ylabel('Number of samples') 33 | plt.xticks(range(4) ,labels=le.classes_) 34 | plt.show() 35 | 36 | #%% separate training and test data 37 | X_train, X_test, 
y_train, y_test = train_test_split(input_data, output_labels, test_size=0.3, stratify=output_labels) 38 | 39 | #%% scale data 40 | from sklearn.preprocessing import StandardScaler 41 | 42 | scaler = StandardScaler().fit(X_train) 43 | X_train_scaled = scaler.transform(X_train) 44 | X_test_scaled = scaler.transform(X_test) 45 | 46 | #%% fit xgboost model 47 | import xgboost as xgb 48 | 49 | clf = xgb.XGBClassifier(use_label_encoder=False) 50 | clf.fit(X_train_scaled, y_train) 51 | 52 | y_train_pred = clf.predict(X_train_scaled) 53 | y_test_pred = clf.predict(X_test_scaled) 54 | 55 | #%% generate and plot confusion matrix 56 | from sklearn.metrics import confusion_matrix 57 | import seaborn as sn 58 | 59 | conf_mat = confusion_matrix(y_test, y_test_pred) 60 | 61 | plt.figure(figsize=(12,8)) 62 | sn.set(font_scale=2) # for label size 63 | sn.heatmap(conf_mat, fmt='.0f', annot=True, cmap='Blues', xticklabels=le.classes_, yticklabels=le.classes_) 64 | plt.ylabel('True Fault Class', fontsize=30, color='maroon') 65 | plt.xlabel('Predicted Fault Class', fontsize=30, color='green') 66 | 67 | 68 | -------------------------------------------------------------------------------- /Chapter13_DT_RF_Ensemble/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter14_ProximityTechniques/FD-IF.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Process Fault Detection via isolation forests 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.decomposition import PCA 10 | from sklearn.pipeline import Pipeline 11 | 12 | plt.rcParams.update({'font.size': 14}) 13 | 14 | #%% fetch data 15 | import scipy.io 16 | 17 | matlab_data = scipy.io.loadmat('MACHINE_Data.mat', struct_as_record = False) 18 | Etch_data = matlab_data['LAMDATA'] 19 | calibration_dataAll = Etch_data[0,0].calibration # calibration_dataAll[i,0] corresponds to a 2D data from ith batch where columns correspond to different variables 20 | 21 | variable_names = Etch_data[0,0].variables 22 | 23 | #%% generate unfolded data matrix 24 | n_vars = variable_names.size - 2 # first 2 columns are not process variables 25 | n_samples = 85 # following the work of He et al. 26 | 27 | unfolded_dataMatrix = np.empty((1,n_vars*n_samples)) 28 | for expt in range(calibration_dataAll.size): 29 | calibration_expt = calibration_dataAll[expt,0][5:90,2:] # removing first 5 measurements as done in He et al. 
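    # batch-wise unfolding: np.ravel(..., order='F') flattens the 85 x n_vars batch column by column,
    # so each variable's 85-sample trajectory occupies a contiguous segment of a single row of length
    # n_vars*n_samples; stacking one such row per complete batch yields the 2-D matrix used for PCA below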
30 | 31 | if calibration_expt.shape[0] < 85: 32 | continue 33 | 34 | unfolded_row = np.ravel(calibration_expt, order='F')[np.newaxis,:] 35 | unfolded_dataMatrix = np.vstack((unfolded_dataMatrix, unfolded_row)) 36 | 37 | unfolded_dataMatrix = unfolded_dataMatrix[1:,:] 38 | 39 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 40 | ## Fit PCA and use PC scores as model inputs 41 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 | 43 | #%% scale data & fit PCA model via pipeline 44 | pipe = Pipeline([('scaler', StandardScaler()), ('pca', PCA(n_components = 3))]) 45 | score_train = pipe.fit_transform(unfolded_dataMatrix) 46 | 47 | #%% visualize in 2D 48 | plt.figure() 49 | plt.scatter(score_train[:,0],score_train[:,1]) 50 | plt.xlabel('PC1 scores') 51 | plt.ylabel('PC2 scores') 52 | 53 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 54 | ## Fit Isolation Forest model 55 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 56 | from sklearn.ensemble import IsolationForest 57 | 58 | IF_model = IsolationForest(contamination=0.05) 59 | IF_model.fit(score_train) 60 | 61 | IFscore_train = -IF_model.score_samples(score_train) # score_samples returns the anomaly score of the input samples. The lower, the more abnormal. 62 | IF_CL = np.percentile(IFscore_train, 95) 63 | 64 | #%% monitoring chart for training data 65 | plt.figure() 66 | plt.plot(IFscore_train, '*') 67 | plt.plot([1,len(IFscore_train)],[IF_CL, IF_CL], color='red') 68 | plt.xlabel('Sample #') 69 | plt.ylabel('IF abnormality score values for training data') 70 | plt.show() 71 | 72 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 73 | ## test data 74 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 75 | 76 | #%% fetch test data and unfold 77 | test_dataAll = Etch_data[0,0].test 78 | 79 | unfolded_TestdataMatrix = np.empty((1,n_vars*n_samples)) 80 | for expt in range(test_dataAll.size): 81 | test_expt = test_dataAll[expt,0][5:90,2:] 82 | 83 | if test_expt.shape[0] < 85: 84 | continue 85 | 86 | unfolded_row = np.ravel(test_expt, order='F')[np.newaxis,:] 87 | unfolded_TestdataMatrix = np.vstack((unfolded_TestdataMatrix, unfolded_row)) 88 | 89 | unfolded_TestdataMatrix = unfolded_TestdataMatrix[1:,:] 90 | 91 | #%% scale and perform PCA on faulty test data 92 | score_test = pipe.transform(unfolded_TestdataMatrix) 93 | 94 | #%% IF_test 95 | IFscore_test = -IF_model.score_samples(score_test) 96 | print('Number of flagged faults (using control chart): ', np.sum(IFscore_test > IF_CL)) 97 | 98 | # use predict() function of IF class 99 | print('Number of flagged faults (using predict function): ', np.sum(IF_model.predict(score_test) == -1)) 100 | 101 | #%% monitoring chart for test data 102 | plt.figure() 103 | plt.plot(IFscore_test, '*') 104 | plt.plot([1,len(IFscore_test)],[IF_CL, IF_CL], color='red') 105 | plt.xlabel('Sample #') 106 | plt.ylabel('IF abnormality values for test data') 107 | plt.show() 108 | -------------------------------------------------------------------------------- /Chapter14_ProximityTechniques/FD-KNN.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Process Fault Detection via KNN 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import 
numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.decomposition import PCA 10 | from sklearn.pipeline import Pipeline 11 | 12 | #%% fetch data 13 | import scipy.io 14 | 15 | matlab_data = scipy.io.loadmat('MACHINE_Data.mat', struct_as_record = False) 16 | Etch_data = matlab_data['LAMDATA'] 17 | calibration_dataAll = Etch_data[0,0].calibration # calibration_dataAll[i,0] corresponds to a 2D data from ith batch where columns correspond to different variables 18 | 19 | variable_names = Etch_data[0,0].variables 20 | 21 | #%% generate unfolded data matrix 22 | n_vars = variable_names.size - 2 # first 2 columns are not process variables 23 | n_samples = 85 # following the work of He et al. 24 | 25 | unfolded_dataMatrix = np.empty((1,n_vars*n_samples)) 26 | for expt in range(calibration_dataAll.size): 27 | calibration_expt = calibration_dataAll[expt,0][5:90,2:] # removing first 5 measurements as done in He et al. 28 | 29 | if calibration_expt.shape[0] < 85: 30 | continue 31 | 32 | unfolded_row = np.ravel(calibration_expt, order='F')[np.newaxis,:] 33 | unfolded_dataMatrix = np.vstack((unfolded_dataMatrix, unfolded_row)) 34 | 35 | unfolded_dataMatrix = unfolded_dataMatrix[1:,:] 36 | 37 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 38 | ## Fit PCA and use PC scores as model inputs 39 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 40 | 41 | #%% scale data & fit PCA model via pipeline 42 | pipe = Pipeline([('scaler', StandardScaler()), ('pca', PCA(n_components = 3))]) 43 | score_train = pipe.fit_transform(unfolded_dataMatrix) 44 | 45 | #%% visualize in 2D 46 | plt.figure() 47 | plt.scatter(score_train[:,0],score_train[:,1]) 48 | plt.xlabel('PC1 scores') 49 | plt.ylabel('PC2 scores') 50 | 51 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 52 | ## k-nearest neighbors of each training sample in score space 53 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 54 | from sklearn.neighbors import NearestNeighbors 55 | 56 | nbrs = NearestNeighbors(n_neighbors=6).fit(score_train) # a data-point is its own neighbor in training dataset 57 | d2_nbrs, indices = nbrs.kneighbors(score_train) 58 | d2_sqrd_nbrs = d2_nbrs**2 59 | D2 = np.sum(d2_sqrd_nbrs, axis = 1) 60 | D2_log = np.log(D2) 61 | 62 | # Compute D2_log control limit 63 | D2_log_CL = np.percentile(D2_log,95) 64 | 65 | #%% monitoring chart for training data 66 | plt.figure() 67 | plt.plot(D2_log, '*') 68 | plt.plot([1,len(D2_log)],[D2_log_CL, D2_log_CL], color='red') 69 | plt.xlabel('Sample #') 70 | plt.ylabel('D2_log for training data') 71 | plt.show() 72 | 73 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 74 | ## test data 75 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 76 | 77 | #%% fetch test data and unfold 78 | test_dataAll = Etch_data[0,0].test 79 | 80 | unfolded_TestdataMatrix = np.empty((1,n_vars*n_samples)) 81 | for expt in range(test_dataAll.size): 82 | test_expt = test_dataAll[expt,0][5:90,2:] 83 | 84 | if test_expt.shape[0] < 85: 85 | continue 86 | 87 | unfolded_row = np.ravel(test_expt, order='F')[np.newaxis,:] 88 | unfolded_TestdataMatrix = np.vstack((unfolded_TestdataMatrix, unfolded_row)) 89 | 90 | unfolded_TestdataMatrix = unfolded_TestdataMatrix[1:,:] 91 | 92 | #%% scale and PCA on faulty test data 93 | score_test = pipe.transform(unfolded_TestdataMatrix) 94 | 95 | #%% 
D2_log_test 96 | d2_nbrs_test, indices = nbrs.kneighbors(score_test) 97 | d2_nbrs_test = d2_nbrs_test[:,0:5] # we want only 5 nearest neighbors 98 | d2_sqrd_nbrs_test = d2_nbrs_test**2 99 | D2_test = np.sum(d2_sqrd_nbrs_test, axis = 1) 100 | D2_log_test = np.log(D2_test) 101 | 102 | #%% monitoring chart for test data 103 | plt.figure() 104 | plt.plot(D2_log_test, '*') 105 | plt.plot([1,len(D2_log_test)],[D2_log_CL, D2_log_CL], color='red') 106 | plt.xlabel('Sample #') 107 | plt.ylabel('D2_log for test data') 108 | plt.show() 109 | -------------------------------------------------------------------------------- /Chapter14_ProximityTechniques/FD-LOF.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Process Fault Detection via LOF 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.decomposition import PCA 10 | from sklearn.pipeline import Pipeline 11 | 12 | plt.rcParams.update({'font.size': 14}) 13 | 14 | #%% fetch data 15 | import scipy.io 16 | 17 | matlab_data = scipy.io.loadmat('MACHINE_Data.mat', struct_as_record = False) 18 | Etch_data = matlab_data['LAMDATA'] 19 | calibration_dataAll = Etch_data[0,0].calibration # calibration_dataAll[i,0] corresponds to a 2D data from ith batch where columns correspond to different variables 20 | 21 | variable_names = Etch_data[0,0].variables 22 | 23 | #%% generate unfolded data matrix 24 | n_vars = variable_names.size - 2 # first 2 columns are not process variables 25 | n_samples = 85 # following the work of He et al. 26 | 27 | unfolded_dataMatrix = np.empty((1,n_vars*n_samples)) 28 | for expt in range(calibration_dataAll.size): 29 | calibration_expt = calibration_dataAll[expt,0][5:90,2:] # removing first 5 measurements as done in He et al. 
30 | 31 | if calibration_expt.shape[0] < 85: 32 | continue 33 | 34 | unfolded_row = np.ravel(calibration_expt, order='F')[np.newaxis,:] 35 | unfolded_dataMatrix = np.vstack((unfolded_dataMatrix, unfolded_row)) 36 | 37 | unfolded_dataMatrix = unfolded_dataMatrix[1:,:] 38 | 39 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 40 | ## Fit PCA and use PC scores as model inputs 41 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 | 43 | #%% scale data & fit PCA model via pipeline 44 | pipe = Pipeline([('scaler', StandardScaler()), ('pca', PCA(n_components = 3))]) 45 | score_train = pipe.fit_transform(unfolded_dataMatrix) 46 | 47 | #%% visualize in 2D 48 | plt.figure() 49 | plt.scatter(score_train[:,0],score_train[:,1]) 50 | plt.xlabel('PC1 scores') 51 | plt.ylabel('PC2 scores') 52 | 53 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 54 | ## Fit LOF model 55 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 56 | from sklearn.neighbors import LocalOutlierFactor 57 | 58 | lof_model = LocalOutlierFactor(n_neighbors=5, novelty=True, contamination=0.05) 59 | lof_model.fit(score_train) 60 | 61 | lof_train = -lof_model.negative_outlier_factor_ # negative_outlier_factor_ gives the opposite LOF of the training samples 62 | lof_CL = -lof_model.offset_ 63 | 64 | #%% monitoring chart for training data 65 | plt.figure() 66 | plt.plot(lof_train, '*') 67 | plt.plot([1,len(lof_train)],[lof_CL, lof_CL], color='red') 68 | plt.xlabel('Sample #') 69 | plt.ylabel('LOF values for training data') 70 | plt.show() 71 | 72 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 73 | ## test data 74 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 75 | 76 | #%% fetch test data and unfold 77 | test_dataAll = Etch_data[0,0].test 78 | 79 | unfolded_TestdataMatrix = np.empty((1,n_vars*n_samples)) 80 | for expt in range(test_dataAll.size): 81 | test_expt = test_dataAll[expt,0][5:90,2:] 82 | 83 | if test_expt.shape[0] < 85: 84 | continue 85 | 86 | unfolded_row = np.ravel(test_expt, order='F')[np.newaxis,:] 87 | unfolded_TestdataMatrix = np.vstack((unfolded_TestdataMatrix, unfolded_row)) 88 | 89 | unfolded_TestdataMatrix = unfolded_TestdataMatrix[1:,:] 90 | 91 | #%% scale and perform PCA on faulty test data 92 | score_test = pipe.transform(unfolded_TestdataMatrix) 93 | 94 | #%% lof_test 95 | lof_test = -lof_model.score_samples(score_test) 96 | print('Number of flagged faults (using control chart): ', np.sum(lof_test > lof_CL)) 97 | 98 | # can also use predict() function of LOF class to flag faulty samples 99 | print('Number of flagged faults (using predict function): ', np.sum(lof_model.predict(score_test) == -1)) 100 | 101 | #%% monitoring chart for test data 102 | plt.figure() 103 | plt.plot(lof_test, '*') 104 | plt.plot([1,len(lof_test)],[lof_CL, lof_CL], color='red') 105 | plt.xlabel('Sample #') 106 | plt.ylabel('LOF values for test data') 107 | plt.show() 108 | -------------------------------------------------------------------------------- /Chapter14_ProximityTechniques/MACHINE_Data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ML-PSE/Machine_Learning_for_PM_and_PdM/96c21a8aeb4177541ea79e13474e099cc5ea00dd/Chapter14_ProximityTechniques/MACHINE_Data.mat -------------------------------------------------------------------------------- 
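A minimal, self-contained sketch (synthetic data, not the metal-etch set above) of the monitoring recipe shared by FD-KNN.py, FD-IF.py and FD-LOF.py: fit the detector on NOC data, set the control limit at the 95th percentile of the training statistic, and flag test samples that exceed it:

import numpy as np
from sklearn.neighbors import LocalOutlierFactor

rng = np.random.default_rng(0)
X_noc = rng.normal(size=(200, 2))                       # normal operating condition (NOC) samples
X_new = np.vstack((rng.normal(size=(20, 2)),            # 20 NOC-like test samples
                   rng.normal(loc=6.0, size=(5, 2))))   # 5 clearly shifted (faulty) samples

lof = LocalOutlierFactor(n_neighbors=10, novelty=True).fit(X_noc)
stat_train = -lof.negative_outlier_factor_              # LOF of training samples (higher = more abnormal)
CL = np.percentile(stat_train, 95)                      # empirical 95% control limit

stat_new = -lof.score_samples(X_new)                    # LOF-type statistic for new samples
print('Flagged:', np.sum(stat_new > CL), 'out of', len(stat_new))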
/Chapter14_ProximityTechniques/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter15_FDD_Supervised_ANN/CCPP_FFNN.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## FFNN modeling of CCPP 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np, pandas as pd 7 | import matplotlib.pyplot as plt 8 | 9 | #%% read data 10 | data = pd.read_excel('Folds5x2_pp.xlsx', usecols = 'A:E').values 11 | X = data[:,0:4] 12 | y = data[:,4][:,np.newaxis] 13 | 14 | #%% separate train and test data 15 | from sklearn.model_selection import train_test_split 16 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 100) 17 | 18 | #%% scale data 19 | from sklearn.preprocessing import StandardScaler 20 | 21 | X_scaler = StandardScaler() 22 | X_train_scaled = X_scaler.fit_transform(X_train) 23 | X_test_scaled = X_scaler.transform(X_test) 24 | 25 | y_scaler = StandardScaler() 26 | y_train_scaled = y_scaler.fit_transform(y_train) 27 | y_test_scaled = y_scaler.transform(y_test) 28 | 29 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 30 | ## Define & Fit FFNN model 31 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 32 | 33 | #%% import Keras libraries 34 | from tensorflow.keras import Sequential 35 | from tensorflow.keras.layers import Dense 36 | 37 | #%% define model 38 | model = Sequential() 39 | model.add(Dense(8, activation='relu', kernel_initializer='he_normal', input_shape=(4,))) # 8 neurons in 1st hidden layer; this hidden layer accepts data from a 4 dimensional input 40 | model.add(Dense(5, activation='relu', kernel_initializer='he_normal')) # 5 neurons in 2nd layer 41 | model.add(Dense(1)) # output layer 42 | 43 | #%% compile model 44 | model.compile(loss='mse', optimizer='Adam') # mean-squared error is to be minimized 45 | 46 | #%% fit model 47 | model.fit(X_train_scaled, y_train_scaled, epochs=25, batch_size=50) 48 | 49 | #%% predict y_test 50 | y_test_scaled_pred = model.predict(X_test_scaled) 51 | y_test_pred = y_scaler.inverse_transform(y_test_scaled_pred) 52 | 53 | plt.figure() 54 | plt.plot(y_test, y_test_pred, '*') 55 | plt.xlabel('y_test') 56 | plt.ylabel('y_test_pred') 57 | 58 | #%% metrics 59 | from sklearn.metrics import r2_score 60 | print('R2:', r2_score(y_test, y_test_pred)) 61 | 62 | #%% model summary 63 | model.summary() -------------------------------------------------------------------------------- /Chapter15_FDD_Supervised_ANN/Folds5x2_pp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ML-PSE/Machine_Learning_for_PM_and_PdM/96c21a8aeb4177541ea79e13474e099cc5ea00dd/Chapter15_FDD_Supervised_ANN/Folds5x2_pp.xlsx -------------------------------------------------------------------------------- /Chapter15_FDD_Supervised_ANN/debutanizer_FaultDetection_FFNN.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## FFNN model with debutanizer data and fault detection 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import 
required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | #%% random number seed for result reproducibility 10 | from numpy.random import seed 11 | seed(1) 12 | import tensorflow 13 | tensorflow.random.set_seed(2) 14 | 15 | #%% read data 16 | data = np.loadtxt('debutanizer_data_withFault.txt') # (Drift) fault starts from last 200 sample onwards 17 | 18 | #%% separate training and test data 19 | data_train = data[:-300,:] 20 | data_test = data[-300:,:] 21 | 22 | X_train, y_train = data_train[:,0:-1], data_train[:,-1][:,np.newaxis] 23 | X_test, y_test = data_test[:,0:-1], data_test[:,-1][:,np.newaxis] 24 | 25 | #%% separate estimation and validation data 26 | from sklearn.model_selection import train_test_split 27 | 28 | X_est, X_val, y_est, y_val = train_test_split(X_train, y_train, test_size = 0.2, random_state = 100) 29 | 30 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 31 | ## Fit FFNN model 32 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 33 | 34 | # import packages 35 | from tensorflow.keras import Sequential 36 | from tensorflow.keras.layers import Dense 37 | from tensorflow.keras import regularizers 38 | from tensorflow.keras.callbacks import EarlyStopping 39 | from tensorflow.keras.optimizers import Adam 40 | 41 | #%% define model 42 | model = Sequential() 43 | model.add(Dense(40, kernel_regularizer=regularizers.L1(0.000001), activation='relu', kernel_initializer='he_normal', input_shape=(7,))) 44 | model.add(Dense(20, kernel_regularizer=regularizers.L1(0.000001), activation='relu', kernel_initializer='he_normal')) 45 | model.add(Dense(1, kernel_regularizer=regularizers.L1(0.000001))) 46 | 47 | #%% compile model 48 | model.compile(loss='mse', optimizer=Adam(learning_rate=0.005)) 49 | model.summary() 50 | 51 | #%% fit model 52 | es = EarlyStopping(monitor='val_loss', patience=50) 53 | history = model.fit(X_est, y_est, epochs=2000, batch_size=64, validation_data=(X_val, y_val), callbacks=es) 54 | 55 | #%% plot validation curve 56 | plt.figure() 57 | plt.title('Validation Curves') 58 | plt.xlabel('Epoch') 59 | plt.ylabel('MSE') 60 | plt.plot(history.history['loss'], label='fitting') 61 | plt.plot(history.history['val_loss'], label='val') 62 | plt.legend() 63 | plt.grid() 64 | plt.show() 65 | 66 | #%% predict C4 content 67 | y_test_pred = model.predict(X_test) 68 | y_val_pred = model.predict(X_val) 69 | y_est_pred = model.predict(X_est) 70 | y_train_pred = model.predict(X_train) 71 | 72 | # metrics 73 | from sklearn.metrics import r2_score 74 | print('R2 for validation data:', r2_score(y_val, y_val_pred)) 75 | print('R2 for fitting data:', r2_score(y_est, y_est_pred)) 76 | 77 | #%% plots of raw and predicted data 78 | plt.figure() 79 | plt.plot(y_test, 'b', label='Raw data') 80 | plt.plot(y_test_pred, 'r', label='FFNN prediction') 81 | plt.ylabel('C4 content (test data)') 82 | plt.xlabel('Sample #') 83 | plt.legend() 84 | 85 | plt.figure() 86 | plt.plot(y_val, 'b', label='Raw data') 87 | plt.plot(y_val_pred, 'r', label='FFNN prediction') 88 | plt.ylabel('C4 content (validation data)') 89 | plt.xlabel('Sample #') 90 | plt.legend() 91 | 92 | plt.figure() 93 | plt.plot(y_train, 'b', label='Raw data') 94 | plt.plot(y_train_pred, 'r', label='FFNN prediction') 95 | plt.ylabel('C4 content (training data)') 96 | plt.xlabel('Sample #') 97 | plt.legend() 98 | 99 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 100 | ## Monitoring statistics for training samples 101 | ## 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 102 | # Q metric for training samples 103 | error_train = y_train - y_train_pred 104 | Q_train = np.sum(error_train*error_train, axis = 1) 105 | Q_CL = np.percentile(Q_train, 95) 106 | 107 | # Q_train plot with CL 108 | plt.figure() 109 | plt.plot(Q_train, color='black') 110 | plt.plot([1,len(Q_train)],[Q_CL,Q_CL], linestyle='--',color='red', linewidth=2) 111 | plt.xlabel('Sample #') 112 | plt.ylabel('Q metric: training data') 113 | plt.grid() 114 | plt.show() 115 | 116 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 117 | ## Monitoring statistics for test samples 118 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 119 | # Q metric for test samples 120 | error_test = y_test - y_test_pred 121 | Q_test = np.sum(error_test*error_test, axis = 1) 122 | 123 | plt.figure() 124 | plt.plot(Q_test, color='black') 125 | plt.plot([1,len(Q_test)],[Q_CL,Q_CL], linestyle='--',color='red', linewidth=2) 126 | plt.xlabel('Sample #') 127 | plt.ylabel('Q metric: training data') 128 | plt.grid() 129 | plt.show() -------------------------------------------------------------------------------- /Chapter15_FDD_Supervised_ANN/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter16_FDD_Unsupervised_ANN/Autoencoder_DimensionalityReduction_FCCU.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Dimensionality reduction using Autoencoder 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np, pandas as pd, matplotlib.pyplot as plt 7 | import tensorflow 8 | tensorflow.random.set_seed(2) 9 | 10 | plt.rcParams.update({'font.size': 14}) 11 | np.random.seed(1) 12 | 13 | #%% read data 14 | X_train = pd.read_csv('NOC_varyingFeedFlow_outputs.csv', header=None).values 15 | X_train = X_train[:,1:] # first column contains timestamps 16 | 17 | #%% split data into fitting and validation datasets 18 | from sklearn.model_selection import train_test_split 19 | X_fit, X_val, _, _ = train_test_split(X_train, X_train, test_size=0.2, random_state=10) 20 | 21 | #%% scale data 22 | from sklearn.preprocessing import StandardScaler 23 | 24 | scaler = StandardScaler() 25 | X_fit_scaled = scaler.fit_transform(X_fit) 26 | X_val_scaled = scaler.transform(X_val) 27 | X_train_scaled = scaler.transform(X_train) 28 | 29 | #%% define and compile model 30 | from tensorflow.keras.layers import Input, Dense 31 | from tensorflow.keras.models import Model 32 | 33 | input_layer = Input(shape=(X_fit_scaled.shape[1],)) # input layer 34 | encoded = Dense(1, activation='relu')(input_layer) # encoder layer 35 | decoded = Dense(X_fit_scaled.shape[1], activation='linear')(encoded) # decoder layer 36 | autoencoder = Model(inputs=input_layer, outputs=decoded) 37 | encoder = Model(inputs=input_layer, outputs=encoded) 38 | 39 | # Compile autoencoder model 40 | autoencoder.compile(optimizer='adam', loss='mse') 41 | 42 | # Print model summary 43 | autoencoder.summary() 44 | 45 | #%% fit model 46 | from tensorflow.keras.callbacks import EarlyStopping 47 | 48 | es = EarlyStopping(monitor='val_loss', patience=10) 49 | history = autoencoder.fit(X_fit_scaled, X_fit_scaled, epochs=300, 
batch_size=256, validation_data=(X_val_scaled, X_val_scaled), callbacks=es) 50 | 51 | #%% plot validation curve 52 | plt.figure() 53 | plt.title('Validation Curves') 54 | plt.xlabel('Epoch') 55 | plt.ylabel('MSE') 56 | plt.plot(history.history['loss'], label='fitting loss') 57 | plt.plot(history.history['val_loss'], label='Validation Loss') 58 | plt.legend() 59 | plt.grid() 60 | plt.show() 61 | 62 | #%% predict for overall training dataset 63 | X_train_scaled_pred = autoencoder.predict(X_train_scaled) 64 | X_train_pred = scaler.inverse_transform(X_train_scaled_pred) 65 | 66 | # compare via plots 67 | plt.figure(figsize=[7,5]) 68 | var = 7 69 | plt.plot(X_train[:,var],'seagreen', linewidth=1) 70 | plt.plot(X_train_pred[:,var],'red', linewidth=1) 71 | plt.xlabel('time (mins)') 72 | plt.ylabel('Furnace firebox temperature (T3) ') 73 | 74 | plt.figure(figsize=[7,5]) 75 | var = 21 76 | plt.plot(X_train[:,var],'seagreen', linewidth=1) 77 | plt.plot(X_train_pred[:,var],'red', linewidth=1) 78 | plt.xlabel('time (mins)') 79 | plt.ylabel('Feed temperature controller valve opening (V1)') 80 | 81 | plt.figure(figsize=[7,5]) 82 | var = 38 83 | plt.plot(X_train[:,var],'seagreen', linewidth=1) 84 | plt.plot(X_train_pred[:,var],'red', linewidth=1) 85 | plt.xlabel('time (mins)') 86 | plt.ylabel('Reflux flowrate') 87 | 88 | #%% predict latents 89 | h_train = encoder.predict(X_train_scaled) 90 | 91 | # plot 92 | plt.figure(figsize=[15,5]) 93 | plt.plot(X_train_scaled[:,0],'seagreen', linewidth=1, label='Scaled actual feed flow') 94 | plt.plot(h_train, 'purple', linewidth=1, label='latent signal') 95 | plt.xlabel('time (mins)'), plt.ylabel('h') 96 | plt.legend() -------------------------------------------------------------------------------- /Chapter16_FDD_Unsupervised_ANN/MACHINE_Data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ML-PSE/Machine_Learning_for_PM_and_PdM/96c21a8aeb4177541ea79e13474e099cc5ea00dd/Chapter16_FDD_Unsupervised_ANN/MACHINE_Data.mat -------------------------------------------------------------------------------- /Chapter16_FDD_Unsupervised_ANN/SOM_visualization.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Semiconductor metal-etch data visualization using SOM 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.decomposition import PCA 10 | from sklearn.pipeline import Pipeline 11 | 12 | plt.rcParams.update({'font.size': 14}) 13 | np.random.seed(1) 14 | 15 | #%% fetch data 16 | import scipy.io 17 | 18 | matlab_data = scipy.io.loadmat('MACHINE_Data.mat', struct_as_record = False) 19 | Etch_data = matlab_data['LAMDATA'] 20 | calibration_dataAll = Etch_data[0,0].calibration # calibration_dataAll[i,0] corresponds to a 2D data from ith batch where columns correspond to different variables 21 | 22 | variable_names = Etch_data[0,0].variables 23 | 24 | #%% generate unfolded data matrix 25 | n_vars = variable_names.size - 2 # first 2 columns are not process variables 26 | n_samples = 85 # following the work of He et al. 
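# note: batches with fewer than 85 retained samples are skipped in the loop below, so the unfolded
# matrix ends up with one row per complete batch (the dummy first row created by np.empty is dropped
# at the end via unfolded_dataMatrix[1:,:])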
27 | 28 | unfolded_dataMatrix = np.empty((1,n_vars*n_samples)) 29 | for expt in range(calibration_dataAll.size): 30 | calibration_expt = calibration_dataAll[expt,0][5:90,2:] # removing first 5 measurements as done in He et al. 31 | 32 | if calibration_expt.shape[0] < 85: 33 | continue 34 | 35 | unfolded_row = np.ravel(calibration_expt, order='F')[np.newaxis,:] 36 | unfolded_dataMatrix = np.vstack((unfolded_dataMatrix, unfolded_row)) 37 | 38 | unfolded_dataMatrix = unfolded_dataMatrix[1:,:] 39 | 40 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 41 | ## Fit PCA and use PC scores as model inputs 42 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 43 | 44 | #%% scale data & fit PCA model via pipeline 45 | pipe = Pipeline([('scaler', StandardScaler()), ('pca', PCA(n_components = 3))]) 46 | score_train = pipe.fit_transform(unfolded_dataMatrix) 47 | 48 | #%% visualize in 2D 49 | plt.figure() 50 | plt.scatter(score_train[:,0],score_train[:,1]) 51 | plt.xlabel('PC1 scores') 52 | plt.ylabel('PC2 scores') 53 | 54 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 55 | ## fit SOM model 56 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 57 | from minisom import MiniSom 58 | 59 | N = score_train.shape[0] 60 | N_neurons = 5*np.sqrt(N) 61 | som = MiniSom(np.floor(np.sqrt(N_neurons)).astype(int), np.floor(np.sqrt(N_neurons)).astype(int), score_train.shape[1], sigma=1.5, learning_rate=.7, activation_distance='euclidean', 62 | topology='hexagonal', neighborhood_function='gaussian', random_seed=10) 63 | som.train(score_train, num_iteration=10000, verbose=True) 64 | 65 | #%% plot U-matrix 66 | plt.figure(figsize=(9, 9)) 67 | plt.pcolor(som.distance_map().T, cmap='bone_r') # plotting the distance map as background 68 | plt.colorbar() 69 | plt.show() 70 | 71 | #%% iteration vs errors during training 72 | # max_iter = 10000 73 | # Q_error = [] 74 | # T_error = [] 75 | 76 | # som = MiniSom(np.floor(np.sqrt(N_neurons)).astype(int), np.floor(np.sqrt(N_neurons)).astype(int), score_train.shape[1], sigma=1.5, learning_rate=.7, activation_distance='euclidean', 77 | # topology='hexagonal', neighborhood_function='gaussian', random_seed=10) 78 | 79 | # for i in range(max_iter): 80 | # print(i) 81 | # rand_i = np.random.randint(len(score_train)) 82 | # som.update(score_train[rand_i], som.winner(score_train[rand_i]), i, max_iter) 83 | # Q_error.append(som.quantization_error(score_train)) 84 | # T_error.append(som.topographic_error(score_train)) 85 | 86 | # 87 | # plt.figure(figsize=(10,5)) 88 | # plt.plot(np.arange(max_iter), Q_error) 89 | # plt.ylabel('Quantization error') 90 | # plt.xlabel('iteration index') 91 | # plt.show() 92 | 93 | # plt.figure(figsize=(10,5)) 94 | # plt.plot(np.arange(max_iter), T_error) 95 | # plt.ylabel('Topographic error') 96 | # plt.xlabel('iteration index') 97 | # plt.show() -------------------------------------------------------------------------------- /Chapter16_FDD_Unsupervised_ANN/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter17_VCM_SignalProcessing/Spectrogram_introduction.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Time-frequency decomposition 3 | ## 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | import numpy as np, matplotlib.pyplot as plt 5 | 6 | fs = 1000 # 1000 Hz 7 | dt = 1.0/fs 8 | t = np.arange(0,1,dt) 9 | 10 | y1 = np.sin(2*np.pi*20*t ) # 20Hz component 11 | y2 = np.sin(2*np.pi*80*t) # 80Hz component 12 | y = np.hstack((y1,y2)) # sampled signal 13 | t = np.hstack((t, t[-1]+t)) 14 | 15 | plt.figure(figsize = (20, 3)) 16 | plt.plot(t*1000, y, '-*',color='black', label='Samples') 17 | plt.ylabel('Amplitude (g)', fontsize=25), plt.xlabel('Time (ms)', fontsize=25) 18 | plt.grid() 19 | 20 | #%% spectrogram 21 | from scipy import signal 22 | 23 | f, t, Sxx = signal.stft(y, fs) 24 | 25 | plt.figure(figsize=(8,4)) 26 | plt.pcolormesh(t, f, np.abs(Sxx), shading='gouraud') 27 | plt.colorbar() 28 | plt.ylabel('Frequency [Hz]', fontsize=25), plt.xlabel('Time [sec]', fontsize=25) 29 | plt.show() 30 | -------------------------------------------------------------------------------- /Chapter17_VCM_SignalProcessing/WindTurbineVibration_TimeDomainFeatureExtraction.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Feature Extraction from a Wind Turbine Vibration signal 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | # packages 6 | import numpy as np, matplotlib.pyplot as plt 7 | import scipy.io 8 | 9 | plt.rcParams.update({'font.size': 12}) 10 | 11 | #%% fetch data from all 50 files and plot 12 | # import glob 13 | # FilenamesList = glob.glob('data-2013*.mat') 14 | 15 | # plt.figure(figsize=(15,6)) 16 | # x_pos_start = 0 17 | # for fileName in FilenamesList: 18 | # matlab_data = scipy.io.loadmat(fileName, struct_as_record = False) 19 | # vib_data = matlab_data['vibration'][:,0] 20 | 21 | # x_pos_end = x_pos_start + vib_data.shape[0] 22 | # plt.plot(range(x_pos_start, x_pos_end), vib_data) 23 | 24 | # x_pos_start = x_pos_end # start position of next file data 25 | 26 | # plt.xlabel('sample #') 27 | # plt.ylabel('Acceleration (g)') 28 | # plt.grid() 29 | 30 | #%% fetch data for a waveform (file data-20130418T230803Z.mat randomly selected) 31 | matlab_data = scipy.io.loadmat('data-20130418T230803Z.mat', struct_as_record = False) 32 | vib_data = matlab_data['vibration'][:,0] 33 | 34 | plt.figure(figsize=(15,6)) 35 | plt.plot(vib_data, linewidth=0.2, color='black') 36 | plt.xlabel('sample #', fontsize=25), plt.ylabel('Acceleration (g)', fontsize=25) 37 | plt.grid() 38 | 39 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 40 | ## Time domain features 41 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 | 43 | #%% compute RMS, STD, peak, peak2peak 44 | N = len(vib_data) 45 | vibStd = np.std(vib_data, ddof=1, axis=0) 46 | vibRMS = np.sqrt(np.sum(vib_data ** 2)/N) 47 | vibPeak = np.max(np.abs(vib_data)) 48 | vibPeak2Peak = np.max(vib_data) - np.min(vib_data) 49 | 50 | print('Standard deviation: ', vibStd) 51 | print('RMS: ', vibRMS) 52 | print('Peak: ', vibPeak) 53 | print('Peak-to-Peak: ', vibPeak2Peak) 54 | 55 | #%% compute kurtosis, skewness 56 | from scipy.stats import kurtosis, skew 57 | 58 | vibKurtosis = kurtosis(vib_data, fisher=False) 59 | vibSkewness = skew(vib_data, axis=0) 60 | 61 | print('Kurtosis: ', vibKurtosis) 62 | print('Skewness: ', vibSkewness) 63 | 64 | #%% rest of the time domain features 65 | vibMean = np.mean(vib_data) 66 | vibShapeFactor = vibRMS / 
(np.mean(np.abs(vib_data))) 67 | vibCrestFactor = np.max(np.abs(vib_data)) / vibRMS 68 | vibImpulseFactor = np.max(np.abs(vib_data)) / (np.mean(np.abs(vib_data))) 69 | vibMarginFactor = np.max(np.abs(vib_data)) / (np.mean(np.sqrt(abs(vib_data))) ** 2) 70 | 71 | print('Mean: ', vibMean) 72 | print('Shape Factor: ', vibShapeFactor) 73 | print('Crest Factor: ', vibCrestFactor) 74 | print('Impulse Factor: ', vibImpulseFactor) 75 | print('Margin Factor: ', vibMarginFactor) 76 | 77 | -------------------------------------------------------------------------------- /Chapter17_VCM_SignalProcessing/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter17_VCM_SignalProcessing/spectrum_introduction.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## FFT spectrum 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | import numpy as np, matplotlib.pyplot as plt 5 | 6 | fs = 1000 # 1000 Hz 7 | dt = 1.0/fs 8 | duration = 0.5 # 0.5 seconds 9 | t = np.arange(0,0.5,dt) # sampling instants 10 | 11 | y1 = np.sin(2*np.pi*50*t ) # 50Hz component 12 | y2 = 2.5*np.sin(2*np.pi*20*t) # 20Hz component 13 | y = y1+y2 # sampled signal 14 | 15 | plt.figure(figsize = (20, 3)) 16 | plt.plot(t*1000, y, color='black', label='original signal') 17 | plt.plot(t*1000, y, '-*',color='maroon', label='Samples') 18 | plt.ylabel('Amplitude (g)', fontsize=25), plt.xlabel('Time (ms)', fontsize=25) 19 | plt.grid() 20 | 21 | #%% generate spectrum 22 | from scipy.fft import rfft, rfftfreq 23 | 24 | N = len(t) 25 | Y_spectrum = rfft(y) 26 | freq_spectrum = rfftfreq(N, dt) 27 | 28 | plt.figure(figsize=(8,4)) 29 | plt.plot(freq_spectrum, 2.0/N *np.abs(Y_spectrum), 'black') 30 | plt.ylabel('Amplitude (g)', fontsize=25), plt.xlabel('frequency (Hz)', fontsize=25) 31 | plt.grid() 32 | plt.show() -------------------------------------------------------------------------------- /Chapter18_VCM_FaultDetectionDiagnosis/CWRU_SVM_FaultClassification.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Fault Classification for CWRU Motor Dataset using SVM 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | # Code has been adapted from the work (Copyright (c) 2022 Biswajit Sahoo) of Biswajit Sahoo (https://github.com/biswajitsahoo1111/cbm_codes_open) which is 5 | # shared under MIT License (https://github.com/biswajitsahoo1111/cbm_codes_open/blob/master/LICENSE.md) 6 | 7 | ''' 8 | MIT License 9 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 10 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 12 | ''' 13 | 14 | #%% packages 15 | import numpy as np, pandas as pd 16 | import matplotlib.pyplot as plt 17 | 18 | from sklearn.model_selection import train_test_split 19 | from sklearn.preprocessing import StandardScaler 20 | from sklearn.model_selection import GridSearchCV 21 | 22 | from sklearn.svm import SVC 23 | from sklearn.metrics import confusion_matrix 24 | import seaborn as sns 25 | 26 | #%% read feature matrix 27 | featureData = pd.read_csv('feature_timeDomain_48k_2048_load_1HP.csv') 28 | print(featureData.head()) 29 | featureData['faultType'] = pd.Categorical(featureData['faultType']) #designates last column as categorical variable 30 | 31 | #%% generate training and test datasets 32 | train_data, test_data = train_test_split(featureData, test_size = 750, stratify = featureData['faultType'], random_state = 1234) 33 | print(train_data['faultType'].value_counts()) 34 | print(test_data['faultType'].value_counts()) 35 | 36 | #%% scale data 37 | scaler = StandardScaler() 38 | train_data_scaled = scaler.fit_transform(train_data.iloc[:,:-1]) 39 | test_data_scaled = scaler.transform(test_data.iloc[:,:-1]) 40 | 41 | #%% find best SVM hyperparameters via grid-search 42 | hyperParameters = {'C':[0.1, 1, 10, 50, 100, 300], 'gamma':[0.01, 0.05, 0.1, 0.5, 1, 10]} 43 | svm_clf = GridSearchCV(SVC(), hyperParameters, cv= 5) 44 | svm_clf.fit(train_data_scaled, train_data['faultType']) 45 | print(svm_clf.best_params_) 46 | 47 | #%% predict fault classes 48 | train_pred = svm_clf.predict(train_data_scaled) 49 | test_pred = svm_clf.predict(test_data_scaled) 50 | 51 | #%% generate confusion matrices 52 | CM_train = confusion_matrix(train_data['faultType'], train_pred) 53 | CM_test = confusion_matrix(test_data['faultType'], test_pred) 54 | 55 | fault_type = ['C1','C2','C3','C4','C5','C6','C7','C8','C9','C10'] # small labels for each fault type 56 | plt.figure(1,figsize=(20,10)) 57 | plt.subplot(121) 58 | sns.heatmap(CM_train, annot= True, fmt = "d", xticklabels=fault_type, yticklabels=fault_type, cmap = "Blues", cbar = False) 59 | plt.title('Training Confusion Matrix', fontsize=25) 60 | plt.xlabel('Predicted fault type', fontsize=25) 61 | plt.ylabel('True fault type', fontsize=25) 62 | 63 | plt.subplot(122) 64 | sns.heatmap(CM_train/155, annot= True, xticklabels=fault_type, yticklabels=fault_type, cmap = "Blues", cbar = False) 65 | # 155 is the number of samples for each fault type in training dataset 66 | plt.title('Training Confusion Matrix (in %age)', fontsize=25) 67 | plt.xlabel('Predicted fault type', fontsize=25) 68 | plt.ylabel('True fault type', fontsize=25) 69 | plt.show() 70 | 71 | plt.figure(2,figsize=(20,10)) 72 | plt.subplot(121) 73 | sns.heatmap(CM_test, annot = True, xticklabels=fault_type, yticklabels=fault_type, cmap = "Blues", cbar = False) 74 | plt.title('Test Confusion Matrix', fontsize=25) 75 | plt.xlabel('Predicted fault type', fontsize=25) 76 | plt.ylabel('True fault type', fontsize=25) 77 | 78 | plt.subplot(122) 79 | sns.heatmap(CM_test/75, annot = True, xticklabels=fault_type, yticklabels=fault_type, cmap = "Blues", 
cbar = False) 80 | plt.title('Test Confusion Matrix (in %age)', fontsize=25) 81 | plt.xlabel('Predicted fault type', fontsize=25) 82 | plt.ylabel('True fault type', fontsize=25) 83 | plt.show() 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /Chapter18_VCM_FaultDetectionDiagnosis/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter19_PrognosisConcepts/WindTurbine_HI.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Health Indicator Construction for a Wind Turbine 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | # packages 6 | import numpy as np, matplotlib.pyplot as plt 7 | import scipy.io 8 | from scipy.stats import kurtosis, skew 9 | 10 | plt.rcParams.update({'font.size': 12}) 11 | 12 | #%% function to compute time domain features 13 | def timeDomainFeatures(VibData): 14 | N = len(VibData) 15 | vibMean = np.mean(VibData) 16 | vibStd = np.std(VibData, ddof=1, axis=0) 17 | vibRMS = np.sqrt(np.sum(VibData ** 2)/N) 18 | vibPeak = np.max(np.abs(VibData)) 19 | vibPeak2Peak = np.max(VibData) - np.min(VibData) 20 | vibSkewness = skew(VibData, axis=0) 21 | vibKurtosis = kurtosis(VibData, fisher=False) 22 | vibShapeFactor = vibRMS / (np.mean(np.abs(VibData))) 23 | vibCrestFactor = np.max(np.abs(VibData)) / vibRMS 24 | vibImpulseFactor = np.max(np.abs(VibData)) / (np.mean(np.abs(VibData))) 25 | vibMarginFactor = np.max(np.abs(VibData)) / (np.mean(np.sqrt(abs(VibData))) ** 2) 26 | 27 | features = np.array([vibMean, vibStd, vibRMS, vibPeak, vibPeak2Peak, vibSkewness, vibKurtosis, vibShapeFactor, vibCrestFactor, vibImpulseFactor, vibMarginFactor]) 28 | return features 29 | 30 | #%% fetch data from all 50 files and plot 31 | # import glob 32 | # FilenamesList = glob.glob('data-2013*.mat') 33 | 34 | # plt.figure(figsize=(15,6)) 35 | # x_pos_start = 0 36 | # for fileName in FilenamesList: 37 | # matlab_data = scipy.io.loadmat(fileName, struct_as_record = False) 38 | # vib_data = matlab_data['vibration'][:,0] 39 | 40 | # x_pos_end = x_pos_start + vib_data.shape[0] 41 | # plt.plot(range(x_pos_start, x_pos_end), vib_data) 42 | 43 | # x_pos_start = x_pos_end # start position of next file data 44 | 45 | # plt.xlabel('sample #') 46 | # plt.ylabel('Acceleration (g)') 47 | # plt.grid() 48 | 49 | #%% collect feature values for each day 50 | import glob 51 | FilenamesList = glob.glob('data-2013*.mat') # fetch names of all relevant files 52 | 53 | Nfeatures = 11 54 | features50days = np.zeros((50, Nfeatures)) 55 | 56 | for i in range(len(FilenamesList)): 57 | matlab_data = scipy.io.loadmat(FilenamesList[i], struct_as_record = False) # read data from the file 58 | vib_data = matlab_data['vibration'][:,0] 59 | 60 | features = timeDomainFeatures(vib_data) 61 | features50days[i,:] = features 62 | 63 | # plot kurtosis 64 | plt.figure() 65 | plt.plot(features50days[:,6], '-o') 66 | plt.xlabel('day'), plt.ylabel('feature value') 67 | plt.grid() 68 | 69 | #%% smooth features using moving average 70 | import pandas as pd 71 | 72 | windowSize = 5 73 | features50days_smoothed = pd.DataFrame(features50days).rolling(windowSize).mean().values 74 | features50days_smoothed[:windowSize-1,:] = features50days[:windowSize-1,:]# replace nan in first 4 rows with original values 75 | 76 
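# -- Hedged aside (illustrative addition, not part of the original script) --
# An exponentially weighted moving average is a common alternative to the rolling
# mean used above; it weighs recent days more heavily and produces no leading NaNs.
# The smoothing factor alpha below is only an illustrative choice.
features50days_ewm = pd.DataFrame(features50days).ewm(alpha=0.3).mean().values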
| plt.figure() 77 | plt.plot(features50days[:, 5], '-o', label='raw feature') 78 | plt.plot(features50days_smoothed[:, 5], '-o', label='smoothed feature') 79 | plt.xlabel('day'), plt.ylabel('feature value') 80 | plt.title('Skewness') 81 | plt.legend(), plt.grid() 82 | 83 | #%% separate training data 84 | Ndays_train = 32 # ~ 2/3rd 85 | features_train = features50days_smoothed[:Ndays_train, :] 86 | features_all = features50days_smoothed 87 | 88 | #%% monotonicity of features 89 | from scipy.stats import spearmanr 90 | feature_monotonicity = np.zeros((Nfeatures,)) 91 | 92 | for feature in range(Nfeatures): 93 | result = spearmanr(range(Ndays_train), features_train[:,feature]) 94 | feature_monotonicity[feature] = result.statistic 95 | 96 | # bar plot 97 | featureNames = ['Mean', 'Std', 'RMS', 'Peak', 'Peak2Peak', 'Skewness', 'Kurtosis', 'ShapeFactor', 'CrestFactor', 'ImpulseFactor', 'MarginFactor'] 98 | 99 | plt.figure(figsize=(15,5)) 100 | plt.bar(range(Nfeatures), feature_monotonicity, tick_label=featureNames, color='navy', edgecolor='black') 101 | plt.xticks(rotation=45) 102 | plt.ylabel('Monotonicity') 103 | plt.grid(axis='y') 104 | 105 | # pick features with monotonicity >= 0.7 106 | featuresSelected = np.where(np.abs(feature_monotonicity) >= 0.7)[0] 107 | selectFeatures_train = features_train[:,featuresSelected] 108 | selectFeatures_all = features_all[:,featuresSelected] 109 | 110 | #%% perform PCA and extract scores along the first principal component 111 | from sklearn.decomposition import PCA 112 | from sklearn.preprocessing import StandardScaler 113 | 114 | scaler = StandardScaler().fit(selectFeatures_train) 115 | selectFeatures_train_normal = scaler.transform(selectFeatures_train) 116 | selectFeatures_all_normal = scaler.transform(selectFeatures_all) 117 | 118 | pca = PCA().fit(selectFeatures_train_normal) 119 | PCA_all_scores = pca.transform(selectFeatures_all_normal) 120 | 121 | plt.figure() 122 | plt.plot(PCA_all_scores[:,1],'-o', color='darkorange') 123 | plt.xlabel('day'), plt.ylabel('PC1 Score as HI') 124 | plt.grid() 125 | -------------------------------------------------------------------------------- /Chapter19_PrognosisConcepts/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter20_RULEstimation/PM_truth.txt: -------------------------------------------------------------------------------- 1 | 112 2 | 98 3 | 69 4 | 82 5 | 91 6 | 93 7 | 91 8 | 95 9 | 111 10 | 96 11 | 97 12 | 124 13 | 95 14 | 107 15 | 83 16 | 84 17 | 50 18 | 28 19 | 87 20 | 16 21 | 57 22 | 111 23 | 113 24 | 20 25 | 145 26 | 119 27 | 66 28 | 97 29 | 90 30 | 115 31 | 8 32 | 48 33 | 106 34 | 7 35 | 11 36 | 19 37 | 21 38 | 50 39 | 142 40 | 28 41 | 18 42 | 10 43 | 59 44 | 109 45 | 114 46 | 47 47 | 135 48 | 92 49 | 21 50 | 79 51 | 114 52 | 29 53 | 26 54 | 97 55 | 137 56 | 15 57 | 103 58 | 37 59 | 114 60 | 100 61 | 21 62 | 54 63 | 72 64 | 28 65 | 128 66 | 14 67 | 77 68 | 8 69 | 121 70 | 94 71 | 118 72 | 50 73 | 131 74 | 126 75 | 113 76 | 10 77 | 34 78 | 107 79 | 63 80 | 90 81 | 8 82 | 9 83 | 137 84 | 58 85 | 118 86 | 89 87 | 116 88 | 115 89 | 136 90 | 28 91 | 38 92 | 20 93 | 85 94 | 55 95 | 128 96 | 137 97 | 82 98 | 59 99 | 117 100 | 20 101 | -------------------------------------------------------------------------------- /Chapter20_RULEstimation/WindTurbine_RUL.py: -------------------------------------------------------------------------------- 1 | 
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## RUL Estimation for a Wind Turbine 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | # packages 6 | import numpy as np, matplotlib.pyplot as plt 7 | import scipy.io, scipy.optimize 8 | from scipy.stats import kurtosis, skew 9 | 10 | plt.rcParams.update({'font.size': 12}) 11 | 12 | #%% function to compute time domain features 13 | def timeDomainFeatures(VibData): 14 | N = len(VibData) 15 | vibMean = np.mean(VibData) 16 | vibStd = np.std(VibData, ddof=1, axis=0) 17 | vibRMS = np.sqrt(np.sum(VibData ** 2)/N) 18 | vibPeak = np.max(np.abs(VibData)) 19 | vibPeak2Peak = np.max(VibData) - np.min(VibData) 20 | vibSkewness = skew(VibData, axis=0) 21 | vibKurtosis = kurtosis(VibData, fisher=False) 22 | vibShapeFactor = vibRMS / (np.mean(np.abs(VibData))) 23 | vibCrestFactor = np.max(np.abs(VibData)) / vibRMS 24 | vibImpulseFactor = np.max(np.abs(VibData)) / (np.mean(np.abs(VibData))) 25 | vibMarginFactor = np.max(np.abs(VibData)) / (np.mean(np.sqrt(abs(VibData))) ** 2) 26 | 27 | features = np.array([vibMean, vibStd, vibRMS, vibPeak, vibPeak2Peak, vibSkewness, vibKurtosis, vibShapeFactor, vibCrestFactor, vibImpulseFactor, vibMarginFactor]) 28 | return features 29 | 30 | #%% fetch data from all 50 files and plot 31 | # import glob 32 | # FilenamesList = glob.glob('data-2013*.mat') 33 | 34 | # plt.figure(figsize=(15,6)) 35 | # x_pos_start = 0 36 | # for fileName in FilenamesList: 37 | # matlab_data = scipy.io.loadmat(fileName, struct_as_record = False) 38 | # vib_data = matlab_data['vibration'][:,0] 39 | 40 | # x_pos_end = x_pos_start + vib_data.shape[0] 41 | # plt.plot(range(x_pos_start, x_pos_end), vib_data) 42 | 43 | # x_pos_start = x_pos_end # start position of next file data 44 | 45 | # plt.xlabel('sample #') 46 | # plt.ylabel('Acceleration (g)') 47 | # plt.grid() 48 | 49 | #%% collect feature values for each day 50 | import glob 51 | FilenamesList = glob.glob('data-2013*.mat') # fetch names of all relevant files 52 | 53 | Nfeatures = 11 54 | features50days = np.zeros((50, Nfeatures)) 55 | 56 | for i in range(len(FilenamesList)): 57 | matlab_data = scipy.io.loadmat(FilenamesList[i], struct_as_record = False) # read data from the file 58 | vib_data = matlab_data['vibration'][:,0] 59 | 60 | features = timeDomainFeatures(vib_data) 61 | features50days[i,:] = features 62 | 63 | # plot kurtosis 64 | plt.figure() 65 | plt.plot(features50days[:,6], '-o') 66 | plt.xlabel('day'), plt.ylabel('feature value') 67 | plt.title('Kurtosis') 68 | plt.grid() 69 | 70 | #%% smooth features using moving average 71 | import pandas as pd 72 | 73 | windowSize = 5 74 | features50days_smoothed = pd.DataFrame(features50days).rolling(windowSize).mean().values 75 | features50days_smoothed[:windowSize-1,:] = features50days[:windowSize-1,:]# replace nan in first 4 rows with original values 76 | 77 | plt.figure() 78 | plt.plot(features50days[:, 5], '-o', label='raw feature') 79 | plt.plot(features50days_smoothed[:, 5], '-o', label='smoothed feature') 80 | plt.xlabel('day'), plt.ylabel('feature value') 81 | plt.title('Skewness') 82 | plt.legend(), plt.grid() 83 | 84 | #%% separate training data 85 | Ndays_train = 32 # ~ 2/3rd 86 | features_train = features50days_smoothed[:Ndays_train, :] 87 | features_all = features50days_smoothed 88 | 89 | #%% monotonicity of features 90 | from scipy.stats import spearmanr 91 | feature_monotonicity = np.zeros((Nfeatures,)) 92 | 93 | for feature in range(Nfeatures): 94 | 
result = spearmanr(range(Ndays_train), features_train[:,feature]) 95 | feature_monotonicity[feature] = result.statistic 96 | 97 | # bar plot 98 | featureNames = ['Mean', 'Std', 'RMS', 'Peak', 'Peak2Peak', 'Skewness', 'Kurtosis', 'ShapeFactor', 'CrestFactor', 'ImpulseFactor', 'MarginFactor'] 99 | 100 | plt.figure(figsize=(15,5)) 101 | plt.bar(range(Nfeatures), feature_monotonicity, tick_label=featureNames, color='navy', edgecolor='black') 102 | plt.xticks(rotation=45) 103 | plt.ylabel('Monotonicity') 104 | plt.grid(axis='y') 105 | 106 | # pick features with monotonicity >= 0.7 107 | featuresSelected = np.where(np.abs(feature_monotonicity) >= 0.7)[0] 108 | selectFeatures_train = features_train[:,featuresSelected] 109 | selectFeatures_all = features_all[:,featuresSelected] 110 | 111 | #%% perform PCA and extract scores along the first principal component 112 | from sklearn.decomposition import PCA 113 | from sklearn.preprocessing import StandardScaler 114 | 115 | scaler = StandardScaler().fit(selectFeatures_train) 116 | selectFeatures_train_normal = scaler.transform(selectFeatures_train) 117 | selectFeatures_all_normal = scaler.transform(selectFeatures_all) 118 | 119 | pca = PCA().fit(selectFeatures_train_normal) 120 | PCA_all_scores = pca.transform(selectFeatures_all_normal) 121 | 122 | plt.figure() 123 | plt.plot(PCA_all_scores[:,0],'-o', color='darkorange') 124 | plt.xlabel('day'), plt.ylabel('PC1 Score as HI') 125 | plt.grid() 126 | 127 | #%% fit exponential model (HI = aexp(bt) + c); t is in days 128 | PCA_train_scores = pca.transform(selectFeatures_train_normal) 129 | HI_train = PCA_train_scores[:,0] 130 | HI_train = HI_train - HI_train[0] 131 | 132 | def func(t, a, b): 133 | return a*np.exp(b*t) 134 | 135 | param_opt, param_cov = scipy.optimize.curve_fit(func, range(32), HI_train) 136 | 137 | plt.figure(figsize=(5,3)) 138 | plt.plot(range(32), HI_train,'-o', color='darkorange', label='actual HI') 139 | plt.plot(range(32), func(np.arange(32), *param_opt),'-o', color='maroon', label='predicted HI') 140 | plt.xlabel('day'), plt.ylabel('PC1 Score as HI') 141 | plt.legend() 142 | plt.grid() 143 | 144 | #%% forecast using the degradation model 145 | HI_all = PCA_all_scores[:,0] 146 | HI_all = HI_all - HI_all[0] 147 | 148 | plt.figure(figsize=(15,5)) 149 | plt.plot(range(50), HI_all,'-o', color='darkorange', label='actual HI') 150 | plt.plot(range(50), func(np.arange(50), *param_opt),'-o', color='maroon', label='predicted HI') 151 | plt.plot([0,60], [HI_all[-1],HI_all[-1]],'--', color='red') 152 | plt.xlabel('day'), plt.ylabel('PC1 Score as HI') 153 | plt.legend() 154 | plt.grid() -------------------------------------------------------------------------------- /Chapter20_RULEstimation/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter2_ScriptingEnvironment/NumpyBasics.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Numpy Basics 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | # create a 2D array 5 | import numpy as np 6 | 7 | arr2D = np.array([[1,4,6],[2,5,7]]) 8 | 9 | # getting information about arr2D 10 | print(arr2D.size) # returns 6, the no. of items 11 | print(arr2D.ndim) # returns 2, the no. 
of dimensions 12 | print(arr2D.shape) # returns tuple(2,3) corresponding to 2 rows & 3 columns 13 | 14 | # create a 1D array 15 | arr1D = np.array([1,4,6]) 16 | 17 | # getting information about arr1D 18 | print(arr1D.size) # returns 3, the no. of items 19 | print(arr1D.ndim) # returns 1, the no. of dimensions 20 | print(arr1D.shape) # returns tuple(3,) corresponding to 3 items 21 | 22 | #%% creating numpy arrays 23 | # creating sequence of numbers 24 | arr1 = np.arange(3, 6) # same as Python range function; results in array([3,4,5]) 25 | arr2 = np.arange(3, 9, 2) # the 3rd argument defines the step size; results in array([3,5,7]) 26 | arr3 = np.linspace(1,7,3) # creates evenly spaced 3 values from 1 to 7; results in array([1,4,7]) 27 | 28 | # creating special arrays 29 | arr4 = np.ones((2,1)) # array of shape (2,1) with all items as 1 30 | arr5 = np.zeros((2,2)) # all items as zero; often used as placeholder array at beginning of script 31 | arr6 = np.eye(2) # diagonal items as 1 32 | 33 | # adding axis to existing arrays (e.g., converting 1D array to 2D array) 34 | print(arr1[:, np.newaxis]) 35 | arr7 = arr1[:, None] # same as above 36 | 37 | # combining / stacking arrays 38 | print(np.hstack((arr1, arr2))) # horizontally stacks passed arrays 39 | print(np.vstack((arr1, arr2))) # vertically stacks passed arrays 40 | print(np.hstack((arr5,arr4))) # array 4 added as a column into arr5 41 | print(np.vstack((arr5,arr6))) # rows of array 6 added onto arr5 42 | 43 | #%% basic numpy functions 44 | print(arr2D.sum(axis=0)) 45 | print(arr2D.sum(axis=1)) 46 | 47 | #%% indexing arrays 48 | # accessing individual items 49 | print(arr2D[1,2]) # returns 7 50 | 51 | # slicing 52 | arr8 = np.arange(10).reshape((2,5)) # rearrange the 1D array into shape (2,5) 53 | print((arr8[0:1,1:3])) 54 | print((arr8[0,1:3])) # note that a 1D array is returned here instead of the 2D array above 55 | 56 | # accessing entire row or column 57 | print(arr8[1]) # returns 2nd row as array([5,6,7,8,9]); same as arr8[1,:] 58 | print(arr8[:, 4]) # returns items of 3rd column as a 1D array 59 | 60 | # extract a subarray from arr8 and modify it 61 | arr8_sub = arr8[:, :2] # columns 0 and 1 from all rows 62 | arr8_sub[1, 1] = 1000 63 | print(arr8) # arr8 gets modified as well!! 
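# -- Hedged aside (illustrative addition, not part of the original script) --
# Basic slices such as arr8[:, :2] return views that share memory with the parent
# array, which is why modifying arr8_sub above also modified arr8. NumPy can confirm
# this explicitly:
print(np.shares_memory(arr8, arr8_sub)) # True => arr8_sub is a view, not a copy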
64 | 65 | # use copy method for a separate copy 66 | arr8 = np.arange(10).reshape((2,5)) 67 | arr8_sub2 = arr8[:, :2].copy() 68 | arr8_sub2[1, 1] = 100 69 | print(arr8) 70 | 71 | # Fancy indexing 72 | # combination of simple and fancy indexing 73 | arr8_sub3 = arr8[:, [0, 1]] # note how columns are indexed via a list 74 | arr8_sub3[1, 1] = 100 # arr8_sub3 becomes same as arr8_sub2 but arr8 is not modified here 75 | print(arr8) 76 | 77 | # use boolean mask to select subarray 78 | arr8_sub4 = arr8[arr8 > 5] # returns array([6,7,8,9]), i.e., all values > 5 79 | arr8_sub4[0] = 0 # again, arr8 is not affected 80 | print(arr8) 81 | 82 | #%% vectorized operations 83 | vec1 = np.array([1,2,3,4]) 84 | vec2 = np.array([5,6,7,8]) 85 | vec_sum = vec1 + vec2 # returns array([6,8,10,12]); no need to loop through index 0 to 3 86 | 87 | # slightly more complex operation (computing distance between vectors) 88 | vec_distance = np.sqrt(np.sum((vec1 - vec2)**2)) # vec_distance = 8.0 89 | -------------------------------------------------------------------------------- /Chapter2_ScriptingEnvironment/PandasBasics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Chapter: The Scripting Environment\n", 8 | "\n", 9 | "\n", 10 | "# Topic: Pandas Basics" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": {}, 17 | "outputs": [ 18 | { 19 | "name": "stdout", 20 | "output_type": "stream", 21 | "text": [ 22 | "0 10\n", 23 | "1 8\n", 24 | "2 6\n", 25 | "dtype: int64\n", 26 | " id value\n", 27 | "0 1 10\n", 28 | "1 1 8\n", 29 | "2 1 6\n", 30 | " id value\n", 31 | "0 1 10\n", 32 | "1 1 8\n", 33 | "2 1 6\n" 34 | ] 35 | } 36 | ], 37 | "source": [ 38 | "# create a series (1D structure)\n", 39 | "import pandas as pd\n", 40 | "\n", 41 | "data = [10,8,6]\n", 42 | "s = pd.Series(data) # can pass numpy array as well\n", 43 | "print(s)\n", 44 | "\n", 45 | "# create a dataframe\n", 46 | "data = [[1,10],[1,8],[1,6]]\n", 47 | "df = pd.DataFrame(data, columns=['id', 'value'])\n", 48 | "print(df)\n", 49 | "\n", 50 | "# dataframe from series\n", 51 | "s2 = pd.Series([1,1,1])\n", 52 | "df = pd.DataFrame({'id':s2, 'value':s})\n", 53 | "print(df) # same as above" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "0 1\n", 66 | "1 1\n", 67 | "2 1\n", 68 | "Name: id, dtype: int64\n", 69 | "0 1\n", 70 | "1 1\n", 71 | "2 1\n", 72 | "Name: id, dtype: int64\n", 73 | " id\n", 74 | "0 1\n", 75 | "1 1\n", 76 | "2 1\n", 77 | " id value\n", 78 | "100 1 10\n", 79 | "101 1 8\n", 80 | "102 1 6\n", 81 | "id 1\n", 82 | "value 8\n", 83 | "Name: 101, dtype: int64\n", 84 | "id 1\n", 85 | "value 8\n", 86 | "Name: 101, dtype: int64\n", 87 | "8\n", 88 | "8\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "# data access\n", 94 | "# column(s) selection\n", 95 | "print(df['id']) # returns column 'id' as a series\n", 96 | "print(df.id) # same as above\n", 97 | "print(df[['id']]) # returns specified columns in the list as a dataframe\n", 98 | "\n", 99 | "# row selection\n", 100 | "df.index = [100, 101, 102] # changing row indices from [0,1,2] to [100,101,102]\n", 101 | "print(df)\n", 102 | "print(df.loc[101]) # returns 2nd row as a series; can provide a list for multiple rows selection\n", 103 | "print(df.iloc[1]) # integer location-based selection; same 
result as above\n", 104 | "\n", 105 | "# individual item selection\n", 106 | "print(df.loc[101, 'value']) # returns 8\n", 107 | "print(df.iloc[1, 1]) # same as above" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | " id value\n", 120 | "100 2 40\n", 121 | "101 2 32\n", 122 | "102 2 24\n", 123 | " id value\n", 124 | "100 1 10\n", 125 | "101 1 8\n", 126 | "102 1 6\n", 127 | "100 2 40\n", 128 | "101 2 32\n", 129 | "102 2 24\n", 130 | " value\n", 131 | "id \n", 132 | "1 8.0\n", 133 | "2 32.0\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "# data aggregation exanple\n", 139 | "# create another dataframe using df\n", 140 | "df2 = df.copy()\n", 141 | "df2.id = 2 # make all items in column 'id' as 2\n", 142 | "df2.value *= 4 # multiply all items in column 'value' by 4\n", 143 | "print(df2)\n", 144 | "\n", 145 | "# combine df and df2\n", 146 | "df3 = df.append(df2) # a new object is retuned unlike Python’s append function\n", 147 | "print(df3)\n", 148 | "\n", 149 | "# id-based mean values computation\n", 150 | "print(df3.groupby('id').mean()) # returns a dataframe" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# file I/O\n", 160 | "# reading from excel and csv files\n", 161 | "dataset1 = pd.read_excel('filename.xlsx') # several parameter options are available to customize what data is read\n", 162 | "dataset2 = pd.read_csv('filename.xlsx')" 163 | ] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 3 (ipykernel)", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.9.7" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 2 187 | } 188 | -------------------------------------------------------------------------------- /Chapter2_ScriptingEnvironment/PandasBasics.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Pandas Basics 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | # create a series (1D structure) 5 | import pandas as pd 6 | 7 | data = [10,8,6] 8 | s = pd.Series(data) # can pass numpy array as well 9 | print(s) 10 | 11 | # create a dataframe 12 | data = [[1,10],[1,8],[1,6]] 13 | df = pd.DataFrame(data, columns=['id', 'value']) 14 | print(df) 15 | 16 | # dataframe from series 17 | s2 = pd.Series([1,1,1]) 18 | df = pd.DataFrame({'id':s2, 'value':s}) 19 | print(df) 20 | 21 | #%% data access 22 | # column(s) selection 23 | print(df['id']) # returns column 'id' as a series 24 | print(df.id) # same as above 25 | print(df[['id']]) # returns specified columns in the list as a dataframe 26 | 27 | # row selection 28 | df.index = [100, 101, 102] # changing row indices from [0,1,2] to [100,101,102] 29 | print(df) 30 | print(df.loc[101]) # returns 2nd row as a series; can provide a list for multiple rows selection 31 | print(df.iloc[1]) # integer location-based selection; same result as above 32 | 33 | # individual item selection 34 | 
print(df.loc[101, 'value']) # returns 8 35 | print(df.iloc[1, 1]) # same as above 36 | 37 | #%% data aggregation exanple 38 | # create another dataframe using df 39 | df2 = df.copy() 40 | df2.id = 2 # make all items in column 'id' as 2 41 | df2.value *= 4 # multiply all items in column 'value' by 4 42 | print(df2) 43 | 44 | # combine df and df2 45 | df3 = df.append(df2) # a new object is retuned unlike Python’s append function 46 | print(df3) 47 | 48 | # id-based mean values computation 49 | print(df3.groupby('id').mean()) # returns a dataframe 50 | 51 | #%% file I/O 52 | # reading from excel and csv files 53 | dataset1 = pd.read_excel('filename.xlsx') # several parameter options are available to customize what data is read 54 | dataset2 = pd.read_csv('filename.xlsx') 55 | -------------------------------------------------------------------------------- /Chapter2_ScriptingEnvironment/PythonBasics.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Python Basics 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% basic data types 6 | i = 2 # integer; type(i) = int 7 | f = 1.2 # floating-point number; type(f) = float 8 | s = 'two' # string; type(s) = str 9 | b = True # boolean; type(b) = bool 10 | 11 | # basic operations 12 | print(i+2) # displays 4 13 | print(f*2) # displays 2.4 14 | print(not b)# displays False 15 | 16 | #%% ordered sequences 17 | # different ways of creating lists 18 | list1 = [2,4,6] 19 | list2 = ['air',3,1,5] 20 | list3 = list(range(4)) # equals [0,1,2,3]; range function returns a sequence of numbers starting from 0 (default) with increments of 1 (default) 21 | list3.append(8) # returns [0,1,2,3,8]; append function adds new items to existing list 22 | list4 = list1 + list2 # equals [2,4,6,'air',3,1,5] 23 | list5 = [list2, list3] # nested list [['air', 3, 1, 5], [0, 1, 2, 3,8]] 24 | 25 | # creating tuples 26 | tuple1 = (0,1,'two') 27 | tuple2 = (list1, list2) # equals ([2, 4, 6, 8], ['air', 3, 1, 5]) 28 | 29 | #%% list comprehension 30 | # return powers of list items 31 | newList1 = [item**2 for item in list3] # equals [0,1,4,9,64] 32 | # nested list comprehension 33 | newList2 = [item2**2 for item2 in [item**2 for item in list3]] # equals [0,1,16,81,4096] 34 | 35 | #%% Indexing and slicing sequences 36 | # working with single item using positive or negative indexes 37 | print(list1[0]) # displays 2, the 1st item in list1 38 | list2[1] = 1 # list2 becomes ['air',1,1,5] 39 | print(list2[-2]) # displays 1, the 2nd last element in list2 40 | 41 | # accessing multiple items through slicing 42 | # Syntax: givenList[start,stop,step]; if unspecified, start=0, stop=list length, step=1 43 | print(list4[0:3]) # displays [2,4,6], the 1st, 2nd, 3rd items; note that index 3 item is excluded 44 | print(list4[:3]) # same as above 45 | print(list4[4:len(list4)]) # displays [3,1,5]; len() function returns the number of items in list 46 | print(list4[4:]) # same as above 47 | print(list4[::3]) # displays [2, 'air', 5] 48 | print(list4[::-1]) # displays list 4 backwards [5, 1, 3, 'air', 6, 4, 2] 49 | list4[2:4] = [0,0,0] # list 4 becomes [2, 4, 0, 0, 0, 3, 1, 5] 50 | 51 | #%% Execution control statements 52 | # conditional execution 53 | # selectively execute code based on condition 54 | if list1[0] > 0: 55 | list1[0] = 'positive' 56 | else: 57 | list1[0] = 'negative' 58 | 59 | # loop execution 60 | # code below computes sum 
of squares of numbers in list 3 61 | sum_of_squares = 0 62 | for i in range(len(list3)): 63 | sum_of_squares += list3[i]**2 64 | 65 | print(sum_of_squares) # displays 78 66 | 67 | #%% custom functions 68 | # define function instructions 69 | def sumSquares(givenList): 70 | sum_of_squares = 0 71 | for i in range(len(givenList)): 72 | sum_of_squares += givenList[i]**2 73 | 74 | return sum_of_squares 75 | 76 | # call/re-use the custom function multiple times 77 | print(sumSquares(list3)) # displays 78 78 | print(sumSquares(list4)) # displays 55 79 | 80 | -------------------------------------------------------------------------------- /Chapter2_ScriptingEnvironment/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter3_EDA/Dynamics_assessment.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Dynamics Assessment 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np, matplotlib.pyplot as plt 7 | 8 | plt.rcParams.update({'font.size': 20}) 9 | 10 | #%% generate data 11 | np.random.seed(1) 12 | N = 250 13 | e1 = np.random.normal(loc=0, scale=1, size=N) 14 | e2 = np.random.normal(loc=0, scale=0.2, size=N) 15 | 16 | x1 = np.zeros((N,)) 17 | for k in range(2,N): 18 | x1[k] = 0.7*x1[k-1] + e1[k] 19 | 20 | x2 = 0.5*x1 + e2 21 | 22 | plt.figure(figsize=(5,3)) 23 | plt.plot(x1, x2, '.', markersize=2, color='teal') 24 | plt.xlabel('x1'), plt.ylabel('x2') 25 | plt.grid() 26 | 27 | plt.figure(figsize=(5,3)) 28 | plt.plot(x1,'-', color='teal') 29 | plt.xlabel('sample #'), plt.ylabel('x1') 30 | plt.grid() 31 | 32 | plt.figure(figsize=(5,3)) 33 | plt.plot(x2,'-', color='teal') 34 | plt.xlabel('sample #'), plt.ylabel('x2') 35 | plt.grid() 36 | 37 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 38 | # check for dynamics via ACF plot 39 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 40 | 41 | from statsmodels.graphics.tsaplots import plot_acf 42 | from matplotlib.ticker import MaxNLocator 43 | 44 | conf_int = 2/np.sqrt(len(x1)) 45 | 46 | plot_acf(x1, lags= 20, alpha=None) # alpha=None avoids plot_acf's inbuilt confidence interval plotting 47 | plt.gca().axhspan(-conf_int, conf_int, facecolor='lightblue', alpha=0.5) # shaded confidence interval 48 | plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True)) # integer xtick labels 49 | plt.ylim([-0.2,1]) 50 | plt.xlabel('lag') 51 | plt.show() 52 | 53 | plot_acf(x2, lags= 20, alpha=None) # alpha=None avoids plot_acf's inbuilt confidence interval plotting 54 | plt.gca().axhspan(-conf_int, conf_int, facecolor='lightblue', alpha=0.5) # shaded confidence interval 55 | plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True)) # integer xtick labels 56 | plt.ylim([-0.2,1]) 57 | plt.xlabel('lag') 58 | plt.show() 59 | 60 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 61 | # generate non-dynamic data and plot ACFs for comparison 62 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 | 64 | #%% generate data 65 | np.random.seed(1) 66 | N = 250 67 | e1 = np.random.normal(loc=0, scale=1, size=N) 68 | e2 = np.random.normal(loc=0, scale=0.2, size=N) 69 | 70 | x1 = e1 71 | x2 = 0.5*x1 + e2 72 | 73 | 
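# -- Hedged aside (illustrative addition, not part of the original script) --
# For the white-noise x1 generated above, the ACF at nonzero lags should mostly stay
# inside the approximate 95% confidence band +/- 2/sqrt(N). A quick numerical check:
from statsmodels.tsa.stattools import acf
acf_x1 = acf(x1, nlags=20)[1:] # drop lag 0, which is always 1
print('lags outside band:', np.sum(np.abs(acf_x1) > 2/np.sqrt(len(x1))), 'out of 20')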
plt.figure(figsize=(5,3)) 74 | plt.plot(x1, x2, '.', markersize=1, color='teal') 75 | plt.xlabel('x1'), plt.ylabel('x2') 76 | plt.grid() 77 | 78 | plt.figure(figsize=(5,3)) 79 | plt.plot(x1,'-', color='teal') 80 | plt.xlabel('sample #'), plt.ylabel('x1') 81 | plt.grid() 82 | 83 | plt.figure(figsize=(5,3)) 84 | plt.plot(x2,'-', color='teal') 85 | plt.xlabel('sample #'), plt.ylabel('x2') 86 | plt.grid() 87 | 88 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 89 | # check for dynamics via ACF plot 90 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 91 | 92 | conf_int = 2/np.sqrt(len(x1)) 93 | 94 | plot_acf(x1, lags= 20, alpha=None) # alpha=None avoids plot_acf's inbuilt confidence interval plotting 95 | plt.gca().axhspan(-conf_int, conf_int, facecolor='lightblue', alpha=0.5) # shaded confidence interval 96 | plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True)) # integer xtick labels 97 | plt.ylim([-0.2,1]) 98 | plt.xlabel('lag') 99 | plt.show() 100 | 101 | plot_acf(x2, lags= 20, alpha=None) # alpha=None avoids plot_acf's inbuilt confidence interval plotting 102 | plt.gca().axhspan(-conf_int, conf_int, facecolor='lightblue', alpha=0.5) # shaded confidence interval 103 | plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True)) # integer xtick labels 104 | plt.ylim([-0.2,1]) 105 | plt.xlabel('lag') 106 | plt.show() -------------------------------------------------------------------------------- /Chapter3_EDA/Multimodality_assessment.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Multimodality Assessment 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np, matplotlib.pyplot as plt 7 | 8 | plt.rcParams.update({'font.size': 20}) 9 | 10 | #%% generate data 11 | np.random.seed(1) 12 | 13 | cov = np.array([[6, -3], [-3, 3.5]]) 14 | pts1 = np.random.multivariate_normal([0, 0], cov, size=500) 15 | cov = np.array([[6, -3], [-3, 3.5]]) 16 | pts2 = np.random.multivariate_normal([22, -18], cov, size=500) 17 | 18 | data = np.vstack((pts1, pts2)) 19 | 20 | plt.figure(figsize=(5,3)) 21 | plt.plot(data[:, 0], data[:, 1], '.', color='teal',alpha=0.5) 22 | plt.axis('equal') 23 | plt.xlabel('x1'), plt.ylabel('x2') 24 | plt.grid() 25 | plt.show() 26 | 27 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | # check for multi clusters via GMM 29 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 30 | from sklearn.mixture import GaussianMixture 31 | 32 | BICs = [] 33 | lowestBIC = np.inf 34 | for n_cluster in range(1, 5): 35 | gmm = GaussianMixture(n_components = n_cluster, random_state = 100).fit(data) 36 | BIC = gmm.bic(data) 37 | BICs.append(BIC) 38 | 39 | if BIC < lowestBIC: 40 | optimal_n_cluster = n_cluster 41 | lowestBIC = BIC 42 | 43 | plt.figure() 44 | plt.plot(range(1, 5), BICs, marker='o') 45 | plt.xlabel('Number of cluster') 46 | plt.ylabel('BIC') 47 | plt.show() 48 | 49 | #%% refit GMM with optimal number of clusters and show the cluster centers 50 | gmm = GaussianMixture(n_components = optimal_n_cluster, random_state = 100) 51 | cluster_label = gmm.fit_predict(data) 52 | 53 | plt.figure() 54 | plt.scatter(data[:, 0], data[:, 1], c = cluster_label, s=20, cmap='viridis') 55 | plt.axis('equal') 56 | plt.xlabel('x1'), plt.ylabel('x2') 57 | plt.grid() 58 | 59 
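# -- Hedged aside (illustrative addition, not part of the original script) --
# Besides the hard labels from fit_predict, a fitted GaussianMixture also provides
# soft assignments; each row of predict_proba sums to 1 across the clusters.
membership_probs = gmm.predict_proba(data)
print('cluster membership probabilities of first sample:', membership_probs[0, :])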
| cluster_centers = gmm.means_ 60 | cluster_plot_labels = ['Cluster ' + str(i+1) for i in range(optimal_n_cluster)] 61 | for i in range(optimal_n_cluster): 62 | plt.scatter(cluster_centers[i, 0], cluster_centers[i, 1], c='red', s=80, marker = '*', alpha=0.5) 63 | plt.annotate(cluster_plot_labels[i], (cluster_centers[i,0], cluster_centers[i,1])) 64 | 65 | 66 | -------------------------------------------------------------------------------- /Chapter3_EDA/NonGaussianity_assessment.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Non-Gaussianity Assessment 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np, matplotlib.pyplot as plt 7 | import scipy.spatial, scipy.stats 8 | 9 | plt.rcParams.update({'font.size': 20}) 10 | 11 | #%% generate data 12 | np.random.seed(1) 13 | 14 | # non-Gaussian 15 | x1 = np.random.uniform(size=1000) 16 | x2 = np.random.uniform(size=1000) 17 | e = np.random.normal(loc=0, scale=0.1, size=(1000,)) 18 | x3 = x1+x2+e 19 | data = np.vstack((x1,x2,x3)).T 20 | 21 | # Gaussian 22 | # cov = np.array([[5, 4], [4, 6]]) 23 | # data = np.random.multivariate_normal([4, 4], cov, size=1000) 24 | 25 | #%% 3D scatter plot 26 | from mpl_toolkits.mplot3d import Axes3D 27 | fig = plt.figure() 28 | ax = Axes3D(fig) 29 | 30 | ax.scatter(x1,x2,x3) 31 | ax.set_xlabel('x1') 32 | ax.set_ylabel('x2') 33 | ax.set_zlabel('x3') 34 | 35 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 36 | # check for non-gaussianity 37 | # Code adapted from KydLIB package (https://github.com/afraniomelo/KydLIB/) 38 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 39 | 40 | # find data statistics 41 | N, m = data.shape[0], data.shape[1] 42 | 43 | mu, Scov = np.mean(data, axis=0), np.cov(data, rowvar=False, ddof=1) 44 | Scov_inv = np.linalg.pinv(Scov) 45 | 46 | #%% calculate D statistic and the fractiles, and plot Dt vs Ft 47 | D = np.array([scipy.spatial.distance.mahalanobis(data[i,:], mu, Scov_inv)**2 for i in range(N)]) 48 | Dt, rt = np.sort(D), [(t-0.5)/N for t in range(1,N+1)] 49 | Ft = (m*(N**2-1)/(N*(N-m)))*np.array([scipy.stats.f.ppf(p, m, N-m) for p in rt]) 50 | 51 | plt.figure(figsize=(4,4)) 52 | plt.scatter(Dt, Ft, edgecolor='black', s=20) 53 | plt.xlabel('Dt', fontsize=12) 54 | plt.ylabel('Ft', fontsize=12) 55 | 56 | #%% fit a straight line 57 | linearFit = scipy.stats.linregress(Dt, Ft) 58 | intercept, slope = linearFit[1], linearFit[0] 59 | print('Intercept, Slope: ', intercept, slope) 60 | 61 | # draw staright line 62 | x = np.linspace(Dt[0], Dt[-1]) 63 | y = slope*x + intercept 64 | 65 | plt.figure(figsize=(4, 4)) 66 | plt.scatter(Dt, Ft, edgecolor='black', s=20) 67 | plt.plot(x, y, color='purple', ls='--') 68 | plt.xlabel('Dt', fontsize=12) 69 | plt.ylabel('Ft', fontsize=12) 70 | 71 | #%% perform significance tests and make inference 72 | Fbar = np.mean(Ft) 73 | Sfit = np.sqrt(((Ft-(intercept+slope*Dt))**2).sum()/(N-2)) 74 | Sfit_by_Fbar = Sfit/Fbar 75 | print('S / Fbar: ', Sfit_by_Fbar) 76 | 77 | if Sfit_by_Fbar > 0.15: 78 | gaussianity = False 79 | else: 80 | if np.abs(slope-1) < 0.2 and np.abs(intercept) < Fbar*0.05: 81 | gaussianity = True 82 | else: 83 | gaussianity = False 84 | 85 | print('Gaussianity: ', gaussianity) -------------------------------------------------------------------------------- 
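# -- Hedged aside (illustrative sketch, not a file from the repository) --
# The Gaussianity check used in NonGaussianity_assessment.py above can be wrapped into
# a small reusable helper; the thresholds (0.15, 0.2, 0.05) mirror the ones hard-coded
# in that script.
import numpy as np, scipy.spatial, scipy.stats

def is_multivariate_gaussian(X):
    N, m = X.shape
    mu = np.mean(X, axis=0)
    Scov_inv = np.linalg.pinv(np.cov(X, rowvar=False, ddof=1))
    # squared Mahalanobis distances and their theoretical F-based fractiles
    D = np.array([scipy.spatial.distance.mahalanobis(X[i,:], mu, Scov_inv)**2 for i in range(N)])
    Dt, rt = np.sort(D), [(t-0.5)/N for t in range(1,N+1)]
    Ft = (m*(N**2-1)/(N*(N-m)))*np.array([scipy.stats.f.ppf(p, m, N-m) for p in rt])
    # straight-line fit of Ft versus Dt and significance checks
    fit = scipy.stats.linregress(Dt, Ft)
    Fbar = np.mean(Ft)
    Sfit = np.sqrt(((Ft - (fit.intercept + fit.slope*Dt))**2).sum()/(N-2))
    if Sfit/Fbar > 0.15: # poor straight-line fit => non-Gaussian
        return False
    return (np.abs(fit.slope-1) < 0.2) and (np.abs(fit.intercept) < 0.05*Fbar)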
/Chapter3_EDA/Nonlinearity_assessment.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Nonlinearity Assessment 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np, matplotlib.pyplot as plt 7 | from ennemi import pairwise_corr 8 | 9 | np.random.seed(0) 10 | plt.rcParams.update({'font.size': 20}) 11 | 12 | #%% generate data 13 | t = np.linspace(0.01,2,100) 14 | x1 = np.zeros((100,1)) 15 | x2 = np.zeros((100,1)) 16 | x3 = np.zeros((100,1)) 17 | 18 | for i in range(100): 19 | x1[i] = t[i] + np.random.normal(scale=0.05) 20 | x2[i] = np.power(t[i],3) - 3*t[i] + np.random.normal(scale=0.05) 21 | x3[i] = -np.power(t[i],4) + 3*np.power(t[i],2) + np.random.normal(scale=0.03) 22 | 23 | 24 | #%% 3D scatter plot 25 | from mpl_toolkits.mplot3d import Axes3D 26 | fig = plt.figure() 27 | ax = Axes3D(fig) 28 | ax.scatter(x1,x2,x3) 29 | ax.set_xlabel('x1') 30 | ax.set_ylabel('x2') 31 | ax.set_zlabel('x3') 32 | 33 | #%% compute linear and Nonlinear correlation coefficients 34 | data = np.hstack((x1,x2,x3)) 35 | 36 | # pair-wise linear correlation coefficients 37 | rho_xy = np.corrcoef(data, rowvar=False) 38 | print('rho_xy:', rho_xy) 39 | 40 | # pair-wise MI 41 | rho_I_xy = pairwise_corr(data) 42 | print('MI:', rho_I_xy) 43 | 44 | # pair-wise nonlinear correlation coefficients 45 | rxy = rho_I_xy*(1-np.abs(rho_xy)) 46 | print('rxy:', rxy) 47 | 48 | #%% pair-wise scatter plots 49 | plt.figure(figsize=(5,3)) 50 | plt.plot(x1,x2,'.',color='black') 51 | plt.xlabel('x1'), plt.ylabel('x2') 52 | 53 | plt.figure(figsize=(5,3)) 54 | plt.plot(x1,x3,'.',color='black') 55 | plt.xlabel('x1'), plt.ylabel('x3') 56 | 57 | plt.figure(figsize=(5,3)) 58 | plt.plot(x2,x3,'.',color='black') 59 | plt.xlabel('x2'), plt.ylabel('x3') 60 | 61 | 62 | -------------------------------------------------------------------------------- /Chapter3_EDA/TEP_dataset_assessment.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Data characteristics assessment of TEP data 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np, matplotlib.pyplot as plt 7 | from ennemi import pairwise_corr 8 | import scipy.spatial, scipy.stats 9 | from statsmodels.graphics.tsaplots import acf 10 | from sklearn.mixture import GaussianMixture 11 | 12 | plt.rcParams.update({'font.size': 20}) 13 | 14 | #%% fetch TE data 15 | TEdata_noFault_train = np.loadtxt('d00.dat').T 16 | 17 | # select data for continuous measured and manipulated variables 18 | xmeas = TEdata_noFault_train[:,0:22] 19 | xmv = TEdata_noFault_train[:,41:52] 20 | data = np.hstack((xmeas, xmv)) 21 | 22 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 23 | ## Nonlinearity Assessment 24 | # Code adapted from KydLIB package (Copyright (c) 2022 afraniomelo; https://github.com/afraniomelo/KydLIB/) shared under MIT License (https://github.com/afraniomelo/KydLIB?tab=MIT-1-ov-file#readme) 25 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | 27 | #%% compute linear and Nonlinear correlation coefficients 28 | # pair-wise linear correlation coefficients 29 | rho_xy = np.corrcoef(data, rowvar=False) 30 | 31 | # pair-wise MI 
32 | rho_I_xy = pairwise_corr(data) 33 | np.fill_diagonal(rho_I_xy,1) # replace diagonal nan values with ones 34 | 35 | # pair-wise nonlinear correlation coefficients 36 | rxy = rho_I_xy*(1-np.abs(rho_xy)) 37 | 38 | # overall coefficients 39 | m = data.shape[1] 40 | r = np.sqrt((rxy**2).sum()/(m**2-m)) 41 | rho = np.sqrt(((rho_xy**2).sum()-m)/(m**2-m)) 42 | print('Overall nonlinearity coefficient:' , r) 43 | print('Overall linearity coefficient:' , rho) 44 | 45 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 46 | # check for non-gaussianity 47 | # Code adapted from KydLIB package (Copyright (c) 2022 afraniomelo; https://github.com/afraniomelo/KydLIB/) shared under MIT License (https://github.com/afraniomelo/KydLIB?tab=MIT-1-ov-file#readme) 48 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 49 | 50 | # find data statistics 51 | N = data.shape[0] 52 | m = data.shape[1] 53 | 54 | mu = np.mean(data, axis=0) 55 | S = np.cov(data, rowvar=False, ddof=1) 56 | S_inv = np.linalg.pinv(S) 57 | 58 | #%% calculate D statistic and the fractiles and plot Dt vs Ft 59 | D = np.array([scipy.spatial.distance.mahalanobis(data[i,:], mu, S_inv)**2 for i in range(N)]) 60 | Dt = np.sort(D) 61 | rt = [(t-0.5)/N for t in range(1,N+1)] 62 | 63 | factor = m*(N**2-1)/(N*(N-m)) 64 | Ft = factor*np.array([scipy.stats.f.ppf(p, m, N-m) for p in rt]) 65 | 66 | plt.figure(figsize=(4,4)) 67 | plt.scatter(Dt, Ft, edgecolor='black', s=20) 68 | plt.xlabel('Dt', fontsize=12) 69 | plt.ylabel('Ft', fontsize=12) 70 | 71 | #%% fit a straight line 72 | linearFit = scipy.stats.linregress(Dt,Ft) 73 | intercept, slope = linearFit[1], linearFit[0] 74 | print('Intercept, Slope: ', intercept, slope) 75 | 76 | # draw staright line 77 | x = np.linspace(Dt[0], Dt[-1]) 78 | y = slope*x + intercept 79 | 80 | plt.figure(figsize=(8, 8)) 81 | plt.scatter(Dt, Ft, edgecolor='black', s=40) 82 | plt.plot(x, y, color='purple', ls='--', lw=5) 83 | plt.xlabel('Dt', fontsize=25) 84 | plt.ylabel('Ft', fontsize=25) 85 | plt.grid() 86 | 87 | #%% perform significance tests and make inference 88 | Fbar = np.mean(Ft) 89 | S = np.sqrt(((Ft-(intercept+slope*Dt))**2).sum()/(N-2)) 90 | S_by_Fbar = S/Fbar 91 | print('S / Fbar: ', S_by_Fbar) 92 | 93 | if S_by_Fbar > 0.15: 94 | gaussianity = False 95 | else: 96 | if np.abs(slope-1) < 0.2 and np.abs(intercept) < Fbar*0.05: 97 | gaussianity = True 98 | else: 99 | gaussianity = False 100 | 101 | print('Gaussianity: ', gaussianity) 102 | 103 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 104 | ## Dynamics Assessment 105 | # Below code adapted from KydLIB package (Copyright (c) 2022 afraniomelo; https://github.com/afraniomelo/KydLIB/) shared under MIT License (https://github.com/afraniomelo/KydLIB?tab=MIT-1-ov-file#readme) 106 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 107 | # compute ACF values for each variable 108 | nlags = 50 109 | ACF_for_all_vars = [] # each entry contains ACF values for nlags for a single variable 110 | lags_x = np.arange(1,nlags+1) 111 | 112 | for i in range(m): 113 | ACF_single_var = acf(data[:,i],nlags=nlags)[1:] # not including the ACF at lag 0 114 | ACF_for_all_vars.append(ACF_single_var) 115 | 116 | # bar plot showing lags necessary to achieve <=0.5 autocorrelation for each variable 117 | lags_necessary = [np.searchsorted(-ACF_for_all_vars[i], -0.5) for i in range(len(ACF_for_all_vars))] 118 | 119 | plt.figure(figsize=(20,4)) 120 | 
plt.bar(range(1,len(lags_necessary)+1), np.array(lags_necessary), color='k'); 121 | plt.xlabel('Variable'), plt.ylabel('Number of lags') 122 | plt.xlim([0,m+1]); 123 | plt.xticks(np.arange(1,m+1)); 124 | plt.axhline(np.mean(lags_necessary), ls='-',c='k',label='mean') 125 | plt.axhline(np.median(lags_necessary),ls='-.', c='red',label='median') 126 | plt.legend() 127 | plt.title('Number of lags necessary to achieve 0.5 autocorrelation') 128 | plt.grid() 129 | 130 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 131 | ## Multimodality Assessment 132 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 133 | BICs = [] 134 | lowestBIC = np.inf 135 | for n_cluster in range(1, 10): 136 | gmm = GaussianMixture(n_components = n_cluster, random_state = 100) 137 | gmm.fit(data) 138 | BIC = gmm.bic(data) 139 | BICs.append(BIC) 140 | 141 | if BIC < lowestBIC: 142 | optimal_n_cluster = n_cluster 143 | lowestBIC = BIC 144 | 145 | plt.figure() 146 | plt.plot(range(1, len(BICs)+1), BICs, marker='o', ms=15) 147 | plt.xlabel('Number of cluster') 148 | plt.ylabel('BIC') 149 | plt.show() -------------------------------------------------------------------------------- /Chapter3_EDA/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter4_BestPractices/DataBalancing.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Data Balancing 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np, matplotlib.pyplot as plt 7 | 8 | plt.rcParams.update({'font.size': 20}) 9 | 10 | #%% generate data 11 | np.random.seed(1) 12 | 13 | cov = np.array([[6, -3], [-3, 2]]) 14 | pts_NOC = np.random.multivariate_normal([0, 0], cov, size=500) 15 | cov = np.array([[1, 1], [1, 2]]) 16 | pts_Faulty = np.random.multivariate_normal([5,2], cov, size=25) 17 | X = np.vstack((pts_NOC, pts_Faulty)) 18 | y = np.vstack((np.zeros((500,1)), np.ones((25,1)))) # labels [0=>NOC; 1=>Faulty] 19 | 20 | plt.figure(figsize=(4,3)) 21 | plt.plot(pts_NOC[:, 0], pts_NOC[:,1], '.', color='teal',alpha=0.5) 22 | plt.plot(pts_Faulty[:, 0], pts_Faulty[:, 1], '.', color='orange',alpha=0.5) 23 | plt.xlabel('x1'), plt.ylabel('x2') 24 | plt.grid() 25 | plt.show() 26 | 27 | plt.figure(figsize=(5,4)) 28 | plt.bar([0,1], [np.sum(y==0), np.sum(y==1)], tick_label=['Fault-free','Faulty'], color=['teal','orange']) 29 | plt.ylabel('Number of samples') 30 | 31 | #%% Oversampling 32 | from imblearn.over_sampling import SMOTE 33 | 34 | overSampler = SMOTE(sampling_strategy=0.33) 35 | X_smote, y_smote = overSampler.fit_resample(X, y) 36 | 37 | pts_NOC_smote = X_smote[y_smote==0] 38 | pts_Faulty_smote = X_smote[y_smote==1] 39 | 40 | plt.figure(figsize=(4,3)) 41 | plt.plot(pts_NOC_smote[:, 0], pts_NOC_smote[:,1], '.', color='teal',alpha=0.5) 42 | plt.plot(pts_Faulty_smote[:, 0], pts_Faulty_smote[:, 1], '.', color='orange',alpha=0.5) 43 | plt.axis('equal') 44 | plt.xlabel('x1'), plt.ylabel('x2') 45 | plt.grid() 46 | plt.show() 47 | 48 | plt.figure(figsize=(5,4)) 49 | plt.bar([0,1], [np.sum(y_smote==0), np.sum(y_smote==1)], tick_label=['Fault-free','Faulty'], color=['teal','orange']) 50 | plt.ylabel('Number of samples') 51 | 52 | #%% Undersampling 53 | from imblearn.under_sampling import 
RandomUnderSampler 54 | 55 | underSampler = RandomUnderSampler(sampling_strategy=0.5) 56 | X_balanced, y_balanced = underSampler.fit_resample(X_smote, y_smote) 57 | 58 | pts_NOC_balanced = X_balanced[y_balanced==0] 59 | pts_Faulty_balanced = X_balanced[y_balanced==1] 60 | 61 | plt.figure(figsize=(4,3)) 62 | plt.plot(pts_NOC_balanced[:, 0], pts_NOC_balanced[:,1], '.', color='teal',alpha=0.5) 63 | plt.plot(pts_Faulty_balanced[:, 0], pts_Faulty_balanced[:, 1], '.', color='orange',alpha=0.5) 64 | plt.axis('equal') 65 | plt.xlabel('x1'), plt.ylabel('x2') 66 | plt.grid() 67 | plt.show() 68 | 69 | plt.figure(figsize=(5,4)) 70 | plt.bar([0,1], [np.sum(y_balanced==0), np.sum(y_balanced==1)], tick_label=['Fault-free','Faulty'], color=['teal','orange']) 71 | plt.ylabel('Number of samples') -------------------------------------------------------------------------------- /Chapter4_BestPractices/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter5_UnivariateSPC/CUSUM_ControlChart.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## CUSUM Control Chart 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | # package 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | plt.rcParams.update({'font.size': 12}) 10 | np.random.seed(10) 11 | 12 | #%% generate data 13 | # NOC data 14 | N = 250 15 | x0 = np.random.normal(loc=10, scale=2, size=N) 16 | 17 | # faulty data 18 | N = 50 19 | x1 = np.random.normal(loc=11, scale=2, size=N) 20 | 21 | # combine data 22 | x = np.hstack((x0,x1)) 23 | 24 | #%% plots 25 | plt.figure(figsize=(10,3)) 26 | plt.plot(x0,'--',marker='o', markersize=4, color='teal') 27 | plt.grid() 28 | 29 | plt.figure(figsize=(10,3)) 30 | plt.plot(x,'--',marker='o', markersize=4, color='teal') 31 | plt.grid() 32 | 33 | #%% CUSUM chart 34 | mu, sigma = np.mean(x0), np.std(x0) 35 | k = 0.25*sigma 36 | H = 5*sigma 37 | 38 | S_positive = np.zeros((len(x),)) 39 | S_negative = np.zeros((len(x),)) 40 | 41 | S_positive[0] = 0 42 | S_negative[0] = 0 43 | 44 | for i in range(1,len(x)): 45 | S_positive[i] = np.max([0, x[i]-(mu+k) + S_positive[i-1]]) 46 | S_negative[i] = np.max([0, (mu-k)-x[i] + S_negative[i-1]]) 47 | 48 | plt.figure(figsize=(10,3)) 49 | plt.plot(S_positive,'--',marker='o', markersize=4, color='teal', label='S+') 50 | plt.plot(S_negative,'--',marker='*', markersize=4, color='steelblue', label='S-') 51 | plt.plot([1,len(x)],[H,H], color='red') 52 | plt.plot([1,len(x)],[0,0], '--', color='maroon') 53 | plt.xlabel('sample #'), plt.ylabel('CUSUM Statistic') 54 | plt.grid() 55 | plt.legend() 56 | -------------------------------------------------------------------------------- /Chapter5_UnivariateSPC/CUSUM_ControlChart_AerationTank.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## CUSUM control chart-based monitoring of aeration tank 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | # package 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | plt.rcParams.update({'font.size': 12}) 10 | np.random.seed(10) 11 | 12 | #%% read data 13 | data = np.loadtxt('aeration-rate.csv', skiprows=1) 14 | NOC_data = data[:200] 15 | 
test_data = data[200:] 16 | 17 | #%% plots 18 | plt.figure(figsize=(10,3)) 19 | plt.plot(data,'--',marker='o', markersize=4, color='teal') 20 | plt.xlabel('Time (min)', fontsize=15), plt.ylabel('Aeration rate (L/min)', fontsize=15) 21 | plt.title('Complete dataset', fontsize=15) 22 | plt.grid() 23 | 24 | plt.figure(figsize=(10,3)) 25 | plt.plot(NOC_data,'--',marker='o', markersize=4, color='teal') 26 | plt.xlabel('Time (min)', fontsize=15), plt.ylabel('Aeration rate (L/min)', fontsize=15) 27 | plt.title('First 200 samples taken as NOC data', fontsize=15) 28 | plt.grid() 29 | 30 | #%% CUSUM chart 31 | mu, sigma = np.mean(NOC_data), np.std(NOC_data) 32 | k, H = 0.25*sigma, 5*sigma 33 | 34 | S_positive = np.zeros((len(data),)) 35 | S_negative = np.zeros((len(data),)) 36 | 37 | S_positive[0], S_negative[0] = 0, 0 38 | 39 | for i in range(1,len(data)): 40 | S_positive[i] = np.max([0, data[i]-(mu+k) + S_positive[i-1]]) 41 | S_negative[i] = np.max([0, (mu-k)-data[i] + S_negative[i-1]]) 42 | 43 | plt.figure(figsize=(10,3)) 44 | plt.plot(S_positive,'--',marker='o', markersize=4, color='teal', label='S+') 45 | plt.plot(S_negative,'--',marker='*', markersize=4, color='steelblue', label='S-') 46 | plt.plot([1,len(data)],[H,H], color='red'), plt.plot([1,len(data)],[0,0], '--', color='maroon') 47 | plt.xlabel('sample #', fontsize=20), plt.ylabel('CUSUM Statistic', fontsize=20) 48 | plt.grid() 49 | plt.legend() 50 | -------------------------------------------------------------------------------- /Chapter5_UnivariateSPC/CUSUM_intro.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## CUSUM Introduction 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | # package 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | plt.rcParams.update({'font.size': 12}) 10 | np.random.seed(10) 11 | 12 | #%% generate data 13 | # NOC data 14 | N = 250 15 | x0 = np.random.normal(loc=10, scale=2, size=N) 16 | 17 | # faulty data 18 | N = 50 19 | x1 = np.random.normal(loc=11, scale=2, size=N) 20 | 21 | # combine data 22 | x = np.hstack((x0,x1)) 23 | 24 | #%% plots 25 | plt.figure(figsize=(10,3)) 26 | plt.plot(x0,'--',marker='o', markersize=4, color='teal') 27 | plt.grid() 28 | 29 | plt.figure(figsize=(10,3)) 30 | plt.plot(x,'--',marker='o', markersize=4, color='teal') 31 | plt.grid() 32 | 33 | #%% CUSUM chart 34 | mu = np.mean(x0) 35 | 36 | S = np.zeros((len(x),)) 37 | S[0] = 0 38 | 39 | 40 | for i in range(1,len(S)): 41 | S[i] = (x[i]-mu) + S[i-1] 42 | 43 | 44 | plt.figure(figsize=(10,3)) 45 | plt.plot(S,'--',marker='o', markersize=4, color='teal') 46 | plt.plot([1,len(S)],[0,0], '--', color='maroon') 47 | plt.xlabel('sample #'), plt.ylabel('CUSUM Statistic') 48 | plt.grid() 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /Chapter5_UnivariateSPC/EWMA_ControlChart.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## EWMA Control Chart 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | # package 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | plt.rcParams.update({'font.size': 12}) 10 | np.random.seed(10) 11 | 12 | #%% generate data 13 | # NOC data 14 | N = 250 15 | x0 = np.random.normal(loc=10, scale=2, size=N) 16 
| 17 | # faulty data 18 | N = 100 19 | x1 = np.random.normal(loc=11, scale=2, size=N) 20 | 21 | # combine data 22 | x = np.hstack((x0,x1)) 23 | 24 | #%% plots 25 | plt.figure(figsize=(10,3)) 26 | plt.plot(x0,'--',marker='o', markersize=4, color='teal') 27 | plt.grid() 28 | 29 | plt.figure(figsize=(10,3)) 30 | plt.plot(x,'--',marker='o', markersize=4, color='teal') 31 | plt.grid() 32 | 33 | #%% EWMA chart 34 | mu, sigma = np.mean(x0), np.std(x0) 35 | smoothFactor = 0.1 36 | LCL = mu - 3*sigma*np.sqrt(smoothFactor/(2-smoothFactor)) 37 | UCL = mu + 3*sigma*np.sqrt(smoothFactor/(2-smoothFactor)) 38 | 39 | z = np.zeros((len(x),)) 40 | z[0] = mu 41 | 42 | for i in range(1,len(x)): 43 | z[i] = smoothFactor*x[i] + (1-smoothFactor)*z[i-1] 44 | 45 | plt.figure(figsize=(10,3)) 46 | plt.plot(z,'--',marker='o', markersize=4, color='teal') 47 | plt.plot([1,len(x)],[LCL,LCL], color='red'), plt.plot([1,len(x)],[UCL,UCL], color='red') 48 | plt.plot([1,len(x)],[mu,mu], '--', color='maroon') 49 | plt.xlabel('sample #'), plt.ylabel('EWMA Statistic') 50 | plt.grid() 51 | -------------------------------------------------------------------------------- /Chapter5_UnivariateSPC/ShewhartControlChart.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Shewhart Control Chart 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | # package 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | plt.rcParams.update({'font.size': 12}) 10 | np.random.seed(10) 11 | 12 | #%% generate data 13 | # NOC data 14 | N = 250 15 | x0 = np.random.normal(loc=10, scale=2, size=N) 16 | 17 | # faulty data 18 | N = 50 19 | x1 = np.random.normal(loc=11, scale=2, size=N) 20 | 21 | # combine data 22 | x = np.hstack((x0,x1)) 23 | 24 | #%% fit Shewhart model and plot chart for NOC data 25 | mu, sigma = np.mean(x0), np.std(x0) 26 | UCL, LCL = mu + 3*sigma, mu - 3*sigma 27 | 28 | plt.figure(figsize=(10,3)) 29 | plt.plot(x0,'--',marker='o', markersize=4, color='teal') 30 | plt.plot([1,len(x0)],[UCL,UCL], color='red'), plt.plot([1,len(x0)],[LCL,LCL], color='red') 31 | plt.plot([1,len(x0)],[mu,mu], '--', color='maroon') 32 | plt.xlabel('sample #'), plt.ylabel('x') 33 | plt.grid() 34 | 35 | #%% control chart for combined data 36 | plt.figure(figsize=(10,3)) 37 | plt.plot(x,'--',marker='o', markersize=4, color='teal') 38 | plt.plot([1,len(x)],[UCL,UCL], color='red') 39 | plt.plot([1,len(x)],[LCL,LCL], color='red') 40 | plt.plot([1,len(x)],[mu,mu], '--', color='maroon') 41 | plt.xlabel('sample #'), plt.ylabel('x') 42 | plt.grid() 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /Chapter5_UnivariateSPC/aeration-rate.csv: -------------------------------------------------------------------------------- 1 | Aeration 2 | 23.1 3 | 23.6 4 | 25.8 5 | 24.1 6 | 23.0 7 | 23.5 8 | 24.8 9 | 24.5 10 | 25.8 11 | 24.7 12 | 23.9 13 | 24.5 14 | 23.9 15 | 22.6 16 | 24.9 17 | 22.9 18 | 23.7 19 | 24.4 20 | 25.3 21 | 25.0 22 | 26.0 23 | 23.4 24 | 24.2 25 | 26.3 26 | 25.2 27 | 21.1 28 | 22.1 29 | 23.7 30 | 25.5 31 | 25.3 32 | 24.7 33 | 24.1 34 | 24.7 35 | 24.1 36 | 22.9 37 | 22.6 38 | 22.0 39 | 22.5 40 | 23.5 41 | 24.2 42 | 23.9 43 | 21.9 44 | 21.9 45 | 25.7 46 | 26.6 47 | 26.7 48 | 24.0 49 | 22.5 50 | 22.9 51 | 22.8 52 | 23.0 53 | 26.0 54 | 25.1 55 | 24.9 56 | 22.9 57 | 20.9 58 | 22.3 59 | 25.4 60 | 26.8 61 | 26.6 62 | 22.3 63 | 23.7 64 | 23.9 65 
| 24.2 66 | 24.6 67 | 23.4 68 | 21.3 69 | 22.1 70 | 24.3 71 | 24.2 72 | 23.0 73 | 23.6 74 | 24.1 75 | 24.6 76 | 24.2 77 | 22.9 78 | 24.5 79 | 25.6 80 | 25.7 81 | 22.8 82 | 23.4 83 | 22.5 84 | 22.8 85 | 22.6 86 | 21.5 87 | 25.0 88 | 25.1 89 | 23.9 90 | 22.9 91 | 23.8 92 | 26.0 93 | 26.9 94 | 23.6 95 | 20.9 96 | 20.8 97 | 22.3 98 | 26.5 99 | 26.3 100 | 23.2 101 | 21.7 102 | 24.1 103 | 25.4 104 | 24.4 105 | 22.8 106 | 22.1 107 | 25.5 108 | 26.7 109 | 26.8 110 | 23.9 111 | 22.1 112 | 21.9 113 | 23.4 114 | 25.1 115 | 26.3 116 | 25.3 117 | 24.1 118 | 24.6 119 | 25.4 120 | 24.6 121 | 22.8 122 | 24.2 123 | 24.8 124 | 26.0 125 | 24.4 126 | 22.1 127 | 23.2 128 | 23.7 129 | 24.8 130 | 22.9 131 | 21.1 132 | 23.5 133 | 25.4 134 | 25.9 135 | 22.4 136 | 23.2 137 | 25.5 138 | 26.3 139 | 25.4 140 | 25.7 141 | 22.9 142 | 21.8 143 | 23.6 144 | 24.0 145 | 24.2 146 | 24.8 147 | 23.7 148 | 24.0 149 | 24.7 150 | 23.5 151 | 22.4 152 | 24.2 153 | 24.7 154 | 25.0 155 | 23.4 156 | 24.3 157 | 23.1 158 | 24.0 159 | 24.9 160 | 24.7 161 | 24.6 162 | 24.4 163 | 23.2 164 | 23.0 165 | 24.1 166 | 23.2 167 | 22.5 168 | 25.5 169 | 23.5 170 | 21.7 171 | 23.8 172 | 26.1 173 | 26.2 174 | 24.3 175 | 23.4 176 | 22.5 177 | 22.7 178 | 23.7 179 | 23.5 180 | 24.6 181 | 23.0 182 | 22.0 183 | 22.0 184 | 24.2 185 | 23.9 186 | 24.4 187 | 25.4 188 | 24.6 189 | 24.2 190 | 23.1 191 | 23.8 192 | 23.0 193 | 23.4 194 | 22.7 195 | 23.2 196 | 25.2 197 | 25.8 198 | 24.0 199 | 22.3 200 | 22.1 201 | 22.9 202 | 24.6 203 | 23.8 204 | 26.4 205 | 25.7 206 | 22.8 207 | 22.8 208 | 23.7 209 | 24.0 210 | 24.7 211 | 24.6 212 | 25.0 213 | 23.7 214 | 25.1 215 | 23.3 216 | 23.1 217 | 24.2 218 | 25.7 219 | 23.6 220 | 21.7 221 | 24.5 222 | 25.8 223 | 25.5 224 | 23.2 225 | 24.6 226 | 24.8 227 | 24.6 228 | 23.8 229 | 24.4 230 | 23.9 231 | 26.0 232 | 26.7 233 | 22.7 234 | 21.2 235 | 22.9 236 | 23.0 237 | 22.8 238 | 23.2 239 | 24.5 240 | 24.8 241 | 23.6 242 | 23.2 243 | 26.6 244 | 27.1 245 | 25.1 246 | 23.8 247 | 22.8 248 | 23.4 249 | 24.5 250 | 24.1 251 | 23.3 252 | 24.0 253 | 23.9 254 | 23.7 255 | 23.5 256 | 24.6 257 | 25.1 258 | 24.0 259 | 23.1 260 | 24.4 261 | 24.5 262 | 25.4 263 | 24.4 264 | 23.2 265 | 21.3 266 | 22.1 267 | 22.7 268 | 26.1 269 | 26.3 270 | 24.8 271 | 23.3 272 | 20.4 273 | 22.1 274 | 24.6 275 | 24.1 276 | 23.9 277 | 25.5 278 | 24.8 279 | 24.8 280 | 23.6 281 | 23.4 282 | 26.0 283 | 26.3 284 | 25.8 285 | 23.5 286 | 24.3 287 | 26.3 288 | 26.6 289 | 24.6 290 | 24.5 291 | 24.1 292 | 22.8 293 | 24.0 294 | 24.8 295 | 24.4 296 | 25.5 297 | 24.5 298 | 24.1 299 | 21.9 300 | 21.5 301 | 23.0 302 | 24.5 303 | 24.6 304 | 25.2 305 | 22.7 306 | 23.5 307 | 25.3 308 | 25.4 309 | 25.3 310 | 25.5 311 | 23.7 312 | 23.8 313 | 23.7 314 | 23.6 315 | 22.1 316 | 22.6 317 | 24.8 318 | 26.5 319 | 25.4 320 | 23.8 321 | 23.2 322 | 23.2 323 | 25.2 324 | 26.2 325 | 25.2 326 | 24.1 327 | 24.1 328 | 25.2 329 | 26.7 330 | 26.7 331 | 24.1 332 | 23.8 333 | 23.9 334 | 24.0 335 | 24.9 336 | 25.7 337 | 24.4 338 | 24.3 339 | 24.1 340 | 25.6 341 | 25.5 342 | 24.4 343 | 24.1 344 | 22.7 345 | 23.6 346 | 25.3 347 | 24.9 348 | 23.7 349 | 24.5 350 | 25.4 351 | 25.1 352 | 25.6 353 | 25.2 354 | 23.7 355 | 24.9 356 | 27.2 357 | 27.0 358 | 23.4 359 | 23.1 360 | 24.7 361 | 25.1 362 | 26.0 363 | 26.3 364 | 24.1 365 | 22.6 366 | 21.4 367 | 22.1 368 | 23.9 369 | 26.2 370 | 25.2 371 | 23.8 372 | 23.3 373 | 26.1 374 | 25.0 375 | 24.9 376 | 24.2 377 | 23.1 378 | 23.3 379 | 26.7 380 | 26.3 381 | 24.9 382 | 23.1 383 | 22.4 384 | 23.8 385 | 24.1 386 | 25.6 387 | 23.5 388 | 23.1 389 | 23.8 390 | 25.9 391 | 
26.2 392 | 24.7 393 | 24.8 394 | 25.5 395 | 25.5 396 | 24.2 397 | 23.9 398 | 23.0 399 | 24.7 400 | 25.3 401 | 25.5 402 | 24.2 403 | 20.8 404 | 21.1 405 | 23.2 406 | 26.0 407 | 25.5 408 | 23.4 409 | 20.6 410 | 23.6 411 | 25.6 412 | 25.4 413 | 23.1 414 | 23.1 415 | 22.8 416 | 23.8 417 | 23.3 418 | 24.9 419 | 24.1 420 | 25.3 421 | 25.2 422 | 25.0 423 | 22.9 424 | 24.4 425 | 25.7 426 | 26.2 427 | 25.6 428 | 24.5 429 | 24.5 430 | 23.0 431 | 23.7 432 | 23.3 433 | 23.6 434 | 23.2 435 | 25.2 436 | 25.3 437 | 24.3 438 | 23.2 439 | 23.8 440 | 24.4 441 | 24.1 442 | 21.9 443 | 22.6 444 | 22.8 445 | 24.4 446 | 25.3 447 | 24.6 448 | 23.1 449 | 24.4 450 | 22.8 451 | 23.7 452 | 24.0 453 | 24.3 454 | 24.1 455 | 23.1 456 | 25.1 457 | 25.0 458 | 23.7 459 | 25.5 460 | 25.9 461 | 24.7 462 | 23.6 463 | 23.7 464 | 23.9 465 | 23.6 466 | 24.9 467 | 25.4 468 | 25.0 469 | 24.0 470 | 25.0 471 | 23.4 472 | 24.9 473 | 26.4 474 | 26.1 475 | 22.9 476 | 22.7 477 | 23.1 478 | 23.9 479 | 23.9 480 | 21.7 481 | 22.3 482 | 23.2 483 | 24.7 484 | 24.1 485 | 23.8 486 | 25.6 487 | 24.0 488 | 21.7 489 | 22.8 490 | 23.7 491 | 23.6 492 | 24.9 493 | 25.1 494 | 25.6 495 | 23.5 496 | 23.4 497 | 24.0 498 | 23.2 499 | 22.7 500 | 23.6 501 | 24.7 502 | 24.3 503 | 24.6 504 | 23.6 505 | 23.1 506 | 24.0 507 | 25.9 508 | 24.5 509 | 24.7 510 | 23.2 511 | 24.8 512 | 23.5 513 | 23.6 514 | 22.6 515 | 25.7 516 | 26.2 517 | 25.0 518 | 25.5 519 | 25.2 520 | 24.1 521 | 24.4 522 | 24.5 523 | 25.1 524 | 24.9 525 | 24.3 526 | 23.8 527 | 24.6 528 | 25.6 529 | 24.9 530 | 25.6 531 | 24.7 532 | 21.7 533 | 22.0 534 | 25.8 535 | 27.4 536 | 25.2 537 | 22.7 538 | 24.3 539 | 24.7 540 | 25.7 541 | 25.5 542 | 24.3 543 | 23.5 544 | 22.9 545 | 25.5 546 | 24.6 547 | 24.4 548 | 23.6 549 | 22.6 550 | 22.3 551 | 26.0 552 | 26.5 553 | 25.2 554 | 24.6 555 | 25.3 556 | 24.5 557 | 24.6 558 | 23.6 559 | 25.7 560 | 26.8 561 | 25.2 562 | 24.2 563 | 22.9 564 | 24.9 565 | 23.9 566 | 22.4 567 | 24.7 568 | 28.1 569 | 27.6 570 | 25.4 571 | 22.9 572 | 24.0 573 | 25.4 574 | 25.1 575 | -------------------------------------------------------------------------------- /Chapter5_UnivariateSPC/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter6_PatternMatching/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter7_MSPM_SteadyState1/DimensionalityReduction.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Dimensionality reduction via PCA 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | #%% import required packages 5 | import numpy as np, pandas as pd 6 | from sklearn.preprocessing import StandardScaler 7 | from sklearn.decomposition import PCA 8 | 9 | np.set_printoptions(precision=3, suppress=True) 10 | 11 | #%% fetch data 12 | data = pd.read_excel('proc1a.xls', skiprows = 1,usecols = 'C:AI') 13 | 14 | #%% separate train data 15 | data_train = data.iloc[0:69,] 16 | 17 | #%% scale data 18 | scaler = StandardScaler() 19 | data_train_normal = scaler.fit_transform(data_train) 20 | 21 | #%% PCA 22 | pca = PCA() 23 | score_train = pca.fit_transform(data_train_normal) 24 | 25 | #%% confirm no correlation 26 | corr_coef = np.corrcoef(score_train,rowvar = False) 27 | 
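# Note: the PC scores are uncorrelated by construction; the loading vectors are
# orthonormal eigenvectors of the training covariance matrix, so the score covariance
# is diagonal (its entries equal pca.explained_variance_) and the off-diagonal entries
# of corr_coef printed below should be ~0 up to numerical round-off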
print('Correlation matrix: \n', corr_coef[0:3,0:3]) # printing only a portion 28 | 29 | #%% visualize explained variance 30 | import matplotlib.pyplot as plt 31 | 32 | explained_variance = 100*pca.explained_variance_ratio_ # in percentage 33 | cum_explained_variance = np.cumsum(explained_variance) # cumulative % variance explained 34 | 35 | plt.figure() 36 | plt.plot(cum_explained_variance, 'r+', label = 'cumulative % variance explained') 37 | plt.plot(explained_variance, 'b+' , label = '% variance explained by each PC') 38 | plt.ylabel('Explained variance (in %)'), plt.xlabel('Principal component number'), plt.legend() 39 | 40 | #%% decide # of PCs to retain and compute reduced data in PC space 41 | n_comp = np.argmax(cum_explained_variance >= 90) + 1 42 | score_train_reduced = score_train[:,0:n_comp] 43 | 44 | print('Number of PCs cumulatively explaining atleast 90% variance: ', n_comp) 45 | 46 | #%% confirm that only about 10% of original information is lost 47 | from sklearn.metrics import r2_score 48 | 49 | V_matrix = pca.components_.T 50 | P_matrix = V_matrix[:,0:n_comp] 51 | 52 | data_train_normal_reconstruct = np.dot(score_train_reduced, P_matrix.T) 53 | R2_score = r2_score(data_train_normal, data_train_normal_reconstruct) 54 | 55 | print('% information lost = ', 100*(1-R2_score)) 56 | 57 | #%% alternative approach 58 | pca = PCA(n_components = 0.9) 59 | score_train_reduced = pca.fit_transform(data_train_normal) 60 | 61 | data_train_normal_reconstruct = pca.inverse_transform(score_train_reduced) 62 | R2_score = r2_score(data_train_normal, data_train_normal_reconstruct) 63 | 64 | print('% information lost = ', 100*(1-R2_score)) 65 | 66 | #%% plot to compare original and reconstructed variables 67 | var = 32 68 | plt.figure() 69 | plt.plot(data_train_normal[:,var],label = 'Measured data') 70 | plt.plot(data_train_normal_reconstruct[:,var],label = 'Reconstructed data') 71 | plt.ylabel('Variable # '+ str(var)) 72 | plt.xlabel('sample #') 73 | plt.legend() -------------------------------------------------------------------------------- /Chapter7_MSPM_SteadyState1/ProcessMonitoring_PCA.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Process monitoring via PCA 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import pandas as pd 8 | from sklearn.preprocessing import StandardScaler 9 | from sklearn.decomposition import PCA 10 | import matplotlib.pyplot as plt 11 | 12 | #%% fetch data 13 | data = pd.read_excel('proc1a.xls', skiprows = 1,usecols = 'C:AI') 14 | 15 | #%% separate train data 16 | data_train = data.iloc[0:69,] 17 | 18 | #%% scale data 19 | scaler = StandardScaler() 20 | data_train_normal = scaler.fit_transform(data_train) 21 | 22 | #%% PCA 23 | pca = PCA() 24 | score_train = pca.fit_transform(data_train_normal) 25 | 26 | #%% decide # of PCs to retain and compute reduced data in PC space 27 | explained_variance = 100*pca.explained_variance_ratio_ # in percentage 28 | cum_explained_variance = np.cumsum(explained_variance) # cumulative % variance explained 29 | 30 | n_comp = np.argmax(cum_explained_variance >= 90) + 1 31 | score_train_reduced = score_train[:,0:n_comp] 32 | 33 | print('Number of PCs cumulatively explaining atleast 90% variance: ', n_comp) 34 | 35 | #%% reconstruct original data 36 | V_matrix = pca.components_.T 37 | P_matrix = 
V_matrix[:,0:n_comp] 38 | 39 | data_train_normal_reconstruct = np.dot(score_train_reduced, P_matrix.T) 40 | 41 | #%% calculate T2 for training data 42 | N = data_train_normal.shape[0] 43 | m = data_train_normal.shape[1] 44 | k = n_comp 45 | 46 | lambda_k = np.diag(pca.explained_variance_[0:k]) # eigenvalue = explained variance 47 | lambda_k_inv = np.linalg.inv(lambda_k) 48 | 49 | T2_train = np.zeros((N,)) 50 | 51 | for i in range(N): 52 | T2_train[i] = np.dot(np.dot(score_train_reduced[i,:],lambda_k_inv),score_train_reduced[i,:].T) 53 | 54 | #%% calculate Q for training data 55 | error_train = data_train_normal - data_train_normal_reconstruct 56 | Q_train = np.sum(error_train*error_train, axis = 1) 57 | 58 | #%% T2 control limit 59 | import scipy.stats 60 | 61 | alpha = 0.01# 99% control limit 62 | T2_CL = k*(N**2-1)*scipy.stats.f.ppf(1-alpha,k,N-k)/(N*(N-k)) 63 | 64 | #%% Q_train control limit 65 | eig_vals = pca.explained_variance_ 66 | 67 | theta1 = np.sum(eig_vals[k:]) 68 | theta2 = np.sum([eig_vals[j]**2 for j in range(k,m)]) 69 | theta3 = np.sum([eig_vals[j]**3 for j in range(k,m)]) 70 | h0 = 1-2*theta1*theta3/(3*theta2**2) 71 | 72 | z_alpha = scipy.stats.norm.ppf(1-alpha) 73 | Q_CL = theta1*(z_alpha*np.sqrt(2*theta2*h0**2)/theta1+ 1 + theta2*h0*(1-h0)/theta1**2)**2 74 | 75 | #%% Q_train plot with CL 76 | plt.figure() 77 | plt.plot(Q_train) 78 | plt.plot([1,len(Q_train)],[Q_CL,Q_CL], color='red') 79 | plt.xlabel('Sample #') 80 | plt.ylabel('Q for training data') 81 | plt.show() 82 | 83 | #%% T2_train plot with CL 84 | plt.figure() 85 | plt.plot(T2_train) 86 | plt.plot([1,len(T2_train)],[T2_CL,T2_CL], color='red') 87 | plt.xlabel('Sample #') 88 | plt.ylabel('T$^2$ for training data') 89 | plt.show() 90 | 91 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 92 | ## test data 93 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 94 | 95 | #%% get test data, normalize it 96 | data_test = data.iloc[69:,] 97 | data_test_normal = scaler.transform(data_test) # using scaling parameters from training data 98 | 99 | #%% compute scores and reconstruct 100 | score_test = pca.transform(data_test_normal) 101 | score_test_reduced = score_test[:,0:k] 102 | 103 | data_test_normal_reconstruct = np.dot(score_test_reduced, P_matrix.T) 104 | 105 | #%% calculate T2_test 106 | T2_test = np.zeros((data_test_normal.shape[0],)) 107 | 108 | for i in range(data_test_normal.shape[0]): # eigenvalues from training data are used 109 | T2_test[i] = np.dot(np.dot(score_test_reduced[i,:],lambda_k_inv),score_test_reduced[i,:].T) 110 | 111 | #%% calculate Q_test 112 | error_test = data_test_normal_reconstruct - data_test_normal 113 | Q_test = np.sum(error_test*error_test, axis = 1) 114 | 115 | #%% plot T2_test and T2_train with CL 116 | T2_combined = np.concatenate([T2_train,T2_test]) 117 | 118 | plt.figure() 119 | plt.plot(T2_combined) 120 | plt.plot([1,len(T2_combined)],[T2_CL,T2_CL], color='red') 121 | plt.plot([69,69],[0,100], color='cyan') 122 | plt.xlabel('Sample #') 123 | plt.ylabel('T$^2$ for training and test data') 124 | plt.show() 125 | 126 | #%% plot Q_test and Q_train with CL 127 | Q_combined = np.concatenate([Q_train,Q_test]) 128 | 129 | plt.figure() 130 | plt.plot(Q_combined) 131 | plt.plot([1,len(Q_combined)],[Q_CL,Q_CL], color='red') 132 | plt.plot([69,69],[0,100], color='cyan') 133 | plt.xlabel('Sample #') 134 | plt.ylabel('Q for training and test data') 135 | plt.show() 136 | 137 | 
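# Aside (illustrative sketch, not part of the original script): detection performance can be
# quantified from the statistics already computed above; a test sample is flagged whenever
# T2 or Q exceeds its control limit, and the flagged fraction is the detection rate for this
# (faulty) test set
alarm_test = (T2_test > T2_CL) | (Q_test > Q_CL)
print('Percentage of test samples flagged as faulty: ', 100*np.sum(alarm_test)/len(alarm_test))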
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 138 | ## fault diagnosis by contribution plots 139 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 140 | 141 | # T2 contribution 142 | sample = 85 - 69 143 | data_point = np.transpose(data_test_normal[sample-1,]) 144 | 145 | D = np.dot(np.dot(P_matrix,lambda_k_inv),P_matrix.T) 146 | T2_contri = np.dot(scipy.linalg.sqrtm(D),data_point)**2 # vector of contributions 147 | 148 | plt.figure(figsize=[8,4]) 149 | plt.bar(['var ' + str((i+1)) for i in range(len(T2_contri))], T2_contri, color='brown') 150 | plt.xticks(rotation = 90, fontsize=12) 151 | plt.ylabel('T$^2$ contribution plot', fontsize=20) 152 | plt.show() 153 | 154 | # SPE contribution 155 | error_test_sample = error_test[sample-1,] 156 | SPE_contri = error_test_sample*error_test_sample # vector of contributions 157 | 158 | plt.figure(figsize=[8,4]) 159 | plt.bar(['var ' + str((i+1)) for i in range(len(SPE_contri))], SPE_contri, color='brown') 160 | plt.xticks(rotation = 90, fontsize=12) 161 | plt.ylabel('SPE contribution plot', fontsize=20) 162 | plt.show() 163 | 164 | # variable plot 165 | plt.figure(figsize=[6,4]) 166 | plt.plot(data.iloc[:,23], '-*') 167 | plt.xlabel('Sample #', fontsize=20) 168 | plt.ylabel('Variable 24', fontsize=20) 169 | plt.grid() 170 | plt.show() -------------------------------------------------------------------------------- /Chapter7_MSPM_SteadyState1/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter7_MSPM_SteadyState1/proc1a.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ML-PSE/Machine_Learning_for_PM_and_PdM/96c21a8aeb4177541ea79e13474e099cc5ea00dd/Chapter7_MSPM_SteadyState1/proc1a.xls -------------------------------------------------------------------------------- /Chapter8_MSPM_SteadyState2/DimensionalityReduction_FDA.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Dimensionality reduction via FDA for TE data 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | #%% fetch TEP data for faults 5,10,19 10 | TEdata_Fault5_train = np.loadtxt('d05.dat') 11 | TEdata_Fault10_train = np.loadtxt('d10.dat') 12 | TEdata_Fault19_train = np.loadtxt('d19.dat') 13 | TEdata_Faulty_train = np.vstack((TEdata_Fault5_train, TEdata_Fault10_train, TEdata_Fault19_train)) 14 | 15 | # select variables as done in Lee et al. 
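# The 52 columns of the TEP .dat files are, presumably as in the standard Braatz dataset,
# XMEAS 1-41 followed by XMV 1-11; the slices below keep the 22 continuous process
# measurements (XMEAS 1-22) and the 11 manipulated variables (XMV 1-11), leaving out the
# 19 composition measurements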
16 | xmeas = TEdata_Faulty_train[:,0:22] 17 | xmv = TEdata_Faulty_train[:,41:52] 18 | data_Faulty_train = np.hstack((xmeas, xmv)) 19 | 20 | # generate sample labels 21 | n_rows_train = TEdata_Fault5_train.shape[0] 22 | y_train = np.concatenate((5*np.ones(n_rows_train,), 10*np.ones(n_rows_train,), 19*np.ones(n_rows_train,))) 23 | 24 | #%% scale data 25 | from sklearn.preprocessing import StandardScaler 26 | scaler = StandardScaler() 27 | Faultydata_train_scaled = scaler.fit_transform(data_Faulty_train) 28 | 29 | #%% visualize all scaled variables 30 | plt.figure() 31 | plt.plot(Faultydata_train_scaled) 32 | plt.show() 33 | 34 | #%% fit LDA model 35 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 36 | lda = LinearDiscriminantAnalysis() 37 | scores_train_lda = lda.fit_transform(Faultydata_train_scaled, y_train) 38 | 39 | #%% visualize LDA scores 40 | plt.figure() 41 | plt.plot(scores_train_lda[0:n_rows_train,0], scores_train_lda[0:n_rows_train,1], 'b.', label='Fault 5') 42 | plt.plot(scores_train_lda[n_rows_train:2*n_rows_train,0], scores_train_lda[n_rows_train:2*n_rows_train,1], 'r.', label='Fault 10') 43 | plt.plot(scores_train_lda[2*n_rows_train:3*n_rows_train,0], scores_train_lda[2*n_rows_train:3*n_rows_train,1], 'm.', label='Fault 19') 44 | plt.legend() 45 | plt.xlabel('FD1 (training data)') 46 | plt.ylabel('FD2 (training data)') 47 | 48 | #%% fit PCA model 49 | from sklearn.decomposition import PCA 50 | pca = PCA(n_components=2) 51 | scores_train_pca = pca.fit_transform(Faultydata_train_scaled) 52 | 53 | #%% visualize PCA scores 54 | plt.figure() 55 | plt.plot(scores_train_pca[0:n_rows_train,0], scores_train_pca[0:n_rows_train,1], 'b.', label='Fault 5') 56 | plt.plot(scores_train_pca[n_rows_train:2*n_rows_train,0], scores_train_pca[n_rows_train:2*n_rows_train,1], 'r.', label='Fault 10') 57 | plt.plot(scores_train_pca[2*n_rows_train:3*n_rows_train,0], scores_train_pca[2*n_rows_train:3*n_rows_train,1], 'm.', label='Fault 19') 58 | plt.legend() 59 | plt.xlabel('PC1 (training data)') 60 | plt.ylabel('PC2 (training data)') 61 | 62 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 | ## Visualize test data 64 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 65 | #%% fetch TE data 66 | TEdata_Fault5_test = np.loadtxt('d05_te.dat') 67 | TEdata_Fault5_test = TEdata_Fault5_test[160:,:] 68 | TEdata_Fault10_test = np.loadtxt('d10_te.dat') 69 | TEdata_Fault10_test = TEdata_Fault10_test[160:,:] 70 | TEdata_Fault19_test = np.loadtxt('d19_te.dat') 71 | TEdata_Fault19_test = TEdata_Fault19_test[160:,:] 72 | TEdata_Faulty_test = np.vstack((TEdata_Fault5_test, TEdata_Fault10_test, TEdata_Fault19_test)) 73 | 74 | # select variables as done in Lee et al. 
75 | xmeas = TEdata_Faulty_test[:,0:22] 76 | xmv = TEdata_Faulty_test[:,41:52] 77 | data_Faulty_test = np.hstack((xmeas, xmv)) 78 | 79 | # generate sample labels 80 | n_rows_test = TEdata_Fault5_test.shape[0] 81 | y_test = np.concatenate((5*np.ones(n_rows_test,), 10*np.ones(n_rows_test,), 19*np.ones(n_rows_test,))) 82 | 83 | #%% scale data, trnasform via LDA & PCA models 84 | Faultydata_test_scaled = scaler.transform(data_Faulty_test) 85 | scores_test_lda = lda.transform(Faultydata_test_scaled) 86 | scores_test_pca = pca.transform(Faultydata_test_scaled) 87 | 88 | #%% visualize LDA & PCA scores 89 | plt.figure() 90 | plt.plot(scores_test_lda[0:n_rows_test,0], scores_test_lda[0:n_rows_test,1], 'b.', label='Fault 5') 91 | plt.plot(scores_test_lda[n_rows_test:2*n_rows_test,0], scores_test_lda[n_rows_test:2*n_rows_test,1], 'r.', label='Fault 10') 92 | plt.plot(scores_test_lda[2*n_rows_test:3*n_rows_test,0], scores_test_lda[2*n_rows_test:3*n_rows_test,1], 'm.', label='Fault 19') 93 | plt.legend() 94 | plt.xlabel('FD1 (test data)') 95 | plt.ylabel('FD2 (test data)') 96 | 97 | plt.figure() 98 | plt.plot(scores_test_pca[0:n_rows_test,0], scores_test_pca[0:n_rows_test,1], 'b.', label='Fault 5') 99 | plt.plot(scores_test_pca[n_rows_test:2*n_rows_test,0], scores_test_pca[n_rows_test:2*n_rows_test,1], 'r.', label='Fault 10') 100 | plt.plot(scores_test_pca[2*n_rows_test:3*n_rows_test,0], scores_test_pca[2*n_rows_test:3*n_rows_test,1], 'm.', label='Fault 19') 101 | plt.legend() 102 | plt.xlabel('PC1 (test data)') 103 | plt.ylabel('PC2 (test data)') 104 | 105 | 106 | -------------------------------------------------------------------------------- /Chapter8_MSPM_SteadyState2/DimensionalityReduction_ICA.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## ICA model for TEP data 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | #%% fetch TE data 10 | TEdata_noFault_train = np.loadtxt('d00.dat').T # data arrnagement in d00.dat is different than that in other files 11 | 12 | # select variables as done in Lee et al. 
13 | xmeas = TEdata_noFault_train[:,0:22] 14 | xmv = TEdata_noFault_train[:,41:52] 15 | data_noFault_train = np.hstack((xmeas, xmv)) 16 | 17 | #%% scale data 18 | from sklearn.preprocessing import StandardScaler 19 | scaler = StandardScaler() 20 | data_train_normal = scaler.fit_transform(data_noFault_train) 21 | 22 | #%% fit ICA model 23 | from sklearn.decomposition import FastICA 24 | ica = FastICA(max_iter=1000, tol=0.005, random_state=1).fit(data_train_normal) 25 | W = ica.components_ 26 | 27 | #%% confirm L2 norm of all IC scores is 1 28 | S = ica.transform(data_train_normal) 29 | S_L2_norms = np.linalg.norm(S, 2, axis = 0) 30 | 31 | #%% sort the ICs in importance order using L2 norm of each row 32 | L2_norm = np.linalg.norm(W, 2, axis=1) 33 | sort_order = np.flip(np.argsort(L2_norm)) # descending order 34 | L2_norm_sorted_pct = 100*L2_norm[sort_order]/np.sum(L2_norm) 35 | 36 | plt.figure() 37 | plt.plot(L2_norm, 'b') 38 | plt.xlabel('IC number (unsorted)') 39 | plt.ylabel('L2 norm') 40 | 41 | plt.figure() 42 | plt.plot(L2_norm_sorted_pct, 'b+') 43 | plt.xlabel('IC number (sorted)') 44 | plt.ylabel('% L2 norm') 45 | 46 | W_sorted = W[sort_order,:] # row 1 now corresponds to the most important IC and so on 47 | 48 | #%% decide # of ICs to retain via PCA variance method and compute ICs 49 | from sklearn.decomposition import PCA 50 | pca = PCA().fit(data_train_normal) 51 | 52 | explained_variance = 100*pca.explained_variance_ratio_ # in percentage 53 | cum_explained_variance = np.cumsum(explained_variance) # cumulative % variance explained 54 | 55 | n_comp = np.argmax(cum_explained_variance >= 90) + 1 56 | 57 | print('Number of PCs cumulatively explaining atleast 90% variance: ', n_comp) 58 | 59 | #%% compute ICs with reduced dimension 60 | Wd = W_sorted[0:n_comp,:] 61 | Sd = np.dot(Wd, data_train_normal.T) # row 1 contains scores of the most important IC -------------------------------------------------------------------------------- /Chapter8_MSPM_SteadyState2/FDA_illustration.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Illustration example for FDA/LDA 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | from sklearn.decomposition import PCA 8 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 9 | from sklearn.preprocessing import StandardScaler 10 | import matplotlib.pyplot as plt 11 | 12 | #%% generate data 13 | x1_class1 = np.random.uniform(1, 6, 100) 14 | x2_class1 = x1_class1 + 1 + np.random.normal(0,0.5,100) 15 | X_class1 = np.column_stack((x1_class1, x2_class1)) 16 | 17 | 18 | x1_class2 = np.random.uniform(2, 7, 100) 19 | x2_class2 = x1_class2 - 1 + np.random.normal(0,0.5,100) 20 | X_class2 = np.column_stack((x1_class2, x2_class2)) 21 | 22 | plt.figure() 23 | plt.plot(x1_class1, x2_class1, 'b.', label='Class 1') 24 | plt.plot(x1_class2, x2_class2, 'r.', label='Class 2') 25 | plt.xlabel('x1') 26 | plt.ylabel('x2') 27 | plt.legend() 28 | plt.show() 29 | 30 | X = np.vstack((X_class1, X_class2)) 31 | y = np.concatenate((np.ones(100,), 2*np.ones(100,))) 32 | 33 | #%% scale data 34 | scalar = StandardScaler() 35 | X_normal = scalar.fit_transform(X) 36 | 37 | #%% extract latent variables via PCA 38 | pca = PCA(n_components=1) 39 | score_pca = pca.fit_transform(X_normal) 40 | 41 | plt.figure() 42 | plt.plot(score_pca[0:100], np.zeros((100,)), 
'b.') 43 | plt.plot(score_pca[100:], np.zeros((100,)), 'r.') 44 | plt.ylim((-2,100)) 45 | plt.xlabel('PCA score') 46 | plt.ylabel('sample #') 47 | 48 | #%% extract latent variables via LDA 49 | lda = LinearDiscriminantAnalysis(n_components=1) 50 | score_lda = lda.fit_transform(X_normal, y) 51 | 52 | plt.figure() 53 | plt.plot(score_lda[0:100], np.zeros((100,)), 'b.') 54 | plt.plot(score_lda[100:], np.zeros((100,)), 'r.') 55 | plt.ylim((-2,100)) 56 | plt.xlabel('LDA score') 57 | plt.ylabel('sample #') -------------------------------------------------------------------------------- /Chapter8_MSPM_SteadyState2/FaultClassification_FDA.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Fault classification via FDA 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | #%% fetch TEP data for faults 5,10,19 10 | TEdata_Fault5_train = np.loadtxt('d05.dat') 11 | TEdata_Fault10_train = np.loadtxt('d10.dat') 12 | TEdata_Fault19_train = np.loadtxt('d19.dat') 13 | TEdata_Faulty_train = np.vstack((TEdata_Fault5_train, TEdata_Fault10_train, TEdata_Fault19_train)) 14 | 15 | # select variables as done in Lee et al. 16 | xmeas = TEdata_Faulty_train[:,0:22] 17 | xmv = TEdata_Faulty_train[:,41:52] 18 | data_Faulty_train = np.hstack((xmeas, xmv)) 19 | 20 | # generate sample labels 21 | n_rows_train = TEdata_Fault5_train.shape[0] 22 | y_train = np.concatenate((5*np.ones(n_rows_train,), 10*np.ones(n_rows_train,), 19*np.ones(n_rows_train,))) 23 | 24 | #%% scale data 25 | from sklearn.preprocessing import StandardScaler 26 | scaler = StandardScaler() 27 | Faultydata_train_scaled = scaler.fit_transform(data_Faulty_train) 28 | 29 | #%% visualize all scaled variables 30 | plt.figure() 31 | plt.plot(Faultydata_train_scaled) 32 | plt.show() 33 | 34 | #%% fit LDA model 35 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 36 | lda = LinearDiscriminantAnalysis() 37 | scores_train_lda = lda.fit_transform(Faultydata_train_scaled, y_train) 38 | 39 | #%% visualize LDA scores 40 | plt.figure() 41 | plt.plot(scores_train_lda[0:n_rows_train,0], scores_train_lda[0:n_rows_train,1], 'b.', label='Fault 5') 42 | plt.plot(scores_train_lda[n_rows_train:2*n_rows_train,0], scores_train_lda[n_rows_train:2*n_rows_train,1], 'r.', label='Fault 10') 43 | plt.plot(scores_train_lda[2*n_rows_train:3*n_rows_train,0], scores_train_lda[2*n_rows_train:3*n_rows_train,1], 'm.', label='Fault 19') 44 | plt.legend() 45 | plt.xlabel('FD1 (training data)') 46 | plt.ylabel('FD2 (training data)') 47 | 48 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 49 | ## Control limit determination for fault5 class 50 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 51 | import scipy.stats 52 | Nj = n_rows_train 53 | k = 2 54 | 55 | alpha = 0.01# 99% control limit 56 | T2_CL = k*(Nj**2-1)*scipy.stats.f.ppf(1-alpha,k,Nj-k)/(Nj*(Nj-k)) 57 | 58 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 59 | ## Fault classification with fault 5 test data 60 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 61 | # mean and covariance for Fault 5 class 62 | scores_train_lda_Fault5 = scores_train_lda[0:n_rows_train,:] 63 | cov_scores_train_Fault5 = np.cov(scores_train_lda_Fault5.T) 64 
| mean_scores_train_Fault5 = np.mean(scores_train_lda_Fault5, axis = 0) 65 | 66 | #%% fetch TE test dta for fault 5 67 | TEdata_Fault5_test = np.loadtxt('d05_te.dat') 68 | TEdata_Fault5_test = TEdata_Fault5_test[160:,:] 69 | n_rows_test = TEdata_Fault5_test.shape[0] 70 | 71 | # select variables as done in Lee et al. 72 | xmeas = TEdata_Fault5_test[:,0:22] 73 | xmv = TEdata_Fault5_test[:,41:52] 74 | data_Faulty_test = np.hstack((xmeas, xmv)) 75 | 76 | #%% scale data and transform 77 | Faultydata_test_scaled = scaler.transform(data_Faulty_test) 78 | scores_test_lda = lda.transform(Faultydata_test_scaled) 79 | 80 | #%% compute T2 statistic for test data for Fault 5 class 81 | T2_test = np.zeros((n_rows_test,)) 82 | for sample in range(n_rows_test): 83 | score_sample = scores_test_lda[sample,:] 84 | score_sample_centered = score_sample - mean_scores_train_Fault5 85 | T2_test[sample] = np.dot(np.dot(score_sample_centered[np.newaxis,:],np.linalg.inv(cov_scores_train_Fault5)),score_sample_centered[np.newaxis,:].T) 86 | 87 | #%% plot test prediction 88 | outsideCL_flag = T2_test > T2_CL 89 | insideCL_flag = T2_test <= T2_CL 90 | plt.figure() 91 | plt.plot(scores_test_lda[outsideCL_flag,0], scores_test_lda[outsideCL_flag,1], 'k.', label='outside Fault 5 boundary') 92 | plt.plot(scores_test_lda[insideCL_flag,0], scores_test_lda[insideCL_flag,1], 'b.', label='inside Fault 5 boundary') 93 | plt.xlabel('FD1 (test data)') 94 | plt.ylabel('FD2 (test data)') 95 | plt.legend() 96 | 97 | print('Percentage of samples correctly diagnosed as Fault 5: ', 100*np.sum(T2_test < T2_CL)/n_rows_test) 98 | -------------------------------------------------------------------------------- /Chapter8_MSPM_SteadyState2/ICA_illustration.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## Illustration example for ICA vs PCA 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | from sklearn.decomposition import PCA 8 | from sklearn.decomposition import FastICA 9 | import matplotlib.pyplot as plt 10 | 11 | #%% generate independent data 12 | s1 = 2*np.sin(2*np.pi*8*np.arange(500)/500) 13 | s2 = np.random.uniform(-2, 2, 500) 14 | 15 | plt.figure() 16 | plt.plot(s1) 17 | plt.xlabel('sample #') 18 | plt.ylabel('s1') 19 | 20 | plt.figure() 21 | plt.plot(s2) 22 | plt.xlabel('sample #') 23 | plt.ylabel('s2') 24 | 25 | plt.figure() 26 | plt.scatter(s1, s2) 27 | plt.xlabel('s1') 28 | plt.ylabel('s2') 29 | 30 | #%% generate transformed observed data 31 | x1 = (2/3)*s1 + s2 32 | x2 = (2/3)*s1 + (1/3)*s2 33 | 34 | X = np.column_stack((x1,x2)) 35 | 36 | plt.figure() 37 | plt.plot(x1) 38 | plt.xlabel('sample #') 39 | plt.ylabel('x1') 40 | 41 | plt.figure() 42 | plt.plot(x2) 43 | plt.xlabel('sample #') 44 | plt.ylabel('x2') 45 | 46 | plt.figure() 47 | plt.scatter(x1, x2) 48 | plt.xlabel('x1') 49 | plt.ylabel('x2') 50 | 51 | #%% extract latent variables via PCA 52 | pca = PCA() 53 | T = pca.fit_transform(X) 54 | 55 | plt.figure() 56 | plt.plot(T[:,0]) 57 | plt.xlabel('sample #') 58 | plt.ylabel('t1') 59 | 60 | plt.figure() 61 | plt.plot(T[:,1]) 62 | plt.xlabel('sample #') 63 | plt.ylabel('t2') 64 | 65 | plt.figure() 66 | plt.scatter(T[:,0], T[:,1]) 67 | plt.xlabel('t1') 68 | plt.ylabel('t2') 69 | 70 | #%% extract latent variables via ICA 71 | ica = FastICA() 72 | U = ica.fit_transform(X) 73 | 74 | plt.figure() 75 | 
plt.plot(U[:,0]) 76 | plt.xlabel('sample #') 77 | plt.ylabel('u1') 78 | 79 | plt.figure() 80 | plt.plot(U[:,1]) 81 | plt.xlabel('sample #') 82 | plt.ylabel('u2') 83 | 84 | plt.figure() 85 | plt.scatter(U[:,0], U[:,1]) 86 | plt.xlabel('u1') 87 | plt.ylabel('u2') 88 | 89 | -------------------------------------------------------------------------------- /Chapter8_MSPM_SteadyState2/ProcessMonitoring_ICA.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## ICA model for TE data 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | from sklearn.preprocessing import StandardScaler 8 | from sklearn.decomposition import FastICA 9 | import matplotlib.pyplot as plt 10 | 11 | #%% fetch TE data and select variables as done in Lee et al. 12 | TEdata_noFault_train = np.loadtxt('d00.dat').T # data arrnagement in d00.dat is different than that in other files 13 | 14 | xmeas = TEdata_noFault_train[:,0:22] 15 | xmv = TEdata_noFault_train[:,41:52] 16 | data_noFault_train = np.hstack((xmeas, xmv)) 17 | 18 | #%% scale data 19 | scaler = StandardScaler() 20 | data_train_normal = scaler.fit_transform(data_noFault_train) 21 | 22 | #%% fit ICA model 23 | ica = FastICA(max_iter=1000, tol=0.005, random_state=1).fit(data_train_normal) 24 | 25 | #%% decide # of ICs to retain via PCA variance method 26 | from sklearn.decomposition import PCA 27 | pca = PCA().fit(data_train_normal) 28 | 29 | explained_variance = 100*pca.explained_variance_ratio_ 30 | cum_explained_variance = np.cumsum(explained_variance) 31 | 32 | n_comp = np.argmax(cum_explained_variance >= 90) + 1 33 | 34 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 35 | ## Monitoring statistics function 36 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 37 | 38 | def compute_ICA_monitoring_metrics(ica_model, number_comp, data): 39 | """ calculate monitoring statistics for given data 40 | 41 | parameters 42 | ----------- 43 | data: numpy array of shape = [n_samples, n_features] 44 | Training or test data 45 | 46 | Returns 47 | ---------- 48 | monitoring_stats: numpy array of shape = [n_samples, 3] 49 | 50 | """ 51 | 52 | # data parameters 53 | n = data.shape[0] 54 | 55 | # model parameters 56 | W = ica.components_ 57 | L2_norm = np.linalg.norm(W, 2, axis=1) 58 | sort_order = np.flip(np.argsort(L2_norm)) 59 | W_sorted = W[sort_order,:] 60 | 61 | # I2 62 | Wd = W_sorted[0:number_comp,:] 63 | Sd = np.dot(Wd, data.T) 64 | I2 = np.array([np.dot(Sd[:,i], Sd[:,i]) for i in range(n)]) 65 | 66 | # Ie2 67 | We = W_sorted[n_comp:,:] 68 | Se = np.dot(We, data.T) 69 | Ie2 = np.array([np.dot(Se[:,i], Se[:,i]) for i in range(n)]) 70 | 71 | # SPE 72 | Q = ica.whitening_ 73 | Q_inv = np.linalg.inv(Q) 74 | A = ica.mixing_ 75 | B = np.dot(Q, A) 76 | B_sorted = B[:,sort_order] 77 | Bd = B_sorted[:,0:n_comp] 78 | 79 | data_reconstruct = np.dot(np.dot(np.dot(Q_inv, Bd), Wd), data.T) 80 | e = data.T - data_reconstruct 81 | SPE = np.array([np.dot(e[:,i], e[:,i]) for i in range(n)]) 82 | 83 | monitoring_stats = np.column_stack((I2, Ie2, SPE)) 84 | return monitoring_stats 85 | 86 | def draw_monitoring_chart(values, CL, yLabel): 87 | plt.figure() 88 | plt.plot(values) 89 | plt.axhline(CL, color = "red", linestyle = "--") 90 | plt.xlabel('Sample #') 91 | plt.ylabel(yLabel) 92 | plt.show() 93 | 94 | def 
draw_ICA_monitoring_charts(ICA_statistics, CLs, trainORtest): 95 | """ draw monitoring charts for given data 96 | 97 | parameters 98 | ----------- 99 | ICA_statistics: numpy array of shape = [n_samples, 3] 100 | CLs: List of control limits 101 | trainORtest: 'training' or 'test' 102 | 103 | """ 104 | 105 | # I2 chart, Ie2 chart, SPE chart 106 | draw_monitoring_chart(ICA_statistics[:,0], CLs[0], 'I2 for ' + trainORtest + ' data') 107 | draw_monitoring_chart(ICA_statistics[:,1], CLs[1], 'Ie2 for ' + trainORtest + ' data') 108 | draw_monitoring_chart(ICA_statistics[:,2], CLs[2], 'SPE for ' + trainORtest + ' data') 109 | 110 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 111 | ## Draw monitoring charts for training data 112 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 113 | ICA_statistics_train = compute_ICA_monitoring_metrics(ica, n_comp, data_train_normal) 114 | 115 | I2_CL = np.percentile(ICA_statistics_train[:,0], 99) 116 | Ie2_CL = np.percentile(ICA_statistics_train[:,1], 99) 117 | SPE_CL = np.percentile(ICA_statistics_train[:,2], 99) 118 | 119 | draw_ICA_monitoring_charts(ICA_statistics_train, [I2_CL, Ie2_CL, SPE_CL], 'training') 120 | 121 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 122 | ## FAR / FDR computation function 123 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 124 | def compute_alarmRate(monitoring_stats, CLs): 125 | """ calculate alarm rate 126 | 127 | parameters 128 | ----------- 129 | monitoring_stats: numpy array of shape = [n_samples, 3] 130 | CLs: List of control limits 131 | 132 | Returns 133 | ---------- 134 | alarmRate: float 135 | 136 | """ 137 | 138 | violationFlag = monitoring_stats > CLs 139 | alarm_overall = np.any(violationFlag, axis=1) # violation of any metric => alarm 140 | alarmRate = 100*np.sum(alarm_overall)/monitoring_stats.shape[0] 141 | 142 | return alarmRate 143 | 144 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 145 | ## Draw monitoring charts for test data 146 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 147 | # fetch data and select data as done in Lee et al. 
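# d10_te.dat is the Fault 10 test set; since the fault enters at sample 160 (see the alarm
# rate computation below), the alarm rate over samples 160 onward acts as a fault detection
# rate (FDR), whereas the same computation on fault-free test data would give a false alarm
# rate (FAR)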
148 | TEdata_Fault_test = np.loadtxt('d10_te.dat') 149 | 150 | xmeas = TEdata_Fault_test[:,0:22] 151 | xmv = TEdata_Fault_test[:,41:52] 152 | data_Fault_test = np.hstack((xmeas, xmv)) 153 | 154 | # scale data 155 | data_test_scaled = scaler.transform(data_Fault_test) 156 | 157 | # compute statistics and draw charts 158 | ICA_statistics_test = compute_ICA_monitoring_metrics(ica, n_comp, data_test_scaled) 159 | draw_ICA_monitoring_charts(ICA_statistics_test, [I2_CL, Ie2_CL, SPE_CL], 'test') 160 | 161 | # compute FAR or FDR 162 | alarmRate = compute_alarmRate(ICA_statistics_test[160:,:], [I2_CL, Ie2_CL, SPE_CL]) # faults start from sample 160 163 | print(alarmRate) 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /Chapter8_MSPM_SteadyState2/ProcessMonitoring_PCA.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## PCA model for TE data 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | from sklearn.preprocessing import StandardScaler 8 | from sklearn.decomposition import PCA 9 | import matplotlib.pyplot as plt 10 | 11 | #%% fetch TE data 12 | TEdata_noFault_train = np.loadtxt('d00.dat').T 13 | 14 | # select data as done in Lee et al. 15 | xmeas = TEdata_noFault_train[:,0:22] 16 | xmv = TEdata_noFault_train[:,41:52] 17 | data_noFault_train = np.hstack((xmeas, xmv)) 18 | 19 | #%% scale data 20 | scaler = StandardScaler() 21 | data_train_normal = scaler.fit_transform(data_noFault_train) 22 | 23 | #%% decide # of PCs to retain via PCA variance method 24 | pca = PCA().fit(data_train_normal) 25 | 26 | explained_variance = 100*pca.explained_variance_ratio_ 27 | cum_explained_variance = np.cumsum(explained_variance) 28 | 29 | n_comp = np.argmax(cum_explained_variance >= 90) + 1 30 | 31 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 32 | ## Monitoring statistics function 33 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 34 | 35 | def compute_PCA_monitoring_metrics(pca_model, number_comp, data): 36 | """ calculate monitoring statistics for given data 37 | 38 | parameters 39 | ----------- 40 | data: numpy array of shape = [n_samples, n_features] 41 | Training or test data 42 | 43 | Returns 44 | ---------- 45 | monitoring_stats: numpy array of shape = [n_samples, 2] 46 | 47 | """ 48 | 49 | # data parameters 50 | n = data.shape[0] 51 | 52 | # model parameters 53 | V_matrix = pca.components_.T 54 | P_matrix = V_matrix[:,0:n_comp] 55 | 56 | # model computation 57 | score = pca.transform(data) 58 | score_reduced = score[:,0:n_comp] 59 | data_reconstruct = np.dot(score_reduced, P_matrix.T) 60 | 61 | # T2 62 | lambda_k = np.diag(pca.explained_variance_[0:n_comp]) 63 | lambda_k_inv = np.linalg.inv(lambda_k) 64 | T2 = np.zeros((data.shape[0],)) 65 | for i in range(n): 66 | T2[i] = np.dot(np.dot(score_reduced[i,:],lambda_k_inv),score_reduced[i,:].T) 67 | 68 | # SPE 69 | error = data - data_reconstruct 70 | SPE = np.sum(error*error, axis = 1) 71 | 72 | monitoring_stats = np.column_stack((T2, SPE)) 73 | return monitoring_stats 74 | 75 | def draw_monitoring_chart(values, CL, yLabel): 76 | plt.figure() 77 | plt.plot(values) 78 | plt.axhline(CL, color = "red", linestyle = "--") 79 | plt.xlabel('Sample #') 80 | plt.ylabel(yLabel) 81 | plt.show() 82 | 83 | def 
draw_PCA_monitoring_charts(PCA_statistics, CLs, trainORtest): 84 | """ draw monitoring charts for given data 85 | 86 | parameters 87 | ----------- 88 | PCA_statistics: numpy array of shape = [n_samples, 2] 89 | CLs: List of control limits 90 | trainORtest: 'training' or 'test' 91 | 92 | """ 93 | 94 | # T2 chart, SPE chart 95 | draw_monitoring_chart(PCA_statistics[:,0], CLs[0], 'T2 for ' + trainORtest + ' data') 96 | draw_monitoring_chart(PCA_statistics[:,1], CLs[1], 'SPE for ' + trainORtest + ' data') 97 | 98 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 99 | ## Draw monitoring charts for training data 100 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 101 | PCA_statistics_train = compute_PCA_monitoring_metrics(pca, n_comp, data_train_normal) 102 | 103 | T2_CL = np.percentile(PCA_statistics_train[:,0], 99) 104 | SPE_CL = np.percentile(PCA_statistics_train[:,1], 99) 105 | 106 | draw_PCA_monitoring_charts(PCA_statistics_train, [T2_CL, SPE_CL], 'training') 107 | 108 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 109 | ## FAR / FDR computation function 110 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 111 | def compute_alarmRate(monitoring_stats, CLs): 112 | """ calculate false rate 113 | 114 | parameters 115 | ----------- 116 | monitoring_stats: numpy array of shape = [n_samples, 2] 117 | CLs: List of control limits 118 | 119 | Returns 120 | ---------- 121 | alarmRate: float 122 | 123 | """ 124 | 125 | violationFlag = monitoring_stats > CLs 126 | alarm_overall = np.any(violationFlag, axis=1) # violation of any metric => alarm 127 | alarmRate = 100*np.sum(alarm_overall)/monitoring_stats.shape[0] 128 | 129 | return alarmRate 130 | 131 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 132 | ## Draw monitoring charts for test data 133 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 134 | # fetch data 135 | TEdata_Fault_test = np.loadtxt('d05_te.dat') 136 | 137 | # select data as done in Lee et al. 
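# The test data undergo the same variable selection and are then scaled with scaler.transform,
# i.e., with the mean and variance learned from the NOC training data; refitting the scaler on
# the faulty test data would partially mask the fault signature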
138 | xmeas = TEdata_Fault_test[:,0:22] 139 | xmv = TEdata_Fault_test[:,41:52] 140 | data_Fault_test = np.hstack((xmeas, xmv)) 141 | 142 | # scale data 143 | data_test_scaled = scaler.transform(data_Fault_test) 144 | 145 | # compute statistics and draw charts 146 | PCA_statistics_test = compute_PCA_monitoring_metrics(pca, n_comp, data_test_scaled) 147 | draw_PCA_monitoring_charts(PCA_statistics_test, [T2_CL, SPE_CL], 'test') 148 | 149 | # compute FAR or FDR 150 | alarmRate = compute_alarmRate(PCA_statistics_test[160:,:], [T2_CL, SPE_CL]) # faults start from sample 160 151 | print(alarmRate) 152 | -------------------------------------------------------------------------------- /Chapter8_MSPM_SteadyState2/TE_processData_explore.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## TE data 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | #%% fetch TE data 10 | TEdata_noFault_train = np.loadtxt('d00.dat').T # data arrnagement in d00.dat is different than that in other files 11 | TEdata_Fault_train = np.loadtxt('d10.dat') 12 | 13 | #%% quick visualize 14 | plt.figure() 15 | plt.plot(TEdata_noFault_train[:,17]) 16 | plt.xlabel('sample #') 17 | plt.ylabel('Stripper Tempearture') 18 | plt.title('Normal operation') 19 | 20 | plt.figure() 21 | plt.plot(TEdata_Fault_train[:,17]) 22 | plt.xlabel('sample #') 23 | plt.ylabel('Stripper Tempearture') 24 | plt.title('Faulty operation') 25 | 26 | #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 27 | ## Visualize normal and faulty data in PC space 28 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 29 | 30 | #%% scale data 31 | from sklearn.preprocessing import StandardScaler 32 | scaler = StandardScaler() 33 | TEdata_noFault_scaled = scaler.fit_transform(TEdata_noFault_train) 34 | TEdata_Fault_scaled = scaler.transform(TEdata_Fault_train) 35 | 36 | #%% build PCA model and copmute PC scores 37 | from sklearn.decomposition import PCA 38 | pca = PCA(n_components = 3).fit(TEdata_noFault_scaled) 39 | TEdata_noFault_scores = pca.transform(TEdata_noFault_scaled) 40 | TEdata_Fault_scores = pca.transform(TEdata_Fault_scaled) 41 | 42 | #%% visualize in 3D plot 43 | from mpl_toolkits.mplot3d import Axes3D 44 | fig = plt.figure() 45 | ax = Axes3D(fig) 46 | ax.scatter(TEdata_noFault_scores[:,0],TEdata_noFault_scores[:,1],TEdata_noFault_scores[:,2], c='blue', alpha=0.1, label='Normal operation') 47 | ax.scatter(TEdata_Fault_scores[:,0],TEdata_Fault_scores[:,1],TEdata_Fault_scores[:,2], c='red', marker = '*', label='Faulty operation') 48 | ax.set_xlabel('PC1 scores') 49 | ax.set_ylabel('PC2 scores') 50 | ax.set_zlabel('PC3 scores') 51 | ax.legend() 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /Chapter8_MSPM_SteadyState2/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Chapter9_MSPM_Dynamic/DPCA_FaultDetection.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## DPCA model-based fault detection 3 | ## 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | from sklearn.preprocessing import StandardScaler 8 | import matplotlib.pyplot as plt 9 | from sklearn.decomposition import PCA 10 | 11 | #%% utility functions 12 | def augment(X, n_lags): 13 | """ generate augumented matrix with n_lags lagged measurements for each feature 14 | 15 | Parameters: 16 | --------------------- 17 | X: ndarray of shape (n_samples, n_features) 18 | n_samples is the number of samples and n_features is the number of features. 19 | 20 | n_lags: The number of lags to be used for data augumentation. 21 | 22 | 23 | Returns: 24 | --------------------- 25 | X_aug: ndarray of shape (n_samples-n_lags, (n_lags+1)*n_features). 26 | The n_lags+1 values of feature j go into columns (j-1)*(n_lags+1) to j*(n_lags+1)-1. 27 | The ith row of X_augmented contains data from row i to row i+n_lags from matrix X. 28 | 29 | """ 30 | 31 | # augment training data 32 | N, m = X.shape 33 | X_aug = np.zeros((N-n_lags, (n_lags+1)*m)) 34 | 35 | for sample in range(n_lags, N): 36 | XBlock = X[sample-n_lags:sample+1,:] 37 | X_aug[sample-n_lags,:] = np.reshape(XBlock, (1,-1), order = 'F') 38 | 39 | return X_aug 40 | 41 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 42 | ## read data 43 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 44 | X_NOC = np.loadtxt('multivariate_NOC_data.txt') 45 | X_test = np.loadtxt('multivariate_test_data.txt') 46 | 47 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 48 | ## DPCA model training 49 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 50 | # augment and scale data 51 | X_NOC_aug = augment(X_NOC, 1) 52 | scaler = StandardScaler() 53 | X_NOC_aug_scaled = scaler.fit_transform(X_NOC_aug) 54 | 55 | # fit PCA model 56 | dpca = PCA().fit(X_NOC_aug_scaled) 57 | 58 | # find n_component 59 | explained_variance = 100*dpca.explained_variance_ratio_ # in percentage 60 | cum_explained_variance = np.cumsum(explained_variance) # cumulative % variance explained 61 | n_comp = np.argmax(cum_explained_variance >= 95) + 1 62 | print('Number of PCs cumulatively explaining atleast 95% variance: ', n_comp) 63 | 64 | # refit with n_comp 65 | dpca = PCA(n_components=n_comp) 66 | dpca.fit(X_NOC_aug_scaled) 67 | 68 | # compute scores and error for training data 69 | scores_NOC = dpca.transform(X_NOC_aug_scaled) 70 | X_NOC_aug_scaled_reconstruct = dpca.inverse_transform(scores_NOC) 71 | error_NOC = X_NOC_aug_scaled - X_NOC_aug_scaled_reconstruct 72 | 73 | # calculate Q for training data 74 | Q_NOC = np.sum(error_NOC*error_NOC, axis = 1) 75 | 76 | # calculate T2 for training data 77 | N = X_NOC_aug_scaled.shape[0] 78 | 79 | lambda_k = np.diag(dpca.explained_variance_) # eigenvalue = explained variance 80 | lambda_k_inv = np.linalg.inv(lambda_k) 81 | 82 | T2_NOC = np.zeros((N,)) 83 | for i in range(N): 84 | T2_NOC[i] = np.dot(np.dot(scores_NOC[i,:], lambda_k_inv), scores_NOC[i,:].T) 85 | 86 | # control limits 87 | Q_CL = np.percentile(Q_NOC, 99) 88 | T2_CL = np.percentile(T2_NOC, 99) 89 | 90 | # monitoring charts for NOC/training data 91 | plt.figure(), plt.plot(Q_NOC), plt.plot([1,len(Q_NOC)],[Q_CL,Q_CL], color='red') 92 | plt.xlabel('Sample #', fontsize=15), plt.ylabel('Q for training data', fontsize=15), plt.title('DPCA'), plt.grid(), plt.show() 93 | 94 | plt.figure(), plt.plot(T2_NOC), 
plt.plot([1,len(T2_NOC)],[T2_CL,T2_CL], color='red') 95 | plt.xlabel('Sample #', fontsize=15), plt.ylabel('T$^2$ for training data', fontsize=15), plt.title('DPCA'), plt.grid(), plt.show() 96 | 97 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 98 | ## Monitoring charts for test data 99 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 100 | # augment and scale test data 101 | X_test_aug = augment(X_test, 1) 102 | X_aug_test_scaled = scaler.transform(X_test_aug) 103 | 104 | # compute scores and error for test data 105 | scores_test = dpca.transform(X_aug_test_scaled) 106 | X_aug_test_scaled_reconstruct = dpca.inverse_transform(scores_test) 107 | error_test = X_aug_test_scaled - X_aug_test_scaled_reconstruct 108 | 109 | # calculate Q and T2 for test data 110 | Q_test = np.sum(error_test*error_test, axis = 1) 111 | 112 | N_test = X_aug_test_scaled.shape[0] 113 | T2_test = np.zeros((N_test,)) 114 | for i in range(N_test): 115 | T2_test[i] = np.dot(np.dot(scores_test[i,:], lambda_k_inv), scores_test[i,:].T) 116 | 117 | # monitoring charts for test data 118 | plt.figure(), plt.plot(Q_test), plt.plot([1,len(Q_test)],[Q_CL,Q_CL], color='red') 119 | plt.xlabel('Sample #', fontsize=15), plt.ylabel('Q for test data', fontsize=15), plt.grid(), plt.show() 120 | 121 | plt.figure(), plt.plot(T2_test), plt.plot([1,len(T2_test)],[T2_CL,T2_CL], color='red') 122 | plt.xlabel('Sample #', fontsize=15), plt.ylabel('T$^2$ for test data', fontsize=15), plt.grid(), plt.show() 123 | -------------------------------------------------------------------------------- /Chapter9_MSPM_Dynamic/LinearRelationshipExtraction_DPCA_versus_PCA.py: -------------------------------------------------------------------------------- 1 | ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | ## DPCA model vs PCA model 3 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | #%% import required packages 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from sklearn.decomposition import PCA 9 | 10 | # utility functions 11 | def augment(X, n_lags): 12 | """ generate augumented matrix with n_lags lagged measurements for each feature 13 | 14 | Parameters: 15 | --------------------- 16 | X: ndarray of shape (n_samples, n_features) 17 | n_samples is the number of samples and n_features is the number of features. 18 | 19 | n_lags: The number of lags to be used for data augumentation. 20 | 21 | 22 | Returns: 23 | --------------------- 24 | X_aug: ndarray of shape (n_samples-n_lags, (n_lags+1)*n_features). 
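A small worked example (for illustration; not part of the original docstring):
augment(np.array([[0, 1], [2, 3], [4, 5]]), 1) returns
[[0, 2, 1, 3],
 [2, 4, 3, 5]]
i.e., each augmented row stacks [x1(k-1), x1(k), x2(k-1), x2(k)] because of the
column-major (order='F') reshape used below.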
25 | 26 | """ 27 | 28 | # augment training data 29 | N, m = X.shape 30 | X_aug = np.zeros((N-n_lags, (n_lags+1)*m)) 31 | 32 | for sample in range(n_lags, N): 33 | XBlock = X[sample-n_lags:sample+1,:] 34 | X_aug[sample-n_lags,:] = np.reshape(XBlock, (1,-1), order = 'F') 35 | 36 | return X_aug 37 | 38 | #%% generate data for the system: x1(k) = 0.8*x1(k-1) + x2(k-1) 39 | np.random.seed(1) 40 | 41 | N = 1000 42 | x2 = np.random.normal(loc=0, scale=1, size=(N,1)) 43 | x1 = np.zeros((N,1)) 44 | for k in range(1,N): 45 | x1[k] = 0.8*x1[k-1] + x2[k-1] 46 | 47 | X = np.hstack((x1, x2)) 48 | 49 | # plot 50 | plt.figure(figsize=(6,3)) 51 | plt.plot(x1, 'g', linewidth=1) 52 | plt.ylabel('x1'), plt.xlabel('k'), plt.xlim(0) 53 | 54 | plt.figure(figsize=(6,3)) 55 | plt.plot(x2, 'm', linewidth=1) 56 | plt.ylabel('x2'), plt.xlabel('k'), plt.xlim(0) 57 | 58 | plt.figure(figsize=(6,3)) 59 | plt.plot(x1, x2, '.', linewidth=1) 60 | plt.ylabel('x2'), plt.xlabel('x1'), plt.xlim(0) 61 | 62 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 63 | ## Fit PCA model 64 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 65 | # center data 66 | X_centered = X - np.mean(X, axis=0) 67 | 68 | # fit PCA model 69 | pca = PCA() 70 | pca.fit(X_centered) 71 | 72 | # get singular values 73 | print('PCA singular values:', pca.singular_values_) 74 | 75 | # get singular vectors 76 | print(pca.components_) 77 | 78 | #%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 79 | ## Fit DPCA model 80 | ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 81 | # augment data 82 | X_aug = augment(X, 1) 83 | 84 | # center data 85 | X_aug_centered = X_aug - np.mean(X_aug, axis=0) 86 | 87 | # fit DPCA model 88 | dpca = PCA() 89 | dpca.fit(X_aug_centered) 90 | 91 | # get singular values 92 | print('DPCA singular values:', dpca.singular_values_) 93 | 94 | # get singular vectors 95 | print(dpca.components_) 96 | 97 | # get 4th singular vector 98 | print('4th singular vector: ', dpca.components_[3,:]) 99 | -------------------------------------------------------------------------------- /Chapter9_MSPM_Dynamic/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Images/Book3_CoverPage.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ML-PSE/Machine_Learning_for_PM_and_PdM/96c21a8aeb4177541ea79e13474e099cc5ea00dd/Images/Book3_CoverPage.JPG -------------------------------------------------------------------------------- /Images/info.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine_Learning_for_PM_and_PdM 2 | 3 | Code repository for the book 'Machine Learning in Python for Process and Equipment Condition Monitoring, and Predictive Maintenance' 4 | 5 | ![](/Images/Book3_CoverPage.JPG) 6 | 7 | 8 | ## Book Links: 9 | - *Google Play*: https://play.google.com/store/books/details?id=7JXtEAAAQBAJ 10 | - *LeanPub*: https://leanpub.com/ML-Python-for-PM-PdM 11 | 12 | ## Data sources for datasets used in this book: 13 | [Weblinks mentioned below may change or may no longer exist in future. 
Relevant data files have been provided in the respective folders in this repository. If you plan to share or use any dataset, please abide by the license policy (and/or the citation requests, if any) for the dataset.]


- *Aeration Tank Data*:

Publicly available at https://openmv.net/info/aeration-rate.


- *Steam Generator Data*:

Obtained from https://homes.esat.kuleuven.be/~smc/daisy/daisydata.html.

Citation: De Moor B.L.R. (ed.), DaISy: Database for the Identification of Systems, Department of Electrical Engineering, ESAT/STADIUS, KU Leuven, Belgium, URL: http://homes.esat.kuleuven.be/~smc/daisy/.


- *Polymer Manufacturing Process Data*:

Originally obtained from https://landing.umetrics.com/downloads-other-downloads (unfortunately, this link no longer seems to work; the data file is provided in the respective folder in this repository).
The dataset is also referenced at https://www.academia.edu/38630159/Multivariate_data_analysis_wiki.


- *Low-Density Polyethylene (LDPE) Process Data*:

Obtained from https://openmv.net.


- *Tennessee Eastman Process Data*:

Available at https://github.com/camaramm/tennessee-eastman-profBraatz (Copyright (c) 1998-2002 The Board of Trustees of the University of Illinois).


- *Semiconductor Manufacturing Process Data*:

Obtained from http://www.eigenvector.com/data/Etch/.

Citation: B.M. Wise, N.B. Gallagher, S.W. Butler, D.D. White, Jr. and G.G. Barna, "A Comparison of Principal Components Analysis, Multi-way Principal Components Analysis, Tri-linear Decomposition and Parallel Factor Analysis for Fault Detection in a Semiconductor Etch Process", J. Chemometrics (1999).


- *Gas Boiler Data*:

Shohet et al., Simulated boiler data for fault detection and classification. Available at https://ieee-dataport.org/open-access/simulated-boiler-data-fault-detection-and-classification, IEEE Dataport, 2019.

Data shared under the Creative Commons Attribution license (https://creativecommons.org/licenses/by/4.0/).


- *Debutanizer Column Data from a Petroleum Refinery*:

Available as supplementary material at https://link.springer.com/book/10.1007/978-1-84628-480-9.

Citation: Fortuna et al., Soft Sensors for Monitoring and Control of Industrial Processes, Springer, 2007.


- *Combined Cycle Power Plant Data*:

Available at the UCI machine learning repository https://archive.ics.uci.edu/ml/datasets/combined+cycle+power+plant.

Citation: Pınar Tüfekci, Prediction of full load electrical power output of a base load operated combined cycle power plant using machine learning methods, International Journal of Electrical Power & Energy Systems, Volume 60, September 2014, Pages 126-140, ISSN 0142-0615.


- *Fluid Catalytic Cracking Unit Data*:

Details available at https://mlforpse.com/fccu-dataset/.


- *Wind Turbine Data*:

Available at https://github.com/mathworks/WindTurbineHighSpeedBearingPrognosis-Data. The data has been shared by MathWorks under the Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license (https://creativecommons.org/licenses/by-nc-sa/4.0/). Permission was granted by the original author of the dataset, Eric Bechhoefer, to use the data in this book.


- *Gas Turbine Data*:

Originally available at the NASA prognostics data repository https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/. The data is available at https://data.nasa.gov/Aerospace/CMAPSS-Jet-Engine-Simulated-Data/ff5v-kuh6/about_data.
The training and validation data file names used in the text differ from the original file names.

Citation: A. Saxena and K. Goebel (2008). "Turbofan Engine Degradation Simulation Data Set", NASA Ames Prognostics Data Repository (http://ti.arc.nasa.gov/project/prognostic-data-repository), NASA Ames Research Center, Moffett Field, CA.
--------------------------------------------------------------------------------