├── CreateDataPartition.m
├── CreateDataPartition_CSV_NoAttrName.m
├── CreateDataPartition_Excel_NoAttrName.m
├── FeatureExtraction_CSI_Signal.m
├── Preprocess_Normalization.m
├── README.md
└── machine_learning_framework.m


/CreateDataPartition.m:
--------------------------------------------------------------------------------
 1 | function [trainInput, trainOutput, testInput, testOutput, testSize] = CreateDataPartition(filename, trainSize)
 2 | % CREATEDATAPARTITION  Shuffle a labelled spreadsheet and split it into train/test sets.
 3 | %
 4 | %   The file is read with XLSREAD and must be laid out like this
 5 | %   (first row = attribute names, last column = class label stored as text):
 6 | %   _____________________________________________________________________
 7 | %        Input Data[0] ... Input Data[N]       | Output Label (Class)
 8 | %   _____________________________________________________________________
 9 | %                                              |
10 | %                                              |
11 | %
12 | %   Inputs:
13 | %     filename  - name of the file handed to XLSREAD.
14 | %     trainSize - number of rows for the training set; must be an integer
15 | %                 between 0 and the number of data rows.
16 | %
17 | %   Outputs:
18 | %     trainInput, testInput   - rows of the numeric block returned by XLSREAD.
19 | %     trainOutput, testOutput - cell arrays with the last text column
20 | %                               (class labels) of the same rows.
21 | %     testSize                - number of rows left for the test set.
22 | 
23 |     [num, txt, raw] = xlsread(filename);
24 |     [r_raw, c_raw] = size(raw);
25 |     r = r_raw - 1; % size of data set (the first row holds the attribute names)
26 | 
27 |     % The row indexing below is only valid when the numeric and text blocks
28 |     % line up with the raw sheet, so fail early with a clear message.
29 |     if size(num, 1) ~= r || size(txt, 1) ~= r_raw || size(txt, 2) < c_raw
30 |         error('CreateDataPartition:unexpectedLayout', ...
31 |               'Expected one header row, numeric input columns and a text label in the last column.');
32 |     end
33 |     if ~isnumeric(trainSize) || ~isscalar(trainSize) || trainSize ~= fix(trainSize) ...
34 |             || trainSize < 0 || trainSize > r
35 |         error('CreateDataPartition:invalidTrainSize', ...
36 |               'trainSize must be an integer between 0 and %d (the number of data rows).', r);
37 |     end
38 | 
39 |     txt = txt(2:r_raw, :); % drop the attribute-name row
40 | 
41 |     % Randomize the data set before partition (same permutation for both blocks)
42 |     random_index = randperm(r);
43 |     txt = txt(random_index, :);
44 |     num = num(random_index, :);
45 | 
46 |     % Partition a data for training set and testing set as below...
47 |     % ----------------------------------------|----------------------------
48 |     %             Train set                   |         Test set
49 |     %----------------------------------------------------------------------
50 |     testSize      = r - trainSize;
51 |     trainInput    = num(1 : trainSize, :);
52 |     testInput     = num(trainSize+1 : r, :);
53 |     trainOutput   = txt(1 : trainSize  , c_raw);
54 |     testOutput    = txt(trainSize+1 : r, c_raw);
55 | 
56 | end


--------------------------------------------------------------------------------
/CreateDataPartition_CSV_NoAttrName.m:
--------------------------------------------------------------------------------
 1 | function [trainInput, trainOutput, testInput, testOutput, testSize] = CreateDataPartition_CSV_NoAttrName(filename, trainSize)
 2 | % CREATEDATAPARTITION_CSV_NOATTRNAME  Shuffle a numeric .csv file and split it into train/test sets.
 3 | %
 4 | %   The file is read with CSVREAD, has no attribute-name row and must be
 5 | %   laid out like this:
 6 | %   _____________________________________________________________________
 7 | %         Output Label (Class)    |   Input Data[0] ... Input Data[N]
 8 | %                                 |
 9 | %                                 |
10 | %
11 | %   Inputs:
12 | %     filename  - name of the .csv file handed to CSVREAD.
13 | %     trainSize - number of rows for the training set; must be an integer
14 | %                 between 0 and the number of rows in the file.
15 | %
16 | %   Outputs:
17 | %     trainInput, testInput   - columns 2..end of the selected rows.
18 | %     trainOutput, testOutput - column 1 (class label) of the same rows.
19 | %     testSize                - number of rows left for the test set.
20 | 
21 |     raw = csvread(filename);
22 |     % r => size of data set (number of rows)
23 |     % c => 1 label column + dimension of input vector (or feature vector)
24 |     [r, c] = size(raw);
25 | 
26 |     % Reject a bad split size up front instead of failing on an index error.
27 |     if ~isnumeric(trainSize) || ~isscalar(trainSize) || trainSize ~= fix(trainSize) ...
28 |             || trainSize < 0 || trainSize > r
29 |         error('CreateDataPartition_CSV_NoAttrName:invalidTrainSize', ...
30 |               'trainSize must be an integer between 0 and %d (the number of data rows).', r);
31 |     end
32 | 
33 |     % Randomize the data set before partition. A single randperm already
34 |     % gives a uniformly random order, so repeated shuffling is not needed.
35 |     random_index = randperm(r);
36 |     raw = raw(random_index, :);
37 | 
38 |     % Partition a data for training set and testing set as below...
39 |     % ----------------------------------------|----------------------------
40 |     %             Train set                   |         Test set
41 |     %----------------------------------------------------------------------
42 |     testSize      = r - trainSize;
43 |     trainInput    = raw(1 : trainSize,   2:c);
44 |     testInput     = raw(trainSize+1 : r, 2:c);
45 |     trainOutput   = raw(1 : trainSize  , 1);
46 |     testOutput    = raw(trainSize+1 : r, 1);
47 | 
48 | end


--------------------------------------------------------------------------------
/CreateDataPartition_Excel_NoAttrName.m:
--------------------------------------------------------------------------------
 1 | function [trainInput, trainOutput, testInput, testOutput, testSize] = CreateDataPartition_Excel_NoAttrName(filename, trainSize)
 2 | % CREATEDATAPARTITION_EXCEL_NOATTRNAME  Shuffle a labelled spreadsheet (no header row) and split it.
 3 | %
 4 | %   The file is read with XLSREAD, has no attribute-name row and must be
 5 | %   laid out like this (label stored as text, inputs numeric):
 6 | %   _____________________________________________________________________
 7 | %         Output Label (Class)    |   Input Data[0] ... Input Data[N]
 8 | %                                 |
 9 | %                                 |
10 | %
11 | %   Inputs:
12 | %     filename  - name of the file handed to XLSREAD.
13 | %     trainSize - number of rows for the training set; must be an integer
14 | %                 between 0 and the number of rows in the sheet.
15 | %
16 | %   Outputs:
17 | %     trainInput, testInput   - rows of the numeric block returned by XLSREAD.
18 | %     trainOutput, testOutput - string arrays built from the first column of
19 | %                               the text block (class labels) of the same rows.
20 | %     testSize                - number of rows left for the test set.
21 | 
22 |     [num, txt, raw] = xlsread(filename);
23 |     % r => size of data set (number of rows of the sheet)
24 |     r = size(raw, 1);
25 | 
26 |     % The same row permutation is applied to num and txt, so both blocks
27 |     % must have exactly one row per data row.
28 |     if size(num, 1) ~= r || size(txt, 1) ~= r
29 |         error('CreateDataPartition_Excel_NoAttrName:unexpectedLayout', ...
30 |               'Expected a text label in the first column and numeric inputs in every row.');
31 |     end
32 |     if ~isnumeric(trainSize) || ~isscalar(trainSize) || trainSize ~= fix(trainSize) ...
33 |             || trainSize < 0 || trainSize > r
34 |         error('CreateDataPartition_Excel_NoAttrName:invalidTrainSize', ...
35 |               'trainSize must be an integer between 0 and %d (the number of data rows).', r);
36 |     end
37 | 
38 |     % Randomize the data set before partition. A single randperm already
39 |     % gives a uniformly random order, so repeated shuffling is not needed.
40 |     random_index = randperm(r);
41 |     txt = txt(random_index, :);
42 |     num = num(random_index, :);
43 | 
44 |     % Partition a data for training set and testing set as below...
45 |     % ----------------------------------------|----------------------------
46 |     %             Train set                   |         Test set
47 |     %----------------------------------------------------------------------
48 |     testSize      = r - trainSize;
49 |     trainInput    = num(1 : trainSize,   :);
50 |     testInput     = num(trainSize+1 : r, :);
51 |     trainOutput   = string(txt(1 : trainSize  , 1));
52 |     testOutput    = string(txt(trainSize+1 : r, 1));
53 | end


--------------------------------------------------------------------------------
/FeatureExtraction_CSI_Signal.m:
--------------------------------------------------------------------------------
 1 | %% Read .csv file
 2 | % Other recordings that can be selected instead:
 3 | % csi_dataTableFile    = 'csi_data.csv';
 4 | % csi_dataTableFile    = 'data_no_preserve_5PM.csv';
 5 | % csi_dataTableFile    = 'jok.csv';
 6 | csi_dataTableFile = 'no_people.csv';
 7 | 
 8 | [num, txt, raw] = xlsread(csi_dataTableFile);
 9 | 
10 | % Convert 'cell' data-type to double
11 | rawForCSI   = str2double(txt);
12 | 
13 | % Read time-stamp (2nd column of the sheet, divided by 1000)
14 | t_stamp     = (cell2mat(raw(:, 2)))./1000;
15 | 
16 | % Put raw data into "CSI channel matrix": the 64 values of every row are
17 | % taken from columns 3..66 of the converted text block.
18 | [r, c] = size(txt);
19 | csi_complexNumData = rawForCSI(:, 3 : 66);
20 | 
21 | %% Extract CSI Magnitude time-waveform
22 | 
23 | % Calculate magnitude & phase
24 | CSI_Magnitude = abs(csi_complexNumData);
25 | CSI_Phase     = angle(csi_complexNumData);
26 | 
27 | % Select CSI signal by the channel
28 | channel = 1:30;
29 | x = CSI_Magnitude(:, channel);
30 | 
31 | % Time-waveform interpolation + re-sampling & moving average
32 | % (the resampling grid and the method do not depend on the channel, so they
33 | % are defined once, outside the loop)
34 | fresampling = 50;
35 | dt = 1/fresampling;
36 | t_resampling = (dt : dt : 200)';
37 | interpolate_method = 'linear';
38 | x_smooth = zeros(length(t_resampling), length(channel));
39 | for c = 1 : length(channel)
40 |     x_channel_c = x(:, c);
41 |     x_channel_c_interpolated = interp1(t_stamp, x_channel_c, t_resampling, interpolate_method);
42 |     x_channel_c_smooth = smooth(x_channel_c_interpolated, 'moving');
43 |     x_smooth(:, c) = x_channel_c_smooth;
44 | end
45 | 
46 | %% Extract spectrogram from time-domain CSI signal
47 | windowLength  = 128;
48 | overlap = 64;
49 | 
50 | % Only channel 21 is processed; use the commented loop header to run all channels
51 | for c = 21 : 21
52 | %for c = 1 : length(channel)
53 |     %figure;
54 |     % spectrogram(x_smooth(:, c), windowLength, overlap, fresampling);
55 |     % title('Without human activity (No human)');
56 |     % title('With human activity (Walking)');
57 |     s = spectrogram(x_smooth(:, c), windowLength, overlap, windowLength);
58 |     s = s';   % one row per time frame
59 |     %s = s(:, 2:windowLength);
60 |     psd = abs(s);
61 | 
62 |     % Save spectrogram log to excel (disabled)
63 | %     filename = ['CSI_spectrogram (channel', num2str(c), ' ', csi_dataTableFile, ').xlsx'];
64 | %     xlswrite(filename, spectrogram_magnitude, 'B1:BM30');
65 | %     xlswrite(filename, t, 'A1:A30');
66 | end
67 | 
68 | %% Extract feature using PCA
69 | % PCA is calculated from matrix of spectrogram(s): for each pair of
70 | % consecutive frames the PCA coefficients of the two-row matrix are stored.
71 | [r_psd, c_psd] = size(psd);
72 | % One row per frame and one column per spectrogram column, so that PCA always
73 | % has as many rows as t_frameLog below (also when r_psd < c_psd).
74 | PCA = zeros(r_psd, c_psd);
75 | % NOTE(review): the loop starts at frame 3, so rows 1 and 2 stay zero and the
76 | % frame pair (1, 2) is never used - confirm this is intended.
77 | for frame = 3 : r_psd
78 |     psd_1 = psd(frame-1, :);
79 |     psd_2 = psd(frame, :);
80 |     diff_psd = [psd_1; psd_2];
81 |     coeff = pca(diff_psd);
82 |     PCA(frame, :) = coeff;
83 | end
84 | 
85 | %% Save spectrogram log to excel
86 | 
87 | % Consecutive frames are (windowLength - overlap) samples apart
88 | hopSize = windowLength - overlap;
89 | t_frameLog = (1 : r_psd).*(hopSize/fresampling);
90 | filename = ['feature_of_csi (', csi_dataTableFile, ').xlsx'];
91 | 
92 | %%
93 | writeData = [t_frameLog' PCA];
94 | xlswrite(filename, writeData, 'A');


--------------------------------------------------------------------------------
/Preprocess_Normalization.m:
--------------------------------------------------------------------------------
 1 | function normalizedData = Preprocess_Normalization(InputData)
 2 | % PREPROCESS_NORMALIZATION  Scale every column of InputData into [-0.5, 0.5].
 3 | %
 4 | %   Uses the "Maximum Divider" method: each column is divided by its
 5 | %   largest absolute value and multiplied by a scaling factor of 0.5.
 6 | %
 7 | %   Input:
 8 | %     InputData      - numeric matrix, one column per feature.
 9 | %
10 | %   Output:
11 | %     normalizedData - matrix of the same size as InputData.
12 | %                      A column that contains only zeros stays zero.
13 | 
14 | %     %% Alternative (disabled): "Standardization (Z-score)" method
15 | %     % : x_norm = (x - x_mean)/STD
16 | %     scalingFactor = 0.5;
17 | %     x_mean = mean(InputData);
18 | %     STD  = std(InputData);
19 | %     [r, c] = size(InputData);
20 | %     normalizedData = scalingFactor.*(InputData - ones(r, c).*x_mean)./STD;
21 | 
22 |     %% This normalization use a "Maximum Divider" method
23 |     scalingFactor = 0.5;
24 |     % Always reduce along rows: without the explicit dimension a single-row
25 |     % input would be collapsed to one scalar instead of per-column maxima.
26 |     x_max = max(abs(InputData), [], 1);
27 |     % Avoid 0/0 = NaN for columns that contain only zeros.
28 |     x_max(x_max == 0) = 1;
29 |     normalizedData = scalingFactor.*(InputData./x_max);
30 | 
31 | end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HumanActivityDetection_using_CSI_MATLAB
2 | This repository implements a "Human Activity Detection" system based on the CSI (Channel State Information) signal from WiFi. All procedures are implemented in MATLAB.
3 | 


--------------------------------------------------------------------------------
/machine_learning_framework.m:
--------------------------------------------------------------------------------
  1 | %% Create data set for machine learning 
  2 | 
  3 | % Read .csv file of data set
  4 | % filename = 'data_for_ML_sleep.csv';
  5 | % filename = 'ecoli.csv';
  6 | filename = 'feature_of_csi (jok) - labeled.csv';
  7 | 
  8 | % Partition a data for training set and testing set
  9 | numberOfTrain = 80;
 10 | [feature_data_train, output_data_train, feature_data_test, output_data_test, numberOfTest] = CreateDataPartition_Excel_NoAttrName(filename, numberOfTrain);
 11 | 
 12 | 
 13 | 
 14 | %% Re-label the binary output (N, P) as [-1, 1] 
 15 | %  This section only apply to the data which is for the binary classification problem.
 16 | for n = 1:numberOfTrain
 17 |     if output_data_train(n) == 'J'
 18 |         output_data_train(n) = 1;
 19 |     elseif output_data_train(n) == 'N'
 20 |         output_data_train(n) = -1;
 21 |     else
 22 |         output_data_train(n) = 0;
 23 |     end
 24 | end
 25 | for n = 1:numberOfTest
 26 |     if output_data_test(n) == 'J'
 27 |         output_data_test(n) = 1;
 28 |     elseif output_data_test(n) == 'N'
 29 |         output_data_test(n) = -1;
 30 |     else
 31 |         output_data_test(n) = 0;
 32 |     end
 33 | end
 34 | 
 35 | 
 36 | %% Preprocessing the data
 37 | 
 38 | % Normalization : train data
 39 | feature_normalized_data_train = Preprocess_Normalization(feature_data_train);
 40 | 
 41 | % Normalization test data
 42 | feature_normalized_data_test = Preprocess_Normalization(feature_data_test);
 43 | 
 44 | 
 45 | 
 46 | %% Initialize and training to all candidated machine learning model
 47 | % Candidated N-models are created with a different SVM parameters in order to select
 48 | % the "best model" from N models
 49 | 
 50 | % Initialize the candiated model
 51 | N = 9;
 52 | modelList = cell(N, 1);
 53 | modelList{1} = CreateModel_SVM(feature_data_train, output_data_train, [-1, 1], 'rbf', 0.25);
 54 | modelList{2} = CreateModel_SVM(feature_data_train, output_data_train, [-1, 1], 'linear', 0.25);
 55 | modelList{3} = CreateModel_SVM(feature_data_train, output_data_train, [-1, 1], 'polynomial', 0.25);
 56 | modelList{4} = CreateModel_SVM(feature_data_train, output_data_train, [-1, 1], 'rbf', 0.55);
 57 | modelList{5} = CreateModel_SVM(feature_data_train, output_data_train, [-1, 1], 'linear', 0.55);
 58 | modelList{6} = CreateModel_SVM(feature_data_train, output_data_train, [-1, 1], 'polynomial', 0.55);
 59 | modelList{7} = CreateModel_SVM(feature_data_train, output_data_train, [-1, 1], 'rbf', 0.85);
 60 | modelList{8} = CreateModel_SVM(feature_data_train, output_data_train, [-1, 1], 'linear', 0.85);
 61 | modelList{9} = CreateModel_SVM(feature_data_train, output_data_train, [-1, 1], 'polynomial', 0.85);
 62 | 
 63 | 
 64 | 
 65 | %% Cross validation for select the best model
 66 | % Initialize the cross validation method, parameters and data partition for validation
 67 | 
 68 | cv_method = 'KFold';
 69 | numberOfObservation = numberOfTrain;
 70 | k = 10;
 71 | cv_partition = cvpartition(numberOfObservation, cv_method, k);
 72 | 
 73 | % Find loss function of each candidated ML model by using cross-validation 
 74 | loss = zeros(N, 1);
 75 | for m = 1:N
 76 |     % Select the candidated ML model
 77 |     candidatedModel = modelList{m};
 78 |     
 79 |     % Apply cross-validation to SVM model 
 80 |     CV_model = crossval(candidatedModel, 'CVPartition', cv_partition);
 81 |     
 82 |     loss(m) = kfoldLoss(CV_model);
 83 | end
 84 | 
 85 | % Select the best ML model
 86 | [min_loss, min_loss_index] = min(loss);
 87 | model_best = modelList{min_loss_index};
 88 |          
 89 |                         
 90 |                         
 91 | %% Test the resulted ML model with a different data set (test data)
 92 | 
 93 | predictOutput = predict(model_best, feature_data_test);
 94 | pass_count = 0;
 95 | fail_count = 0;
 96 | for n = 1 : length(predictOutput)
 97 |     if (predictOutput(n) == str2double(output_data_test(n)))
 98 |         pass_count = pass_count + 1;
 99 |     else
100 |         fail_count = fail_count + 1;
101 |     end
102 | end
103 | disp('best loss = ');
104 | disp(min_loss);
105 | disp('pass count = ');
106 | disp(pass_count);
107 | disp('fail count = ');
108 | disp(fail_count);
109 | 
110 | 
111 | 
112 | 
113 | 


--------------------------------------------------------------------------------