├── runner.sh ├── README.md ├── LICENSE ├── .gitignore ├── main.py ├── models ├── rocket_functions.py └── rocket_rigid.py └── utils ├── visualizer.py └── dataloader.py /runner.sh: -------------------------------------------------------------------------------- 1 | python3.6 main.py -m rigRocket -k 10000 -cv 1 -e 20 -i ../Dataset/Data/ 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LiteHAR 2 | LiteHAR: Lightweight Human Activity Recognition from WiFi Signals with Random Convolution Kernels 3 | 4 | Implementation of the LiteHAR model by Hojjat Salehinejad and Shahrokh Valaee. 5 | 6 | The corresponding paper has been accepted for presentation at IEEE ICASSP 2022. 7 | Paper on arXiv: https://arxiv.org/abs/2201.09310 8 | 9 | ## Data 10 | Here is the link to the dataset used in the paper: 11 | https://github.com/ermongroup/Wifi_Activity_Recognition 12 | 13 | 14 | ## Prerequisites 15 | Python >= 3.6 16 | numpy 17 | scipy 18 | pandas 19 | scikit-learn 20 | numba 21 | matplotlib 22 | tqdm 23 | joblib 24 | 25 | ## How to Run 26 | Run the provided bash script: ./runner.sh 27 | 28 | ## Parameters 29 | Set up the parameters in runner.sh: 30 | 31 | python3.6 main.py -m rigRocket -k 10000 -cv 1 -e 20 -i ../Dataset/Data/ 32 | 33 | where 34 | 35 | - i: path to the data directory 36 | - e: number of training epochs (if required by the model) 37 | - m: model name (rigRocket) 38 | - k: number of random convolution kernels 39 | - cv: number of cross-validation runs 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Hojjat Salehinejad 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import numpy as np 4 | import sys 5 | sys.path.insert(0, "utils") 6 | sys.path.insert(0, "models") 7 | import dataloader 8 | import rocket_rigid 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("-i", "--input_path", required = True) 12 | parser.add_argument("-cv", "--num_runs", type = int, default = 1) 13 | parser.add_argument("-m", "--model", required = True) 14 | parser.add_argument("-k", "--num_kernels", type = int, default = 10000) 15 | parser.add_argument("-e", "--num_epochs", type = int, default = 100) 16 | parser.add_argument("-g", "--gpu", type = int, default = 2) 17 | args = parser.parse_args() 18 | 19 | ## Parameters Setup 20 | gpu_id = ['0','1','2'] 21 | auto_save = False 22 | N_epochs = args.num_epochs 23 | N_cv = args.num_runs 24 | num_kernels = args.num_kernels 25 | classes = ['run','pickup','bed','fall','sitdown','standup','walk'] 26 | N_classes = len(classes) 27 | partial_flag = False 28 | rebuild_data = False 29 | val_per = 0 30 | tst_per = 0.2 31 | 32 | batch_size = 8 33 | lr = 0.001 34 | lr_adaptive = True 35 | decay_rate = 0.5 36 | decay_step = 20 37 | pooling = 2 38 | reinitialize_rocket = False # If True, will reinitialize rocket kernels for each CV 39 | 40 | ## Sampling 41 | if pooling ==1: 42 | fval = '1k' 43 | else: 44 | fval = str(int(1000/pooling)) 45 | frequency = fval+'hz' 46 | print('Sampling Frequency is:',frequency) 47 | 48 | ## Prep 49 | if lr_adaptive==False: 50 | decay_step = N_epochs+1 # decay_step more than number of epochs 51 | 52 | model_name = args.model 53 | 54 | X,Y = dataloader.preparedataRigRocket(args.input_path,classes,partial_flag,rebuild_data) 55 | print('Data size from blob:',X.shape,Y.shape) 56 | 57 | 58 | accuracy_collection = np.zeros((1,N_cv)) 59 | cm_collection = np.zeros((N_classes,N_classes,N_cv)) 60 | inf_time_collection = np.zeros((1,N_cv)) 61 | tr_time_collection = np.zeros((1,N_cv)) 62 | for cv_indx in range(N_cv): 63 | X_tr,X_val,X_ts,Y_tr,Y_val,Y_ts = dataloader.splitter(X,Y,val_per,tst_per) 64 | 65 | if model_name=='rigRocket': 66 | acc,cm, inf_time, tr_tim = rocket_rigid.main(X_tr,X_ts,Y_tr,Y_ts,num_kernels,N_classes,batch_size,N_epochs,gpu_id,partial_flag,lr,decay_rate,decay_step,pooling,frequency, N_cv,reinitialize_rocket,'rigRocket') 67 | 68 | accuracy_collection[0,cv_indx] = acc 69 | cm_collection[:,:,cv_indx] = cm 70 | inf_time_collection[0,cv_indx] = inf_time 71 | tr_time_collection[0,cv_indx] = tr_tim 72 | 73 | accuracy_collection = np.asarray(accuracy_collection) 74 | print(model_name) 75 | print(accuracy_collection) 76 | print('Average Accuracy:',np.mean(accuracy_collection)) 77 | print(np.mean(cm_collection,axis=2)) 78 | print('Average CV Inference Time:',np.mean(inf_time_collection)) 79 | print('Average CV Training 
Time:',np.mean(tr_time_collection)) -------------------------------------------------------------------------------- /models/rocket_functions.py: -------------------------------------------------------------------------------- 1 | # Angus Dempster, Francois Petitjean, Geoff Webb 2 | # 3 | # @article{dempster_etal_2020, 4 | # author = {Dempster, Angus and Petitjean, Fran\c{c}ois and Webb, Geoffrey I}, 5 | # title = {ROCKET: Exceptionally fast and accurate time series classification using random convolutional kernels}, 6 | # year = {2020}, 7 | # journal = {Data Mining and Knowledge Discovery}, 8 | # doi = {https://doi.org/10.1007/s10618-020-00701-z} 9 | # } 10 | # 11 | # https://arxiv.org/abs/1910.13051 (preprint) 12 | 13 | import numpy as np 14 | from numba import njit, prange 15 | 16 | @njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)") 17 | def generate_kernels(input_length, num_kernels): 18 | 19 | candidate_lengths = np.array((7, 9, 11), dtype = np.int32) 20 | lengths = np.random.choice(candidate_lengths, num_kernels) 21 | 22 | weights = np.zeros(lengths.sum(), dtype = np.float64) 23 | biases = np.zeros(num_kernels, dtype = np.float64) 24 | dilations = np.zeros(num_kernels, dtype = np.int32) 25 | paddings = np.zeros(num_kernels, dtype = np.int32) 26 | 27 | a1 = 0 28 | 29 | for i in range(num_kernels): 30 | 31 | _length = lengths[i] 32 | 33 | _weights = np.random.normal(0, 1, _length) 34 | 35 | b1 = a1 + _length 36 | weights[a1:b1] = _weights - _weights.mean() 37 | 38 | biases[i] = np.random.uniform(-1, 1) 39 | 40 | dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1))) 41 | dilation = np.int32(dilation) 42 | dilations[i] = dilation 43 | 44 | padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0 45 | paddings[i] = padding 46 | 47 | a1 = b1 48 | 49 | return weights, lengths, biases, dilations, paddings 50 | 51 | @njit(fastmath = True) 52 | def apply_kernel(X, weights, length, bias, dilation, padding): 53 | 54 | input_length = len(X) 55 | 56 | output_length = (input_length + (2 * padding)) - ((length - 1) * dilation) 57 | 58 | _ppv = 0 59 | _max = np.NINF 60 | 61 | end = (input_length + padding) - ((length - 1) * dilation) 62 | 63 | for i in range(-padding, end): 64 | 65 | _sum = bias 66 | 67 | index = i 68 | 69 | for j in range(length): 70 | 71 | if index > -1 and index < input_length: 72 | 73 | _sum = _sum + weights[j] * X[index] 74 | 75 | index = index + dilation 76 | 77 | if _sum > _max: 78 | _max = _sum 79 | 80 | if _sum > 0: 81 | _ppv += 1 82 | 83 | return _ppv / output_length, _max 84 | 85 | @njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True) 86 | def apply_kernels(X, kernels): 87 | weights, lengths, biases, dilations, paddings = kernels 88 | num_examples, _ = X.shape 89 | num_kernels = len(lengths) 90 | _X = np.zeros((num_examples, 2*num_kernels), dtype = np.float64) # 2 features per kernel (PPV and max) 91 | for i in prange(num_examples): 92 | a1 = 0 # for weights 93 | a2 = 0 # for features 94 | for j in range(num_kernels): 95 | b1 = a1 + lengths[j] 96 | b2 = a2 + 2 97 | _X[i, a2:b2] = apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j]) 98 | a1 = b1 99 | a2 = b2 100 | 101 | return _X 102 | --------------------------------------------------------------------------------
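A minimal usage sketch for the two helpers above (illustrative only: the batch shape and kernel count are assumptions, and models/ must be on the Python path, which main.py arranges via sys.path.insert). Kernels are generated once for a given series length; each univariate series is then mapped to 2*num_kernels features, one proportion-of-positive-values (PPV) and one max per kernel.

import numpy as np
from rocket_functions import generate_kernels, apply_kernels

X = np.random.randn(8, 500)           # toy batch: 8 univariate series of length 500 (float64, as the numba signatures expect)
kernels = generate_kernels(500, 100)  # weights, lengths, biases, dilations, paddings for 100 random kernels
features = apply_kernels(X, kernels)  # shape (8, 200): [PPV, max] per kernel
print(features.shape)

In rocket_rigid.py below, this transform is applied per subcarrier, so each CSI sample of shape (time, 90) becomes a (2*num_kernels, 90) feature array.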
/models/rocket_rigid.py: -------------------------------------------------------------------------------- 1 | import os, pickle, time 2 | import numpy as np 3 | from tqdm import tqdm 4 | from scipy.spatial.distance import cdist 5 | from sklearn.metrics import confusion_matrix 6 | from sklearn.linear_model import RidgeClassifierCV 7 | import matplotlib.pyplot as plt 8 | from rocket_functions import generate_kernels, apply_kernels 9 | from joblib import Parallel, delayed 10 | 11 | def ridge_training(X,Y): 12 | model = RidgeClassifierCV(alphas = np.logspace(-3, 3, 10), normalize = True) # one ridge classifier per subcarrier; normalize requires scikit-learn < 1.2 13 | model.fit(X, Y) 14 | return model 15 | 16 | def scoring(model,X): 17 | prediction = model.predict(X) 18 | return prediction 19 | 20 | 21 | def main(X_tr,X_ts,Y_tr,Y_ts,num_kernels,num_motions,batch_size,n_epochs,gpu_id,partial_flag,lr,decay_rate,decay_step,pooling,frequency,N_cv, reinitialize_rocket,model_): 22 | #### Sampling along time 23 | print('Sampling Frequency is:',frequency) 24 | if pooling>1: 25 | print('Sampling along time at window size of ',str(pooling), ' ...') 26 | X_tr = X_tr[:,::pooling,:] 27 | X_ts = X_ts[:,::pooling,:] 28 | T_Max = X_tr.shape[1] 29 | 30 | print(T_Max) 31 | print(X_tr.shape) 32 | np.savetxt('sampleInput.txt',X_tr[0,:,:]) 33 | st = time.time() 34 | 35 | X_tr,X_ts,Y_tr,Y_ts = rocketize(T_Max,num_kernels,X_tr,X_ts,frequency,N_cv,Y_tr,Y_ts,reinitialize_rocket) 36 | print(X_tr.shape,X_ts.shape) # N, 2*num_kernels, 30 37 | np.savetxt('sampleKernel.txt',X_tr[0,:,:]) 38 | 39 | print('Parallel Training ...') 40 | Nsubc = X_tr.shape[2] 41 | models = Parallel(n_jobs=-2,backend="threading")(delayed(ridge_training)(X_tr[:,:,m_],Y_tr) for m_ in tqdm(range(Nsubc))) 42 | tr_time = time.time() - st 43 | 44 | # Testing 45 | print('Parallel Testing ...') 46 | top_collection = [] 47 | disagrees_subcarriers_collect = [] 48 | disagrees_histogram = np.zeros((1,Nsubc)) 49 | time_collect = 0 50 | for s_indx in range(X_ts.shape[0]): # for each test sample 51 | st = time.time() 52 | predictions = Parallel(n_jobs=1,backend="threading")(delayed(scoring)(models[m_],np.expand_dims(X_ts[s_indx,:,m_],axis=0)) for m_ in range(Nsubc)) 53 | time_collect+=(time.time()-st) 54 | (unique, counts) = np.unique(predictions, return_counts=True) 55 | top_collection.append([unique[np.argmax(counts)],Y_ts[s_indx]]) # prediction Target 56 | disagrees_binary = predictions!=Y_ts[s_indx] 57 | disagrees_subcarriers = np.where(disagrees_binary==True)[0] 58 | disagrees_subcarriers_collect.append(disagrees_subcarriers) 59 | for i in disagrees_subcarriers: # histogram of disagrees update 60 | disagrees_histogram[0,i]+=1 61 | 62 | print('Prediction vs. Target:', top_collection) 63 | print('Disagreed subcarriers histogram:',disagrees_histogram/X_ts.shape[0]) 64 | top_collection = np.asarray(top_collection) 65 | acc = (np.sum(top_collection[:,0]==top_collection[:,1]))/X_ts.shape[0] 66 | print('Accuracy is:', acc) 67 | print('Avg.
Inference Time (full,per sample):',time_collect,time_collect/X_ts.shape[0]) 68 | print('Training Time (full,per sample):',tr_time,tr_time/X_tr.shape[0]) 69 | cm = confusion_matrix(top_collection[:,1], top_collection[:,0]) # Target prediction 70 | 71 | return acc,cm,time_collect/X_ts.shape[0],tr_time/X_tr.shape[0] 72 | 73 | 74 | 75 | def rocketize(T_Max,num_kernels,X_tr,X_ts,frequency,N_cv,Y_tr,Y_ts,reinitialize_rocket): 76 | if os.path.isfile('blob/'+frequency+'rocket'+'/X_tr_RockOnly.pkl') and reinitialize_rocket==False: 77 | print('Loading pickled data...') 78 | with open('blob/'+frequency+'rocket'+'/X_tr_RockOnly.pkl', 'rb') as f: 79 | X_tr = pickle.load(f) 80 | with open('blob/'+frequency+'rocket'+'/X_tst_RockOnly.pkl', 'rb') as f: 81 | X_ts = pickle.load(f) 82 | with open('blob/'+frequency+'rocket'+'/T_MAX_RockOnly.pkl', 'rb') as f: 83 | T_Max = pickle.load(f) 84 | else: 85 | print("Building the rocket ...") 86 | print('Computing Rocket of training samples...') 87 | 88 | input_length = T_Max 89 | kernels = generate_kernels(input_length, num_kernels) 90 | 91 | print('Rocketizing training data ...') 92 | X_tr_rock = np.zeros((X_tr.shape[0],X_tr.shape[2],2*num_kernels)) 93 | for sample_indx in tqdm(range(X_tr.shape[0])): # for each sample 94 | input_sample = np.swapaxes(X_tr[sample_indx,:,:],0,1) 95 | X_tr_rock[sample_indx,:,:] = apply_kernels(input_sample, kernels) # out: (N, 90, 2*N_Kernels) 96 | 97 | print('Rocketizing testing data ...') 98 | X_ts_rock = np.zeros((X_ts.shape[0],X_ts.shape[2],2*num_kernels)) 99 | for sample_indx in tqdm(range(X_ts.shape[0])): # for each sample 100 | input_sample = np.swapaxes(X_ts[sample_indx,:,:],0,1) 101 | X_ts_rock[sample_indx,:,:] = apply_kernels(input_sample, kernels) # out: (N, 90, 2*N_Kernels) 102 | 103 | X_tr = np.swapaxes(X_tr_rock,1,2) 104 | X_ts = np.swapaxes(X_ts_rock,1,2) 105 | 106 | # Makedir for frequency 107 | if not os.path.exists('blob/'+frequency+'rocket'): 108 | os.makedirs('blob/'+frequency+'rocket') 109 | 110 | print('Saving the files in the blob ...') 111 | with open('blob/'+frequency+'rocket'+'/X_tr_RockOnly.pkl', 'wb') as f: 112 | pickle.dump(X_tr, f,protocol=4) 113 | with open('blob/'+frequency+'rocket'+'/X_tst_RockOnly.pkl', 'wb') as f: 114 | pickle.dump(X_ts, f,protocol=4) 115 | with open('blob/'+frequency+'rocket'+'/T_MAX_RockOnly.pkl', 'wb') as f: 116 | pickle.dump(T_Max, f,protocol=4) 117 | 118 | 119 | ## Shuffling for CV 120 | all_data = np.vstack((X_tr,X_ts)) 121 | all_labels = np.vstack((Y_tr,Y_ts)) 122 | all_data = all_data[:,:,:30] # keep only the first 30 subcarrier streams 123 | ## Remove certain classes : pick up: index 1 124 | # class_1_keep_indx = [indx for indx in range(all_labels.shape[0]) if all_labels[indx,1]!=1] 125 | # all_data = all_data[class_1_keep_indx,:,:] 126 | # all_labels = all_labels[class_1_keep_indx,:] 127 | 128 | 129 | N_samples = all_data.shape[0] 130 | indx_ = np.arange(N_samples) 131 | np.random.shuffle(indx_) 132 | N_TS = int(np.ceil(0.2*N_samples)) 133 | ts_range = indx_[:N_TS] 134 | tr_range = indx_[N_TS:] 135 | X_tr = all_data[tr_range,:,:] 136 | X_ts = all_data[ts_range,:,:] 137 | Y_tr = all_labels[tr_range,:] 138 | Y_ts = all_labels[ts_range,:] 139 | Y_tr = [np.where(y==1)[0][0] for y in Y_tr] # numeric labels 140 | Y_ts = [np.where(y==1)[0][0] for y in Y_ts] 141 | return X_tr,X_ts,Y_tr,Y_ts 142 | -------------------------------------------------------------------------------- /utils/visualizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 |
import matplotlib.pyplot as plt 4 | from sklearn.decomposition import PCA 5 | import csv 6 | 7 | # RUN: visualize(path1 = "170401_activity_data_UABC_L2_building_2_LOS\Input\input_fall_170310_1136_01.csv") 8 | 9 | 10 | def moving_average(data, window_size): 11 | window= np.ones(int(window_size))/float(window_size) 12 | return np.convolve(data, window, 'same') 13 | 14 | def siamak_ff(f): 15 | window_size = 1000 16 | slide_size = 200 #less than window_size!!! 17 | xx = np.empty([0,window_size,90],float) 18 | 19 | data = [[ float(elm) for elm in v] for v in csv.reader(open(f, "r"))] 20 | tmp1 = np.array(data) 21 | x2 =np.empty([0,window_size,90],float) 22 | print('x2',x2.shape) 23 | 24 | #data import by slide window 25 | k = 0 26 | while k <= (len(tmp1) + 1 - 2 * window_size): 27 | x = np.dstack(np.array(tmp1[k:k+window_size, 1:91]).T) 28 | x2 = np.concatenate((x2, x),axis=0) 29 | k += slide_size 30 | print(k) 31 | xx = np.concatenate((xx,x2),axis=0) 32 | df = x2[:,::2,:90] 33 | print(df.shape) 34 | print('x2',x2.shape) 35 | xx = xx.reshape(len(xx),-1) 36 | print('xx',xx.shape) 37 | 38 | return x2 39 | 40 | def visualize(path1): 41 | #data import 42 | data = pd.read_csv(path1, header=None).values 43 | amp = data[:,1:91] 44 | 45 | #plt 46 | # fig = plt.figure(figsize = (18,10)) 47 | # ax1 = plt.subplot(311) 48 | # plt.imshow(amp[:,0:29].T,interpolation = "nearest", aspect = "auto", cmap="jet") 49 | # ax1.set_title("Antenna1 Amplitude") 50 | # plt.colorbar() 51 | 52 | # ax2 = plt.subplot(312) 53 | # plt.imshow(amp[:,30:59].T,interpolation = "nearest", aspect = "auto", cmap="jet") 54 | # ax2.set_title("Antenna2 Amplitude") 55 | # plt.colorbar() 56 | 57 | # ax3 = plt.subplot(313) 58 | # plt.imshow(amp[:,60:89].T,interpolation = "nearest", aspect = "auto", cmap="jet") 59 | # ax3.set_title("Antenna3 Amplitude") 60 | # plt.colorbar() 61 | # plt.show() 62 | 63 | # Initializing valiables 64 | constant_offset = np.empty_like(amp) 65 | filtered_data = np.empty_like(amp) 66 | 67 | # Calculating the constant offset (moving average 4 seconds) 68 | for i in range(1, len(amp[0])): 69 | constant_offset[:,i] = moving_average(amp[:,i], 4000) 70 | 71 | # Calculating the filtered data (substract the constant offset) 72 | filtered_data = amp - constant_offset 73 | # Smoothing (moving average 0.01 seconds) 74 | for i in range(1, len(amp[0])): 75 | filtered_data[:,i] = moving_average(filtered_data[:,i], 10) 76 | print('fil data',filtered_data.shape) 77 | 78 | 79 | x2 = siamak_ff(path1) 80 | 81 | 82 | # Calculate correlation matrix (90 * 90 dim) 83 | cov_mat2 = np.cov(filtered_data.T) 84 | # Calculate eig_val & eig_vec 85 | eig_val2, eig_vec2 = np.linalg.eig(cov_mat2) 86 | # Sort the eig_val & eig_vec 87 | idx = eig_val2.argsort()[::-1] 88 | eig_val2 = eig_val2[idx] 89 | eig_vec2 = eig_vec2[:,idx] 90 | # Calculate H * eig_vec 91 | pca_data2 = filtered_data.dot(eig_vec2) 92 | 93 | xmin = 0 94 | xmax = 20000 95 | # plt 96 | fig3 = plt.figure(figsize = (18,20)) 97 | 98 | ax1 = plt.subplot(611) 99 | plt.plot(pca_data2[xmin:xmax,0]) 100 | #plt.plot(pca_data2[2500:17500,0]) 101 | ax1.set_title("PCA 1st component") 102 | 103 | ax2 = plt.subplot(612) 104 | plt.plot(pca_data2[xmin:xmax,1]) 105 | #plt.plot(pca_data2[2500:17500,1]) 106 | ax2.set_title("PCA 2nd component") 107 | 108 | ax3 = plt.subplot(613) 109 | plt.plot(pca_data2[xmin:xmax,2]) 110 | #plt.plot(pca_data2[2500:17500,2]) 111 | ax3.set_title("PCA 3rd component") 112 | 113 | ax4 = plt.subplot(614) 114 | plt.plot(pca_data2[xmin:xmax,3]) 115 | 
#plt.plot(pca_data2[2500:17500,3]) 116 | ax4.set_title("PCA 4th component") 117 | 118 | ax5 = plt.subplot(615) 119 | plt.plot(pca_data2[xmin:xmax,4]) 120 | #plt.plot(pca_data2[2500:17500,4]) 121 | ax5.set_title("PCA 5th component") 122 | 123 | ax6 = plt.subplot(616) 124 | plt.plot(pca_data2[xmin:xmax,5]) 125 | #plt.plot(pca_data2[2500:17500,5]) 126 | ax6.set_title("PCA 6th component") 127 | 128 | plt.show() 129 | 130 | plt.figure(figsize = (18,30)) 131 | # Spectrogram(STFT) 132 | plt.subplot(611) 133 | Pxx, freqs, bins, im = plt.specgram(pca_data2[:,0], NFFT=128, Fs=1000, noverlap=1, cmap="jet", vmin=-100,vmax=20) 134 | plt.xlabel("Time[s]") 135 | plt.ylabel("Frequency [Hz]") 136 | plt.title("Spectrogram(STFT)") 137 | plt.colorbar(im) 138 | plt.xlim(0,10) 139 | plt.ylim(0,100) 140 | 141 | plt.subplot(612) 142 | Pxx, freqs, bins, im = plt.specgram(pca_data2[:,1], NFFT=128, Fs=1000, noverlap=1, cmap="jet", vmin=-100,vmax=20) 143 | print(Pxx.shape, freqs, bins, im) 144 | plt.xlabel("Time[s]") 145 | plt.ylabel("Frequency [Hz]") 146 | plt.title("Spectrogram(STFT)") 147 | plt.colorbar(im) 148 | plt.xlim(0,10) 149 | plt.ylim(0,100) 150 | 151 | plt.subplot(613) 152 | Pxx, freqs, bins, im = plt.specgram(pca_data2[:,2], NFFT=128, Fs=1000, noverlap=1, cmap="jet", vmin=-100,vmax=20) 153 | plt.xlabel("Time[s]") 154 | plt.ylabel("Frequency [Hz]") 155 | plt.title("Spectrogram(STFT)") 156 | plt.colorbar(im) 157 | plt.xlim(0,10) 158 | plt.ylim(0,100) 159 | 160 | plt.subplot(614) 161 | Pxx, freqs, bins, im = plt.specgram(pca_data2[:,3], NFFT=128, Fs=1000, noverlap=1, cmap="jet", vmin=-100,vmax=20) 162 | plt.xlabel("Time[s]") 163 | plt.ylabel("Frequency [Hz]") 164 | plt.title("Spectrogram(STFT)") 165 | plt.colorbar(im) 166 | plt.xlim(0,10) 167 | plt.ylim(0,100) 168 | 169 | plt.subplot(615) 170 | Pxx, freqs, bins, im = plt.specgram(pca_data2[:,4], NFFT=128, Fs=1000, noverlap=1, cmap="jet", vmin=-100,vmax=20) 171 | plt.xlabel("Time[s]") 172 | plt.ylabel("Frequency [Hz]") 173 | plt.title("Spectrogram(STFT)") 174 | plt.colorbar(im) 175 | plt.xlim(0,10) 176 | plt.ylim(0,100) 177 | 178 | plt.subplot(616) 179 | Pxx, freqs, bins, im = plt.specgram(pca_data2[:,5], NFFT=128, Fs=1000, noverlap=1, cmap="jet", vmin=-100,vmax=20) 180 | plt.xlabel("Time[s]") 181 | plt.ylabel("Frequency [Hz]") 182 | plt.title("Spectrogram(STFT)") 183 | plt.colorbar(im) 184 | plt.xlim(0,10) 185 | plt.ylim(0,100) 186 | 187 | plt.show() 188 | 189 | plt.figure(figsize = (18,10)) 190 | ax = plt.subplot(111) 191 | # ax.magnitude_spectrum(pca_data2[:,0], Fs=1000, scale='dB', color='C1') 192 | ax.magnitude_spectrum(pca_data2[5000:7500,0], Fs=1000, color='C1') 193 | plt.xlim(0,100) 194 | plt.ylim(0,1000) 195 | plt.show() 196 | 197 | name = 'input_walk_170308_1305_18.csv' 198 | visualize(path1 = '/home/hojjat/csi/siamak/Dataset/Data/'+name) -------------------------------------------------------------------------------- /utils/dataloader.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import scipy.io as scio 4 | import os, pickle,sys 5 | from tqdm import tqdm 6 | from joblib import Parallel, delayed 7 | import math, pickle 8 | from joblib import Parallel, delayed 9 | 10 | def read_files(input_path,classes): 11 | print('Reading files...') 12 | input_file_names = [i for i in os.listdir(input_path) if i.endswith('.csv') and i.startswith('input')] 13 | annotation_file_names = [i for i in os.listdir(input_path) if i.endswith('.csv') and i.startswith('annotation')] 14 | 15 | 
clean_annotation_file_names = ['_'.join(i.split('_')[1:]) for i in annotation_file_names] 16 | 17 | files_matching = [] 18 | # Match files with non-name 19 | for i in input_file_names: 20 | cl_i = '_'.join(i.split('_')[1:]) 21 | if cl_i in clean_annotation_file_names: 22 | files_matching.append([i,'annotation_'+cl_i]) 23 | # Match files with names 24 | for i in input_file_names: 25 | cl_i = '_'.join(i.split('_')[2:]) 26 | if 'siamak' in cl_i or 'sankalp' in cl_i: 27 | if cl_i in clean_annotation_file_names: 28 | files_matching.append([i,'annotation_'+cl_i]) 29 | # listing inputs and annotation and class 30 | classes_stat_dict = {el:[] for el in classes} # dictionary of classes and index of each sample 31 | files_matching_wclasses = [] 32 | for indx, i in enumerate(files_matching): 33 | for k in classes: 34 | if k in i[0].split('_'): 35 | one_hot_vect = one_hot(k,classes) # one-hot encoding the class 36 | files_matching_wclasses.append([i[0],i[1],k,one_hot_vect]) # input_name, annotation name, class name, onehot class vector 37 | classes_stat_dict[k].append(indx) 38 | break 39 | print('Number of samples:',len(files_matching_wclasses)) 40 | print('Number of samples per class:') 41 | for key in classes_stat_dict.keys(): 42 | print(key,len(classes_stat_dict[key])) 43 | 44 | return files_matching_wclasses, classes_stat_dict 45 | 46 | def one_hot(k, classes): 47 | indx = classes.index(k) 48 | one_hot_vect = len(classes)*[0] 49 | one_hot_vect[indx] = 1 50 | return one_hot_vect 51 | 52 | def zero_padding(X, T, Y, N_classes): 53 | print('Zero-padding...') 54 | T_Max = np.max(T) 55 | N_subcarriers = X[0].shape[1] 56 | N_samples = X.shape[0] 57 | print('T_Max:',T_Max,'N Subcarriers:',N_subcarriers,'Number of samples:',N_samples) 58 | X_padded = np.zeros((N_samples,T_Max,N_subcarriers)) 59 | Y_ = np.zeros((N_samples,N_classes)) 60 | for i in tqdm(range(N_samples)): 61 | X_padded[i,:X[i].shape[0],:] = X[i] 62 | Y_[i,:] = Y[i] 63 | return X_padded, Y_, T_Max 64 | 65 | def normalize_data(X): 66 | N_samples = X.shape[0] 67 | T_Max = X.shape[1] 68 | 69 | min_vec = np.min(X,axis=(0,1)) 70 | min_vec = np.expand_dims(min_vec,axis=(0,1)) 71 | tiled_min = np.tile(min_vec,(N_samples,T_Max,1)) 72 | X_ = X - tiled_min 73 | max_vec = np.max(X_,axis=(0,1)) 74 | max_vec = np.expand_dims(max_vec,axis=(0,1)) 75 | tiled_max = np.tile(max_vec,(N_samples,T_Max,1)) 76 | X = X_/tiled_max 77 | return X 78 | 79 | 80 | def preparedata(input_path,classes,partial_flag,rebuild_data): 81 | blob_type = 'all' 82 | if partial_flag: 83 | blob_type = 'toy' 84 | 85 | if rebuild_data==False: 86 | if os.path.isfile('blob/X_'+blob_type+'.pkl') and os.path.isfile('blob/Y_'+blob_type+'.pkl'): 87 | print('Loading from blob...') 88 | with open('blob/X_'+blob_type+'.pkl', 'rb') as f: 89 | X = pickle.load(f) 90 | with open('blob/Y_'+blob_type+'.pkl', 'rb') as f: 91 | Y = pickle.load(f) 92 | else: 93 | print('Pickle files do not exist.') 94 | sys.exit() 95 | elif rebuild_data==True: 96 | N_classes = len(classes) 97 | ## Read files and match inputs and annotations and classes 98 | files_matching_wclasses, classes_stat_dict = read_files(input_path, classes) 99 | ## load csvs 100 | X, Y, T = load_csv(input_path,files_matching_wclasses,partial_flag) # X.one-hot labels, length of each sample 101 | ## zero-padding 102 | X, Y, T_Max = zero_padding(X,T,Y,N_classes) # zero-padded X, max length of signal 103 | ## Normalization 104 | X = normalize_data(X) 105 | ## Saving data 106 | with open('blob/X_'+blob_type+'.pkl', 'wb') as f: 107 | pickle.dump(X, f, 
protocol=4) 108 | with open('blob/Y_'+blob_type+'.pkl', 'wb') as f: 109 | pickle.dump(Y, f, protocol=4) 110 | 111 | return X,Y 112 | 113 | 114 | def preparedataRigRocket(input_path,classes,partial_flag,rebuild_data): 115 | blob_type = 'all' 116 | if partial_flag: 117 | blob_type = 'toy' 118 | 119 | if os.path.isfile('blob/1khzrocket/X_'+blob_type+'RigRocket.pkl') and os.path.isfile('blob/1khzrocket/Y_'+blob_type+'RigRocket.pkl'): 120 | print('Loading from blob...') 121 | with open('blob/1khzrocket/X_'+blob_type+'RigRocket.pkl', 'rb') as f: 122 | X = pickle.load(f) 123 | with open('blob/1khzrocket/Y_'+blob_type+'RigRocket.pkl', 'rb') as f: 124 | Y = pickle.load(f) 125 | else: 126 | print('Pickle files do not exist. Building it') 127 | 128 | N_classes = len(classes) 129 | ## Read files and match inputs and annotations and classes 130 | files_matching_wclasses, classes_stat_dict = read_files(input_path, classes) 131 | ## load csvs 132 | X, Y, T = load_csv(input_path,files_matching_wclasses,partial_flag) # X.one-hot labels, length of each sample 133 | ## zero-padding 134 | X, Y, T_Max = zero_padding(X,T,Y,N_classes) # zero-padded X, max length of signal 135 | ## Normalization 136 | # X = normalize_data(X) 137 | ## Saving data 138 | with open('blob/1khzrocket/X_'+blob_type+'RigRocket.pkl', 'wb') as f: 139 | pickle.dump(X, f, protocol=4) 140 | with open('blob/1khzrocket/Y_'+blob_type+'RigRocket.pkl', 'wb') as f: 141 | pickle.dump(Y, f, protocol=4) 142 | 143 | return X,Y 144 | 145 | 146 | 147 | def max_pooling(X): 148 | T = X.shape[0] # T x 90 149 | # print(T,X.shape) 150 | Xx = np.expand_dims(X,axis=1) 151 | # print(Xx.shape) 152 | 153 | if T%2==1: 154 | T_temp = T-1 155 | Xx = np.reshape(Xx[:T_temp,:],(int(T_temp/2),2,X.shape[1])) 156 | new_X = np.zeros((int(T_temp/2)+1,X.shape[1])) 157 | dd = np.max(Xx,axis=1) 158 | new_X[:-1,:] = dd 159 | new_X[-1,:] = X[-1,:] 160 | else: 161 | T_temp = T 162 | Xx = np.reshape(Xx,(int(T_temp/2),2,X.shape[1])) 163 | new_X = np.zeros((int(T_temp/2),X.shape[1])) 164 | dd = np.max(Xx,axis=1) 165 | new_X = dd 166 | 167 | return new_X 168 | 169 | 170 | 171 | def parallel_read(input_path,i,files_matching_wclasses): 172 | file_path_x = input_path+files_matching_wclasses[i][0] 173 | file_path_y = input_path+files_matching_wclasses[i][1] 174 | Y = np.asarray(files_matching_wclasses[i][3]) 175 | x = np.loadtxt(file_path_x,delimiter=',',dtype='float') 176 | y = np.loadtxt(file_path_y,delimiter=',',dtype='str') 177 | yy = np.where(y==files_matching_wclasses[i][2]) 178 | start_ = int(yy[0][0]) 179 | finish_ = int(yy[0][-1]) 180 | 181 | X = x[start_:finish_+1,1:91] # 90: amplitiude only; for all replace with : 182 | 183 | signal_len = X.shape[0] 184 | return X,Y,signal_len 185 | 186 | def load_csv(input_path,files_matching_wclasses, partial_flag): 187 | n_samples = len(files_matching_wclasses) 188 | if partial_flag: 189 | n_samples = 40 190 | results = Parallel(n_jobs=-2,backend="threading")(delayed(parallel_read)(input_path,i,files_matching_wclasses) for i in tqdm(range(n_samples))) 191 | results = np.asarray(results) 192 | X = results[:,0] 193 | Y = results[:,1] 194 | signal_lengths = results[:,2] 195 | return X, Y, signal_lengths 196 | 197 | 198 | def splitter(X,Y,val_per,tst_per): 199 | print('Splitting train and test data ...') 200 | N_samples = X.shape[0] 201 | indxes = np.arange(N_samples) 202 | np.random.shuffle(indxes) 203 | val_range = int(np.ceil(val_per*N_samples)) 204 | ts_range = int(np.ceil(tst_per*N_samples)) 205 | tr_range = N_samples - (val_range+ts_range) 
206 | 207 | X_tr = X[0:tr_range,:,:] 208 | X_val = X[tr_range:tr_range+val_range,:,:] 209 | X_ts = X[tr_range+val_range:,:,:] 210 | 211 | Y_tr = Y[0:tr_range,:] 212 | Y_val = Y[tr_range:tr_range+val_range,:] 213 | Y_ts = Y[tr_range+val_range:,:] 214 | print('Number of Training samples:',X_tr.shape[0]) 215 | print('Number of Validation samples:',X_val.shape[0]) 216 | print('Number of Test samples:',X_ts.shape[0]) 217 | 218 | return X_tr,X_val,X_ts,Y_tr,Y_val,Y_ts 219 | 220 | 221 | 222 | 223 | 224 | --------------------------------------------------------------------------------
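For reference, the classification stage implemented in models/rocket_rigid.py reduces to: fit one RidgeClassifierCV per subcarrier on the ROCKET features, then combine the per-subcarrier predictions for each test sample by majority vote. The sketch below restates only that logic with assumed array shapes; it drops the repository's normalize=True option (removed in scikit-learn 1.2) and all timing, caching, and histogram bookkeeping.

import numpy as np
from sklearn.linear_model import RidgeClassifierCV

def fit_per_subcarrier(X_tr, y_tr):
    # X_tr: (n_samples, 2*num_kernels, n_subcarriers) ROCKET features; y_tr: (n_samples,) integer labels
    return [RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)).fit(X_tr[:, :, s], y_tr)
            for s in range(X_tr.shape[2])]

def predict_majority(models, x):
    # x: (2*num_kernels, n_subcarriers) features of one test sample
    votes = [m.predict(x[:, s].reshape(1, -1))[0] for s, m in enumerate(models)]
    labels, counts = np.unique(votes, return_counts=True)
    return labels[np.argmax(counts)]  # most frequent class across subcarriers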