├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── data ├── .gitkeep ├── processed │ └── .gitkeep └── raw │ └── .gitkeep ├── models └── .gitkeep ├── notebooks ├── .gitkeep └── 1.0-dsp-DeepConvLSTM.ipynb ├── setup.py ├── src ├── __init__.py └── data │ ├── .gitkeep │ ├── __init__.py │ ├── preprocess_data.py │ └── sliding_window.py └── test_environment.py /.gitattributes: -------------------------------------------------------------------------------- 1 | * linguist-vendored 2 | *.py linguist-vendored=false 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # DotEnv configuration 60 | .env 61 | 62 | # Database 63 | *.db 64 | *.rdb 65 | 66 | # Pycharm 67 | .idea 68 | 69 | # VS Code 70 | .vscode/ 71 | 72 | # Spyder 73 | .spyproject/ 74 | 75 | # Jupyter NB Checkpoints 76 | .ipynb_checkpoints/ 77 | 78 | # exclude data from source control by default 79 | #/data/ 80 | 81 | # Mac OS-specific storage files 82 | .DS_Store 83 | 84 | # vim 85 | *.swp 86 | *.swo 87 | 88 | # Mypy cache 89 | .mypy_cache/ 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | Implementation of the DeepConvLSTM model in PyTorch and Python 3. 4 | To train the model, open the Jupyter notebook in the `notebooks` directory and follow the instructions. 5 | 6 | This implementation is based on the paper "Deep Convolutional and LSTM Recurrent Neural Networks for Multimodal Wearable Activity Recognition" by Ordóñez and Roggen (Sensors, 2016). 7 | 8 | The authors' original source code was implemented using the Lasagne framework. 9 | 10 | ## Dependencies 11 | 12 | - Python 3
13 | - PyTorch 14 | 15 | Project Organization 16 | ------------ 17 | 18 | ├── LICENSE 19 | ├── README.md <- The top-level README for developers using this project. 20 | ├── data 21 | │   ├── processed <- The final, canonical data sets for modeling. 22 | │   └── raw <- The original, immutable data dump. 23 | │ 24 | ├── models <- Trained and serialized models, model predictions, or model summaries 25 | │ 26 | ├── notebooks <- Jupyter notebooks. 27 | │ └── 1.0-dsp-DeepConvLSTM.ipynb <- Jupyter notebook with step-by-step instructions 28 | │ 29 | ├── setup.py <- Makes the project pip-installable (pip install -e .) so src can be imported 30 | └── src <- Source code for use in this project. 31 |    ├── __init__.py <- Makes src a Python module 32 | │ 33 |    └── data <- Scripts to download or generate data 34 | 35 | 36 | -------- 37 | 38 |
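## Getting Started

The first cells of the notebook download and preprocess the OPPORTUNITY dataset. The equivalent shell commands, run from the `notebooks` directory, are:

```
wget https://archive.ics.uci.edu/ml/machine-learning-databases/00226/OpportunityUCIDataset.zip -P ../data/raw/
python3 ../src/data/preprocess_data.py -i ../data/raw/OpportunityUCIDataset.zip -o ../processed/oppChallenge_gestures.data
```

Note that `preprocess_data.py` resolves the output path relative to the directory of the input zip, so the processed file ends up at `data/processed/oppChallenge_gestures.data`.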

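Once the project is installed with `pip install -e .`, the segmentation utility in `src/data/sliding_window.py` can also be used on its own. The following is a minimal sketch (not part of the original code base) that uses random data in place of real sensor readings:

```python
import numpy as np
from src.data.sliding_window import sliding_window

# 1000 time steps of 113 sensor channels, cut into windows of
# 24 samples with a step of 12 (i.e. 50% overlap between windows)
data = np.random.rand(1000, 113).astype(np.float32)
windows = sliding_window(data, (24, data.shape[1]), (12, 1))
print(windows.shape)  # (82, 24, 113)
```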
Project based on the [cookiecutter data science](https://drivendata.github.io/cookiecutter-data-science/) project template. #cookiecutterdatascience

39 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dspanah/Sensor-Based-Human-Activity-Recognition-DeepConvLSTM-Pytorch/7a393fa60175a2b584346dd865764e1cd8660c0f/data/.gitkeep -------------------------------------------------------------------------------- /data/processed/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dspanah/Sensor-Based-Human-Activity-Recognition-DeepConvLSTM-Pytorch/7a393fa60175a2b584346dd865764e1cd8660c0f/data/processed/.gitkeep -------------------------------------------------------------------------------- /data/raw/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dspanah/Sensor-Based-Human-Activity-Recognition-DeepConvLSTM-Pytorch/7a393fa60175a2b584346dd865764e1cd8660c0f/data/raw/.gitkeep -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dspanah/Sensor-Based-Human-Activity-Recognition-DeepConvLSTM-Pytorch/7a393fa60175a2b584346dd865764e1cd8660c0f/models/.gitkeep -------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dspanah/Sensor-Based-Human-Activity-Recognition-DeepConvLSTM-Pytorch/7a393fa60175a2b584346dd865764e1cd8660c0f/notebooks/.gitkeep -------------------------------------------------------------------------------- /notebooks/1.0-dsp-DeepConvLSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "--2019-03-28 07:13:44-- https://archive.ics.uci.edu/ml/machine-learning-databases/00226/OpportunityUCIDataset.zip\n", 13 | "Resolving archive.ics.uci.edu... 128.195.10.249\n", 14 | "Connecting to archive.ics.uci.edu|128.195.10.249|:443... connected.\n", 15 | "HTTP request sent, awaiting response... 200 OK\n", 16 | "Length: 306636009 (292M) [application/zip]\n", 17 | "Saving to: ‘../data/raw/OpportunityUCIDataset.zip’\n", 18 | "\n", 19 | "OpportunityUCIDatas 100%[===================>] 292.43M 49.4MB/s in 6.9s \n", 20 | "\n", 21 | "2019-03-28 07:13:52 (42.5 MB/s) - ‘../data/raw/OpportunityUCIDataset.zip’ saved [306636009/306636009]\n", 22 | "\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00226/OpportunityUCIDataset.zip -P ../data/raw/" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 4, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "name": "stdout", 37 | "output_type": "stream", 38 | "text": [ 39 | "Checking dataset ../data/raw/OpportunityUCIDataset.zip\n", 40 | "Processing dataset files ...\n", 41 | "... file OpportunityUCIDataset/dataset/S1-Drill.dat\n", 42 | "... file OpportunityUCIDataset/dataset/S1-ADL1.dat\n", 43 | "... file OpportunityUCIDataset/dataset/S1-ADL2.dat\n", 44 | "... file OpportunityUCIDataset/dataset/S1-ADL3.dat\n", 45 | "... file OpportunityUCIDataset/dataset/S1-ADL4.dat\n", 46 | "... 
file OpportunityUCIDataset/dataset/S1-ADL5.dat\n", 47 | "... file OpportunityUCIDataset/dataset/S2-Drill.dat\n", 48 | "... file OpportunityUCIDataset/dataset/S2-ADL1.dat\n", 49 | "... file OpportunityUCIDataset/dataset/S2-ADL2.dat\n", 50 | "... file OpportunityUCIDataset/dataset/S2-ADL3.dat\n", 51 | "... file OpportunityUCIDataset/dataset/S3-Drill.dat\n", 52 | "... file OpportunityUCIDataset/dataset/S3-ADL1.dat\n", 53 | "... file OpportunityUCIDataset/dataset/S3-ADL2.dat\n", 54 | "... file OpportunityUCIDataset/dataset/S3-ADL3.dat\n", 55 | "... file OpportunityUCIDataset/dataset/S2-ADL4.dat\n", 56 | "... file OpportunityUCIDataset/dataset/S2-ADL5.dat\n", 57 | "... file OpportunityUCIDataset/dataset/S3-ADL4.dat\n", 58 | "... file OpportunityUCIDataset/dataset/S3-ADL5.dat\n", 59 | "Final datasets with size: | train (557963, 113) | test (118750, 113) | \n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "!mkdir ../data/processed\n", 65 | "!python3 ../src/data/preprocess_data.py -i ../data/raw/OpportunityUCIDataset.zip -o ../processed/oppChallenge_gestures.data" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### Setup" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 1, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "import warnings\n", 82 | "warnings.filterwarnings('ignore')\n", 83 | "\n", 84 | "import numpy as np\n", 85 | "import _pickle as cp\n", 86 | "import matplotlib.pyplot as plt\n", 87 | "import pandas as pd\n", 88 | "import sklearn.metrics as metrics\n", 89 | "\n", 90 | "import torch\n", 91 | "from torch import nn\n", 92 | "import torch.nn.functional as F\n", 93 | "\n", 94 | "NB_SENSOR_CHANNELS = 113\n", 95 | "SLIDING_WINDOW_LENGTH = 24\n", 96 | "SLIDING_WINDOW_STEP = 12" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 2, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "import os\n", 106 | "import sys\n", 107 | "\n", 108 | "# add the 'src' directory as one where we can import modules\n", 109 | "src_dir = os.path.join(os.getcwd(), os.pardir, 'src')\n", 110 | "sys.path.append(src_dir)\n", 111 | "\n", 112 | "from data.sliding_window import sliding_window" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## Load in Data" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 3, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "Loading data...\n", 132 | " ..from file ../data/processed/oppChallenge_gestures.data\n", 133 | " ..reading instances: train (557963, 113), test (118750, 113)\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "def load_dataset(filename):\n", 139 | " \n", 140 | " with open(filename, 'rb') as f:\n", 141 | " data = cp.load(f)\n", 142 | " \n", 143 | " X_train, y_train = data[0]\n", 144 | " X_test, y_test = data[1]\n", 145 | "\n", 146 | " print(\" ..from file {}\".format(filename))\n", 147 | " print(\" ..reading instances: train {0}, test {1}\".format(X_train.shape, X_test.shape))\n", 148 | "\n", 149 | " X_train = X_train.astype(np.float32)\n", 150 | " X_test = X_test.astype(np.float32)\n", 151 | "\n", 152 | " # The targets are cast to uint8 for GPU compatibility.\n", 153 | " y_train = y_train.astype(np.uint8)\n", 154 | " y_test = y_test.astype(np.uint8)\n", 155 | "\n", 156 | " return X_train, y_train, X_test, y_test\n", 157 | "\n", 158 | "print(\"Loading data...\")\n", 159 | "X_train, 
y_train, X_test, y_test = load_dataset('../data/processed/oppChallenge_gestures.data')" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "## Data Segmentation" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 4, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | " ..after sliding and reshaping, train data: inputs (46495, 24, 113), targets (46495,)\n", 179 | " ..after sliding and reshaping, test data : inputs (9894, 24, 113), targets (9894,)\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "assert NB_SENSOR_CHANNELS == X_train.shape[1]\n", 185 | "def opp_sliding_window(data_x, data_y, ws, ss):\n", 186 | " data_x = sliding_window(data_x, (ws, data_x.shape[1]), (ss, 1))\n", 187 | " data_y = np.asarray([[i[-1]] for i in sliding_window(data_y, ws, ss)])\n", 188 | " return data_x.astype(np.float32), data_y.reshape(len(data_y)).astype(np.uint8)\n", 189 | "\n", 190 | "# Sensor data is segmented using a sliding window mechanism\n", 191 | "X_train, y_train = opp_sliding_window(X_train, y_train, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)\n", 192 | "X_test, y_test = opp_sliding_window(X_test, y_test, SLIDING_WINDOW_LENGTH, SLIDING_WINDOW_STEP)\n", 193 | "\n", 194 | "# Data is reshaped\n", 195 | "X_train = X_train.reshape((-1, SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS)) # for input to Conv1D\n", 196 | "X_test = X_test.reshape((-1, SLIDING_WINDOW_LENGTH, NB_SENSOR_CHANNELS)) # for input to Conv1D\n", 197 | "\n", 198 | "print(\" ..after sliding and reshaping, train data: inputs {0}, targets {1}\".format(X_train.shape, y_train.shape))\n", 199 | "print(\" ..after sliding and reshaping, test data : inputs {0}, targets {1}\".format(X_test.shape, y_test.shape))" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "## Define the Model" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 5, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "class HARModel(nn.Module):\n", 216 | " \n", 217 | " def __init__(self, n_hidden=128, n_layers=1, n_filters=64, \n", 218 | " n_classes=18, filter_size=5, drop_prob=0.5):\n", 219 | " super(HARModel, self).__init__()\n", 220 | " self.drop_prob = drop_prob\n", 221 | " self.n_layers = n_layers\n", 222 | " self.n_hidden = n_hidden\n", 223 | " self.n_filters = n_filters\n", 224 | " self.n_classes = n_classes\n", 225 | " self.filter_size = filter_size\n", 226 | " \n", 227 | " self.conv1 = nn.Conv1d(NB_SENSOR_CHANNELS, n_filters, filter_size)\n", 228 | " self.conv2 = nn.Conv1d(n_filters, n_filters, filter_size)\n", 229 | " self.conv3 = nn.Conv1d(n_filters, n_filters, filter_size)\n", 230 | " self.conv4 = nn.Conv1d(n_filters, n_filters, filter_size)\n", 231 | " \n", 232 | " self.lstm1 = nn.LSTM(n_filters, n_hidden, n_layers)\n", 233 | " self.lstm2 = nn.LSTM(n_hidden, n_hidden, n_layers)\n", 234 | " \n", 235 | " self.fc = nn.Linear(n_hidden, n_classes)\n", 236 | "\n", 237 | " self.dropout = nn.Dropout(drop_prob)\n", 238 | " \n", 239 | " def forward(self, x, hidden, batch_size):\n", 240 | " \n", 241 | " x = x.view(-1, NB_SENSOR_CHANNELS, SLIDING_WINDOW_LENGTH)\n", 242 | " x = F.relu(self.conv1(x))\n", 243 | " x = F.relu(self.conv2(x))\n", 244 | " x = F.relu(self.conv3(x))\n", 245 | " x = F.relu(self.conv4(x))\n", 246 | " \n", 247 | " x = x.view(8, -1, self.n_filters) # to (seq_len, batch, n_filters); 24 - 4*(filter_size-1) = 8 time steps remain after the four valid convolutions\n", 248 | " x, hidden = self.lstm1(x, hidden)\n", 249 | " x, hidden = 
self.lstm2(x, hidden)\n", 250 | " \n", 251 | " x = x.contiguous().view(-1, self.n_hidden)\n", 252 | " x = self.dropout(x)\n", 253 | " x = self.fc(x)\n", 254 | " \n", 255 | " out = x.view(batch_size, -1, self.n_classes)[:,-1,:]\n", 256 | " \n", 257 | " return out, hidden\n", 258 | " \n", 259 | " def init_hidden(self, batch_size):\n", 260 | " ''' Initializes hidden state '''\n", 261 | " # Create two new tensors with sizes n_layers x batch_size x n_hidden,\n", 262 | " # initialized to zero, for hidden state and cell state of LSTM\n", 263 | " weight = next(self.parameters()).data\n", 264 | " \n", 265 | " if (train_on_gpu):\n", 266 | " hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),\n", 267 | " weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())\n", 268 | " else:\n", 269 | " hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),\n", 270 | " weight.new(self.n_layers, batch_size, self.n_hidden).zero_())\n", 271 | " \n", 272 | " return hidden\n", 273 | " \n", 274 | "net = HARModel()" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "## Initialize Model Weights" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 6, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "data": { 291 | "text/plain": [ 292 | "HARModel(\n", 293 | " (conv1): Conv1d(113, 64, kernel_size=(5,), stride=(1,))\n", 294 | " (conv2): Conv1d(64, 64, kernel_size=(5,), stride=(1,))\n", 295 | " (conv3): Conv1d(64, 64, kernel_size=(5,), stride=(1,))\n", 296 | " (conv4): Conv1d(64, 64, kernel_size=(5,), stride=(1,))\n", 297 | " (lstm1): LSTM(64, 128)\n", 298 | " (lstm2): LSTM(128, 128)\n", 299 | " (fc): Linear(in_features=128, out_features=18, bias=True)\n", 300 | " (dropout): Dropout(p=0.5)\n", 301 | ")" 302 | ] 303 | }, 304 | "execution_count": 6, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "def init_weights(m):\n", 311 | " if type(m) == nn.LSTM:\n", 312 | " for name, param in m.named_parameters():\n", 313 | " if 'weight_ih' in name:\n", 314 | " torch.nn.init.orthogonal_(param.data)\n", 315 | " elif 'weight_hh' in name:\n", 316 | " torch.nn.init.orthogonal_(param.data)\n", 317 | " elif 'bias' in name:\n", 318 | " param.data.fill_(0)\n", 319 | " elif type(m) == nn.Conv1d or type(m) == nn.Linear:\n", 320 | " torch.nn.init.orthogonal_(m.weight)\n", 321 | " m.bias.data.fill_(0)\n", 322 | "net.apply(init_weights) " 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "## Generate Minibatches" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 7, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "def iterate_minibatches(inputs, targets, batchsize, shuffle=True):\n", 339 | " assert len(inputs) == len(targets)\n", 340 | " if shuffle:\n", 341 | " indices = np.arange(len(inputs))\n", 342 | " np.random.shuffle(indices)\n", 343 | " for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):\n", 344 | " if shuffle:\n", 345 | " excerpt = indices[start_idx:start_idx + batchsize]\n", 346 | " else:\n", 347 | " excerpt = slice(start_idx, start_idx + batchsize)\n", 348 | " yield inputs[excerpt], targets[excerpt]" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 8, 354 | "metadata": {}, 355 | "outputs": [ 356 | { 357 | "name": "stdout", 358 | "output_type": "stream", 359 | "text": [ 360 | "Training on GPU!\n" 361 | ] 362 | } 363 
| ], 364 | "source": [ 365 | "## check if GPU is available\n", 366 | "train_on_gpu = torch.cuda.is_available()\n", 367 | "if(train_on_gpu):\n", 368 | " print('Training on GPU!')\n", 369 | "else: \n", 370 | " print('No GPU available, training on CPU; consider making n_epochs very small.')" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "## Train the Network" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 9, 383 | "metadata": {}, 384 | "outputs": [ 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "Epoch: 1/10... Train Loss: 1.4991... Val Loss: 0.9591... Val Acc: 0.8327... F1-Score: 0.9082...\n", 390 | "Epoch: 2/10... Train Loss: 1.4268... Val Loss: 0.9382... Val Acc: 0.8323... F1-Score: 0.9080...\n", 391 | "Epoch: 3/10... Train Loss: 1.4240... Val Loss: 0.9506... Val Acc: 0.8329... F1-Score: 0.9083...\n", 392 | "Epoch: 4/10... Train Loss: 1.4212... Val Loss: 0.9487... Val Acc: 0.8329... F1-Score: 0.9085...\n", 393 | "Epoch: 5/10... Train Loss: 1.4100... Val Loss: 0.9036... Val Acc: 0.8327... F1-Score: 0.9083...\n", 394 | "Epoch: 6/10... Train Loss: 1.2651... Val Loss: 0.8937... Val Acc: 0.7946... F1-Score: 0.8425...\n", 395 | "Epoch: 7/10... Train Loss: 1.1307... Val Loss: 0.7736... Val Acc: 0.8305... F1-Score: 0.8905...\n", 396 | "Epoch: 8/10... Train Loss: 0.9750... Val Loss: 0.7008... Val Acc: 0.8351... F1-Score: 0.8879...\n", 397 | "Epoch: 9/10... Train Loss: 0.8601... Val Loss: 0.6447... Val Acc: 0.8396... F1-Score: 0.8877...\n", 398 | "Epoch: 10/10... Train Loss: 0.7956... Val Loss: 0.5943... Val Acc: 0.8435... F1-Score: 0.8948...\n" 399 | ] 400 | } 401 | ], 402 | "source": [ 403 | "def train(net, epochs=10, batch_size=100, lr=0.01):\n", 404 | " \n", 405 | " opt = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)\n", 406 | " criterion = nn.CrossEntropyLoss()\n", 407 | " \n", 408 | " if(train_on_gpu):\n", 409 | " net.cuda()\n", 410 | " \n", 411 | " for e in range(epochs):\n", 412 | " \n", 413 | " # initialize hidden state\n", 414 | " h = net.init_hidden(batch_size) \n", 415 | " train_losses = [] \n", 416 | " net.train()\n", 417 | " for batch in iterate_minibatches(X_train, y_train, batch_size):\n", 418 | " x, y = batch\n", 419 | "\n", 420 | " inputs, targets = torch.from_numpy(x), torch.from_numpy(y)\n", 421 | "\n", 422 | " if(train_on_gpu):\n", 423 | " inputs, targets = inputs.cuda(), targets.cuda()\n", 424 | "\n", 425 | " # Creating new variables for the hidden state, otherwise\n", 426 | " # we'd backprop through the entire training history\n", 427 | " h = tuple([each.data for each in h])\n", 428 | " \n", 429 | " # zero accumulated gradients\n", 430 | " opt.zero_grad() \n", 431 | " \n", 432 | " # get the output from the model\n", 433 | " output, h = net(inputs, h, batch_size)\n", 434 | " \n", 435 | " loss = criterion(output, targets.long())\n", 436 | " train_losses.append(loss.item())\n", 437 | " loss.backward()\n", 438 | " opt.step()\n", 439 | " \n", 440 | " val_h = net.init_hidden(batch_size)\n", 441 | " val_losses = []\n", 442 | " accuracy=0\n", 443 | " f1score=0\n", 444 | " net.eval()\n", 445 | " with torch.no_grad():\n", 446 | " for batch in iterate_minibatches(X_test, y_test, batch_size):\n", 447 | " x, y = batch \n", 448 | "\n", 449 | " inputs, targets = torch.from_numpy(x), torch.from_numpy(y)\n", 450 | "\n", 451 | " val_h = tuple([each.data for each in val_h])\n", 452 | "\n", 453 | " if(train_on_gpu):\n", 454 | " inputs, targets = 
inputs.cuda(), targets.cuda()\n", 455 | " \n", 456 | " output, val_h = net(inputs, val_h, batch_size)\n", 457 | "\n", 458 | " val_loss = criterion(output, targets.long())\n", 459 | " val_losses.append(val_loss.item())\n", 460 | "\n", 461 | " top_p, top_class = output.topk(1, dim=1)\n", 462 | " equals = top_class == targets.view(*top_class.shape).long()\n", 463 | " accuracy += torch.mean(equals.type(torch.FloatTensor))\n", 464 | " f1score += metrics.f1_score(targets.view(*top_class.shape).long().cpu(), top_class.cpu(), average='weighted') # f1_score expects (y_true, y_pred)\n", 465 | " \n", 466 | " net.train() # reset to train mode after iterating through validation data\n", 467 | " \n", 468 | " print(\"Epoch: {}/{}...\".format(e+1, epochs),\n", 469 | " \"Train Loss: {:.4f}...\".format(np.mean(train_losses)),\n", 470 | " \"Val Loss: {:.4f}...\".format(np.mean(val_losses)),\n", 471 | " \"Val Acc: {:.4f}...\".format(accuracy/(len(X_test)//batch_size)),\n", 472 | " \"F1-Score: {:.4f}...\".format(f1score/(len(X_test)//batch_size)))\n", 473 | "\n", 474 | "train(net)" 475 | ] 476 | } 477 | ], 478 | "metadata": { 479 | "kernelspec": { 480 | "display_name": "Python [conda env:py35]", 481 | "language": "python", 482 | "name": "conda-env-py35-py" 483 | }, 484 | "language_info": { 485 | "codemirror_mode": { 486 | "name": "ipython", 487 | "version": 3 488 | }, 489 | "file_extension": ".py", 490 | "mimetype": "text/x-python", 491 | "name": "python", 492 | "nbconvert_exporter": "python", 493 | "pygments_lexer": "ipython3", 494 | "version": "3.5.5" 495 | } 496 | }, 497 | "nbformat": 4, 498 | "nbformat_minor": 2 499 | } 500 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name='src', 5 | packages=find_packages(), 6 | version='0.1.0', 7 | description='Implementation of the DeepConvLSTM model in PyTorch', 8 | author='Davoud Shariat Panah', 9 | license='', 10 | ) 11 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dspanah/Sensor-Based-Human-Activity-Recognition-DeepConvLSTM-Pytorch/7a393fa60175a2b584346dd865764e1cd8660c0f/src/__init__.py -------------------------------------------------------------------------------- /src/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dspanah/Sensor-Based-Human-Activity-Recognition-DeepConvLSTM-Pytorch/7a393fa60175a2b584346dd865764e1cd8660c0f/src/data/.gitkeep -------------------------------------------------------------------------------- /src/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dspanah/Sensor-Based-Human-Activity-Recognition-DeepConvLSTM-Pytorch/7a393fa60175a2b584346dd865764e1cd8660c0f/src/data/__init__.py -------------------------------------------------------------------------------- /src/data/preprocess_data.py: -------------------------------------------------------------------------------- 1 | __author__ = 'fjordonez' 2 | 3 | import os 4 | import zipfile 5 | import argparse 6 | import numpy as np 7 | import _pickle as cp 8 | 9 | from io import BytesIO 10 | from pandas import Series 11 | 12 | # Hardcoded number of sensor channels employed in the OPPORTUNITY challenge 13 | 
NB_SENSOR_CHANNELS = 113 14 | 15 | # Hardcoded names of the files defining the OPPORTUNITY challenge data. As named in the original data. 16 | OPPORTUNITY_DATA_FILES = ['OpportunityUCIDataset/dataset/S1-Drill.dat', 17 | 'OpportunityUCIDataset/dataset/S1-ADL1.dat', 18 | 'OpportunityUCIDataset/dataset/S1-ADL2.dat', 19 | 'OpportunityUCIDataset/dataset/S1-ADL3.dat', 20 | 'OpportunityUCIDataset/dataset/S1-ADL4.dat', 21 | 'OpportunityUCIDataset/dataset/S1-ADL5.dat', 22 | 'OpportunityUCIDataset/dataset/S2-Drill.dat', 23 | 'OpportunityUCIDataset/dataset/S2-ADL1.dat', 24 | 'OpportunityUCIDataset/dataset/S2-ADL2.dat', 25 | 'OpportunityUCIDataset/dataset/S2-ADL3.dat', 26 | 'OpportunityUCIDataset/dataset/S3-Drill.dat', 27 | 'OpportunityUCIDataset/dataset/S3-ADL1.dat', 28 | 'OpportunityUCIDataset/dataset/S3-ADL2.dat', 29 | 'OpportunityUCIDataset/dataset/S3-ADL3.dat', 30 | 'OpportunityUCIDataset/dataset/S2-ADL4.dat', 31 | 'OpportunityUCIDataset/dataset/S2-ADL5.dat', 32 | 'OpportunityUCIDataset/dataset/S3-ADL4.dat', 33 | 'OpportunityUCIDataset/dataset/S3-ADL5.dat' 34 | ] 35 | 36 | 37 | # Hardcoded thresholds to define global maximums and minimums for every one of the 113 sensor channels employed in the 38 | # OPPORTUNITY challenge 39 | NORM_MAX_THRESHOLDS = [3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 40 | 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 41 | 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 42 | 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 3000, 43 | 3000, 3000, 3000, 10000, 10000, 10000, 1500, 1500, 1500, 44 | 3000, 3000, 3000, 10000, 10000, 10000, 1500, 1500, 1500, 45 | 3000, 3000, 3000, 10000, 10000, 10000, 1500, 1500, 1500, 46 | 3000, 3000, 3000, 10000, 10000, 10000, 1500, 1500, 1500, 47 | 3000, 3000, 3000, 10000, 10000, 10000, 1500, 1500, 1500, 48 | 250, 25, 200, 5000, 5000, 5000, 5000, 5000, 5000, 49 | 10000, 10000, 10000, 10000, 10000, 10000, 250, 250, 25, 50 | 200, 5000, 5000, 5000, 5000, 5000, 5000, 10000, 10000, 51 | 10000, 10000, 10000, 10000, 250, ] 52 | 53 | NORM_MIN_THRESHOLDS = [-3000, -3000, -3000, -3000, -3000, -3000, -3000, -3000, -3000, 54 | -3000, -3000, -3000, -3000, -3000, -3000, -3000, -3000, -3000, 55 | -3000, -3000, -3000, -3000, -3000, -3000, -3000, -3000, -3000, 56 | -3000, -3000, -3000, -3000, -3000, -3000, -3000, -3000, -3000, 57 | -3000, -3000, -3000, -10000, -10000, -10000, -1000, -1000, -1000, 58 | -3000, -3000, -3000, -10000, -10000, -10000, -1000, -1000, -1000, 59 | -3000, -3000, -3000, -10000, -10000, -10000, -1000, -1000, -1000, 60 | -3000, -3000, -3000, -10000, -10000, -10000, -1000, -1000, -1000, 61 | -3000, -3000, -3000, -10000, -10000, -10000, -1000, -1000, -1000, 62 | -250, -100, -200, -5000, -5000, -5000, -5000, -5000, -5000, 63 | -10000, -10000, -10000, -10000, -10000, -10000, -250, -250, -100, 64 | -200, -5000, -5000, -5000, -5000, -5000, -5000, -10000, -10000, 65 | -10000, -10000, -10000, -10000, -250, ] 66 | 67 | 68 | def select_columns_opp(data): 69 | """Selection of the 113 columns employed in the OPPORTUNITY challenge 70 | 71 | :param data: numpy integer matrix 72 | Sensor data (all features) 73 | :return: numpy integer matrix 74 | Selection of features 75 | """ 76 | 77 | # included-excluded 78 | features_delete = np.arange(46, 50) 79 | features_delete = np.concatenate([features_delete, np.arange(59, 63)]) 80 | features_delete = np.concatenate([features_delete, np.arange(72, 76)]) 81 | features_delete = np.concatenate([features_delete, np.arange(85, 89)]) 82 | features_delete = np.concatenate([features_delete, 
np.arange(98, 102)]) 83 | features_delete = np.concatenate([features_delete, np.arange(134, 243)]) 84 | features_delete = np.concatenate([features_delete, np.arange(244, 249)]) 85 | return np.delete(data, features_delete, 1) 86 | 87 | 88 | def normalize(data, max_list, min_list): 89 | """Normalizes all sensor channels 90 | 91 | :param data: numpy integer matrix 92 | Sensor data 93 | :param max_list: numpy integer array 94 | Array containing maximum values for every one of the 113 sensor channels 95 | :param min_list: numpy integer array 96 | Array containing minimum values for every one of the 113 sensor channels 97 | :return: 98 | Normalized sensor data 99 | """ 100 | max_list, min_list = np.array(max_list), np.array(min_list) 101 | diffs = max_list - min_list 102 | for i in np.arange(data.shape[1]): 103 | data[:, i] = (data[:, i]-min_list[i])/diffs[i] 104 | # Checking the boundaries 105 | data[data > 1] = 0.99 106 | data[data < 0] = 0.00 107 | return data 108 | 109 | 110 | def divide_x_y(data, label): 111 | """Segments each sample into features and label 112 | 113 | :param data: numpy integer matrix 114 | Sensor data 115 | :param label: string, ['gestures' (default), 'locomotion'] 116 | Type of activities to be recognized 117 | :return: numpy integer matrix, numpy integer array 118 | Features encapsulated into a matrix and labels as an array 119 | """ 120 | 121 | data_x = data[:, 1:114] 122 | if label not in ['locomotion', 'gestures']: 123 | raise RuntimeError("Invalid label: '%s'" % label) 124 | if label == 'locomotion': 125 | data_y = data[:, 114] # Locomotion label 126 | elif label == 'gestures': 127 | data_y = data[:, 115] # Gestures label 128 | 129 | return data_x, data_y 130 | 131 | 132 | def adjust_idx_labels(data_y, label): 133 | """Transforms original labels into the range [0, nb_labels-1] 134 | 135 | :param data_y: numpy integer array 136 | Sensor labels 137 | :param label: string, ['gestures' (default), 'locomotion'] 138 | Type of activities to be recognized 139 | :return: numpy integer array 140 | Modified sensor labels 141 | """ 142 | 143 | if label == 'locomotion': # Labels for locomotion are adjusted 144 | data_y[data_y == 4] = 3 145 | data_y[data_y == 5] = 4 146 | elif label == 'gestures': # Labels for gestures are adjusted 147 | data_y[data_y == 406516] = 1 148 | data_y[data_y == 406517] = 2 149 | data_y[data_y == 404516] = 3 150 | data_y[data_y == 404517] = 4 151 | data_y[data_y == 406520] = 5 152 | data_y[data_y == 404520] = 6 153 | data_y[data_y == 406505] = 7 154 | data_y[data_y == 404505] = 8 155 | data_y[data_y == 406519] = 9 156 | data_y[data_y == 404519] = 10 157 | data_y[data_y == 406511] = 11 158 | data_y[data_y == 404511] = 12 159 | data_y[data_y == 406508] = 13 160 | data_y[data_y == 404508] = 14 161 | data_y[data_y == 408512] = 15 162 | data_y[data_y == 407521] = 16 163 | data_y[data_y == 405506] = 17 164 | return data_y 165 | 166 | 167 | def check_data(data_set): 168 | """Try to access the file and check whether the dataset is in the data directory 169 | If the file is not found, try to download it from its original location 170 | 171 | :param data_set: 172 | Path with original OPPORTUNITY zip file 173 | :return: 174 | """ 175 | print('Checking dataset {0}'.format(data_set)) 176 | data_dir, data_file = os.path.split(data_set) 177 | # When a directory is not provided, check if dataset is in the data directory 178 | if data_dir == "" and not os.path.isfile(data_set): 179 | new_path = os.path.join(os.path.split(__file__)[0], "data", data_set) 180 | if 
os.path.isfile(new_path) or data_file == 'OpportunityUCIDataset.zip': 181 | data_set = new_path 182 | 183 | # When dataset not found, try to download it from UCI repository 184 | if (not os.path.isfile(data_set)) and data_file == 'OpportunityUCIDataset.zip': 185 | print('... dataset path {0} not found'.format(data_set)) 186 | import urllib 187 | origin = ( 188 | 'https://archive.ics.uci.edu/ml/machine-learning-databases/00226/OpportunityUCIDataset.zip' 189 | ) 190 | if not os.path.exists(data_dir): 191 | print('... creating directory {0}'.format(data_dir)) 192 | os.makedirs(data_dir) 193 | print('... downloading data from {0}'.format(origin)) 194 | urllib.request.urlretrieve(origin, data_set) 195 | 196 | return data_dir 197 | 198 | 199 | def process_dataset_file(data, label): 200 | """Function defined as a pipeline to process individual OPPORTUNITY files 201 | 202 | :param data: numpy integer matrix 203 | Matrix containing data samples (rows) for every sensor channel (column) 204 | :param label: string, ['gestures' (default), 'locomotion'] 205 | Type of activities to be recognized 206 | :return: numpy integer matrix, numpy integer array 207 | Processed sensor data, segmented into features (x) and labels (y) 208 | """ 209 | 210 | # Select correct columns 211 | data = select_columns_opp(data) 212 | 213 | # Columns are segmented into features and labels 214 | data_x, data_y = divide_x_y(data, label) 215 | data_y = adjust_idx_labels(data_y, label) 216 | data_y = data_y.astype(int) 217 | 218 | # Perform linear interpolation 219 | data_x = np.array([Series(i).interpolate() for i in data_x.T]).T 220 | 221 | # Remaining missing data are converted to zero 222 | data_x[np.isnan(data_x)] = 0 223 | 224 | # All sensor channels are normalized 225 | data_x = normalize(data_x, NORM_MAX_THRESHOLDS, NORM_MIN_THRESHOLDS) 226 | 227 | return data_x, data_y 228 | 229 | 230 | def generate_data(dataset, target_filename, label): 231 | """Function to read the OPPORTUNITY challenge raw data and process all sensor channels 232 | 233 | :param dataset: string 234 | Path with original OPPORTUNITY zip file 235 | :param target_filename: string 236 | Processed file 237 | :param label: string, ['gestures' (default), 'locomotion'] 238 | Type of activities to be recognized. The OPPORTUNITY dataset includes several annotations to perform 239 | recognition modes of locomotion/postures and recognition of sporadic gestures. 240 | """ 241 | 242 | data_dir = check_data(dataset) 243 | 244 | data_x = np.empty((0, NB_SENSOR_CHANNELS)) 245 | data_y = np.empty((0)) 246 | 247 | zf = zipfile.ZipFile(dataset) 248 | print('Processing dataset files ...') 249 | for filename in OPPORTUNITY_DATA_FILES: 250 | try: 251 | data = np.loadtxt(BytesIO(zf.read(filename))) 252 | print('... 
file {0}'.format(filename)) 253 | x, y = process_dataset_file(data, label) 254 | data_x = np.vstack((data_x, x)) 255 | data_y = np.concatenate([data_y, y]) 256 | except KeyError: 257 | print('ERROR: Did not find {0} in zip file'.format(filename)) 258 | 259 | # Dataset is segmented into train and test 260 | nb_training_samples = 557963 261 | # The first 18 OPPORTUNITY data files define the training dataset, comprising 557963 samples 262 | X_train, y_train = data_x[:nb_training_samples,:], data_y[:nb_training_samples] 263 | X_test, y_test = data_x[nb_training_samples:,:], data_y[nb_training_samples:] 264 | 265 | print("Final datasets with size: | train {0} | test {1} | ".format(X_train.shape,X_test.shape)) 266 | 267 | obj = [(X_train, y_train), (X_test, y_test)] 268 | f = open(os.path.join(data_dir, target_filename), 'wb') 269 | cp.dump(obj, f, protocol=-1) 270 | f.close() 271 | 272 | 273 | def get_args(): 274 | '''This function parses and returns arguments passed in''' 275 | parser = argparse.ArgumentParser( 276 | description='Preprocess OPPORTUNITY dataset') 277 | # Add arguments 278 | parser.add_argument( 279 | '-i', '--input', type=str, help='OPPORTUNITY zip file', required=True) 280 | parser.add_argument( 281 | '-o', '--output', type=str, help='Processed data file', required=True) 282 | parser.add_argument( 283 | '-t', '--task', type=str.lower, help='Type of activities to be recognized', default="gestures", choices = ["gestures", "locomotion"], required=False) 284 | # Array for all arguments passed to script 285 | args = parser.parse_args() 286 | # Assign args to variables 287 | dataset = args.input 288 | target_filename = args.output 289 | label = args.task 290 | # Return all variable values 291 | return dataset, target_filename, label 292 | 293 | if __name__ == '__main__': 294 | 295 | OpportunityUCIDataset_zip, output, l = get_args() 296 | generate_data(OpportunityUCIDataset_zip, output, l) 297 | -------------------------------------------------------------------------------- /src/data/sliding_window.py: -------------------------------------------------------------------------------- 1 | # from http://www.johnvinyard.com/blog/?p=268 2 | 3 | import numpy as np 4 | from numpy.lib.stride_tricks import as_strided as ast 5 | 6 | def norm_shape(shape): 7 | ''' 8 | Normalize numpy array shapes so they're always expressed as a tuple, 9 | even for one-dimensional shapes. 10 | 11 | Parameters 12 | shape - an int, or a tuple of ints 13 | 14 | Returns 15 | a shape tuple 16 | ''' 17 | try: 18 | i = int(shape) 19 | return (i,) 20 | except TypeError: 21 | # shape was not a number 22 | pass 23 | 24 | try: 25 | t = tuple(shape) 26 | return t 27 | except TypeError: 28 | # shape was not iterable 29 | pass 30 | 31 | raise TypeError('shape must be an int, or a tuple of ints') 32 | 33 | def sliding_window(a,ws,ss = None,flatten = True): 34 | ''' 35 | Return a sliding window over a in any number of dimensions 36 | 37 | Parameters: 38 | a - an n-dimensional numpy array 39 | ws - an int (a is 1D) or tuple (a is 2D or greater) representing the size 40 | of each dimension of the window 41 | ss - an int (a is 1D) or tuple (a is 2D or greater) representing the 42 | amount to slide the window in each dimension. If not specified, it 43 | defaults to ws. 44 | flatten - if True, all slices are flattened, otherwise, there is an 45 | extra dimension for each dimension of the input. 46 | 47 | Returns 48 | an array containing each n-dimensional window from a 49 | ''' 50 | 51 | if ss is None: 52 | # ss was not provided. 
the windows will not overlap in any direction. 53 | ss = ws 54 | ws = norm_shape(ws) 55 | ss = norm_shape(ss) 56 | 57 | # convert ws, ss, and a.shape to numpy arrays so that we can do math in every 58 | # dimension at once. 59 | ws = np.array(ws) 60 | ss = np.array(ss) 61 | shape = np.array(a.shape) 62 | 63 | 64 | # ensure that ws, ss, and a.shape all have the same number of dimensions 65 | ls = [len(shape),len(ws),len(ss)] 66 | if 1 != len(set(ls)): 67 | raise ValueError(\ 68 | 'a.shape, ws and ss must all have the same length. They were %s' % str(ls)) 69 | 70 | # ensure that ws is smaller than a in every dimension 71 | if np.any(ws > shape): 72 | raise ValueError(\ 73 | 'ws cannot be larger than a in any dimension.\ 74 | a.shape was %s and ws was %s' % (str(a.shape),str(ws))) 75 | 76 | # how many slices will there be in each dimension? 77 | newshape = norm_shape(((shape - ws) // ss) + 1) 78 | # the shape of the strided array will be the number of slices in each dimension 79 | # plus the shape of the window (tuple addition) 80 | newshape += norm_shape(ws) 81 | # the strides tuple will be the array's strides multiplied by step size, plus 82 | # the array's strides (tuple addition) 83 | newstrides = norm_shape(np.array(a.strides) * ss) + a.strides 84 | strided = ast(a,shape = newshape,strides = newstrides) 85 | if not flatten: 86 | return strided 87 | 88 | # Collapse strided so that it has one more dimension than the window. I.e., 89 | # the new array is a flat list of slices. 90 | meat = len(ws) if ws.shape else 0 91 | firstdim = (np.prod(newshape[:-meat]),) if ws.shape else () 92 | dim = firstdim + (newshape[-meat:]) 93 | # remove any dimensions with size 1 94 | # dim = filter(lambda i : i != 1,dim) 95 | return strided.reshape(dim) -------------------------------------------------------------------------------- /test_environment.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | REQUIRED_PYTHON = "python3" 4 | 5 | 6 | def main(): 7 | system_major = sys.version_info.major 8 | if REQUIRED_PYTHON == "python": 9 | required_major = 2 10 | elif REQUIRED_PYTHON == "python3": 11 | required_major = 3 12 | else: 13 | raise ValueError("Unrecognized python interpreter: {}".format( 14 | REQUIRED_PYTHON)) 15 | 16 | if system_major != required_major: 17 | raise TypeError( 18 | "This project requires Python {}. Found: Python {}".format( 19 | required_major, sys.version)) 20 | else: 21 | print(">>> Development environment passes all tests!") 22 | 23 | 24 | if __name__ == '__main__': 25 | main() 26 | --------------------------------------------------------------------------------