├── README.md ├── Load_data.ipynb └── kmeans.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Prediction of Future Location using Hidden Markov Model 2 | The aim was to predict a user's future location using past data: if we can predict the location of any particular user, we can inform them about the real-time situation at the predicted location. We used the Geolife data set, which contains the locations of 182 users along with dates and some other features. So initially we clustered the nearby locations, and then a Hidden Markov Model was used to fit the data and predict the most probable future location of a particular user. 3 | 4 | Research paper followed: Detecting Meaningful Places and Predicting Locations Using Varied K-Means and Hidden Markov Model 5 | by Neelabh Pant and Ramez Elmasri 6 | 7 | Dataset: Geolife_Trajectories 8 | -------------------------------------------------------------------------------- /Load_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Populating the interactive namespace from numpy and matplotlib\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import numpy as np\n", 20 | "import os\n", 21 | "import time, calendar, datetime\n", 22 | "from mpl_toolkits.basemap import Basemap\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "%matplotlib inline\n", 25 | "%pylab inline\n", 26 | "pylab.rcParams['figure.figsize'] = (15, 15)\n", 27 | "import glob\n", 28 | "import pyproj\n", 29 | "import imageio" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": { 36 | "collapsed": true 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "## Remove all warning \n", 41 | "import warnings\n", 42 |
"warnings.filterwarnings('ignore')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "raw", 47 | "metadata": {}, 48 | "source": [ 49 | "Reading all subfolder in Data folder" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": { 56 | "collapsed": true 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "os.chdir('/home/gaurav/Desktop/Research Paper/Geolife Trajectories 1.3/Data')\n", 61 | "## Getting list of all users (182 users)\n", 62 | "list_of_folder = glob.glob('*') \n", 63 | "list_of_folder.sort()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "CPU times: user 3min 36s, sys: 3.1 s, total: 3min 39s\n", 78 | "Wall time: 5min 24s\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "%%time\n", 84 | "data=[0]*182\n", 85 | "k=0\n", 86 | "\n", 87 | "for folder in list_of_folder:\n", 88 | " # Changing subfolder or user per loop\n", 89 | " folder_name='/home/gaurav/Desktop/Research Paper/Geolife Trajectories 1.3/Data/'+folder+'/Trajectory'\n", 90 | " # Change directory\n", 91 | " os.chdir(folder_name) \n", 92 | " a=[]\n", 93 | " # List of all files for a particular user \n", 94 | " list_of_files = glob.glob('*.plt')\n", 95 | " for file_name in list_of_files:\n", 96 | " a.append((np.genfromtxt(file_name,delimiter=',',skip_header=6,names=['Latitude','Longitude','Zero','Altitude',\n", 97 | " 'Duration','Date','Time'],\n", 98 | " dtype=[('Latitude','f8'),('Longitude','f8'),('Zero','i8'),('Altitude','f8'),('Duration','f8'),\n", 99 | " ('Date','S10'),('Time','S8')])))\n", 100 | " data[k]=a\n", 101 | " # Switching over next user\n", 102 | " \n", 103 | " os.chdir('/home/gaurav/Desktop/Research Paper/Geolife Trajectories 1.3/Data')\n", 104 | " k=k+1\n", 105 | " " 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | 
"collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "data[0][0][0]" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "#### We have read all the files in Data folder\n", 124 | "data[i][j][k] represent ith user , jth timestamp ,kth row " 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 6, 130 | "metadata": { 131 | "collapsed": true 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "# Let's load files of first user that is in subfolder 000 or data[0][:][:]" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "171" 149 | ] 150 | }, 151 | "execution_count": 7, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "len(data[0]) # Number of observation for user one\n", 158 | "\n" 159 | ] 160 | }, 161 | { 162 | "cell_type": "raw", 163 | "metadata": { 164 | "collapsed": false 165 | }, 166 | "source": [ 167 | "a=6\n", 168 | "lat_user_one=[]\n", 169 | "lon_user_one=[]\n", 170 | "# Additional for loop here will give plot for entire 182 user data\n", 171 | "for i in range (len(data[a])):\n", 172 | " for j in range(len(data[a][i])):\n", 173 | " lat_user_one.append(data[a][i][j]['Latitude'])\n", 174 | " lon_user_one.append(data[a][i][j]['Longitude'])\n", 175 | "map = Basemap(projection='merc', lat_0 =np.mean(lat_user_one) , lon_0 = np.mean(lon_user_one),\n", 176 | " resolution = 'h', area_thresh = 0.01,\n", 177 | " llcrnrlon=min(lon_user_one), llcrnrlat=min(lat_user_one),\n", 178 | " urcrnrlon=max(lon_user_one), urcrnrlat=max(lat_user_one))\n", 179 | "map.drawcoastlines()\n", 180 | "map.drawcountries()\n", 181 | "map.fillcontinents(color = 'coral',lake_color='aqua')\n", 182 | "map.drawmapboundary()\n", 183 | "x,y=map(lon_user_one,lat_user_one)\n", 184 | "map.plot(y,x, 'bo', 
markersize=5)\n" 185 | ] 186 | } 187 | ], 188 | "metadata": { 189 | "kernelspec": { 190 | "display_name": "Python 3", 191 | "language": "python", 192 | "name": "python3" 193 | }, 194 | "language_info": { 195 | "codemirror_mode": { 196 | "name": "ipython", 197 | "version": 3 198 | }, 199 | "file_extension": ".py", 200 | "mimetype": "text/x-python", 201 | "name": "python", 202 | "nbconvert_exporter": "python", 203 | "pygments_lexer": "ipython3", 204 | "version": "3.6.0" 205 | } 206 | }, 207 | "nbformat": 4, 208 | "nbformat_minor": 2 209 | } 210 | -------------------------------------------------------------------------------- /kmeans.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Import necessary packages" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import numpy as np\n", 21 | "import sys\n", 22 | "import os\n", 23 | "from sklearn.metrics import pairwise_distances" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Implement k-means" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "Let us implement the k-means algorithm. First, we choose an initial set of centroids. 
A common practice is to choose randomly from the data points.\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "def get_initial_centroids(data, k, seed=None):\n", 49 | " '''Randomly choose k data points as initial centroids'''\n", 50 | " if seed is not None: \n", 51 | " np.random.seed(seed)\n", 52 | " n = data.shape[0] \n", 53 | " rand_indices = np.random.randint(0, n, k)\n", 54 | "\n", 55 | " centroids = data[rand_indices,:].toarray()\n", 56 | " \n", 57 | " return centroids" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "\n", 69 | "def assign_clusters(data, centroids):\n", 70 | " \n", 71 | " \n", 72 | " distances_from_centroids = pairwise_distances(data,centroids)\n", 73 | " \n", 74 | " \n", 75 | " cluster_assignment = np.argmin(distances_from_centroids,axis=1)\n", 76 | " \n", 77 | " return cluster_assignment" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "In pseudocode, we iteratively do the following:\n", 85 | "```\n", 86 | "cluster_assignment = assign_clusters(data, centroids)\n", 87 | "centroids = revise_centroids(data, k, cluster_assignment)\n", 88 | "```" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "### Assigning clusters" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "data = np.array([[1., 2., 0.],\n", 107 | " [0., 0., 0.],\n", 108 | " [2., 2., 0.]])\n", 109 | "centroids = np.array([[0.5, 0.5, 0.],\n", 110 | " [0., -0.5, 0.]])" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "Let's assign these data points to the closest centroid." 
118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 5, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "[0 1 0]\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "cluster_assignment = assign_clusters(data, centroids)\n", 137 | "print (cluster_assignment)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 6, 143 | "metadata": { 144 | "collapsed": true 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "def revise_centroids(data, k, cluster_assignment):\n", 149 | " new_centroids = []\n", 150 | " for i in range(k):\n", 151 | " \n", 152 | " member_data_points = data.loc[data.cluster_assignment==i,:]\n", 153 | " \n", 154 | " centroid = np.mean(member_data_points)\n", 155 | " \n", 156 | " \n", 157 | " centroid = centroid.A1\n", 158 | " new_centroids.append(centroid)\n", 159 | " new_centroids = np.array(new_centroids)\n", 160 | " \n", 161 | " return new_centroids" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "**The Basic objective of k-means is to reduce overall euclidean distance inside the cluster **" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 7, 174 | "metadata": { 175 | "collapsed": true 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "def compute_heterogeneity(data, k, centroids, cluster_assignment):\n", 180 | " \n", 181 | " heterogeneity = 0.0\n", 182 | " for i in range(k):\n", 183 | " \n", 184 | " \n", 185 | " member_data_points = data[cluster_assignment==i, :]\n", 186 | " \n", 187 | " if member_data_points.shape[0] > 0: \n", 188 | " \n", 189 | " distances = pairwise_distances(member_data_points, [centroids[i]], metric='euclidean')\n", 190 | " squared_distances = distances**2\n", 191 | " heterogeneity += np.sum(squared_distances)\n", 192 | " \n", 193 | " return heterogeneity" 194 | ] 195 | }, 196 | { 197 | 
"cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "Let's apply the above algorithm in our GPS data. **Note :We can't use euclidean distance here **" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "### Loading data\n", 208 | "** Here we will load only 10,000 rows of our 'all.csv' file After doing this we will generalize our result to whole data**" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 9, 214 | "metadata": { 215 | "collapsed": false 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "data = pd.read_csv('all.csv',nrows=10000)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "## Finding the number of cluster " 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "**If a user is spending matore than 10 minutes at a particular location . That location is assigned as centroid **" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 10, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | "
| \n", 251 | " | index | \n", 252 | "lat | \n", 253 | "long | \n", 254 | "altitude | \n", 255 | "trajectory_id | \n", 256 | "subfolder | \n", 257 | "labels | \n", 258 | "datetime | \n", 259 | "distance | \n", 260 | "timedelta | \n", 261 | "velocity | \n", 262 | "acceleration | \n", 263 | "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", 268 | "0 | \n", 269 | "39.984702 | \n", 270 | "116.318417 | \n", 271 | "492.0 | \n", 272 | "20081023025304 | \n", 273 | "0 | \n", 274 | "NaN | \n", 275 | "2008-10-23 02:53:04 | \n", 276 | "3.520694 | \n", 277 | "0 days 00:00:06.000000000 | \n", 278 | "0.586782 | \n", 279 | "-0.003189 | \n", 280 | "
| 1 | \n", 283 | "1 | \n", 284 | "39.984683 | \n", 285 | "116.318450 | \n", 286 | "492.0 | \n", 287 | "20081023025304 | \n", 288 | "0 | \n", 289 | "NaN | \n", 290 | "2008-10-23 02:53:10 | \n", 291 | "2.838241 | \n", 292 | "0 days 00:00:05.000000000 | \n", 293 | "0.567648 | \n", 294 | "-0.003841 | \n", 295 | "
| 2 | \n", 298 | "2 | \n", 299 | "39.984686 | \n", 300 | "116.318417 | \n", 301 | "492.0 | \n", 302 | "20081023025304 | \n", 303 | "0 | \n", 304 | "NaN | \n", 305 | "2008-10-23 02:53:15 | \n", 306 | "2.742220 | \n", 307 | "0 days 00:00:05.000000000 | \n", 308 | "0.548444 | \n", 309 | "0.332144 | \n", 310 | "
| 3 | \n", 313 | "3 | \n", 314 | "39.984688 | \n", 315 | "116.318385 | \n", 316 | "492.0 | \n", 317 | "20081023025304 | \n", 318 | "0 | \n", 319 | "NaN | \n", 320 | "2008-10-23 02:53:20 | \n", 321 | "11.045822 | \n", 322 | "0 days 00:00:05.000000000 | \n", 323 | "2.209164 | \n", 324 | "0.391130 | \n", 325 | "
| 4 | \n", 328 | "4 | \n", 329 | "39.984655 | \n", 330 | "116.318263 | \n", 331 | "492.0 | \n", 332 | "20081023025304 | \n", 333 | "0 | \n", 334 | "NaN | \n", 335 | "2008-10-23 02:53:25 | \n", 336 | "20.824082 | \n", 337 | "0 days 00:00:05.000000000 | \n", 338 | "4.164816 | \n", 339 | "0.072513 | \n", 340 | "