├── .gitignore ├── LICENSE ├── README.md ├── geolife.png ├── geolife.xmind ├── geolife ├── .gitignore ├── .spyderproject ├── __init__.py ├── base │ ├── __init__.py │ ├── base_op.py │ ├── file_op.py │ ├── gps_record.py │ ├── regPython.py │ └── stay_point.py ├── cluster_points │ ├── cluster_points.py │ ├── my_cluster.py │ └── plot_dbscan.py ├── convert_coordinate │ ├── __init__.py │ └── convert_coordinate.py ├── filter_points │ └── PonitsFilter.py ├── get_intresting_spots │ └── optics.py ├── get_stay_point │ └── get_stay_point.py ├── logger.conf ├── query_geolife │ └── query_points.py ├── sql_base │ ├── __init__.py │ └── dbutils.py ├── store_geolife │ ├── Readme.txt │ ├── deal_one_file.py │ ├── file_op.py │ ├── main.py │ ├── read_db.py │ └── respawn.sh └── update_index.sh └── schema_sql └── geolife.sql /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haicg/datamining-geolife-with-python/f5cdf6ef7589d9d6b76e3fa3bc00e8d4a64f864d/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 haicg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | datamining-geolife-with-python 2 | ============================== 3 | 4 | ### 基本介绍 5 | 本项目主要是在微软的geolife数据集上进行聚类分析,得到用户热点停留区域(并用百度地图的api进行展示),分析出用户的基本行为模式。 6 | 该项目主要包括对对geolife的存储,预处理,停留点的发现与展示,聚类分析得到兴趣区域,最后通过周期分析得到用户的行为模式。 7 | 8 | ### 安装与使用 9 | 10 | 11 | ### 重要说明 12 | 13 | 这个源码库中可能存在一些错误,请大家谨慎使用。 14 | 该源码只是作为学习交流之用,不保证稳定性。 15 | 如果您用该代码带来一切后果,都由您自行承担。 16 | 17 | 18 | -------------------------------------------------------------------------------- /geolife.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haicg/datamining-geolife-with-python/f5cdf6ef7589d9d6b76e3fa3bc00e8d4a64f864d/geolife.png -------------------------------------------------------------------------------- /geolife.xmind: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haicg/datamining-geolife-with-python/f5cdf6ef7589d9d6b76e3fa3bc00e8d4a64f864d/geolife.xmind -------------------------------------------------------------------------------- /geolife/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/haicg/datamining-geolife-with-python/f5cdf6ef7589d9d6b76e3fa3bc00e8d4a64f864d/geolife/.gitignore 
###File Name : base_op.py (historically get_distence.py)
###Author: haicg
###Mail: lihaicg@126.com
###Created Time: Mon 07 Jul 2014 08:13:00 PM HKT
#!/usr/bin/python
"""Distance helpers built on geopy's geodesic distance."""

import math
from geopy import distance


def calc_distance(lat1, lon1, lat2, lon2):
    """Return the geodesic distance between two (lat, lon) pairs, in miles.

    Replaces an earlier hand-rolled implementation that was imprecise and
    handled boundary cases poorly (per the original author's note); geopy
    does the geodesic math.  Order of the two points does not matter.
    """
    point_a = (lat1, lon1)
    point_b = (lat2, lon2)
    return distance.distance(point_a, point_b).miles


def calc_points_distance(p1, p2):
    """Distance in miles between two point-like objects exposing .x/.y.

    NOTE(review): assumes .x is latitude and .y is longitude -- confirm
    against the callers that construct these points.
    """
    return calc_distance(p1.x, p1.y, p2.x, p2.y)


def get_distance(begin_point, end_point):
    """Distance in kilometers between two gps_record-style objects.

    The records may store coordinates as strings, hence the float() casts.
    """
    lat1 = float(begin_point.gps_latitude)
    lat2 = float(end_point.gps_latitude)
    lon1 = float(begin_point.gps_longitude)
    lon2 = float(end_point.gps_longitude)
    # calc_distance() yields miles; 1 mile = 1.609344 km.
    return calc_distance(lat1, lon1, lat2, lon2) * 1.609344
###File Name : base/file_op.py
###Author: haicg
###Mail: lihaicg@126.com
###Created Time: Thu 10 Jul 2014 09:41:37 PM HKT
#!/usr/bin/python
"""Small file helpers plus an ad-hoc named-list store used by the filters."""

import errno
import json

try:
    _string_types = basestring  # Python 2: accept both str and unicode
except NameError:
    _string_types = str  # Python 3 fallback


def close_file(fp):
    """Close fp; mirrors open_file's odd EACCES contract (returns sentinel)."""
    try:
        fp.close()
    except IOError as e:
        if e.errno == errno.EACCES:
            return "some default data"
        # Not a permission error.
        raise


def open_file(filename):
    """Open filename for reading.

    On EACCES returns the sentinel string "some default data" (historical
    behavior); any other IOError is re-raised unchanged (the original raised
    a fresh, information-free IOError here).
    """
    try:
        fp = open(filename)
    except IOError as e:
        if e.errno == errno.EACCES:
            return "some default data"
        # Not a permission error -- propagate the original exception.
        raise
    else:
        return fp


def open_file_write(filename):
    """Open filename for writing; same EACCES sentinel contract as open_file."""
    try:
        fp = open(filename, 'w')
    except IOError as e:
        if e.errno == errno.EACCES:
            return "some default data"
        # Not a permission error.
        raise
    else:
        return fp


def store_list(filename, listName, listContext):
    """Persist the string items of listContext under a 'listName:' header.

    On-disk format: a line "listName:<name>" followed by one line of
    comma-separated values.  An existing entry for the same name is
    replaced; otherwise the entry is appended.  Non-string items are
    silently skipped.
    """
    fileStr = ""
    existFlag = 0
    listStr = ""
    fp = None
    for nodeStr in listContext:
        if isinstance(nodeStr, _string_types):
            listStr = listStr + nodeStr + ","
    listStr = listStr.strip(',')
    try:
        fp = open_file(filename)
        while True:
            line = fp.readline()
            ret = line.find("listName:" + listName)
            if ret == -1:
                fileStr = line + fileStr + fp.readline()
            else:
                existFlag = 1
                # Replace the stored value line for this list name.
                fileStr = line + fileStr + listStr
                fp.readline()
            if line == "":
                break
            print(line)
    except Exception:
        # File missing (first write) or unreadable -- start from scratch.
        print("No list exist")
    finally:
        if fp:
            close_file(fp)
    if not existFlag:
        fileStr = fileStr + "listName:" + listName + "\n"
        fileStr = fileStr + listStr + "\n"
    fp = open_file_write(filename)
    fp.write(fileStr)
    close_file(fp)


def get_store_list(filename, listName):
    """Return the stored value line for listName (trailing newline kept).

    Returns None if the header is never found; raises IOError if the file
    cannot be read (historical contract: any failure becomes IOError).
    """
    listContext = None
    fp = None
    try:
        fp = open_file(filename)
        while True:
            line = fp.readline()
            ret = line.find("listName:" + listName)
            if ret == -1:
                fp.readline()
            else:
                listContext = fp.readline()
            if line == "":
                break
            print(line)
    except Exception:
        print("not exist")
        raise IOError
    finally:
        if fp:
            close_file(fp)
    return listContext


def savePointsToJson(distPointList, userid, filetype='', fileId=0):
    """Write points as a Baidu-map-style JS data file.

    Output path: <userid>_points_dir/points_gps_<filetype><fileId>.js,
    containing "var data<fileId> = {json}".  Does nothing for an empty or
    None point list.
    """
    if not distPointList:
        print("Null Value")
        return
    datalist = []
    for p in distPointList:
        datalist.append([p.gps_longitude, p.gps_latitude, 1])
    fileGpoints = "%d_points_dir/points_gps_%s%d.js" % (userid, filetype, fileId)
    strTmp = "var data%d =" % fileId
    saveDate = {'data': datalist, 'total': len(datalist),
                "rt_loc_cnt": 47764510, "errorno": 0,
                "NearestTime": "2014-08-29 15:20:00",
                "userTime": "2014-08-29 15:32:11"}
    strTmp += json.dumps(saveDate, sort_keys=False)
    # BUG FIX: the with-statement closes the file; the original also called
    # fp.close() inside the block, which was redundant.
    with open(fileGpoints, "w") as fp:
        fp.write(strTmp)
"var data%d =" %fileId 121 | saveDate = {'data':datalist,'total':len(datalist),"rt_loc_cnt":47764510,"errorno":0,"NearestTime":"2014-08-29 15:20:00","userTime":"2014-08-29 15:32:11"} 122 | strTmp += json.dumps(saveDate,sort_keys=False) 123 | with open(fileGpoints,"w") as fp: 124 | fp.write(strTmp) 125 | fp.close() 126 | 127 | -------------------------------------------------------------------------------- /geolife/base/gps_record.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # File Name : gps_record.py 3 | import time 4 | import logging 5 | import file_op 6 | 7 | LOG_HANDLE = None 8 | 9 | class gps_record: 10 | def __init__(self): 11 | self.gps_userid = -1 12 | self.gps_latitude = 0.0 13 | self.gps_longitude = 0.0 14 | self.gps_code = 0 15 | self.gps_altitude = 0.0 16 | self.gps_date = None 17 | self.gps_time = None 18 | self.gps_UTC_timestamp = None 19 | self.gps_UTC_unix_timestamp = 0 20 | self.id = 0 21 | 22 | def show(self): 23 | print self.gps_userid 24 | print self.gps_latitude 25 | print self.gps_longitude 26 | print self.gps_code 27 | print self.gps_altitude 28 | print self.gps_UTC_timestamp 29 | print self.gps_UTC_unix_timestamp 30 | 31 | 32 | def __init_with_txt_record__(self, recordStr, userid): 33 | global LOG_HANDLE 34 | record = recordStr.split(','); 35 | try: 36 | self.gps_userid = int(userid) 37 | self.gps_latitude = float(record[0]) 38 | self.gps_longitude = float(record[1]) 39 | self.gps_code = int(record[2]) 40 | self.gps_altitude = float(record[3]) 41 | self.gps_date = record[5] 42 | self.gps_time = record[6] 43 | time_str = (self.gps_date+' '+self.gps_time).rstrip() 44 | self.gps_UTC_timestamp = time_str 45 | except ValueError : 46 | print "Value Error " 47 | #logging.warning("Value Error " + userid + recordStr) 48 | raise ValueError 49 | try: 50 | timeArray = time.strptime(time_str, "%Y-%m-%d %H:%M:%S") 51 | self.gps_UTC_unix_timestamp = int(time.mktime(timeArray)) 52 | except 
ValueError: 53 | print 'unconverted data remains' 54 | #logging.warning("unconverted data remains " + userid + recordStr) 55 | raise ValueError 56 | return self 57 | 58 | def __init_with_query_sql__(self, recordRes): 59 | global LOG_HANDLE 60 | try: 61 | self.gps_userid = recordRes[0] 62 | self.gps_latitude = recordRes[1] 63 | self.gps_longitude = recordRes[2] 64 | self.gps_code = recordRes[3] 65 | self.gps_altitude = recordRes[4] 66 | self.gps_UTC_timestamp = recordRes[5] 67 | self.gps_UTC_unix_timestamp = recordRes[6] 68 | self.id = recordRes[7]; 69 | 70 | except ValueError : 71 | print "Value Error " 72 | #logging.warning("Value Error " + userid + recordStr) 73 | raise ValueError 74 | return self 75 | def save(self, filename): 76 | try: 77 | fp = file_op.open_file_write(filename) 78 | except IOError, Error: 79 | print "open file error" 80 | print Error 81 | return 82 | 83 | gps_userid= "gps_userid = %d\n" %self.gps_userid 84 | gps_latitude = "gps_latitude = %f\n" %self.gps_latitude 85 | gps_longitude = "gps_longitude = %f\n" % self.gps_longitude 86 | gps_code = "gps_code = %d\n" %self.gps_code 87 | gps_altitude = "gps_altitude = %d\n" %self.gps_altitude 88 | gps_UTC_timestamp = "gps_UTC_timestamp = %s\n" %self.gps_UTC_timestamp 89 | gps_UTC_unix_timestamp = "gps_UTC_unix_timestamp = %d\n" %self.gps_UTC_unix_timestamp 90 | 91 | fp.write(gps_userid + gps_latitude + gps_longitude + gps_code + gps_altitude + gps_UTC_timestamp + gps_UTC_unix_timestamp) 92 | file_op.close_file(fp) 93 | 94 | -------------------------------------------------------------------------------- /geolife/base/regPython.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Dec 14 13:56:05 2014 4 | 5 | @author: hyde 6 | """ 7 | 8 | import sys 9 | from _winreg import * 10 | 11 | # tweak as necessary 12 | version = sys.version[:3] 13 | installpath = sys.prefix 14 | regpath = "SOFTWARE\\Python\\Pythoncore\\%s\\" % 
(version) 15 | installkey = "InstallPath" 16 | pythonkey = "PythonPath" 17 | pythonpath = "%s;%s\\Lib\\;%s\\DLLs\\" % ( 18 | installpath, installpath, installpath 19 | ) 20 | 21 | def RegisterPy(): 22 | print "begin RegisterPy " 23 | try: 24 | print "open key : %s"%regpath 25 | reg = OpenKey(HKEY_CURRENT_USER, regpath) 26 | except EnvironmentError as e: 27 | try: 28 | reg = CreateKey(HKEY_CURRENT_USER, regpath) 29 | SetValue(reg, installkey, REG_SZ, installpath) 30 | SetValue(reg, pythonkey, REG_SZ, pythonpath) 31 | CloseKey(reg) 32 | except: 33 | print "*** EXCEPT: Unable to register!" 34 | return 35 | 36 | print "--- Python", version, "is now registered!" 37 | return 38 | 39 | 40 | if (QueryValue(reg, installkey) == installpath and 41 | QueryValue(reg, pythonkey) == pythonpath): 42 | CloseKey(reg) 43 | print "=== Python", version, "is already registered!" 44 | return CloseKey(reg) 45 | 46 | print "*** ERROR:Unable to register!" 47 | print "*** REASON:You probably have another Python installation!" 48 | 49 | def UnRegisterPy(): 50 | #print "begin UnRegisterPy " 51 | try: 52 | print "open HKEY_CURRENT_USER key=%s"%(regpath) 53 | reg = OpenKey(HKEY_CURRENT_USER, regpath) 54 | #reg = OpenKey(HKEY_LOCAL_MACHINE, regpath) 55 | except EnvironmentError: 56 | print "*** Python not registered?!" 57 | return 58 | try: 59 | DeleteKey(reg, installkey) 60 | DeleteKey(reg, pythonkey) 61 | DeleteKey(HKEY_LOCAL_MACHINE, regpath) 62 | except: 63 | print "*** Unable to un-register!" 64 | else: 65 | print "--- Python", version, "is no longer registered!" 
###File Name : stay_point.py
###Author: haicg
###Mail: lihaicg@126.com
###Created Time: 2014/6/6 15:26:42
#!/usr/bin/python
"""A user's stay: the bounding trackpoints plus the mean coordinate."""

import time
import logging

LOG_HANDLE = None


class stay_point:
    """One detected stay region between an arrival and a leaving fix."""

    def __init__(self):
        # Bounding trackpoint ids and their unix timestamps.
        self.userid = -1
        self.arrival_point = -1
        self.arrival_timestamp = 0
        self.leaving_point = -1
        self.leaving_timestamp = 0
        # Mean position over the stay; None until computed.
        self.mean_coordinate_latitude = None
        self.mean_coordinate_longtitude = None
        self.mean_coordinate_altitude = None

    def printSelf(self):
        """Print every field; timestamps also as human-readable local time."""
        arrival_str = time.ctime(self.arrival_timestamp)
        leaving_str = time.ctime(self.leaving_timestamp)
        print("userid = %d" % self.userid)
        print("arrival_point = %d" % self.arrival_point)
        print("arrival_timestamp = %d \n arrival_timestr=%s"
              % (self.arrival_timestamp, arrival_str))
        print("leaving_point = %d" % (self.leaving_point))
        print("leaving_timestamp = %d \n leaving_timestr=%s"
              % (self.leaving_timestamp, leaving_str))
        print("mean_coordinate_latitude = %f" % self.mean_coordinate_latitude)
        print("mean_coordinate_longtitude = %f" % self.mean_coordinate_longtitude)
        print("mean_coordinate_altitude = %f" % self.mean_coordinate_altitude)
# -*- coding: utf-8 -*-
"""
DBSCAN clustering of one user's stay points (staypoints_<userid>.csv),
plotted before and after clustering.

Created on Sat Oct 11 09:55:07 2014
@author: hai
"""

print(__doc__)

import numpy as np

from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler
# Plot result
import matplotlib.pyplot as plt

import csv

userid = 0
csv_name = "staypoints_%s.csv" % userid
# Load the stay points as an (n, 2) float array (lat, lon per row).
# BUG FIX: np.float was removed from modern numpy; plain float is identical.
# The redundant csvfp.close() after the with-block was also removed.
with open(csv_name, "rb") as csvfp:
    X = np.array(list(csv.reader(csvfp)), float)

# Plot the raw points.
fig = plt.figure(1)
col = 'k'
#plt.xlim(30,100)
#plt.ylim(100,200)
plt.plot(X[:, 0], X[:, 1], '*', markerfacecolor='k',
         markeredgecolor='k', markersize=5)

# Compute DBSCAN.  NOTE(review): eps is in the raw coordinate unit
# (degrees here, presumably) -- confirm when tuning.
db = DBSCAN(eps=0.15, min_samples=4).fit(X)
#db = DBSCAN(eps=0.5, min_samples=10).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise (-1) if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

##############################################################################
# Plot result: one colour per cluster, black for noise.
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
fig = plt.figure(2)
centerPoint = []
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (labels == k)
    xy = X[class_member_mask & core_samples_mask]

    print("%d reference points contain %d points" % (k, len(xy)))
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=14)
    print("center pos %f %f" % (np.mean(xy[:, 0]), np.mean(xy[:, 1])))

plt.title('DBSCAN :Estimated number of clusters: %d' % n_clusters_)

plt.show()
print("successful")
# -*- coding: utf-8 -*-
"""
Greedy single-pass clustering of one user's stay points: a point joins the
first cluster whose every member is within a hard distance threshold
(complete linkage), otherwise it seeds a new cluster.

Created on Mon Oct 13 19:57:40 2014
@author: hai
"""

print(__doc__)

import numpy as np
# Plot result
import matplotlib.pyplot as plt

import sys
sys.path.append("..")
#from sql_base import dbutils
from base import base_op
# (unused sklearn imports removed)


def getPointsDistence(p1, p2):
    """Geodesic distance in miles between two (lat, lon) rows."""
    return base_op.calc_distance(p1[0], p1[1], p2[0], p2[1])


def getPointClusterDist(c, p):
    """Complete-linkage distance: the farthest member of cluster c from p."""
    distList = []
    for cpoint in c:
        distList.append(getPointsDistence(p, cpoint))
    return max(distList)


def updataClusterCenter(clusterP, p, num):
    """Running-mean update: fold p into a center currently averaging num points."""
    p = clusterP * num + p
    return p / (num + 1)


def myCluster(points):
    """Cluster points greedily; returns (centers, counts, labels).

    Clusters with at most minCout members are discarded as noise
    (label -1); surviving clusters are renumbered 0..m-1 in creation order.
    NOTE(review): minDistence is commented as km but getPointsDistence
    returns miles -- confirm the intended unit.
    """
    points = np.array(points)
    minDistence = 0.5  # km (see unit note above)
    minCout = 15       # minimum population for a cluster to survive
    labels = np.zeros(len(points), dtype=int)
    k = 0  # number of clusters created so far
    clusterList = np.zeros((len(points), 2), dtype=float)  # running centers
    numInCluster = np.zeros(len(points))
    clusterListSore = []  # member points of each cluster
    for pointIdx in range(len(points)):
        i = 0
        while i < k:
            if getPointClusterDist(clusterListSore[i], points[pointIdx]) < minDistence:
                clusterListSore[i].append(points[pointIdx])
                clusterList[i] = updataClusterCenter(clusterList[i],
                                                     points[pointIdx],
                                                     numInCluster[i])
                labels[pointIdx] = i
                numInCluster[i] = numInCluster[i] + 1
                break
            else:
                i = i + 1
        if i == k:
            # No existing cluster is close enough: start a new one.
            tmpList = []
            tmpList.append(points[pointIdx])
            clusterListSore.append(tmpList)
            clusterList[i] = points[pointIdx]
            numInCluster[i] = numInCluster[i] + 1
            # BUG FIX: the original never set the label of a point that
            # seeded a new cluster, so every seed stayed labelled 0 and was
            # mis-assigned to cluster 0 during renumbering.
            labels[pointIdx] = i
            k = k + 1

    # Drop small clusters (-> -1) and renumber the survivors 0..m-1.
    mask = np.zeros(len(points), dtype=bool)
    pos = 0
    for i in range(k):
        if numInCluster[i] > minCout:
            mask[i] = True
            labels = [pos if j == i else j for j in labels]
            pos = pos + 1
        else:
            labels = [-1 if j == i else j for j in labels]
    return clusterList[mask], numInCluster[mask], labels


if __name__ == "__main__":
    # Moved under a main guard: the original ran this at import time.
    import csv
    userid = 0
    X = []
    csv_name = "staypoints_%s.csv" % userid
    with open(csv_name, "rb") as csvfp:
        reader = csv.reader(csvfp)
        for line in reader:
            X.append(line)
    # BUG FIX: np.float was removed from modern numpy; float is identical.
    X = np.array(X, float)

    res = myCluster(X)
    print(res[0], res[1])

    centerPoints = res[0]
    labels = np.array(res[2])
# -*- coding: utf-8 -*-
"""
===================================
Demo of DBSCAN clustering algorithm
===================================

Finds core samples of high density and expands clusters from them.

"""
print(__doc__)

import numpy as np

from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler


##############################################################################
# Generate sample data: three standardized Gaussian blobs.
blob_centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=blob_centers,
                            cluster_std=0.4, random_state=0)

X = StandardScaler().fit_transform(X)

##############################################################################
# Compute DBSCAN
model = DBSCAN(eps=0.3, min_samples=10).fit(X)
labels = model.labels_
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[model.core_sample_indices_] = True

# Cluster count, not counting the noise label (-1).
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

print('Estimated number of clusters: %d' % n_clusters_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f"
      % metrics.adjusted_rand_score(labels_true, labels))
print("Adjusted Mutual Information: %0.3f"
      % metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f"
      % metrics.silhouette_score(X, labels))

##############################################################################
# Plot result
import matplotlib.pyplot as plt

# Noise is drawn in black; each real cluster gets its own colour.
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
for label, colour in zip(unique_labels, colors):
    if label == -1:
        # Black used for noise.
        colour = 'k'

    member_mask = (labels == label)

    # Core samples: large markers.
    core_xy = X[member_mask & core_samples_mask]
    plt.plot(core_xy[:, 0], core_xy[:, 1], 'o', markerfacecolor=colour,
             markeredgecolor='k', markersize=14)

    # Border samples of the same cluster: small markers.
    edge_xy = X[member_mask & ~core_samples_mask]
    plt.plot(edge_xy[:, 0], edge_xy[:, 1], 'o', markerfacecolor=colour,
             markeredgecolor='k', markersize=6)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()
# -*- coding: utf-8 -*-
"""
Convert GPS (WGS-84) coordinates to Baidu-map coordinates via the Baidu
geoconv v1 web API.  Python 2 (urllib2).

Created on Tue Dec 16 13:59:43 2014
@author: Administrator
"""

import urllib
import urllib2
import sys
sys.path.append("..")
from sql_base import dbutils
from base import base_op
from base import stay_point
from base import gps_record
import logging.config
import csv
import json
import time

logger = None

# Baidu geoconv parameters: from=1 (GPS/WGS-84) to=5 (Baidu bd09ll);
# the API accepts at most 100 points per request.
_BAIDU_AK = "88E1cff5f2d3a260ac4b3864d8a9adde"
_BAIDU_URL = "http://api.map.baidu.com/geoconv/v1/"
_BATCH_SIZE = 100
_MAX_POINTS = 100 * 1000


def chunks(s, step):
    """Split sequence s into consecutive slices of at most step items."""
    lenS = len(s)
    return [s[i:min(i + step, lenS)] for i in range(0, lenS, step)]


def _post_convert(coordstr):
    """POST a "lon,lat;lon,lat;..." string to the geoconv API.

    Retries with a 1 s sleep until a non-empty JSON body arrives.  Returns
    a list of gps_record objects with converted lon/lat, or [] if the
    request ultimately fails.  (Shared body of the two *_post functions,
    which were previously duplicated verbatim.)
    """
    resList = []
    parm = urllib.urlencode({'ak': _BAIDU_AK, 'from': "1", 'to': "5",
                             'coords': coordstr})
    req = urllib2.Request(_BAIDU_URL, parm)
    data = None
    try:
        while not data:
            response = urllib2.urlopen(req)
            data = json.loads(response.read())
            if not data:
                print("sleep 1s")
                time.sleep(1)
        plist = data["result"]
    except Exception:
        print("Error happen\n")
        return resList

    for p in plist:
        point = gps_record.gps_record()
        point.gps_longitude = p["x"]
        point.gps_latitude = p["y"]
        resList.append(point)
    return resList


def convert_coordinate_batch(orinList):
    """Convert a list of gps_record points, 100 at a time."""
    resPointList = []
    if len(orinList) > _MAX_POINTS:
        print("Over the maximum length ")
        return
    for p in chunks(orinList, _BATCH_SIZE):
        resPointList += convert_coordinate_post(p)
    return resPointList


def convert_coordinate_batch_array(arrayList):
    """Convert a list of (lon, lat) pairs, 100 at a time."""
    resPointList = []
    if len(arrayList) > _MAX_POINTS:
        print("Over the maximum length ")
        return
    for p in chunks(arrayList, _BATCH_SIZE):
        resPointList += convert_coordinate_post_array(p)
    return resPointList


def convert_coordinate_post_array(origArray):
    """Convert raw (lon, lat) pairs (at most 100) via one POST request."""
    coordstr = ";".join("%f,%f" % (p[0], p[1]) for p in origArray)
    return _post_convert(coordstr)


def convert_coordinate_post(orig):
    """Convert gps_record points (at most 100) via one POST request."""
    coordstr = ";".join("%f,%f" % (p.gps_longitude, p.gps_latitude) for p in orig)
    return _post_convert(coordstr)


def convert_coordinate_get(orig):
    """GET variant of the conversion.

    Returns a gps_record list, or None (not []) on failure -- historical
    contract preserved.
    """
    resList = []
    coordstr = ";".join("%f,%f" % (p.gps_longitude, p.gps_latitude) for p in orig)
    url = "%s?ak=%s&from=1&to=5&coords=%s" % (_BAIDU_URL, _BAIDU_AK, coordstr)
    data = None
    try:
        while not data:
            data = urllib2.urlopen(url)
            data = data.read()
            data = json.loads(data)
            if not data:
                print("sleep 1s")
                time.sleep(1)
        plist = data["result"]
    except Exception:
        print("Error happen\n")
        return None

    for p in plist:
        point = gps_record.gps_record()
        point.gps_longitude = p["x"]
        point.gps_latitude = p["y"]
        resList.append(point)
    return resList


def test():
    """Smoke test: convert two nearby points and print the result count."""
    coords = []
    point = gps_record.gps_record()
    point.gps_longitude = 116.326624
    point.gps_latitude = 39.977897
    coords.append(point)
    point = gps_record.gps_record()
    point.gps_longitude = 116.326626
    point.gps_latitude = 39.977882
    coords.append(point)
    res = convert_coordinate_post(coords)
    print(len(res))
#test()
###File Name : filter_points/PonitsFilter.py
###Author: haicg
###Mail: lihaicg@126.com
###Created Time: Mon 07 Jul 2014 08:30:16 PM HKT
#!/usr/bin/python
"""Scan each user's time-ordered GPS records and flag implausible points
(implied speed > 120 km/h between consecutive fixes) as errors."""

import sys
import os
sys.path.append("..")
sys.path.append(".")
from sql_base import dbutils
from base import gps_record
from base import base_op
import logging.config
import logging
import math

logger = None


class CurrentGPSRecordList:
    """Cursor over one user's records, fetched from the DB in pages."""
    def __init__(self):
        self.gps_list = None   # current page of records
        self.currentPos = 0    # index of the next record within the page

currentRecordList = CurrentGPSRecordList()
currentIndex = 0  # absolute index of the next record across pages
column_name = ("gps_userid", "gps_latitude", "gps_longitude", "gps_code",
               "gps_altitude", "gps_UTC_timestamp", "gps_UTC_unix_timestamp")


def log_init():
    """Configure logging from logger.conf (cwd first, then parent dir)."""
    global logger
    if logger == None:
        if os.path.exists(r"logger.conf"):
            logging.config.fileConfig("logger.conf")
        else:
            logging.config.fileConfig(r"../logger.conf")
        logger = logging.getLogger("root")
    return logger


def getNextRecord(userId):
    """Return the next record for userId, paging 20000 rows at a time.

    Raises ValueError when the user has no more records (the original's
    'return None' after the raise was unreachable and has been removed).
    """
    global currentRecordList
    global currentIndex
    if (not currentRecordList.gps_list) or (currentRecordList.currentPos ==
                                            len(currentRecordList.gps_list)):
        currentRecordList.gps_list = \
            dbutils.get_gps_record_time_order(userId, currentIndex, 20000)
        if not currentRecordList.gps_list:
            currentIndex = 0
            currentRecordList.gps_list = None
            raise ValueError('no more gps record')
        currentRecordList.currentPos = 0

    res = currentRecordList.gps_list[currentRecordList.currentPos]
    currentIndex += 1
    currentRecordList.currentPos += 1
    return res


def getSpeed(currentPoint, nextPoint):
    """Speed in km/h between two fixes.

    Raises ZeroDivisionError when the two timestamps are identical.
    """
    euclidean_distence = base_op.get_distance(currentPoint, nextPoint)
    time_tmp = nextPoint.gps_UTC_unix_timestamp - currentPoint.gps_UTC_unix_timestamp
    try:
        speedVal = euclidean_distence * 3600 / abs(time_tmp)
    except ZeroDivisionError:
        raise
    return speedVal


def storeErrorPoint(errorPoint):
    """Dump an implausible point to stdout and errorPoint.txt."""
    errorPoint.show()
    errorPoint.save("errorPoint.txt")


def filterUserRecords(userid=1):
    """Walk all records of userid and count/store implausible points.

    A point is only recorded as an error when it is implicated twice in a
    row (same timestamp seen again), to avoid flagging the good neighbour
    of a single bad fix.  Returns the number of error points (0 for a user
    with no records, None if the very first fetch fails).
    """
    i = 0
    errorCount = 0
    oneUserTotalNum = dbutils.get_record_total_num(userid)
    print(oneUserTotalNum)
    if oneUserTotalNum == 0:
        return 0
    try:
        beginPoint = getNextRecord(userid)
    except ValueError as Error:
        return
    currentPoint = beginPoint
    nextPoint = beginPoint
    errorPointPre = None
    while i < oneUserTotalNum:
        currentPoint = nextPoint
        try:
            nextPoint = getNextRecord(userid)
        except ValueError as Error:
            print(Error)
            break
        try:
            speedVal = getSpeed(currentPoint, nextPoint)
        except ZeroDivisionError as Error:
            print(Error)
            # Identical timestamps: force the pair to be treated as an error.
            speedVal = 130
        # If speed > 120 km/h, mark the point as a candidate error.
        if abs(speedVal) > 120:
            if errorPointPre:
                if errorPointPre.gps_UTC_unix_timestamp == \
                        currentPoint.gps_UTC_unix_timestamp:
                    storeErrorPoint(currentPoint)
                    errorPointPre = None
                    errorCount += 1
            else:
                errorPointPre = nextPoint

        i += 1
    return errorCount


def main():
    """Filter every user in the DB, reporting per-user error counts."""
    log_init()
    print("Welcome filter_points")
    userlist = dbutils.get_total_users_list()
    for row in userlist:
        print(row[0])
        errorCount = filterUserRecords(int(row[0]))
        print("There is %d error records" % errorCount)


def testOneUser(userid=1):
    """Filter a single user and report the error count."""
    errorCount = filterUserRecords(userid)
    print("There is %d error records in user %d" % (errorCount, userid))


def test():
    """Ad-hoc exercise of the record paging for user 0."""
    log_init()
    print("Welcome filter_points test")
    beginPoint = getNextRecord(0)
    beginPoint.show()
    beginPoint.save("test.txt")
    oneUserTotalNum = dbutils.get_record_total_num(0)
    tmp = dbutils.get_gps_record_time_order(0, oneUserTotalNum + 2, 7)
    if tmp:
        print("error")
    else:
        print("empty")


if __name__ == "__main__":
    # Moved under a main guard: the original called testOneUser(128)
    # unconditionally at import time.
    #test()
    #main()
    testOneUser(128)
x=[randn(30,2)*.4;randn(40,2)*.5+ones(40,1)*[4 4]]; 21 | [RD,CD,order]=optics(x,4) 22 | ------------------------------------------------------------------------- 23 | References: 24 | [1] M. Ankrest, M. Breunig, H. Kriegel, J. Sander, 25 | OPTICS: Ordering Points To Identify the Clustering Structure, 26 | available from www.dbs.informatik.uni-muenchen.de/cgi-bin/papers?query=--CO 27 | [2] M. Daszykowski, B. Walczak, D.L. Massart, Looking for natural 28 | patterns in analytical data. Part 2. Tracing local density 29 | with OPTICS, J. Chem. Inf. Comput. Sci. 42 (2002) 500-507 30 | ------------------------------------------------------------------------- 31 | Written by Michal Daszykowski 32 | Department of Chemometrics, Institute of Chemistry, 33 | The University of Silesia 34 | December 2004 35 | http://www.chemometria.us.edu.pl 36 | 37 | 38 | ported to python Jan, 2009 by Brian H. Clowers, Pacific Northwest National Laboratory. 39 | Dependencies include scipy, numpy, and hcluster. 40 | bhclowers at gmail.com 41 | ''' 42 | 43 | 44 | import numpy as N 45 | import pylab as P 46 | import hcluster as H 47 | 48 | 49 | def optics(x, k, distMethod = 'euclidean'): 50 | if len(x.shape)>1: 51 | m,n = x.shape 52 | else: 53 | m = x.shape[0] 54 | n == 1 55 | 56 | try: 57 | D = H.squareform(H.pdist(x, distMethod)) 58 | distOK = True 59 | except: 60 | print "squareform or pdist error" 61 | distOK = False 62 | 63 | 64 | CD = N.zeros(m) 65 | RD = N.ones(m)*1E10 66 | 67 | for i in xrange(m): 68 | #again you can use the euclid function if you don't want hcluster 69 | # d = euclid(x[i],x) 70 | # d.sort() 71 | # CD[i] = d[k] 72 | 73 | tempInd = D[i].argsort() 74 | tempD = D[i][tempInd] 75 | # tempD.sort() #we don't use this function as it changes the reference 76 | CD[i] = tempD[k]#**2 77 | 78 | 79 | order = [] 80 | seeds = N.arange(m, dtype = N.int) 81 | 82 | ind = 0 83 | while len(seeds) != 1: 84 | # for seed in seeds: 85 | ob = seeds[ind] 86 | seedInd = N.where(seeds != ob) 87 | 
seeds = seeds[seedInd] 88 | 89 | order.append(ob) 90 | tempX = N.ones(len(seeds))*CD[ob] 91 | tempD = D[ob][seeds]#[seeds] 92 | #you can use this function if you don't want to use hcluster 93 | #tempD = euclid(x[ob],x[seeds]) 94 | 95 | temp = N.column_stack((tempX, tempD)) 96 | mm = N.max(temp, axis = 1) 97 | ii = N.where(RD[seeds]>mm)[0] 98 | RD[seeds[ii]] = mm[ii] 99 | ind = N.argmin(RD[seeds]) 100 | 101 | 102 | order.append(seeds[0]) 103 | RD[0] = 0 #we set this point to 0 as it does not get overwritten 104 | return RD, CD, order 105 | 106 | def euclid(i, x): 107 | """euclidean(i, x) -> euclidean distance between x and y""" 108 | y = N.zeros_like(x) 109 | y += 1 110 | y *= i 111 | if len(x) != len(y): 112 | raise ValueError, "vectors must be same length" 113 | 114 | d = (x-y)**2 115 | return N.sqrt(N.sum(d, axis = 1)) 116 | 117 | 118 | 119 | if __name__ == "__main__": 120 | 121 | testX = N.array([[ 15., 70.], 122 | [ 31., 87.], 123 | [ 45., 32.], 124 | [ 5., 8.], 125 | [ 73., 9.], 126 | [ 32., 83.], 127 | [ 26., 50.], 128 | [ 7., 31.], 129 | [ 43., 97.], 130 | [ 97., 9.]]) 131 | 132 | # mlabOrder = N.array(1,2,6,7,3,8,9,4,5,10) #the order returned by the original MATLAB code 133 | # Remeber MATLAB counts from 1, python from 0 134 | 135 | 136 | P.plot(testX[:,0], testX[:,1], 'ro') 137 | RD, CD, order = optics(testX, 4) 138 | testXOrdered = testX[order] 139 | P.plot(testXOrdered[:,0], testXOrdered[:,1], 'b-') 140 | 141 | print order 142 | 143 | P.show() 144 | -------------------------------------------------------------------------------- /geolife/get_stay_point/get_stay_point.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -* 2 | ###File Name:get_stay_point.py 3 | ###Author:haicg 4 | ###Mail:lihaicg@126.com 5 | ###Created Time: 2014/6/6 15:37:58 6 | ###File Name : get_stay_point.py 7 | #!/usr/bin/python 8 | import sys 9 | sys.path.append("..") 10 | from sql_base import dbutils 11 | from base import 
base_op 12 | from base import file_op 13 | from base import stay_point 14 | import logging.config 15 | import csv 16 | import pickle 17 | import os 18 | import json 19 | from convert_coordinate import convert_coordinate 20 | 21 | logger = None 22 | 23 | column_name = ("gps_userid", "gps_latitude", "gps_longitude", "gps_code",\ 24 | "gps_altitude", "gps_UTC_timestamp", "gps_UTC_unix_timestamp") 25 | 26 | def log_init(): 27 | global logger 28 | if(logger == None): 29 | logging.config.fileConfig("logger.conf") 30 | logger = logging.getLogger("root") 31 | return logger 32 | 33 | def calc_mean_pos(s_point,tmp_points): 34 | i = 0; 35 | latitude_sum = 0 36 | longitude_sum = 0 37 | altitude_sum = 0 38 | 39 | for p in tmp_points: 40 | latitude_sum = p.gps_latitude + latitude_sum 41 | longitude_sum = p.gps_longitude + longitude_sum 42 | altitude_sum = p.gps_altitude + altitude_sum 43 | i = i + 1 44 | s_point.mean_coordinate_latitude = latitude_sum/i 45 | s_point.mean_coordinate_longtitude = longitude_sum/i 46 | s_point.mean_coordinate_altitude = altitude_sum/i 47 | s_point.arrival_timestamp = tmp_points[0].gps_UTC_unix_timestamp 48 | s_point.leaving_timestamp = tmp_points[i-1].gps_UTC_unix_timestamp 49 | s_point.arrival_point = tmp_points[0].id 50 | s_point.leaving_point = tmp_points[i-1].id 51 | return s_point 52 | 53 | def get_stay_points(userid = 1,max_distence = 0.2, max_speed = 2): 54 | print "Welcome" 55 | #units :km 56 | #userid = 0 57 | gps_obj_list = dbutils.get_gps_record_time_order(userid, 0,-1) 58 | stay_point_list = [] 59 | counts = len(gps_obj_list) 60 | tmp_point_list = [] 61 | i =0 62 | while i < counts : 63 | j = i + 1 64 | point_i =gps_obj_list[i] 65 | k = 0 66 | 67 | #del tmp_point_list[:] 68 | tmp_point_list = [] 69 | tmp_point_list.insert(0, point_i) 70 | while (j < counts) : 71 | point_j = gps_obj_list[j] 72 | euclidean_distence = base_op.get_distance(point_i, point_j) 73 | #print "distence = %f km" % euclidean_distence 74 | k = k + 1 75 | 
tmp_point_list.insert(k, point_j) 76 | if euclidean_distence > max_distence : 77 | t_diff = point_j.gps_UTC_unix_timestamp - point_i.gps_UTC_unix_timestamp 78 | meanSpeed = euclidean_distence / t_diff * 1000 79 | #print "speed = %f m/s" % meanSpeed 80 | #if t_diff > max_timethreshold: 81 | if meanSpeed < max_speed: 82 | print "distence = %f km" % euclidean_distence 83 | print "speed = %f m/s" % meanSpeed 84 | print "time = %f s" % t_diff 85 | 86 | tmp_point_list.pop(); 87 | s = stay_point.stay_point() 88 | s.userid = userid 89 | calc_mean_pos(s, tmp_point_list) 90 | stay_point_list.append(s) 91 | #dbutils.insert_staypoint(s) 92 | i = j 93 | break 94 | else: 95 | i = j 96 | break; 97 | j = j+1 98 | if j == counts : 99 | s = stay_point.stay_point() 100 | s.userid = userid 101 | calc_mean_pos(s, tmp_point_list) 102 | stay_point_list.append(s) 103 | dbutils.insert_staypoint(s) 104 | i = j 105 | # break; 106 | #for gps_record in gps_obj_list : 107 | # gps_record.show() 108 | #print "\n" 109 | return stay_point_list 110 | 111 | 112 | def get_stay_points_v2(userid = 1,max_distence = 0.2, max_speed = 2, max_time=15*60): 113 | print "Welcome" 114 | #units :km 115 | #userid = 0 116 | gps_obj_list = dbutils.get_gps_record_time_order(userid, 0,-1) 117 | stay_point_list = [] 118 | counts = len(gps_obj_list) 119 | tmp_point_list = [] 120 | i =0 121 | while i < counts : 122 | j = i + 1 123 | point_i =gps_obj_list[i] 124 | k = 0 125 | 126 | #del tmp_point_list[:] 127 | tmp_point_list = [] 128 | tmp_point_list.insert(0, point_i) 129 | while (j < counts) : 130 | point_j = gps_obj_list[j] 131 | euclidean_distence = base_op.get_distance(point_i, point_j) 132 | t_diff = point_j.gps_UTC_unix_timestamp - point_i.gps_UTC_unix_timestamp 133 | #print "distence = %f km" % euclidean_distence 134 | k = k + 1 135 | tmp_point_list.insert(k, point_j) 136 | if t_diff > max_time : 137 | #t_diff = point_j.gps_UTC_unix_timestamp - point_i.gps_UTC_unix_timestamp 138 | meanSpeed = euclidean_distence / 
t_diff * 1000 139 | #print "speed = %f m/s" % meanSpeed 140 | #if t_diff > max_timethreshold: 141 | if meanSpeed < max_speed: 142 | #print "distence = %f km" % euclidean_distence 143 | #print "speed = %f m/s" % meanSpeed 144 | #print "time = %f s" % t_diff 145 | 146 | tmp_point_list.pop(); 147 | s = stay_point.stay_point() 148 | s.userid = userid 149 | calc_mean_pos(s, tmp_point_list) 150 | stay_point_list.append(s) 151 | #dbutils.insert_staypoint(s) 152 | i = j 153 | break 154 | else: 155 | i = j 156 | break; 157 | j = j+1 158 | if j == counts : 159 | s = stay_point.stay_point() 160 | s.userid = userid 161 | calc_mean_pos(s, tmp_point_list) 162 | stay_point_list.append(s) 163 | #dbutils.insert_staypoint(s) 164 | i = j 165 | # break; 166 | #for gps_record in gps_obj_list : 167 | # gps_record.show() 168 | #print "\n" 169 | return stay_point_list 170 | 171 | def getStayPointsList(stayPointListFile,userid): 172 | if os.path.isfile(stayPointListFile) :#如果不存在就返回False 173 | print "read from local file " 174 | mydb = open(stayPointListFile, 'r') 175 | stay_points_list = pickle.load(mydb) 176 | else: 177 | print "read from mysql" 178 | stay_points_list = get_stay_points_v2(userid) 179 | mydb = open(stayPointListFile, 'w') 180 | pickle.dump(stay_points_list, mydb) 181 | return stay_points_list 182 | 183 | 184 | def convert_staypoint_baidu_corrd(stay_points_list): 185 | orinArry = [] 186 | for s in stay_points_list: 187 | p = [] 188 | p.append(s.mean_coordinate_longtitude) 189 | p.append(s.mean_coordinate_latitude) 190 | orinArry.append(p) 191 | return convert_coordinate.convert_coordinate_batch_array(orinArry) 192 | 193 | 194 | def saveStayPointsToJson(distPointList, filepath='', fileId = 0 ): 195 | if not distPointList: 196 | print "Null Value" 197 | return 198 | datalist = [] 199 | for p in distPointList: 200 | data = [] 201 | data.append(p.gps_longitude); 202 | data.append(p.gps_latitude); 203 | data.append(1); 204 | datalist.append(data); 205 | strTmp = "var data%d =" 
%fileId 206 | saveDate = {'data':datalist,'total':len(datalist),"rt_loc_cnt":47764510,"errorno":0,"NearestTime":"2014-08-29 15:20:00","userTime":"2014-08-29 15:32:11"} 207 | strTmp += json.dumps(saveDate,sort_keys=False) 208 | with open(filepath, "w") as fp: 209 | fp.write(strTmp) 210 | fp.close() 211 | def saveStayPointsBaiduCoordToJson(stay_points_list,dirName,userid): 212 | stay_points_list_baidu = [] 213 | stay_points_list_baidu = convert_staypoint_baidu_corrd(stay_points_list) 214 | filetype = "" 215 | filepath = "%s/points_staypoints_baidu_%d%s.js" %(dirName, userid,filetype) 216 | saveStayPointsToJson(stay_points_list_baidu, filepath) 217 | 218 | def saveStayPointsToCsv(csv_name, stay_points_list): 219 | with open(csv_name,"wb") as csvfp: 220 | writer = csv.writer(csvfp) 221 | for p in stay_points_list: 222 | writer.writerow([p.mean_coordinate_latitude]+[p.mean_coordinate_longtitude]); 223 | print len(stay_points_list) 224 | csvfp.close() 225 | def printStayPoints(stay_points_list,n): 226 | for i in range(n): 227 | stay_points_list[i].printSelf() 228 | 229 | def sampleImportantSpot(sTime, eTime, minDis, importantSpot,stay_points_list): 230 | sampleRes = [] 231 | disThreh = 200 #unit is mile 232 | n = len(stay_points_list) 233 | i = 0 234 | imLat = importantSpot.mean_coordinate_latitude 235 | imLon = importantSpot.mean_coordinate_longtitude 236 | 237 | while (i < n): 238 | entTime = stay_points_list[i].arrival_timestamp 239 | leaTime =stay_points_list[i].leaving_timestamp 240 | lat = stay_points_list[i].mean_coordinate_latitude 241 | lon = stay_points_list[i].mean_coordinate_longtitude 242 | #unit is mile 243 | dis = base_op.calc_distance(imLat,imLon,lat,lon) 244 | if (dis < disThreh): 245 | j = 0 246 | while(sTime < entTime): 247 | sTime += 60*10 248 | j = j + 1; 249 | if (j== 6) : 250 | sampleRes.append(0) 251 | j = 0 252 | while(sTime < leaTime): 253 | sTime += 60*10*6 254 | sampleRes.append(1) 255 | i += 1 256 | while (sTime < eTime) : 257 | sTime += 
60*10*6 258 | sampleRes.append(0) 259 | print len(sampleRes) 260 | return sampleRes 261 | #print sampleRes 262 | 263 | 264 | 265 | 266 | 267 | 268 | def main(): 269 | userid = 128 270 | dirName = "stay_points_dir" 271 | stayPointNumFile = dirName+"/%d_staypoints_num.txt" %userid 272 | stayPointListFile = dirName+"/%d_staypoints_list.txt" %userid 273 | csv_name = dirName+ "/staypoints_%s.csv" %userid 274 | 275 | 276 | if not os.path.exists(dirName): 277 | os.mkdir(dirName) 278 | stay_points_list = getStayPointsList(stayPointListFile, userid) 279 | sTime = 1176483388 280 | eTime = 1299715222 281 | minDis = 200 282 | importantSpot = stay_points_list[0] 283 | #saveStayPointsToCsv(csv_name,stay_points_list) 284 | sampleList = sampleImportantSpot(sTime, eTime, minDis, importantSpot,stay_points_list) 285 | stayPointSampleListFile = dirName+"/%d_staypoints_sample_list_0.txt" %userid 286 | mydb = open(stayPointSampleListFile, 'w') 287 | pickle.dump(sampleList, mydb) 288 | #mydb = open(stayPointNumFile, 'w') 289 | #pickle.dump(len(stay_points_list), mydb) 290 | #saveStayPointsBaiduCoordToJson(stay_points_list, dirName, userid) 291 | #printStayPoints(stay_points_list, 2) 292 | print "successfully!" 
log_init()
main()


# --------------------------------------------------------------------------
# /geolife/logger.conf
# --------------------------------------------------------------------------
#reference http://www.red-dove.com/python_logging.html
#logger.conf
###############################################
[loggers]
keys=root,sql
###############################################
[handlers]
keys=console,file,sqlfile

######################
[handler_console]
class=StreamHandler
level=DEBUG
formatter=form
args=(sys.stdout,)

[handler_file]
class=handlers.RotatingFileHandler
level=DEBUG
formatter=form
# BUGFIX: fileConfig ignores a bare `maxBytes=` option line -- the size
# limit and a non-zero backupCount must be passed positionally through
# `args`, otherwise the handler never rotates at all.
args=('Handle.log', 'a', 3145728, 3)

[handler_sqlfile]
class=handlers.RotatingFileHandler
level=DEBUG
formatter=form
# same fix as handler_file: rotate at 3 MB, keep 3 backups
args=('SqlHandle.log', 'a', 3145728, 3)

###############################
[logger_root]
level=DEBUG
handlers=console,file

###############################
[logger_sql]
level=DEBUG
propagate=1
qualname=sql
# NOTE(review): `channel`/`parent` are not fileConfig keys (ignored);
# kept for history.
channel=sql
parent=root
handlers=console,sqlfile

###############################################
[formatters]
keys=form
[formatter_form]
format=%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s
datefmt=%Y-%m-%d %H:%M:%S

# --------------------------------------------------------------------------
# /geolife/query_geolife/query_points.py
# --------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 14 13:31:31 2014

@author: hyde
"""

# -*- coding: utf-8 -*
###File Name:get_stay_point.py
###Author:haicg
###Mail:lihaicg@126.com
###Created Time: 2014/6/6 15:37:58
###File Name : get_stay_point.py
get_stay_point.py 14 | #!/usr/bin/python 15 | import sys 16 | sys.path.append("..") 17 | from sql_base import dbutils 18 | from convert_coordinate import convert_coordinate 19 | from base import base_op 20 | from base import stay_point 21 | import logging.config 22 | import csv 23 | import json 24 | import os 25 | import pickle 26 | logger = None 27 | 28 | column_name = ("gps_userid", "gps_latitude", "gps_longitude", "gps_code",\ 29 | "gps_altitude", "gps_UTC_timestamp", "gps_UTC_unix_timestamp") 30 | 31 | def log_init(): 32 | global logger 33 | if(logger == None): 34 | logging.config.fileConfig("../logger.conf") 35 | logger = logging.getLogger("root") 36 | return logger 37 | 38 | def chunks(s,step): 39 | lenS=len(s) 40 | return [s[i:min(i+step,lenS)] for i in range(0,lenS,step)] 41 | 42 | def storeVar(): 43 | varFileNamePre = "var_data/%d_var_points" %userid 44 | varFileName = varFileNamePre + '_0.txt' 45 | if os.path.isfile(varFileName) :#如果不存在就返回False 46 | print "read from local file " 47 | mydb = open(varFileName, 'r') 48 | gps_obj_list = pickle.load(mydb) 49 | else: 50 | print "read from mysql" 51 | i = 0 52 | 53 | for gpsTmplist in chunks(gps_obj_list,4000): 54 | varFileName = varFileNamePre + "_%s.txt" %(i) 55 | mydb = open(varFileName, 'w') 56 | pickle.dump(gps_obj_list, mydb) 57 | i += 1 58 | mydb = open("FileNum.txt", 'w') 59 | pickle.dump(i, mydb) 60 | 61 | def savePointsToJson(userid, fileId, distPointList): 62 | datalist = [] 63 | for p in distPointList: 64 | data = [] 65 | data.append(p.gps_longitude); 66 | data.append(p.gps_latitude); 67 | data.append(1); 68 | datalist.append(data); 69 | fileGpoints = "%d_points_dir/points_gps_%d.js" %(userid,fileId) 70 | strTmp = "var data%d =" %fileId 71 | saveDate = {'data':datalist,'total':len(datalist),"rt_loc_cnt":47764510,"errorno":0,"NearestTime":"2014-08-29 15:20:00","userTime":"2014-08-29 15:32:11"} 72 | strTmp += json.dumps(saveDate,sort_keys=False) 73 | with open(fileGpoints,"w") as fp: 74 | 
fp.write(strTmp) 75 | fp.close() 76 | 77 | def main(): 78 | userid = 128 79 | 80 | gps_obj_list = dbutils.get_gps_record_time_order(userid, 0,-1) 81 | dbutils.close_db(); 82 | print "next" 83 | dirName = "%d_points_dir_baidu_pos" %userid 84 | if not os.path.exists(dirName): 85 | os.mkdir(dirName) 86 | 87 | distPointList = [] 88 | i = 0 89 | j = 0 90 | for p in chunks (gps_obj_list,100): 91 | i = i + 1 92 | distPointList += convert_coordinate.convert_coordinate_post(p) 93 | if i >=1000: 94 | savePointsToJson(userid, j, distPointList) 95 | distPointList = [] 96 | j = j + 1 97 | i = 0 98 | savePointsToJson(userid, j, distPointList) 99 | main() 100 | 101 | 102 | -------------------------------------------------------------------------------- /geolife/sql_base/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | LOG_HANDLE = None 4 | logger = None 5 | def log_init(): 6 | global logger 7 | if(logger == None): 8 | logger = logging.getLogger("root.sql") 9 | return logger 10 | log_init() -------------------------------------------------------------------------------- /geolife/sql_base/dbutils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | from base import gps_record 4 | 5 | import MySQLdb 6 | import os 7 | import logging 8 | 9 | logger = None 10 | 11 | 12 | column_name = ("gps_userid", "gps_latitude", "gps_longitude", "gps_code",\ 13 | "gps_altitude", "gps_UTC_timestamp", "gps_UTC_unix_timestamp","id") 14 | table_name = "geolife" 15 | def log_init(): 16 | global logger 17 | if(logger == None): 18 | logger = logging.getLogger("root.sql.dbutils") 19 | return logger 20 | 21 | #grant all privileges on geolife.`*` to geolife@"%" identified by "geolife" 22 | #FLUSH PRIVILEGES 23 | 24 | dbconn = None 25 | 26 | def connect_db(): 27 | try: 28 | db = MySQLdb.connect(host="localhost" ,user="geolife",passwd="geolife",db="geolife") 29 | 
return db 30 | except MySQLdb.Error,e: 31 | warnString= "Mysql Error %d: %s" % (e.args[0], e.args[1]) 32 | 33 | log_init().warning(warnString) 34 | print warnString 35 | 36 | if(e.args[0] == 1045 or e.args[0] == 1044): 37 | os._exit(1) 38 | 39 | def query_sql(query_str): 40 | global dbconn 41 | results = None 42 | if (dbconn == None ): 43 | dbconn = connect_db() 44 | if (dbconn) : 45 | try: 46 | cur = dbconn.cursor(); 47 | count = cur.execute(query_str) 48 | results = cur.fetchall() 49 | except MySQLdb.Error,e: 50 | warnString = " Mysql Error sql = %d %s " % (e.args[0],e.args[1]) 51 | log_init().warning(warnString) 52 | sys.exit(1) 53 | return results 54 | 55 | 56 | def close_db(conn=None): 57 | global dbconn 58 | if (conn == None) : 59 | conn = dbconn 60 | if (conn == None) : 61 | return 62 | cursor = conn.cursor() 63 | if(cursor): 64 | cursor.close() 65 | conn.close() 66 | 67 | def insert_into_db(sql,conn=None): 68 | global dbconn 69 | if (conn == None) : 70 | if (dbconn == None ): 71 | conn = connect_db() 72 | else: 73 | conn = dbconn 74 | 75 | if (conn) : 76 | try: 77 | cursor = conn.cursor() 78 | n = cursor.execute(sql) 79 | conn.commit(); 80 | print n 81 | except MySQLdb.Error,e: 82 | #WARNING Mysql Error sql = 1062 Duplicate entry "***" for key 'unique_key' 83 | if (e.args[0] == 1062): 84 | return 0 85 | warnString = " Mysql Error sql = %d %s " % (e.args[0],e.args[1]) 86 | log_init().warning(warnString) 87 | if(e.args[0] == 2006): 88 | return 2 89 | else: 90 | return 0 91 | else : 92 | return 0 93 | 94 | def insert_gps_record(conn, oneRecord): 95 | sql = "INSERT INTO geolife(gps_userid, gps_latitude, gps_longitude, gps_code, gps_altitude, gps_UTC_timestamp, gps_UTC_unix_timestamp) \ 96 | VALUES ('%d', '%f', '%f', '%d', '%f', '%s', '%s')" % \ 97 | (oneRecord.gps_userid,oneRecord.gps_latitude,oneRecord.gps_longitude,oneRecord.gps_code, 98 | oneRecord.gps_altitude, oneRecord.gps_UTC_timestamp, oneRecord.gps_UTC_unix_timestamp) 99 | # print sql 100 | return 
insert_into_db(sql,conn) 101 | 102 | def insert_staypoint(s_point,conn=None): 103 | sql = "INSERT INTO staypoint(userid, arrival_timestamp, leaving_timestamp, mean_coordinate_latitude,\ 104 | mean_coordinate_longtitude, mean_coordinate_altitude, arrival_point, leaving_point )VALUES ('%d', '%d', '%d', '%f', '%f', '%f','%d','%d')" % \ 105 | (s_point.userid, s_point.arrival_timestamp, s_point.leaving_timestamp, \ 106 | s_point.mean_coordinate_latitude, s_point.mean_coordinate_longtitude, \ 107 | s_point.mean_coordinate_altitude, s_point.arrival_point, s_point.leaving_point) 108 | 109 | return insert_into_db(sql,conn) 110 | 111 | 112 | def query_gps( query_str): 113 | global dbconn 114 | gps_obj_list = [] 115 | if (dbconn == None ): 116 | dbconn = connect_db() 117 | if (dbconn) : 118 | try: 119 | cur = dbconn.cursor(); 120 | count = cur.execute(query_str) 121 | #print 'There is %s rows record' %count 122 | #result = cur.fetchone() 123 | #results = cur.fetchall() 124 | 125 | results = cur.fetchall() 126 | for row in results: 127 | gps_obj = gps_record.gps_record.__init_with_query_sql__(gps_record.gps_record(), row) 128 | gps_obj_list.append(gps_obj); 129 | return gps_obj_list 130 | except MySQLdb.Error,e: 131 | warnString = " Mysql Error sql = %d %s " % (e.args[0],e.args[1]) 132 | log_init().warning(warnString) 133 | sys.exit(1) 134 | 135 | #print results 136 | #conn.commit() 137 | #cur.close() 138 | #dbconn.close() 139 | 140 | ''' 141 | userid : user id 142 | m: the first index 143 | n: the number of elements 144 | ''' 145 | def get_gps_record_time_order(userid, m, n): 146 | displist = "" 147 | 148 | for oneDisp in column_name: 149 | displist = displist + oneDisp + "," 150 | displist = displist.strip(',') 151 | if(n > 0): 152 | sqlStr = 'select %s from geolife where gps_userId =%d order by gps_UTC_unix_timestamp limit %d,%d' %( displist, userid, m, n) 153 | else: 154 | sqlStr = 'select %s from geolife where gps_userId =%d order by gps_UTC_unix_timestamp ' %(displist, 
userid) 155 | #print sqlStr 156 | #log_init().debug(sqlStr) 157 | return query_gps(sqlStr) 158 | 159 | def get_record_total_num(userid) : 160 | global dbconn 161 | query_str = 'select count(id) from %s where %s = %d' %(table_name, column_name[0], userid) 162 | #print query_str 163 | if (dbconn == None ): 164 | dbconn = connect_db() 165 | if (dbconn) : 166 | try: 167 | cur = dbconn.cursor(); 168 | count = cur.execute(query_str) 169 | #print 'There is %s rows record' %count 170 | result = cur.fetchone() 171 | #print result 172 | #results = cur.fetchall() 173 | except MySQLdb.Error,e: 174 | warnString = " Mysql Error sql = %d %s " % (e.args[0],e.args[1]) 175 | log_init().warning(warnString) 176 | sys.exit(1) 177 | return result[0] 178 | def test(): 179 | displist = "" 180 | for oneDisp in column_name: 181 | displist = displist + oneDisp + "," 182 | displist = displist.strip(',') 183 | sqlStr = 'select %s from geolife where gps_userId =0 limit 10' %displist 184 | print sqlStr 185 | query_gps(sqlStr) 186 | 187 | def get_total_users_list() : 188 | sqlStr = "select distinct gps_userid from %s" %table_name 189 | return query_sql(sqlStr) 190 | #test() 191 | 192 | -------------------------------------------------------------------------------- /geolife/store_geolife/Readme.txt: -------------------------------------------------------------------------------- 1 | The data is from the Microsoft Research. 
2 | The source page http://research.microsoft.com/en-us/downloads/b16d359d-d164-469e-9fd4-daa38f2b2e13/ 3 | -------------------------------------------------------------------------------- /geolife/store_geolife/deal_one_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import errno 4 | from base import gps_record 5 | from sql_base import dbutils 6 | import logging 7 | import file_op 8 | 9 | dbConn = None 10 | logger = None 11 | 12 | def log_init(): 13 | global logger 14 | if(logger == None): 15 | logger = logging.getLogger("root.deal_one_file") 16 | return logger 17 | 18 | def setOneFileRecord(filename, userid): 19 | global dbConn 20 | global logger 21 | readAndIstOneFile(filename, userid) 22 | dbConn.commit() 23 | 24 | def insertOneRecord(recordStr, userid): 25 | global dbConn 26 | i = 0 27 | try: 28 | recordObj = gps_record.gps_record.__init_with_txt_record__(gps_record.gps_record(),recordStr, userid) 29 | if(dbConn == None) : 30 | dbConn = dbutils.connect_db() 31 | #if the connectint closed by mysal server ,then open the connection again 32 | while(dbutils.insert_gps_record(dbConn, recordObj) == 2 and i<4): 33 | dbConn = dbutils.connect_db() 34 | i = i+ 1 35 | except ValueError: 36 | log_init().warning("GPS Record Value Error " + userid + recordStr) 37 | 38 | def readAndIstOneFile(filename, userid): 39 | fp = file_op.open_file(filename) 40 | #log_init().warning("GPS File name " + filename) 41 | for i in range(0,6): 42 | fp.readline() 43 | while True: 44 | line = fp.readline() 45 | if(line == ""): 46 | break 47 | insertOneRecord(line, userid) 48 | file_op.close_file(fp) 49 | 50 | def close_conn(): 51 | global dbConn 52 | if(dbConn): 53 | dbutils.close_db(dbConn) 54 | -------------------------------------------------------------------------------- /geolife/store_geolife/file_op.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 
File Name : file_op.py 3 | 4 | import errno 5 | def close_file(fp): 6 | try: 7 | fp. close() 8 | except IOError as e: 9 | if e.errno == errno.EACCES: 10 | return "some default data" 11 | # Not a permission error. 12 | raise 13 | 14 | def open_file(filename): 15 | try: 16 | fp = open(filename) 17 | except IOError as e: 18 | if e.errno == errno.EACCES: 19 | return "some default data" 20 | # Not a permission error. 21 | raise IOError 22 | else: 23 | # with fp: 24 | return fp 25 | 26 | def open_file_write(filename): 27 | try: 28 | fp = open(filename, 'w') 29 | except IOError as e: 30 | if e.errno == errno.EACCES: 31 | return "some default data" 32 | # Not a permission error. 33 | raise 34 | else: 35 | # with fp: 36 | return fp 37 | 38 | def store_list(filename, listName, listContext): 39 | fileStr = "" 40 | existFlag = 0 41 | listStr = "" 42 | fp = None 43 | # if(len(listContext) == 0) : 44 | # return ; 45 | 46 | for nodeStr in listContext: 47 | if (isinstance(nodeStr, basestring)): 48 | # if type(nodeStr) is types.StringType: 49 | listStr = listStr + nodeStr + "," 50 | listStr = listStr.strip(',') 51 | try: 52 | fp = open_file(filename) 53 | while True: 54 | line = fp.readline() 55 | ret = line.find("listName:"+ listName) 56 | if(ret == -1): 57 | fileStr = line +fileStr+fp.readline(); 58 | else: 59 | existFlag = 1; 60 | fileStr = line +fileStr+listStr; 61 | fp.readline(); 62 | if(line == ""): 63 | break 64 | print line 65 | except : 66 | print "No list exist"; 67 | finally: 68 | if (fp): 69 | close_file(fp); 70 | if not (existFlag): 71 | fileStr = fileStr + "listName:" + listName + "\n" 72 | fileStr = fileStr + listStr + "\n" 73 | fp = open_file_write(filename) 74 | fp.write(fileStr); 75 | close_file(fp); 76 | 77 | 78 | def get_store_list(filename, listName): 79 | listContext = None 80 | fp = None 81 | try: 82 | fp = open_file(filename) 83 | while True: 84 | line = fp.readline() 85 | ret = line.find("listName:"+ listName) 86 | if(ret == -1): 87 | fp.readline(); 88 | 
else: 89 | #existFlag = 1; 90 | listContext = fp.readline(); 91 | if(line == ""): 92 | break 93 | print line 94 | except : 95 | print "not exist"; 96 | raise IOError; 97 | finally: 98 | if (fp): 99 | close_file(fp); 100 | return listContext 101 | 102 | -------------------------------------------------------------------------------- /geolife/store_geolife/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("..") 3 | from sql_base import dbutils 4 | import errno 5 | import os 6 | import deal_one_file 7 | import file_op 8 | import logging 9 | import logging.config 10 | 11 | LOG_HANDLE = None 12 | logger = None 13 | def log_init(): 14 | global logger 15 | if(logger == None): 16 | logging.config.fileConfig("../logger.conf") 17 | logger = logging.getLogger("root") 18 | return logger 19 | 20 | def storeFilePathList(name, listContext): 21 | #fp = file_op.open_file_write("filepathList.txt") 22 | file_op.store_list("filepathList.txt", name, listContext); 23 | 24 | def main(): 25 | log_init(); 26 | DataPath = "./Data/" 27 | # getFilePathListL1(); 28 | FilePathListL1Str = None 29 | FilePathListL1 = None 30 | try : 31 | FilePathListL1Str = file_op.get_store_list("filepathList.txt", "FilePathListL1") 32 | if (FilePathListL1Str): 33 | FilePathListL1 = FilePathListL1Str.rstrip().split(","); 34 | else: 35 | return 36 | except IOError: 37 | if not (FilePathListL1Str): 38 | FilePathListL1 = os.listdir(DataPath) #000 001 39 | storeFilePathList("FilePathListL1", FilePathListL1) 40 | FilePathL1 = None 41 | #for FilePathL1 in FilePathListL1: 42 | for i in range (len(FilePathListL1)): 43 | FilePathL1 = FilePathListL1.pop() 44 | FilePathFullL1 = DataPath+FilePathL1 #000 001 45 | if os.path.isdir(FilePathFullL1) : 46 | userId = FilePathL1 47 | try: 48 | int(userId, 10) 49 | except ValueError: 50 | logger.warning("dir error " + FilePathFullL1 ) 51 | storeFilePathList("FilePathListL1", FilePathListL1) 52 | continue 53 | 
FilePathListL2 = os.listdir(FilePathFullL1) #Trajectory 54 | for FilePathL2 in FilePathListL2: 55 | FilePathFullL2 = FilePathFullL1 + '/' +FilePathL2 #Trajector 56 | try: 57 | FilePathFullL2.index('Trajectory') 58 | except ValueError: 59 | logger.warning("dir error " + FilePathFullL2 ) 60 | continue 61 | print FilePathFullL2 62 | if os.path.isdir(FilePathFullL2) : 63 | FilePathListL3 = os.listdir(FilePathFullL2) #20090428051631.plt 64 | FilePathListL3.sort() 65 | for FilePathL3 in FilePathListL3: 66 | extension = os.path.splitext(FilePathL3) 67 | if extension[1] != '.plt' : 68 | logger.warning("dir error " + FilePathL3 ) 69 | continue; 70 | FilePathFullL3 = FilePathFullL2 + "/" + FilePathL3 #20090428051631.plt 71 | if os.path.isfile(FilePathFullL3) : 72 | deal_one_file.setOneFileRecord(FilePathFullL3, userId) 73 | 74 | #FilePathListL1.remove(FilePathL1) 75 | logger.warning(FilePathListL1); 76 | storeFilePathList("FilePathListL1", FilePathListL1) 77 | print "ok" 78 | 79 | main() 80 | deal_one_file.close_conn(); 81 | print "Store the GPS data Successfully" 82 | -------------------------------------------------------------------------------- /geolife/store_geolife/read_db.py: -------------------------------------------------------------------------------- 1 | import MySQLdb 2 | 3 | def select_points(db, iduser,datestart, dateend): 4 | sql = "SELECT latitude,longitude,date FROM POINT WHERE iduser = '%d' AND date > '%s' AND date < '%s'"%\ 5 | (iduser,datestart.strftime("%Y-%m-%d %H:%M:%S"),dateend.strftime("%Y-%m-%d %H:%M:%S")) 6 | print datestart,dateend 7 | print sql 8 | try: 9 | curs = db.cursor() 10 | curs.execute(sql) 11 | res = curs.fetchall() 12 | except Exception, e: 13 | print e 14 | 15 | return res 16 | 17 | -------------------------------------------------------------------------------- /geolife/store_geolife/respawn.sh: -------------------------------------------------------------------------------- 1 | ###souce file from 
http://blog.sina.com.cn/s/blog_4c451e0e0100giqg.html 2 | # #! 不是注释符,而是指定脚本由哪个解释器来执行, 3 | # #! 后面有一个空格,空格后面为解释器的全路径且必须正确。 4 | #! /bin/bash 5 | PRO_PATH="" 6 | # testpro 为要守护的可执行程序,即保证它是一直运行的 7 | PROGRAM="main.py" 8 | 9 | # 此脚本一直不停的循环运行,while <条件> 与 do 放在一行上要在条件后加分号 10 | # if、then、while、do等关键字或命令是作为一个新表达式的开头, 11 | # 一个新表达式之前的表达式必须以换行符或分号(;)来结束 12 | # 如果条件不是单个常量或变量而是表达式的话,则要用[]括起来 13 | # while、until与for循环皆以do开始以done结束构成循环体 14 | while true ; do 15 | # 休息10秒以确保要看护的程序运行起来了,这个时间因实际情况而定 16 | # sleep 10 17 | # 单引号''中的$符与\符没有了引用变量和转义的作用,但在双引号""中是可以的! 18 | # 单引号中如果还有单引号,则输出时全部的单引号都将去掉,单引号括住的内容原样输出。 19 | # 例:echo 'have 'test'' --> have test 20 | # ps aux --> a 为显示其他用户启动的进程; 21 | # u 为显示启动进程的用户名与时间; 22 | # x 为显示系统属于自己的进程; 23 | # ps aux | grep 可执行程序名 --> 在得到的当前启动的所有进程信息文本中, 24 | # 过滤出包含有指定文本(即可执行程序名字)的信息文本行 25 | #注:假设 ps aux | grep 可执行程序名 有输出结果,但输出不是一条信而是两条, 26 | # 一个为查找到的包含有指定文本(即可执行程序名字)的信息文本行(以换行符0x10结尾的文本为一行), 27 | # 一个为 grep 可执行程序名 ,即把自己也输出来了, 28 | # 所这条信息是我们不需要的,因为我们只想知指定名字的可执行程序是否启动了 29 | # grep -v 指定文本 --> 输出不包含指定文本的那一行文本信息 30 | # wc -l --> 输出文件中的行数(-l --> 输出换行符统计数) 31 | # ps aux | grep $PROGRAM | grep -v grep | wc -l --> 如果有指定程序名的程序启动的话,结果大于壹 32 | PRO_NOW=`ps aux | grep $PROGRAM | grep -v grep | wc -l` 33 | 34 | # 整数比较:-lt -> 小于,-le -> 小于等于,-gt -> 大于,-ge -> 大于等于,-eq ->等于,-ne -> 不等于 35 | # if [条件] 与 then 放在一行上要在条件后加分号 36 | # 如果当前指定程序启动的个数小于壹的话 37 | if [ $PRO_NOW -lt 1 ]; then 38 | # 0 -> 标准输入,1 -> 标准输出,2 - > 标准错误信息输出 39 | # /dev/null --> Linux的特殊文件,它就像无底洞,所有重定向到它的信息数据都会消失! 
40 | # 2 > /dev/null --> 重定向 stderr 到 /dev/null,1 >& 2 --> 重定向 stdout 到 stderr, 41 | # 直接启动指定程序,且不显示任何输出 42 | # 可执行程序后面加空格加&,表示要执行的程序为后台运行 43 | 44 | if [ -e "filepathList.txt" ] 45 | then 46 | echo "file exist" 47 | linenum=`wc -w filepathList.txt|awk -F " " '{print $1}'` 48 | i=2 49 | if [ $linenum -lt $i ] 50 | then 51 | echo "empty" 52 | break 53 | fi 54 | else 55 | echo "no exist" 56 | fi 57 | python $PROGRAM 2>/dev/null 1>&2 & 58 | echo "not empty" 59 | # date >> ./tinfo.log --> 定向输出当前日期时间到文件,添加到文件尾端,如果没有文件,则创建这个文件 60 | date >> ./tinfo.log 61 | # echo "test start" >> ./tinfo.log --> 定向输出 test start 添加到文件尾端 62 | echo "test start" >> ./tinfo.log 63 | echo "restart" 64 | # if 分支结构体结束 65 | fi 66 | 67 | 68 | # 基本与上面的相同,就是多了一个 grep T,其结果为过滤出含 T 字符的信息行 69 | # T --> 进程已停止,D --> 不可中断的深度睡眠,R --> 进程运行或就绪,S --> 可接收信号的睡眠, 70 | # X --> 已完全死掉,Z --> 已完全终止 71 | PRO_STAT=`ps aux|grep $PROGRAM |grep T|grep -v grep|wc -l` 72 | 73 | # 如果指定进程状态为已停止的信息大于零的话 74 | if [ $PRO_STAT -gt 0 ] ; then 75 | # killall --> 用名字方式来杀死进程,-9 --> 即发给程序一个信号值为9的信号,即SIGKILL(非法硬件指令) 76 | # 也可以不指定信号,默认为SIGTERM,即信号值为15 77 | killall -9 $PROGRAM 78 | sleep 2 79 | if [ -e "filepathList.txt" ] 80 | then 81 | linenum=`wc -w filepathList.txt|awk -F " " '{print $1}'` 82 | i=2 83 | if [ $linenum -lt $i ] 84 | then 85 | echo "empty" 86 | break 87 | fi 88 | fi 89 | 90 | python $PROGRAM 2>/dev/null 1>&2 & 91 | echo "not empty" 92 | # date >> ./tinfo.log --> 定向输出当前日期时间到文件,添加到文件尾端,如果没有文件,则创建这个文件 93 | date >> ./tinfo.log 94 | # echo "test start" >> ./tinfo.log --> 定向输出 test start 添加到文件尾端 95 | echo "test start" >> ./tinfo.log 96 | echo "restart" 97 | fi 98 | # while、until与for循环皆以do开始以done结束构成循环体 99 | done 100 | # exit 用来结束脚本并返回状态值,0 - 为成功,非零值为错误码,取值范围为0 ~ 255。 101 | exit 0 102 | -------------------------------------------------------------------------------- /geolife/update_index.sh: -------------------------------------------------------------------------------- 1 | #/bin/sh - 2 | find . 
-regex '.*\.c\|.*\.cpp\|.*\.h\|.*\.hpp\|.*\.py' > cscope.files 3 | cscope -b -i cscope.files -f cscope.out 4 | 5 | 6 | -------------------------------------------------------------------------------- /schema_sql/geolife.sql: -------------------------------------------------------------------------------- 1 | # Host: 127.0.0.1 (Version: 5.5.37-MariaDB) 2 | # Date: 2014-06-06 15:18:26 3 | # Generator: MySQL-Front 5.3 (Build 4.13) 4 | 5 | /*!40101 SET NAMES utf8 */; 6 | 7 | # 8 | # Source for table "geolife" 9 | # 10 | 11 | CREATE TABLE `geolife` ( 12 | `gps_userid` int(11) DEFAULT NULL, 13 | `gps_latitude` double DEFAULT NULL, 14 | `gps_longitude` double DEFAULT NULL, 15 | `gps_code` int(11) DEFAULT NULL, 16 | `gps_altitude` double DEFAULT NULL, 17 | `gps_UTC_timestamp` timestamp NULL DEFAULT NULL, 18 | `gps_UTC_unix_timestamp` int(11) DEFAULT NULL, 19 | `id` int(11) NOT NULL AUTO_INCREMENT, 20 | PRIMARY KEY (`id`), 21 | UNIQUE KEY `id` (`id`), 22 | UNIQUE KEY `unique_key` (`gps_userid`,`gps_latitude`,`gps_longitude`,`gps_code`,`gps_altitude`,`gps_UTC_timestamp`,`gps_UTC_unix_timestamp`) 23 | ) ENGINE=InnoDB AUTO_INCREMENT=59418621 DEFAULT CHARSET=utf8; 24 | 25 | # 26 | # Source for table "staypoint" 27 | # 28 | 29 | CREATE TABLE `staypoint` ( 30 | `Id` int(11) NOT NULL AUTO_INCREMENT, 31 | `userid` int(11) NOT NULL DEFAULT '0', 32 | `arrival_point` int(11) DEFAULT NULL, 33 | `arrival_timestamp` int(11) DEFAULT NULL, 34 | `leaving_point` int(11) DEFAULT NULL, 35 | `leaving_timestamp` int(11) DEFAULT NULL, 36 | `mean_coordinate_latitude` double DEFAULT NULL, 37 | `mean_coordinate_longtitude` double DEFAULT NULL, 38 | `mean_coordinate_altitude` double DEFAULT NULL, 39 | PRIMARY KEY (`Id`), 40 | KEY `userid` (`userid`), 41 | KEY `leaving_point` (`arrival_point`), 42 | CONSTRAINT `leaving_point` FOREIGN KEY (`arrival_point`) REFERENCES `geolife` (`id`), 43 | CONSTRAINT `arrivel_point` FOREIGN KEY (`arrival_point`) REFERENCES `geolife` (`id`), 44 | CONSTRAINT `userid` 
FOREIGN KEY (`userid`) REFERENCES `geolife` (`gps_userid`) 45 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 46 | --------------------------------------------------------------------------------