├── .gitignore
├── LICENSE
├── README.md
├── bq_query.sql
└── location_parser.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
API_KEY.secret.txt
inputs
outputs
.pydevproject
.project

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 javier ramírez

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# google_timeline_parser
Parse your Google Location History (Timeline) after exporting it from Google Takeout.

* Export your Timeline data with Google Takeout.
* Shrink the JSON down to only the fields we need with jq: `cat LocationHistory.json | jq "[.locations[] | {latitudeE7, longitudeE7, timestampMs}]" > filtered_locations.json`
* Convert the JSON summary into CSV with jsonv: `cat filtered_locations.json | jsonv latitudeE7,longitudeE7,timestampMs > filtered_locations.csv`
* Run location_parser.py to reverse-geocode each row into a locality; it reads inputs/filtered_locations.csv and writes outputs/output.csv (see the conversion sketch below).
* Import the CSV into your database. If you use BigQuery, you can copy the file to Google Cloud Storage and create an external table pointing to it. The fields are timestamp (TIMESTAMP), date (DATE), lat (FLOAT), lng (FLOAT) and locality (STRING); all of them are required.
* Execute your query. bq_query.sql is an example that calculates whole days of absence from the UK based on the collected data.
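For reference, `latitudeE7`/`longitudeE7` are degrees multiplied by 10^7 and `timestampMs` is Unix epoch milliseconds. Below is a minimal sketch of the conversion location_parser.py applies to each CSV row (the sample values are made up):

```python
from datetime import datetime

# One row of filtered_locations.csv: latitudeE7, longitudeE7, timestampMs
# (illustrative values, not real data)
row = ['515074000', '-1278000', '1470000000000']

lat = int(row[0]) / 10000000.0  # 51.5074 degrees
lng = int(row[1]) / 10000000.0  # -0.1278 degrees
# The script uses local time via fromtimestamp(); shown here in UTC.
date = datetime.utcfromtimestamp(int(row[2]) / 1000.0).strftime('%Y-%m-%d')
print('{} {} {}'.format(lat, lng, date))  # 51.5074 -0.1278 2016-07-31
```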
--------------------------------------------------------------------------------
/bq_query.sql:
--------------------------------------------------------------------------------
-- Legacy BigQuery SQL. Produces one row per trip abroad, with the number of
-- whole days spent outside the UK and the itinerary of countries visited.
SELECT
  MIN(previous_date) AS date_from,
  MAX(date) AS date_to,
  GREATEST(0, DATEDIFF(CAST(MAX(date) AS timestamp), CAST(MIN(previous_date) AS timestamp)) - 1) AS days,
  CONCAT('UK>', GROUP_CONCAT(country, '>')) AS itinerary,
  trip_no
FROM (
  SELECT
    *,
    -- Every row whose previous country is the UK starts a new trip.
    SUM(IF(previous_country='UK', 1, 0)) OVER (ORDER BY timestamp ASC) AS trip_no
  FROM (
    SELECT
      *,
      -- locality looks like 'London. UK'; the last word is the country.
      REGEXP_EXTRACT(previous_locality, r'(\w+$)') AS previous_country,
      REGEXP_EXTRACT(locality, r'(\w+$)') AS country
    FROM (
      SELECT
        timestamp,
        LAG(date, 1) OVER (ORDER BY timestamp ASC) AS previous_date,
        LAG(locality, 1) OVER (ORDER BY timestamp ASC) AS previous_locality,
        date,
        locality
      FROM
        [javier-cp300:locations.history]
      WHERE
        locality <> 'Error' ) )
  WHERE
    previous_country <> country )
GROUP BY
  trip_no
ORDER BY
  trip_no
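-- A sketch of the intermediate rows, on made-up data, after the inner queries
-- have kept only the rows where the country changes:
--
--   previous_locality   locality           previous_country  country  trip_no
--   'London. UK'        'Paris. France'    UK                France   1
--   'Paris. France'     'Berlin. Germany'  France            Germany  1
--   'Berlin. Germany'   'London. UK'       Germany           UK       1
--   'London. UK'        'Madrid. Spain'    UK                Spain    2
--
-- Grouping by trip_no then yields one row per trip; for trip 1 the itinerary
-- is 'UK>France>Germany>UK' and days is the number of whole days between the
-- last date seen in the UK (MIN(previous_date)) and the return date
-- (MAX(date)), minus one.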
--------------------------------------------------------------------------------
/location_parser.py:
--------------------------------------------------------------------------------
# Python 2 script: merges Google Location History rows and Foursquare
# check-ins (KML) into one CSV, reverse-geocoding each point to a locality.
import csv
from datetime import datetime, timedelta
import sys

import requests

from lxml import etree


# Python 2 hack so implicit str/unicode conversions default to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

cached_locs = {}
api_key = open('API_KEY.secret.txt').read().strip()


# https://gis.stackexchange.com/questions/8650/measuring-accuracy-of-latitude-and-longitude
# For telling countries apart, 1 decimal digit is more than we need, and it
# will usually differentiate cities too (see the link above). The more digits,
# the more precision, but also the more HTTP requests and the slower it gets.
def key_for(lat, lng):
    # e.g. key_for(51.5074, -0.1278) -> '51.5#-0.1'
    return str(round(lat, 1)) + '#' + str(round(lng, 1))


def city_for(lat, lng, cached_locs):
    city = cached_locs.get(key_for(lat, lng))
    if city:
        return city

    base_url = 'https://maps.googleapis.com/maps/api/geocode/json?latlng={},{}&key={}&result_type=locality'
    try:
        response = requests.get(base_url.format(lat, lng, api_key)).json()
        # Commas would break the CSV output, so replace them with dots.
        city = response['results'][0]['formatted_address'].replace(',', '.')
    except (requests.RequestException, LookupError, ValueError):
        city = 'Error'
    cached_locs[key_for(lat, lng)] = city
    return city


def parse_foursquare_date(date_str):
    # e.g. 'Fri, 12 Aug 16 14:01:00 +0000' -> naive UTC datetime
    dt, tz = date_str.rsplit(' ', 1)
    dt_obj = datetime.strptime(dt, '%a, %d %b %y %H:%M:%S')
    tz_delta = timedelta(hours=int(tz[1:3]), minutes=int(tz[3:]))
    if tz[0] == '+':
        dt_obj -= tz_delta
    else:
        dt_obj += tz_delta
    return dt_obj


with open('inputs/filtered_locations.csv', 'rb') as csv_in, \
     open('inputs/foursquare.kml', 'rb') as kml_in, \
     open('outputs/output.csv', 'wb') as csv_out:
    c_out = csv.writer(csv_out)
    c_in = csv.reader(csv_in)

    for row in c_in:
        # latitudeE7/longitudeE7 are degrees * 1e7; timestampMs is epoch ms.
        lat = int(row[0]) / 10000000.0
        lng = int(row[1]) / 10000000.0
        timestamp = int(row[2])
        date = datetime.fromtimestamp(timestamp / 1000.0).strftime('%Y-%m-%d')
        city = city_for(lat, lng, cached_locs).encode('utf-8')
        if city == 'Error':
            continue

        c_out.writerow([timestamp, date, lat, lng, city, 'location_history', None])

    tree = etree.parse(kml_in)
    for placemark in tree.findall('.//Placemark'):
        name = placemark.find('name').text
        # KML coordinates come as 'lon,lat[,alt]'.
        coords = placemark.find('Point/coordinates').text.split(',')
        lng = float(coords[0])
        lat = float(coords[1])
        foursquare_date = parse_foursquare_date(placemark.find('published').text)
        timestamp = int((foursquare_date - datetime(1970, 1, 1)).total_seconds() * 1000)
        date = foursquare_date.strftime('%Y-%m-%d')
        city = city_for(lat, lng, cached_locs).encode('utf-8')
        if city == 'Error':
            continue
        c_out.writerow([timestamp, date, lat, lng, city, 'foursquare', name])
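# Usage notes (inferred from the hard-coded paths above):
# - needs a Google Maps Geocoding API key in API_KEY.secret.txt
# - reads inputs/filtered_locations.csv (produced by the README pipeline) and
#   inputs/foursquare.kml (a Foursquare check-in export)
# - writes outputs/output.csv with columns:
#   timestamp, date, lat, lng, locality, source, name


--------------------------------------------------------------------------------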