├── README ├── build_indices.py ├── gtfs └── PUT_GTFS_FILES_HERE ├── gtfssnapshotcalculator.py ├── load_gtfs.py ├── settings.py ├── sql ├── agency.sql ├── calendar.sql ├── calendar_dates.sql ├── routes.sql ├── stop_times.sql ├── stops.sql └── trips.sql └── sql_better └── load.sql /README: -------------------------------------------------------------------------------- 1 | GTFS MySQL IMPORT SCRIPT 2 | Author: Tom Lee (thomas.j.lee@gmail.com) 3 | 4 | This is a simple set of scripts that will import a GTFS dataset into 5 | a lightweight MySQL database. The work is based on Washington, DC's 6 | WMATA GTFS dataset, and has not been thoroughly checked for completeness 7 | against the GTFS spec -- some optional fields or tables may not 8 | currently be supported (but should be trivial to add). 9 | 10 | In addition to the GTFS fields, a number of columns have been created 11 | to assist in the conversion of GTFS's string-based date/time 12 | representations to more useful Unix timestamp-style second counts. 13 | 14 | DEAD-SIMPLE USAGE: 15 | 16 | 1. Create a database, e.g. CREATE DATABASE gtfs 17 | 18 | 2. Run table creation scripts against the database: 19 | 20 | cat sql/*.sql | mysql -p -u USERNAME -h HOST -D gtfs 21 | 22 | 3. Edit settings.py with your mysql details. 23 | 24 | 4. Put your GTFS files into the gtfs/ folder 25 | 26 | 5. Run the import script: 27 | 28 | python load_gtfs.py 29 | 30 | 6. Run the time index creation script: 31 | 32 | python build_indices.py 33 | 34 | 7. 
# Build something neat
# --------------------------------------------------------------------------------
# /build_indices.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
"""Fill in the numeric date/time helper columns of an imported GTFS database.

GTFS stores dates as YYYYMMDD strings and times as HH:MM:SS strings.  This
script reads those strings back out of the ``calendar``, ``calendar_dates``
and ``stop_times`` tables and writes the matching numeric values into the
``*_timestamp`` / ``*_seconds`` columns of the same rows.
"""

import datetime
import time


def convert_date_string_to_timestamp(date_string):
    """ converts a string of the format YYYYMMDD into a unix timestamp

    The date is taken as midnight in the machine's local timezone, because
    the value comes from time.mktime().  The result is a float.
    """
    year = date_string[:4]
    month = date_string[4:6]
    day = date_string[6:]
    d = datetime.datetime(int(year), int(month), int(day))
    return time.mktime(d.timetuple())


def convert_time_string_to_seconds(time_string):
    """ converts a string of the format HH:MM:SS into the number of seconds since the start of the day """
    # we can't use the time module because it restricts timetuples to 0..23,
    # etc -- GTFS does not; for time ranges stretching into the next day,
    # you're supposed to use e.g. 23:30-24:30 for a 1-hr 11:30p-12:30a trip
    hours, minutes, seconds = time_string.split(':')
    return (int(hours) * 3600) + (int(minutes) * 60) + int(seconds)


def _apply_updates(connection, select_sql, update_sql, row_to_params):
    """ Runs select_sql and issues one parameterized update_sql per row.

    row_to_params receives each selected row and returns the tuple of
    parameters for update_sql, or None to leave that row untouched.

    Values are handed to the driver as parameters instead of being pasted
    into the SQL text, so quotes or other odd characters in feed data cannot
    break (or inject into) the statement.
    """
    # NOTE(review): reading from one cursor while writing through another on
    # the same connection relies on the driver's default, client-side
    # buffered cursor -- the original code made the same assumption.
    read_cursor = connection.cursor()
    write_cursor = connection.cursor()
    try:
        read_cursor.execute(select_sql)
        for row in iter(read_cursor.fetchone, None):
            params = row_to_params(row)
            if params is not None:
                write_cursor.execute(update_sql, params)
    finally:
        read_cursor.close()
        write_cursor.close()

    # DB-API connections start with autocommit off; without an explicit
    # commit the updates are thrown away on transactional storage engines
    # when the connection is closed.
    connection.commit()


def calendar(connection):
    """ Creates numeric keys for date fields in the calendar table """
    def row_to_params(row):
        service_id, start_date, end_date = row
        # IDs are passed through untouched: GTFS IDs are free-form strings,
        # so forcing them through int() would crash on non-numeric feeds.
        return (int(convert_date_string_to_timestamp(start_date)),
                int(convert_date_string_to_timestamp(end_date)),
                service_id, start_date, end_date)

    _apply_updates(
        connection,
        "SELECT service_id, start_date, end_date FROM calendar",
        "UPDATE calendar SET start_date_timestamp=%s, end_date_timestamp=%s "
        "WHERE service_id=%s AND start_date=%s AND end_date=%s",
        row_to_params)


def calendar_dates(connection):
    """ Creates numeric keys for date fields in the calendar_dates table """
    def row_to_params(row):
        service_id, date = row
        return (int(convert_date_string_to_timestamp(date)), service_id, date)

    _apply_updates(
        connection,
        "SELECT service_id, date FROM calendar_dates",
        "UPDATE calendar_dates SET date_timestamp=%s "
        "WHERE service_id=%s AND date=%s",
        row_to_params)


def stop_times(connection):
    """ Creates numeric keys for time fields in the stop_times table

    Rows whose arrival or departure time is missing are skipped.
    """
    def row_to_params(row):
        trip_id, arrival_time, departure_time, stop_id = row
        # The GTFS spec lets non-timepoint stops omit their times; there is
        # nothing to convert for such rows, so leave them as they are
        # instead of aborting the whole run.
        if not arrival_time or not departure_time:
            return None
        return (convert_time_string_to_seconds(arrival_time),
                convert_time_string_to_seconds(departure_time),
                trip_id, stop_id, arrival_time, departure_time)

    _apply_updates(
        connection,
        "SELECT trip_id, arrival_time, departure_time, stop_id FROM stop_times",
        "UPDATE stop_times SET arrival_time_seconds=%s, departure_time_seconds=%s "
        "WHERE trip_id=%s AND stop_id=%s AND arrival_time=%s AND departure_time=%s",
        row_to_params)


def main():
    """ Connects using the values in settings.py and processes all three tables """
    # Imported here rather than at module level so the conversion helpers
    # above can be imported without the MySQL driver or a settings module.
    import MySQLdb
    import settings

    conn = MySQLdb.connect(host=settings.MYSQL_HOST, user=settings.MYSQL_USER, passwd=settings.MYSQL_PASSWORD, db=settings.MYSQL_DATABASE)
    try:
        # single-argument print(...) behaves the same on Python 2 and 3
        print('processing calendar')
        calendar(conn)

        print('processing calendar_dates')
        calendar_dates(conn)

        print('processing stop_times')
        stop_times(conn)
    finally:
        conn.close()

    print('done')


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------
# /gtfs/PUT_GTFS_FILES_HERE:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sbma44/py-gtfs-mysql/b5567c661db32afc18001f8884045612fb8369d1/gtfs/PUT_GTFS_FILES_HERE -------------------------------------------------------------------------------- /gtfssnapshotcalculator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import MySQLdb 5 | import pickle 6 | import os 7 | from decimal import Decimal 8 | 9 | trips = {} 10 | trip_bounds = {} 11 | 12 | class GTFSSnapshotCalculator(object): 13 | 14 | def __init__(self, service_id): 15 | super(GTFSSnapshotCalculator, self).__init__() 16 | 17 | self.trips = {} 18 | self.trip_bounds = {} 19 | self.service_id = service_id 20 | 21 | self.build_trip_schemes(self.service_id) 22 | 23 | 24 | def build_trip_schemes(self, service_id): 25 | 26 | import settings 27 | 28 | TRIPS_PICKLE_FILE = 'trips.%d.pickle' % service_id 29 | TRIP_BOUNDS_PICKLE_FILE = 'trip_bounds.%d.pickle' % service_id 30 | 31 | if os.path.exists(TRIPS_PICKLE_FILE): 32 | print 'loading trip sequences from file' 33 | f = open(TRIPS_PICKLE_FILE, 'r') 34 | self.trips = pickle.load(f) 35 | f.close() 36 | 37 | else: 38 | 39 | trip_headsigns = {} 40 | 41 | print 'building trip sequences' 42 | 43 | conn = MySQLdb.connect (host=settings.MYSQL_HOST, user=settings.MYSQL_USER, passwd=settings.MYSQL_PASSWORD, db=settings.MYSQL_DATABASE) 44 | cursor = conn.cursor() 45 | 46 | sql = """ 47 | SELECT 48 | trip_id, trip_headsign 49 | FROM 50 | trips t 51 | WHERE 52 | t.service_id=%d 53 | ORDER BY 54 | t.trip_id ASC 55 | """ % service_id 56 | 57 | cursor.execute(sql.replace("\n", " ")) 58 | while True: 59 | row = cursor.fetchone() 60 | if row is None: 61 | break 62 | 63 | self.trips[int(row[0])] = row[1] 64 | 65 | padding = len(str(len(self.trips))) 66 | i = 0 67 | for trip_id in self.trips: 68 | print '%s/%s %s' % (str(i).zfill(padding), 
str(len(self.trips)).zfill(padding), self.trips[trip_id]) 69 | 70 | self.trips[trip_id] = [] 71 | 72 | sql = """ 73 | SELECT 74 | s.stop_id, 75 | s.stop_lat, 76 | s.stop_lon, 77 | st.arrival_time_seconds, 78 | st.departure_time_seconds 79 | 80 | FROM 81 | stop_times st 82 | INNER JOIN 83 | stops s 84 | ON s.stop_id=st.stop_id 85 | 86 | WHERE 87 | st.trip_id=%d 88 | 89 | ORDER BY 90 | st.stop_sequence ASC 91 | 92 | """ % trip_id 93 | 94 | cursor.execute(sql.replace("\n", " ")) 95 | while True: 96 | row = cursor.fetchone() 97 | if row is None: 98 | break 99 | 100 | self.trips[trip_id].append({ 101 | 'stop_id': row[0], 102 | 'stop_lat': row[1], 103 | 'stop_lon': row[2], 104 | 'arrival_time_seconds': row[3], 105 | 'departure_time_seconds': row[4] 106 | }) 107 | 108 | i = i + 1 109 | 110 | print 'saving trip sequences to file' 111 | f = open(TRIPS_PICKLE_FILE, 'w') 112 | pickle.dump(self.trips, f) 113 | f.close() 114 | 115 | cursor.close() 116 | conn.close() 117 | 118 | if os.path.exists(TRIP_BOUNDS_PICKLE_FILE): 119 | print 'loading stop time bounds from file' 120 | f = open(TRIP_BOUNDS_PICKLE_FILE, 'r') 121 | self.trip_bounds = pickle.load(f) 122 | f.close() 123 | else: 124 | print 'building stop time bounds' 125 | 126 | conn = MySQLdb.connect (host=settings.MYSQL_HOST, user=settings.MYSQL_USER, passwd=settings.MYSQL_PASSWORD, db=settings.MYSQL_DATABASE) 127 | cursor = conn.cursor() 128 | 129 | for trip_id in self.trips: 130 | self.trip_bounds[trip_id] = (self.trips[trip_id][0]['arrival_time_seconds'], self.trips[trip_id][-1]['departure_time_seconds']) 131 | print 'saving stop time bounds to file' 132 | f = open(TRIP_BOUNDS_PICKLE_FILE, 'w') 133 | pickle.dump(self.trip_bounds, f) 134 | f.close() 135 | 136 | cursor.close() 137 | conn.close() 138 | 139 | def snapshot(self, second): 140 | 141 | bus_locations = [] 142 | 143 | for trip_id in self.trip_bounds: 144 | if second>=self.trip_bounds[trip_id][0] and second<=self.trip_bounds[trip_id][1]: 145 | for stop_i in 
xrange(0,len(self.trips[trip_id])): 146 | # is the bus at a stop right now? 147 | if second>=self.trips[trip_id][stop_i]['arrival_time_seconds'] and second<=self.trips[trip_id][stop_i]['departure_time_seconds']: 148 | bus_locations.append( (self.trips[trip_id][stop_i]['stop_id'], self.trips[trip_id][stop_i]['stop_id'], self.trips[trip_id][stop_i]['stop_lat'], self.trips[trip_id][stop_i]['stop_lon'], 'AT STOP') ) 149 | break 150 | 151 | # is it between stops? 152 | if stop_i<(len(self.trips[trip_id]) - 1): 153 | if second>self.trips[trip_id][stop_i]['departure_time_seconds'] and second