├── README.md └── traffic_data.py /README.md: -------------------------------------------------------------------------------- 1 | # traffic_data 2 | 3 | Author: Michael Cole 4 | 5 | ## About 6 | 7 | This project takes a list of start addresses as well as a list of end 8 | addresses and compiles traffic data over a period of time into a Pandas 9 | DataFrame. This was born out of a personal use in which I planned to move 10 | to a new city but wanted to become familiarized with traffic times to and 11 | from different cities. 12 | 13 | ## How-To 14 | 15 | Using a text editor, modify these variables: 16 | - **INTERVAL**: Input an integer number of minutes between cycles. 17 | For instance, if you'd like the program to collect data in 20-minute 18 | intervals, type `20`. 19 | - **DURATION**: Input an integer number of minutes the program should run. 20 | If you'd like for it to run for an entire day - or longer - you can input an 21 | expression using valid Python syntax such as `60 * 24` 22 | *Don't forget order of operations when doing this* 23 | - **START_ADDRESSES**: This is simply a list of start addresses as strings. 24 | This program uses the Google Maps API so anything you would typically type into 25 | Google Maps should be sufficient. 26 | - **END_ADDRESSES**: *Exactly as the previous variable* 27 | - **GOOGLE_MAPS_API_KEY**: This is a string of your API key. This can be found 28 | at https://cloud.google.com/maps-platform/ 29 | 30 | ### Sample 31 | 32 | Below would create a Pandas DataFrame from every start address to every end 33 | address. The DataFrame would be updated every 15 minutes for an entire day. 
34 | 35 | ```python 36 | INTERVAL = 15 # Fifteen mins 37 | 38 | DURATION = 24 * 60 # One Day 39 | 40 | START_ADDRESSES = [ 41 | '123 Address Lane Foo, CA 12345', 42 | '456 Address Cove Bar, CA 12345', 43 | ] 44 | 45 | END_ADDRESSES = [ 46 | '1400 Suite 600 Address Street Los Angeles', 47 | 'Statue of Liberty', 48 | '42 Python Ave SoftwareLand, WA' 49 | ] 50 | 51 | GOOGLE_MAPS_API_KEY = '4242SUPERSECRETAPIKEY4242' 52 | ``` 53 | 54 | -------------------------------------------------------------------------------- /traffic_data.py: -------------------------------------------------------------------------------- 1 | # traffic_data.py - Program to collect data from various start addresses to 2 | # various end addresses. Designed to run on designated 3 | # intervals for a designated amount of time 4 | # 5 | # Author: Michael Cole 6 | # ============================================================================= 7 | 8 | # Boiler Plate and User-Specified Variables =================================== 9 | from datetime import datetime 10 | import time 11 | import pandas as pd 12 | import googlemaps 13 | 14 | # In Minutes 15 | INTERVAL = # Integer number of minutes between cycles in which travel data 16 | # should be collected 17 | DURATION = # Integer number of minutes until this program terminates 18 | 19 | START_ADDRESSES = [ 20 | # Insert start addresses as strings separated by commas 21 | ] 22 | END_ADDRESSES = [ 23 | # Insert end addresses as strings separated by commas 24 | ] 25 | GOOGLE_MAPS_API_KEY = # String. Visit: https://cloud.google.com/maps-platform/ 26 | 27 | # Variables and Functions ===================================================== 28 | # !!! MODIFY BELOW THIS LINE AT YOUR OWN RISK !!! 
# _____________________________________________________________________________
gmaps = googlemaps.Client(key=GOOGLE_MAPS_API_KEY)


def get_duration(start_address, end_address):
    '''Returns 'duration_in_traffic' text from the Google Directions API.
    Param:
        start_address : Origin of the trip, passed to the API unchanged
        end_address : Destination of the trip, passed to the API unchanged
    Returns the 'text' value of the first leg of the first route, or None
    when the response does not contain one. Errors raised by the API call
    itself are not caught here.
    '''
    results = gmaps.directions(start_address, end_address,
                               mode='driving', departure_time=datetime.now())
    # Only the lookup is guarded: an empty result list, a missing key or a
    # None response all mean "Google did not deliver a duration".
    try:
        return results[0]['legs'][0]['duration_in_traffic']['text']
    except (IndexError, KeyError, TypeError):
        return None


def get_info(reverse=False):
    '''Returns timestamp (datetime.now().ctime()) and a dataframe which can
    later be appended in a list of dataframes for concatenation.
    The dataframe is indexed by START_ADDRESSES with END_ADDRESSES as
    columns, regardless of the direction that was queried.
    Param:
        reverse=False : Set True to evaluate trip from work to home
    '''
    now = datetime.now().ctime()
    df = pd.DataFrame(index=START_ADDRESSES, columns=END_ADDRESSES)
    print()
    if reverse:
        print('Work to home ==========')
    else:
        print('Home to work ==========')
    for home in df.index:
        for work in df.columns:
            if reverse:
                origin, destination = work, home
            else:
                origin, destination = home, work
            duration = get_duration(origin, destination)
            # .loc writes into df itself; chained indexing (df[work][home])
            # is not guaranteed to and is a no-op under Copy-on-Write.
            df.loc[home, work] = duration
            print(f'{origin} to {destination}: {duration}')
    print()
    print('----- done -----')
    return now, df


def read_csv(filename='traffic_data_forward.csv', reverse=False):
    '''Reads csv into pandas dataframe while preserving the multi-indexing.
    Param:
        filename='traffic_data_forward.csv'
        reverse=False : Set to true in order to read the reverse csv file
                        (this replaces any given filename)
    '''
    if reverse:
        filename = 'traffic_data_reverse.csv'
    # The first two csv columns hold the two index levels written by
    # save_dataframe.
    return pd.read_csv(filename, index_col=[0, 1])


def save_dataframe(dataframe, filename='traffic_data_forward',
                   reverse=False, csv=True, html=True):
    '''Writes the given dataframe to a csv file and html.
    Param:
        dataframe : The dataframe that will be written to csv
        filename='traffic_data_forward' : Output name without extension
        reverse=False : Set to True if given dataframe is reversed so the csv
                        file will be saved correctly (this replaces any
                        given filename)
        csv=True : Set False to avoid saving csv file
        html=True : Set False to avoid saving html file
    '''
    if reverse:
        filename = 'traffic_data_reverse'
    if csv:
        dataframe.to_csv(filename + '.csv')
        print(f'----- Dataframe saved as {filename}.csv -----')
    if html:
        dataframe.to_html(filename + '.html')
        print(f'----- Dataframe saved as {filename}.html -----')


def timer_complete(max_seconds):
    '''Blocks until max_seconds have passed. Then returns True.
    Param:
        max_seconds : Number of seconds to wait; zero or less returns
                      immediately
    '''
    # A monotonic clock keeps the wait correct if the system time changes.
    deadline = time.monotonic() + max_seconds
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return True
        print('Checking elapsed time')
        # Wake at least once a minute, but never sleep past the deadline.
        time.sleep(min(60, remaining))


def elapsed_time_complete(max_seconds, start_time):
    '''Returns whether or not the elapsed time has exceeded the limit.
    Param:
        max_seconds : Limit in seconds
        start_time : Reference point as returned by time.time()
    '''
    return time.time() - start_time > max_seconds


def _collect_cycle(reverse=False):
    '''Queries every address pair once and returns the multi-indexed rows.'''
    timestamp, dataframe = get_info(reverse=reverse)
    # Concatenating a one-entry dict prepends the timestamp as an outer
    # index level, giving a (timestamp, start address) multi-index.
    return pd.concat({timestamp: dataframe})


def _with_history(new_dataframe, reverse=False):
    '''Returns previously saved rows (if any) followed by new_dataframe.'''
    try:
        old_dataframe = read_csv(reverse=reverse)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # First cycle: nothing has been saved yet.
        return new_dataframe
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # replacement. Any other read error propagates on purpose so that an
    # unreadable history file is not silently overwritten.
    return pd.concat([old_dataframe, new_dataframe])


# Scripting ===================================================================
def run_program():
    '''Wrapper function that calls all necessary previous functions.
    Collects forward and reverse data every INTERVAL minutes, adding each
    cycle to the saved csv/html files, until DURATION minutes have passed.
    '''
    interval_seconds = INTERVAL * 60  # convert to seconds
    duration_seconds = DURATION * 60  # convert to seconds
    start_time = time.time()

    # Will break when timer completes:
    while not elapsed_time_complete(duration_seconds, start_time):
        forward_dataframe = _collect_cycle()
        backward_dataframe = _collect_cycle(reverse=True)

        # Merge with what earlier cycles saved, then rewrite csv and html.
        save_dataframe(_with_history(forward_dataframe))
        save_dataframe(_with_history(backward_dataframe, reverse=True),
                       reverse=True)

        timer_complete(interval_seconds)

    print('===== Done Collecting Data =====')


# Main Guard ==================================================================
if __name__ == '__main__':
    run_program()