├── .gitignore ├── README.md ├── air_finder.py ├── apis.txt ├── main.py ├── qpx_express.py ├── requirements.txt └── sky_picker.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | config/* 4 | .ropeproject/* 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python Flight Search 2 | ========================= 3 | 4 | A simple and extendable series of Python scripts for searching flight options. Contributions welcome! 5 | 6 | Scripts 7 | -------- 8 | 9 | * `air_finder.py` - A script to simply scrape flights from [airfinder.de](http://airfinder.de) 10 | * `sky_picker.py` - A script to interact with the [skypicker.com](http://skypicker.com) API. [More documentation available here](http://docs.skypickerpublicapi.apiary.io/#reference/flights). Note: this is the only one that supports a flight range (i.e. I want to travel sometime in between these dates...). 11 | * `qpx_express.py` - A script with some simple API, request and response classes for interacting with Google's [QPX Express API](https://developers.google.com/qpx-express/). For more documentation on the format of the request and response, see [the API Reference](https://developers.google.com/qpx-express/v1/). 12 | * `main.py` - Testing by searching them all and comparing in a `pandas` DataFrame. 13 | 14 | Requirements 15 | ------------ 16 | 17 | Install the necessary requirements using the `requirements.txt` file. Not all scripts need all requirements, so please check the script you are interested in using if you'd like to individually install and use the packages. 18 | 19 | The QPX Express API requires an API Key (which you can request on the Google Developer's Console). 
class AirFinder(object):
    """ Screen scraper for one-way flight results on airfinder.de.

    A search issues a plain GET request against the public search page and
    turns every result row of the returned HTML into a dictionary.
    """

    def __init__(self):
        """ Initialize with the base url of the search page. """
        self.base_url = 'http://www.airfinder.de/Seats.aspx?'
        # Date of the most recent search; used as a fallback when the date
        # text shown on the page cannot be parsed.
        self.date = None

    def search(self, origin, destination, date, num_adults):
        """ Search using a simple get request including flight details.
        :param origin: string
        :param destination: string
        :param date: datetime object
        :param num_adults: number of adult passengers

        returns list of dictionaries (see parse_response)
        """
        params = {'city1': origin, 'city2': destination,
                  'date1': date.strftime('%d/%m/%Y'),
                  'adults': num_adults, 'ec_search': 1}
        self.date = date
        # Let requests build and encode the query string; this also copes
        # with non-ASCII city names, which a hand-built query may not.
        response = requests.get(self.base_url, params=params)
        return self.parse_response(response)

    def grab_xpath_text(self, element, xpath):
        """ Given an element and xpath pattern, return text content.
        :param element: lxml element
        :param xpath: string

        returns the text of the single match, a list of texts when several
        elements match, or an empty string when nothing matches
        """
        matches = element.xpath(xpath)
        if not matches:
            return ''
        if len(matches) == 1:
            return matches[0].text
        return [match.text for match in matches]

    def parse_response(self, response):
        """ Given a requests response object, return a list of dictionaries
        containing the pertinent flight info.
        :param response: response obj

        returns list of dictionaries
        """
        page = html.fromstring(response.content)
        rows = page.xpath('//div[contains(@class, "itemresult")]')
        return [self._parse_item(row) for row in rows]

    def _parse_item(self, res):
        """ Build the flight dictionary for one result row element. """
        item_dict = {
            # Prices use a decimal comma on the page.
            'price': float(self.grab_xpath_text(
                res, 'div/div/span[@class="FlightPrice"]').replace(',', '.')),
            'currency': self.grab_xpath_text(
                res, 'div/div/span[@class="Currency"]'),
            'date': self.grab_xpath_text(
                res, 'div/div/span[contains(@id, "dateLabel")]'),
        }
        # Positional spans: 0 departure time, 1 stops, 2 arrival,
        # 3 carrier, 4 dummy text, 5 duration.
        more_flight_info = [r.text for r in res.xpath('div/div/div/span')]
        item_dict['departure'] = self._parse_departure(
            item_dict['date'], more_flight_info[0])
        item_dict['num_stops'] = more_flight_info[1]
        item_dict['arrival'] = more_flight_info[2]
        item_dict['carrier'] = more_flight_info[3]
        item_dict['duration'] = more_flight_info[5]
        item_dict['duration_hours'] = self._duration_hours(
            more_flight_info[5])
        return item_dict

    def _parse_departure(self, date_text, time_text):
        """ Combine the row's date label and HH:MM time into a datetime.

        Falls back to the searched date plus the time of day when the date
        label does not match '%d %B %Y' under the active locale.
        """
        try:
            return datetime.strptime('{} {}'.format(date_text, time_text),
                                     '%d %B %Y %H:%M')
        except ValueError as err:
            print('error with locale:', err)
            time_list = time_text.split(':')
            return self.date + timedelta(hours=int(time_list[0]),
                                         minutes=int(time_list[1]))

    @staticmethod
    def _duration_hours(duration):
        """ Convert a duration such as '2h 30m' (or just '2h') to hours. """
        hours, _, minutes = duration.partition('h')
        minutes = minutes.strip().rstrip('m').strip()
        return int(hours) + (int(minutes) / 60.0 if minutes else 0.0)
""" 27 | qpx_df = pd.DataFrame(results) 28 | qpx_df['num_stops'] = qpx_df['legs'].map(lambda x: len(x) - 1) 29 | qpx_df['search_engine'] = 'QPX Express' 30 | return qpx_df 31 | 32 | 33 | def prepare_sp_data(results): 34 | """ Prepare skypicker results so they can be easily compared. """ 35 | sp_df = pd.DataFrame(results) 36 | sp_df['search_engine'] = 'SkyPicker' 37 | sp_df['num_stops'] = sp_df['legs'].map(lambda x: len(x) - 1) 38 | return sp_df 39 | 40 | 41 | def prepare_final(final_df): 42 | """ Some cleaning for easy sort and comparison. """ 43 | final_df['price'] = final_df.price.astype(float) 44 | final_df['duration_hours'] = final_df.duration_hours.astype(float) 45 | final_df['departure_tod'] = final_df.departure.map(time_of_day) 46 | return final_df 47 | 48 | 49 | def time_of_day(dt_obj): 50 | """ Mapping function to return a string representing time of day for 51 | the departure or arrival datetime objects. """ 52 | if dt_obj.hour < 8 and dt_obj.hour > 2: 53 | return 'early am' 54 | elif dt_obj.hour < 12: 55 | return 'morning' 56 | elif dt_obj.hour < 16: 57 | return 'afternoon' 58 | elif dt_obj.hour < 22: 59 | return 'evening' 60 | return 'late evening' 61 | 62 | 63 | def compare_all(origin, destination, travel_date, num_adults): 64 | """ Call search for each of the flight engines using simple one-way flight. 
if __name__ == '__main__':
    # Interactive entry point: ask for a one-way trip, query every search
    # engine through compare_all() and print the results sorted by price and
    # by duration.
    origin = raw_input('where are you flying from? ')
    destination = raw_input('where are you flying to? ')
    travel_date = raw_input('what date (%m/%d/%Y format plz)? ')
    num_adults = raw_input('number of adults traveling? ')
    # The parse format must match the format the prompt asks for.
    final = compare_all(origin.strip(), destination.strip(),
                        datetime.strptime(travel_date.strip(), '%m/%d/%Y'),
                        int(num_adults))
    print('lowest prices....')
    # DataFrame.sort() no longer exists in current pandas; sort_values() is
    # the column-sorting replacement.
    print(final.sort_values('price')[:5])
    print('shortest flights....')
    print(final.sort_values('duration_hours'))
""" 2 | from __future__ import unicode_literals, absolute_import, generators, \ 3 | print_function 4 | 5 | import requests 6 | import json 7 | import re 8 | from datetime import datetime 9 | 10 | 11 | class QPXExpressApi(object): 12 | """ QPX Express API """ 13 | 14 | def __init__(self, api_key=None): 15 | """ API Contrstructor 16 | :param api_key: Google API Key 17 | """ 18 | self.api_key = api_key 19 | self.request_count = 0 20 | self.request_url = 'https://www.googleapis.com/qpxExpress/' + \ 21 | 'v1/trips/search?key={}'.format(api_key) 22 | 23 | def search(self, request): 24 | """ Search the API 25 | :param request: QPXRequest object 26 | 27 | returns QPXResponse object 28 | """ 29 | headers = {'content-type': 'application/json'} 30 | resp = requests.post(self.request_url, data=request.get_json(), 31 | headers=headers) 32 | self.request_count += 1 33 | return QPXResponse(resp.json()) 34 | 35 | def estimate_api_costs(self): 36 | """ Estimate API costs based on current count. """ 37 | return '${.2f}'.format((lambda x: x if x > 0 else 0)( 38 | (self.request_count - 50) * .035)) 39 | 40 | 41 | class QPXRequest(object): 42 | """ QPX Request formatter. """ 43 | def __init__(self, origin, destination, date, num_adults, return_date=None): 44 | """ Create request object. 
class QPXResponse(object):
    """ QPX Response object.

    Wraps the decoded JSON of a trip search and offers sorting plus a
    condensed, easier to read view of the trip options.
    """

    def __init__(self, json_resp):
        """ Store the raw response and extract the list of trip options.
        :param json_resp: decoded JSON response (dictionary)
        """
        self.raw_data = json_resp
        # 'trips' / 'tripOption' may be missing (e.g. nothing matched); use
        # an empty list so sorting and top_flights() keep working.
        self.flight_options = (
            (json_resp.get('trips') or {}).get('tripOption') or [])

    @staticmethod
    def _amount(price):
        """ Return the numeric part of a price such as 'USD123.45'. """
        return float(re.search(r'[\d.]+', price).group(0))

    @staticmethod
    def _parse_time(timestamp):
        """ Parse the 'YYYY-MM-DDTHH:MM' prefix of a timestamp.

        Anything after the minutes (presumably the UTC offset) is ignored,
        so the result is a naive datetime.
        """
        return datetime.strptime(timestamp[:16], '%Y-%m-%dT%H:%M')

    def sort_by_base_price(self):
        """ Sort all flights by base price, putting lowest first. """
        self.flight_options = sorted(
            self.flight_options,
            key=lambda opt: self._amount(opt['pricing'][0]['baseFareTotal']))

    def sort_by_total_price(self):
        """ Sort all flights by total price, putting lowest first. """
        self.flight_options = sorted(
            self.flight_options,
            key=lambda opt: self._amount(opt['saleTotal']))

    def sort_by_duration(self):
        """ Sort all flights by duration, putting shortest first. """
        self.flight_options = sorted(
            self.flight_options,
            key=lambda opt: opt['slice'][0]['duration'])

    def top_flights(self, num=10, sort='price'):
        """ Return a smaller (more readable) list of the top flights.
        :kwargs num: integer of how many to show (default: 10)
        :kwargs sort: 'price' or 'duration' sort method (default: 'price');
                      any other value keeps the current order

        returns sorted list with some (but not all) details for easy reading.
        Only the first slice of every trip option is summarized.
        """
        if sort == 'price':
            self.sort_by_total_price()
        elif sort == 'duration':
            self.sort_by_duration()
        return [self._summarize(option)
                for option in self.flight_options[:num]]

    def _summarize(self, flight):
        """ Condense one trip option into a flat dictionary. """
        first_slice = flight['slice'][0]
        segments = first_slice['segment']
        sale_total = flight.get('saleTotal')
        legs = [self._summarize_segment(segment) for segment in segments]
        return {
            # Kept as string; the amount and currency share one field.
            'price': re.search(r'[\d.]+', sale_total).group(),
            'currency': re.search(r'[^\d.]+', sale_total).group(),
            'duration': first_slice['duration'],
            'duration_hours': first_slice['duration'] / 60.0,
            'departure': self._parse_time(
                segments[0]['leg'][0]['departureTime']),
            # The trip ends with the last leg of the last segment.
            'arrival': self._parse_time(
                segments[-1]['leg'][-1]['arrivalTime']),
            'legs': legs,
            # Sorted so the carrier string is stable between runs.
            'carrier': ', '.join(sorted(set(
                leg.get('carrier') for leg in legs))),
        }

    @staticmethod
    def _summarize_segment(segment):
        """ Condense one segment (first leg origin to last leg destination). """
        first_leg = segment['leg'][0]
        last_leg = segment['leg'][-1]
        # NOTE(review): a segment with a connection reports the connection
        # duration here instead of its own duration - confirm this is
        # intended.
        if 'connectionDuration' in segment:
            total_duration = segment['connectionDuration']
        else:
            total_duration = segment['duration']
        return {
            'origin': first_leg['origin'],
            'departure': first_leg['departureTime'],
            'arrival': last_leg['arrivalTime'],
            'destination': last_leg['destination'],
            'carrier': segment['flight']['carrier'],
            'total_duration': total_duration,
        }
class SkyPickerApi(object):
    """ SkyPicker API.

    Builds request URLs from a base url, a path and a query string, which
    are stored on the instance and overwritten by every search method.
    """

    def __init__(self):
        """ Initializes the API object with URL attributes. """
        self.base_url = 'https://api.skypicker.com/'
        self.path = ''
        self.param_str = ''

    @property
    def full_url(self):
        """ Returns the full URL for requesting the data. """
        return '{}{}{}'.format(self.base_url, self.path, self.param_str)

    def get_request(self):
        """ Requests the API endpoint and returns the decoded JSON. """
        headers = {'content-type': 'application/json'}
        resp = requests.get(self.full_url, headers=headers)
        return resp.json()

    def search_places(self, place_name, locale=None):
        """ Finds matching place API ids to use for searches.
        :param place_name: string of the place name to search for
        :kwarg locale: two letter lowercase locale string

        returns JSON response
        """
        self.path = 'places'
        self.param_str = '?term={}'.format(place_name)
        if locale:
            self.param_str += '&locale={}'.format(locale)
        return self.get_request()

    def search_flights(self, origin, destination, start_date, end_date,
                       num_passengers):
        """ Searches for flights given a time range and origin and destination.
        :param origin: string representing the ID or IATA
        :param destination: string representing the ID or IATA
        :param start_date: datetime representing first possible travel date
        :param end_date: datetime representing last possible travel date
        :param num_passengers: integer

        returns list of dictionaries, one per flight, with the keys
        departure, arrival, price, currency, legs, duration, duration_hours
        and carrier; an empty list when the response holds no 'data'
        """
        self.path = 'flights'
        self.param_str = '?flyFrom=' + \
            '{}&to={}&dateFrom={}&dateTo={}&passengers={}'.format(
                origin, destination, start_date.strftime('%d/%m/%Y'),
                end_date.strftime('%d/%m/%Y'), num_passengers)
        resp = self.get_request()
        currency = resp.get('currency')
        # 'data' can be absent or null; treat that as "no flights" instead
        # of failing on iteration.
        return [self._summarize_flight(flight, currency)
                for flight in resp.get('data') or []]

    @staticmethod
    def _summarize_flight(flight, currency):
        """ Condense one flight entry of the response into a dictionary. """
        departure = datetime.utcfromtimestamp(flight.get('dTimeUTC'))
        arrival = datetime.utcfromtimestamp(flight.get('aTimeUTC'))
        duration = arrival - departure
        legs = [{
            'carrier': route['airline'],
            'departure': datetime.utcfromtimestamp(route.get('dTimeUTC')),
            'arrival': datetime.utcfromtimestamp(route.get('aTimeUTC')),
            'from': '{} ({})'.format(route['cityFrom'], route['flyFrom']),
            'to': '{} ({})'.format(route['cityTo'], route['flyTo']),
        } for route in flight['route']]
        return {
            'departure': departure,
            'arrival': arrival,
            'price': flight.get('price'),
            'currency': currency,
            'legs': legs,
            'duration': duration,
            'duration_hours': duration.total_seconds() / 3600.0,
            # Sorted so the carrier string is stable between runs.
            'carrier': ', '.join(sorted(set(
                leg.get('carrier') for leg in legs))),
        }