├── .gitignore ├── README.md ├── alert ├── __init__.py └── email_alert.py ├── automation ├── __init__.py ├── scrape_daily.cfg └── scrape_daily.py ├── flight_scraper.cfg.example ├── flight_scraper ├── __init__.py ├── engines │ ├── __init__.py │ ├── flight_stats │ │ ├── __init__.py │ │ └── driver.py │ └── ita_matrix │ │ ├── __init__.py │ │ └── driver.py ├── scraper.py ├── solution_model.py └── utils │ ├── __init__.py │ ├── graph.py │ └── scraper.py ├── setup.py ├── static └── ico │ └── favicon.ico ├── templates ├── base.html ├── calendar_query.html ├── graph.html ├── graph_seats.html ├── index.html ├── query.html └── seats.html ├── test ├── sample_ita_itinerary.json ├── sample_search.json └── sample_search2.json └── web_app.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | build 4 | .idea -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | flight_scraper 2 | =============== 3 | 4 | This is a quick script that I reverse-engineered in order to poll ITA Matrix Airfare Search. It is still a bit rough around the edges as I try to clean it up. The goal is to determine a correlation between seat availability and price fare information to alert me on trends for flight segments. I want to build a cool dashboard in order to feed all this information to me automatically. 5 | 6 | Right now I use ITA Matrix for airfare search & flightstats for seat availability. I'm looking to get information from some other sites as well in order to get better data. 7 | 8 | The ITA Matrix Airfare Search is a great tool so I suggest you go check it out here: http://matrix.itasoftware.com/ 9 | Also check out Flightstats (https://flightstats.com); it's a great site for finding information about flight tracking. I use it to monitor whether flights are on-time or not. 
10 | 11 | #Installation# 12 | (build steps) 13 | Copy flight_scraper.cfg.example to flight_scraper.cfg 14 | python setup.py build 15 | python setup.py install 16 | 17 | #Dependencies# 18 | * Requests (http://docs.python-requests.org/) 19 | * MongoDB (http://www.mongodb.org/) 20 | * Flask (http://flask.pocoo.org/) 21 | * python-dateutil (http://labix.org/python-dateutil) 22 | * Google Visualization API (https://code.google.com/p/google-visualization-python/) 23 | * PhantomJS (http://phantomjs.org/) 24 | * Selenium (http://docs.seleniumhq.org/) 25 | 26 | #TO-DO# 27 | * MapReduce Job to map seat availability to pricing information. 28 | * Integrate with Prediction.io for Analysis. 29 | * Seat map for availability - alert if aisle/window seat becomes available. 30 | * Upgrade list - to track which flights give higher chance of upgrades. 31 | * Add support for non-direct flights 32 | * Calendar automated search support. 33 | * Command Line Interface 34 | * Backtest after enough data is gathered. 
# 35 |
# 36 | * Add more Search Engine Scrapers
# 37 | * Add price forecasting from Kayak & Bing
# 38 | * Output reminders kayak style:
# 39 | * http://www.kayak.com/images/sample-alerts.gif
# 40 | * Stylize WebApp for reporting/Dashboard
# 41 |
# 42 |
# --------------------------------------------------------------------------------
# /alert/__init__.py:
# --------------------------------------------------------------------------------
# 1 | __author__ = 'mayanez'
# 2 |
# --------------------------------------------------------------------------------
# /alert/email_alert.py:
# --------------------------------------------------------------------------------
import mongoengine
import smtplib
from datetime import datetime


def send_email(user, password, from_addr, to_addr, subject, msg):
    """Send a plain-text e-mail through Gmail's SMTP server.

    Connects to ``smtp.gmail.com:587``, upgrades the connection with
    STARTTLS, logs in with ``user``/``password`` and sends ``msg`` from
    ``from_addr`` to ``to_addr`` with the given ``subject``.

    Any exception raised by smtplib propagates to the caller; the
    connection is closed first so a failed login or send does not leak
    the socket.
    """
    server = smtplib.SMTP('smtp.gmail.com:587')
    try:
        server.starttls()
        server.login(user, password)

        senddate = datetime.strftime(datetime.now(), '%Y-%m-%d')

        formatted_message = "Date: %s\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\nX-Mailer: My-Mail\r\n\r\n %s" % (senddate, from_addr, to_addr, subject, msg)
        server.sendmail(from_addr, to_addr, formatted_message)
    except Exception:
        # Drop the socket without the QUIT handshake (the session may already
        # be broken), then let the original error reach the caller.
        server.close()
        raise
    server.quit()


def send_alert(email, origin, destination, dept_date, ret_date, flights):
    """Placeholder for the price alert; not implemented yet (returns None).

    TODO: Refactor
    """
    #get_min_price_itinerary(get_itineraries("SEA", "JFK", datetime.strptime("12-13-2013", "%m-%d-%Y"), datetime.strptime("12-15-2013", "%m-%d-%Y"), set([Flight(airline="DL", fno="1542")])))


if __name__ == '__main__':
    mongoengine.connect('flight_scraper')
    # Single-argument call form prints the same text on Python 2 and 3.
    print(send_alert(None, None, None, None, None, None))
# --------------------------------------------------------------------------------
# /automation/__init__.py:
# --------------------------------------------------------------------------------
# 1 | __author__ = 'mayanez'
# 2 |
# --------------------------------------------------------------------------------
# /automation/scrape_daily.cfg:
# --------------------------------------------------------------------------------
# 1 | [mongodb]
# 2 | name = flight_scraper
# 3 |
# 4 | [email]
# 5 | username = user
# 6 | password = pass
# 7 |
# 8 | [dates]
# 9 | end = 1-1-2014
# --------------------------------------------------------------------------------
# /automation/scrape_daily.py:
# --------------------------------------------------------------------------------
#!/usr/local/bin/python
import ConfigParser
import datetime
import logging
# Explicit names instead of star imports: the star import from dateutil.rrule
# also brought in MO..SU weekday *objects*, which never compare equal to the
# integers returned by date.weekday().
from mongoengine import connect
from dateutil.rrule import DAILY
# FlightScraper is defined in flight_scraper/scraper.py.
from flight_scraper.scraper import FlightScraper
from flight_scraper.utils.scraper import generate_date_pairs

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Config = ConfigParser.ConfigParser()
Config.read('scrape_daily.cfg')

# Weekday numbers as returned by datetime.date.weekday() (Monday == 0).
_MONDAY, _FRIDAY, _SATURDAY, _SUNDAY = 0, 4, 5, 6


def _search_date_pairs(origin, dest, date_pairs):
    """Run one round-trip flight search per (depart_date, return_date) pair."""
    for pair in date_pairs:
        # FlightScraper requires the route and both dates at construction.
        FlightScraper(origin, dest, pair[0], pair[1]).search_flights()


def bidirectional_search(origin, dest, until_date):
    """Sample script for automation.

    Searches origin -> dest trips departing Friday and returning Sunday,
    then dest -> origin trips departing Friday and returning Monday, for
    every date pair between the computed start date and ``until_date``.
    """
    start_date = __get_start_date()

    # Generates depart_date & return_date pairs, in that order, from
    # start_date to until_date.
    date_pairs_1 = generate_date_pairs(DAILY, (_FRIDAY, _SUNDAY), start_date, until_date)
    date_pairs_2 = generate_date_pairs(DAILY, (_FRIDAY, _MONDAY), start_date, until_date)

    _search_date_pairs(origin, dest, date_pairs_1)
    _search_date_pairs(dest, origin, date_pairs_2)


def __get_start_date():
    """Return the first date to search from.

    Today, except that a Saturday or Sunday is pushed forward two days
    and a Monday one day.
    NOTE(review): offsets are kept from the original code; presumably they
    skip a weekend trip that is already under way -- confirm the intent.
    """
    today = datetime.date.today()
    weekday = today.weekday()

    if weekday in (_SATURDAY, _SUNDAY):
        return today + datetime.timedelta(days=2)
    if weekday == _MONDAY:
        return today + datetime.timedelta(days=1)
    return today


if __name__ == '__main__':
    #Connect to MongoDB
    connect(Config.get("mongodb", "name"))

    origin = "SEA"
    dest = "PDX"

    logger.info("Started at %s", datetime.datetime.utcnow())
    try:
        until_date = datetime.datetime.strptime(Config.get("dates", "end"), "%m-%d-%Y")
        bidirectional_search(origin, dest, until_date)
    except Exception:
        # Top-level boundary of a scheduled job: record the full traceback
        # instead of only the message, then still log the end time.
        logger.exception("Daily scrape failed")

    logger.info("Ended at %s", datetime.datetime.utcnow())
# --------------------------------------------------------------------------------
# /flight_scraper.cfg.example:
# --------------------------------------------------------------------------------
# 1 | [mongodb]
# 2 | name = flight_scraper
# 3 | #host = mongodb://localhost/dbname
# 4 |
# 5 | [email]
# 6 | username = user
# 7 | password = pass
# 8 |
# 9 | [webapp]
# 10 | port = 5454
# --------------------------------------------------------------------------------
# /flight_scraper/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/mayanez/flight_scraper/17c5f202df0c443740e4f0a428dcc335a838a85c/flight_scraper/__init__.py
# --------------------------------------------------------------------------------
# /flight_scraper/engines/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/mayanez/flight_scraper/17c5f202df0c443740e4f0a428dcc335a838a85c/flight_scraper/engines/__init__.py
# --------------------------------------------------------------------------------
# /flight_scraper/engines/flight_stats/__init__.py:
# --------------------------------------------------------------------------------
# 1 | __author__ = 'mayanez'
# 2 |
# --------------------------------------------------------------------------------
# /flight_scraper/engines/flight_stats/driver.py:
# --------------------------------------------------------------------------------
import json
import logging
import urllib
import datetime
from flight_scraper.solution_model import Seat, Flight, SeatQuery
from selenium import webdriver

logging.basicConfig(level=logging.INFO)


class FlightStatsDriver(object):
    """Scrapes seat availability from flightstats.com through PhantomJS.

    Set ``origin``, ``destination`` and ``depart_date`` and then call
    ``search_seats()``.  The web driver is shut down at the end of every
    search and started again on the next one.
    """

    __logger = logging.getLogger(__name__)
    __base_url = "http://www.flightstats.com"
    __request_uri = "/go/FlightAvailability/flightAvailability.do"
    __http_header = {
        'Host': 'www.flightstats.com',
        'Content-Type': 'text/html'}
    # Default query parameters.  __init__ copies this dict so the property
    # setters below never modify the class-level defaults.
    __params = {
        'departure': '',
        'airline': '',
        'arrival': '',
        'connection': '',
        'queryDate': '',  # yyyy-mm-dd
        'queryTime': '2',
        'excludeConnectionCodes': '',
        'cabinCode': 'A',
        'numOfSeats': '1',
        'queryType': 'D',
        'fareClassCodes': ''}
    __driver = None

    def __init__(self, executable_path, service_log_path):
        """Start PhantomJS using the given executable and service log paths."""
        self.__executable_path = executable_path
        self.__service_log_path = service_log_path
        # Per-instance copy: two drivers must not overwrite each other's query.
        self.__params = dict(self.__params)
        self.__driver = self.__create_driver()

    def __create_driver(self):
        """Return a new PhantomJS web driver for the configured paths."""
        return webdriver.PhantomJS(executable_path=self.__executable_path,
                                   service_log_path=self.__service_log_path)

    @property
    def origin(self):
        """Departure airport code sent as the ``departure`` parameter."""
        return self.__params['departure']

    @origin.setter
    def origin(self, origin):
        self.__params['departure'] = origin

    @property
    def destination(self):
        """Arrival airport code sent as the ``arrival`` parameter."""
        return self.__params['arrival']

    @destination.setter
    def destination(self, destination):
        self.__params['arrival'] = destination

    @property
    def depart_date(self):
        """Query date as a ``datetime``.

        Raises ValueError while no date has been set, because the stored
        default is an empty string.
        """
        return datetime.datetime.strptime(self.__params['queryDate'], "%Y-%m-%d")

    @depart_date.setter
    def depart_date(self, depart_date):
        self.__params['queryDate'] = depart_date.strftime("%Y-%m-%d")

    def __extract_flights_with_seats(self, json_obj):
        """Build Flight objects, each with its Seat list, from the route JSON."""
        flight_list = list()
        self.__logger.info('Extracting flights with seats')
        # The query date is the same for every flight; parse it once.
        dep_time = self.depart_date

        for results in json_obj.itervalues():
            for flight_info in results['flights'].itervalues():
                flight = Flight(dep_city=flight_info['depCode'],
                                arr_city=flight_info['arrCode'],
                                airline=flight_info['airline'],
                                fno=flight_info['flightNumber'],
                                dep_time=dep_time)
                seats = list()

                for cabin in flight_info['cabins'].itervalues():
                    cabin_code = cabin['code']

                    for fare_class, seat_availability in cabin['fares'].iteritems():
                        # An empty string is stored as zero available seats.
                        if seat_availability == "":
                            availability = 0
                        else:
                            availability = int(seat_availability)

                        seats.append(Seat(cabin_code=cabin_code, fare_class=fare_class, availability=availability))

                flight.seats = seats
                flight_list.append(flight)

        return flight_list

    def search_seats(self):
        """Query FlightStats, save a SeatQuery and return the list of flights.

        The web driver is always quit afterwards, including when the request
        or the parsing fails, so no PhantomJS process is left running.
        """
        params = urllib.urlencode(self.__params)
        request_url = self.__base_url + self.__request_uri + ("?%s" % params)

        if self.__driver is None:
            # A previous search already shut the driver down.
            self.__driver = self.__create_driver()

        try:
            self.__logger.info('Requesting URL: %s' % (request_url))
            self.__driver.get(request_url)
            self.__logger.info('Running Javascript to retrieve available routes')
            result = self.__driver.execute_script('return JSON.stringify(availRoutes)')
            j = json.loads(unicode(result))

            flight_list = self.__extract_flights_with_seats(j)
            self.__logger.info('Saving SeatQuery to Database')
            seat_query = SeatQuery(flights=flight_list)
            seat_query.save()
        finally:
            self.__logger.info('Quiting the Web Driver')
            driver, self.__driver = self.__driver, None
            driver.quit()

        return flight_list
# --------------------------------------------------------------------------------
# /flight_scraper/engines/ita_matrix/__init__.py:
# --------------------------------------------------------------------------------
# 1 | __author__ = 'mayanez'
# 2 |
# --------------------------------------------------------------------------------
/flight_scraper/engines/ita_matrix/driver.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import datetime 4 | import re 5 | import requests 6 | from abc import abstractmethod 7 | from flight_scraper.solution_model import Flight, Itinerary, CalendarSolution, TripMinimumPrice 8 | from flight_scraper.solution_model import ItaSolution, ItaItinerary, PriceComponent 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | class AbstractItaMatrixDriver(object): 13 | 14 | _logger = logging.getLogger(__name__) 15 | engine = "ITA Matrix" 16 | _base_url = "http://matrix.itasoftware.com" 17 | _request_uri = "/xhr/shop/search?" 18 | _http_header = { 19 | 'Host': 'matrix.itasoftware.com', 20 | 'Content-Type': 'application/x-www-form-urlencoded', 21 | 'Cache-Control': 'no-cache', 22 | 'Content-Length': '0' 23 | } 24 | 25 | def __init__(self, origin, destination, depart_date, return_date, max_stops, airlines): 26 | self.origin = origin 27 | self.destination = destination 28 | self.depart_date = depart_date 29 | self.return_date = return_date 30 | self.max_stops = max_stops 31 | self.airlines = airlines 32 | 33 | @property 34 | def origin(self): 35 | return self._json_request['slices'][0]['origins'][0] 36 | 37 | @origin.setter 38 | def origin(self, origin): 39 | self._json_request['slices'][0]['origins'][0] = origin 40 | self._json_request['slices'][1]['destinations'][0] = origin 41 | 42 | @property 43 | def destination(self): 44 | return self._json_request['slices'][0]['destinations'][0] 45 | 46 | @destination.setter 47 | def destination(self, destination): 48 | self._json_request['slices'][0]['destinations'][0] = destination 49 | self._json_request['slices'][1]['origins'][0] = destination 50 | 51 | @property 52 | def max_stops(self): 53 | return self._json_request['maxStopCount'] 54 | 55 | @max_stops.setter 56 | def max_stops(self, stops): 57 | if stops is None: 58 | stops = 2 59 | 
self._json_request['maxStopCount'] = stops 60 | 61 | @property 62 | def airlines(self): 63 | return self._json_request['slices'][0]['routeLanguage'] 64 | 65 | @airlines.setter 66 | def airlines(self, airlines): 67 | if airlines is not None: 68 | self._json_request['slices'][0]['routeLanguage'] = airlines 69 | self._json_request['slices'][1]['routeLanguage'] = airlines 70 | 71 | def build_request_url(self): 72 | data = self._base_request + json.dumps(self._json_request) 73 | request_url = self._base_url + self._request_uri + data 74 | print 'Request URl: %s' % (request_url) 75 | return request_url 76 | 77 | def build_solutions(self): 78 | request_url = self.build_request_url() 79 | 80 | self._logger.info('Making request to ITA Matrix: %s', (request_url)) 81 | response = requests.post(request_url, headers=self._http_header) 82 | response_json = json.loads(response.text[4:]) 83 | 84 | print response_json 85 | self._logger.info('Creating objects to insert to database') 86 | return self._parse_solutions(response_json) 87 | 88 | @abstractmethod 89 | def _parse_solutions(self): 90 | raise NotImplementedError('Subclasses must implement _parse_solutions') 91 | 92 | class Slice(object): 93 | def __init__(self, origin, destination, depart_date, airlines=None): 94 | self._json_request = json.loads('{"origins":["PDX"],"originPreferCity":false,"commandLine":"airlines AA DL AS UA",\ 95 | "destinations":["SEA"],"destinationPreferCity":false,"date":"2013-06-07","isArrivalDate":false,\ 96 | "dateModifier":{"minus":0,"plus":0}}') 97 | self.origin = origin 98 | self.destination = destination 99 | self.depart_date = depart_date 100 | self._airlines = None 101 | self.airlines = airlines 102 | 103 | @property 104 | def origin(self): 105 | return self._json_request['origins'][0] 106 | 107 | @origin.setter 108 | def origin(self, origin): 109 | self._json_request['origins'][0] = origin 110 | 111 | @property 112 | def destination(self): 113 | return self._json_request['destinations'][0] 114 
| 115 | @destination.setter 116 | def destination(self, destination): 117 | self._json_request['destinations'][0] = destination 118 | 119 | @property 120 | def depart_date(self): 121 | return datetime.datetime.strptime(self._json_request['date'], "%Y-%m-%d") 122 | 123 | @depart_date.setter 124 | def depart_date(self, depart_date): 125 | self._json_request['date'] = depart_date.strftime('%Y-%m-%d') 126 | 127 | @property 128 | def airlines(self): 129 | return ' '.join(self._airlines) 130 | 131 | # return self._airlines 132 | # return self._json_request['commandLine'] 133 | 134 | @airlines.setter 135 | def airlines(self, airlines): 136 | if airlines is None: 137 | self._airlines = list() 138 | return 139 | 140 | self._airlines = [x.strip() for x in re.split('[ ,]', airlines) if x.strip()] 141 | 142 | if airlines is None: 143 | self._json_request['commandLine'] = "" 144 | else: 145 | self._json_request['commandLine'] = "airlines %s" % airlines 146 | 147 | def _build_command_line(self): 148 | route_lang = "" 149 | if (len(self._airlines) > 0): 150 | route_lang = "%s airlines %s" % (route_lang, self.airlines) 151 | 152 | self._json_request['commandLine'] = route_lang 153 | 154 | 155 | class ItaMatrixDriverMulti(AbstractItaMatrixDriver): 156 | _base_request = "name=specificDates&summarizers=carrierStopMatrix"\ 157 | "%2CcurrencyNotice%2CsolutionList%2CitineraryPriceSlider%2C"\ 158 | "itineraryCarrierList%2CitineraryDepartureTimeRanges%2CitineraryArrivalTimeRanges"\ 159 | "%2CdurationSliderItinerary%2CitineraryOrigins%2CitineraryDestinations%2C"\ 160 | "itineraryStopCountList%2CwarningsItinerary&format=JSON&inputs=" 161 | 162 | _json_request = json.loads('{"slices":[],"pax":{"adults":1},"cabin":"COACH","maxStopCount":0,\ 163 | "changeOfAirport":false,"checkAvailability":true,"page":{"size":2000},"sorts":"default"}') 164 | def __init__(self, max_stops): 165 | self.slices = list() 166 | self.max_stops = max_stops 167 | 168 | def add_slice(self, slice): 169 | 
self.slices.append(slice) 170 | 171 | def add_slice_params(self, origin, destination, depart_date, airlines=None): 172 | self.slices.append(Slice(origin, destination, depart_date, airlines)) 173 | 174 | # TODO: These isn't needed anymore. It's just a hack to get the _parse_solutions method working. 175 | @property 176 | def depart_date(self): 177 | return datetime.datetime.strptime(self._json_request['slices'][0]['date'], "%Y-%m-%d") 178 | # TODO: These isn't needed anymore. It's just a hack to get the _parse_solutions method working. 179 | @property 180 | def return_date(self): 181 | return datetime.datetime.strptime(self._json_request['slices'][-1]['date'], "%Y-%m-%d") 182 | 183 | @property 184 | def max_stops(self): 185 | return self._json_request['maxStopCount'] 186 | 187 | @max_stops.setter 188 | def max_stops(self, stops): 189 | if stops is None: 190 | stops = 2 191 | self._json_request['maxStopCount'] = stops 192 | 193 | def combine_slices(self): 194 | self._json_request['slices'] = [] 195 | for slice in self.slices: 196 | slice._build_command_line() 197 | self._json_request['slices'].append(slice._json_request) 198 | 199 | def build_solutions(self): 200 | self.combine_slices() 201 | return super(ItaMatrixDriverMulti, self).build_solutions() 202 | 203 | def _parse_solutions(self, response_json): 204 | """ 205 | Builds search solution. Adds to MongoDB and returns the Solution object. 206 | FIXME: This method currently assumes direct point-to-point flights. 
207 | """ 208 | solution = ItaSolution(engine=self.engine, origin=self.slices[0].origin, destination=self.slices[0].destination, depart_date=self.depart_date, return_date=self.return_date) 209 | solution.min_price = response_json['result']['solutionList']['minPrice'] 210 | solution.session = response_json['result']['session'] 211 | solution.solution_set = response_json['result']['solutionSet'] 212 | 213 | for sol in response_json['result']['solutionList']['solutions']: 214 | itinerary_id = sol['id'] 215 | flight_list = list() 216 | for slice in sol['itinerary']['slices']: 217 | # FIXME: Connecting flights aren't considered; number of flights not considered. 218 | flight_airline = slice['flights'][0][:2] 219 | flight_number = int(slice['flights'][0][2:]) 220 | # FIXME: UTC time might be important 221 | dep_time = datetime.datetime.strptime(slice['departure'][:-6], "%Y-%m-%dT%H:%M") 222 | arr_time = datetime.datetime.strptime(slice['arrival'][:-6], "%Y-%m-%dT%H:%M") 223 | arr_city = slice['destination']['code'] 224 | dep_city = slice['origin']['code'] 225 | 226 | flight = Flight(airline=flight_airline, fno=flight_number, dep_city=dep_city, arr_city=arr_city, dep_time=dep_time, arr_time=arr_time) 227 | flight.save() 228 | 229 | flight_list.append(flight) 230 | 231 | price = sol['displayTotal'] 232 | price_per_mile = sol['ext']['pricePerMile'] 233 | distance = sol['itinerary']['distance']['value'] 234 | itinerary = ItaItinerary(flights=flight_list, price=price, price_per_mile=price_per_mile, ext_id=itinerary_id, distance=distance) 235 | solution.itineraries.append(itinerary) 236 | 237 | solution.save() 238 | 239 | return solution 240 | 241 | class ItaMatrixDriver(AbstractItaMatrixDriver): 242 | 243 | _base_request = "name=specificDates&summarizers=carrierStopMatrix"\ 244 | "%2CcurrencyNotice%2CsolutionList%2CitineraryPriceSlider%2C"\ 245 | "itineraryCarrierList%2CitineraryDepartureTimeRanges%2CitineraryArrivalTimeRanges"\ 246 | 
"%2CdurationSliderItinerary%2CitineraryOrigins%2CitineraryDestinations%2C"\ 247 | "itineraryStopCountList%2CwarningsItinerary&format=JSON&inputs=" 248 | 249 | _json_request = json.loads('{"slices":[{"origins":["PDX"],"originPreferCity":false,"commandLine":"airlines AA DL AS UA",\ 250 | "destinations":["SEA"],"destinationPreferCity":false,"date":"2013-06-07","isArrivalDate":false,\ 251 | "dateModifier":{"minus":0,"plus":0}},{"destinations":["PDX"],"destinationPreferCity":false,\ 252 | "origins":["SEA"],"originPreferCity":false,"commandLine":"airlines AA DL AS","date":"2013-06-09",\ 253 | "isArrivalDate":false,"dateModifier":{"minus":0,"plus":0}}],"pax":{"adults":1},"cabin":"COACH","maxStopCount":0,\ 254 | "changeOfAirport":false,"checkAvailability":true,"page":{"size":2000},"sorts":"default"}') 255 | 256 | def __init__(self, origin, destination, depart_date, return_date, max_stops=None, airlines=None): 257 | super(ItaMatrixDriver, self).__init__(origin, destination, depart_date, return_date, max_stops, airlines) 258 | 259 | @property 260 | def depart_date(self): 261 | return datetime.datetime.strptime(self._json_request['slices'][0]['date'], "%Y-%m-%d") 262 | 263 | @depart_date.setter 264 | def depart_date(self, depart_date): 265 | self._json_request['slices'][0]['date'] = depart_date.strftime('%Y-%m-%d') 266 | 267 | @property 268 | def return_date(self): 269 | return datetime.datetime.strptime(self._json_request['slices'][1]['date'], "%Y-%m-%d") 270 | 271 | @return_date.setter 272 | def return_date(self, return_date): 273 | self._json_request['slices'][1]['date'] = return_date.strftime('%Y-%m-%d') 274 | 275 | @property 276 | def airlines(self): 277 | return self._json_request['slices'][0]['commandLine'] 278 | 279 | @airlines.setter 280 | def airlines(self, airlines): 281 | if airlines is None: 282 | self._json_request['commandLine'] = "" 283 | else: 284 | self._json_request['slices'][0]['commandLine'] = "airlines %s" % airlines 285 | 
self._json_request['slices'][1]['commandLine'] = "airlines %s" % airlines 286 | 287 | def _parse_solutions(self, response_json): 288 | """ 289 | Builds search solution. Adds to MongoDB and returns the Solution object. 290 | FIXME: This method currently assumes direct point-to-point flights. 291 | """ 292 | solution = ItaSolution(engine=self.engine, origin=self.origin, destination=self.destination, depart_date=self.depart_date, return_date=self.return_date) 293 | solution.min_price = response_json['result']['solutionList']['minPrice'] 294 | solution.session = response_json['result']['session'] 295 | solution.solution_set = response_json['result']['solutionSet'] 296 | 297 | for sol in response_json['result']['solutionList']['solutions']: 298 | itinerary_id = sol['id'] 299 | origin_flight_airline = sol['itinerary']['slices'][0]['flights'][0][:2] 300 | origin_flight_number = int(sol['itinerary']['slices'][0]['flights'][0][2:]) 301 | dep_time = datetime.datetime.strptime(sol['itinerary']['slices'][0]['departure'][:-6], "%Y-%m-%dT%H:%M") 302 | arr_time = datetime.datetime.strptime(sol['itinerary']['slices'][0]['arrival'][:-6], "%Y-%m-%dT%H:%M") 303 | arr_city = sol['itinerary']['slices'][0]['destination']['code'] 304 | dep_city = sol['itinerary']['slices'][0]['origin']['code'] 305 | 306 | origin_flight = Flight(airline=origin_flight_airline, fno=origin_flight_number, dep_city=dep_city, arr_city=arr_city, dep_time=dep_time, arr_time=arr_time) 307 | origin_flight.save() 308 | 309 | return_flight_airline = sol['itinerary']['slices'][1]['flights'][0][:2] 310 | return_flight_number = int(sol['itinerary']['slices'][1]['flights'][0][2:]) 311 | dep_time = datetime.datetime.strptime(sol['itinerary']['slices'][1]['departure'][:-6], "%Y-%m-%dT%H:%M") 312 | arr_time = datetime.datetime.strptime(sol['itinerary']['slices'][1]['arrival'][:-6], "%Y-%m-%dT%H:%M") 313 | arr_city = sol['itinerary']['slices'][1]['destination']['code'] 314 | dep_city = 
sol['itinerary']['slices'][1]['origin']['code'] 315 | 316 | return_flight = Flight(airline=return_flight_airline, fno=return_flight_number, dep_city=dep_city, arr_city=arr_city, dep_time=dep_time, arr_time=arr_time) 317 | return_flight.save() 318 | 319 | flight_list = [origin_flight, return_flight] 320 | price = sol['displayTotal'] 321 | itinerary = ItaItinerary(flights=flight_list, price=price, ext_id=itinerary_id) 322 | solution.itineraries.append(itinerary) 323 | 324 | solution.save() 325 | 326 | return solution 327 | 328 | class CalendarItaMatrixDriver(AbstractItaMatrixDriver): 329 | 330 | _base_request = "name=calendar&summarizers=currencyNotice%2CovernightFlightsCalendar"\ 331 | "%2CitineraryStopCountList%2CitineraryCarrierList%2Ccalendar&format=JSON&inputs=" 332 | 333 | _json_request = json.loads('{"slices":[{"origins":["BWI"],"originPreferCity":false,"routeLanguage":"C:DL","destinations":["MSP"],\ 334 | "destinationPreferCity":false},{"destinations":["BWI"],"destinationPreferCity":false,"origins":["MSP"],\ 335 | "originPreferCity":false,"routeLanguage":"C:DL"}],"startDate":"2014-07-01","layover":{"max":5,"min":4},\ 336 | "pax":{"adults":1},"cabin":"COACH","maxStopCount":0,"changeOfAirport":false,"checkAvailability":true,\ 337 | "firstDayOfWeek":"SUNDAY","endDate":"2014-08-01"}') 338 | 339 | def __init__(self, origin, destination, depart_date, return_date, day_range, max_stops=None, airlines=None): 340 | super(CalendarItaMatrixDriver, self).__init__(origin, destination, depart_date, return_date, max_stops, airlines) 341 | self.day_range = day_range 342 | 343 | @property 344 | def depart_date(self): 345 | return datetime.datetime.strptime(self._json_request['startDate'], "%Y-%m-%d") 346 | 347 | @depart_date.setter 348 | def depart_date(self, depart_date): 349 | self._json_request['startDate'] = depart_date.strftime('%Y-%m-%d') 350 | 351 | @property 352 | def return_date(self): 353 | return datetime.datetime.strptime(self._json_request['endDate'], "%Y-%m-%d") 
354 | 355 | @return_date.setter 356 | def return_date(self, return_date): 357 | self._json_request['endDate'] = return_date.strftime('%Y-%m-%d') 358 | 359 | @property 360 | def day_range(self): 361 | return self._json_request['layover'] 362 | 363 | @day_range.setter 364 | def day_range(self, days): 365 | self._json_request['layover'] = {'min': days[0], 'max': days[1]} 366 | 367 | def _parse_solutions(self, response_json): 368 | self._logger.info('Creating objects to insert to database') 369 | solution = CalendarSolution(engine=self.engine, origin=self.origin, destination=self.destination, 370 | depart_date=self.depart_date, return_date=self.return_date) 371 | 372 | prices = [] 373 | for month in response_json['result']['calendar']['months']: 374 | for week in month['weeks']: 375 | for day in week['days']: 376 | if day['solutionCount'] == 0: 377 | continue 378 | for sol in day['tripDuration']['options']: 379 | 380 | dep_time = datetime.datetime.strptime(sol['solution']['slices'][0]['departure'][:10], "%Y-%m-%d").date() 381 | arr_time = datetime.datetime.strptime(sol['solution']['slices'][1]['departure'][:10], "%Y-%m-%d").date() 382 | price = sol['minPrice'] 383 | trip = TripMinimumPrice(dep_city=self.origin, arr_city=self.destination, dep_time=dep_time, arr_time=arr_time, price=price) 384 | prices.append(float(price.replace('USD', ''))) #FIXME: Can't assume USD 385 | 386 | solution.trip_prices.append(trip) 387 | 388 | solution.min_price = str(min(prices)) 389 | solution.save() 390 | 391 | return solution 392 | 393 | class ViewItineraryDriver(object): 394 | 395 | _logger = logging.getLogger(__name__) 396 | engine = "ITA Matrix" 397 | _base_url = "http://matrix.itasoftware.com" 398 | _request_uri = "/xhr/shop/summarize?" 
399 | _http_header = { 400 | 'Host': 'matrix.itasoftware.com', 401 | 'Content-Type': 'application/x-www-form-urlencoded', 402 | 'Cache-Control': 'no-cache', 403 | 'Content-Length': '0' 404 | } 405 | 406 | _base_request = "solutionSet="\ 407 | "&session="\ 408 | "&summarizers=currencyNotice%2CbookingDetails"\ 409 | "&format=JSON"\ 410 | "&inputs=" 411 | 412 | _json_request = json.loads('{"slices":[{"origins":["VRN"],"originPreferCity":false,"commandLine":"airlines AA BA DL",\ 413 | "destinations":["SEA","YVR"],"destinationPreferCity":false,"date":"2014-10-20","isArrivalDate":false,\ 414 | "dateModifier":{"minus":0,"plus":0}},{"origins":["YVR","SEA"],"originPreferCity":false,"routeLanguage":"X+",\ 415 | "destinations":["VRN"],"destinationPreferCity":false,"date":"2014-11-07","isArrivalDate":false,\ 416 | "dateModifier":{"minus":0,"plus":0}}],\ 417 | "pax":{"adults":1,"children":0,"seniors":0,"infantsInSeat":0,"youth":0,"infantsInLap":0},\ 418 | "cabin":"COACH","changeOfAirport":true,"checkAvailability":true,"currency":"USD","salesCity":"MIL",\ 419 | "page":{"size":30},"sorts":"default","solution":"0EI4mYoNuxZ6UnAPrvqq47B/Jpt7IVb5Qv8NBWx8cO9e4K004"}') 420 | 421 | def __init__(self, itinerary, session, solutionSet): 422 | self.slices = list() 423 | self.itinerary = itinerary 424 | self.session = session 425 | self.solutionSet = solutionSet 426 | pass 427 | 428 | @property 429 | def session(self): 430 | return self._session 431 | @session.setter 432 | def session(self, session): 433 | self._session = session 434 | self._base_request = re.sub('session=[^&]*', "session=%s" % session, self._base_request) 435 | 436 | @property 437 | def solutionSet(self): 438 | return self._solutionSet 439 | @solutionSet.setter 440 | def solutionSet(self, solutionSet): 441 | self._solutionSet = solutionSet 442 | self._base_request = re.sub('solutionSet=[^&]*', "solutionSet=%s" % solutionSet, self._base_request) 443 | 444 | @property 445 | def itinerary(self): 446 | return self._itinerary 
447 | @itinerary.setter 448 | def itinerary(self, itinerary): 449 | self._itinerary = itinerary 450 | self.itinerary_to_slices(itinerary) 451 | 452 | def itinerary_to_slices(self, itinerary): 453 | for flight in itinerary.flights: 454 | self.slices.append(Slice(flight.dep_city, flight.arr_city, flight.dep_time, flight.airline)) 455 | 456 | def _build_session_handle(self): 457 | self._json_request['solution'] = "%s/%s" % (self.solutionSet, self.itinerary.ext_id) 458 | 459 | def build_request_url(self): 460 | self._build_session_handle() 461 | data = self._base_request + json.dumps(self._json_request) 462 | request_url = self._base_url + self._request_uri + data 463 | print 'Request URL: %s' % (request_url) 464 | return request_url 465 | 466 | def build_itinerary_breakdown(self): 467 | request_url = self.build_request_url() 468 | 469 | self._logger.info('Making request to ITA Matrix: %s', (request_url)) 470 | response = requests.post(request_url, headers=self._http_header) 471 | response_json = json.loads(response.text[4:]) 472 | 473 | print response_json 474 | self._logger.info('Creating objects to insert to database') 475 | return self._parse_breakdown(response_json) 476 | 477 | def _parse_breakdown(self, response_json): 478 | # Base fares 479 | for base_fare in response_json['result']['bookingDetails']['tickets'][0]['pricings'][0]['fares']: 480 | rate_code = base_fare['code'] 481 | price = base_fare['displayAdjustedPrice'] 482 | key = base_fare['key'] 483 | ori_city = base_fare['originCity'] 484 | arr_city = base_fare['destinationCity'] 485 | 486 | pc = PriceComponent(rate_code=rate_code, price=price, key=key, description="%s-%s" % (ori_city, arr_city)) 487 | self.itinerary.base_fares.append(pc) 488 | 489 | # Taxes 490 | for tax_item in response_json['result']['bookingDetails']['tickets'][0]['pricings'][0]['ext']['taxTotals']: 491 | # {'code': 'US', 'tax': {'name': 'US Transportation Tax', 'key': '0/0'}, 'totalDisplayPrice': 'USD44.81'}, 492 | rate_code = 
tax_item['code'] 493 | price = tax_item['totalDisplayPrice'] 494 | key = tax_item['tax']['key'] 495 | description = tax_item['tax']['name'] 496 | 497 | pc = PriceComponent(rate_code=rate_code, price=price, key=key, description=description) 498 | self.itinerary.taxes.append(pc) 499 | 500 | total_price = response_json['result']['bookingDetails']['tickets'][0]['displayPrice'] 501 | distance = response_json['result']['bookingDetails']['itinerary']['distance']['value'] 502 | 503 | self.itinerary.distance = distance 504 | 505 | return self.itinerary -------------------------------------------------------------------------------- /flight_scraper/scraper.py: -------------------------------------------------------------------------------- 1 | from flight_scraper.solution_model import Solution, ItaSolution, CalendarSolution, SeatQuery, Itinerary, ItaItinerary 2 | from engines.ita_matrix.driver import ItaMatrixDriver, ItaMatrixDriverMulti, CalendarItaMatrixDriver, Slice, ViewItineraryDriver 3 | from datetime import date, timedelta 4 | 5 | class FlightScraper(object): 6 | 7 | def __init__(self, origin, destination, depart_date, return_date, 8 | max_stops=None, day_range=None, airlines=None): 9 | self.origin = origin 10 | self.destination = destination 11 | self.depart_date = depart_date 12 | self.return_date = return_date 13 | self.day_range = day_range 14 | self.max_stops = max_stops 15 | self.airlines = airlines 16 | 17 | def search_flights(self): 18 | ita_driver = ItaMatrixDriver(self.origin, self.destination, self.depart_date, self.return_date, self.max_stops, self.airlines) 19 | return ita_driver.build_solutions() 20 | 21 | def search_calendar(self): 22 | ita_driver = CalendarItaMatrixDriver(self.origin, self.destination, self.depart_date, self.return_date, 23 | day_range=self.day_range, max_stops=self.max_stops, airlines=self.airlines) 24 | return ita_driver.build_solutions() 25 | 26 | def minimum_trips(self): 27 | """ 28 | Returns a CalendarSolution object from MongoDB 
29 | """ 30 | return CalendarSolution.objects(origin=self.origin, destination=self.destination, 31 | depart_date=self.depart_date, return_date=self.return_date) 32 | 33 | def solutions(self): 34 | """ 35 | Returns a Solution object from MongoDB 36 | """ 37 | return Solution.objects(depart_date=self.depart_date, return_date=self.return_date, 38 | origin=self.origin, destination=self.destination) 39 | 40 | def itineraries(self, flights_to_match): 41 | results = list() 42 | solutions = self.solutions() 43 | 44 | for sol in solutions: 45 | itineraries = sol.itineraries 46 | for itinerary in itineraries: 47 | flights = set(itinerary.flights) 48 | matched = flights.intersection(flights_to_match) 49 | if len(matched) > 0: 50 | results.append(itinerary) 51 | 52 | return results 53 | 54 | def __get_seats(self, date): 55 | seat_query = SeatQuery.objects(flights__dep_city=self.__origin, flights__arr_city=self.__destination, flights__dep_time=date) 56 | return seat_query 57 | 58 | def departure_seats(self): 59 | return self.__get_seats(self.__depart_date) 60 | 61 | def return_seats(self): 62 | return self.__get_seats(self.__return_date) 63 | 64 | class FlightScraperMulti(object): 65 | 66 | def __init__(self, max_stops=None): 67 | #self.origin = None 68 | #self.destination = None 69 | #self.depart_date = None 70 | #self.return_date = None 71 | #self.day_range = None 72 | self.max_stops = max_stops 73 | #self.airlines = None 74 | self._ita_driver = ItaMatrixDriverMulti(self.max_stops) 75 | 76 | def add_flight(self, origin, destination, depart_date, airlines=None): 77 | self._ita_driver.add_slice_params(origin, destination, depart_date, airlines) 78 | 79 | def search_flights(self): 80 | return self._ita_driver.build_solutions() 81 | 82 | def scrape_return(): 83 | scraper = FlightScraper('SFO', 'SEA', date.today() + timedelta(days=30), date.today() + timedelta(days=47)) 84 | solution = scraper.search_flights() 85 | 86 | return solution 87 | 88 | def scrape_multi(): 89 | from 
datetime import date 90 | scraper = FlightScraperMulti() 91 | scraper.add_flight('SFO', 'SEA', date.today() + timedelta(days=30), airlines="AA DL AC") 92 | scraper.add_flight('SEA', 'PHX', date.today() + timedelta(days=40)) 93 | scraper.add_flight('PHX', 'SFO', date.today() + timedelta(days=47)) 94 | solution = scraper.search_flights() 95 | 96 | return solution 97 | 98 | def scrape_itinerary(solution, itinerary): 99 | ita_driver = ViewItineraryDriver(itinerary, solution.session, solution.solution_set) 100 | it_details = ita_driver.build_itinerary_breakdown() 101 | 102 | # Update in the solution 103 | return it_details 104 | 105 | if __name__=="__main__": 106 | import ConfigParser 107 | import mongoengine 108 | 109 | Config = ConfigParser.ConfigParser() 110 | if Config.read('flight_scraper.cfg')==[]: 111 | print "Please copy flight_scraper.cfg.example to flight_scraper.cfg" 112 | raise Exception('Could not read config file') 113 | 114 | try: 115 | host_string=Config.get("mongodb", "host") 116 | mongoengine.connect(Config.get("mongodb", "name"),host=host_string) 117 | except ConfigParser.NoOptionError: 118 | mongoengine.connect(Config.get("mongodb", "name")) 119 | 120 | solution = scrape_multi() 121 | #solution = ItaSolution.objects().limit(1).next() 122 | itinerary = solution.itineraries[-1] 123 | 124 | it_details = scrape_itinerary(solution, itinerary) 125 | -------------------------------------------------------------------------------- /flight_scraper/solution_model.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import urllib 3 | 4 | from mongoengine import * 5 | 6 | class Seat(EmbeddedDocument): 7 | cabin_code = StringField() 8 | fare_class = StringField() 9 | availability = IntField() 10 | 11 | def __str__(self): 12 | return "cabin: %s fare: %s avail: %s" % (self.cabin_code, self.fare_class, self.availability) 13 | 14 | class Flight(Document): 15 | airline = StringField() 16 | fno = IntField() 17 | 
dep_city = StringField() 18 | arr_city = StringField() 19 | dep_time = DateTimeField() 20 | arr_time = DateTimeField() 21 | seats = ListField(EmbeddedDocumentField(Seat)) 22 | 23 | def __str__(self): 24 | return "Flight: %s %s \n%s-%s\n%s - %s" % (self.airline, self.fno, self.dep_city, self.arr_city, self.dep_time, self.arr_time) 25 | 26 | def __repr__(self): 27 | return self.__str__ 28 | 29 | def __eq__(self, other): 30 | return ((self.airline == other.airline) and (self.fno == other.fno)) 31 | 32 | def __hash__(self): 33 | return hash((self.airline, self.fno)) 34 | 35 | def seat_map(self): 36 | url = "http://www.seatguru.com/findseatmap/findseatmap.php?" 37 | params = { 'carrier':self.airline, 38 | 'flightno':self.fno, 39 | 'date':self.dep_time.strftime('%m-%d-%Y') } 40 | url = url + urllib.urlencode(params) 41 | return url 42 | 43 | 44 | class Itinerary(EmbeddedDocument): 45 | flights = ListField(ReferenceField(Flight)) 46 | price = StringField() 47 | price_per_mile = StringField() 48 | ext_id = StringField(required=False) 49 | 50 | def __str__(self): 51 | return "Itinerary:\n \tPrice=%s\n \t%s" % (self.price, [str(f) for f in self.flights]) 52 | 53 | def set_stop(self, conn_flight): 54 | return None 55 | 56 | meta = {'allow_inheritance': True} 57 | 58 | class PriceComponent(EmbeddedDocument): 59 | rate_code = StringField(required=True) 60 | price = StringField(required=True) 61 | key = StringField() 62 | description = StringField() 63 | 64 | class ItaItinerary(Itinerary): 65 | #flight_details = ListField(EmbeddedDocumentField(FlightDetails)) 66 | taxes = ListField(EmbeddedDocumentField(PriceComponent)) 67 | base_fares = ListField(EmbeddedDocumentField(PriceComponent)) 68 | distance = IntField() 69 | # FIXME: all_flights contains all of the flight connections in the breakdown. 
70 | all_flights = ListField(ReferenceField(Flight)) 71 | 72 | class Solution(Document): 73 | query_date = DateTimeField(default=datetime.datetime.utcnow(), required=True) 74 | engine = StringField(required=True) 75 | origin = StringField(max_length=100, required=True) 76 | depart_date = DateTimeField() 77 | destination = StringField(max_length=100, required=True) 78 | return_date = DateTimeField() 79 | min_price = StringField(required=False) 80 | itineraries = ListField(EmbeddedDocumentField(Itinerary)) 81 | session = StringField(required=False) 82 | 83 | meta = {'allow_inheritance': True} 84 | 85 | class ItaSolution(Solution): 86 | session = StringField(required=True) 87 | solution_set = StringField(required=True) 88 | 89 | class SeatQuery(Document): 90 | query_date = DateTimeField(default=datetime.datetime.utcnow(), required=True) 91 | flights = ListField(ReferenceField(Flight)) 92 | 93 | class TripMinimumPrice(EmbeddedDocument): 94 | dep_city = StringField() 95 | arr_city = StringField() 96 | dep_time = DateTimeField() 97 | arr_time = DateTimeField() 98 | price = StringField() 99 | 100 | def __str__(self): 101 | return 'TripMinimimumPrice: %s->%s\n%s - %s: %s' % (self.dep_city, self.arr_city, self.dep_time, self.arr_time, self.price) 102 | 103 | def query(self): 104 | pass 105 | 106 | class CalendarSolution(Document): 107 | query_date = DateTimeField(default=datetime.datetime.utcnow(), required=True) 108 | engine = StringField(required=True) 109 | origin = StringField(max_length=100, required=True) 110 | destination = StringField(max_length=100, required=True) 111 | depart_date = DateTimeField() 112 | return_date = DateTimeField() 113 | min_price = StringField(required=False) 114 | trip_prices = ListField(EmbeddedDocumentField(TripMinimumPrice)) 115 | -------------------------------------------------------------------------------- /flight_scraper/utils/__init__.py: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/mayanez/flight_scraper/17c5f202df0c443740e4f0a428dcc335a838a85c/flight_scraper/utils/__init__.py
# --------------------------------------------------------------------------------
# /flight_scraper/utils/graph.py:
# --------------------------------------------------------------------------------
import gviz_api
from flight_scraper.utils.scraper import get_prices_by_query_dates


def graph_prices(flight_scraper):
    """
    Build the Google Visualizations DataTable JSON for the stored minimum prices.

    One row is emitted per (query_date, min_price) pair returned by
    get_prices_by_query_dates; the price column is labelled with the
    scraper's departure and return dates. The JSON is ordered by query_date.
    """
    price_label = "%s to %s" % (flight_scraper.depart_date, flight_scraper.return_date)
    description = {"query_date" : ("datetime", "Query Date"),
                   "min_price" : ("number", price_label)}

    prices_by_date = get_prices_by_query_dates(flight_scraper)
    rows = [{"query_date" : query_date, "min_price" : price}
            for query_date in prices_by_date
            for price in prices_by_date[query_date]]

    table = gviz_api.DataTable(description)
    table.LoadData(rows)

    return table.ToJSon(columns_order=("query_date", "min_price"), order_by="query_date")


def graph_seats(origin, dest, dept_date):
    """ TODO: Refactor. Currently a stub: the body is commented out, so it returns None. """
    #description = {"query_date" : ("datetime", "Query Date"),
    #               "seat_avail" : ("number", "%s" % (dept_date))}
    #
    #seat_query = scraper.get_total_seat_availability(origin, dest, dept_date)
    #data = list()
    #for query_date, avail in seat_query.iteritems():
    #    v = {"query_date" : query_date, "seat_avail" : avail}
    #    data.append(v)
    #
    #data_table = gviz_api.DataTable(description)
    #data_table.LoadData(data)
    #
    #return data_table.ToJSon(columns_order=("query_date", "seat_avail"), order_by="query_date")
# --------------------------------------------------------------------------------
# /flight_scraper/utils/scraper.py:
# --------------------------------------------------------------------------------
from dateutil.rrule import *
from dateutil.parser import *


def _price_to_float(price):
    """Convert a display price such as 'USD861.40' to a float.

    The first three characters are dropped, assuming a three-letter currency
    code prefix as in the stored ITA Matrix prices.
    """
    return float(price[3:])


def search_seats(origin, dest, dep_date):
    """ TODO: Refactor. Not implemented; always raises NotImplementedError. """
    #dep_date = dep_date.strftime("%Y-%m-%d")
    #print "Searching %s -> %s : %s" % (origin, dest, dep_date)
    raise NotImplementedError('search_seats needs to be implemented')


def generate_date_pairs(frequency, weekdays, start_date, until_date):
    """Return [departure, return] pairs from a dateutil recurrence.

    The recurrence runs from *start_date* to *until_date* on the given
    *weekdays* at *frequency*. Consecutive occurrences are paired: 1st with
    2nd, 3rd with 4th, and so on. A trailing occurrence without a partner is
    dropped.
    """
    # Formatting and re-parsing keeps only the calendar date of until_date.
    until = parse(until_date.strftime('%m-%d-%Y'))

    dates = list(rrule(frequency, byweekday=weekdays, dtstart=start_date, until=until))

    # Even positions are departures, odd positions the matching returns.
    return [[depart, ret] for depart, ret in zip(dates[0::2], dates[1::2])]


def get_prices_by_query_dates(flight_scraper):
    """ Returns a dict mapping each query_date to the list of minimum prices
    (as floats) recorded for the scraper's depart_date & return_date. """
    result = dict()

    for sol in flight_scraper.solutions():
        # min_price is an optional field; a solution stored without it has
        # nothing to plot and previously crashed the slice below.
        if not sol.min_price:
            continue
        # setdefault replaces the has_key branch (dict.has_key is Python-2 only).
        result.setdefault(sol.query_date, []).append(_price_to_float(sol.min_price))

    return result


def get_total_seat_availability(origin, dest, date):
    """ TODO: Refactor. Currently a stub that returns None. """

    #seat_availability = dict()
    #seat_query = get_seats(origin, dest, date)
    #
    #for query in seat_query:
    #    flights = query.flights
    #
    #    for flight in flights:
    #        seats = flight.seats
    #        for seat in seats:
    #            if (not seat_availability.has_key(query.query_date)):
    #                seat_availability[query.query_date] = seat.availability
    #            else:
    #                seat_availability[query.query_date] += seat.availability
    #
    #return seat_availability
    pass


def get_min_price_itinerary(itineraries):
    """Return the itinerary with the lowest price.

    Prices are stored as strings ('USD861.40'). They are compared by numeric
    value, because comparing the raw strings orders 'USD1000.00' before
    'USD861.40'. Raises ValueError when *itineraries* is empty.
    """
    return min(itineraries, key=lambda x: _price_to_float(x.price))

# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
from distutils.core import setup

setup(
    name='flight_scraper',
    version='0.1',
    # distutils does not recurse into subpackages: each one has to be listed,
    # otherwise flight_scraper.engines.ita_matrix / flight_stats are not installed.
    packages=['flight_scraper', 'flight_scraper.utils', 'flight_scraper.engines',
              'flight_scraper.engines.ita_matrix', 'flight_scraper.engines.flight_stats',
              'automation'],
    url='',
    license='',
    author='mayanez',
    author_email='',
    description=''
)

# --------------------------------------------------------------------------------
# /static/ico/favicon.ico:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/mayanez/flight_scraper/17c5f202df0c443740e4f0a428dcc335a838a85c/static/ico/favicon.ico
-------------------------------------------------------------------------------- /templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {% block head %} 5 | {% block title %}{% endblock %} 6 | 7 | 8 | 9 | {% endblock %} 10 | 11 | 12 | {% block body %} 13 | {% endblock %} 14 | 15 | -------------------------------------------------------------------------------- /templates/calendar_query.html: -------------------------------------------------------------------------------- 1 | 5 | {% extends "base.html" %} 6 | 7 | {%block head%} 8 | {{super()}} 9 | 10 | 16 | {% endblock %} 17 | 18 | {% block body %} 19 |
20 |
21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 |
Query DateMin Price
{{result.query_date}} {{result.min_price}}
31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | {% for trip in result.trip_prices %} 40 | 41 | 42 | 43 | 44 | 45 | 46 | {% endfor %} 47 |
Price Departure Date Arrival Date Query Flights
{{trip.price}} {{trip.dep_time}} {{trip.arr_time}} Query Flights
48 |
49 |
50 | {% endblock %} 51 | 52 | ​ -------------------------------------------------------------------------------- /templates/graph.html: -------------------------------------------------------------------------------- 1 | 5 | {% extends "base.html" %} 6 | 7 | {%block head%} 8 | {{super()}} 9 | 10 | 11 | 14 | 28 | 29 | 35 | {% endblock %} 36 | 37 | {% block body %} 38 | 39 |
40 |

{{lengthSol}}

41 |
42 | 43 | {% for sol in solutions %} 44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
Query DateMin Price
{{sol.query_date}} {{sol.min_price}}
55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | {% for itinerary in sol.itineraries %} 69 | 70 | 71 | {% for flight in itinerary.flights %} 72 | 73 | 74 | 75 | 76 | {% endfor %} 77 | 78 | {% endfor %} 79 |
Price Airline Flight No Dept Time Seat Map Airline Flight No Dept Time Seat Map
{{itinerary.price}} {{flight.airline}} {{flight.fno}} {{flight.dep_time}} SeatGuru
80 |
81 | {% endfor %} 82 | 83 |
84 | {% endblock %} 85 | 86 | ​ -------------------------------------------------------------------------------- /templates/graph_seats.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {%block head%} 4 | {{super()}} 5 | 6 | 7 | 10 | 24 | 25 | {% endblock %} 26 | 27 | {% block body %} 28 | 29 |
30 | 31 | 32 | {% endblock %} -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {%block title%} Flight Scraper {% endblock %} 3 | {%block head%} 4 | {{super()}} 5 | 13 | {%endblock%} 14 | {% block body %} 15 |

Flight Scraper

16 | 17 |

Flight Search

18 |
19 |
20 |
Origin:
21 |
Destination:
22 |
Frequency: 23 | 26 |
27 |
28 | Weekdays:
29 | MO
30 | TU
31 | WE
32 | TH
33 | FR
34 | SA
35 | SU
36 |
37 |
Start Date:
38 |
Until Date:
39 |
40 |
41 |
42 | 43 |

Flight Search - Calendar

44 |
45 |
46 |
Origin:
47 |
Destination:
48 |
Start Date:
49 |
Until Date:
50 |
Airlines:
51 |
Length of stay (e.g., 4 or 4-5):
52 |
Maximum Number of Stops:
53 |
54 |
55 |
56 | 57 |

Graph Flights

58 |
59 |
60 |
Origin:
61 |
Destination:
62 |
Departure:
63 |
Return:
64 |
65 |
66 |
67 | 68 |

Find Seats

69 |
70 |
71 |
Origin:
72 |
Destination:
73 |
Departure:
74 |
75 |
76 |
77 | 78 | {% endblock %} 79 | -------------------------------------------------------------------------------- /templates/query.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block body %} 4 | 5 |
6 |
7 | 8 | {% for date_query in result %} 9 |
10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
Departure DateArrival Date
{{date_query[0]}} {{date_query[1]}}
20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | {% for itinerary in date_query[2].itineraries %} 34 | 35 | 36 | {% for flight in itinerary.flights %} 37 | 38 | 39 | 40 | 41 | {% endfor %} 42 | 43 | {% endfor %} 44 |
Price Airline Flight No Dept Time Seat Map Airline Flight No Dept Time Seat Map
{{itinerary.price}} {{flight.airline}} {{flight.fno}} {{flight.dep_time}} SeatGuru
45 |
46 | {% endfor %} 47 | 48 |
49 | {% endblock %} -------------------------------------------------------------------------------- /templates/seats.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {%block body%} 4 | 14 | {%endblock%} -------------------------------------------------------------------------------- /test/sample_ita_itinerary.json: -------------------------------------------------------------------------------- 1 | { 2 | "result": { 3 | "id": "sx7pjCTUnbu8kuOP50KYR9", 4 | "session": "6W60KYR6zSjTkicN7WgwOA92n", 5 | "solutionCount": 500, 6 | "solutionSet": "0EI4mYoNuxZ6UnAPrvqq47B", 7 | "currencyNotice": { 8 | "ext": { 9 | "price": "USD862.00" 10 | } 11 | }, 12 | "bookingDetails": { 13 | "displayTotal": "USD861.40", 14 | "id": "Jpt7IVb5Qv8NBWx8cO9e4K004", 15 | "passengerCount": 1, 16 | "co2Emissions": { 17 | "estimate": 1761 18 | }, 19 | "ext": { 20 | "totalPrice": "USD861.40" 21 | }, 22 | "itinerary": { 23 | "distance": { 24 | "units": "MI", 25 | "value": 11725 26 | }, 27 | "slices": [ 28 | { 29 | "arrival": "2014-10-20T23:05-07:00", 30 | "departure": "2014-10-20T11:55+02:00", 31 | "stopCount": 2, 32 | "destination": { 33 | "code": "SEA", 34 | "name": "Seattle\/Tacoma Sea\/Tac", 35 | "city": { 36 | "name": "Seattle" 37 | } 38 | }, 39 | "ext": { 40 | "warnings": "CHANGE_OF_AIRPORT_SLICE,LONG_LAYOVER,CHANGE_OF_TERMINAL,RISKY_CONNECTION" 41 | }, 42 | "origin": { 43 | "code": "VRN", 44 | "name": "Verona Villafranca", 45 | "city": { 46 | "name": "Verona" 47 | } 48 | }, 49 | "segments": [ 50 | { 51 | "arrival": "2014-10-20T12:50+01:00", 52 | "departure": "2014-10-20T11:55+02:00", 53 | "duration": 115, 54 | "bookingInfos": [ 55 | { 56 | "bookingCode": "Q", 57 | "cabin": "COACH" 58 | } 59 | ], 60 | "carrier": { 61 | "code": "BA", 62 | "shortName": "British Airways p.l.c." 
63 | }, 64 | "connection": { 65 | "changeOfAirport": true, 66 | "changeOfTerminal": true, 67 | "duration": 180 68 | }, 69 | "destination": { 70 | "code": "LGW", 71 | "city": { 72 | "name": "London" 73 | } 74 | }, 75 | "flight": { 76 | "number": 2597 77 | }, 78 | "legs": [ 79 | { 80 | "arrival": "2014-10-20T12:50+01:00", 81 | "departure": "2014-10-20T11:55+02:00", 82 | "duration": 115, 83 | "aircraft": { 84 | "shortName": "Boeing 737" 85 | }, 86 | "destination": { 87 | "code": "LGW", 88 | "name": "London Gatwick", 89 | "city": { 90 | "name": "London" 91 | } 92 | }, 93 | "origin": { 94 | "code": "VRN", 95 | "city": { 96 | "name": "Verona" 97 | } 98 | }, 99 | "services": [ 100 | { 101 | "amenities": [ 102 | "Non-smoking" 103 | ], 104 | "meals": [ 105 | "Snack or Brunch" 106 | ] 107 | } 108 | ] 109 | } 110 | ], 111 | "origin": { 112 | "code": "VRN", 113 | "city": { 114 | "name": "Verona" 115 | } 116 | } 117 | }, 118 | { 119 | "arrival": "2014-10-20T18:25-05:00", 120 | "departure": "2014-10-20T15:50+01:00", 121 | "duration": 515, 122 | "bookingInfos": [ 123 | { 124 | "bookingCode": "Q", 125 | "cabin": "COACH" 126 | } 127 | ], 128 | "carrier": { 129 | "code": "BA", 130 | "shortName": "British Airways p.l.c." 
131 | }, 132 | "connection": { 133 | "changeOfTerminal": true, 134 | "duration": 140 135 | }, 136 | "destination": { 137 | "code": "ORD", 138 | "city": { 139 | "name": "Chicago" 140 | } 141 | }, 142 | "flight": { 143 | "number": 297 144 | }, 145 | "legs": [ 146 | { 147 | "arrival": "2014-10-20T18:25-05:00", 148 | "departure": "2014-10-20T15:50+01:00", 149 | "duration": 515, 150 | "aircraft": { 151 | "shortName": "Boeing 777" 152 | }, 153 | "destination": { 154 | "code": "ORD", 155 | "name": "Chicago O'Hare", 156 | "city": { 157 | "name": "Chicago" 158 | } 159 | }, 160 | "origin": { 161 | "code": "LHR", 162 | "city": { 163 | "name": "London" 164 | } 165 | }, 166 | "services": [ 167 | { 168 | "amenities": [ 169 | "Non-smoking" 170 | ], 171 | "meals": [ 172 | "Meal" 173 | ] 174 | } 175 | ] 176 | } 177 | ], 178 | "origin": { 179 | "code": "LHR", 180 | "city": { 181 | "name": "London" 182 | } 183 | } 184 | }, 185 | { 186 | "arrival": "2014-10-20T23:05-07:00", 187 | "codeshare": true, 188 | "departure": "2014-10-20T20:45-05:00", 189 | "duration": 260, 190 | "bookingInfos": [ 191 | { 192 | "bookingCode": "Q", 193 | "cabin": "COACH" 194 | } 195 | ], 196 | "carrier": { 197 | "code": "BA", 198 | "shortName": "British Airways p.l.c." 199 | }, 200 | "destination": { 201 | "code": "SEA", 202 | "city": { 203 | "name": "Seattle" 204 | } 205 | }, 206 | "ext": { 207 | "operationalDisclosure": "OPERATED BY AMERICAN AIRLINES INC." 
208 | }, 209 | "flight": { 210 | "number": 5761 211 | }, 212 | "legs": [ 213 | { 214 | "arrival": "2014-10-20T23:05-07:00", 215 | "departure": "2014-10-20T20:45-05:00", 216 | "duration": 260, 217 | "aircraft": { 218 | "shortName": "Boeing 737" 219 | }, 220 | "destination": { 221 | "code": "SEA", 222 | "name": "Seattle\/Tacoma Sea\/Tac", 223 | "city": { 224 | "name": "Seattle" 225 | } 226 | }, 227 | "origin": { 228 | "code": "ORD", 229 | "city": { 230 | "name": "Chicago" 231 | } 232 | }, 233 | "services": [ 234 | { 235 | "amenities": [ 236 | "Non-smoking" 237 | ] 238 | } 239 | ] 240 | } 241 | ], 242 | "origin": { 243 | "code": "ORD", 244 | "city": { 245 | "name": "Chicago" 246 | } 247 | } 248 | } 249 | ] 250 | }, 251 | { 252 | "arrival": "2014-11-09T10:25+01:00", 253 | "departure": "2014-11-07T14:05-08:00", 254 | "stopCount": 1, 255 | "destination": { 256 | "code": "VRN", 257 | "name": "Verona Villafranca", 258 | "city": { 259 | "name": "Verona" 260 | } 261 | }, 262 | "ext": { 263 | "warnings": "LONG_LAYOVER,CHANGE_OF_AIRPORT_SLICE,OVERNIGHT" 264 | }, 265 | "origin": { 266 | "code": "SEA", 267 | "name": "Seattle\/Tacoma Sea\/Tac", 268 | "city": { 269 | "name": "Seattle" 270 | } 271 | }, 272 | "segments": [ 273 | { 274 | "arrival": "2014-11-08T07:30+00:00", 275 | "departure": "2014-11-07T14:05-08:00", 276 | "duration": 565, 277 | "bookingInfos": [ 278 | { 279 | "bookingCode": "O", 280 | "cabin": "COACH" 281 | } 282 | ], 283 | "carrier": { 284 | "code": "BA", 285 | "shortName": "British Airways p.l.c." 
286 | }, 287 | "connection": { 288 | "changeOfAirport": true, 289 | "changeOfTerminal": true, 290 | "duration": 1435 291 | }, 292 | "destination": { 293 | "code": "LHR", 294 | "city": { 295 | "name": "London" 296 | } 297 | }, 298 | "flight": { 299 | "number": 52 300 | }, 301 | "legs": [ 302 | { 303 | "arrival": "2014-11-08T07:30+00:00", 304 | "departure": "2014-11-07T14:05-08:00", 305 | "duration": 565, 306 | "aircraft": { 307 | "shortName": "Boeing 777" 308 | }, 309 | "destination": { 310 | "code": "LHR", 311 | "name": "London Heathrow", 312 | "city": { 313 | "name": "London" 314 | } 315 | }, 316 | "origin": { 317 | "code": "SEA", 318 | "city": { 319 | "name": "Seattle" 320 | } 321 | }, 322 | "services": [ 323 | { 324 | "amenities": [ 325 | "Non-smoking" 326 | ], 327 | "meals": [ 328 | "Meal" 329 | ] 330 | } 331 | ] 332 | } 333 | ], 334 | "origin": { 335 | "code": "SEA", 336 | "city": { 337 | "name": "Seattle" 338 | } 339 | } 340 | }, 341 | { 342 | "arrival": "2014-11-09T10:25+01:00", 343 | "departure": "2014-11-09T07:25+00:00", 344 | "duration": 120, 345 | "bookingInfos": [ 346 | { 347 | "bookingCode": "S", 348 | "cabin": "COACH" 349 | } 350 | ], 351 | "carrier": { 352 | "code": "BA", 353 | "shortName": "British Airways p.l.c." 
354 | }, 355 | "destination": { 356 | "code": "VRN", 357 | "city": { 358 | "name": "Verona" 359 | } 360 | }, 361 | "flight": { 362 | "number": 2596 363 | }, 364 | "legs": [ 365 | { 366 | "arrival": "2014-11-09T10:25+01:00", 367 | "departure": "2014-11-09T07:25+00:00", 368 | "duration": 120, 369 | "aircraft": { 370 | "shortName": "Boeing 737" 371 | }, 372 | "destination": { 373 | "code": "VRN", 374 | "name": "Verona Villafranca", 375 | "city": { 376 | "name": "Verona" 377 | } 378 | }, 379 | "origin": { 380 | "code": "LGW", 381 | "city": { 382 | "name": "London" 383 | } 384 | }, 385 | "services": [ 386 | { 387 | "amenities": [ 388 | "Non-smoking" 389 | ], 390 | "meals": [ 391 | "Snack or Brunch" 392 | ] 393 | } 394 | ] 395 | } 396 | ], 397 | "origin": { 398 | "code": "LGW", 399 | "city": { 400 | "name": "London" 401 | } 402 | } 403 | } 404 | ] 405 | } 406 | ] 407 | }, 408 | "pricings": [ 409 | { 410 | "displayPrice": "USD861.40", 411 | "ext": { 412 | "totalPrice": "USD861.40", 413 | "pax": { 414 | "adults": 1 415 | } 416 | } 417 | } 418 | ], 419 | "tickets": [ 420 | { 421 | "displayPrice": "USD861.40", 422 | "pricings": [ 423 | { 424 | "displayPrice": "USD861.40", 425 | "ext": { 426 | "totalPrice": "USD861.40", 427 | "pax": { 428 | "adults": 1 429 | }, 430 | "taxTotals": [ 431 | { 432 | "code": "AY", 433 | "totalDisplayPrice": "USD11.20", 434 | "tax": { 435 | "key": "0\/0", 436 | "name": "US September 11th Security Fee" 437 | } 438 | }, 439 | { 440 | "code": "XA", 441 | "totalDisplayPrice": "USD5.00", 442 | "tax": { 443 | "key": "0\/1", 444 | "name": "USDA APHIS Fee" 445 | } 446 | }, 447 | { 448 | "code": "XY", 449 | "totalDisplayPrice": "USD7.00", 450 | "tax": { 451 | "key": "0\/2", 452 | "name": "US Immigration Fee" 453 | } 454 | }, 455 | { 456 | "code": "YC", 457 | "totalDisplayPrice": "USD5.50", 458 | "tax": { 459 | "key": "0\/3", 460 | "name": "US Customs Fee" 461 | } 462 | }, 463 | { 464 | "code": "UB", 465 | "totalDisplayPrice": "USD76.40", 466 | "tax": { 467 
| "key": "0\/4", 468 | "name": "United Kingdom Passenger Service Charge" 469 | } 470 | }, 471 | { 472 | "code": "US", 473 | "totalDisplayPrice": "USD17.50", 474 | "tax": { 475 | "key": "0\/5", 476 | "name": "US International Arrival Tax" 477 | } 478 | }, 479 | { 480 | "code": "IT", 481 | "totalDisplayPrice": "USD8.00", 482 | "tax": { 483 | "key": "0\/6", 484 | "name": "Italian Embarkation Tax" 485 | } 486 | }, 487 | { 488 | "code": "VT", 489 | "totalDisplayPrice": "USD4.10", 490 | "tax": { 491 | "key": "0\/7", 492 | "name": "Italian Security Charge" 493 | } 494 | }, 495 | { 496 | "code": "EX", 497 | "totalDisplayPrice": "USD0.90", 498 | "tax": { 499 | "key": "0\/8", 500 | "name": "Italian Security Bag Charge" 501 | } 502 | }, 503 | { 504 | "code": "HB", 505 | "totalDisplayPrice": "USD8.60", 506 | "tax": { 507 | "key": "0\/9", 508 | "name": "Italian Council City Tax" 509 | } 510 | }, 511 | { 512 | "code": "MJ", 513 | "totalDisplayPrice": "USD1.20", 514 | "tax": { 515 | "key": "0\/10", 516 | "name": "Italian Passenger Service Charge" 517 | } 518 | }, 519 | { 520 | "code": "YQ", 521 | "totalDisplayPrice": "USD256.00", 522 | "tax": { 523 | "key": "0\/11" 524 | } 525 | }, 526 | { 527 | "code": "US", 528 | "totalDisplayPrice": "USD17.50", 529 | "tax": { 530 | "key": "0\/14", 531 | "name": "US International Departure Tax" 532 | } 533 | }, 534 | { 535 | "code": "XF", 536 | "totalDisplayPrice": "USD4.50", 537 | "tax": { 538 | "key": "0\/16", 539 | "name": "US Passenger Facility Charge" 540 | } 541 | } 542 | ] 543 | }, 544 | "fareCalculations": [ 545 | { 546 | "lines": [ 547 | "VRN BA X\/LON BA X\/E\/CHI BA SEA M 233.89QKX7SAL BA X\/LON BA VRN M 217.57OKW7SAL NUC 451.46 END ROE 0.735385 XT 0.90EX 8.60HB 8.00IT 1.20MJ 4.10VT 76.40UB 5.50YC 7.00XY 5.00XA 35.00US 11.20AY 256.00YQ 4.50XF SEA4.50" 548 | ] 549 | } 550 | ], 551 | "fares": [ 552 | { 553 | "carrier": "BA", 554 | "code": "QKX7SAL", 555 | "destinationCity": "SEA", 556 | "displayAdjustedPrice": "USD226.91", 557 | "key": 
"0\/0", 558 | "originCity": "VRN", 559 | "tag": "ROUND-TRIP", 560 | "bookingInfos": [ 561 | { 562 | "bookingCode": "Q", 563 | "cabin": "COACH", 564 | "segment": { 565 | "destination": "LGW", 566 | "origin": "VRN" 567 | } 568 | }, 569 | { 570 | "bookingCode": "Q", 571 | "cabin": "COACH", 572 | "segment": { 573 | "destination": "ORD", 574 | "origin": "LHR" 575 | } 576 | }, 577 | { 578 | "bookingCode": "Q", 579 | "cabin": "COACH", 580 | "segment": { 581 | "destination": "SEA", 582 | "origin": "ORD" 583 | } 584 | } 585 | ], 586 | "ptcs": [ 587 | "ADT" 588 | ] 589 | }, 590 | { 591 | "carrier": "BA", 592 | "code": "OKW7SAL", 593 | "destinationCity": "VRN", 594 | "displayAdjustedPrice": "USD211.08", 595 | "key": "0\/1", 596 | "originCity": "SEA", 597 | "tag": "ROUND-TRIP", 598 | "bookingInfos": [ 599 | { 600 | "bookingCode": "O", 601 | "cabin": "COACH", 602 | "segment": { 603 | "destination": "LHR", 604 | "origin": "SEA" 605 | } 606 | }, 607 | { 608 | "bookingCode": "S", 609 | "cabin": "COACH", 610 | "segment": { 611 | "destination": "VRN", 612 | "origin": "LGW" 613 | } 614 | } 615 | ], 616 | "ptcs": [ 617 | "ADT" 618 | ] 619 | } 620 | ], 621 | "notes": [ 622 | "This ticket is non-refundable.", 623 | "Changes to this ticket will incur a penalty fee." 
"""Flask web front end for the flight scraper.

Exposes HTTP endpoints that run fare searches through ``FlightScraper`` and
render the results with the templates in ``templates/``.  Configuration
(MongoDB connection, web port) is read from ``flight_scraper.cfg`` in the
current working directory at import time.
"""
try:  # Python 3 module name
    import configparser as ConfigParser
except ImportError:  # Python 2 module name
    import ConfigParser
import os

import mongoengine
from dateutil.rrule import DAILY
from datetime import datetime
from flask import Flask, abort, render_template, send_from_directory, request
from flight_scraper.scraper import FlightScraper
from flight_scraper.utils.graph import graph_prices
from flight_scraper.utils.scraper import generate_date_pairs, search_seats

#----------------------------------------
# Utilities
#----------------------------------------
DATE_FORMAT = '%m-%d-%Y'

# Fixed search window used by /graph_weekly.
WEEKLY_START_DATE = datetime.strptime("1-1-2013", DATE_FORMAT)
WEEKLY_UNTIL_DATE = datetime.strptime("12-31-2013", DATE_FORMAT)
# NOTE(review): presumably dateutil weekday numbers (4 = Friday, 6 = Sunday);
# confirm against generate_date_pairs.
WEEKLY_WEEKDAYS = [4, 6]


def _required_arg(name):
    """Return query parameter ``name``; abort with HTTP 400 if it is absent or empty."""
    value = request.args.get(name)
    if not value:
        abort(400, "Missing required parameter '%s'" % name)
    return value


def _date_arg(name):
    """Return query parameter ``name`` parsed as MM-DD-YYYY; abort with HTTP 400 otherwise."""
    raw = request.args.get(name)
    try:
        return datetime.strptime(raw, DATE_FORMAT)
    except (TypeError, ValueError):
        # TypeError: parameter missing (None); ValueError: wrong format.
        abort(400, "Parameter '%s' must be a date formatted MM-DD-YYYY" % name)


def _int_list_arg(name):
    """Return every occurrence of query parameter ``name`` as an int; abort with HTTP 400 on junk."""
    try:
        return [int(value) for value in request.args.getlist(name)]
    except ValueError:
        abort(400, "Parameter '%s' must contain only integers" % name)


def _day_range_arg(name):
    """Parse a trip length such as ``"7"`` or ``"5-9"`` into a list of ints.

    A single number ``n`` becomes ``[n, n]``; a dash-separated value becomes
    one int per part.  Aborts with HTTP 400 when the parameter is missing or
    any part is not an integer.
    """
    raw = _required_arg(name)
    try:
        bounds = [int(part) for part in raw.split('-')]
    except ValueError:
        abort(400, "Parameter '%s' must look like '7' or '5-9'" % name)
    if len(bounds) == 1:
        bounds = bounds * 2
    return bounds


#----------------------------------------
# initialization
#----------------------------------------
app = Flask(__name__)

Config = ConfigParser.ConfigParser()
# ConfigParser.read returns the list of files it managed to parse.
if not Config.read('flight_scraper.cfg'):
    print("Please copy flight_scraper.cfg.example to flight_scraper.cfg")
    raise Exception('Could not read config file')

# The "host" option is optional; without it mongoengine uses its default host.
try:
    host_string = Config.get("mongodb", "host")
except ConfigParser.NoOptionError:
    mongoengine.connect(Config.get("mongodb", "name"))
else:
    mongoengine.connect(Config.get("mongodb", "name"), host=host_string)

# NOTE(review): DEBUG is enabled while the launcher below binds to 0.0.0.0.
# Flask's debug mode exposes an interactive debugger; do not run this
# configuration on a reachable network.
app.config.update(
    DEBUG = True,
)

#----------------------------------------
# controllers
#----------------------------------------
@app.route('/favicon.ico')
def favicon():
    """Serve the site icon from ``static/ico``."""
    return send_from_directory(os.path.join(app.root_path, 'static'), 'ico/favicon.ico')


@app.route("/")
def index():
    """Render the landing page."""
    return render_template('index.html')


@app.route("/flight/query", methods=['GET'])
def flight_query():
    """Search flights for every date pair generated from the query parameters.

    Query parameters: ``origin``, ``dest``, ``freq``, ``start_date`` and
    ``until_date`` (MM-DD-YYYY) and zero or more ``weekdays`` integers.
    Renders ``query.html`` with one ``[depart_iso, return_iso, search_result]``
    row per date pair.  Responds with HTTP 400 when a required parameter is
    missing or malformed.
    """
    origin = _required_arg('origin')
    dest = _required_arg('dest')
    freq = request.args.get('freq')
    start_date = _date_arg('start_date')
    until_date = _date_arg('until_date')
    weekdays = _int_list_arg('weekdays')

    # Can probably use dateutils parser for this.
    # NOTE(review): any value other than "DAILY" is forwarded unchanged;
    # confirm what generate_date_pairs accepts.
    if freq == "DAILY":
        freq = DAILY

    result = []
    for depart, ret in generate_date_pairs(freq, weekdays, start_date, until_date):
        scraper = FlightScraper(origin, dest, depart, ret)
        result.append([depart.isoformat(), ret.isoformat(), scraper.search_flights()])

    return render_template('query.html', result=result)


@app.route("/flight/calendar_query", methods=['GET'])
def calendar_flight_query():
    """Run a calendar search and render ``calendar_query.html``.

    Query parameters: ``origin``, ``dest``, ``start_date`` and ``until_date``
    (MM-DD-YYYY), ``length`` (``"7"`` or ``"5-9"``), plus optional
    ``airlines`` and ``max_stops``, which are forwarded to ``FlightScraper``
    as received.  Responds with HTTP 400 when a required parameter is missing
    or malformed.
    """
    origin = _required_arg('origin')
    dest = _required_arg('dest')
    start_date = _date_arg('start_date')
    until_date = _date_arg('until_date')
    airlines = request.args.get('airlines')
    day_range = _day_range_arg('length')
    max_stops = request.args.get('max_stops')

    scraper = FlightScraper(origin, dest, start_date, until_date,
                            airlines=airlines, day_range=day_range,
                            max_stops=max_stops)
    return render_template('calendar_query.html', result=scraper.search_calendar())


@app.route("/seat/query", methods=['GET'])
def seat_query():
    """Placeholder for the seat availability search.

    TODO: Refactor.  The earlier implementation parsed ``origin``, ``dest``
    and ``dept`` and rendered ``seats.html`` with
    ``search_seats(origin, dest, dept)``.
    """
    # A view must return a response; answer 501 until the refactor lands.
    return "Seat query is not implemented yet.", 501


@app.route("/graph", methods=['GET'])
def graph_flights():
    """Render the price graph for a single departure/return date pair.

    Query parameters: ``origin``, ``dest``, ``dept`` and ``ret`` (MM-DD-YYYY).
    Renders ``graph.html`` with the ``graph_prices`` output, the scraper's
    solutions and their count.  Responds with HTTP 400 when a parameter is
    missing or malformed.
    """
    origin = _required_arg('origin')
    dest = _required_arg('dest')
    dept = _date_arg('dept')
    ret = _date_arg('ret')

    scraper = FlightScraper(origin, dest, dept, ret)
    solutions = scraper.solutions()

    return render_template('graph.html', json_obj=graph_prices(scraper),
                           solutions=solutions, lengthSol=len(solutions))


@app.route("/graph_seats", methods=['GET'])
def graph_2():
    """Placeholder for the seat availability graph.

    TODO: Refactor.  The earlier implementation parsed ``origin``, ``dest``,
    ``dept`` and ``ret`` and rendered ``graph_seats.html`` with
    ``graph_seats(origin, dest, dept)``.
    """
    # A view must return a response; answer 501 until the refactor lands.
    return "Seat graph is not implemented yet.", 501


@app.route("/graph_weekly", methods=['GET'])
def graph_weekly():
    """Render one price graph per generated date pair in the fixed 2013 window.

    Query parameters: ``origin`` and ``dest``.  Responds with HTTP 400 when
    either is missing.
    """
    origin = _required_arg('origin')
    dest = _required_arg('dest')

    date_pairs = generate_date_pairs(DAILY, WEEKLY_WEEKDAYS,
                                     WEEKLY_START_DATE, WEEKLY_UNTIL_DATE)

    # Build a scraper per date pair; there is no module-level scraper instance.
    result = [graph_prices(FlightScraper(origin, dest, depart, ret))
              for depart, ret in date_pairs]

    # NOTE(review): graph_weekly.html is not in the repository's template
    # listing; confirm it exists before relying on this endpoint.
    return render_template('graph_weekly.html', graphs=result, length=len(result))


#----------------------------------------
# launch
#----------------------------------------

if __name__ == "__main__":
    # Only consult the config file when the PORT environment variable is unset.
    port = os.environ.get("PORT")
    if port is None:
        port = Config.get("webapp", "port")
    app.run(host='0.0.0.0', port=int(port))