├── .gitignore
├── README.md
├── alert
├── __init__.py
└── email_alert.py
├── automation
├── __init__.py
├── scrape_daily.cfg
└── scrape_daily.py
├── flight_scraper.cfg.example
├── flight_scraper
├── __init__.py
├── engines
│ ├── __init__.py
│ ├── flight_stats
│ │ ├── __init__.py
│ │ └── driver.py
│ └── ita_matrix
│ │ ├── __init__.py
│ │ └── driver.py
├── scraper.py
├── solution_model.py
└── utils
│ ├── __init__.py
│ ├── graph.py
│ └── scraper.py
├── setup.py
├── static
└── ico
│ └── favicon.ico
├── templates
├── base.html
├── calendar_query.html
├── graph.html
├── graph_seats.html
├── index.html
├── query.html
└── seats.html
├── test
├── sample_ita_itinerary.json
├── sample_search.json
└── sample_search2.json
└── web_app.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | *.pyc
3 | build
4 | .idea
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | flight_scraper
2 | ===============
3 |
4 | This is a quick script that I reverse engineered in order to poll ITA Matrix Airfare Search. It is still a bit rough around the edges as I try to clean it up. The goal is to determine a correlation between seat availability and fare prices in order to alert me to trends for flight segments. I want to build a cool dashboard that feeds all this information to me automatically.
5 |
6 | Right now I use ITA Matrix for airfare search & Flightstats for seat availability. I'm looking to get information from some other sites as well in order to get better data.
7 |
8 | The ITA Matrix Airfare Search is a great tool so I suggest you go check it out here: http://matrix.itasoftware.com/
9 | Also check out Flightstats (https://flightstats.com); it's a great site for finding flight-tracking information. I use it to monitor whether flights are on time or not.
10 |
11 | #Installation#
12 | (build steps)
13 | Copy flight_scraper.cfg.example to flight_scraper.cfg
14 | python setup.py build
15 | python setup.py install
16 |
17 | #Dependencies#
18 | * Requests (http://docs.python-requests.org/)
19 | * MongoDB (http://www.mongodb.org/)
20 | * Flask (http://flask.pocoo.org/)
21 | * python-dateutil (http://labix.org/python-dateutil)
22 | * Google Visualization API (https://code.google.com/p/google-visualization-python/)
23 | * PhantomJS (http://phantomjs.org/)
24 | * Selenium (http://docs.seleniumhq.org/)
25 |
26 | #TO-DO#
27 | * MapReduce Job to map seat availability to pricing information.
28 | * Integrate with Prediction.io for Analysis.
29 | * Seat map for availability - alert if aisle/window seat becomes available.
30 | * Upgrade list - to track which flights give higher chance of upgrades.
31 | * Add support for non-direct flights
32 | * Calendar automated search support.
33 | * Command Line Interface
34 | * Backtest after enough data is gathered.
35 |
36 | * Add more Search Engine Scrapers
37 | * Add price forecasting from Kayak & Bing
38 | * Output reminders kayak style:
39 | * http://www.kayak.com/images/sample-alerts.gif
40 | * Stylize WebApp for reporting/Dashboard
41 |
42 |
--------------------------------------------------------------------------------
/alert/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mayanez'
2 |
--------------------------------------------------------------------------------
/alert/email_alert.py:
--------------------------------------------------------------------------------
1 | import mongoengine
2 | import smtplib
3 | from datetime import datetime
4 |
def send_email(user, password, from_addr, to_addr, subject, msg):
    """Send a single plain-text e-mail through Gmail's SMTP server.

    Opens a connection to ``smtp.gmail.com:587``, upgrades it with STARTTLS,
    logs in and sends one message built from the given header values.

    :param user: account name used for the SMTP login.
    :param password: password for ``user``.
    :param from_addr: envelope sender; also written to the ``From`` header.
    :param to_addr: envelope recipient; also written to the ``To`` header.
    :param subject: text of the ``Subject`` header.
    :param msg: message body.
    :raises smtplib.SMTPException: if the TLS upgrade, login or delivery fails.
    """
    # Imported locally so this function does not depend on a file-level import.
    from email.utils import formatdate

    server = smtplib.SMTP('smtp.gmail.com:587')
    try:
        server.starttls()
        server.login(user, password)

        # The Date header must be an RFC 2822 date-time, not a bare
        # "YYYY-MM-DD" string.
        senddate = formatdate(localtime=True)

        formatted_message = "Date: %s\r\nFrom: %s\r\nTo: %s\r\nSubject: %s\r\nX-Mailer: My-Mail\r\n\r\n %s" % (senddate, from_addr, to_addr, subject, msg)
        server.sendmail(from_addr, to_addr, formatted_message)
    except Exception:
        # Release the socket before propagating; previously a failed login or
        # send left the connection open.
        server.close()
        raise
    server.quit()
15 |
def send_alert(email, origin, destination, dept_date, ret_date, flights):
    """Placeholder for the fare alert; not implemented yet.

    TODO: Refactor. The function currently ignores all of its arguments and
    returns ``None``.
    """
    # Example of the lookup this alert was meant to be built on:
    # get_min_price_itinerary(get_itineraries("SEA", "JFK", datetime.strptime("12-13-2013", "%m-%d-%Y"), datetime.strptime("12-15-2013", "%m-%d-%Y"), set([Flight(airline="DL", fno="1542")])))
    return None
19 |
if __name__ == '__main__':
    # Manual smoke test: connect to the local database and print whatever
    # send_alert returns.
    mongoengine.connect('flight_scraper')
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print(send_alert(None, None, None, None, None, None))
--------------------------------------------------------------------------------
/automation/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mayanez'
2 |
--------------------------------------------------------------------------------
/automation/scrape_daily.cfg:
--------------------------------------------------------------------------------
1 | [mongodb]
2 | name = flight_scraper
3 |
4 | [email]
5 | username = user
6 | password = pass
7 |
8 | [dates]
9 | end = 1-1-2014
--------------------------------------------------------------------------------
/automation/scrape_daily.py:
--------------------------------------------------------------------------------
1 | #!/usr/local/bin/python
2 | import ConfigParser
3 | import datetime
4 | import logging
5 | from mongoengine import *
6 | from dateutil.rrule import *
7 | from flight_scraper.flight_scraper import FlightScraper
8 | from flight_scraper.utils.scraper import generate_date_pairs
9 |
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read the configuration that sits next to this script first, so the job also
# finds it when started from another working directory (e.g. from cron).
# A scrape_daily.cfg in the current directory is still read afterwards and
# overrides it, which keeps the previous behaviour. Missing files are ignored
# by ConfigParser.read().
Config = ConfigParser.ConfigParser()
Config.read([
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'scrape_daily.cfg'),
    'scrape_daily.cfg',
])
14 |
def bidirectional_search(origin, dest, until_date):
    """Sample automation script: search round trips in both directions.

    First searches ``origin`` -> ``dest`` for every (Friday, Sunday) date pair,
    then ``dest`` -> ``origin`` for every (Friday, Monday) date pair, each time
    running ``FlightScraper.search_flights()``.

    :param origin: airport code of the first direction's origin.
    :param dest: airport code of the first direction's destination.
    :param until_date: last date handed to ``generate_date_pairs``.
    """
    scraper = FlightScraper()

    # Weekday numbers (Monday == 0) for the days used below.
    MO, FR, SU = 0, 4, 6

    first_day = __get_start_date()

    # Each entry is a (depart_date, return_date) pair, in that order, between
    # first_day and until_date. Both schedules are built before any search.
    outbound_pairs = generate_date_pairs(DAILY, (FR, SU), first_day, until_date)
    inbound_pairs = generate_date_pairs(DAILY, (FR, MO), first_day, until_date)

    searches = (
        (origin, dest, outbound_pairs),   # Search 1
        (dest, origin, inbound_pairs),    # Search 2
    )
    for search_origin, search_dest, date_pairs in searches:
        for pair in date_pairs:
            scraper.origin = search_origin
            scraper.destination = search_dest
            scraper.depart_date = pair[0]
            scraper.return_date = pair[1]

            scraper.search_flights()
49 |
def __get_start_date(today=None):
    """Return the date from which the automated search should start.

    On a Saturday or Sunday the start is pushed two days ahead, on a Monday
    one day ahead; on any other day it is the given date itself.

    :param today: date to base the decision on; defaults to
        ``datetime.date.today()``.
    :returns: a ``datetime.date``.
    """
    if today is None:
        today = datetime.date.today()

    # Use the integer numbering of date.weekday() (Monday == 0). The
    # module-level MO/SA/SU names come from ``from dateutil.rrule import *``
    # and are weekday objects, which never compare equal to these ints, so
    # the original checks could not match.
    monday, saturday, sunday = 0, 5, 6
    weekday = today.weekday()

    if weekday in (saturday, sunday):
        return today + datetime.timedelta(days=2)
    if weekday == monday:
        return today + datetime.timedelta(days=1)
    return today
62 |
if __name__ == '__main__':
    #Connect to MongoDB
    connect(Config.get("mongodb", "name"))

    origin = "SEA"
    dest = "PDX"

    logger.info("Started at %s", datetime.datetime.utcnow())
    try:
        until_date = datetime.datetime.strptime(Config.get("dates", "end"), "%m-%d-%Y")
        bidirectional_search(origin, dest, until_date)
    except Exception:
        # Top-level boundary of an unattended job: record the failure with its
        # traceback and still fall through to the "Ended" line below.
        # ("except Exception, e" is Python-2-only syntax and logger.error(e)
        # dropped the traceback.)
        logger.exception("Daily scrape failed")

    logger.info("Ended at %s", datetime.datetime.utcnow())
--------------------------------------------------------------------------------
/flight_scraper.cfg.example:
--------------------------------------------------------------------------------
1 | [mongodb]
2 | name = flight_scraper
3 | #host = mongodb://localhost/dbname
4 |
5 | [email]
6 | username = user
7 | password = pass
8 |
9 | [webapp]
10 | port = 5454
--------------------------------------------------------------------------------
/flight_scraper/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mayanez/flight_scraper/17c5f202df0c443740e4f0a428dcc335a838a85c/flight_scraper/__init__.py
--------------------------------------------------------------------------------
/flight_scraper/engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mayanez/flight_scraper/17c5f202df0c443740e4f0a428dcc335a838a85c/flight_scraper/engines/__init__.py
--------------------------------------------------------------------------------
/flight_scraper/engines/flight_stats/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mayanez'
2 |
--------------------------------------------------------------------------------
/flight_scraper/engines/flight_stats/driver.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import urllib
4 | import datetime
5 | from flight_scraper.solution_model import Seat, Flight, SeatQuery
6 | from selenium import webdriver
7 |
8 | logging.basicConfig(level=logging.INFO)
9 |
class FlightStatsDriver(object):
    """Queries flightstats.com for seat availability through PhantomJS.

    The query is configured through the ``origin``, ``destination`` and
    ``depart_date`` properties and executed with :meth:`search_seats`, which
    stores a ``SeatQuery`` and shuts the browser down. An instance is
    therefore good for one search.
    """

    __logger = logging.getLogger(__name__)
    __base_url = "http://www.flightstats.com"
    __request_uri = "/go/FlightAvailability/flightAvailability.do"
    __http_header = {
        'Host': 'www.flightstats.com',
        'Content-Type': 'text/html'}
    # Template of the query-string parameters; copied per instance in __init__.
    __params = {
        'departure': '',
        'airline': '',
        'arrival': '',
        'connection': '',
        'queryDate': '',  # yyyy-mm-dd
        'queryTime': '2',
        'excludeConnectionCodes': '',
        'cabinCode': 'A',
        'numOfSeats': '1',
        'queryType': 'D',
        'fareClassCodes': ''}
    __driver = None

    def __init__(self, executable_path, service_log_path):
        """Start a PhantomJS browser.

        :param executable_path: path of the PhantomJS binary.
        :param service_log_path: file PhantomJS writes its service log to.
        """
        # Work on a private copy so that setting origin/destination/date on
        # one driver does not leak into every other instance through the
        # class-level dict.
        self.__params = dict(self.__params)
        self.__driver = webdriver.PhantomJS(executable_path=executable_path, service_log_path=service_log_path)

    @property
    def origin(self):
        """Departure airport code sent as the ``departure`` parameter."""
        return self.__params['departure']

    @origin.setter
    def origin(self, origin):
        self.__params['departure'] = origin

    @property
    def destination(self):
        """Arrival airport code sent as the ``arrival`` parameter."""
        return self.__params['arrival']

    @destination.setter
    def destination(self, destination):
        self.__params['arrival'] = destination

    @property
    def depart_date(self):
        """Query date as a ``datetime``; stored as a ``YYYY-MM-DD`` string.

        Reading it before a date was set raises ``ValueError`` because the
        stored string is still empty.
        """
        return datetime.datetime.strptime(self.__params['queryDate'], "%Y-%m-%d")

    @depart_date.setter
    def depart_date(self, depart_date):
        self.__params['queryDate'] = depart_date.strftime("%Y-%m-%d")

    def __extract_flights_with_seats(self, json_obj):
        """Convert the page's ``availRoutes`` data into ``Flight`` objects.

        :param json_obj: decoded ``availRoutes`` structure; each value holds a
            ``flights`` mapping whose entries carry ``cabins`` and, per cabin,
            a ``fares`` mapping of fare class to seat count.
        :returns: list of ``Flight`` objects with their ``seats`` filled in.
        """
        flight_list = list()
        self.__logger.info('Extracting flights with seats')
        # Only the values are needed; the mapping keys are not used.
        for results in json_obj.values():
            for flight_info in results['flights'].values():
                flight = Flight(dep_city=flight_info['depCode'],
                                arr_city=flight_info['arrCode'],
                                airline=flight_info['airline'],
                                fno=flight_info['flightNumber'],
                                dep_time=self.depart_date)
                seats = list()

                for cabin in flight_info['cabins'].values():
                    cabin_code = cabin['code']

                    for fare_class, seat_availability in cabin['fares'].items():
                        # An empty string is stored as zero available seats.
                        if seat_availability == "":
                            seat_availability = 0
                        else:
                            seat_availability = int(seat_availability)

                        seats.append(Seat(cabin_code=cabin_code, fare_class=fare_class, availability=seat_availability))

                flight.seats = seats
                flight_list.append(flight)

        return flight_list

    def search_seats(self):
        """Run the availability query, save it and return the flights found.

        Loads the availability page, reads the page's ``availRoutes``
        JavaScript variable, stores the result as a ``SeatQuery`` and quits
        the browser.

        :returns: list of ``Flight`` objects with seat information.
        """
        params = urllib.urlencode(self.__params)

        request_url = self.__base_url + self.__request_uri + ("?%s" % params)
        try:
            self.__logger.info('Requesting URL: %s' % (request_url))
            self.__driver.get(request_url)
            self.__logger.info('Running Javascript to retrieve available routes')
            result = self.__driver.execute_script('return JSON.stringify(availRoutes)')
            routes = json.loads(result)

            flight_list = self.__extract_flights_with_seats(routes)
            self.__logger.info('Saving SeatQuery to Database')
            seat_query = SeatQuery(flights=flight_list)
            seat_query.save()
        finally:
            self.__logger.info('Quiting the Web Driver')
            # quit must be *called*; the bare attribute access used before
            # left the PhantomJS process running.
            self.__driver.quit()

        return flight_list
106 |
--------------------------------------------------------------------------------
/flight_scraper/engines/ita_matrix/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mayanez'
2 |
--------------------------------------------------------------------------------
/flight_scraper/engines/ita_matrix/driver.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import datetime
4 | import re
5 | import requests
6 | from abc import abstractmethod
7 | from flight_scraper.solution_model import Flight, Itinerary, CalendarSolution, TripMinimumPrice
8 | from flight_scraper.solution_model import ItaSolution, ItaItinerary, PriceComponent
9 |
10 | logging.basicConfig(level=logging.INFO)
11 |
class AbstractItaMatrixDriver(object):
    """Shared request handling for the ITA Matrix search drivers.

    Subclasses supply the ``_base_request`` query-string prefix, a
    ``_json_request`` template, ``depart_date``/``return_date`` properties and
    an implementation of :meth:`_parse_solutions`.
    """

    _logger = logging.getLogger(__name__)
    engine = "ITA Matrix"
    _base_url = "http://matrix.itasoftware.com"
    _request_uri = "/xhr/shop/search?"
    _http_header = {
        'Host': 'matrix.itasoftware.com',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cache-Control': 'no-cache',
        'Content-Length': '0'
    }

    def __init__(self, origin, destination, depart_date, return_date, max_stops, airlines):
        """Fill the request template with the search parameters.

        :param origin: origin airport code.
        :param destination: destination airport code.
        :param depart_date: date handed to the subclass' ``depart_date`` setter.
        :param return_date: date handed to the subclass' ``return_date`` setter.
        :param max_stops: maximum number of stops; ``None`` means 2.
        :param airlines: airline filter, or ``None`` to keep the template's.
        """
        # The template is a class attribute; give each instance its own deep
        # copy (a JSON round trip, since the data is plain JSON) so that two
        # drivers do not overwrite each other's search parameters.
        self._json_request = json.loads(json.dumps(self._json_request))

        self.origin = origin
        self.destination = destination
        self.depart_date = depart_date
        self.return_date = return_date
        self.max_stops = max_stops
        self.airlines = airlines

    @property
    def origin(self):
        """Origin of the outbound slice."""
        return self._json_request['slices'][0]['origins'][0]

    @origin.setter
    def origin(self, origin):
        # The return slice flies back to the origin.
        self._json_request['slices'][0]['origins'][0] = origin
        self._json_request['slices'][1]['destinations'][0] = origin

    @property
    def destination(self):
        """Destination of the outbound slice."""
        return self._json_request['slices'][0]['destinations'][0]

    @destination.setter
    def destination(self, destination):
        # The return slice departs from the destination.
        self._json_request['slices'][0]['destinations'][0] = destination
        self._json_request['slices'][1]['origins'][0] = destination

    @property
    def max_stops(self):
        """Value of the request's ``maxStopCount`` field."""
        return self._json_request['maxStopCount']

    @max_stops.setter
    def max_stops(self, stops):
        if stops is None:
            stops = 2
        self._json_request['maxStopCount'] = stops

    @property
    def airlines(self):
        """``routeLanguage`` of the outbound slice."""
        return self._json_request['slices'][0]['routeLanguage']

    @airlines.setter
    def airlines(self, airlines):
        # None leaves whatever the request template already contains.
        if airlines is not None:
            self._json_request['slices'][0]['routeLanguage'] = airlines
            self._json_request['slices'][1]['routeLanguage'] = airlines

    def build_request_url(self):
        """Return the full search URL with the JSON request appended."""
        data = self._base_request + json.dumps(self._json_request)
        request_url = self._base_url + self._request_uri + data
        self._logger.debug('Request URl: %s', request_url)
        return request_url

    def build_solutions(self):
        """POST the search and return what ``_parse_solutions`` builds."""
        request_url = self.build_request_url()

        self._logger.info('Making request to ITA Matrix: %s', request_url)
        response = requests.post(request_url, headers=self._http_header)
        # The first four characters of the body are not part of the JSON
        # document (presumably an anti-hijacking prefix -- TODO confirm).
        response_json = json.loads(response.text[4:])

        self._logger.debug('Response: %s', response_json)
        self._logger.info('Creating objects to insert to database')
        return self._parse_solutions(response_json)

    # NOTE: without an ABCMeta metaclass this decorator is not enforced at
    # instantiation time; the NotImplementedError below is the actual guard.
    @abstractmethod
    def _parse_solutions(self, response_json=None):
        """Turn the decoded response into a solution object (subclass hook).

        :param response_json: decoded JSON body of the search response, as
            passed by :meth:`build_solutions`.
        :raises NotImplementedError: always, unless overridden.
        """
        raise NotImplementedError('Subclasses must implement _parse_solutions')
91 |
class Slice(object):
    """One leg (origin, destination, date, airline filter) of an ITA search.

    The leg is kept as the JSON fragment that goes into the request's
    ``slices`` list (``_json_request``).
    """

    def __init__(self, origin, destination, depart_date, airlines=None):
        """Create a slice.

        :param origin: origin airport code.
        :param destination: destination airport code.
        :param depart_date: object with ``strftime`` giving the travel date.
        :param airlines: airline codes separated by spaces and/or commas, or
            ``None`` for no airline filter.
        """
        self._json_request = json.loads(
            '{"origins":["PDX"],"originPreferCity":false,"commandLine":"airlines AA DL AS UA",'
            '"destinations":["SEA"],"destinationPreferCity":false,"date":"2013-06-07","isArrivalDate":false,'
            '"dateModifier":{"minus":0,"plus":0}}')
        self.origin = origin
        self.destination = destination
        self.depart_date = depart_date
        self.airlines = airlines

    @property
    def origin(self):
        """Origin airport code."""
        return self._json_request['origins'][0]

    @origin.setter
    def origin(self, origin):
        self._json_request['origins'][0] = origin

    @property
    def destination(self):
        """Destination airport code."""
        return self._json_request['destinations'][0]

    @destination.setter
    def destination(self, destination):
        self._json_request['destinations'][0] = destination

    @property
    def depart_date(self):
        """Travel date as a ``datetime``; stored as ``YYYY-MM-DD``."""
        return datetime.datetime.strptime(self._json_request['date'], "%Y-%m-%d")

    @depart_date.setter
    def depart_date(self, depart_date):
        self._json_request['date'] = depart_date.strftime('%Y-%m-%d')

    @property
    def airlines(self):
        """Airline codes of the filter joined by single spaces."""
        return ' '.join(self._airlines)

    @airlines.setter
    def airlines(self, airlines):
        if airlines is None:
            self._airlines = list()
            # Drop the template's airline filter as well. The original code
            # returned before reaching its own "commandLine = ''" branch.
            self._json_request['commandLine'] = ""
            return

        # Codes may be separated by spaces and/or commas; skip empty pieces.
        self._airlines = [code.strip() for code in re.split(r'[ ,]', airlines) if code.strip()]
        self._json_request['commandLine'] = "airlines %s" % airlines

    def _build_command_line(self):
        """Rewrite ``commandLine`` from the parsed airline list."""
        route_lang = ""
        if self._airlines:
            route_lang = "%s airlines %s" % (route_lang, self.airlines)

        self._json_request['commandLine'] = route_lang
153 |
154 |
class ItaMatrixDriverMulti(AbstractItaMatrixDriver):
    """ITA Matrix driver for searches made of an arbitrary list of slices.

    Slices are collected with :meth:`add_slice` / :meth:`add_slice_params`
    and merged into the request when :meth:`build_solutions` runs.
    ``max_stops`` is inherited from the base class.
    """

    _base_request = "name=specificDates&summarizers=carrierStopMatrix"\
        "%2CcurrencyNotice%2CsolutionList%2CitineraryPriceSlider%2C"\
        "itineraryCarrierList%2CitineraryDepartureTimeRanges%2CitineraryArrivalTimeRanges"\
        "%2CdurationSliderItinerary%2CitineraryOrigins%2CitineraryDestinations%2C"\
        "itineraryStopCountList%2CwarningsItinerary&format=JSON&inputs="

    _json_request = json.loads(
        '{"slices":[],"pax":{"adults":1},"cabin":"COACH","maxStopCount":0,'
        '"changeOfAirport":false,"checkAvailability":true,"page":{"size":2000},"sorts":"default"}')

    def __init__(self, max_stops):
        """Create a driver without slices.

        :param max_stops: maximum number of stops; ``None`` means 2.
        """
        # Private deep copy of the class-level template (JSON round trip), so
        # max_stops and the combined slices of one driver do not end up in
        # every other instance.
        self._json_request = json.loads(json.dumps(self._json_request))
        self.slices = list()
        self.max_stops = max_stops

    def add_slice(self, slice):
        """Append an existing ``Slice`` to the search."""
        self.slices.append(slice)

    def add_slice_params(self, origin, destination, depart_date, airlines=None):
        """Build a ``Slice`` from the given values and append it."""
        self.slices.append(Slice(origin, destination, depart_date, airlines))

    # TODO: This isn't needed anymore. It's just a hack to get the _parse_solutions method working.
    @property
    def depart_date(self):
        """Date of the first combined slice (valid after combine_slices)."""
        return datetime.datetime.strptime(self._json_request['slices'][0]['date'], "%Y-%m-%d")

    # TODO: This isn't needed anymore. It's just a hack to get the _parse_solutions method working.
    @property
    def return_date(self):
        """Date of the last combined slice (valid after combine_slices)."""
        return datetime.datetime.strptime(self._json_request['slices'][-1]['date'], "%Y-%m-%d")

    def combine_slices(self):
        """Rebuild the request's ``slices`` list from ``self.slices``."""
        self._json_request['slices'] = []
        for search_slice in self.slices:
            search_slice._build_command_line()
            self._json_request['slices'].append(search_slice._json_request)

    def build_solutions(self):
        """Merge the slices into the request, then run the search."""
        self.combine_slices()
        return super(ItaMatrixDriverMulti, self).build_solutions()

    def _parse_solutions(self, response_json):
        """
        Builds search solution. Adds to MongoDB and returns the Solution object.
        FIXME: This method currently assumes direct point-to-point flights.

        :param response_json: decoded JSON body of the search response.
        :returns: the saved ``ItaSolution``.
        """
        result = response_json['result']
        solution = ItaSolution(engine=self.engine, origin=self.slices[0].origin, destination=self.slices[0].destination, depart_date=self.depart_date, return_date=self.return_date)
        solution.min_price = result['solutionList']['minPrice']
        solution.session = result['session']
        solution.solution_set = result['solutionSet']

        for sol in result['solutionList']['solutions']:
            itinerary_id = sol['id']
            flight_list = list()
            for itinerary_slice in sol['itinerary']['slices']:
                # FIXME: Connecting flights aren't considered; number of flights not considered.
                flight_code = itinerary_slice['flights'][0]
                flight_airline = flight_code[:2]
                flight_number = int(flight_code[2:])
                # FIXME: UTC time might be important
                # The last six characters of each timestamp are cut off before
                # parsing (presumably the UTC offset -- TODO confirm).
                dep_time = datetime.datetime.strptime(itinerary_slice['departure'][:-6], "%Y-%m-%dT%H:%M")
                arr_time = datetime.datetime.strptime(itinerary_slice['arrival'][:-6], "%Y-%m-%dT%H:%M")
                arr_city = itinerary_slice['destination']['code']
                dep_city = itinerary_slice['origin']['code']

                flight = Flight(airline=flight_airline, fno=flight_number, dep_city=dep_city, arr_city=arr_city, dep_time=dep_time, arr_time=arr_time)
                flight.save()

                flight_list.append(flight)

            price = sol['displayTotal']
            price_per_mile = sol['ext']['pricePerMile']
            distance = sol['itinerary']['distance']['value']
            itinerary = ItaItinerary(flights=flight_list, price=price, price_per_mile=price_per_mile, ext_id=itinerary_id, distance=distance)
            solution.itineraries.append(itinerary)

        solution.save()

        return solution
240 |
class ItaMatrixDriver(AbstractItaMatrixDriver):
    """ITA Matrix driver for a round trip on two specific dates."""

    _base_request = "name=specificDates&summarizers=carrierStopMatrix"\
        "%2CcurrencyNotice%2CsolutionList%2CitineraryPriceSlider%2C"\
        "itineraryCarrierList%2CitineraryDepartureTimeRanges%2CitineraryArrivalTimeRanges"\
        "%2CdurationSliderItinerary%2CitineraryOrigins%2CitineraryDestinations%2C"\
        "itineraryStopCountList%2CwarningsItinerary&format=JSON&inputs="

    _json_request = json.loads(
        '{"slices":[{"origins":["PDX"],"originPreferCity":false,"commandLine":"airlines AA DL AS UA",'
        '"destinations":["SEA"],"destinationPreferCity":false,"date":"2013-06-07","isArrivalDate":false,'
        '"dateModifier":{"minus":0,"plus":0}},{"destinations":["PDX"],"destinationPreferCity":false,'
        '"origins":["SEA"],"originPreferCity":false,"commandLine":"airlines AA DL AS","date":"2013-06-09",'
        '"isArrivalDate":false,"dateModifier":{"minus":0,"plus":0}}],"pax":{"adults":1},"cabin":"COACH","maxStopCount":0,'
        '"changeOfAirport":false,"checkAvailability":true,"page":{"size":2000},"sorts":"default"}')

    def __init__(self, origin, destination, depart_date, return_date, max_stops=None, airlines=None):
        """Create a round-trip search.

        :param origin: origin airport code.
        :param destination: destination airport code.
        :param depart_date: outbound date (object with ``strftime``).
        :param return_date: return date (object with ``strftime``).
        :param max_stops: maximum number of stops; ``None`` means 2.
        :param airlines: airline codes for the ``airlines`` command, or
            ``None`` for no airline filter.
        """
        # Private deep copy of the class-level template (JSON round trip) so
        # that drivers do not share -- and overwrite -- each other's request.
        self._json_request = json.loads(json.dumps(self._json_request))
        super(ItaMatrixDriver, self).__init__(origin, destination, depart_date, return_date, max_stops, airlines)

    @property
    def depart_date(self):
        """Date of the outbound slice as a ``datetime``."""
        return datetime.datetime.strptime(self._json_request['slices'][0]['date'], "%Y-%m-%d")

    @depart_date.setter
    def depart_date(self, depart_date):
        self._json_request['slices'][0]['date'] = depart_date.strftime('%Y-%m-%d')

    @property
    def return_date(self):
        """Date of the return slice as a ``datetime``."""
        return datetime.datetime.strptime(self._json_request['slices'][1]['date'], "%Y-%m-%d")

    @return_date.setter
    def return_date(self, return_date):
        self._json_request['slices'][1]['date'] = return_date.strftime('%Y-%m-%d')

    @property
    def airlines(self):
        """``commandLine`` of the outbound slice."""
        return self._json_request['slices'][0]['commandLine']

    @airlines.setter
    def airlines(self, airlines):
        # Clear the filter on both slices when no airlines are given. The
        # original wrote an empty "commandLine" key at the top level of the
        # request, which left the template's airline filter in place.
        command_line = "" if airlines is None else "airlines %s" % airlines
        self._json_request['slices'][0]['commandLine'] = command_line
        self._json_request['slices'][1]['commandLine'] = command_line

    @staticmethod
    def _flight_from_slice(itinerary_slice):
        """Build and save the ``Flight`` for the first flight of a slice."""
        flight_code = itinerary_slice['flights'][0]
        # The last six characters of each timestamp are cut off before parsing
        # (presumably the UTC offset -- TODO confirm).
        dep_time = datetime.datetime.strptime(itinerary_slice['departure'][:-6], "%Y-%m-%dT%H:%M")
        arr_time = datetime.datetime.strptime(itinerary_slice['arrival'][:-6], "%Y-%m-%dT%H:%M")

        flight = Flight(airline=flight_code[:2], fno=int(flight_code[2:]),
                        dep_city=itinerary_slice['origin']['code'],
                        arr_city=itinerary_slice['destination']['code'],
                        dep_time=dep_time, arr_time=arr_time)
        flight.save()
        return flight

    def _parse_solutions(self, response_json):
        """
        Builds search solution. Adds to MongoDB and returns the Solution object.
        FIXME: This method currently assumes direct point-to-point flights.

        :param response_json: decoded JSON body of the search response.
        :returns: the saved ``ItaSolution``.
        """
        result = response_json['result']
        solution = ItaSolution(engine=self.engine, origin=self.origin, destination=self.destination, depart_date=self.depart_date, return_date=self.return_date)
        solution.min_price = result['solutionList']['minPrice']
        solution.session = result['session']
        solution.solution_set = result['solutionSet']

        for sol in result['solutionList']['solutions']:
            slices = sol['itinerary']['slices']
            # Outbound flight first, then the return flight.
            origin_flight = self._flight_from_slice(slices[0])
            return_flight = self._flight_from_slice(slices[1])

            itinerary = ItaItinerary(flights=[origin_flight, return_flight], price=sol['displayTotal'], ext_id=sol['id'])
            solution.itineraries.append(itinerary)

        solution.save()

        return solution
327 |
class CalendarItaMatrixDriver(AbstractItaMatrixDriver):
    """ITA Matrix driver for the calendar (date-range) search."""

    _base_request = "name=calendar&summarizers=currencyNotice%2CovernightFlightsCalendar"\
        "%2CitineraryStopCountList%2CitineraryCarrierList%2Ccalendar&format=JSON&inputs="

    _json_request = json.loads(
        '{"slices":[{"origins":["BWI"],"originPreferCity":false,"routeLanguage":"C:DL","destinations":["MSP"],'
        '"destinationPreferCity":false},{"destinations":["BWI"],"destinationPreferCity":false,"origins":["MSP"],'
        '"originPreferCity":false,"routeLanguage":"C:DL"}],"startDate":"2014-07-01","layover":{"max":5,"min":4},'
        '"pax":{"adults":1},"cabin":"COACH","maxStopCount":0,"changeOfAirport":false,"checkAvailability":true,'
        '"firstDayOfWeek":"SUNDAY","endDate":"2014-08-01"}')

    def __init__(self, origin, destination, depart_date, return_date, day_range, max_stops=None, airlines=None):
        """Create a calendar search.

        :param origin: origin airport code.
        :param destination: destination airport code.
        :param depart_date: start of the searched range (``startDate``).
        :param return_date: end of the searched range (``endDate``).
        :param day_range: two-item sequence ``(min, max)`` stored as the
            request's ``layover`` field.
        :param max_stops: maximum number of stops; ``None`` means 2.
        :param airlines: ``routeLanguage`` value, or ``None`` to keep the
            template's.
        """
        # Private deep copy of the class-level template (JSON round trip) so
        # that drivers do not share -- and overwrite -- each other's request.
        self._json_request = json.loads(json.dumps(self._json_request))
        super(CalendarItaMatrixDriver, self).__init__(origin, destination, depart_date, return_date, max_stops, airlines)
        self.day_range = day_range

    @property
    def depart_date(self):
        """``startDate`` of the request as a ``datetime``."""
        return datetime.datetime.strptime(self._json_request['startDate'], "%Y-%m-%d")

    @depart_date.setter
    def depart_date(self, depart_date):
        self._json_request['startDate'] = depart_date.strftime('%Y-%m-%d')

    @property
    def return_date(self):
        """``endDate`` of the request as a ``datetime``."""
        return datetime.datetime.strptime(self._json_request['endDate'], "%Y-%m-%d")

    @return_date.setter
    def return_date(self, return_date):
        self._json_request['endDate'] = return_date.strftime('%Y-%m-%d')

    @property
    def day_range(self):
        """The request's ``layover`` dict (``{'min': ..., 'max': ...}``)."""
        return self._json_request['layover']

    @day_range.setter
    def day_range(self, days):
        self._json_request['layover'] = {'min': days[0], 'max': days[1]}

    def _parse_solutions(self, response_json):
        """Build, save and return a ``CalendarSolution`` from the response.

        Every trip-duration option of every day that has solutions becomes a
        ``TripMinimumPrice``; the lowest of their prices is stored as the
        solution's ``min_price``.

        :param response_json: decoded JSON body of the calendar response.
        :returns: the saved ``CalendarSolution``.
        """
        self._logger.info('Creating objects to insert to database')
        solution = CalendarSolution(engine=self.engine, origin=self.origin, destination=self.destination,
                                    depart_date=self.depart_date, return_date=self.return_date)

        prices = []
        for month in response_json['result']['calendar']['months']:
            for week in month['weeks']:
                for day in week['days']:
                    if day['solutionCount'] == 0:
                        continue
                    for sol in day['tripDuration']['options']:
                        trip_slices = sol['solution']['slices']
                        # Only the date part (first ten characters) of both
                        # departure timestamps is used.
                        dep_time = datetime.datetime.strptime(trip_slices[0]['departure'][:10], "%Y-%m-%d").date()
                        arr_time = datetime.datetime.strptime(trip_slices[1]['departure'][:10], "%Y-%m-%d").date()
                        price = sol['minPrice']
                        trip = TripMinimumPrice(dep_city=self.origin, arr_city=self.destination, dep_time=dep_time, arr_time=arr_time, price=price)
                        prices.append(float(price.replace('USD', '')))  # FIXME: Can't assume USD

                        solution.trip_prices.append(trip)

        if prices:
            solution.min_price = str(min(prices))
        else:
            # min() of an empty list raises ValueError; a calendar without
            # any priced day is saved without a minimum price instead.
            # NOTE(review): confirm CalendarSolution accepts a missing min_price.
            self._logger.warning('Calendar response contained no priced trips')
        solution.save()

        return solution
392 |
393 | class ViewItineraryDriver(object):
394 |
395 | _logger = logging.getLogger(__name__)
396 | engine = "ITA Matrix"
397 | _base_url = "http://matrix.itasoftware.com"
398 | _request_uri = "/xhr/shop/summarize?"
399 | _http_header = {
400 | 'Host': 'matrix.itasoftware.com',
401 | 'Content-Type': 'application/x-www-form-urlencoded',
402 | 'Cache-Control': 'no-cache',
403 | 'Content-Length': '0'
404 | }
405 |
406 | _base_request = "solutionSet="\
407 | "&session="\
408 | "&summarizers=currencyNotice%2CbookingDetails"\
409 | "&format=JSON"\
410 | "&inputs="
411 |
412 | _json_request = json.loads('{"slices":[{"origins":["VRN"],"originPreferCity":false,"commandLine":"airlines AA BA DL",\
413 | "destinations":["SEA","YVR"],"destinationPreferCity":false,"date":"2014-10-20","isArrivalDate":false,\
414 | "dateModifier":{"minus":0,"plus":0}},{"origins":["YVR","SEA"],"originPreferCity":false,"routeLanguage":"X+",\
415 | "destinations":["VRN"],"destinationPreferCity":false,"date":"2014-11-07","isArrivalDate":false,\
416 | "dateModifier":{"minus":0,"plus":0}}],\
417 | "pax":{"adults":1,"children":0,"seniors":0,"infantsInSeat":0,"youth":0,"infantsInLap":0},\
418 | "cabin":"COACH","changeOfAirport":true,"checkAvailability":true,"currency":"USD","salesCity":"MIL",\
419 | "page":{"size":30},"sorts":"default","solution":"0EI4mYoNuxZ6UnAPrvqq47B/Jpt7IVb5Qv8NBWx8cO9e4K004"}')
420 |
421 | def __init__(self, itinerary, session, solutionSet):
422 | self.slices = list()
423 | self.itinerary = itinerary
424 | self.session = session
425 | self.solutionSet = solutionSet
426 | pass
427 |
428 | @property
429 | def session(self):
430 | return self._session
431 | @session.setter
432 | def session(self, session):
433 | self._session = session
434 | self._base_request = re.sub('session=[^&]*', "session=%s" % session, self._base_request)
435 |
436 | @property
437 | def solutionSet(self):
438 | return self._solutionSet
439 | @solutionSet.setter
440 | def solutionSet(self, solutionSet):
441 | self._solutionSet = solutionSet
442 | self._base_request = re.sub('solutionSet=[^&]*', "solutionSet=%s" % solutionSet, self._base_request)
443 |
@property
def itinerary(self):
    """Itinerary most recently assigned to this driver."""
    return self._itinerary


@itinerary.setter
def itinerary(self, itinerary):
    """Remember *itinerary*, then pass it to ``itinerary_to_slices``."""
    self._itinerary = itinerary
    self.itinerary_to_slices(itinerary)
451 |
def itinerary_to_slices(self, itinerary):
    """Append one ``Slice`` to ``self.slices`` per flight of *itinerary*.

    Each slice is built from the flight's departure city, arrival city,
    departure time and airline, in that order.  Existing entries in
    ``self.slices`` are kept.
    """
    self.slices.extend(
        Slice(leg.dep_city, leg.arr_city, leg.dep_time, leg.airline)
        for leg in itinerary.flights
    )
455 |
def _build_session_handle(self):
    """Write ``"<solutionSet>/<itinerary ext_id>"`` into the payload.

    The value is stored under the ``solution`` key of ``_json_request``.
    """
    solution_set = self.solutionSet
    itinerary_id = self.itinerary.ext_id
    self._json_request['solution'] = "%s/%s" % (solution_set, itinerary_id)
458 |
def build_request_url(self):
    """Assemble the request URL for the current itinerary and return it.

    Refreshes the ``solution`` handle in the payload, serialises the
    payload as JSON after the form-encoded prefix, and prepends the base
    URL and request URI.  The finished URL is also printed to stdout.
    """
    self._build_session_handle()
    payload = json.dumps(self._json_request)
    request_url = "".join(
        (self._base_url, self._request_uri, self._base_request, payload))
    print('Request URL: %s' % (request_url))
    return request_url
465 |
def build_itinerary_breakdown(self):
    """Request the itinerary breakdown and parse it.

    POSTs to the URL from ``build_request_url`` with ``_http_header``,
    decodes the response body as JSON after dropping its first four
    characters, prints the decoded document and hands it to
    ``_parse_breakdown``.

    :returns: the value returned by ``_parse_breakdown``.
    """
    url = self.build_request_url()

    self._logger.info('Making request to ITA Matrix: %s', url)
    reply = requests.post(url, headers=self._http_header)
    # The JSON document starts at offset 4 of the body.
    # NOTE(review): presumably a fixed non-JSON guard prefix -- confirm.
    breakdown = json.loads(reply.text[4:])

    print(breakdown)
    self._logger.info('Creating objects to insert to database')
    return self._parse_breakdown(breakdown)
476 |
def _parse_breakdown(self, response_json):
    """Copy fares, taxes and distance from a breakdown onto the itinerary.

    Reads the first pricing of the first ticket under
    ``result.bookingDetails`` and appends one ``PriceComponent`` per base
    fare to ``self.itinerary.base_fares`` and one per tax total to
    ``self.itinerary.taxes``; then stores the itinerary distance.

    :param response_json: decoded breakdown response (a dict).
    :returns: ``self.itinerary`` with the parsed values applied.
    :raises KeyError: if a required part of the response is missing.
    """
    booking = response_json['result']['bookingDetails']
    pricing = booking['tickets'][0]['pricings'][0]

    # Base fares: described by their origin-destination city pair.
    for fare in pricing['fares']:
        self.itinerary.base_fares.append(PriceComponent(
            rate_code=fare['code'],
            price=fare['displayAdjustedPrice'],
            key=fare['key'],
            description="%s-%s" % (fare['originCity'], fare['destinationCity'])))

    # Taxes, e.g.
    # {'code': 'US', 'tax': {'name': 'US Transportation Tax', 'key': '0/0'}, 'totalDisplayPrice': 'USD44.81'}
    for tax_item in pricing['ext']['taxTotals']:
        tax = tax_item['tax']
        self.itinerary.taxes.append(PriceComponent(
            rate_code=tax_item['code'],
            price=tax_item['totalDisplayPrice'],
            key=tax['key'],
            # Not every tax entry has a name; leave the description unset
            # instead of failing on the missing key.
            description=tax.get('name')))

    self.itinerary.distance = booking['itinerary']['distance']['value']

    return self.itinerary
--------------------------------------------------------------------------------
/flight_scraper/scraper.py:
--------------------------------------------------------------------------------
1 | from flight_scraper.solution_model import Solution, ItaSolution, CalendarSolution, SeatQuery, Itinerary, ItaItinerary
2 | from engines.ita_matrix.driver import ItaMatrixDriver, ItaMatrixDriverMulti, CalendarItaMatrixDriver, Slice, ViewItineraryDriver
3 | from datetime import date, timedelta
4 |
class FlightScraper(object):
    """Round-trip search parameters plus helpers to search and read results.

    The ``search_*`` methods delegate to the ITA Matrix drivers; the other
    methods query previously stored documents through the model classes.
    """

    def __init__(self, origin, destination, depart_date, return_date,
                 max_stops=None, day_range=None, airlines=None):
        """Store the search parameters unchanged as attributes."""
        self.origin = origin
        self.destination = destination
        self.depart_date = depart_date
        self.return_date = return_date
        self.day_range = day_range
        self.max_stops = max_stops
        self.airlines = airlines

    def search_flights(self):
        """Run an ``ItaMatrixDriver`` search and return its solutions."""
        ita_driver = ItaMatrixDriver(self.origin, self.destination, self.depart_date, self.return_date, self.max_stops, self.airlines)
        return ita_driver.build_solutions()

    def search_calendar(self):
        """Run a ``CalendarItaMatrixDriver`` search and return its solutions."""
        ita_driver = CalendarItaMatrixDriver(self.origin, self.destination, self.depart_date, self.return_date,
                                             day_range=self.day_range, max_stops=self.max_stops, airlines=self.airlines)
        return ita_driver.build_solutions()

    def minimum_trips(self):
        """
        Returns the CalendarSolution query for this origin, destination,
        departure date and return date.
        """
        return CalendarSolution.objects(origin=self.origin, destination=self.destination,
                                        depart_date=self.depart_date, return_date=self.return_date)

    def solutions(self):
        """
        Returns the Solution query for this origin, destination, departure
        date and return date.
        """
        return Solution.objects(depart_date=self.depart_date, return_date=self.return_date,
                                origin=self.origin, destination=self.destination)

    def itineraries(self, flights_to_match):
        """Return stored itineraries sharing a flight with *flights_to_match*.

        :param flights_to_match: iterable of flights to intersect with each
            itinerary's flights.
        :returns: list of matching itineraries, in the order found.
        """
        results = list()

        for sol in self.solutions():
            for itinerary in sol.itineraries:
                if set(itinerary.flights).intersection(flights_to_match):
                    results.append(itinerary)

        return results

    def __get_seats(self, date):
        """Return the SeatQuery query for this city pair departing on *date*."""
        # The constructor stores plain ``origin`` / ``destination``
        # attributes.  The double-underscore spellings used previously were
        # name-mangled and never assigned, so they raised AttributeError.
        return SeatQuery.objects(flights__dep_city=self.origin, flights__arr_city=self.destination, flights__dep_time=date)

    def departure_seats(self):
        """Seat queries for the outbound date."""
        return self.__get_seats(self.depart_date)

    def return_seats(self):
        """Seat queries for the return date."""
        return self.__get_seats(self.return_date)
63 |
class FlightScraperMulti(object):
    """Multi-leg search: legs are added one by one, then searched together.

    All work is delegated to a single ``ItaMatrixDriverMulti`` instance.
    """

    def __init__(self, max_stops=None):
        self.max_stops = max_stops
        self._ita_driver = ItaMatrixDriverMulti(max_stops)

    def add_flight(self, origin, destination, depart_date, airlines=None):
        """Register one leg with the underlying driver."""
        self._ita_driver.add_slice_params(origin, destination, depart_date, airlines)

    def search_flights(self):
        """Return the driver's solutions for the legs added so far."""
        return self._ita_driver.build_solutions()
81 |
def scrape_return():
    """Search a sample SFO-SEA round trip: out in 30 days, back in 47."""
    outbound = date.today() + timedelta(days=30)
    inbound = date.today() + timedelta(days=47)
    return FlightScraper('SFO', 'SEA', outbound, inbound).search_flights()
87 |
def scrape_multi():
    """Search a sample three-leg trip SFO -> SEA -> PHX -> SFO.

    The legs depart 30, 40 and 47 days from today; only the first leg is
    restricted to specific airlines.
    """
    from datetime import date

    # (origin, destination, days from today, airline filter)
    legs = (
        ('SFO', 'SEA', 30, "AA DL AC"),
        ('SEA', 'PHX', 40, None),
        ('PHX', 'SFO', 47, None),
    )

    scraper = FlightScraperMulti()
    for origin, destination, days_ahead, airlines in legs:
        scraper.add_flight(origin, destination,
                           date.today() + timedelta(days=days_ahead),
                           airlines=airlines)

    return scraper.search_flights()
97 |
def scrape_itinerary(solution, itinerary):
    """Fetch the price breakdown of *itinerary*.

    :param solution: object providing ``session`` and ``solution_set``.
    :param itinerary: itinerary passed to ``ViewItineraryDriver``.
    :returns: the driver's ``build_itinerary_breakdown()`` result.
    """
    driver = ViewItineraryDriver(itinerary, solution.session, solution.solution_set)
    # The breakdown is only returned; *solution* is not updated here.
    return driver.build_itinerary_breakdown()
104 |
if __name__ == "__main__":
    import ConfigParser
    import mongoengine

    # ``read`` returns the list of files it parsed; empty means no config.
    Config = ConfigParser.ConfigParser()
    if not Config.read('flight_scraper.cfg'):
        print("Please copy flight_scraper.cfg.example to flight_scraper.cfg")
        raise Exception('Could not read config file')

    # Connect with an explicit host when one is configured, otherwise let
    # mongoengine use its default.
    try:
        host_string = Config.get("mongodb", "host")
        mongoengine.connect(Config.get("mongodb", "name"), host=host_string)
    except ConfigParser.NoOptionError:
        mongoengine.connect(Config.get("mongodb", "name"))

    # Demo run: multi-leg search, then the breakdown of its last itinerary.
    solution = scrape_multi()
    #solution = ItaSolution.objects().limit(1).next()
    itinerary = solution.itineraries[-1]

    it_details = scrape_itinerary(solution, itinerary)
125 |
--------------------------------------------------------------------------------
/flight_scraper/solution_model.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import urllib
3 |
4 | from mongoengine import *
5 |
class Seat(EmbeddedDocument):
    """Availability of one fare class in one cabin (embedded in ``Flight``)."""
    cabin_code = StringField()
    fare_class = StringField()
    availability = IntField()

    def __str__(self):
        parts = (self.cabin_code, self.fare_class, self.availability)
        return "cabin: %s fare: %s avail: %s" % parts
13 |
class Flight(Document):
    """A single flight; equality and hashing use airline and flight number."""
    airline = StringField()
    fno = IntField()
    dep_city = StringField()
    arr_city = StringField()
    dep_time = DateTimeField()
    arr_time = DateTimeField()
    seats = ListField(EmbeddedDocumentField(Seat))

    def __str__(self):
        return "Flight: %s %s \n%s-%s\n%s - %s" % (self.airline, self.fno, self.dep_city, self.arr_city, self.dep_time, self.arr_time)

    def __repr__(self):
        # Call ``__str__``.  Returning the bound method itself is not a
        # string, which makes ``repr()`` raise TypeError.
        return self.__str__()

    def __eq__(self, other):
        # Two flights are equal when airline and flight number both match.
        return ((self.airline == other.airline) and (self.fno == other.fno))

    def __hash__(self):
        # Kept consistent with ``__eq__`` so flights can be used in sets.
        return hash((self.airline, self.fno))

    def seat_map(self):
        """Return the SeatGuru seat-map lookup URL for this flight.

        The query string carries the airline, the flight number and the
        departure date formatted as ``MM-DD-YYYY``.
        """
        url = "http://www.seatguru.com/findseatmap/findseatmap.php?"
        params = { 'carrier':self.airline,
                   'flightno':self.fno,
                   'date':self.dep_time.strftime('%m-%d-%Y') }
        url = url + urllib.urlencode(params)
        return url
42 |
43 |
class Itinerary(EmbeddedDocument):
    """A priced list of flights, embedded in a ``Solution``."""
    flights = ListField(ReferenceField(Flight))
    price = StringField()
    price_per_mile = StringField()
    ext_id = StringField(required=False)

    def __str__(self):
        flight_texts = [str(flight) for flight in self.flights]
        return "Itinerary:\n \tPrice=%s\n \t%s" % (self.price, flight_texts)

    def set_stop(self, conn_flight):
        """Placeholder: ignores *conn_flight* and returns ``None``."""
        return None

    # Subclasses such as ItaItinerary extend this document.
    meta = {'allow_inheritance': True}
57 |
class PriceComponent(EmbeddedDocument):
    """One line of a price breakdown (a base fare or a tax)."""
    # Code and price are mandatory; key and description are optional.
    rate_code = StringField(required=True)
    price = StringField(required=True)
    key = StringField()
    description = StringField()
63 |
class ItaItinerary(Itinerary):
    """Itinerary extended with a price breakdown and a distance."""
    #flight_details = ListField(EmbeddedDocumentField(FlightDetails))
    # Breakdown lines, stored as embedded PriceComponent documents.
    taxes = ListField(EmbeddedDocumentField(PriceComponent))
    base_fares = ListField(EmbeddedDocumentField(PriceComponent))
    distance = IntField()
    # FIXME: all_flights contains all of the flight connections in the breakdown.
    all_flights = ListField(ReferenceField(Flight))
71 |
class Solution(Document):
    """Result of one flight search: the query parameters and its itineraries."""
    # Pass the callable, not its result, so the timestamp is taken when each
    # document is created.  Calling ``utcnow()`` here would freeze the
    # default at import time for every document.
    query_date = DateTimeField(default=datetime.datetime.utcnow, required=True)
    engine = StringField(required=True)
    origin = StringField(max_length=100, required=True)
    depart_date = DateTimeField()
    destination = StringField(max_length=100, required=True)
    return_date = DateTimeField()
    min_price = StringField(required=False)
    itineraries = ListField(EmbeddedDocumentField(Itinerary))
    session = StringField(required=False)

    # Subclasses such as ItaSolution extend this document.
    meta = {'allow_inheritance': True}
84 |
class ItaSolution(Solution):
    """Solution that must also carry a session and a solution-set id."""
    # ``session`` is optional on Solution; it is redeclared as required here.
    session = StringField(required=True)
    solution_set = StringField(required=True)
88 |
class SeatQuery(Document):
    """A timestamped list of flight references."""
    # Pass the callable so each document gets its own creation time instead
    # of the single value computed when the module was imported.
    query_date = DateTimeField(default=datetime.datetime.utcnow, required=True)
    flights = ListField(ReferenceField(Flight))
92 |
class TripMinimumPrice(EmbeddedDocument):
    """One trip and its price, embedded in a ``CalendarSolution``."""
    dep_city = StringField()
    arr_city = StringField()
    dep_time = DateTimeField()
    arr_time = DateTimeField()
    price = StringField()

    def __str__(self):
        # Label spelling corrected (it previously read "TripMinimimumPrice").
        return 'TripMinimumPrice: %s->%s\n%s - %s: %s' % (self.dep_city, self.arr_city, self.dep_time, self.arr_time, self.price)

    def query(self):
        """Placeholder; currently does nothing and returns ``None``."""
        pass
105 |
class CalendarSolution(Document):
    """Result of one calendar search: query parameters plus trip prices."""
    # Pass the callable so each document gets its own creation time instead
    # of the single value computed when the module was imported.
    query_date = DateTimeField(default=datetime.datetime.utcnow, required=True)
    engine = StringField(required=True)
    origin = StringField(max_length=100, required=True)
    destination = StringField(max_length=100, required=True)
    depart_date = DateTimeField()
    return_date = DateTimeField()
    min_price = StringField(required=False)
    trip_prices = ListField(EmbeddedDocumentField(TripMinimumPrice))
115 |
--------------------------------------------------------------------------------
/flight_scraper/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mayanez/flight_scraper/17c5f202df0c443740e4f0a428dcc335a838a85c/flight_scraper/utils/__init__.py
--------------------------------------------------------------------------------
/flight_scraper/utils/graph.py:
--------------------------------------------------------------------------------
1 | import gviz_api
2 | from flight_scraper.utils.scraper import get_prices_by_query_dates
3 |
4 |
def graph_prices(flight_scraper):
    """
    This function creates a Google Visualizations DataTable JSON object.
    It is then passed to the Google Visualizations API to be rendered.

    :param flight_scraper: object providing ``depart_date``,
        ``return_date`` and whatever ``get_prices_by_query_dates`` reads.
    :returns: the table as JSON, with columns ``query_date`` and
        ``min_price``, ordered by ``query_date``.
    """
    description = {"query_date" : ("datetime", "Query Date"),
                   "min_price" : ("number", "%s to %s" % (flight_scraper.depart_date, flight_scraper.return_date))}

    # One row per recorded price; a query date with several prices yields
    # several rows.
    prices_by_date = get_prices_by_query_dates(flight_scraper)
    data = [
        {"query_date" : query_date, "min_price" : price}
        for query_date, prices in prices_by_date.items()
        for price in prices
    ]

    data_table = gviz_api.DataTable(description)
    data_table.LoadData(data)

    return data_table.ToJSon(columns_order=("query_date", "min_price"), order_by="query_date")
28 |
def graph_seats(origin, dest, dept_date):
    """ TODO: Refactor

    Placeholder: the arguments are ignored and ``None`` is returned.  The
    earlier implementation is kept below, commented out, for reference.
    """
    #description = {"query_date" : ("datetime", "Query Date"),
    #               "seat_avail" : ("number", "%s" % (dept_date))}
    #
    #seat_query = scraper.get_total_seat_availability(origin, dest, dept_date)
    #data = list()
    #for query_date, avail in seat_query.iteritems():
    #    v = {"query_date" : query_date, "seat_avail" : avail}
    #    data.append(v)
    #
    #data_table = gviz_api.DataTable(description)
    #data_table.LoadData(data)
    #
    #return data_table.ToJSon(columns_order=("query_date", "seat_avail"), order_by="query_date")
    return None
44 |
45 |
--------------------------------------------------------------------------------
/flight_scraper/utils/scraper.py:
--------------------------------------------------------------------------------
1 | from dateutil.rrule import *
2 | from dateutil.parser import *
3 |
4 |
5 |
def search_seats(origin, dest, dep_date):
    """ TODO: Refactor

    Not implemented yet: always raises ``NotImplementedError``.
    """
    #dep_date = dep_date.strftime("%Y-%m-%d")
    #print "Searching %s -> %s : %s" % (origin, dest, dep_date)
    message = 'search_seats needs to be implemented'
    raise NotImplementedError(message)
11 |
def generate_date_pairs(frequency, weekdays, start_date, until_date):
    """Pair up consecutive recurrence dates as [departure, return] lists.

    The dates come from ``rrule(frequency, byweekday=weekdays,
    dtstart=start_date, until=...)``.  The first and second dates form the
    first pair, the third and fourth the next, and so on.  A final date
    without a partner is dropped.

    :param until_date: end of the recurrence; it is formatted as
        ``MM-DD-YYYY`` and re-parsed before being given to ``rrule``.
    :returns: list of two-element lists.
    """
    until = parse(until_date.strftime('%m-%d-%Y'))
    occurrences = list(rrule(frequency, byweekday=weekdays, dtstart=start_date, until=until))

    # Even positions are departures, odd positions are returns; ``zip``
    # stops at the shorter slice, which discards an unpaired last date.
    departures = occurrences[0::2]
    returns = occurrences[1::2]
    return [[leave, come_back] for leave, come_back in zip(departures, returns)]
33 |
def get_prices_by_query_dates(flight_scraper):
    """ Returns a dict of all queried prices and query_dates for the depart_date & return_date.

    :param flight_scraper: object whose ``solutions()`` yields items with
        ``query_date`` and ``min_price`` attributes.
    :returns: dict mapping each query date to the list of prices (floats)
        recorded for it, in iteration order.
    """
    result = dict()

    for sol in flight_scraper.solutions():
        if not sol.min_price:
            # No price was recorded for this solution; nothing to report.
            continue

        min_price = float(sol.min_price[3:]) #gets rid of USD in string
        # ``setdefault`` replaces the Python-2-only ``dict.has_key`` test.
        result.setdefault(sol.query_date, []).append(min_price)

    return result
def get_total_seat_availability(origin, dest, date):
    """ TODO: Refactor

    Placeholder: the arguments are ignored and ``None`` is returned.  The
    earlier implementation is kept below, commented out, for reference.
    """
    #seat_availability = dict()
    #seat_query = get_seats(origin, dest, date)
    #
    #for query in seat_query:
    #    flights = query.flights
    #
    #    for flight in flights:
    #        seats = flight.seats
    #        for seat in seats:
    #            if (not seat_availability.has_key(query.query_date)):
    #                seat_availability[query.query_date] = seat.availability
    #            else:
    #                seat_availability[query.query_date] += seat.availability
    #
    #return seat_availability
    return None
70 |
def _price_amount(itinerary):
    """Return the numeric amount of ``itinerary.price`` as a float.

    Leading letters and spaces (the currency code) and thousands separators
    are removed first.  A missing or unparsable price yields ``+inf`` so
    that such an itinerary is never chosen over a priced one.
    """
    import string

    try:
        amount_text = itinerary.price.lstrip(string.ascii_letters + ' ')
        return float(amount_text.replace(',', ''))
    except (AttributeError, ValueError):
        return float('inf')


def get_min_price_itinerary(itineraries):
    """Return the itinerary with the lowest price.

    Prices are strings such as ``"USD861.40"``.  They are compared by
    numeric amount; comparing the raw strings would rank ``"USD1000.00"``
    below ``"USD861.40"``.

    :param itineraries: non-empty iterable of objects with a ``price``.
    :raises ValueError: if *itineraries* is empty.
    """
    return min(itineraries, key=_price_amount)
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 |
setup(
    name='flight_scraper',
    version='0.1',
    # distutils does not discover sub-packages, so each one is listed.
    # The engine packages are imported by flight_scraper/scraper.py and
    # must be installed with it.
    packages=[
        'flight_scraper',
        'flight_scraper.utils',
        'flight_scraper.engines',
        'flight_scraper.engines.flight_stats',
        'flight_scraper.engines.ita_matrix',
        'automation',
    ],
    url='',
    license='',
    author='mayanez',
    author_email='',
    description=''
)
13 |
--------------------------------------------------------------------------------
/static/ico/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mayanez/flight_scraper/17c5f202df0c443740e4f0a428dcc335a838a85c/static/ico/favicon.ico
--------------------------------------------------------------------------------
/templates/base.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {% block head %}
5 | {% block title %}{% endblock %}
6 |
7 |
8 |
9 | {% endblock %}
10 |
11 |
12 | {% block body %}
13 | {% endblock %}
14 |
15 |
--------------------------------------------------------------------------------
/templates/calendar_query.html:
--------------------------------------------------------------------------------
1 |
5 | {% extends "base.html" %}
6 |
7 | {%block head%}
8 | {{super()}}
9 |
10 |
16 | {% endblock %}
17 |
18 | {% block body %}
19 |
20 |
21 |
22 |
23 | Query Date |
24 | Min Price |
25 |
26 |
27 | {{result.query_date}} |
28 | {{result.min_price}} |
29 |
30 |
31 |
32 |
33 | Price |
34 | Departure Date |
35 | Arrival Date |
36 | Query Flights |
37 |
38 |
39 | {% for trip in result.trip_prices %}
40 |
41 | {{trip.price}} |
42 | {{trip.dep_time}} |
43 | {{trip.arr_time}} |
44 | Query Flights |
45 |
46 | {% endfor %}
47 |
48 |
49 |
50 | {% endblock %}
51 |
52 |
--------------------------------------------------------------------------------
/templates/graph.html:
--------------------------------------------------------------------------------
1 |
5 | {% extends "base.html" %}
6 |
7 | {%block head%}
8 | {{super()}}
9 |
10 |
11 |
14 |
28 |
29 |
35 | {% endblock %}
36 |
37 | {% block body %}
38 |
39 |
40 | {{lengthSol}}
41 |
42 |
43 | {% for sol in solutions %}
44 |
45 |
46 |
47 | Query Date |
48 | Min Price |
49 |
50 |
51 | {{sol.query_date}} |
52 | {{sol.min_price}} |
53 |
54 |
55 |
56 |
57 | Price |
58 | Airline |
59 | Flight No |
60 | Dept Time |
61 | Seat Map |
62 | Airline |
63 | Flight No |
64 | Dept Time |
65 | Seat Map |
66 |
67 |
68 | {% for itinerary in sol.itineraries %}
69 |
70 | {{itinerary.price}} |
71 | {% for flight in itinerary.flights %}
72 | {{flight.airline}} |
73 | {{flight.fno}} |
74 | {{flight.dep_time}} |
75 | SeatGuru |
76 | {% endfor %}
77 |
78 | {% endfor %}
79 |
80 |
81 | {% endfor %}
82 |
83 |
84 | {% endblock %}
85 |
86 |
--------------------------------------------------------------------------------
/templates/graph_seats.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {%block head%}
4 | {{super()}}
5 |
6 |
7 |
10 |
24 |
25 | {% endblock %}
26 |
27 | {% block body %}
28 |
29 |
30 |
31 |
32 | {% endblock %}
--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 | {%block title%} Flight Scraper {% endblock %}
3 | {%block head%}
4 | {{super()}}
5 |
13 | {%endblock%}
14 | {% block body %}
15 | Flight Scraper
16 |
17 | Flight Search
18 |
42 |
43 | Flight Search - Calendar
44 |
56 |
57 | Graph Flights
58 |
67 |
68 | Find Seats
69 |
77 |
78 | {% endblock %}
79 |
--------------------------------------------------------------------------------
/templates/query.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block body %}
4 |
5 |
6 |
7 |
8 | {% for date_query in result %}
9 |
10 |
11 |
12 | Departure Date |
13 | Arrival Date |
14 |
15 |
16 | {{date_query[0]}} |
17 | {{date_query[1]}} |
18 |
19 |
20 |
21 |
22 | Price |
23 | Airline |
24 | Flight No |
25 | Dept Time |
26 | Seat Map |
27 | Airline |
28 | Flight No |
29 | Dept Time |
30 | Seat Map |
31 |
32 |
33 | {% for itinerary in date_query[2].itineraries %}
34 |
35 | {{itinerary.price}} |
36 | {% for flight in itinerary.flights %}
37 | {{flight.airline}} |
38 | {{flight.fno}} |
39 | {{flight.dep_time}} |
40 | SeatGuru |
41 | {% endfor %}
42 |
43 | {% endfor %}
44 |
45 |
46 | {% endfor %}
47 |
48 |
49 | {% endblock %}
--------------------------------------------------------------------------------
/templates/seats.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {%block body%}
4 |
5 | {%for flight in flights%}
6 | - {{flight.airline}} {{flight.fno}}
7 |
8 | {%for seat in flight.seats %}
9 | - {{seat.fare_class}} {{seat.availability}}
10 | {%endfor%}
11 |
12 | {%endfor%}
13 |
14 | {%endblock%}
--------------------------------------------------------------------------------
/test/sample_ita_itinerary.json:
--------------------------------------------------------------------------------
1 | {
2 | "result": {
3 | "id": "sx7pjCTUnbu8kuOP50KYR9",
4 | "session": "6W60KYR6zSjTkicN7WgwOA92n",
5 | "solutionCount": 500,
6 | "solutionSet": "0EI4mYoNuxZ6UnAPrvqq47B",
7 | "currencyNotice": {
8 | "ext": {
9 | "price": "USD862.00"
10 | }
11 | },
12 | "bookingDetails": {
13 | "displayTotal": "USD861.40",
14 | "id": "Jpt7IVb5Qv8NBWx8cO9e4K004",
15 | "passengerCount": 1,
16 | "co2Emissions": {
17 | "estimate": 1761
18 | },
19 | "ext": {
20 | "totalPrice": "USD861.40"
21 | },
22 | "itinerary": {
23 | "distance": {
24 | "units": "MI",
25 | "value": 11725
26 | },
27 | "slices": [
28 | {
29 | "arrival": "2014-10-20T23:05-07:00",
30 | "departure": "2014-10-20T11:55+02:00",
31 | "stopCount": 2,
32 | "destination": {
33 | "code": "SEA",
34 | "name": "Seattle\/Tacoma Sea\/Tac",
35 | "city": {
36 | "name": "Seattle"
37 | }
38 | },
39 | "ext": {
40 | "warnings": "CHANGE_OF_AIRPORT_SLICE,LONG_LAYOVER,CHANGE_OF_TERMINAL,RISKY_CONNECTION"
41 | },
42 | "origin": {
43 | "code": "VRN",
44 | "name": "Verona Villafranca",
45 | "city": {
46 | "name": "Verona"
47 | }
48 | },
49 | "segments": [
50 | {
51 | "arrival": "2014-10-20T12:50+01:00",
52 | "departure": "2014-10-20T11:55+02:00",
53 | "duration": 115,
54 | "bookingInfos": [
55 | {
56 | "bookingCode": "Q",
57 | "cabin": "COACH"
58 | }
59 | ],
60 | "carrier": {
61 | "code": "BA",
62 | "shortName": "British Airways p.l.c."
63 | },
64 | "connection": {
65 | "changeOfAirport": true,
66 | "changeOfTerminal": true,
67 | "duration": 180
68 | },
69 | "destination": {
70 | "code": "LGW",
71 | "city": {
72 | "name": "London"
73 | }
74 | },
75 | "flight": {
76 | "number": 2597
77 | },
78 | "legs": [
79 | {
80 | "arrival": "2014-10-20T12:50+01:00",
81 | "departure": "2014-10-20T11:55+02:00",
82 | "duration": 115,
83 | "aircraft": {
84 | "shortName": "Boeing 737"
85 | },
86 | "destination": {
87 | "code": "LGW",
88 | "name": "London Gatwick",
89 | "city": {
90 | "name": "London"
91 | }
92 | },
93 | "origin": {
94 | "code": "VRN",
95 | "city": {
96 | "name": "Verona"
97 | }
98 | },
99 | "services": [
100 | {
101 | "amenities": [
102 | "Non-smoking"
103 | ],
104 | "meals": [
105 | "Snack or Brunch"
106 | ]
107 | }
108 | ]
109 | }
110 | ],
111 | "origin": {
112 | "code": "VRN",
113 | "city": {
114 | "name": "Verona"
115 | }
116 | }
117 | },
118 | {
119 | "arrival": "2014-10-20T18:25-05:00",
120 | "departure": "2014-10-20T15:50+01:00",
121 | "duration": 515,
122 | "bookingInfos": [
123 | {
124 | "bookingCode": "Q",
125 | "cabin": "COACH"
126 | }
127 | ],
128 | "carrier": {
129 | "code": "BA",
130 | "shortName": "British Airways p.l.c."
131 | },
132 | "connection": {
133 | "changeOfTerminal": true,
134 | "duration": 140
135 | },
136 | "destination": {
137 | "code": "ORD",
138 | "city": {
139 | "name": "Chicago"
140 | }
141 | },
142 | "flight": {
143 | "number": 297
144 | },
145 | "legs": [
146 | {
147 | "arrival": "2014-10-20T18:25-05:00",
148 | "departure": "2014-10-20T15:50+01:00",
149 | "duration": 515,
150 | "aircraft": {
151 | "shortName": "Boeing 777"
152 | },
153 | "destination": {
154 | "code": "ORD",
155 | "name": "Chicago O'Hare",
156 | "city": {
157 | "name": "Chicago"
158 | }
159 | },
160 | "origin": {
161 | "code": "LHR",
162 | "city": {
163 | "name": "London"
164 | }
165 | },
166 | "services": [
167 | {
168 | "amenities": [
169 | "Non-smoking"
170 | ],
171 | "meals": [
172 | "Meal"
173 | ]
174 | }
175 | ]
176 | }
177 | ],
178 | "origin": {
179 | "code": "LHR",
180 | "city": {
181 | "name": "London"
182 | }
183 | }
184 | },
185 | {
186 | "arrival": "2014-10-20T23:05-07:00",
187 | "codeshare": true,
188 | "departure": "2014-10-20T20:45-05:00",
189 | "duration": 260,
190 | "bookingInfos": [
191 | {
192 | "bookingCode": "Q",
193 | "cabin": "COACH"
194 | }
195 | ],
196 | "carrier": {
197 | "code": "BA",
198 | "shortName": "British Airways p.l.c."
199 | },
200 | "destination": {
201 | "code": "SEA",
202 | "city": {
203 | "name": "Seattle"
204 | }
205 | },
206 | "ext": {
207 | "operationalDisclosure": "OPERATED BY AMERICAN AIRLINES INC."
208 | },
209 | "flight": {
210 | "number": 5761
211 | },
212 | "legs": [
213 | {
214 | "arrival": "2014-10-20T23:05-07:00",
215 | "departure": "2014-10-20T20:45-05:00",
216 | "duration": 260,
217 | "aircraft": {
218 | "shortName": "Boeing 737"
219 | },
220 | "destination": {
221 | "code": "SEA",
222 | "name": "Seattle\/Tacoma Sea\/Tac",
223 | "city": {
224 | "name": "Seattle"
225 | }
226 | },
227 | "origin": {
228 | "code": "ORD",
229 | "city": {
230 | "name": "Chicago"
231 | }
232 | },
233 | "services": [
234 | {
235 | "amenities": [
236 | "Non-smoking"
237 | ]
238 | }
239 | ]
240 | }
241 | ],
242 | "origin": {
243 | "code": "ORD",
244 | "city": {
245 | "name": "Chicago"
246 | }
247 | }
248 | }
249 | ]
250 | },
251 | {
252 | "arrival": "2014-11-09T10:25+01:00",
253 | "departure": "2014-11-07T14:05-08:00",
254 | "stopCount": 1,
255 | "destination": {
256 | "code": "VRN",
257 | "name": "Verona Villafranca",
258 | "city": {
259 | "name": "Verona"
260 | }
261 | },
262 | "ext": {
263 | "warnings": "LONG_LAYOVER,CHANGE_OF_AIRPORT_SLICE,OVERNIGHT"
264 | },
265 | "origin": {
266 | "code": "SEA",
267 | "name": "Seattle\/Tacoma Sea\/Tac",
268 | "city": {
269 | "name": "Seattle"
270 | }
271 | },
272 | "segments": [
273 | {
274 | "arrival": "2014-11-08T07:30+00:00",
275 | "departure": "2014-11-07T14:05-08:00",
276 | "duration": 565,
277 | "bookingInfos": [
278 | {
279 | "bookingCode": "O",
280 | "cabin": "COACH"
281 | }
282 | ],
283 | "carrier": {
284 | "code": "BA",
285 | "shortName": "British Airways p.l.c."
286 | },
287 | "connection": {
288 | "changeOfAirport": true,
289 | "changeOfTerminal": true,
290 | "duration": 1435
291 | },
292 | "destination": {
293 | "code": "LHR",
294 | "city": {
295 | "name": "London"
296 | }
297 | },
298 | "flight": {
299 | "number": 52
300 | },
301 | "legs": [
302 | {
303 | "arrival": "2014-11-08T07:30+00:00",
304 | "departure": "2014-11-07T14:05-08:00",
305 | "duration": 565,
306 | "aircraft": {
307 | "shortName": "Boeing 777"
308 | },
309 | "destination": {
310 | "code": "LHR",
311 | "name": "London Heathrow",
312 | "city": {
313 | "name": "London"
314 | }
315 | },
316 | "origin": {
317 | "code": "SEA",
318 | "city": {
319 | "name": "Seattle"
320 | }
321 | },
322 | "services": [
323 | {
324 | "amenities": [
325 | "Non-smoking"
326 | ],
327 | "meals": [
328 | "Meal"
329 | ]
330 | }
331 | ]
332 | }
333 | ],
334 | "origin": {
335 | "code": "SEA",
336 | "city": {
337 | "name": "Seattle"
338 | }
339 | }
340 | },
341 | {
342 | "arrival": "2014-11-09T10:25+01:00",
343 | "departure": "2014-11-09T07:25+00:00",
344 | "duration": 120,
345 | "bookingInfos": [
346 | {
347 | "bookingCode": "S",
348 | "cabin": "COACH"
349 | }
350 | ],
351 | "carrier": {
352 | "code": "BA",
353 | "shortName": "British Airways p.l.c."
354 | },
355 | "destination": {
356 | "code": "VRN",
357 | "city": {
358 | "name": "Verona"
359 | }
360 | },
361 | "flight": {
362 | "number": 2596
363 | },
364 | "legs": [
365 | {
366 | "arrival": "2014-11-09T10:25+01:00",
367 | "departure": "2014-11-09T07:25+00:00",
368 | "duration": 120,
369 | "aircraft": {
370 | "shortName": "Boeing 737"
371 | },
372 | "destination": {
373 | "code": "VRN",
374 | "name": "Verona Villafranca",
375 | "city": {
376 | "name": "Verona"
377 | }
378 | },
379 | "origin": {
380 | "code": "LGW",
381 | "city": {
382 | "name": "London"
383 | }
384 | },
385 | "services": [
386 | {
387 | "amenities": [
388 | "Non-smoking"
389 | ],
390 | "meals": [
391 | "Snack or Brunch"
392 | ]
393 | }
394 | ]
395 | }
396 | ],
397 | "origin": {
398 | "code": "LGW",
399 | "city": {
400 | "name": "London"
401 | }
402 | }
403 | }
404 | ]
405 | }
406 | ]
407 | },
408 | "pricings": [
409 | {
410 | "displayPrice": "USD861.40",
411 | "ext": {
412 | "totalPrice": "USD861.40",
413 | "pax": {
414 | "adults": 1
415 | }
416 | }
417 | }
418 | ],
419 | "tickets": [
420 | {
421 | "displayPrice": "USD861.40",
422 | "pricings": [
423 | {
424 | "displayPrice": "USD861.40",
425 | "ext": {
426 | "totalPrice": "USD861.40",
427 | "pax": {
428 | "adults": 1
429 | },
430 | "taxTotals": [
431 | {
432 | "code": "AY",
433 | "totalDisplayPrice": "USD11.20",
434 | "tax": {
435 | "key": "0\/0",
436 | "name": "US September 11th Security Fee"
437 | }
438 | },
439 | {
440 | "code": "XA",
441 | "totalDisplayPrice": "USD5.00",
442 | "tax": {
443 | "key": "0\/1",
444 | "name": "USDA APHIS Fee"
445 | }
446 | },
447 | {
448 | "code": "XY",
449 | "totalDisplayPrice": "USD7.00",
450 | "tax": {
451 | "key": "0\/2",
452 | "name": "US Immigration Fee"
453 | }
454 | },
455 | {
456 | "code": "YC",
457 | "totalDisplayPrice": "USD5.50",
458 | "tax": {
459 | "key": "0\/3",
460 | "name": "US Customs Fee"
461 | }
462 | },
463 | {
464 | "code": "UB",
465 | "totalDisplayPrice": "USD76.40",
466 | "tax": {
467 | "key": "0\/4",
468 | "name": "United Kingdom Passenger Service Charge"
469 | }
470 | },
471 | {
472 | "code": "US",
473 | "totalDisplayPrice": "USD17.50",
474 | "tax": {
475 | "key": "0\/5",
476 | "name": "US International Arrival Tax"
477 | }
478 | },
479 | {
480 | "code": "IT",
481 | "totalDisplayPrice": "USD8.00",
482 | "tax": {
483 | "key": "0\/6",
484 | "name": "Italian Embarkation Tax"
485 | }
486 | },
487 | {
488 | "code": "VT",
489 | "totalDisplayPrice": "USD4.10",
490 | "tax": {
491 | "key": "0\/7",
492 | "name": "Italian Security Charge"
493 | }
494 | },
495 | {
496 | "code": "EX",
497 | "totalDisplayPrice": "USD0.90",
498 | "tax": {
499 | "key": "0\/8",
500 | "name": "Italian Security Bag Charge"
501 | }
502 | },
503 | {
504 | "code": "HB",
505 | "totalDisplayPrice": "USD8.60",
506 | "tax": {
507 | "key": "0\/9",
508 | "name": "Italian Council City Tax"
509 | }
510 | },
511 | {
512 | "code": "MJ",
513 | "totalDisplayPrice": "USD1.20",
514 | "tax": {
515 | "key": "0\/10",
516 | "name": "Italian Passenger Service Charge"
517 | }
518 | },
519 | {
520 | "code": "YQ",
521 | "totalDisplayPrice": "USD256.00",
522 | "tax": {
523 | "key": "0\/11"
524 | }
525 | },
526 | {
527 | "code": "US",
528 | "totalDisplayPrice": "USD17.50",
529 | "tax": {
530 | "key": "0\/14",
531 | "name": "US International Departure Tax"
532 | }
533 | },
534 | {
535 | "code": "XF",
536 | "totalDisplayPrice": "USD4.50",
537 | "tax": {
538 | "key": "0\/16",
539 | "name": "US Passenger Facility Charge"
540 | }
541 | }
542 | ]
543 | },
544 | "fareCalculations": [
545 | {
546 | "lines": [
547 | "VRN BA X\/LON BA X\/E\/CHI BA SEA M 233.89QKX7SAL BA X\/LON BA VRN M 217.57OKW7SAL NUC 451.46 END ROE 0.735385 XT 0.90EX 8.60HB 8.00IT 1.20MJ 4.10VT 76.40UB 5.50YC 7.00XY 5.00XA 35.00US 11.20AY 256.00YQ 4.50XF SEA4.50"
548 | ]
549 | }
550 | ],
551 | "fares": [
552 | {
553 | "carrier": "BA",
554 | "code": "QKX7SAL",
555 | "destinationCity": "SEA",
556 | "displayAdjustedPrice": "USD226.91",
557 | "key": "0\/0",
558 | "originCity": "VRN",
559 | "tag": "ROUND-TRIP",
560 | "bookingInfos": [
561 | {
562 | "bookingCode": "Q",
563 | "cabin": "COACH",
564 | "segment": {
565 | "destination": "LGW",
566 | "origin": "VRN"
567 | }
568 | },
569 | {
570 | "bookingCode": "Q",
571 | "cabin": "COACH",
572 | "segment": {
573 | "destination": "ORD",
574 | "origin": "LHR"
575 | }
576 | },
577 | {
578 | "bookingCode": "Q",
579 | "cabin": "COACH",
580 | "segment": {
581 | "destination": "SEA",
582 | "origin": "ORD"
583 | }
584 | }
585 | ],
586 | "ptcs": [
587 | "ADT"
588 | ]
589 | },
590 | {
591 | "carrier": "BA",
592 | "code": "OKW7SAL",
593 | "destinationCity": "VRN",
594 | "displayAdjustedPrice": "USD211.08",
595 | "key": "0\/1",
596 | "originCity": "SEA",
597 | "tag": "ROUND-TRIP",
598 | "bookingInfos": [
599 | {
600 | "bookingCode": "O",
601 | "cabin": "COACH",
602 | "segment": {
603 | "destination": "LHR",
604 | "origin": "SEA"
605 | }
606 | },
607 | {
608 | "bookingCode": "S",
609 | "cabin": "COACH",
610 | "segment": {
611 | "destination": "VRN",
612 | "origin": "LGW"
613 | }
614 | }
615 | ],
616 | "ptcs": [
617 | "ADT"
618 | ]
619 | }
620 | ],
621 | "notes": [
622 | "This ticket is non-refundable.",
623 | "Changes to this ticket will incur a penalty fee."
624 | ],
625 | "pricingTaxes": [
626 | {
627 | "code": "YQ",
628 | "displayPrice": "USD128.00",
629 | "id": "YQF-BA"
630 | },
631 | {
632 | "code": "YQ",
633 | "displayPrice": "USD128.00",
634 | "id": "YQF-BA"
635 | }
636 | ]
637 | }
638 | ]
639 | }
640 | ]
641 | }
642 | }
643 | }
644 |
645 |
646 |
--------------------------------------------------------------------------------
/web_app.py:
--------------------------------------------------------------------------------
1 | import ConfigParser
2 | import os
3 | import mongoengine
4 |
5 | from dateutil.rrule import DAILY
6 | from datetime import datetime
7 | from flask import Flask, render_template, send_from_directory, request
8 | from flight_scraper.scraper import FlightScraper
9 | from flight_scraper.utils.graph import graph_prices
10 | from flight_scraper.utils.scraper import generate_date_pairs, search_seats
11 |
12 | #----------------------------------------
13 | # Utilities
14 | #----------------------------------------
15 |
16 |
17 | #----------------------------------------
18 | # initialization
19 | #----------------------------------------
# The Flask application object that the route decorators below attach to.
app = Flask(__name__)

# Settings come from flight_scraper.cfg, resolved against the working directory.
Config = ConfigParser.ConfigParser()
if not Config.read('flight_scraper.cfg'):
    # read() returns the list of files it parsed; empty means nothing loaded.
    print("Please copy flight_scraper.cfg.example to flight_scraper.cfg")
    raise Exception('Could not read config file')

# Connect to MongoDB, passing an explicit host only when one is configured.
try:
    host_string = Config.get("mongodb", "host")
except ConfigParser.NoOptionError:
    mongoengine.connect(Config.get("mongodb", "name"))
else:
    mongoengine.connect(Config.get("mongodb", "name"), host=host_string)

# NOTE(review): debug mode is switched on unconditionally; together with the
# 0.0.0.0 bind in the launch section this looks unsafe outside development --
# confirm before deploying.
app.config.update(DEBUG=True)
36 |
37 | # flight_scraper = FlightScraper()
38 |
39 | #----------------------------------------
40 | # controllers
41 | #----------------------------------------
@app.route('/favicon.ico')
def favicon():
    """Serve ico/favicon.ico from the application's static directory."""
    static_dir = os.path.join(app.root_path, 'static')
    return send_from_directory(static_dir, 'ico/favicon.ico')
45 |
@app.route("/")
def index():
    """Render the index.html template for the site root."""
    landing_page = render_template('index.html')
    return landing_page
49 |
@app.route("/flight/query", methods=['GET'])
def flight_query():
    """
    Search flights for every date pair generated from the query string.

    Query parameters read: origin, dest, freq, start_date and until_date
    (both MM-DD-YYYY) and zero or more weekdays (integers).

    Returns the rendered query.html page, or a plain-text 400 response when
    a date or weekday value is missing or malformed.
    """
    origin = request.args.get('origin')
    dest = request.args.get('dest')
    freq = request.args.get('freq')

    # A missing date falls back to '' so strptime raises ValueError (handled
    # below) instead of a TypeError on None that would surface as a 500.
    try:
        start_date = datetime.strptime(request.args.get('start_date', ''), '%m-%d-%Y')
        until_date = datetime.strptime(request.args.get('until_date', ''), '%m-%d-%Y')
        weekdays = [int(day) for day in request.args.getlist('weekdays')]
    except ValueError:
        return "start_date and until_date must be MM-DD-YYYY and weekdays must be integers", 400

    #Can probably use dateutils parser for this.
    # NOTE: any frequency other than "DAILY" is forwarded unchanged.
    if freq == "DAILY":
        freq = DAILY

    date_pairs = generate_date_pairs(freq, weekdays, start_date, until_date)

    # One row per date pair: [first date ISO, second date ISO, search results].
    result = []
    for d in date_pairs:
        flight_scraper = FlightScraper(origin, dest, d[0], d[1])
        result.append([d[0].isoformat(), d[1].isoformat(), flight_scraper.search_flights()])

    return render_template('query.html', result=result)
78 |
@app.route("/flight/calendar_query", methods=['GET'])
def calendar_flight_query():
    """
    Run a calendar search between two dates and render the result.

    Query parameters read: origin, dest, start_date and until_date (both
    MM-DD-YYYY), airlines, length and max_stops. ``length`` is either a
    single integer or hyphen-separated integers such as "3-7".

    Returns the rendered calendar_query.html page, or a plain-text 400
    response when length or a date is missing or malformed.
    """
    origin = request.args.get('origin')
    dest = request.args.get('dest')
    airlines = request.args.get('airlines')
    max_stops = request.args.get('max_stops')

    # Missing values fall back to '' so int()/strptime raise ValueError
    # (handled below) instead of a TypeError on None that would become a 500.
    try:
        # A single number N becomes [N, N]; hyphen-separated numbers become
        # one int per part.
        day_range = [int(part) for part in request.args.get('length', '').split('-')]
        if len(day_range) == 1:
            day_range = day_range * 2

        start_date = datetime.strptime(request.args.get('start_date', ''), '%m-%d-%Y')
        until_date = datetime.strptime(request.args.get('until_date', ''), '%m-%d-%Y')
    except ValueError:
        return "length must be N or N-M and start_date/until_date must be MM-DD-YYYY", 400

    flight_scraper = FlightScraper(origin, dest, start_date, until_date,
                                   airlines=airlines, day_range=day_range, max_stops=max_stops)
    return render_template('calendar_query.html', result=flight_scraper.search_calendar())
100 |
@app.route("/seat/query", methods=['GET'])
def seat_query():
    """
    Placeholder for the seat availability query.

    TODO: Refactor

    Responds with 501 so the route fails explicitly; a view that returns
    nothing is rejected by Flask and surfaces as a server error.
    """
    # Previous implementation, kept as the starting point for the refactor:
    #origin = request.args.get('origin')
    #dest = request.args.get('dest')
    #dept = request.args.get('dept')
    #
    #dept = datetime.strptime(dept, '%m-%d-%Y')
    #
    #return render_template('seats.html', flights=search_seats(origin, dest, dept))
    return "Seat query is not implemented", 501
113 |
@app.route("/graph", methods=['GET'])
def graph_flights():
    """
    Render the price graph for one departure/return date pair.

    Query parameters read: origin, dest, dept and ret (both MM-DD-YYYY).

    Returns the rendered graph.html page, or a plain-text 400 response when
    dept or ret is missing or malformed.
    """
    origin = request.args.get('origin')
    dest = request.args.get('dest')

    # A missing date falls back to '' so strptime raises ValueError (handled
    # below) instead of a TypeError on None that would surface as a 500.
    try:
        dept = datetime.strptime(request.args.get('dept', ''), '%m-%d-%Y')
        ret = datetime.strptime(request.args.get('ret', ''), '%m-%d-%Y')
    except ValueError:
        return "dept and ret must be MM-DD-YYYY", 400

    flight_scraper = FlightScraper(origin, dest, dept, ret)

    solutions = flight_scraper.solutions()
    length = len(solutions)

    return render_template('graph.html', json_obj=graph_prices(flight_scraper),
                           solutions=solutions, lengthSol=length)
134 |
@app.route("/graph_seats", methods=['GET'])
def graph_2():
    """
    Placeholder for the seat availability graph.

    TODO: Refactor

    Responds with 501 so the route fails explicitly; a view that returns
    nothing is rejected by Flask and surfaces as a server error.
    """
    # Previous implementation, kept as the starting point for the refactor.
    # NOTE(review): graph_seats is not among this module's visible imports.
    #origin = request.args.get('origin')
    #dest = request.args.get('dest')
    #dept = request.args.get('dept')
    #ret = request.args.get('ret')
    #
    #dept = datetime.strptime(dept, '%m-%d-%Y')
    #ret = datetime.strptime(ret, '%m-%d-%Y')
    #
    #return render_template('graph_seats.html', json_obj=graph_seats(origin, dest, dept))
    return "Seat graph is not implemented", 501
149 |
@app.route("/graph_weekly", methods=['GET'])
def graph_weekly():
    """
    Render one price graph per generated date pair across 2013.

    Query parameters read: origin and dest. The date window (1-1-2013 to
    12-31-2013) and the weekday list [4, 6] are fixed in the code.

    Returns the rendered graph_weekly.html page.
    """
    origin = request.args.get('origin')
    dest = request.args.get('dest')

    start_date = datetime.strptime("1-1-2013", '%m-%d-%Y')
    until_date = datetime.strptime("12-31-2013", '%m-%d-%Y')
    weekdays = [4, 6]

    #Can probably use dateutils parser for this.
    freq = DAILY

    date_pairs = generate_date_pairs(freq, weekdays, start_date, until_date)

    # Build a scraper per date pair. The earlier code mutated a module-level
    # flight_scraper that is no longer defined, which raised NameError here.
    result = []
    for d in date_pairs:
        flight_scraper = FlightScraper(origin, dest, d[0], d[1])
        result.append(graph_prices(flight_scraper))

    # NOTE(review): graph_weekly.html is not in the repository's templates
    # listing -- confirm the template exists.
    return render_template('graph_weekly.html', graphs=result, length=len(result))
173 |
174 |
175 |
176 | #----------------------------------------
177 | # launch
178 | #----------------------------------------
179 |
if __name__ == "__main__":
    # The [webapp] port setting is the fallback; a PORT environment variable
    # overrides it when present.
    configured_port = Config.get("webapp", "port")
    port = int(os.environ.get("PORT", configured_port))
    # Listen on every network interface.
    app.run(host='0.0.0.0', port=port)
183 |
184 |
185 |
--------------------------------------------------------------------------------