├── .gitignore
├── LICENSE
├── Notebooks
├── airbnb-scraper.ipynb
├── scrape_lat_long.ipynb
├── selenium_scrape_amenities_ids.ipynb
├── selenium_scrape_houserules.ipynb
├── selenium_scrape_language_ids.ipynb
├── selenium_scrape_neighborhood_ids.ipynb
├── selenium_scrape_property_type_ids.ipynb
└── url-sandbox.ipynb
├── README.md
├── airbnbapi
├── __init__.py
├── controllers.py
├── helpers.py
└── resources.py
├── images
├── airbnb_logo.jpg
└── airbnb_logo.png
├── requirements.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Halmon Lui
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Notebooks/airbnb-scraper.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 31,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import requests\n",
10 | "import pprint\n",
11 | "from bs4 import BeautifulSoup\n",
12 | "\n",
13 | "pp = pprint.PrettyPrinter(indent=4)\n",
14 | "\n",
15 | "# Get listing info\n",
16 | "# URL = 'https://www.airbnb.com/s/homes?search_type=pagination&query=Boston%2C%20MA&checkin=2020-08-16&checkout=2020-08-22&adults=2&items_offset=0'\n",
17 | "def getListings(city, state, checkin, checkout, adults='1', page='0', search_type='pagination'):\n",
18 | " # Build the URL\n",
19 | " baseurl = 'https://www.airbnb.com/s/homes?'\n",
20 | "\n",
21 | " # Add pagination\n",
22 | " items_offset = str(int(page) * 20)\n",
23 | " URL = baseurl + 'search_type=' + search_type + '&items_offset=' + items_offset\n",
24 | " \n",
25 | " # Add location\n",
26 | " query = city + '%2C%20' + state\n",
27 | " URL = URL + '&query=' + query\n",
28 | " \n",
29 | " # Add logistics\n",
30 | " URL = URL + '&checkin=' + checkin + '&checkout=' + checkout + '&adults=' + adults\n",
31 | " \n",
32 | " page = requests.get(URL)\n",
33 | " soup = BeautifulSoup(page.content, 'html.parser')\n",
34 | "\n",
35 | " listings = []\n",
36 | "\n",
37 | " links = soup.find_all('a')\n",
38 | " # GET LISTING NAME AND URL\n",
39 | " counter = 0\n",
40 | " for link in links:\n",
41 | " # We just want to add real listings, not all link names\n",
42 | " if link.get('data-check-info-section'):\n",
43 | " listing_name = link.get('aria-label')\n",
44 | " url = 'https://www.airbnb.com' + link.get('href')\n",
45 | " listings.append({'listing_name': listing_name, 'url': url})\n",
46 | " counter += 1\n",
47 | "\n",
48 | " # GET TOTAL PRICE\n",
49 | " spans = soup.find_all('span')\n",
50 | " counter = 0\n",
51 | " for span in spans:\n",
52 | " text = span.get_text()\n",
53 | " if text and 'total' in text:\n",
54 | " total = text.replace('$', '')\n",
55 | " total = total.replace(' total', '')\n",
56 | " listings[counter]['total_price'] = total\n",
57 | " counter += 1\n",
58 | "\n",
59 | " \n",
60 | " # GET SUPERHOST, LISTING_TYPE, RATING, NUM_REVIEWS\n",
61 | " divs = soup.find_all('div')\n",
62 | " counter = 0\n",
63 | " for div in divs:\n",
64 | " if counter < len(listings) and div.get_text() == listings[counter]['listing_name']:\n",
65 | " is_superhost = 'False'\n",
66 | " listing_type = ''\n",
67 | " rating = None\n",
68 | " num_reviews = '0'\n",
69 | "\n",
70 | " listing_info = div.previous_sibling\n",
71 | " for child in listing_info:\n",
72 | " if 'Entire ' in child.get_text() or 'Private ' in child.get_text():\n",
73 | " listing_type = child.get_text()\n",
74 | " elif 'SUPERHOST' in child.get_text():\n",
75 | " is_superhost = 'True'\n",
76 | " elif '(' and ')' in child.get_text():\n",
77 | " for c in child:\n",
78 | " split_rating = c.get_text().split()\n",
79 | " rating = split_rating[0]\n",
80 | " num_reviews = split_rating[1].replace('(', '')\n",
81 | " num_reviews = num_reviews.replace(')', '')\n",
82 | "\n",
83 | " listings[counter]['is_superhost'] = is_superhost\n",
84 | " listings[counter]['listing_type'] = listing_type\n",
85 | " listings[counter]['rating'] = rating\n",
86 | " listings[counter]['num_reviews'] = num_reviews\n",
87 | "\n",
88 | " counter += 1\n",
89 | "\n",
90 | " # GET PRICE PER NIGHT, AMENITIES, HOUSING_INFO\n",
91 | " counter = 0\n",
92 | " for span in spans:\n",
93 | " text = span.get_text()\n",
94 | " if text and '/ night' in text and 'total' not in text:\n",
95 | " price_per_night = None\n",
96 | " amenities = []\n",
97 | " housing_info = []\n",
98 | "\n",
99 | " # Some have a discounted price so we only want the actual price per night\n",
100 | " price_per_night = text.rsplit('$', 1)[1]\n",
101 | " price_per_night = price_per_night.replace(' / night', '')\n",
102 | " print('ppn', price_per_night)\n",
103 | "\n",
104 | " # Gets amenities like Wifi/Kitchen/Free Parking\n",
105 | " amenities = span.parent.parent.parent.previous_sibling.get_text()\n",
106 | " amenities = amenities.split(' · ')\n",
107 | "\n",
108 | " # Gets guests, bedrooms, baths\n",
109 | " housing_info = span.parent.parent.parent.previous_sibling.previous_sibling.get_text()\n",
110 | " housing_info = housing_info.split(' · ')\n",
111 | "\n",
112 | " listings[counter]['price_per_night'] = price_per_night \n",
113 | " listings[counter]['amenities'] = amenities\n",
114 | " listings[counter]['housing_info'] = housing_info\n",
115 | "\n",
116 | " counter += 1\n",
117 | "\n",
118 | " return listings"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 34,
124 | "metadata": {},
125 | "outputs": [
126 | {
127 | "name": "stdout",
128 | "output_type": "stream",
129 | "text": [
130 | "ppn 134\n",
131 | "
Wifi · Kitchen
\n",
132 | "ppn 120\n",
133 | "Free parking · Wifi
\n",
134 | "ppn 193\n",
135 | "Free parking · Wifi · Kitchen
\n",
136 | "ppn 89\n",
137 | "Wifi · Kitchen
\n",
138 | "ppn 95\n",
139 | "Wifi · Kitchen
\n",
140 | "ppn 73\n",
141 | "Wifi · Kitchen
\n",
142 | "ppn 112\n",
143 | "Wifi · Kitchen
\n",
144 | "ppn 91\n",
145 | "Free parking · Wifi · Kitchen
\n",
146 | "ppn 140\n",
147 | "Free parking · Wifi
\n",
148 | "ppn 109\n",
149 | "Free parking · Wifi · Kitchen
\n",
150 | "ppn 167\n",
151 | "Wifi · Kitchen
\n",
152 | "ppn 150\n",
153 | "Wifi
\n",
154 | "ppn 197\n",
155 | "Wifi
\n",
156 | "ppn 82\n",
157 | "Free parking · Wifi
\n",
158 | "ppn 99\n",
159 | "Wifi
\n",
160 | "ppn 204\n",
161 | "Wifi · Kitchen
\n",
162 | "ppn 104\n",
163 | "Free parking · Wifi · Kitchen
\n",
164 | "ppn 82\n",
165 | "Free parking · Wifi
\n",
166 | "ppn 43\n",
167 | "Free parking · Wifi
\n",
168 | "ppn 234\n",
169 | "Wifi · Kitchen
\n",
170 | "[ { 'amenities': ['Wifi', 'Kitchen'],\n",
171 | " 'housing_info': ['2 guests', 'Studio', '1 bed', '1 bath'],\n",
172 | " 'is_superhost': 'False',\n",
173 | " 'listing_name': 'South End Studio Perfect for Work Travel #26',\n",
174 | " 'listing_type': 'Entire apartment',\n",
175 | " 'num_reviews': '32',\n",
176 | " 'price_per_night': '134',\n",
177 | " 'rating': '4.66',\n",
178 | " 'total_price': '1,010',\n",
179 | " 'url': 'https://www.airbnb.com/rooms/6759439?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
180 | " { 'amenities': ['Free parking', 'Wifi'],\n",
181 | " 'housing_info': ['2 guests', '1 bedroom', '2 beds', '1 private bath'],\n",
182 | " 'is_superhost': 'True',\n",
183 | " 'listing_name': 'Room For Two Short Walk To Harvard (RM 8)',\n",
184 | " 'listing_type': 'Private room',\n",
185 | " 'num_reviews': '5',\n",
186 | " 'price_per_night': '120',\n",
187 | " 'rating': '5.0',\n",
188 | " 'total_price': '822',\n",
189 | " 'url': 'https://www.airbnb.com/rooms/35333477?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
190 | " { 'amenities': ['Free parking', 'Wifi', 'Kitchen'],\n",
191 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n",
192 | " 'is_superhost': 'True',\n",
193 | " 'listing_name': 'Luxury 1BR APT w/ parking by MIT/Harvard/BU/Fenway',\n",
194 | " 'listing_type': 'Entire apartment',\n",
195 | " 'num_reviews': '118',\n",
196 | " 'price_per_night': '193',\n",
197 | " 'rating': '4.94',\n",
198 | " 'total_price': '1,381',\n",
199 | " 'url': 'https://www.airbnb.com/rooms/34944649?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
200 | " { 'amenities': ['Wifi', 'Kitchen'],\n",
201 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 private bath'],\n",
202 | " 'is_superhost': 'True',\n",
203 | " 'listing_name': '★ The Map Room | Close to Subway + Downtown ★',\n",
204 | " 'listing_type': 'Private room',\n",
205 | " 'num_reviews': '32',\n",
206 | " 'price_per_night': '89',\n",
207 | " 'rating': '4.97',\n",
208 | " 'total_price': '632',\n",
209 | " 'url': 'https://www.airbnb.com/rooms/32895915?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
210 | " { 'amenities': ['Wifi', 'Kitchen'],\n",
211 | " 'housing_info': ['2 guests', 'Studio', '1 bed', '1 bath'],\n",
212 | " 'is_superhost': 'True',\n",
213 | " 'listing_name': 'Penthouse Room With Private Entrance',\n",
214 | " 'listing_type': 'Entire apartment',\n",
215 | " 'num_reviews': '143',\n",
216 | " 'price_per_night': '95',\n",
217 | " 'rating': '4.92',\n",
218 | " 'total_price': '730',\n",
219 | " 'url': 'https://www.airbnb.com/rooms/18330818?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
220 | " { 'amenities': ['Wifi', 'Kitchen'],\n",
221 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 shared bath'],\n",
222 | " 'is_superhost': 'True',\n",
223 | " 'listing_name': 'R1. Quiet private room near Kendall/MIT',\n",
224 | " 'listing_type': 'Private room',\n",
225 | " 'num_reviews': '134',\n",
226 | " 'price_per_night': '73',\n",
227 | " 'rating': '4.80',\n",
228 | " 'total_price': '511',\n",
229 | " 'url': 'https://www.airbnb.com/rooms/21738836?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
230 | " { 'amenities': ['Wifi', 'Kitchen'],\n",
231 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '2.5 shared baths'],\n",
232 | " 'is_superhost': 'True',\n",
233 | " 'listing_name': '★ Spacious, Modern & Comfy★Professionally Cleaned!',\n",
234 | " 'listing_type': 'Private room',\n",
235 | " 'num_reviews': '300',\n",
236 | " 'price_per_night': '112',\n",
237 | " 'rating': '4.96',\n",
238 | " 'total_price': '792',\n",
239 | " 'url': 'https://www.airbnb.com/rooms/22327141?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
240 | " { 'amenities': ['Free parking', 'Wifi', 'Kitchen'],\n",
241 | " 'housing_info': ['3 guests', '1 bedroom', '2 beds', '1 bath'],\n",
242 | " 'is_superhost': 'True',\n",
243 | " 'listing_name': 'Garden View 3 (private entrance & free parking)',\n",
244 | " 'listing_type': 'Entire apartment',\n",
245 | " 'num_reviews': '28',\n",
246 | " 'price_per_night': '91',\n",
247 | " 'rating': '4.89',\n",
248 | " 'total_price': '704',\n",
249 | " 'url': 'https://www.airbnb.com/rooms/38871127?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
250 | " { 'amenities': ['Free parking', 'Wifi'],\n",
251 | " 'housing_info': ['2 guests', '1 bedroom', '2 beds', '1 private bath'],\n",
252 | " 'is_superhost': 'True',\n",
253 | " 'listing_name': 'Room For Two Short Walk To Harvard (Rm 9)',\n",
254 | " 'listing_type': 'Private room',\n",
255 | " 'num_reviews': '9',\n",
256 | " 'price_per_night': '140',\n",
257 | " 'rating': '4.78',\n",
258 | " 'total_price': '981',\n",
259 | " 'url': 'https://www.airbnb.com/rooms/28801153?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
260 | " { 'amenities': ['Free parking', 'Wifi', 'Kitchen'],\n",
261 | " 'housing_info': ['4 guests', '1 bedroom', '3 beds', '1.5 baths'],\n",
262 | " 'is_superhost': 'True',\n",
263 | " 'listing_name': 'Renovated One Bedroom Apartment Allston, MA.',\n",
264 | " 'listing_type': 'Entire apartment',\n",
265 | " 'num_reviews': '78',\n",
266 | " 'price_per_night': '109',\n",
267 | " 'rating': '4.97',\n",
268 | " 'total_price': '803',\n",
269 | " 'url': 'https://www.airbnb.com/rooms/35001266?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
270 | " { 'amenities': ['Wifi', 'Kitchen'],\n",
271 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 private bath'],\n",
272 | " 'is_superhost': 'True',\n",
273 | " 'listing_name': '✔Arlington✔Skydeck ✔Walk Score 95✔Full Kitchen',\n",
274 | " 'listing_type': 'Private room',\n",
275 | " 'num_reviews': '293',\n",
276 | " 'price_per_night': '167',\n",
277 | " 'rating': '4.93',\n",
278 | " 'total_price': '1,194',\n",
279 | " 'url': 'https://www.airbnb.com/rooms/990668?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
280 | " { 'amenities': ['Wifi'],\n",
281 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n",
282 | " 'is_superhost': 'True',\n",
283 | " 'listing_name': 'private studio near Harvard/MIT',\n",
284 | " 'listing_type': 'Entire apartment',\n",
285 | " 'num_reviews': '175',\n",
286 | " 'price_per_night': '150',\n",
287 | " 'rating': '4.93',\n",
288 | " 'total_price': '1,067',\n",
289 | " 'url': 'https://www.airbnb.com/rooms/15512578?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
290 | " { 'amenities': ['Wifi'],\n",
291 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n",
292 | " 'is_superhost': 'False',\n",
293 | " 'listing_name': '2.bostonparkplaza · SUPERIOR ROOM',\n",
294 | " 'listing_type': '',\n",
295 | " 'num_reviews': '0',\n",
296 | " 'price_per_night': '197',\n",
297 | " 'rating': None,\n",
298 | " 'total_price': '1,182',\n",
299 | " 'url': 'https://www.airbnb.com/rooms/43715032?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
300 | " { 'amenities': ['Free parking', 'Wifi'],\n",
301 | " 'housing_info': ['3 guests', 'Studio', '2 beds', '1 bath'],\n",
302 | " 'is_superhost': 'True',\n",
303 | " 'listing_name': 'Guest Suite w Free Parking -\\n10min Train to Boston',\n",
304 | " 'listing_type': 'Entire guest suite',\n",
305 | " 'num_reviews': '169',\n",
306 | " 'price_per_night': '82',\n",
307 | " 'rating': '4.93',\n",
308 | " 'total_price': '588',\n",
309 | " 'url': 'https://www.airbnb.com/rooms/28574516?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
310 | " { 'amenities': ['Wifi'],\n",
311 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n",
312 | " 'is_superhost': 'True',\n",
313 | " 'listing_name': 'Quaint studio Apt close to CBD & Universities',\n",
314 | " 'listing_type': 'Entire apartment',\n",
315 | " 'num_reviews': '30',\n",
316 | " 'price_per_night': '99',\n",
317 | " 'rating': '4.93',\n",
318 | " 'total_price': '724',\n",
319 | " 'url': 'https://www.airbnb.com/rooms/37158199?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
320 | " { 'amenities': ['Wifi', 'Kitchen'],\n",
321 | " 'housing_info': ['3 guests', '1 bedroom', '1 bed', '1 bath'],\n",
322 | " 'is_superhost': 'True',\n",
323 | " 'listing_name': 'Brand New 1 BR | 1 BA on Charles St',\n",
324 | " 'listing_type': 'Entire apartment',\n",
325 | " 'num_reviews': '185',\n",
326 | " 'price_per_night': '204',\n",
327 | " 'rating': '4.95',\n",
328 | " 'total_price': '1,496',\n",
329 | " 'url': 'https://www.airbnb.com/rooms/16693642?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
330 | " { 'amenities': ['Free parking', 'Wifi', 'Kitchen'],\n",
331 | " 'housing_info': ['3 guests', '1 bedroom', '2 beds', '1 bath'],\n",
332 | " 'is_superhost': 'True',\n",
333 | " 'listing_name': 'Private Garden Level 1 BR APT W/ Parking in Newton',\n",
334 | " 'listing_type': 'Entire house',\n",
335 | " 'num_reviews': '48',\n",
336 | " 'price_per_night': '104',\n",
337 | " 'rating': '4.96',\n",
338 | " 'total_price': '798',\n",
339 | " 'url': 'https://www.airbnb.com/rooms/33319406?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
340 | " { 'amenities': ['Free parking', 'Wifi'],\n",
341 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n",
342 | " 'is_superhost': 'True',\n",
343 | " 'listing_name': 'Newly Renovated Boston College Studio',\n",
344 | " 'listing_type': 'Entire guest suite',\n",
345 | " 'num_reviews': '183',\n",
346 | " 'price_per_night': '82',\n",
347 | " 'rating': '4.75',\n",
348 | " 'total_price': '627',\n",
349 | " 'url': 'https://www.airbnb.com/rooms/22886713?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
350 | " { 'amenities': ['Free parking', 'Wifi'],\n",
351 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 shared bath'],\n",
352 | " 'is_superhost': 'True',\n",
353 | " 'listing_name': 'Cozy room! Amazing Location! Great Price!!',\n",
354 | " 'listing_type': 'Private room',\n",
355 | " 'num_reviews': '94',\n",
356 | " 'price_per_night': '43',\n",
357 | " 'rating': '4.86',\n",
358 | " 'total_price': '299',\n",
359 | " 'url': 'https://www.airbnb.com/rooms/37365036?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n",
360 | " { 'amenities': ['Wifi', 'Kitchen'],\n",
361 | " 'housing_info': ['5 guests', '2 bedrooms', '2 beds', '1.5 baths'],\n",
362 | " 'is_superhost': 'True',\n",
363 | " 'listing_name': 'Contemporary 2BR in Central Sq! Harvard/MIT',\n",
364 | " 'listing_type': 'Entire condominium',\n",
365 | " 'num_reviews': '111',\n",
366 | " 'price_per_night': '234',\n",
367 | " 'rating': '4.90',\n",
368 | " 'total_price': '1,715',\n",
369 | " 'url': 'https://www.airbnb.com/rooms/19183167?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'}]\n"
370 | ]
371 | }
372 | ],
373 | "source": [
374 | "soup = getListings('Boston', 'MA', '2020-08-16', '2020-08-22', adults='2', page='2')\n",
375 | "pp.pprint(soup)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": 24,
381 | "metadata": {},
382 | "outputs": [
383 | {
384 | "name": "stdout",
385 | "output_type": "stream",
386 | "text": [
387 | "https://www.airbnb.com/s/homes?search_type=pagination&items_offset=20&query=Boston%2C%20MA&checkin=2020-08-16&checkout=2020-08-22&adults=2\n"
388 | ]
389 | }
390 | ],
391 | "source": [
392 | "def getSoup(city, state, checkin, checkout, adults='1', page='0', search_type='pagination'):\n",
393 | " # Build the URL\n",
394 | " baseurl = 'https://www.airbnb.com/s/homes?'\n",
395 | "\n",
396 | " # Add pagination\n",
397 | " items_offset = str(int(page) * 20)\n",
398 | " URL = baseurl + 'search_type=' + search_type + '&items_offset=' + items_offset\n",
399 | " \n",
400 | " # Add location\n",
401 | " query = city + '%2C%20' + state\n",
402 | " URL = URL + '&query=' + query\n",
403 | " \n",
404 | " # Add logistics\n",
405 | " URL = URL + '&checkin=' + checkin + '&checkout=' + checkout + '&adults=' + adults\n",
406 | " print(URL)\n",
407 | " page = requests.get(URL)\n",
408 | " soup = BeautifulSoup(page.content, 'html.parser')\n",
409 | "\n",
410 | " return soup\n",
411 | "\n",
412 | "soup = getSoup('Boston', 'MA', '2020-08-16', '2020-08-22', adults='2', page='1')\n"
413 | ]
414 | },
415 | {
416 | "cell_type": "code",
417 | "execution_count": 26,
418 | "metadata": {},
419 | "outputs": [
420 | {
421 | "ename": "AttributeError",
422 | "evalue": "'NoneType' object has no attribute 'get_text'",
423 | "output_type": "error",
424 | "traceback": [
425 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
426 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
427 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 66\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[1;31m# Gets gusts, bedrooms, baths\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 68\u001b[1;33m \u001b[0mhousing_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mspan\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprevious_sibling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprevious_sibling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_text\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 69\u001b[0m \u001b[0mhousing_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mhousing_info\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m' · '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
428 | "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'get_text'"
429 | ]
430 | }
431 | ],
432 | "source": [
433 | "links = soup.find_all('a')\n",
434 | "listings = []\n",
435 | "# GET LISTING NAME\n",
436 | "for link in links:\n",
437 | " # We just want to add real listings, not all link names\n",
438 | " if link.get('data-check-info-section'):\n",
439 | " listings.append({'listing_name': link.get('aria-label')})\n",
440 | "\n",
441 | "# GET SUPERHOST, LISTING_TYPE, RATING, NUM_REVIEWS\n",
442 | "divs = soup.find_all('div')\n",
443 | "counter = 0\n",
444 | "for div in divs:\n",
445 | " if counter < len(listings) and div.get_text() == listings[counter]['listing_name']:\n",
446 | " is_superhost = 'False'\n",
447 | " listing_type = ''\n",
448 | " rating = None\n",
449 | " num_reviews = '0'\n",
450 | " \n",
451 | " listing_info = div.previous_sibling\n",
452 | " for child in listing_info:\n",
453 | " if 'Entire ' in child.get_text() or 'Private ' in child.get_text():\n",
454 | " listing_type = child.get_text()\n",
455 | " elif 'SUPERHOST' in child.get_text():\n",
456 | " is_superhost = 'True'\n",
457 | " elif '(' and ')' in child.get_text():\n",
458 | " for c in child:\n",
459 | " split_rating = c.get_text().split()\n",
460 | " rating = split_rating[0]\n",
461 | " num_reviews = split_rating[1].replace('(', '')\n",
462 | " num_reviews = num_reviews.replace(')', '')\n",
463 | " \n",
464 | " listings[counter]['is_superhost'] = is_superhost\n",
465 | " listings[counter]['listing_type'] = listing_type\n",
466 | " listings[counter]['rating'] = rating\n",
467 | " listings[counter]['num_reviews'] = num_reviews\n",
468 | " \n",
469 | " counter += 1\n",
470 | " \n",
471 | "# GET TOTAL PRICE\n",
472 | "spans = soup.find_all('span')\n",
473 | "counter = 0\n",
474 | "for span in spans:\n",
475 | " text = span.get_text()\n",
476 | " if text and 'total' in text:\n",
477 | " total = text.replace('$', '')\n",
478 | " total = total.replace(' total', '')\n",
479 | " listings[counter]['total_price'] = total\n",
480 | " counter += 1\n",
481 | "\n",
482 | "# GET PRICE PER NIGHT, AMENITIES, HOUSING_INFO\n",
483 | "counter = 0\n",
484 | "for span in spans:\n",
485 | " text = span.get_text()\n",
486 | " if text and '/ night' in text and 'total' not in text:\n",
487 | " price_per_night = None\n",
488 | " amenities = []\n",
489 | " housing_info = []\n",
490 | "\n",
491 | " # Some have a discounted price so we only want the actual price per night\n",
492 | " price_per_night = text.rsplit('$', 1)[1]\n",
493 | " price_per_night = price_per_night.replace(' / night', '')\n",
494 | " \n",
495 | " # Gets amenities like Wifi/Kitchen/Free Parking\n",
496 | " amenities = span.parent.parent.parent.previous_sibling.get_text()\n",
497 | " amenities = amenities.split(' · ')\n",
498 | " \n",
499 | " # Gets guests, bedrooms, baths\n",
500 | " housing_info = span.parent.parent.parent.previous_sibling.previous_sibling.get_text()\n",
501 | " housing_info = housing_info.split(' · ')\n",
502 | " \n",
503 | "# # Gets is_superhost, listing_type, rating, and num_reviews\n",
504 | "# listing_info = span.parent.parent.parent.previous_sibling.previous_sibling.previous_sibling.previous_sibling.children\n",
505 | "# for child in listing_info:\n",
506 | "# if 'Entire ' in child.get_text() or 'Private ' in child.get_text():\n",
507 | "# listing_type = child.get_text()\n",
508 | "# elif 'SUPERHOST' in child.get_text():\n",
509 | "# is_superhost = 'True'\n",
510 | "# elif '(' and ')' in child.get_text():\n",
511 | "# for c in child:\n",
512 | "# split_rating = c.get_text().split()\n",
513 | "# rating = split_rating[0]\n",
514 | "# num_reviews = split_rating[1].replace('(', '')\n",
515 | "# num_reviews = num_reviews.replace(')', '')\n",
516 | "\n",
517 | " listings[counter]['price_per_night'] = price_per_night \n",
518 | " listings[counter]['amenities'] = amenities\n",
519 | " listings[counter]['housing_info'] = housing_info\n",
520 | "\n",
521 | " counter += 1\n",
522 | "\n",
523 | "\n",
524 | "pp.pprint(listings)"
525 | ]
526 | }
527 | ],
528 | "metadata": {
529 | "kernelspec": {
530 | "display_name": "Python 3",
531 | "language": "python",
532 | "name": "python3"
533 | },
534 | "language_info": {
535 | "codemirror_mode": {
536 | "name": "ipython",
537 | "version": 3
538 | },
539 | "file_extension": ".py",
540 | "mimetype": "text/x-python",
541 | "name": "python",
542 | "nbconvert_exporter": "python",
543 | "pygments_lexer": "ipython3",
544 | "version": "3.7.0"
545 | }
546 | },
547 | "nbformat": 4,
548 | "nbformat_minor": 2
549 | }
550 |
--------------------------------------------------------------------------------
/Notebooks/scrape_lat_long.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 36,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "Couldnt get the coordinate\n",
13 | "Couldnt get the coordinate\n",
14 | "53.8147 -1.52538\n"
15 | ]
16 | }
17 | ],
18 | "source": [
19 | "import requests, re, time\n",
20 | "\n",
21 | "attempts = 0\n",
22 | "success = False\n",
23 | "\n",
24 | "# Sometimes the response doesn't include the lat/long, so allow up to 10 attempts to get it\n",
25 | "while not success and attempts < 10:\n",
26 | " try:\n",
27 | " r = requests.get('https://www.airbnb.co.uk/rooms/15307317')\n",
28 | " p_lat = re.compile(r'\"lat\":([-0-9.]+),')\n",
29 | " p_lng = re.compile(r'\"lng\":([-0-9.]+),')\n",
30 | " lat = p_lat.findall(r.text)[0]\n",
31 | " lng = p_lng.findall(r.text)[0]\n",
32 | " success = True # Found the lat and long, stop looping\n",
33 | " except:\n",
34 | " print('Couldnt get the coordinate')\n",
35 | " attempts += 1\n",
36 | "\n",
37 | "print(lat,lng)"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 45,
43 | "metadata": {},
44 | "outputs": [],
45 | "source": [
46 | "import requests, re, time\n",
47 | "\n",
48 | "# Pass in listing id and return coordinates of listing\n",
49 | "def get_listing_coordinates(id):\n",
50 | " attempts = 0\n",
51 | " success = False\n",
52 | "\n",
53 | " # Sometimes the response doesn't include the lat/long, so allow up to 10 attempts to get it\n",
54 | " while not success and attempts < 10:\n",
55 | " try:\n",
56 | " URL = 'https://www.airbnb.com/rooms/' + id\n",
57 | " r = requests.get(URL)\n",
58 | " p_lat = re.compile(r'\"lat\":([-0-9.]+),')\n",
59 | " p_lng = re.compile(r'\"lng\":([-0-9.]+),')\n",
60 | " lat = p_lat.findall(r.text)[0]\n",
61 | " lng = p_lng.findall(r.text)[0]\n",
62 | " success = True # Found the lat and long, stop looping\n",
63 | " return {'latitude': lat, 'longitude': lng}\n",
64 | " except:\n",
65 | " # An exception here usually means the page loaded without coordinates, so we retry\n",
66 | " attempts += 1\n",
67 | "\n",
68 | " return {'Unable to get the coordinates'}, 400"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 46,
74 | "metadata": {},
75 | "outputs": [
76 | {
77 | "name": "stdout",
78 | "output_type": "stream",
79 | "text": [
80 | "{'latitude': '42.3766', 'longitude': '-71.03634'}\n"
81 | ]
82 | }
83 | ],
84 | "source": [
85 | "print(get_listing_coordinates('17974950'))"
86 | ]
87 | }
88 | ],
89 | "metadata": {
90 | "kernelspec": {
91 | "display_name": "Python 3",
92 | "language": "python",
93 | "name": "python3"
94 | },
95 | "language_info": {
96 | "codemirror_mode": {
97 | "name": "ipython",
98 | "version": 3
99 | },
100 | "file_extension": ".py",
101 | "mimetype": "text/x-python",
102 | "name": "python",
103 | "nbconvert_exporter": "python",
104 | "pygments_lexer": "ipython3",
105 | "version": "3.7.0"
106 | }
107 | },
108 | "nbformat": 4,
109 | "nbformat_minor": 2
110 | }
111 |
--------------------------------------------------------------------------------
/Notebooks/selenium_scrape_amenities_ids.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 22,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import time\n",
10 | "from selenium import webdriver\n",
11 | "from selenium.webdriver.chrome.options import Options \n",
12 | "import pprint\n",
13 | "from bs4 import BeautifulSoup\n",
14 | "\n",
15 | "def getAmenities():\n",
16 | " base_url = 'https://www.airbnb.com/s/homes?query='\n",
17 | " URL = base_url + 'Boston' + '%2C%20' + 'MA' # Can use any city/state\n",
18 | " chrome_options = Options() \n",
19 | " chrome_options.add_argument(\"--headless\") \n",
20 | " driver = webdriver.Chrome(options=chrome_options) \n",
21 | " driver.set_window_size(500, 951) # Manually set window size so we can find by class name later\n",
22 | " driver.get(URL)\n",
23 | " time.sleep(1) # Since we are in a browser, the javascript takes time to run so let's give it time\n",
24 | " more_filters_button = driver.find_elements_by_xpath('//*[@id=\"filter-menu-chip-group\"]/div[2]/button')[0]\n",
25 | " more_filters_button.click()\n",
26 | " time.sleep(1) # Waiting for page's js to run\n",
27 | " show_amenities = driver.find_elements_by_class_name('_6lth7f')[1] # Dangerous: class names change automatically based on window dimensions, and they might also rotate every once in a while for Airbnb security\n",
28 | " show_amenities.click()\n",
29 | "# show_facilities = driver.find_elements_by_class_name('_6lth7f')[2] # Dangerous, classnames automatically change based on window dimensions, they might also rotate every once and a while for airbnb security\n",
30 | "# show_facilities.click()\n",
31 | " show_accessibility = driver.find_elements_by_class_name('_6lth7f')[0] # Dangerous: class names change automatically based on window dimensions, and they might also rotate every once in a while for Airbnb security\n",
32 | " show_accessibility.click()\n",
33 | " soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
34 | " driver.quit()\n",
35 | "\n",
36 | " amenities = []\n",
37 | " inputs = soup.find_all('input')\n",
38 | " for i in inputs:\n",
39 | " ids = i.get('id')\n",
40 | " if ids and 'amenities' in ids:\n",
41 | " amenity_id = ids.replace('amenities-', '')\n",
42 | " amenity = i.get('name')\n",
43 | " amenities.append({'amenity': amenity, 'amenity_id': amenity_id})\n",
44 | "\n",
45 | " return amenities"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 23,
51 | "metadata": {},
52 | "outputs": [
53 | {
54 | "data": {
55 | "text/plain": [
56 | "[{'amenity': 'No stairs or steps to enter', 'amenity_id': '110'},\n",
57 | " {'amenity': 'Well-lit path to entrance', 'amenity_id': '113'},\n",
58 | " {'amenity': 'Wide entrance for guests', 'amenity_id': '111'},\n",
59 | " {'amenity': 'Step-free path to entrance', 'amenity_id': '112'},\n",
60 | " {'amenity': 'Wide hallways', 'amenity_id': '109'},\n",
61 | " {'amenity': 'Elevator', 'amenity_id': '21'},\n",
62 | " {'amenity': 'No stairs or steps to enter', 'amenity_id': '115'},\n",
63 | " {'amenity': 'Wide entrance', 'amenity_id': '116'},\n",
64 | " {'amenity': 'Accessible-height bed', 'amenity_id': '118'},\n",
65 | " {'amenity': 'Extra space around bed', 'amenity_id': '117'},\n",
66 | " {'amenity': 'Electric profiling bed', 'amenity_id': '288'},\n",
67 | " {'amenity': 'No stairs or steps to enter', 'amenity_id': '120'},\n",
68 | " {'amenity': 'Wide doorway to guest bathroom', 'amenity_id': '121'},\n",
69 | " {'amenity': 'Extra space around toilet', 'amenity_id': '608'},\n",
70 | " {'amenity': 'Accessible-height toilet', 'amenity_id': '125'},\n",
71 | " {'amenity': 'Fixed grab bars for toilet', 'amenity_id': '295'},\n",
72 | " {'amenity': 'Extra space around shower', 'amenity_id': '609'},\n",
73 | " {'amenity': 'Fixed grab bars for shower', 'amenity_id': '294'},\n",
74 | " {'amenity': 'Step-free shower', 'amenity_id': '296'},\n",
75 | " {'amenity': 'Shower chair', 'amenity_id': '297'},\n",
76 | " {'amenity': 'Handheld shower head', 'amenity_id': '136'},\n",
77 | " {'amenity': 'Bathtub with bath chair', 'amenity_id': '123'},\n",
78 | " {'amenity': 'No stairs or steps to enter', 'amenity_id': '127'},\n",
79 | " {'amenity': 'Wide entryway', 'amenity_id': '128'},\n",
80 | " {'amenity': 'Disabled parking spot', 'amenity_id': '114'},\n",
81 | " {'amenity': 'Mobile hoist', 'amenity_id': '289'},\n",
82 | " {'amenity': 'Pool with pool hoist', 'amenity_id': '290'},\n",
83 | " {'amenity': 'Ceiling hoist', 'amenity_id': '291'},\n",
84 | " {'amenity': 'Kitchen', 'amenity_id': '8'},\n",
85 | " {'amenity': 'Shampoo', 'amenity_id': '41'},\n",
86 | " {'amenity': 'Heating', 'amenity_id': '30'},\n",
87 | " {'amenity': 'Air conditioning', 'amenity_id': '5'},\n",
88 | " {'amenity': 'Washer', 'amenity_id': '33'},\n",
89 | " {'amenity': 'Dryer', 'amenity_id': '34'},\n",
90 | " {'amenity': 'Wifi', 'amenity_id': '4'},\n",
91 | " {'amenity': 'Breakfast', 'amenity_id': '16'},\n",
92 | " {'amenity': 'Indoor fireplace', 'amenity_id': '27'},\n",
93 | " {'amenity': 'Hangers', 'amenity_id': '44'},\n",
94 | " {'amenity': 'Iron', 'amenity_id': '46'},\n",
95 | " {'amenity': 'Hair dryer', 'amenity_id': '45'},\n",
96 | " {'amenity': 'Laptop-friendly workspace', 'amenity_id': '47'},\n",
97 | " {'amenity': 'TV', 'amenity_id': '58'},\n",
98 | " {'amenity': 'Crib', 'amenity_id': '286'},\n",
99 | " {'amenity': 'High chair', 'amenity_id': '64'},\n",
100 | " {'amenity': 'Self check-in', 'amenity_id': '51'},\n",
101 | " {'amenity': 'Smoke alarm', 'amenity_id': '35'},\n",
102 | " {'amenity': 'Carbon monoxide alarm', 'amenity_id': '36'},\n",
103 | " {'amenity': 'Private bathroom', 'amenity_id': '78'},\n",
104 | " {'amenity': 'Pets allowed', 'amenity_id': '12'},\n",
105 | " {'amenity': 'Smoking allowed', 'amenity_id': '11'}]"
106 | ]
107 | },
108 | "execution_count": 23,
109 | "metadata": {},
110 | "output_type": "execute_result"
111 | }
112 | ],
113 | "source": [
114 | "getAmenities()"
115 | ]
116 | }
117 | ],
118 | "metadata": {
119 | "kernelspec": {
120 | "display_name": "Python 3",
121 | "language": "python",
122 | "name": "python3"
123 | },
124 | "language_info": {
125 | "codemirror_mode": {
126 | "name": "ipython",
127 | "version": 3
128 | },
129 | "file_extension": ".py",
130 | "mimetype": "text/x-python",
131 | "name": "python",
132 | "nbconvert_exporter": "python",
133 | "pygments_lexer": "ipython3",
134 | "version": "3.7.0"
135 | }
136 | },
137 | "nbformat": 4,
138 | "nbformat_minor": 2
139 | }
140 |
--------------------------------------------------------------------------------
/Notebooks/selenium_scrape_houserules.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 6,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import time\n",
10 | "from selenium import webdriver\n",
11 | "from selenium.webdriver.chrome.options import Options \n",
12 | "import pprint\n",
13 | "from bs4 import BeautifulSoup\n",
14 | "\n",
15 | "def get_house_rules():\n",
16 | " # Build URL\n",
17 | " base_url = 'https://www.airbnb.com/s/homes?query='\n",
18 | " URL = base_url + 'Boston' + '%2C%20' + 'MA' # Can use any city/state\n",
19 | "\n",
20 | " # Set up headless chrome driver\n",
21 | " chrome_options = Options()\n",
22 | " chrome_options.add_argument(\"--headless\")\n",
23 | " driver = webdriver.Chrome(options=chrome_options)\n",
24 | " driver.set_window_size(500, 951) # Manually set window size so we can find by class name later\n",
25 | "\n",
26 | " # Control page to show house rules\n",
27 | " driver.get(URL)\n",
28 | " time.sleep(1) # Since we are in a browser, the javascript takes time to run so let's give it time\n",
29 | " error_message = None\n",
30 | " more_filters_button = driver.find_elements_by_xpath('//*[@id=\"filter-menu-chip-group\"]/div[2]/button')[0]\n",
31 | " if more_filters_button:\n",
32 | " more_filters_button.click()\n",
33 | " time.sleep(1) # Waiting for page's js to run\n",
34 | " soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
35 | " else:\n",
36 | " error_message = 'Unable to access filter button'\n",
37 | "\n",
38 | " driver.quit() # Close driver to prevent idle processes\n",
39 | "\n",
40 | " # Return error message if we could not open the filter menu\n",
41 | " if error_message:\n",
42 | " return {'error': error_message}, 400\n",
43 | "\n",
44 | " house_rules = []\n",
45 | " inputs = soup.find_all('input')\n",
46 | " for i in inputs:\n",
47 | " ids = i.get('id')\n",
48 | " if ids and 'amenities' in ids:\n",
49 | " house_rule_id = ids.replace('amenities-', '')\n",
50 | " house_rule = i.get('name')\n",
51 | " house_rules.append({'house_rule': house_rule, 'house_rule_id': house_rule_id})\n",
52 | "\n",
53 | " return house_rules"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 7,
59 | "metadata": {},
60 | "outputs": [
61 | {
62 | "data": {
63 | "text/plain": [
64 | "[{'house_rule': 'Pets allowed', 'house_rule_id': '12'},\n",
65 | " {'house_rule': 'Smoking allowed', 'house_rule_id': '11'}]"
66 | ]
67 | },
68 | "execution_count": 7,
69 | "metadata": {},
70 | "output_type": "execute_result"
71 | }
72 | ],
73 | "source": [
74 | "get_house_rules()"
75 | ]
76 | }
77 | ],
78 | "metadata": {
79 | "kernelspec": {
80 | "display_name": "Python 3",
81 | "language": "python",
82 | "name": "python3"
83 | },
84 | "language_info": {
85 | "codemirror_mode": {
86 | "name": "ipython",
87 | "version": 3
88 | },
89 | "file_extension": ".py",
90 | "mimetype": "text/x-python",
91 | "name": "python",
92 | "nbconvert_exporter": "python",
93 | "pygments_lexer": "ipython3",
94 | "version": "3.7.0"
95 | }
96 | },
97 | "nbformat": 4,
98 | "nbformat_minor": 2
99 | }
100 |
--------------------------------------------------------------------------------
/Notebooks/selenium_scrape_language_ids.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 9,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import time\n",
10 | "from selenium import webdriver\n",
11 | "from selenium.webdriver.chrome.options import Options \n",
12 | "import pprint\n",
13 | "from bs4 import BeautifulSoup\n",
14 | "\n",
15 | "def getLanguageIds():\n",
16 | " base_url = 'https://www.airbnb.com/s/homes?query='\n",
17 | " URL = base_url + 'Boston' + '%2C%20' + 'MA' # Can use any city/state\n",
18 | " chrome_options = Options() \n",
19 | " chrome_options.add_argument(\"--headless\") \n",
20 | " driver = webdriver.Chrome(options=chrome_options) \n",
21 | " driver.set_window_size(500, 951) # Manually set window size so we can find by class name later\n",
22 | " driver.get(URL)\n",
23 | " time.sleep(1) # Since we are in a browser, the javascript takes time to run so let's give it time\n",
24 | " more_filters_button = driver.find_elements_by_xpath('//*[@id=\"filter-menu-chip-group\"]/div[2]/button')[0]\n",
25 | " more_filters_button.click()\n",
26 | " time.sleep(1) # Waiting for page's js to run\n",
27 | " show_all_languages_button = driver.find_elements_by_class_name('_6lth7f')[6] # Dangerous: class names change automatically based on window dimensions, and they might also rotate every once in a while for Airbnb security\n",
28 | " show_all_languages_button.click()\n",
29 | " soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
30 | " driver.quit()\n",
31 | "\n",
32 | " languages = []\n",
33 | " inputs = soup.find_all('input')\n",
34 | " for i in inputs:\n",
35 | " ids = i.get('id')\n",
36 | " if ids and 'languages' in ids:\n",
37 | " language_id = ids.replace('languages-', '')\n",
38 | " language = i.get('name')\n",
39 | " languages.append({'language': language, 'language_id': language_id})\n",
40 | "\n",
41 | " return languages"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 10,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "[{'language': 'English', 'language_id': '1'},\n",
53 | " {'language': 'French', 'language_id': '2'},\n",
54 | " {'language': 'German', 'language_id': '4'},\n",
55 | " {'language': 'Japanese', 'language_id': '8'},\n",
56 | " {'language': 'Italian', 'language_id': '16'},\n",
57 | " {'language': 'Russian', 'language_id': '32'},\n",
58 | " {'language': 'Spanish', 'language_id': '64'},\n",
59 | " {'language': 'Chinese (Simplified)', 'language_id': '128'},\n",
60 | " {'language': 'Arabic', 'language_id': '256'},\n",
61 | " {'language': 'Hindi', 'language_id': '512'},\n",
62 | " {'language': 'Portuguese', 'language_id': '1024'},\n",
63 | " {'language': 'Turkish', 'language_id': '2048'},\n",
64 | " {'language': 'Indonesian', 'language_id': '4096'},\n",
65 | " {'language': 'Dutch', 'language_id': '8192'},\n",
66 | " {'language': 'Korean', 'language_id': '16384'},\n",
67 | " {'language': 'Bengali', 'language_id': '32768'},\n",
68 | " {'language': 'Thai', 'language_id': '65536'},\n",
69 | " {'language': 'Punjabi', 'language_id': '131072'},\n",
70 | " {'language': 'Greek', 'language_id': '262144'},\n",
71 | " {'language': 'Sign', 'language_id': '524288'},\n",
72 | " {'language': 'Hebrew', 'language_id': '1048576'},\n",
73 | " {'language': 'Polish', 'language_id': '2097152'},\n",
74 | " {'language': 'Malay', 'language_id': '4194304'},\n",
75 | " {'language': 'Tagalog', 'language_id': '8388608'},\n",
76 | " {'language': 'Danish', 'language_id': '16777216'},\n",
77 | " {'language': 'Swedish', 'language_id': '33554432'},\n",
78 | " {'language': 'Norwegian', 'language_id': '67108864'},\n",
79 | " {'language': 'Finnish', 'language_id': '134217728'},\n",
80 | " {'language': 'Czech', 'language_id': '268435456'},\n",
81 | " {'language': 'Hungarian', 'language_id': '536870912'},\n",
82 | " {'language': 'Ukrainian', 'language_id': '1073741824'}]"
83 | ]
84 | },
85 | "execution_count": 10,
86 | "metadata": {},
87 | "output_type": "execute_result"
88 | }
89 | ],
90 | "source": [
91 | "getLanguageIds()"
92 | ]
93 | }
94 | ],
95 | "metadata": {
96 | "kernelspec": {
97 | "display_name": "Python 3",
98 | "language": "python",
99 | "name": "python3"
100 | },
101 | "language_info": {
102 | "codemirror_mode": {
103 | "name": "ipython",
104 | "version": 3
105 | },
106 | "file_extension": ".py",
107 | "mimetype": "text/x-python",
108 | "name": "python",
109 | "nbconvert_exporter": "python",
110 | "pygments_lexer": "ipython3",
111 | "version": "3.7.0"
112 | }
113 | },
114 | "nbformat": 4,
115 | "nbformat_minor": 2
116 | }
117 |
--------------------------------------------------------------------------------
/Notebooks/selenium_scrape_neighborhood_ids.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 38,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import time\n",
10 | "from selenium import webdriver\n",
11 | "from selenium.webdriver.chrome.options import Options \n",
12 | "import pprint\n",
13 | "from bs4 import BeautifulSoup\n",
14 | "\n",
15 | "def getNeighborhoodIds(city, state):\n",
16 | " base_url = 'https://www.airbnb.com/s/homes?query='\n",
17 | " URL = base_url + city + '%2C%20' + state\n",
18 | " chrome_options = Options() \n",
19 | " chrome_options.add_argument(\"--headless\") \n",
20 | " driver = webdriver.Chrome(options=chrome_options) \n",
21 | " driver.set_window_size(500, 951) # Manually set window size so we can find by class name later\n",
22 | " driver.get(URL)\n",
23 | " time.sleep(1) # Since we are in a browser, the javascript takes time to run so let's give it time\n",
24 | " more_filters_button = driver.find_elements_by_xpath('//*[@id=\"filter-menu-chip-group\"]/div[2]/button')[0]\n",
25 | " more_filters_button.click()\n",
26 | " time.sleep(1) # Waiting for page's js to run\n",
27 | " show_all_neighborhoods_button = driver.find_elements_by_class_name('_6lth7f')[5] # Dangerous: class names change automatically based on window dimensions, and they might also rotate every once in a while for Airbnb security\n",
28 | " show_all_neighborhoods_button.click()\n",
29 | " soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
30 | " driver.quit()\n",
31 | "\n",
32 | " neighborhoods = []\n",
33 | " inputs = soup.find_all('input')\n",
34 | " for i in inputs:\n",
35 | " ids = i.get('id')\n",
36 | " if ids and 'neighborhood_ids' in ids:\n",
37 | " neighborhood_id = ids.replace('neighborhood_ids-', '')\n",
38 | " neighborhood = i.get('name')\n",
39 | " neighborhoods.append({'neighborhood': neighborhood, 'neighborhood_id': neighborhood_id})\n",
40 | "\n",
41 | " return neighborhoods"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 40,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "[{'neighborhood': 'Allston-Brighton', 'neighborhood_id': '578'},\n",
53 | " {'neighborhood': 'East Boston', 'neighborhood_id': '579'},\n",
54 | " {'neighborhood': 'Winthrop', 'neighborhood_id': '580'},\n",
55 | " {'neighborhood': 'Theater District', 'neighborhood_id': '453'},\n",
56 | " {'neighborhood': 'Cambridge', 'neighborhood_id': '581'},\n",
57 | " {'neighborhood': 'Downtown', 'neighborhood_id': '454'},\n",
58 | " {'neighborhood': 'Roxbury', 'neighborhood_id': '518'},\n",
59 | " {'neighborhood': 'Somerville', 'neighborhood_id': '583'},\n",
60 | " {'neighborhood': 'South Boston', 'neighborhood_id': '456'},\n",
61 | " {'neighborhood': 'Charlestown', 'neighborhood_id': '584'},\n",
62 | " {'neighborhood': 'Chelsea', 'neighborhood_id': '585'},\n",
63 | " {'neighborhood': 'Coolidge Corner', 'neighborhood_id': '1035'},\n",
64 | " {'neighborhood': 'Jamaica Plain', 'neighborhood_id': '525'},\n",
65 | " {'neighborhood': 'Newton', 'neighborhood_id': '598'},\n",
66 | " {'neighborhood': 'Dorchester', 'neighborhood_id': '535'},\n",
67 | " {'neighborhood': 'Everett', 'neighborhood_id': '600'},\n",
68 | " {'neighborhood': 'Watertown', 'neighborhood_id': '601'},\n",
69 | " {'neighborhood': 'Medford', 'neighborhood_id': '603'},\n",
70 | " {'neighborhood': 'Malden', 'neighborhood_id': '604'},\n",
71 | " {'neighborhood': 'Mattapan', 'neighborhood_id': '543'},\n",
72 | " {'neighborhood': 'Revere', 'neighborhood_id': '609'},\n",
73 | " {'neighborhood': 'Financial District', 'neighborhood_id': '611'},\n",
74 | " {'neighborhood': 'Downtown Crossing', 'neighborhood_id': '613'},\n",
75 | " {'neighborhood': 'Hyde Park', 'neighborhood_id': '551'},\n",
76 | " {'neighborhood': 'Government Center', 'neighborhood_id': '618'},\n",
77 | " {'neighborhood': 'Back Bay', 'neighborhood_id': '363'},\n",
78 | " {'neighborhood': 'South End', 'neighborhood_id': '492'},\n",
79 | " {'neighborhood': 'Beacon Hill', 'neighborhood_id': '620'},\n",
80 | " {'neighborhood': 'West End', 'neighborhood_id': '366'},\n",
81 | " {'neighborhood': 'Brookline', 'neighborhood_id': '495'},\n",
82 | " {'neighborhood': 'Roslindale', 'neighborhood_id': '559'},\n",
83 | " {'neighborhood': 'North End', 'neighborhood_id': '367'},\n",
84 | " {'neighborhood': 'Leather District', 'neighborhood_id': '439'},\n",
85 | " {'neighborhood': 'Harvard Square', 'neighborhood_id': '888'},\n",
86 | " {'neighborhood': 'Fenway/Kenmore', 'neighborhood_id': '504'},\n",
87 | " {'neighborhood': 'Chestnut Hill', 'neighborhood_id': '1017'},\n",
88 | " {'neighborhood': 'West Roxbury', 'neighborhood_id': '570'},\n",
89 | " {'neighborhood': 'Mission Hill', 'neighborhood_id': '506'},\n",
90 | " {'neighborhood': 'Chinatown', 'neighborhood_id': '444'}]"
91 | ]
92 | },
93 | "execution_count": 40,
94 | "metadata": {},
95 | "output_type": "execute_result"
96 | }
97 | ],
98 | "source": [
99 | "getNeighborhoodIds('Boston', 'MA')"
100 | ]
101 | }
102 | ],
103 | "metadata": {
104 | "kernelspec": {
105 | "display_name": "Python 3",
106 | "language": "python",
107 | "name": "python3"
108 | },
109 | "language_info": {
110 | "codemirror_mode": {
111 | "name": "ipython",
112 | "version": 3
113 | },
114 | "file_extension": ".py",
115 | "mimetype": "text/x-python",
116 | "name": "python",
117 | "nbconvert_exporter": "python",
118 | "pygments_lexer": "ipython3",
119 | "version": "3.7.0"
120 | }
121 | },
122 | "nbformat": 4,
123 | "nbformat_minor": 2
124 | }
125 |
--------------------------------------------------------------------------------
/Notebooks/selenium_scrape_property_type_ids.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 7,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import time\n",
10 | "from selenium import webdriver\n",
11 | "from selenium.webdriver.chrome.options import Options \n",
12 | "import pprint\n",
13 | "from bs4 import BeautifulSoup\n",
14 | "\n",
15 | "def getPropertyTypes():\n",
16 | " base_url = 'https://www.airbnb.com/s/homes?query='\n",
17 | " URL = base_url + 'Boston' + '%2C%20' + 'MA' # Can use any city/state\n",
18 | " chrome_options = Options() \n",
19 | " chrome_options.add_argument(\"--headless\") \n",
20 | " driver = webdriver.Chrome(options=chrome_options) \n",
21 | " driver.set_window_size(500, 951) # Manually set window size so we can find by class name later\n",
22 | " driver.get(URL)\n",
23 | " time.sleep(1) # Since we are in a browser, the javascript takes time to run so let's give it time\n",
24 | " more_filters_button = driver.find_elements_by_xpath('//*[@id=\"filter-menu-chip-group\"]/div[2]/button')[0]\n",
25 | " more_filters_button.click()\n",
26 | " time.sleep(1) # Waiting for page's js to run\n",
27 | " show_all_property_types_button = driver.find_elements_by_class_name('_6lth7f')[3] # Dangerous: class names change automatically based on window dimensions, and they might also rotate every once in a while for Airbnb security\n",
28 | " show_all_property_types_button.click()\n",
29 | " soup = BeautifulSoup(driver.page_source, 'html.parser')\n",
30 | " driver.quit()\n",
31 | "\n",
32 | " property_types = []\n",
33 | " inputs = soup.find_all('input')\n",
34 | " for i in inputs:\n",
35 | " ids = i.get('id')\n",
36 | " if ids and 'property_type_id' in ids:\n",
37 | " property_type_id = ids.replace('property_type_id-', '')\n",
38 | " property_type = i.get('name')\n",
39 | " property_types.append({'property_type': property_type, 'property_type_id': property_type_id})\n",
40 | "\n",
41 | " return property_types"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 8,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "[{'property_type': 'House', 'property_type_id': '2'},\n",
53 | " {'property_type': 'Apartment', 'property_type_id': '1'},\n",
54 | " {'property_type': 'Bed and breakfast', 'property_type_id': '3'},\n",
55 | " {'property_type': 'Boutique hotel', 'property_type_id': '43'},\n",
56 | " {'property_type': 'Bungalow', 'property_type_id': '38'},\n",
57 | " {'property_type': 'Cabin', 'property_type_id': '4'},\n",
58 | " {'property_type': 'Cottage', 'property_type_id': '60'},\n",
59 | " {'property_type': 'Guest suite', 'property_type_id': '53'},\n",
60 | " {'property_type': 'Guesthouse', 'property_type_id': '40'},\n",
61 | " {'property_type': 'Hostel', 'property_type_id': '45'},\n",
62 | " {'property_type': 'Hotel', 'property_type_id': '42'},\n",
63 | " {'property_type': 'Loft', 'property_type_id': '35'},\n",
64 | " {'property_type': 'Serviced apartment', 'property_type_id': '47'},\n",
65 | " {'property_type': 'Townhouse', 'property_type_id': '36'},\n",
66 | " {'property_type': 'Villa', 'property_type_id': '11'}]"
67 | ]
68 | },
69 | "execution_count": 8,
70 | "metadata": {},
71 | "output_type": "execute_result"
72 | }
73 | ],
74 | "source": [
75 | "getPropertyTypes()"
76 | ]
77 | }
78 | ],
79 | "metadata": {
80 | "kernelspec": {
81 | "display_name": "Python 3",
82 | "language": "python",
83 | "name": "python3"
84 | },
85 | "language_info": {
86 | "codemirror_mode": {
87 | "name": "ipython",
88 | "version": 3
89 | },
90 | "file_extension": ".py",
91 | "mimetype": "text/x-python",
92 | "name": "python",
93 | "nbconvert_exporter": "python",
94 | "pygments_lexer": "ipython3",
95 | "version": "3.7.0"
96 | }
97 | },
98 | "nbformat": 4,
99 | "nbformat_minor": 2
100 | }
101 |
--------------------------------------------------------------------------------
/Notebooks/url-sandbox.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 10,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from urllib.parse import urlencode\n",
10 | "\n",
11 | "def build_url(args):\n",
12 | " URL = 'https://www.airbnb.com/s/homes?'\n",
13 | "\n",
14 | " urlparams = []\n",
15 | " params = {}\n",
16 | " # Add pagination\n",
17 | " if args['search_type']:\n",
18 | " params['search_type'] = args['search_type']\n",
19 | " if args['search_type'] == 'pagination' and args['page']:\n",
20 | " items_offset = str(int(args['page']) * 20)\n",
21 | " params['items_offset'] = items_offset\n",
22 | "\n",
23 | " # # Add location, these are required fields\n",
24 | " # if args['city'] and args['state']:\n",
25 | " # URL += '&query=' + args['city'] + '%2C%20' + args['state']\n",
26 | "\n",
27 | " # Add logistics\n",
28 | " if args['checkin'] and args['checkout']:\n",
29 | " params['checkin'] = args['checkin']\n",
30 | " params['checkout'] = args['checkout']\n",
31 | "\n",
32 | " # Add adults, there is default='1' but check just for safety\n",
33 | " if args['adults']:\n",
34 | " params['adults'] = args['adults']\n",
35 | "\n",
36 | " # Add min_price\n",
37 | " if args['min_price']:\n",
38 | " params['min_price'] = args['min_price']\n",
39 | "\n",
40 | " # Add max_price\n",
41 | " if args['max_price']:\n",
42 | " params['max_price'] = args['max_price']\n",
43 | "\n",
44 | " # Add min_beds\n",
45 | " if args['min_beds']:\n",
46 | " params['min_beds'] = args['min_beds']\n",
47 | "\n",
48 | " # Add min_bedrooms\n",
49 | " if args['min_bedrooms']:\n",
50 | " params['min_bedrooms'] = args['min_bedrooms']\n",
51 | "\n",
52 | " # Add min_bathrooms\n",
53 | " if args['min_bathrooms']:\n",
54 | " params['min_bathrooms'] = args['min_bathrooms']\n",
55 | "\n",
56 | " # Add flexible_cancellation\n",
57 | " if args['flexible_cancellation']:\n",
58 | " params['flexible_cancellation'] = args['flexible_cancellation']\n",
59 | "\n",
60 | " # Add instant booking\n",
61 | " if args['instant_booking']:\n",
62 | " params['ib'] = args['instant_booking']\n",
63 | "\n",
64 | " # Add work trip\n",
65 | " if args['work_trip']:\n",
66 | " params['work_trip'] = args['work_trip']\n",
67 | "\n",
68 | " # Add superhost\n",
69 | " if args['superhost']:\n",
70 | " params['superhost'] = args['superhost']\n",
71 | "\n",
72 | " # Add amenities\n",
73 | " if args['amenities']:\n",
74 | " params['amenities[]'] = args['amenities']\n",
75 | " amenities = args['amenities'].split(',')\n",
76 | " for amenity_id in amenities:\n",
77 | " urlparams.append(('amenities[]', amenity_id))\n",
78 | " # amenities = args['amenities'].split(',')\n",
79 | " # for amenity_id in amenities:\n",
80 | " # URL = URL + '&amenities%5B%5D=' + amenity_id\n",
81 | "\n",
82 | " # Add accessibilities\n",
83 | " if args['accessibilities']:\n",
84 | " params['amenities[]'] = args['accessibilities']\n",
85 | " # accessibilities = args['accessibilities'].split(',')\n",
86 | " # for accessibility_id in accessibilities:\n",
87 | " # URL = URL + '&amenities%5B%5D=' + accessibility_id\n",
88 | "\n",
89 | " # Add facilities\n",
90 | " if args['facilities']:\n",
91 | " params['amenities[]'] = args['facilities']\n",
92 | " # facilities = args['facilities'].split(',')\n",
93 | " # for facility_id in facilities:\n",
94 | " # URL = URL + '&amenities%5B%5D=' + facility_id\n",
95 | "\n",
96 | " # Add property types\n",
97 | " if args['property_types']:\n",
98 | " params['property_type_id[]'] = args['property_types']\n",
99 | " # property_types = args['property_types'].split(',')\n",
100 | " # for property_type_id in property_types:\n",
101 | " # URL = URL + '&property_type_id%5B%5D=' + property_type_id\n",
102 | "\n",
103 | " # Add house_rules\n",
104 | " if args['house_rules']:\n",
105 | " house_rules = args['house_rules'].split(',')\n",
106 | " for house_rules_id in house_rules:\n",
107 | " URL = URL + '&amenities%5B%5D=' + house_rules_id\n",
108 | "\n",
109 | " # Add neighborhoods\n",
110 | " if args['neighborhoods']:\n",
111 | " neighborhoods = args['neighborhoods'].split(',')\n",
112 | " for neighborhood_id in neighborhoods:\n",
113 | " URL = URL + '&neighborhood_ids%5B%5D=' + neighborhood_id\n",
114 | "\n",
115 | " # Add languages\n",
116 | " if args['languages']:\n",
117 | " languages = args['languages'].split(',')\n",
118 | " for language_id in languages:\n",
119 | " URL = URL + '&languages%5B%5D=' + language_id\n",
120 | "\n",
121 | " # Add location, these are required fields\n",
122 | " if args['city'] and args['state']:\n",
123 | " URL += '&query=' + args['city'] + '%2C%20' + args['state']\n",
124 | "\n",
125 | " urlparams.append(params)\n",
126 | " URL += urlencode(urlparams)\n",
127 | " # For debugging let's see the URL\n",
128 | " print(URL, flush=True)\n",
129 | " return URL\n"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 11,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "ename": "RuntimeError",
139 | "evalue": "Working outside of request context.\n\nThis typically means that you attempted to use functionality that needed\nan active HTTP request. Consult the documentation on testing for\ninformation about how to avoid this problem.",
140 | "output_type": "error",
141 | "traceback": [
142 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
143 | "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)",
144 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd_argument\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'neighborhoods'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd_argument\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'languages'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 27\u001b[1;33m \u001b[0margs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse_args\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstrict\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 28\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 29\u001b[0m \u001b[0mbuild_url\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
145 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\flask_restful\\reqparse.py\u001b[0m in \u001b[0;36mparse_args\u001b[1;34m(self, req, strict, http_error_code)\u001b[0m\n\u001b[0;32m 323\u001b[0m \u001b[1;31m# A record of arguments not yet parsed; as each is found\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 324\u001b[0m \u001b[1;31m# among self.args, it will be popped out\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 325\u001b[1;33m \u001b[0mreq\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munparsed_arguments\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margument_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m''\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstrict\u001b[0m \u001b[1;32melse\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 326\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 327\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0marg\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
146 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\flask_restful\\reqparse.py\u001b[0m in \u001b[0;36msource\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 123\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mMultiDict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 124\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0ml\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlocation\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 125\u001b[1;33m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ml\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 126\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 127\u001b[0m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
147 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\werkzeug\\local.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 345\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'__members__'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 346\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdir\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_current_object\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 347\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_current_object\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 348\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 349\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
148 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\werkzeug\\local.py\u001b[0m in \u001b[0;36m_get_current_object\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 304\u001b[0m \"\"\"\n\u001b[0;32m 305\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__local\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'__release_local__'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 306\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__local\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 307\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 308\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__local\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
149 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\flask\\globals.py\u001b[0m in \u001b[0;36m_lookup_req_object\u001b[1;34m(name)\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[0mtop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_request_ctx_stack\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtop\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 36\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mtop\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 37\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_request_ctx_err_msg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 38\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 39\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
150 | "\u001b[1;31mRuntimeError\u001b[0m: Working outside of request context.\n\nThis typically means that you attempted to use functionality that needed\nan active HTTP request. Consult the documentation on testing for\ninformation about how to avoid this problem."
151 | ]
152 | }
153 | ],
154 | "source": [
155 | "from flask_restful import Resource, reqparse\n",
156 | "\n",
157 | "parser = reqparse.RequestParser()\n",
158 | "parser.add_argument('city', required=True)\n",
159 | "parser.add_argument('state', required=True)\n",
160 | "parser.add_argument('checkin')\n",
161 | "parser.add_argument('checkout')\n",
162 | "parser.add_argument('adults', default='1')\n",
163 | "parser.add_argument('page', default='0')\n",
164 | "parser.add_argument('search_type', default='pagination')\n",
165 | "parser.add_argument('min_price')\n",
166 | "parser.add_argument('max_price')\n",
167 | "parser.add_argument('min_beds')\n",
168 | "parser.add_argument('min_bedrooms')\n",
169 | "parser.add_argument('min_bathrooms')\n",
170 | "parser.add_argument('flexible_cancellation')\n",
171 | "parser.add_argument('instant_booking')\n",
172 | "parser.add_argument('work_trip')\n",
173 | "parser.add_argument('superhost')\n",
174 | "parser.add_argument('amenities')\n",
175 | "parser.add_argument('accessibilities')\n",
176 | "parser.add_argument('facilities')\n",
177 | "parser.add_argument('property_types')\n",
178 | "parser.add_argument('house_rules')\n",
179 | "parser.add_argument('neighborhoods')\n",
180 | "parser.add_argument('languages')\n",
181 | "args = parser.parse_args(strict=True)\n",
182 | "\n",
183 | "build_url(args)"
184 | ]
185 | }
186 | ],
187 | "metadata": {
188 | "kernelspec": {
189 | "display_name": "Python 3",
190 | "language": "python",
191 | "name": "python3"
192 | },
193 | "language_info": {
194 | "codemirror_mode": {
195 | "name": "ipython",
196 | "version": 3
197 | },
198 | "file_extension": ".py",
199 | "mimetype": "text/x-python",
200 | "name": "python",
201 | "nbconvert_exporter": "python",
202 | "pygments_lexer": "ipython3",
203 | "version": "3.7.0"
204 | }
205 | },
206 | "nbformat": 4,
207 | "nbformat_minor": 2
208 | }
209 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Unofficial Airbnb REST Api
2 |
3 | This is an unofficial Airbnb RESTful API that scrapes the [Airbnb site](https://www.airbnb.com/) to retrieve data.
4 |
5 | Application was built with Python3, Flask, BeautifulSoup, and Selenium.
6 |
7 | Warning: Using this application may be against Airbnb's Terms of Service.
8 |
9 |
10 | ## Install
11 |
12 | git clone https://github.com/HalmonLui/airbnb-api.git
13 | cd airbnb-api
14 | pip install -r requirements.txt
15 |
16 | Note: For some endpoints, you need selenium with the Chrome driver in your PATH. Follow [this tutorial](https://zwbetz.com/download-chromedriver-binary-and-add-to-your-path-for-automated-functional-testing/) to learn how.
17 | The command above may not install every requirement. If selenium or flask-restful is missing afterwards, install them manually:
18 | pip install selenium
19 | pip install flask-restful
20 |
21 | ## Run the app
22 |
23 | export FLASK_APP=airbnbapi
24 | flask run
25 |
26 | # REST API
27 |
28 | ## Get Listings
29 |
30 | ### Request
31 |
32 | `GET /getListings`
33 |
34 | curl -X GET 'http://localhost:5000/getListings?city=Boston&state=MA'
35 |
36 | ### Parameters
37 |
38 | - **city** *required, str*\
39 | Valid city, ex: Boston
40 | - **state** *required, str*\
41 | Valid state code, ex: MA
42 | - **checkin** *optional, str*\
43 | Checkin date, YYYY-MM-DD
44 | - **checkout** *optional, str*\
45 | Checkout date, YYYY-MM-DD
46 | - **adults** *optional, int, default is 1*\
47 | Number of adults
48 | - **page** *optional, int, default is 0*\
49 | Each page shows 20 items at a time
50 | - **min_price** *optional, int*\
51 | Minimum price per night
52 | - **max_price** *optional, int*\
53 | Maximum price per night
54 | - **min_beds** *optional, int*\
55 | Minimum number of beds
56 | - **min_bedrooms** *optional, int*\
57 | Minimum number of bedrooms
58 | - **min_bathrooms** *optional, int*\
59 | Minimum number of bathrooms
60 | - **flexible_cancellation** *optional, bool*\
61 | Stay has flexible cancellation
62 | - **instant_booking** *optional, bool*\
63 | Book without waiting for host approval
64 | - **work_trip** *optional, bool*\
65 | Traveling for work, 5 star ratings from business travelers
66 | - **superhost** *optional, bool*\
67 | Host is a superhost
68 | - **amenities** *optional*\
69 | Comma separated list of amenity_ids (can retrieve from /getAmenities endpoint), ex: 44,45
70 | - **accessibilities** *optional*\
71 | Comma separated list of accessibility_ids (can retrieve from /getAccessibilities endpoint), ex: 110,111
72 | - **facilities** *optional*\
73 | Comma separated list of facility_ids (can retrieve from /getFacilities endpoint), ex: 7,9
74 | - **property_types** *optional*\
75 | Comma separated list of property_type_ids (can retrieve from /getPropertyTypes endpoint), ex: 8,5
76 | - **house_rules** *optional*\
77 | Comma separated list of house_rules_ids (can retrieve from /getHouseRules endpoint), ex: 11,12
78 | - **neighborhoods** *optional*\
79 | Comma separated list of neighborhood_ids (can retrieve from /getNeighborhoods endpoint), ex: 578,579
80 | - **languages** *optional*\
81 | Comma separated list of language_ids (can retrieve from /getLanguages endpoint), ex: 1,2
82 |
83 | ### Response
84 | ```json
85 | [
86 | {
87 | "listing_name": "Super Spacious Listing For My Airbnb API",
88 | "url": "https://www.airbnb.com/rooms/1858?adults=1&previous_page_section_name=100&federated_search_id=f41f2c-39b5-4fce-a928-8540423f1",
89 | "price_per_night": "27",
90 | "amenities": [
91 | "Wifi",
92 | "Kitchen"
93 | ],
94 | "housing_info": [
95 | "2 guests",
96 | "1 bedroom",
97 | "1 bed",
98 | "2 shared baths"
99 | ],
100 | "is_superhost": "True",
101 | "listing_type": "Private room",
102 | "rating": "4.89",
103 | "num_reviews": "434"
104 | },
105 | {
106 | "listing_name": "Downtown Room",
107 | "url": "https://www.airbnb.com/rooms/1562?adults=1&previous_page_section_name=100&federated_search_id=fbf2c-39b5-ce-a928-853f1",
108 | "price_per_night": "39",
109 | "amenities": [
110 | "Free parking",
111 | "Wifi",
112 | "Kitchen"
113 | ],
114 | "housing_info": [
115 | "3 guests",
116 | "1 bedroom",
117 | "2 beds",
118 | "2 shared baths"
119 | ],
120 | "is_superhost": "True",
121 | "listing_type": "Private room",
122 | "rating": "4.89",
123 | "num_reviews": "615"
124 | },
125 | ...
126 | ]
127 | ```
128 |
129 | ## Get Listing Coordinates by ID
130 |
131 | ### Request
132 |
133 | `GET /getListingCoordinates/<listing_id>`
134 |
135 | curl -X GET 'http://localhost:5000/getListingCoordinates/17974950'
136 |
137 | ### Response
138 | ```json
139 | {
140 | "latitude": "42.3766",
141 | "longitude": "-71.03634"
142 | }
143 | ```
144 |
145 | ## Get Deep Listings
146 |
147 | ### Request
148 |
149 | `GET /getDeepListings`
150 |
151 | curl -X GET 'http://localhost:5000/getDeepListings?city=Boston&state=MA'
152 |
153 | ### Response
154 | ```json
155 | In progress...
156 | ```
157 |
158 | ## Get Specific Listing
159 |
160 | ### Request
161 |
162 | `GET /getListing/`
163 |
164 | curl -X GET 'http://localhost:5000/getListing?id=123456'
165 |
166 | ### Response
167 | ```json
168 | In progress...
169 | ```
170 |
171 | ## Get Amenities
172 | Airbnb uses unique ids for each amenity, these are needed to query listings by host amenities.\
173 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly.
174 | ### Request
175 |
176 | `GET /getAmenities`
177 |
178 | curl -X GET 'http://localhost:5000/getAmenities'
179 |
180 | ### Response
181 | ```json
182 | [
183 | {
184 | "amenity": "Kitchen",
185 | "amenity_id": "8"
186 | },
187 | {
188 | "amenity": "Shampoo",
189 | "amenity_id": "41"
190 | },
191 | {
192 | "amenity": "Heating",
193 | "amenity_id": "30"
194 | },
195 | {
196 | "amenity": "Air conditioning",
197 | "amenity_id": "5"
198 | },
199 | {
200 | "amenity": "Washer",
201 | "amenity_id": "33"
202 | },
203 | ...
204 | ]
205 | ```
206 |
207 | ## Get Accessibilities
208 | Airbnb uses unique ids for each accessibility, these are needed to query listings by host accessibility.\
209 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly.
210 | ### Request
211 |
212 | `GET /getAccessibilities`
213 |
214 | curl -X GET 'http://localhost:5000/getAccessibilities'
215 |
216 | ### Response
217 | ```json
218 | [
219 | {
220 | "accessibility": "No stairs or steps to enter",
221 | "accessibility_id": "110"
222 | },
223 | {
224 | "accessibility": "Well-lit path to entrance",
225 | "accessibility_id": "113"
226 | },
227 | {
228 | "accessibility": "Wide entrance for guests",
229 | "accessibility_id": "111"
230 | },
231 | {
232 | "accessibility": "Step-free path to entrance",
233 | "accessibility_id": "112"
234 | },
235 | {
236 | "accessibility": "Wide hallways",
237 | "accessibility_id": "109"
238 | },
239 | ...
240 | ]
241 | ```
242 |
243 | ## Get Facilities
244 | Airbnb uses unique ids for each facility, these are needed to query listings by host facilities.\
245 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly.
246 | ### Request
247 |
248 | `GET /getFacilities`
249 |
250 | curl -X GET 'http://localhost:5000/getFacilities'
251 |
252 | ### Response
253 | ```json
254 | [
255 | {
256 | "facility": "Free parking on premises",
257 | "facility_id": "9"
258 | },
259 | {
260 | "facility": "Gym",
261 | "facility_id": "15"
262 | },
263 | {
264 | "facility": "Hot tub",
265 | "facility_id": "25"
266 | },
267 | {
268 | "facility": "Pool",
269 | "facility_id": "7"
270 | }
271 | ]
272 | ```
273 |
274 | ## Get Property Types
275 | Airbnb uses unique property type ids for each property type and unique stay, these are needed to query listings by host property type.\
276 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly.
277 | ### Request
278 |
279 | `GET /getPropertyTypes`
280 |
281 | curl -X GET 'http://localhost:5000/getPropertyTypes'
282 |
283 | ### Response
284 | ```json
285 | [
286 | {
287 | "property_type": "House",
288 | "property_type_id": "2"
289 | },
290 | {
291 | "property_type": "Apartment",
292 | "property_type_id": "1"
293 | },
294 | {
295 | "property_type": "Bed and breakfast",
296 | "property_type_id": "3"
297 | },
298 | {
299 | "property_type": "Boutique hotel",
300 | "property_type_id": "43"
301 | },
302 | {
303 | "property_type": "Bungalow",
304 | "property_type_id": "38"
305 | },
306 | ...
307 | ]
308 | ```
309 |
310 | ## Get House Rules
311 | Airbnb uses unique ids for each house rule, these are needed to query listings by house rules.\
312 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly.
313 | ### Request
314 |
315 | `GET /getHouseRules`
316 |
317 | curl -X GET 'http://localhost:5000/getHouseRules'
318 |
319 | ### Response
320 | ```json
321 | [
322 | {
323 | "house_rule": "Pets allowed",
324 | "house_rule_id": "12"
325 | },
326 | {
327 | "house_rule": "Smoking allowed",
328 | "house_rule_id": "11"
329 | }
330 | ]
331 | ```
332 |
333 | ## Get Neighborhoods
334 | Airbnb uses unique neighborhood ids for each neighborhood, these are needed to query listings by neighborhood.\
335 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly.
336 | ### Request
337 |
338 | `GET /getNeighborhoods`
339 |
340 | curl -X GET 'http://localhost:5000/getNeighborhoods?city=Boston&state=MA'
341 |
342 | ### Parameters
343 |
344 | - **city** *required*\
345 | Valid city, ex: Boston
346 | - **state** *required*\
347 | Valid state code, ex: MA
348 |
349 | ### Response
350 | ```json
351 | [
352 | {
353 | "neighborhood": "Allston-Brighton",
354 | "neighborhood_id": "578"
355 | },
356 | {
357 | "neighborhood": "East Boston",
358 | "neighborhood_id": "579"
359 | },
360 | {
361 | "neighborhood": "Winthrop",
362 | "neighborhood_id": "580"
363 | },
364 | {
365 | "neighborhood": "Theater District",
366 | "neighborhood_id": "453"
367 | },
368 | {
369 | "neighborhood": "Cambridge",
370 | "neighborhood_id": "581"
371 | },
372 | ...
373 | ]
374 | ```
375 |
376 | ## Get Languages
377 | Airbnb uses unique language ids for each language, these are needed to query listings by host language.\
378 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly.
379 | ### Request
380 |
381 | `GET /getLanguages`
382 |
383 | curl -X GET 'http://localhost:5000/getLanguages'
384 |
385 | ### Response
386 | ```json
387 | [
388 | {
389 | "language": "English",
390 | "language_id": "1"
391 | },
392 | {
393 | "language": "French",
394 | "language_id": "2"
395 | },
396 | {
397 | "language": "German",
398 | "language_id": "4"
399 | },
400 | {
401 | "language": "Japanese",
402 | "language_id": "8"
403 | },
404 | {
405 | "language": "Italian",
406 | "language_id": "16"
407 | },
408 | ...
409 | ]
410 | ```
411 |
--------------------------------------------------------------------------------
/airbnbapi/__init__.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 | from flask_restful import Resource, Api
3 | app = Flask(__name__)
4 | api = Api(app)
5 |
6 | import airbnbapi.resources
7 |
--------------------------------------------------------------------------------
/airbnbapi/controllers.py:
--------------------------------------------------------------------------------
1 | import json, requests, pprint, time, re
2 | from bs4 import BeautifulSoup
3 | from selenium import webdriver
4 | from selenium.webdriver.chrome.options import Options
5 | from . import helpers
6 |
def get_listings(args):
    """Scrape one page of Airbnb search results and return the listings found.

    The search URL comes from ``helpers.build_url(args)``. The page is fetched
    with ``requests`` and parsed with BeautifulSoup in four passes. Each pass
    pairs its n-th match with the n-th listing collected in the first pass, so
    the result depends on the page emitting those elements in the same order.

    Args:
        args: Parsed request arguments, passed unchanged to
            ``helpers.build_url``.

    Returns:
        tuple: ``(listings, 200)``. ``listings`` is a list of dicts that always
        contain ``listing_name`` and ``url``. The keys ``total_price``,
        ``is_superhost``, ``listing_type``, ``rating``, ``num_reviews``,
        ``price_per_night``, ``amenities`` and ``housing_info`` are only added
        when the corresponding markup is found on the page.
    """
    # Build the URL
    URL = helpers.build_url(args)

    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'html.parser')

    listings = []

    # GET LISTING NAME AND URL
    for link in soup.find_all('a'):
        # We just want to add real listings, not all link names
        if link.get('data-check-info-section'):
            listings.append({
                'listing_name': link.get('aria-label'),
                'url': 'https://www.airbnb.com' + link.get('href'),
            })

    # GET TOTAL PRICE
    spans = soup.find_all('button')
    counter = 0
    for span in spans:
        # More price buttons than listings would otherwise raise IndexError
        if counter >= len(listings):
            break
        text = span.get_text()
        if text and 'total' in text:
            total = text.replace('$', '').replace(' total', '')
            listings[counter]['total_price'] = total
            counter += 1

    # GET SUPERHOST, LISTING_TYPE, RATING, NUM_REVIEWS
    # This only works sometimes, airbnb must load their frontend slightly differently every fetch
    counter = 0
    for div in soup.find_all('div'):
        if counter >= len(listings):
            break
        if div.get_text() != listings[counter]['listing_name']:
            continue

        is_superhost = 'False'
        listing_type = ''
        rating = None
        num_reviews = '0'

        listing_info = div.previous_sibling
        if listing_info:
            for child in listing_info:
                child_text = child.get_text()
                if 'Entire ' in child_text or 'Private ' in child_text:
                    listing_type = child_text
                elif 'SUPERHOST' in child_text:
                    is_superhost = 'True'
                # The original `'(' and ')' in text` only tested for ')'
                elif '(' in child_text and ')' in child_text:
                    for c in child:
                        split_rating = c.get_text().split()
                        # Expect "<rating> (<count>)"; skip anything shorter
                        if len(split_rating) < 2:
                            continue
                        rating = split_rating[0]
                        num_reviews = split_rating[1].replace('(', '').replace(')', '')

        listings[counter]['is_superhost'] = is_superhost
        listings[counter]['listing_type'] = listing_type
        listings[counter]['rating'] = rating
        listings[counter]['num_reviews'] = num_reviews

        counter += 1

    # GET PRICE PER NIGHT, AMENITIES, HOUSING_INFO
    counter = 0
    for span in spans:
        if counter >= len(listings):
            break
        text = span.get_text()
        if text and '/ night' in text and 'total' not in text:
            amenities = []
            housing_info = []

            # Some have a discounted price so we only want the actual price per night.
            # [-1] falls back to the whole text when no '$' is present.
            price_per_night = text.rsplit('$', 1)[-1]
            price_per_night = price_per_night.replace(' / night', '')
            price_per_night = ' '.join(price_per_night.split())

            # Gets amenities like Wifi/Kitchen/Free Parking
            amenities_element = span.parent.parent.parent.previous_sibling
            if amenities_element:
                amenities = amenities_element.get_text().split(' · ')

            # Gets guests, bedrooms, baths; only reachable when the first sibling exists
            housing_info_element = None
            if amenities_element is not None:
                housing_info_element = amenities_element.previous_sibling
            if housing_info_element:
                housing_info = housing_info_element.get_text().split(' · ')

            listings[counter]['price_per_night'] = price_per_night
            listings[counter]['amenities'] = amenities
            listings[counter]['housing_info'] = housing_info

            counter += 1

    return listings, 200
103 |
104 |
def get_coordinates(listing_id):
    """Look up the latitude and longitude embedded in an Airbnb listing page.

    The listing page is requested up to 10 times, because the response does
    not always contain the coordinates. The first ``"lat":<number>,`` and
    ``"lng":<number>,`` occurrences in the response text are used.

    Args:
        listing_id: Airbnb room id; converted with ``str()`` and appended to
            the rooms URL.

    Returns:
        tuple: ``({'latitude': str, 'longitude': str}, 200)`` on success, or
        ``({'error': 'Unable to get the coordinates'}, 400)`` when no attempt
        yields both values.
    """
    URL = 'https://www.airbnb.com/rooms/' + str(listing_id)
    # Compile once; the patterns do not change between attempts
    p_lat = re.compile(r'"lat":([-0-9.]+),')
    p_lng = re.compile(r'"lng":([-0-9.]+),')

    # Sometimes request doesn't have the lat long, this gives it 10 attempts to try to get it
    for _ in range(10):
        try:
            r = requests.get(URL)
        except requests.RequestException:
            # Network-level failure: count it as a failed attempt and retry
            continue

        lat = p_lat.search(r.text)
        lng = p_lng.search(r.text)
        if lat and lng:
            return {'latitude': lat.group(1), 'longitude': lng.group(1)}, 200

    # A dict (not a set literal) so the payload can be serialized as JSON
    return {'error': 'Unable to get the coordinates'}, 400
127 |
128 |
def get_amenities():
    """Scrape the Airbnb search filter panel for amenity names and ids.

    Opens a fixed Boston, MA search in headless Chrome, clicks the filter
    button, clicks the second element with class ``_6lth7f`` (the "show all"
    control used for amenities) and reads every ``<input>`` whose id contains
    ``amenities``.

    Returns:
        tuple: ``(amenities, 200)`` where ``amenities`` is a list of
        ``{'amenity': name, 'amenity_id': id}`` dicts, or
        ``({'error': message}, 400)`` when the filter button or the "show all"
        control cannot be located.
    """
    # Local import: locator API that works on Selenium 3 and 4
    from selenium.webdriver.common.by import By

    # Build URL
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    # Prepare the webdriver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)

    error_message = None
    soup = None
    try:
        driver.set_window_size(500, 951)  # Manually set window size so we can find by class name later

        # Control the page to show all amenities
        driver.get(URL)
        time.sleep(1)  # Since we are in a browser, the javascript takes time to run so let's give it time
        # Dangerous, location of filter button may change
        filter_buttons = driver.find_elements(By.XPATH, '//*[@id="filter-menu-chip-group"]/div[2]/button')
        if filter_buttons:
            filter_buttons[0].click()
            time.sleep(1)  # Waiting for page's js to run
            # Dangerous, classnames automatically change based on window dimensions,
            # they might also rotate every once in a while for airbnb security
            show_all_buttons = driver.find_elements(By.CLASS_NAME, '_6lth7f')
            if len(show_all_buttons) > 1:
                show_all_buttons[1].click()
                soup = BeautifulSoup(driver.page_source, 'html.parser')
            else:
                error_message = 'Unable to access amenities'
        else:
            error_message = 'Unable to access filter button'
    finally:
        driver.quit()  # Always close the driver so we don't have idle processes

    # Return error message if we cannot access airbnb's amenities
    if error_message:
        return {'error': error_message}, 400

    # Get amenities and IDs from page
    amenities = []
    for i in soup.find_all('input'):
        ids = i.get('id')
        if ids and 'amenities' in ids:
            amenity_id = ids.replace('amenities-', '')
            amenity = i.get('name')
            if amenity is None:
                continue  # input without a name cannot be reported
            if 'Pets allowed' not in amenity and 'Smoking allowed' not in amenity:
                amenities.append({'amenity': amenity, 'amenity_id': amenity_id})

    return amenities, 200
175 |
176 |
def get_accessibilities():
    """Scrape the Airbnb search filter panel for accessibility names and ids.

    Opens a fixed Boston, MA search in headless Chrome, clicks the filter
    button, clicks the first element with class ``_6lth7f`` (the "show all"
    control used for accessibilities) and reads every ``<input>`` whose id
    contains ``amenities``.

    Returns:
        tuple: ``(accessibilities, 200)`` where ``accessibilities`` is a list
        of ``{'accessibility': name, 'accessibility_id': id}`` dicts, or
        ``({'error': message}, 400)`` when the filter button or the "show all"
        control cannot be located.
    """
    # Local import: locator API that works on Selenium 3 and 4
    from selenium.webdriver.common.by import By

    # Build URL
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    # Prepare the webdriver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)

    error_message = None
    soup = None
    try:
        driver.set_window_size(500, 951)  # Manually set window size so we can find by class name later

        # Control the page to show all accessibilities
        driver.get(URL)
        time.sleep(1)  # Since we are in a browser, the javascript takes time to run so let's give it time
        # Dangerous, location of filter button may change
        filter_buttons = driver.find_elements(By.XPATH, '//*[@id="filter-menu-chip-group"]/div[2]/button')
        if filter_buttons:
            filter_buttons[0].click()
            time.sleep(1)  # Waiting for page's js to run
            # Dangerous, classnames automatically change based on window dimensions,
            # they might also rotate every once in a while for airbnb security
            show_all_buttons = driver.find_elements(By.CLASS_NAME, '_6lth7f')
            if show_all_buttons:
                show_all_buttons[0].click()
                soup = BeautifulSoup(driver.page_source, 'html.parser')
            else:
                error_message = 'Unable to access accessibilities'
        else:
            error_message = 'Unable to access filter button'
    finally:
        driver.quit()  # Always close the driver so we don't have idle processes

    # Return error message if we cannot access airbnb's accessibilities
    if error_message:
        return {'error': error_message}, 400

    # Get accessibilities and IDs from page
    accessibilities = []
    for i in soup.find_all('input'):
        ids = i.get('id')
        if ids and 'amenities' in ids:
            accessibility_id = ids.replace('amenities-', '')
            accessibility = i.get('name')
            if accessibility is None:
                continue  # input without a name cannot be reported
            if 'Pets allowed' not in accessibility and 'Smoking allowed' not in accessibility:
                accessibilities.append({'accessibility': accessibility, 'accessibility_id': accessibility_id})

    return accessibilities, 200
223 |
224 |
def get_facilities():
    """Scrape the Airbnb search filter panel for facility names and ids.

    Opens a fixed Boston, MA search in headless Chrome, clicks the filter
    button, clicks the third element with class ``_6lth7f`` (the "show all"
    control used for facilities) and reads every ``<input>`` whose id contains
    ``amenities``.

    Returns:
        tuple: ``(facilities, 200)`` where ``facilities`` is a list of
        ``{'facility': name, 'facility_id': id}`` dicts, or
        ``({'error': message}, 400)`` when the filter button or the "show all"
        control cannot be located.
    """
    # Local import: locator API that works on Selenium 3 and 4
    from selenium.webdriver.common.by import By

    # Build URL
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    # Prepare the webdriver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)

    error_message = None
    soup = None
    try:
        driver.set_window_size(500, 951)  # Manually set window size so we can find by class name later

        # Control the page to show all facilities
        driver.get(URL)
        time.sleep(1)  # Since we are in a browser, the javascript takes time to run so let's give it time
        # Dangerous, location of filter button may change
        filter_buttons = driver.find_elements(By.XPATH, '//*[@id="filter-menu-chip-group"]/div[2]/button')
        if filter_buttons:
            filter_buttons[0].click()
            time.sleep(1)  # Waiting for page's js to run
            # Dangerous, classnames automatically change based on window dimensions,
            # they might also rotate every once in a while for airbnb security
            show_all_buttons = driver.find_elements(By.CLASS_NAME, '_6lth7f')
            if len(show_all_buttons) > 2:
                show_all_buttons[2].click()
                soup = BeautifulSoup(driver.page_source, 'html.parser')
            else:
                error_message = 'Unable to access facilities'
        else:
            error_message = 'Unable to access filter button'
    finally:
        driver.quit()  # Always close the driver so we don't have idle processes

    # Return error message if we cannot access airbnb's facilities
    if error_message:
        return {'error': error_message}, 400

    # Get facilities and IDs from page
    facilities = []
    for i in soup.find_all('input'):
        ids = i.get('id')
        if ids and 'amenities' in ids:
            facility_id = ids.replace('amenities-', '')
            facility = i.get('name')
            if facility is None:
                continue  # input without a name cannot be reported
            if 'Pets allowed' not in facility and 'Smoking allowed' not in facility:
                facilities.append({'facility': facility, 'facility_id': facility_id})

    return facilities, 200
271 |
272 |
def get_property_types():
    """Scrape the Airbnb search filter panel for property type names and ids.

    Opens a fixed Boston, MA search in headless Chrome and clicks the filter
    button. It then clicks the fifth ``_6lth7f`` element (unique stays) and
    the fourth ``_6lth7f`` element (property types), and reads every
    ``<input>`` whose id contains ``property_type_id`` from the resulting
    page.

    Returns:
        tuple: ``(property_types, 200)`` where ``property_types`` is a list of
        ``{'property_type': name, 'property_type_id': id}`` dicts, or
        ``({'error': message}, 400)`` when the filter button or either
        "show all" control cannot be located.
    """
    # Local import: locator API that works on Selenium 3 and 4
    from selenium.webdriver.common.by import By

    # Build URL
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    # Set up headless chrome driver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)

    error_message = None
    property_soup = None
    try:
        driver.set_window_size(500, 951)  # Manually set window size so we can find by class name later

        # Control page to show property types
        driver.get(URL)
        time.sleep(1)  # Since we are in a browser, the javascript takes time to run so let's give it time
        filter_buttons = driver.find_elements(By.XPATH, '//*[@id="filter-menu-chip-group"]/div[2]/button')
        if filter_buttons:
            filter_buttons[0].click()
            time.sleep(1)  # Waiting for page's js to run

            # Dangerous, classnames automatically change based on window dimensions,
            # they might also rotate every once in a while for airbnb security
            unique_stay_buttons = driver.find_elements(By.CLASS_NAME, '_6lth7f')
            if len(unique_stay_buttons) > 4:
                unique_stay_buttons[4].click()
            else:
                error_message = 'Unable to access unique stays'

            # Looked up again after the first click rather than reusing the earlier list
            property_type_buttons = driver.find_elements(By.CLASS_NAME, '_6lth7f')
            if len(property_type_buttons) > 3:
                property_type_buttons[3].click()
                property_soup = BeautifulSoup(driver.page_source, 'html.parser')
            else:
                error_message = 'Unable to access property types'
        else:
            error_message = 'Unable to access filter button'
    finally:
        driver.quit()  # Always close the driver to prevent idle processes

    # Return error message if we cannot access property types
    if error_message:
        return {'error': error_message}, 400

    property_types = []
    for i in property_soup.find_all('input'):
        ids = i.get('id')
        if ids and 'property_type_id' in ids:
            property_type_id = ids.replace('property_type_id-', '')
            property_type = i.get('name')
            property_types.append({'property_type': property_type, 'property_type_id': property_type_id})

    return property_types, 200
324 |
325 |
def get_house_rules():
    """Scrape Airbnb's filter panel for house rules and their IDs.

    Drives a headless Chrome session against a fixed Boston, MA search page,
    opens the "more filters" panel and reads the ``<input>`` elements whose
    id contains ``amenities``.

    Returns:
        A ``(body, status)`` tuple. On success ``body`` is a list of
        ``{'house_rule': name, 'house_rule_id': id}`` dicts and ``status``
        is 200. If the filter button cannot be located ``body`` is
        ``{'error': message}`` and ``status`` is 400.
    """
    # Build URL
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    # Set up headless chrome driver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)

    error_message = None
    soup = None
    try:
        # Manually set window size so we can find by class name later
        driver.set_window_size(500, 951)

        # Control page to show house rules
        driver.get(URL)
        time.sleep(1)  # Give the page's javascript time to run

        # find_elements_* returns an empty list when nothing matches, so test
        # the list itself instead of indexing it (indexing raised IndexError
        # and made the error branch unreachable).
        filter_buttons = driver.find_elements_by_xpath(
            '//*[@id="filter-menu-chip-group"]/div[2]/button')
        if filter_buttons:
            filter_buttons[0].click()
            time.sleep(1)  # Waiting for page's js to run
            soup = BeautifulSoup(driver.page_source, 'html.parser')
        else:
            error_message = 'Unable to access filter button'
    finally:
        # Always close the driver, even on failure, to prevent idle processes
        driver.quit()

    # Return error message if we cannot access house rules
    if error_message:
        return {'error': error_message}, 400

    # Get house rules and IDs from page
    house_rules = []
    for tag in soup.find_all('input'):
        ids = tag.get('id')
        if ids and 'amenities' in ids:
            house_rules.append({
                'house_rule': tag.get('name'),
                'house_rule_id': ids.replace('amenities-', ''),
            })

    return house_rules, 200
365 |
366 |
def get_neighborhoods(args):
    """Scrape Airbnb's filter panel for a location's neighborhoods and IDs.

    Drives a headless Chrome session against the search URL built from
    ``args``, opens the "more filters" panel, expands the neighborhoods
    section and reads the ``<input>`` elements whose id contains
    ``neighborhood_ids``.

    Args:
        args: Parsed request arguments, passed unchanged to
            ``helpers.build_url`` to build the search URL.

    Returns:
        A ``(body, status)`` tuple. On success ``body`` is a list of
        ``{'neighborhood': name, 'neighborhood_id': id}`` dicts and
        ``status`` is 200. If a required button cannot be located ``body``
        is ``{'error': message}`` and ``status`` is 400.
    """
    # Build the URL
    URL = helpers.build_url(args)

    # Prepare the webdriver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)

    error_message = None
    soup = None
    try:
        # Manually set window size so we can find by class name later
        driver.set_window_size(500, 951)

        # Control the page to show all neighborhoods
        driver.get(URL)
        time.sleep(1)  # Give the page's javascript time to run

        # Dangerous: location of filter button may change.
        # find_elements_* returns an empty list when nothing matches, so test
        # the list itself instead of indexing it (indexing raised IndexError
        # and made the error branches unreachable).
        filter_buttons = driver.find_elements_by_xpath(
            '//*[@id="filter-menu-chip-group"]/div[2]/button')
        if filter_buttons:
            filter_buttons[0].click()
            time.sleep(1)  # Waiting for page's js to run

            # Dangerous: class names change with window dimensions and may
            # be rotated by Airbnb from time to time.
            show_all_buttons = driver.find_elements_by_class_name('_6lth7f')
            if len(show_all_buttons) > 5:
                show_all_buttons[5].click()
                soup = BeautifulSoup(driver.page_source, 'html.parser')
            else:
                error_message = 'Unable to access neighborhoods'
        else:
            error_message = 'Unable to access filter button'
    finally:
        # Always close the driver, even on failure, so we don't leave idle processes
        driver.quit()

    # Return error message if we cannot access airbnb's neighborhoods
    if error_message:
        return {'error': error_message}, 400

    # Get neighborhoods and IDs from page
    neighborhoods = []
    for tag in soup.find_all('input'):
        ids = tag.get('id')
        if ids and 'neighborhood_ids' in ids:
            neighborhoods.append({
                'neighborhood': tag.get('name'),
                'neighborhood_id': ids.replace('neighborhood_ids-', ''),
            })

    return neighborhoods, 200
411 |
412 |
def get_languages():
    """Scrape Airbnb's filter panel for host languages and their IDs.

    Drives a headless Chrome session against a fixed Boston, MA search page,
    opens the "more filters" panel, expands the languages section and reads
    the ``<input>`` elements whose id contains ``languages``.

    Returns:
        A ``(body, status)`` tuple. On success ``body`` is a list of
        ``{'language': name, 'language_id': id}`` dicts and ``status`` is
        200 (made explicit so the success path matches the error path and
        the other scraper functions). If a required button cannot be
        located ``body`` is ``{'error': message}`` and ``status`` is 400.
    """
    # Build URL
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    # Set up headless chrome driver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)

    error_message = None
    soup = None
    try:
        # Manually set window size so we can find by class name later
        driver.set_window_size(500, 951)

        # Control page to show languages
        driver.get(URL)
        time.sleep(1)  # Give the page's javascript time to run

        # find_elements_* returns an empty list when nothing matches, so test
        # the list itself instead of indexing it (indexing raised IndexError
        # and made the error branches unreachable).
        filter_buttons = driver.find_elements_by_xpath(
            '//*[@id="filter-menu-chip-group"]/div[2]/button')
        if filter_buttons:
            filter_buttons[0].click()
            time.sleep(1)  # Waiting for page's js to run

            # Dangerous: class names change with window dimensions and may
            # be rotated by Airbnb from time to time.
            show_all_buttons = driver.find_elements_by_class_name('_6lth7f')
            if len(show_all_buttons) > 6:
                show_all_buttons[6].click()
                soup = BeautifulSoup(driver.page_source, 'html.parser')
            else:
                error_message = 'Unable to access languages'
        else:
            error_message = 'Unable to access filter button'
    finally:
        # Always close the driver, even on failure, to prevent idle processes
        driver.quit()

    # Return error message if we cannot access languages
    if error_message:
        return {'error': error_message}, 400

    # Get languages and IDs from page
    languages = []
    for tag in soup.find_all('input'):
        ids = tag.get('id')
        if ids and 'languages' in ids:
            languages.append({
                'language': tag.get('name'),
                'language_id': ids.replace('languages-', ''),
            })

    return languages, 200
457 |
--------------------------------------------------------------------------------
/airbnbapi/helpers.py:
--------------------------------------------------------------------------------
1 | from urllib.parse import urlencode, quote
2 |
def build_url(args):
    """Build an Airbnb homes-search URL from parsed request arguments.

    Args:
        args: Dict-like mapping of argument name to string value, such as
            the namespace produced by a ``reqparse`` parser. Keys that are
            missing, ``None`` or empty are skipped, so a caller that only
            supplies ``city`` and ``state`` works as well.

    Returns:
        The search URL as a string, with a percent-encoded query string.
        Comma-separated ID arguments are expanded into repeated ``name[]``
        parameters.
    """
    URL = 'https://www.airbnb.com/s/homes?'
    params = {}

    def add_ids(arg_name, param_name):
        # Split a comma-separated argument and append its IDs to a list parameter
        raw = args.get(arg_name)
        if raw:
            params.setdefault(param_name, []).extend(raw.split(','))

    # Add pagination
    search_type = args.get('search_type')
    if search_type:
        params['search_type'] = search_type
        page = args.get('page')
        if search_type == 'pagination' and page:
            # Offset is the page number times 20
            params['items_offset'] = str(int(page) * 20)

    # Add location (the API resources declare these as required)
    city = args.get('city')
    state = args.get('state')
    if city and state:
        params['query'] = city + ', ' + state

    # Add logistics, only when both dates are given
    checkin = args.get('checkin')
    checkout = args.get('checkout')
    if checkin and checkout:
        params['checkin'] = checkin
        params['checkout'] = checkout

    # Add single-valued filters: (argument name, query parameter name)
    simple_filters = (
        ('adults', 'adults'),
        ('min_price', 'min_price'),
        ('max_price', 'max_price'),
        ('min_beds', 'min_beds'),
        ('min_bedrooms', 'min_bedrooms'),
        ('min_bathrooms', 'min_bathrooms'),
        ('flexible_cancellation', 'flexible_cancellation'),
        ('instant_booking', 'ib'),
        ('work_trip', 'work_trip'),
        ('superhost', 'superhost'),
    )
    for arg_name, param_name in simple_filters:
        value = args.get(arg_name)
        if value:
            params[param_name] = value

    # Add multi-valued filters. Amenities, accessibilities, facilities and
    # house rules all share the 'amenities[]' parameter. The call order is
    # kept as-is so the generated query string is unchanged.
    add_ids('amenities', 'amenities[]')
    add_ids('accessibilities', 'amenities[]')
    add_ids('facilities', 'amenities[]')
    add_ids('property_types', 'property_type_id[]')
    add_ids('house_rules', 'amenities[]')
    add_ids('neighborhoods', 'neighborhood_ids[]')
    add_ids('languages', 'languages[]')

    # doseq=True expands list values into repeated parameters; quote (rather
    # than the default quote_plus) encodes spaces as %20.
    URL += urlencode(params, True, quote_via=quote)
    # For debugging let's see the URL
    print(URL, flush=True)
    return URL
115 |
--------------------------------------------------------------------------------
/airbnbapi/resources.py:
--------------------------------------------------------------------------------
1 | from airbnbapi import api
2 | from flask_restful import Resource, reqparse
3 | from flask import jsonify
4 | from . import controllers
5 |
class Index(Resource):
    """Landing resource that points visitors at the project page."""

    def get(self):
        """Return a JSON object with a single ``message`` field."""
        return jsonify(
            message='Unofficial Airbnb API, visit https://github.com/HalmonLui/airbnb-api for more information'
        )
9 |
10 |
11 | # Get Airbnb Listings
class ListingsAPI(Resource):
    """Resource that searches Airbnb listings from query-string filters."""

    def get(self):
        """Parse the search filters and delegate to ``controllers.get_listings``."""
        # (argument name, add_argument options), in declaration order
        argument_specs = (
            ('city', {'required': True}),
            ('state', {'required': True}),
            ('checkin', {}),
            ('checkout', {}),
            ('adults', {'default': '1'}),
            ('page', {'default': '0'}),
            ('search_type', {'default': 'pagination'}),
            ('min_price', {}),
            ('max_price', {}),
            ('min_beds', {}),
            ('min_bedrooms', {}),
            ('min_bathrooms', {}),
            ('flexible_cancellation', {}),
            ('instant_booking', {}),
            ('work_trip', {}),
            ('superhost', {}),
            ('amenities', {}),
            ('accessibilities', {}),
            ('facilities', {}),
            ('property_types', {}),
            ('house_rules', {}),
            ('neighborhoods', {}),
            ('languages', {}),
        )

        parser = reqparse.RequestParser()
        for arg_name, options in argument_specs:
            parser.add_argument(arg_name, **options)

        # strict=True is passed so undeclared query parameters are not accepted
        parsed = parser.parse_args(strict=True)
        return controllers.get_listings(parsed)
40 |
41 |
42 | # Get Listing latitude and longitude coordinates from listing_id
class CoordinatesAPI(Resource):
    """Resource that looks up the coordinates of a single listing."""

    def get(self, listing_id):
        """Delegate to ``controllers.get_coordinates`` for ``listing_id``."""
        coordinates = controllers.get_coordinates(listing_id)
        return coordinates
46 |
47 |
48 | # Get Amenities and IDs
class AmenitiesAPI(Resource):
    """Resource that lists amenities and their IDs."""

    def get(self):
        """Return whatever ``controllers.get_amenities`` produces."""
        amenities = controllers.get_amenities()
        return amenities
52 |
53 |
54 | # Get Accessibilities and IDs
class AccessibilitiesAPI(Resource):
    """Resource that lists accessibility options and their IDs."""

    def get(self):
        """Return whatever ``controllers.get_accessibilities`` produces."""
        accessibilities = controllers.get_accessibilities()
        return accessibilities
58 |
59 |
60 | # Get Facilities and IDs
class FacilitiesAPI(Resource):
    """Resource that lists facilities and their IDs."""

    def get(self):
        """Return whatever ``controllers.get_facilities`` produces."""
        facilities = controllers.get_facilities()
        return facilities
64 |
65 |
66 | # Get Property Types and IDs
class PropertyTypesAPI(Resource):
    """Resource that lists property types and their IDs."""

    def get(self):
        """Return whatever ``controllers.get_property_types`` produces."""
        property_types = controllers.get_property_types()
        return property_types
70 |
71 |
72 | # Get House Rules and IDs
class HouseRulesAPI(Resource):
    """Resource that lists house rules and their IDs."""

    def get(self):
        """Return whatever ``controllers.get_house_rules`` produces."""
        house_rules = controllers.get_house_rules()
        return house_rules
76 |
77 |
78 | # Get Neighborhoods and IDs
class NeighborhoodsAPI(Resource):
    """Resource that lists the neighborhoods of a city and their IDs."""

    def get(self):
        """Parse ``city`` and ``state`` and delegate to ``controllers.get_neighborhoods``."""
        parser = reqparse.RequestParser()
        # Both location fields are mandatory
        for location_field in ('city', 'state'):
            parser.add_argument(location_field, required=True)

        location = parser.parse_args(strict=True)
        return controllers.get_neighborhoods(location)
86 |
87 |
88 | # Get Languages and IDs
class LanguagesAPI(Resource):
    """Resource that lists host languages and their IDs."""

    def get(self):
        """Return whatever ``controllers.get_languages`` produces."""
        languages = controllers.get_languages()
        return languages
92 |
93 |
# Register each resource with its URL.
api.add_resource(Index, '/')
api.add_resource(ListingsAPI, '/getListings')
# CoordinatesAPI.get takes a ``listing_id`` argument, so the route must
# capture a URL variable with that exact name.
api.add_resource(CoordinatesAPI, '/getListingCoordinates/<listing_id>')
# api.add_resource(SpecificListingAPI, '/getListing/')
api.add_resource(AmenitiesAPI, '/getAmenities')
api.add_resource(AccessibilitiesAPI, '/getAccessibilities')
api.add_resource(FacilitiesAPI, '/getFacilities')
api.add_resource(PropertyTypesAPI, '/getPropertyTypes')
api.add_resource(HouseRulesAPI, '/getHouseRules')
api.add_resource(NeighborhoodsAPI, '/getNeighborhoods')
api.add_resource(LanguagesAPI, '/getLanguages')
106 |
--------------------------------------------------------------------------------
/images/airbnb_logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HalmonLui/airbnb-api/3f8817c1a62413efc3b8952223c0776b68b5179a/images/airbnb_logo.jpg
--------------------------------------------------------------------------------
/images/airbnb_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HalmonLui/airbnb-api/3f8817c1a62413efc3b8952223c0776b68b5179a/images/airbnb_logo.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aniso8601==8.0.0
2 | beautifulsoup4==4.9.1
3 | certifi==2020.4.5.2
4 | chardet==3.0.4
5 | click==7.1.2
6 | Flask==1.1.2
7 | Flask-RESTful==0.3.8
8 | idna==2.9
9 | itsdangerous==1.1.0
10 | Jinja2==2.11.2
11 | MarkupSafe==1.1.1
12 | pytz==2020.1
13 | requests==2.23.0
14 | selenium==3.141.0
15 | six==1.15.0
16 | soupsieve==2.0.1
17 | urllib3==1.25.9
18 | Werkzeug==1.0.1
19 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
# install_requires lists the third-party packages the airbnbapi modules
# use: Flask and Flask-RESTful for the API resources, Selenium and
# BeautifulSoup for the scraping controllers.
setup(
    name='airbnbapi',
    packages=['airbnbapi'],
    include_package_data=True,
    install_requires=[
        'flask',
        'flask-restful',
        'beautifulsoup4',
        'selenium',
    ],
)
11 |
--------------------------------------------------------------------------------