├── .gitignore ├── LICENSE ├── Notebooks ├── airbnb-scraper.ipynb ├── scrape_lat_long.ipynb ├── selenium_scrape_amenities_ids.ipynb ├── selenium_scrape_houserules.ipynb ├── selenium_scrape_language_ids.ipynb ├── selenium_scrape_neighborhood_ids.ipynb ├── selenium_scrape_property_type_ids.ipynb └── url-sandbox.ipynb ├── README.md ├── airbnbapi ├── __init__.py ├── controllers.py ├── helpers.py └── resources.py ├── images ├── airbnb_logo.jpg └── airbnb_logo.png ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Halmon Lui 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Notebooks/airbnb-scraper.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 31, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import requests\n", 10 | "import pprint\n", 11 | "from bs4 import BeautifulSoup\n", 12 | "\n", 13 | "pp = pprint.PrettyPrinter(indent=4)\n", 14 | "\n", 15 | "# Get listing info\n", 16 | "# URL = 'https://www.airbnb.com/s/homes?search_type=pagination&query=Boston%2C%20MA&checkin=2020-08-16&checkout=2020-08-22&adults=2&items_offset=0'\n", 17 | "def getListings(city, state, checkin, checkout, adults='1', page='0', search_type='pagination'):\n", 18 | " # Build the URL\n", 19 | " baseurl = 'https://www.airbnb.com/s/homes?'\n", 20 | "\n", 21 | " # Add pagination\n", 22 | " items_offset = str(int(page) * 20)\n", 23 | " URL = baseurl + 'search_type=' + search_type + '&items_offset=' + items_offset\n", 24 | " \n", 25 | " # Add location\n", 26 | " query = city + '%2C%20' + state\n", 27 | " URL = URL + '&query=' + query\n", 28 | " \n", 29 | " # Add logistics\n", 30 | " URL = URL + '&checkin=' + checkin + '&checkout=' + checkout + '&adults=' + adults\n", 31 | " \n", 32 | " page = requests.get(URL)\n", 33 | " soup = BeautifulSoup(page.content, 'html.parser')\n", 34 | "\n", 35 | " listings = []\n", 36 | "\n", 37 | " links = soup.find_all('a')\n", 38 | " # GET LISTING NAME AND URL\n", 39 | " counter = 0\n", 40 | " for link in links:\n", 41 | " # We just want to add real listings, not all link names\n", 42 | " if link.get('data-check-info-section'):\n", 43 | " listing_name = link.get('aria-label')\n", 44 | " url = 'https://www.airbnb.com' + link.get('href')\n", 45 | " listings.append({'listing_name': listing_name, 'url': url})\n", 46 | " counter += 1\n", 47 | "\n", 48 | " # GET TOTAL PRICE\n", 49 | " spans = soup.find_all('span')\n", 50 | " counter 
= 0\n", 51 | " for span in spans:\n", 52 | " text = span.get_text()\n", 53 | " if text and 'total' in text:\n", 54 | " total = text.replace('$', '')\n", 55 | " total = total.replace(' total', '')\n", 56 | " listings[counter]['total_price'] = total\n", 57 | " counter += 1\n", 58 | "\n", 59 | " \n", 60 | " # GET SUPERHOST, LISTING_TYPE, RATING, NUM_REVIEWS\n", 61 | " divs = soup.find_all('div')\n", 62 | " counter = 0\n", 63 | " for div in divs:\n", 64 | " if counter < len(listings) and div.get_text() == listings[counter]['listing_name']:\n", 65 | " is_superhost = 'False'\n", 66 | " listing_type = ''\n", 67 | " rating = None\n", 68 | " num_reviews = '0'\n", 69 | "\n", 70 | " listing_info = div.previous_sibling\n", 71 | " for child in listing_info:\n", 72 | " if 'Entire ' in child.get_text() or 'Private ' in child.get_text():\n", 73 | " listing_type = child.get_text()\n", 74 | " elif 'SUPERHOST' in child.get_text():\n", 75 | " is_superhost = 'True'\n", 76 | " elif '(' and ')' in child.get_text():\n", 77 | " for c in child:\n", 78 | " split_rating = c.get_text().split()\n", 79 | " rating = split_rating[0]\n", 80 | " num_reviews = split_rating[1].replace('(', '')\n", 81 | " num_reviews = num_reviews.replace(')', '')\n", 82 | "\n", 83 | " listings[counter]['is_superhost'] = is_superhost\n", 84 | " listings[counter]['listing_type'] = listing_type\n", 85 | " listings[counter]['rating'] = rating\n", 86 | " listings[counter]['num_reviews'] = num_reviews\n", 87 | "\n", 88 | " counter += 1\n", 89 | "\n", 90 | " # GET PRICE PER NIGHT, AMENITIES, HOUSING_INFO\n", 91 | " counter = 0\n", 92 | " for span in spans:\n", 93 | " text = span.get_text()\n", 94 | " if text and '/ night' in text and 'total' not in text:\n", 95 | " price_per_night = None\n", 96 | " amenities = []\n", 97 | " housing_info = []\n", 98 | "\n", 99 | " # Some have a discounted price so we only want the actual price per night\n", 100 | " price_per_night = text.rsplit('$', 1)[1]\n", 101 | " price_per_night = 
price_per_night.replace(' / night', '')\n", 102 | " print('ppn', price_per_night)\n", 103 | "\n", 104 | " # Gets amenities like Wifi/Kitchen/Free Parking\n", 105 | " amenities = span.parent.parent.parent.previous_sibling.get_text()\n", 106 | " amenities = amenities.split(' · ')\n", 107 | "\n", 108 | " # Gets guests, bedrooms, baths\n", 109 | " housing_info = span.parent.parent.parent.previous_sibling.previous_sibling.get_text()\n", 110 | " housing_info = housing_info.split(' · ')\n", 111 | "\n", 112 | " listings[counter]['price_per_night'] = price_per_night \n", 113 | " listings[counter]['amenities'] = amenities\n", 114 | " listings[counter]['housing_info'] = housing_info\n", 115 | "\n", 116 | " counter += 1\n", 117 | "\n", 118 | " return listings" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 34, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "ppn 134\n", 131 | "
Wifi · Kitchen
\n", 132 | "ppn 120\n", 133 | "
Free parking · Wifi
\n", 134 | "ppn 193\n", 135 | "
Free parking · Wifi · Kitchen
\n", 136 | "ppn 89\n", 137 | "
Wifi · Kitchen
\n", 138 | "ppn 95\n", 139 | "
Wifi · Kitchen
\n", 140 | "ppn 73\n", 141 | "
Wifi · Kitchen
\n", 142 | "ppn 112\n", 143 | "
Wifi · Kitchen
\n", 144 | "ppn 91\n", 145 | "
Free parking · Wifi · Kitchen
\n", 146 | "ppn 140\n", 147 | "
Free parking · Wifi
\n", 148 | "ppn 109\n", 149 | "
Free parking · Wifi · Kitchen
\n", 150 | "ppn 167\n", 151 | "
Wifi · Kitchen
\n", 152 | "ppn 150\n", 153 | "
Wifi
\n", 154 | "ppn 197\n", 155 | "
Wifi
\n", 156 | "ppn 82\n", 157 | "
Free parking · Wifi
\n", 158 | "ppn 99\n", 159 | "
Wifi
\n", 160 | "ppn 204\n", 161 | "
Wifi · Kitchen
\n", 162 | "ppn 104\n", 163 | "
Free parking · Wifi · Kitchen
\n", 164 | "ppn 82\n", 165 | "
Free parking · Wifi
\n", 166 | "ppn 43\n", 167 | "
Free parking · Wifi
\n", 168 | "ppn 234\n", 169 | "
Wifi · Kitchen
\n", 170 | "[ { 'amenities': ['Wifi', 'Kitchen'],\n", 171 | " 'housing_info': ['2 guests', 'Studio', '1 bed', '1 bath'],\n", 172 | " 'is_superhost': 'False',\n", 173 | " 'listing_name': 'South End Studio Perfect for Work Travel #26',\n", 174 | " 'listing_type': 'Entire apartment',\n", 175 | " 'num_reviews': '32',\n", 176 | " 'price_per_night': '134',\n", 177 | " 'rating': '4.66',\n", 178 | " 'total_price': '1,010',\n", 179 | " 'url': 'https://www.airbnb.com/rooms/6759439?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 180 | " { 'amenities': ['Free parking', 'Wifi'],\n", 181 | " 'housing_info': ['2 guests', '1 bedroom', '2 beds', '1 private bath'],\n", 182 | " 'is_superhost': 'True',\n", 183 | " 'listing_name': 'Room For Two Short Walk To Harvard (RM 8)',\n", 184 | " 'listing_type': 'Private room',\n", 185 | " 'num_reviews': '5',\n", 186 | " 'price_per_night': '120',\n", 187 | " 'rating': '5.0',\n", 188 | " 'total_price': '822',\n", 189 | " 'url': 'https://www.airbnb.com/rooms/35333477?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 190 | " { 'amenities': ['Free parking', 'Wifi', 'Kitchen'],\n", 191 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n", 192 | " 'is_superhost': 'True',\n", 193 | " 'listing_name': 'Luxury 1BR APT w/ parking by MIT/Harvard/BU/Fenway',\n", 194 | " 'listing_type': 'Entire apartment',\n", 195 | " 'num_reviews': '118',\n", 196 | " 'price_per_night': '193',\n", 197 | " 'rating': '4.94',\n", 198 | " 'total_price': '1,381',\n", 199 | " 'url': 'https://www.airbnb.com/rooms/34944649?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 200 | " { 'amenities': ['Wifi', 'Kitchen'],\n", 201 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 
private bath'],\n", 202 | " 'is_superhost': 'True',\n", 203 | " 'listing_name': '★ The Map Room | Close to Subway + Downtown ★',\n", 204 | " 'listing_type': 'Private room',\n", 205 | " 'num_reviews': '32',\n", 206 | " 'price_per_night': '89',\n", 207 | " 'rating': '4.97',\n", 208 | " 'total_price': '632',\n", 209 | " 'url': 'https://www.airbnb.com/rooms/32895915?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 210 | " { 'amenities': ['Wifi', 'Kitchen'],\n", 211 | " 'housing_info': ['2 guests', 'Studio', '1 bed', '1 bath'],\n", 212 | " 'is_superhost': 'True',\n", 213 | " 'listing_name': 'Penthouse Room With Private Entrance',\n", 214 | " 'listing_type': 'Entire apartment',\n", 215 | " 'num_reviews': '143',\n", 216 | " 'price_per_night': '95',\n", 217 | " 'rating': '4.92',\n", 218 | " 'total_price': '730',\n", 219 | " 'url': 'https://www.airbnb.com/rooms/18330818?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 220 | " { 'amenities': ['Wifi', 'Kitchen'],\n", 221 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 shared bath'],\n", 222 | " 'is_superhost': 'True',\n", 223 | " 'listing_name': 'R1. 
Quiet private room near Kendall/MIT',\n", 224 | " 'listing_type': 'Private room',\n", 225 | " 'num_reviews': '134',\n", 226 | " 'price_per_night': '73',\n", 227 | " 'rating': '4.80',\n", 228 | " 'total_price': '511',\n", 229 | " 'url': 'https://www.airbnb.com/rooms/21738836?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 230 | " { 'amenities': ['Wifi', 'Kitchen'],\n", 231 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '2.5 shared baths'],\n", 232 | " 'is_superhost': 'True',\n", 233 | " 'listing_name': '★ Spacious, Modern & Comfy★Professionally Cleaned!',\n", 234 | " 'listing_type': 'Private room',\n", 235 | " 'num_reviews': '300',\n", 236 | " 'price_per_night': '112',\n", 237 | " 'rating': '4.96',\n", 238 | " 'total_price': '792',\n", 239 | " 'url': 'https://www.airbnb.com/rooms/22327141?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 240 | " { 'amenities': ['Free parking', 'Wifi', 'Kitchen'],\n", 241 | " 'housing_info': ['3 guests', '1 bedroom', '2 beds', '1 bath'],\n", 242 | " 'is_superhost': 'True',\n", 243 | " 'listing_name': 'Garden View 3 (private entrance & free parking)',\n", 244 | " 'listing_type': 'Entire apartment',\n", 245 | " 'num_reviews': '28',\n", 246 | " 'price_per_night': '91',\n", 247 | " 'rating': '4.89',\n", 248 | " 'total_price': '704',\n", 249 | " 'url': 'https://www.airbnb.com/rooms/38871127?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 250 | " { 'amenities': ['Free parking', 'Wifi'],\n", 251 | " 'housing_info': ['2 guests', '1 bedroom', '2 beds', '1 private bath'],\n", 252 | " 'is_superhost': 'True',\n", 253 | " 'listing_name': 'Room For Two Short Walk To Harvard (Rm 9)',\n", 254 | " 'listing_type': 'Private room',\n", 255 | " 'num_reviews': 
'9',\n", 256 | " 'price_per_night': '140',\n", 257 | " 'rating': '4.78',\n", 258 | " 'total_price': '981',\n", 259 | " 'url': 'https://www.airbnb.com/rooms/28801153?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 260 | " { 'amenities': ['Free parking', 'Wifi', 'Kitchen'],\n", 261 | " 'housing_info': ['4 guests', '1 bedroom', '3 beds', '1.5 baths'],\n", 262 | " 'is_superhost': 'True',\n", 263 | " 'listing_name': 'Renovated One Bedroom Apartment Allston, MA.',\n", 264 | " 'listing_type': 'Entire apartment',\n", 265 | " 'num_reviews': '78',\n", 266 | " 'price_per_night': '109',\n", 267 | " 'rating': '4.97',\n", 268 | " 'total_price': '803',\n", 269 | " 'url': 'https://www.airbnb.com/rooms/35001266?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 270 | " { 'amenities': ['Wifi', 'Kitchen'],\n", 271 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 private bath'],\n", 272 | " 'is_superhost': 'True',\n", 273 | " 'listing_name': '✔Arlington✔Skydeck ✔Walk Score 95✔Full Kitchen',\n", 274 | " 'listing_type': 'Private room',\n", 275 | " 'num_reviews': '293',\n", 276 | " 'price_per_night': '167',\n", 277 | " 'rating': '4.93',\n", 278 | " 'total_price': '1,194',\n", 279 | " 'url': 'https://www.airbnb.com/rooms/990668?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 280 | " { 'amenities': ['Wifi'],\n", 281 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n", 282 | " 'is_superhost': 'True',\n", 283 | " 'listing_name': 'private studio near Harvard/MIT',\n", 284 | " 'listing_type': 'Entire apartment',\n", 285 | " 'num_reviews': '175',\n", 286 | " 'price_per_night': '150',\n", 287 | " 'rating': '4.93',\n", 288 | " 'total_price': '1,067',\n", 289 | " 'url': 
'https://www.airbnb.com/rooms/15512578?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 290 | " { 'amenities': ['Wifi'],\n", 291 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n", 292 | " 'is_superhost': 'False',\n", 293 | " 'listing_name': '2.bostonparkplaza · SUPERIOR ROOM',\n", 294 | " 'listing_type': '',\n", 295 | " 'num_reviews': '0',\n", 296 | " 'price_per_night': '197',\n", 297 | " 'rating': None,\n", 298 | " 'total_price': '1,182',\n", 299 | " 'url': 'https://www.airbnb.com/rooms/43715032?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 300 | " { 'amenities': ['Free parking', 'Wifi'],\n", 301 | " 'housing_info': ['3 guests', 'Studio', '2 beds', '1 bath'],\n", 302 | " 'is_superhost': 'True',\n", 303 | " 'listing_name': 'Guest Suite w Free Parking -\\n10min Train to Boston',\n", 304 | " 'listing_type': 'Entire guest suite',\n", 305 | " 'num_reviews': '169',\n", 306 | " 'price_per_night': '82',\n", 307 | " 'rating': '4.93',\n", 308 | " 'total_price': '588',\n", 309 | " 'url': 'https://www.airbnb.com/rooms/28574516?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 310 | " { 'amenities': ['Wifi'],\n", 311 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n", 312 | " 'is_superhost': 'True',\n", 313 | " 'listing_name': 'Quaint studio Apt close to CBD & Universities',\n", 314 | " 'listing_type': 'Entire apartment',\n", 315 | " 'num_reviews': '30',\n", 316 | " 'price_per_night': '99',\n", 317 | " 'rating': '4.93',\n", 318 | " 'total_price': '724',\n", 319 | " 'url': 'https://www.airbnb.com/rooms/37158199?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 
320 | " { 'amenities': ['Wifi', 'Kitchen'],\n", 321 | " 'housing_info': ['3 guests', '1 bedroom', '1 bed', '1 bath'],\n", 322 | " 'is_superhost': 'True',\n", 323 | " 'listing_name': 'Brand New 1 BR | 1 BA on Charles St',\n", 324 | " 'listing_type': 'Entire apartment',\n", 325 | " 'num_reviews': '185',\n", 326 | " 'price_per_night': '204',\n", 327 | " 'rating': '4.95',\n", 328 | " 'total_price': '1,496',\n", 329 | " 'url': 'https://www.airbnb.com/rooms/16693642?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 330 | " { 'amenities': ['Free parking', 'Wifi', 'Kitchen'],\n", 331 | " 'housing_info': ['3 guests', '1 bedroom', '2 beds', '1 bath'],\n", 332 | " 'is_superhost': 'True',\n", 333 | " 'listing_name': 'Private Garden Level 1 BR APT W/ Parking in Newton',\n", 334 | " 'listing_type': 'Entire house',\n", 335 | " 'num_reviews': '48',\n", 336 | " 'price_per_night': '104',\n", 337 | " 'rating': '4.96',\n", 338 | " 'total_price': '798',\n", 339 | " 'url': 'https://www.airbnb.com/rooms/33319406?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 340 | " { 'amenities': ['Free parking', 'Wifi'],\n", 341 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 bath'],\n", 342 | " 'is_superhost': 'True',\n", 343 | " 'listing_name': 'Newly Renovated Boston College Studio',\n", 344 | " 'listing_type': 'Entire guest suite',\n", 345 | " 'num_reviews': '183',\n", 346 | " 'price_per_night': '82',\n", 347 | " 'rating': '4.75',\n", 348 | " 'total_price': '627',\n", 349 | " 'url': 'https://www.airbnb.com/rooms/22886713?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 350 | " { 'amenities': ['Free parking', 'Wifi'],\n", 351 | " 'housing_info': ['2 guests', '1 bedroom', '1 bed', '1 shared bath'],\n", 
352 | " 'is_superhost': 'True',\n", 353 | " 'listing_name': 'Cozy room! Amazing Location! Great Price!!',\n", 354 | " 'listing_type': 'Private room',\n", 355 | " 'num_reviews': '94',\n", 356 | " 'price_per_night': '43',\n", 357 | " 'rating': '4.86',\n", 358 | " 'total_price': '299',\n", 359 | " 'url': 'https://www.airbnb.com/rooms/37365036?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'},\n", 360 | " { 'amenities': ['Wifi', 'Kitchen'],\n", 361 | " 'housing_info': ['5 guests', '2 bedrooms', '2 beds', '1.5 baths'],\n", 362 | " 'is_superhost': 'True',\n", 363 | " 'listing_name': 'Contemporary 2BR in Central Sq! Harvard/MIT',\n", 364 | " 'listing_type': 'Entire condominium',\n", 365 | " 'num_reviews': '111',\n", 366 | " 'price_per_night': '234',\n", 367 | " 'rating': '4.90',\n", 368 | " 'total_price': '1,715',\n", 369 | " 'url': 'https://www.airbnb.com/rooms/19183167?adults=2&check_in=2020-08-16&check_out=2020-08-22&previous_page_section_name=1000&federated_search_id=836b997d-abc8-4123-bc24-d3ca3cfd730e'}]\n" 370 | ] 371 | } 372 | ], 373 | "source": [ 374 | "soup = getListings('Boston', 'MA', '2020-08-16', '2020-08-22', adults='2', page='2')\n", 375 | "pp.pprint(soup)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 24, 381 | "metadata": {}, 382 | "outputs": [ 383 | { 384 | "name": "stdout", 385 | "output_type": "stream", 386 | "text": [ 387 | "https://www.airbnb.com/s/homes?search_type=pagination&items_offset=20&query=Boston%2C%20MA&checkin=2020-08-16&checkout=2020-08-22&adults=2\n" 388 | ] 389 | } 390 | ], 391 | "source": [ 392 | "def getSoup(city, state, checkin, checkout, adults='1', page='0', search_type='pagination'):\n", 393 | " # Build the URL\n", 394 | " baseurl = 'https://www.airbnb.com/s/homes?'\n", 395 | "\n", 396 | " # Add pagination\n", 397 | " items_offset = str(int(page) * 20)\n", 398 | " URL = baseurl + 'search_type=' + search_type + 
'&items_offset=' + items_offset\n", 399 | " \n", 400 | " # Add location\n", 401 | " query = city + '%2C%20' + state\n", 402 | " URL = URL + '&query=' + query\n", 403 | " \n", 404 | " # Add logistics\n", 405 | " URL = URL + '&checkin=' + checkin + '&checkout=' + checkout + '&adults=' + adults\n", 406 | " print(URL)\n", 407 | " page = requests.get(URL)\n", 408 | " soup = BeautifulSoup(page.content, 'html.parser')\n", 409 | "\n", 410 | " return soup\n", 411 | "\n", 412 | "soup = getSoup('Boston', 'MA', '2020-08-16', '2020-08-22', adults='2', page='1')\n" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": 26, 418 | "metadata": {}, 419 | "outputs": [ 420 | { 421 | "ename": "AttributeError", 422 | "evalue": "'NoneType' object has no attribute 'get_text'", 423 | "output_type": "error", 424 | "traceback": [ 425 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 426 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", 427 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 66\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[1;31m# Gets gusts, bedrooms, baths\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 68\u001b[1;33m \u001b[0mhousing_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mspan\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparent\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprevious_sibling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprevious_sibling\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_text\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 69\u001b[0m \u001b[0mhousing_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mhousing_info\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m' · 
'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 428 | "\u001b[1;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'get_text'" 429 | ] 430 | } 431 | ], 432 | "source": [ 433 | "links = soup.find_all('a')\n", 434 | "listings = []\n", 435 | "# GET LISTING NAME\n", 436 | "for link in links:\n", 437 | " # We just want to add real listings, not all link names\n", 438 | " if link.get('data-check-info-section'):\n", 439 | " listings.append({'listing_name': link.get('aria-label')})\n", 440 | "\n", 441 | "# GET SUPERHOST, LISTING_TYPE, RATING, NUM_REVIEWS\n", 442 | "divs = soup.find_all('div')\n", 443 | "counter = 0\n", 444 | "for div in divs:\n", 445 | " if counter < len(listings) and div.get_text() == listings[counter]['listing_name']:\n", 446 | " is_superhost = 'False'\n", 447 | " listing_type = ''\n", 448 | " rating = None\n", 449 | " num_reviews = '0'\n", 450 | " \n", 451 | " listing_info = div.previous_sibling\n", 452 | " for child in listing_info:\n", 453 | " if 'Entire ' in child.get_text() or 'Private ' in child.get_text():\n", 454 | " listing_type = child.get_text()\n", 455 | " elif 'SUPERHOST' in child.get_text():\n", 456 | " is_superhost = 'True'\n", 457 | " elif '(' and ')' in child.get_text():\n", 458 | " for c in child:\n", 459 | " split_rating = c.get_text().split()\n", 460 | " rating = split_rating[0]\n", 461 | " num_reviews = split_rating[1].replace('(', '')\n", 462 | " num_reviews = num_reviews.replace(')', '')\n", 463 | " \n", 464 | " listings[counter]['is_superhost'] = is_superhost\n", 465 | " listings[counter]['listing_type'] = listing_type\n", 466 | " listings[counter]['rating'] = rating\n", 467 | " listings[counter]['num_reviews'] = num_reviews\n", 468 | " \n", 469 | " counter += 1\n", 470 | " \n", 471 | "# GET TOTAL PRICE\n", 472 | "spans = soup.find_all('span')\n", 473 | "counter = 0\n", 474 | "for span in spans:\n", 475 | " text = span.get_text()\n", 476 | " if 
text and 'total' in text:\n", 477 | " total = text.replace('$', '')\n", 478 | " total = total.replace(' total', '')\n", 479 | " listings[counter]['total_price'] = total\n", 480 | " counter += 1\n", 481 | "\n", 482 | "# GET PRICE PER NIGHT, AMENITIES, HOUSING_INFO\n", 483 | "counter = 0\n", 484 | "for span in spans:\n", 485 | " text = span.get_text()\n", 486 | " if text and '/ night' in text and 'total' not in text:\n", 487 | " price_per_night = None\n", 488 | " amenities = []\n", 489 | " housing_info = []\n", 490 | "\n", 491 | " # Some have a discounted price so we only want the actual price per night\n", 492 | " price_per_night = text.rsplit('$', 1)[1]\n", 493 | " price_per_night = price_per_night.replace(' / night', '')\n", 494 | " \n", 495 | " # Gets amenities like Wifi/Kitchen/Free Parking\n", 496 | " amenities = span.parent.parent.parent.previous_sibling.get_text()\n", 497 | " amenities = amenities.split(' · ')\n", 498 | " \n", 499 | " # Gets guests, bedrooms, baths\n", 500 | " housing_info = span.parent.parent.parent.previous_sibling.previous_sibling.get_text()\n", 501 | " housing_info = housing_info.split(' · ')\n", 502 | " \n", 503 | "# # Gets is_superhost, listing_type, rating, and num_reviews\n", 504 | "# listing_info = span.parent.parent.parent.previous_sibling.previous_sibling.previous_sibling.previous_sibling.children\n", 505 | "# for child in listing_info:\n", 506 | "# if 'Entire ' in child.get_text() or 'Private ' in child.get_text():\n", 507 | "# listing_type = child.get_text()\n", 508 | "# elif 'SUPERHOST' in child.get_text():\n", 509 | "# is_superhost = 'True'\n", 510 | "# elif '(' and ')' in child.get_text():\n", 511 | "# for c in child:\n", 512 | "# split_rating = c.get_text().split()\n", 513 | "# rating = split_rating[0]\n", 514 | "# num_reviews = split_rating[1].replace('(', '')\n", 515 | "# num_reviews = num_reviews.replace(')', '')\n", 516 | "\n", 517 | " listings[counter]['price_per_night'] = price_per_night \n", 518 | "
listings[counter]['amenities'] = amenities\n", 519 | " listings[counter]['housing_info'] = housing_info\n", 520 | "\n", 521 | " counter += 1\n", 522 | "\n", 523 | "\n", 524 | "pp.pprint(listings)" 525 | ] 526 | } 527 | ], 528 | "metadata": { 529 | "kernelspec": { 530 | "display_name": "Python 3", 531 | "language": "python", 532 | "name": "python3" 533 | }, 534 | "language_info": { 535 | "codemirror_mode": { 536 | "name": "ipython", 537 | "version": 3 538 | }, 539 | "file_extension": ".py", 540 | "mimetype": "text/x-python", 541 | "name": "python", 542 | "nbconvert_exporter": "python", 543 | "pygments_lexer": "ipython3", 544 | "version": "3.7.0" 545 | } 546 | }, 547 | "nbformat": 4, 548 | "nbformat_minor": 2 549 | } 550 | -------------------------------------------------------------------------------- /Notebooks/scrape_lat_long.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 36, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Couldnt get the coordinate\n", 13 | "Couldnt get the coordinate\n", 14 | "53.8147 -1.52538\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import requests, re, time\n", 20 | "\n", 21 | "attempts = 0\n", 22 | "success = False\n", 23 | "\n", 24 | "# Sometimes request doesn't have the lat long, this gives it 10 attempts to try to get it\n", 25 | "while not success and attempts < 10:\n", 26 | " try:\n", 27 | " r = requests.get('https://www.airbnb.co.uk/rooms/15307317')\n", 28 | " p_lat = re.compile(r'\"lat\":([-0-9.]+),')\n", 29 | " p_lng = re.compile(r'\"lng\":([-0-9.]+),')\n", 30 | " lat = p_lat.findall(r.text)[0]\n", 31 | " lng = p_lng.findall(r.text)[0]\n", 32 | " success = True # Found the lat and long, stop looping\n", 33 | " except:\n", 34 | " print('Couldnt get the coordinate')\n", 35 | " attempts += 1\n", 36 | "\n", 37 | "print(lat,lng)" 38 | ] 39 | }, 40 | { 
41 | "cell_type": "code", 42 | "execution_count": 45, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import requests, re, time\n", 47 | "\n", 48 | "# Pass in listing id and return coordinates of listing\n", 49 | "def get_listing_coordinates(id):\n", 50 | " attempts = 0\n", 51 | " success = False\n", 52 | "\n", 53 | " # Sometimes request doesn't have the lat long, this gives it 10 attempts to try to get it\n", 54 | " while not success and attempts < 10:\n", 55 | " try:\n", 56 | " URL = 'https://www.airbnb.com/rooms/' + id\n", 57 | " r = requests.get(URL)\n", 58 | " p_lat = re.compile(r'\"lat\":([-0-9.]+),')\n", 59 | " p_lng = re.compile(r'\"lng\":([-0-9.]+),')\n", 60 | " lat = p_lat.findall(r.text)[0]\n", 61 | " lng = p_lng.findall(r.text)[0]\n", 62 | " success = True # Found the lat and long, stop looping\n", 63 | " return {'latitude': lat, 'longitude': lng}\n", 64 | " except:\n", 65 | " # Except is usually page loaded without coordinates so we will retry\n", 66 | " attempts += 1\n", 67 | "\n", 68 | " return {'Unable to get the coordinates'}, 400" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 46, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "{'latitude': '42.3766', 'longitude': '-71.03634'}\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "print(get_listing_coordinates('17974950'))" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python 3", 92 | "language": "python", 93 | "name": "python3" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.7.0" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 2 110 | } 111 | 
import pprint
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


def getAmenities():
    """Scrape amenity and accessibility filter ids from an Airbnb search page.

    Opens the Boston, MA search page in headless Chrome, opens the
    "more filters" panel, expands the amenities and accessibility sections,
    and then parses every ``<input>`` whose ``id`` contains ``'amenities'``.

    Returns:
        A list of ``{'amenity': <input name>, 'amenity_id': <id>}`` dicts,
        where the id is the input's ``id`` attribute with the ``'amenities-'``
        prefix removed.

    Raises:
        IndexError: If the filter button or one of the expander elements is
            not found on the page. The browser is still shut down.

    Example:
        getAmenities()
    """
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Manually set window size so we can find by class name later.
        driver.set_window_size(500, 951)
        driver.get(URL)
        time.sleep(1)  # Give the page's JavaScript time to run.

        more_filters_button = driver.find_elements_by_xpath(
            '//*[@id="filter-menu-chip-group"]/div[2]/button')[0]
        more_filters_button.click()
        time.sleep(1)  # Waiting for the page's JS to run.

        # Dangerous: class names change with the window dimensions and may
        # also rotate every once in a while. The element list is re-queried
        # after each click, in the same order as before (index 1, then 0).
        # The facilities expander (index 2) is not clicked.
        show_amenities = driver.find_elements_by_class_name('_6lth7f')[1]
        show_amenities.click()
        show_accessibility = driver.find_elements_by_class_name('_6lth7f')[0]
        show_accessibility.click()

        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always close the browser, even when a lookup or click fails,
        # so no idle headless Chrome process is left behind.
        driver.quit()

    amenities = []
    for input_tag in soup.find_all('input'):
        input_id = input_tag.get('id')
        if input_id and 'amenities' in input_id:
            amenities.append({
                'amenity': input_tag.get('name'),
                'amenity_id': input_id.replace('amenities-', ''),
            })

    return amenities
'amenity_id': '294'},\n", 74 | " {'amenity': 'Step-free shower', 'amenity_id': '296'},\n", 75 | " {'amenity': 'Shower chair', 'amenity_id': '297'},\n", 76 | " {'amenity': 'Handheld shower head', 'amenity_id': '136'},\n", 77 | " {'amenity': 'Bathtub with bath chair', 'amenity_id': '123'},\n", 78 | " {'amenity': 'No stairs or steps to enter', 'amenity_id': '127'},\n", 79 | " {'amenity': 'Wide entryway', 'amenity_id': '128'},\n", 80 | " {'amenity': 'Disabled parking spot', 'amenity_id': '114'},\n", 81 | " {'amenity': 'Mobile hoist', 'amenity_id': '289'},\n", 82 | " {'amenity': 'Pool with pool hoist', 'amenity_id': '290'},\n", 83 | " {'amenity': 'Ceiling hoist', 'amenity_id': '291'},\n", 84 | " {'amenity': 'Kitchen', 'amenity_id': '8'},\n", 85 | " {'amenity': 'Shampoo', 'amenity_id': '41'},\n", 86 | " {'amenity': 'Heating', 'amenity_id': '30'},\n", 87 | " {'amenity': 'Air conditioning', 'amenity_id': '5'},\n", 88 | " {'amenity': 'Washer', 'amenity_id': '33'},\n", 89 | " {'amenity': 'Dryer', 'amenity_id': '34'},\n", 90 | " {'amenity': 'Wifi', 'amenity_id': '4'},\n", 91 | " {'amenity': 'Breakfast', 'amenity_id': '16'},\n", 92 | " {'amenity': 'Indoor fireplace', 'amenity_id': '27'},\n", 93 | " {'amenity': 'Hangers', 'amenity_id': '44'},\n", 94 | " {'amenity': 'Iron', 'amenity_id': '46'},\n", 95 | " {'amenity': 'Hair dryer', 'amenity_id': '45'},\n", 96 | " {'amenity': 'Laptop-friendly workspace', 'amenity_id': '47'},\n", 97 | " {'amenity': 'TV', 'amenity_id': '58'},\n", 98 | " {'amenity': 'Crib', 'amenity_id': '286'},\n", 99 | " {'amenity': 'High chair', 'amenity_id': '64'},\n", 100 | " {'amenity': 'Self check-in', 'amenity_id': '51'},\n", 101 | " {'amenity': 'Smoke alarm', 'amenity_id': '35'},\n", 102 | " {'amenity': 'Carbon monoxide alarm', 'amenity_id': '36'},\n", 103 | " {'amenity': 'Private bathroom', 'amenity_id': '78'},\n", 104 | " {'amenity': 'Pets allowed', 'amenity_id': '12'},\n", 105 | " {'amenity': 'Smoking allowed', 'amenity_id': '11'}]" 106 | ] 107 | }, 108 | 
import pprint
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


def get_house_rules():
    """Scrape the house-rule filter ids from an Airbnb search page.

    Opens the Boston, MA search page in headless Chrome, opens the
    "more filters" panel without expanding any section, and parses every
    ``<input>`` whose ``id`` contains ``'amenities'``.

    Returns:
        A list of ``{'house_rule': <input name>, 'house_rule_id': <id>}``
        dicts, where the id is the input's ``id`` attribute with the
        ``'amenities-'`` prefix removed. If the filter button cannot be
        found, the tuple ``({'error': 'Unable to access filter button'}, 400)``
        is returned instead.

    Example:
        get_house_rules()
    """
    # Build URL
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    # Set up headless chrome driver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Manually set window size so we can find by class name later.
        driver.set_window_size(500, 951)

        # Control page to show house rules
        driver.get(URL)
        time.sleep(1)  # Give the page's JavaScript time to run.

        # Check the list before indexing: indexing an empty result would
        # raise IndexError and the error return below would never be reached.
        filter_buttons = driver.find_elements_by_xpath(
            '//*[@id="filter-menu-chip-group"]/div[2]/button')
        if not filter_buttons:
            # Return error message if we cannot access the house rules.
            return {'error': 'Unable to access filter button'}, 400

        filter_buttons[0].click()
        time.sleep(1)  # Waiting for the page's JS to run.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        driver.quit()  # Close driver to prevent idle processes

    house_rules = []
    for input_tag in soup.find_all('input'):
        input_id = input_tag.get('id')
        if input_id and 'amenities' in input_id:
            house_rules.append({
                'house_rule': input_tag.get('name'),
                'house_rule_id': input_id.replace('amenities-', ''),
            })

    return house_rules
import pprint
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


def getLanguageIds():
    """Scrape the host-language filter ids from an Airbnb search page.

    Opens the Boston, MA search page in headless Chrome, opens the
    "more filters" panel, expands the language list, and parses every
    ``<input>`` whose ``id`` contains ``'languages'``.

    Returns:
        A list of ``{'language': <input name>, 'language_id': <id>}`` dicts,
        where the id is the input's ``id`` attribute with the ``'languages-'``
        prefix removed.

    Raises:
        IndexError: If the filter button or the language expander is not
            found on the page. The browser is still shut down.

    Example:
        getLanguageIds()
    """
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Manually set window size so we can find by class name later.
        driver.set_window_size(500, 951)
        driver.get(URL)
        time.sleep(1)  # Give the page's JavaScript time to run.

        more_filters_button = driver.find_elements_by_xpath(
            '//*[@id="filter-menu-chip-group"]/div[2]/button')[0]
        more_filters_button.click()
        time.sleep(1)  # Waiting for the page's JS to run.

        # Dangerous: class names change with the window dimensions and may
        # also rotate every once in a while.
        show_all_languages_button = driver.find_elements_by_class_name('_6lth7f')[6]
        show_all_languages_button.click()

        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always close the browser, even when a lookup or click fails,
        # so no idle headless Chrome process is left behind.
        driver.quit()

    languages = []
    for input_tag in soup.find_all('input'):
        input_id = input_tag.get('id')
        if input_id and 'languages' in input_id:
            languages.append({
                'language': input_tag.get('name'),
                'language_id': input_id.replace('languages-', ''),
            })

    return languages
import pprint
import time
from urllib.parse import quote

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


def getNeighborhoodIds(city, state):
    """Scrape the neighborhood filter ids for a city from an Airbnb search page.

    Opens the search page for ``city, state`` in headless Chrome, opens the
    "more filters" panel, expands the neighborhood list, and parses every
    ``<input>`` whose ``id`` contains ``'neighborhood_ids'``.

    Args:
        city: City name, e.g. ``'Boston'``.
        state: State name or abbreviation, e.g. ``'MA'``.

    Returns:
        A list of ``{'neighborhood': <input name>, 'neighborhood_id': <id>}``
        dicts, where the id is the input's ``id`` attribute with the
        ``'neighborhood_ids-'`` prefix removed.

    Raises:
        IndexError: If the filter button or the neighborhood expander is not
            found on the page. The browser is still shut down.

    Example:
        getNeighborhoodIds('Boston', 'MA')
    """
    base_url = 'https://www.airbnb.com/s/homes?query='
    # Percent-encode the caller-supplied parts so spaces or reserved
    # characters (e.g. 'New York') cannot break the query string.
    # '%2C%20' is the already-encoded ', ' separator.
    URL = base_url + quote(city, safe='') + '%2C%20' + quote(state, safe='')

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Manually set window size so we can find by class name later.
        driver.set_window_size(500, 951)
        driver.get(URL)
        time.sleep(1)  # Give the page's JavaScript time to run.

        more_filters_button = driver.find_elements_by_xpath(
            '//*[@id="filter-menu-chip-group"]/div[2]/button')[0]
        more_filters_button.click()
        time.sleep(1)  # Waiting for the page's JS to run.

        # Dangerous: class names change with the window dimensions and may
        # also rotate every once in a while.
        show_all_neighborhoods_button = driver.find_elements_by_class_name('_6lth7f')[5]
        show_all_neighborhoods_button.click()

        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always close the browser, even when a lookup or click fails,
        # so no idle headless Chrome process is left behind.
        driver.quit()

    neighborhoods = []
    for input_tag in soup.find_all('input'):
        input_id = input_tag.get('id')
        if input_id and 'neighborhood_ids' in input_id:
            neighborhoods.append({
                'neighborhood': input_tag.get('name'),
                'neighborhood_id': input_id.replace('neighborhood_ids-', ''),
            })

    return neighborhoods
{'neighborhood': 'Jamaica Plain', 'neighborhood_id': '525'},\n", 65 | " {'neighborhood': 'Newton', 'neighborhood_id': '598'},\n", 66 | " {'neighborhood': 'Dorchester', 'neighborhood_id': '535'},\n", 67 | " {'neighborhood': 'Everett', 'neighborhood_id': '600'},\n", 68 | " {'neighborhood': 'Watertown', 'neighborhood_id': '601'},\n", 69 | " {'neighborhood': 'Medford', 'neighborhood_id': '603'},\n", 70 | " {'neighborhood': 'Malden', 'neighborhood_id': '604'},\n", 71 | " {'neighborhood': 'Mattapan', 'neighborhood_id': '543'},\n", 72 | " {'neighborhood': 'Revere', 'neighborhood_id': '609'},\n", 73 | " {'neighborhood': 'Financial District', 'neighborhood_id': '611'},\n", 74 | " {'neighborhood': 'Downtown Crossing', 'neighborhood_id': '613'},\n", 75 | " {'neighborhood': 'Hyde Park', 'neighborhood_id': '551'},\n", 76 | " {'neighborhood': 'Government Center', 'neighborhood_id': '618'},\n", 77 | " {'neighborhood': 'Back Bay', 'neighborhood_id': '363'},\n", 78 | " {'neighborhood': 'South End', 'neighborhood_id': '492'},\n", 79 | " {'neighborhood': 'Beacon Hill', 'neighborhood_id': '620'},\n", 80 | " {'neighborhood': 'West End', 'neighborhood_id': '366'},\n", 81 | " {'neighborhood': 'Brookline', 'neighborhood_id': '495'},\n", 82 | " {'neighborhood': 'Roslindale', 'neighborhood_id': '559'},\n", 83 | " {'neighborhood': 'North End', 'neighborhood_id': '367'},\n", 84 | " {'neighborhood': 'Leather District', 'neighborhood_id': '439'},\n", 85 | " {'neighborhood': 'Harvard Square', 'neighborhood_id': '888'},\n", 86 | " {'neighborhood': 'Fenway/Kenmore', 'neighborhood_id': '504'},\n", 87 | " {'neighborhood': 'Chestnut Hill', 'neighborhood_id': '1017'},\n", 88 | " {'neighborhood': 'West Roxbury', 'neighborhood_id': '570'},\n", 89 | " {'neighborhood': 'Mission Hill', 'neighborhood_id': '506'},\n", 90 | " {'neighborhood': 'Chinatown', 'neighborhood_id': '444'}]" 91 | ] 92 | }, 93 | "execution_count": 40, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | 
import pprint
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


def getPropertyTypes():
    """Scrape the property-type filter ids from an Airbnb search page.

    Opens the Boston, MA search page in headless Chrome, opens the
    "more filters" panel, expands the property-type list, and parses every
    ``<input>`` whose ``id`` contains ``'property_type_id'``.

    Returns:
        A list of ``{'property_type': <input name>, 'property_type_id': <id>}``
        dicts, where the id is the input's ``id`` attribute with the
        ``'property_type_id-'`` prefix removed.

    Raises:
        IndexError: If the filter button or the property-type expander is not
            found on the page. The browser is still shut down.

    Example:
        getPropertyTypes()
    """
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA'  # Can use any city/state

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Manually set window size so we can find by class name later.
        driver.set_window_size(500, 951)
        driver.get(URL)
        time.sleep(1)  # Give the page's JavaScript time to run.

        more_filters_button = driver.find_elements_by_xpath(
            '//*[@id="filter-menu-chip-group"]/div[2]/button')[0]
        more_filters_button.click()
        time.sleep(1)  # Waiting for the page's JS to run.

        # Dangerous: class names change with the window dimensions and may
        # also rotate every once in a while.
        show_all_property_types_button = driver.find_elements_by_class_name('_6lth7f')[3]
        show_all_property_types_button.click()

        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always close the browser, even when a lookup or click fails,
        # so no idle headless Chrome process is left behind.
        driver.quit()

    property_types = []
    for input_tag in soup.find_all('input'):
        input_id = input_tag.get('id')
        if input_id and 'property_type_id' in input_id:
            property_types.append({
                'property_type': input_tag.get('name'),
                'property_type_id': input_id.replace('property_type_id-', ''),
            })

    return property_types
from urllib.parse import quote, urlencode

# (argument name, query parameter name) for single-valued filters, in the
# order they are emitted. Note that 'instant_booking' is sent as 'ib'.
_SCALAR_PARAMS = (
    ('adults', 'adults'),
    ('min_price', 'min_price'),
    ('max_price', 'max_price'),
    ('min_beds', 'min_beds'),
    ('min_bedrooms', 'min_bedrooms'),
    ('min_bathrooms', 'min_bathrooms'),
    ('flexible_cancellation', 'flexible_cancellation'),
    ('instant_booking', 'ib'),
    ('work_trip', 'work_trip'),
    ('superhost', 'superhost'),
)

# (argument name, query parameter name) for comma-separated id lists. Each id
# becomes its own repeated parameter. Amenities, accessibilities, facilities
# and house rules all share the 'amenities[]' parameter.
_LIST_PARAMS = (
    ('amenities', 'amenities[]'),
    ('accessibilities', 'amenities[]'),
    ('facilities', 'amenities[]'),
    ('property_types', 'property_type_id[]'),
    ('house_rules', 'amenities[]'),
    ('neighborhoods', 'neighborhood_ids[]'),
    ('languages', 'languages[]'),
)


def _split_ids(raw):
    """Split a comma-separated id string into a list of non-empty, stripped ids."""
    if not raw:
        return []
    return [part.strip() for part in str(raw).split(',') if part.strip()]


def build_url(args):
    """Build an Airbnb homes-search URL from parsed request arguments.

    Args:
        args: Mapping of argument name to value (any object with ``.get``,
            such as a plain dict). Missing or falsy values are skipped.
            Recognized keys: ``search_type``, ``page``, ``checkin``,
            ``checkout``, ``adults``, ``min_price``, ``max_price``,
            ``min_beds``, ``min_bedrooms``, ``min_bathrooms``,
            ``flexible_cancellation``, ``instant_booking``, ``work_trip``,
            ``superhost``, ``amenities``, ``accessibilities``, ``facilities``,
            ``property_types``, ``house_rules``, ``neighborhoods``,
            ``languages``, ``city`` and ``state``. The list-valued ones are
            comma-separated id strings.

    Returns:
        The search URL as a string. All keys and values are percent-encoded,
        so ``[]`` becomes ``%5B%5D`` and ``', '`` becomes ``%2C%20``.

    Raises:
        ValueError: If ``page`` is set for a pagination search but is not an
            integer.
    """
    pairs = []

    # Pagination: the offset is the page number times 20 results per page.
    search_type = args.get('search_type')
    if search_type:
        pairs.append(('search_type', search_type))
        page = args.get('page')
        if search_type == 'pagination' and page:
            pairs.append(('items_offset', str(int(page) * 20)))

    # Dates are only meaningful as a pair.
    checkin, checkout = args.get('checkin'), args.get('checkout')
    if checkin and checkout:
        pairs.append(('checkin', checkin))
        pairs.append(('checkout', checkout))

    for arg_name, param_name in _SCALAR_PARAMS:
        value = args.get(arg_name)
        if value:
            pairs.append((param_name, value))

    # A list of tuples (rather than a dict) keeps repeated keys, so several
    # 'amenities[]' entries no longer overwrite each other.
    for arg_name, param_name in _LIST_PARAMS:
        pairs.extend(
            (param_name, item) for item in _split_ids(args.get(arg_name))
        )

    # Location is only added when both parts are present.
    city, state = args.get('city'), args.get('state')
    if city and state:
        pairs.append(('query', '{}, {}'.format(city, state)))

    # quote_via=quote encodes spaces as %20 (not '+'), matching the
    # hand-written 'City%2C%20State' form used for the location.
    URL = 'https://www.airbnb.com/s/homes?' + urlencode(pairs, quote_via=quote)

    # For debugging let's see the URL
    print(URL, flush=True)
    return URL
Consult the documentation on testing for\ninformation about how to avoid this problem.", 140 | "output_type": "error", 141 | "traceback": [ 142 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 143 | "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", 144 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd_argument\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'neighborhoods'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd_argument\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'languages'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 27\u001b[1;33m \u001b[0margs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparser\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparse_args\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstrict\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 28\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 29\u001b[0m \u001b[0mbuild_url\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 145 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\flask_restful\\reqparse.py\u001b[0m in \u001b[0;36mparse_args\u001b[1;34m(self, req, strict, http_error_code)\u001b[0m\n\u001b[0;32m 323\u001b[0m \u001b[1;31m# A record of arguments not yet parsed; as each is found\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 324\u001b[0m \u001b[1;31m# among self.args, it will be popped out\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 325\u001b[1;33m \u001b[0mreq\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munparsed_arguments\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mdict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margument_class\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m''\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstrict\u001b[0m \u001b[1;32melse\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 326\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 327\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0marg\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 146 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\flask_restful\\reqparse.py\u001b[0m in \u001b[0;36msource\u001b[1;34m(self, request)\u001b[0m\n\u001b[0;32m 123\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mMultiDict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 124\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0ml\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlocation\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 125\u001b[1;33m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ml\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 126\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcallable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 127\u001b[0m \u001b[0mvalue\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mvalue\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 147 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\werkzeug\\local.py\u001b[0m in \u001b[0;36m__getattr__\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 345\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'__members__'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 346\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mdir\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_current_object\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 347\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_current_object\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 348\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 349\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 148 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\werkzeug\\local.py\u001b[0m in \u001b[0;36m_get_current_object\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 304\u001b[0m \"\"\"\n\u001b[0;32m 305\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__local\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'__release_local__'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 306\u001b[1;33m 
\u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__local\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 307\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 308\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__local\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 149 | "\u001b[1;32m~\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\flask\\globals.py\u001b[0m in \u001b[0;36m_lookup_req_object\u001b[1;34m(name)\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[0mtop\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_request_ctx_stack\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtop\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 36\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mtop\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 37\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_request_ctx_err_msg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 38\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 39\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 150 | "\u001b[1;31mRuntimeError\u001b[0m: Working outside of request context.\n\nThis typically means that you attempted to use functionality that needed\nan active HTTP request. Consult the documentation on testing for\ninformation about how to avoid this problem." 
151 | ] 152 | } 153 | ], 154 | "source": [ 155 | "from flask_restful import Resource, reqparse\n", 156 | "\n", 157 | "parser = reqparse.RequestParser()\n", 158 | "parser.add_argument('city', required=True)\n", 159 | "parser.add_argument('state', required=True)\n", 160 | "parser.add_argument('checkin')\n", 161 | "parser.add_argument('checkout')\n", 162 | "parser.add_argument('adults', default='1')\n", 163 | "parser.add_argument('page', default='0')\n", 164 | "parser.add_argument('search_type', default='pagination')\n", 165 | "parser.add_argument('min_price')\n", 166 | "parser.add_argument('max_price')\n", 167 | "parser.add_argument('min_beds')\n", 168 | "parser.add_argument('min_bedrooms')\n", 169 | "parser.add_argument('min_bathrooms')\n", 170 | "parser.add_argument('flexible_cancellation')\n", 171 | "parser.add_argument('instant_booking')\n", 172 | "parser.add_argument('work_trip')\n", 173 | "parser.add_argument('superhost')\n", 174 | "parser.add_argument('amenities')\n", 175 | "parser.add_argument('accessibilities')\n", 176 | "parser.add_argument('facilities')\n", 177 | "parser.add_argument('property_types')\n", 178 | "parser.add_argument('house_rules')\n", 179 | "parser.add_argument('neighborhoods')\n", 180 | "parser.add_argument('languages')\n", 181 | "args = parser.parse_args(strict=True)\n", 182 | "\n", 183 | "build_url(args)" 184 | ] 185 | } 186 | ], 187 | "metadata": { 188 | "kernelspec": { 189 | "display_name": "Python 3", 190 | "language": "python", 191 | "name": "python3" 192 | }, 193 | "language_info": { 194 | "codemirror_mode": { 195 | "name": "ipython", 196 | "version": 3 197 | }, 198 | "file_extension": ".py", 199 | "mimetype": "text/x-python", 200 | "name": "python", 201 | "nbconvert_exporter": "python", 202 | "pygments_lexer": "ipython3", 203 | "version": "3.7.0" 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 2 208 | } 209 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Unofficial Airbnb REST API 2 | 3 | This is an unofficial Airbnb RESTful API that scrapes the [Airbnb site](https://www.airbnb.com/) to retrieve data. 4 | 5 | Application was built with Python3, Flask, BeautifulSoup, and Selenium. 6 | 7 | Warning: Using this application may be against Airbnb's terms of service. 8 | 9 | 10 | ## Install 11 | 12 | git clone https://github.com/HalmonLui/airbnb-api.git 13 | cd airbnb-api 14 | pip install -r requirements.txt 15 | 16 | Note: For some endpoints, you need Selenium with the Chrome driver in your PATH. Follow [this tutorial](https://zwbetz.com/download-chromedriver-binary-and-add-to-your-path-for-automated-functional-testing/) to learn how. 17 | For some reason, not all requirements may get installed. If that is the case, run: 18 | pip install selenium 19 | pip install flask-restful 20 | 21 | ## Run the app 22 | 23 | export FLASK_APP=airbnbapi 24 | flask run 25 | 26 | # REST API 27 | 28 | ## Get Listings 29 | 30 | ### Request 31 | 32 | `GET /getListings` 33 | 34 | curl -X GET 'http://localhost:5000/getListings?city=Boston&state=MA' 35 | 36 | ### Parameters 37 | 38 | - **city** *required, str*\ 39 | Valid city, ex: Boston 40 | - **state** *required, str*\ 41 | Valid state code, ex: MA 42 | - **checkin** *optional, str*\ 43 | Checkin date, YYYY-MM-DD 44 | - **checkout** *optional, str*\ 45 | Checkout date, YYYY-MM-DD 46 | - **adults** *optional, int, default is 1*\ 47 | Number of adults 48 | - **page** *optional, int, default is 0*\ 49 | Each page shows 20 items at a time 50 | - **min_price** *optional, int*\ 51 | Minimum price per night 52 | - **max_price** *optional, int*\ 53 | Maximum price per night 54 | - **min_beds** *optional, int*\ 55 | Minimum number of beds 56 | - **min_bedrooms** *optional, int*\ 57 | Minimum number of bedrooms 58 | - **min_bathrooms** *optional, int*\ 59 | Minimum number of bathrooms 60 | - 
**flexible_cancellation** *optional, bool*\ 61 | Stay has flexible cancellation 62 | - **instant_booking** *optional, bool*\ 63 | Book without waiting for host approval 64 | - **work_trip** *optional, bool*\ 65 | Traveling for work, 5 star ratings from business travelers 66 | - **superhost** *optional, bool*\ 67 | Host is a superhost 68 | - **amenities** *optional*\ 69 | Comma separated list of amenity_ids (can retrieve from /getAmenities endpoint), ex: 44,45 70 | - **accessibilities** *optional*\ 71 | Comma separated list of accessibility_ids (can retrieve from /getAccessibilities endpoint), ex: 110,111 72 | - **facilities** *optional*\ 73 | Comma separated list of facility_ids (can retrieve from /getFacilities endpoint), ex: 7,9 74 | - **property_types** *optional*\ 75 | Comma separated list of property_type_ids (can retrieve from /getPropertyTypes endpoint), ex: 8,5 76 | - **house_rules** *optional*\ 77 | Comma separated list of house_rules_ids (can retrieve from /getHouseRules endpoint), ex: 11,12 78 | - **neighborhoods** *optional*\ 79 | Comma separated list of neighborhood_ids (can retrieve from /getNeighborhoods endpoint), ex: 578,579 80 | - **languages** *optional*\ 81 | Comma separated list of language_ids (can retrieve from /getLanguages endpoint), ex: 1,2 82 | 83 | ### Response 84 | ```json 85 | [ 86 | { 87 | "listing_name": "Super Spacious Listing For My Airbnb API", 88 | "url": "https://www.airbnb.com/rooms/1858?adults=1&previous_page_section_name=100&federated_search_id=f41f2c-39b5-4fce-a928-8540423f1", 89 | "price_per_night": "27", 90 | "amenities": [ 91 | "Wifi", 92 | "Kitchen" 93 | ], 94 | "housing_info": [ 95 | "2 guests", 96 | "1 bedroom", 97 | "1 bed", 98 | "2 shared baths" 99 | ], 100 | "is_superhost": "True", 101 | "listing_type": "Private room", 102 | "rating": "4.89", 103 | "num_reviews": "434" 104 | }, 105 | { 106 | "listing_name": "Downtown Room", 107 | "url": 
"https://www.airbnb.com/rooms/1562?adults=1&previous_page_section_name=100&federated_search_id=fbf2c-39b5-ce-a928-853f1", 108 | "price_per_night": "39", 109 | "amenities": [ 110 | "Free parking", 111 | "Wifi", 112 | "Kitchen" 113 | ], 114 | "housing_info": [ 115 | "3 guests", 116 | "1 bedroom", 117 | "2 beds", 118 | "2 shared baths" 119 | ], 120 | "is_superhost": "True", 121 | "listing_type": "Private room", 122 | "rating": "4.89", 123 | "num_reviews": "615" 124 | }, 125 | ... 126 | ] 127 | ``` 128 | 129 | ## Get Listing Coordinates by ID 130 | 131 | ### Request 132 | 133 | `GET /getListingCoordinates/<listing_id>` 134 | 135 | curl -X GET 'http://localhost:5000/getListingCoordinates/17974950' 136 | 137 | ### Response 138 | ```json 139 | { 140 | "latitude": "42.3766", 141 | "longitude": "-71.03634" 142 | } 143 | ``` 144 | 145 | ## Get Deep Listings 146 | 147 | ### Request 148 | 149 | `GET /getDeepListings` 150 | 151 | curl -X GET 'http://localhost:5000/getDeepListings?city=Boston&state=MA' 152 | 153 | ### Response 154 | ```json 155 | In progress... 156 | ``` 157 | 158 | ## Get Specific Listing 159 | 160 | ### Request 161 | 162 | `GET /getListing/` 163 | 164 | curl -X GET 'http://localhost:5000/getListing?id=123456' 165 | 166 | ### Response 167 | ```json 168 | In progress... 169 | ``` 170 | 171 | ## Get Amenities 172 | Airbnb uses unique ids for each amenity, these are needed to query listings by host amenities.\ 173 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly. 
174 | ### Request 175 | 176 | `GET /getAmenities` 177 | 178 | curl -X GET 'http://localhost:5000/getAmenities' 179 | 180 | ### Response 181 | ```json 182 | [ 183 | { 184 | "amenity": "Kitchen", 185 | "amenity_id": "8" 186 | }, 187 | { 188 | "amenity": "Shampoo", 189 | "amenity_id": "41" 190 | }, 191 | { 192 | "amenity": "Heating", 193 | "amenity_id": "30" 194 | }, 195 | { 196 | "amenity": "Air conditioning", 197 | "amenity_id": "5" 198 | }, 199 | { 200 | "amenity": "Washer", 201 | "amenity_id": "33" 202 | }, 203 | ... 204 | ] 205 | ``` 206 | 207 | ## Get Accessibilities 208 | Airbnb uses unique ids for each accessibility, these are needed to query listings by host accessibility.\ 209 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly. 210 | ### Request 211 | 212 | `GET /getAccessibilities` 213 | 214 | curl -X GET 'http://localhost:5000/getAccessibilities' 215 | 216 | ### Response 217 | ```json 218 | [ 219 | { 220 | "accessibility": "No stairs or steps to enter", 221 | "accessibility_id": "110" 222 | }, 223 | { 224 | "accessibility": "Well-lit path to entrance", 225 | "accessibility_id": "113" 226 | }, 227 | { 228 | "accessibility": "Wide entrance for guests", 229 | "accessibility_id": "111" 230 | }, 231 | { 232 | "accessibility": "Step-free path to entrance", 233 | "accessibility_id": "112" 234 | }, 235 | { 236 | "accessibility": "Wide hallways", 237 | "accessibility_id": "109" 238 | }, 239 | ... 240 | ] 241 | ``` 242 | 243 | ## Get Facilities 244 | Airbnb uses unique ids for each facility, these are needed to query listings by host facilities.\ 245 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly. 
246 | ### Request 247 | 248 | `GET /getFacilities` 249 | 250 | curl -X GET 'http://localhost:5000/getFacilities' 251 | 252 | ### Response 253 | ```json 254 | [ 255 | { 256 | "facility": "Free parking on premises", 257 | "facility_id": "9" 258 | }, 259 | { 260 | "facility": "Gym", 261 | "facility_id": "15" 262 | }, 263 | { 264 | "facility": "Hot tub", 265 | "facility_id": "25" 266 | }, 267 | { 268 | "facility": "Pool", 269 | "facility_id": "7" 270 | } 271 | ] 272 | ``` 273 | 274 | ## Get Property Types 275 | Airbnb uses unique property type ids for each property type and unique stay, these are needed to query listings by host property type.\ 276 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly. 277 | ### Request 278 | 279 | `GET /getPropertyTypes` 280 | 281 | curl -X GET 'http://localhost:5000/getPropertyTypes' 282 | 283 | ### Response 284 | ```json 285 | [ 286 | { 287 | "property_type": "House", 288 | "property_type_id": "2" 289 | }, 290 | { 291 | "property_type": "Apartment", 292 | "property_type_id": "1" 293 | }, 294 | { 295 | "property_type": "Bed and breakfast", 296 | "property_type_id": "3" 297 | }, 298 | { 299 | "property_type": "Boutique hotel", 300 | "property_type_id": "43" 301 | }, 302 | { 303 | "property_type": "Bungalow", 304 | "property_type_id": "38" 305 | }, 306 | ... 307 | ] 308 | ``` 309 | 310 | ## Get House Rules 311 | Airbnb uses unique ids for each house rule, these are needed to query listings by house rules.\ 312 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly. 
313 | ### Request 314 | 315 | `GET /getHouseRules` 316 | 317 | curl -X GET 'http://localhost:5000/getHouseRules' 318 | 319 | ### Response 320 | ```json 321 | [ 322 | { 323 | "house_rule": "Pets allowed", 324 | "house_rule_id": "12" 325 | }, 326 | { 327 | "house_rule": "Smoking allowed", 328 | "house_rule_id": "11" 329 | } 330 | ] 331 | ``` 332 | 333 | ## Get Neighborhoods 334 | Airbnb uses unique neighborhood ids for each neighborhood, these are needed to query listings by neighborhood.\ 335 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly. 336 | ### Request 337 | 338 | `GET /getNeighborhoods` 339 | 340 | curl -X GET 'http://localhost:5000/getNeighborhoods?city=Boston&state=MA' 341 | 342 | ### Parameters 343 | 344 | - **city** *required*\ 345 | Valid city, ex: Boston 346 | - **state** *required*\ 347 | Valid state code, ex: MA 348 | 349 | ### Response 350 | ```json 351 | [ 352 | { 353 | "neighborhood": "Allston-Brighton", 354 | "neighborhood_id": "578" 355 | }, 356 | { 357 | "neighborhood": "East Boston", 358 | "neighborhood_id": "579" 359 | }, 360 | { 361 | "neighborhood": "Winthrop", 362 | "neighborhood_id": "580" 363 | }, 364 | { 365 | "neighborhood": "Theater District", 366 | "neighborhood_id": "453" 367 | }, 368 | { 369 | "neighborhood": "Cambridge", 370 | "neighborhood_id": "581" 371 | }, 372 | ... 373 | ] 374 | ``` 375 | 376 | ## Get Languages 377 | Airbnb uses unique language ids for each language, these are needed to query listings by host language.\ 378 | Note: If endpoint doesn't work, make sure you [installed](https://github.com/HalmonLui/airbnb-api#install) correctly. 
379 | ### Request 380 | 381 | `GET /getLanguages` 382 | 383 | curl -X GET 'http://localhost:5000/getLanguages' 384 | 385 | ### Response 386 | ```json 387 | [ 388 | { 389 | "language": "English", 390 | "language_id": "1" 391 | }, 392 | { 393 | "language": "French", 394 | "language_id": "2" 395 | }, 396 | { 397 | "language": "German", 398 | "language_id": "4" 399 | }, 400 | { 401 | "language": "Japanese", 402 | "language_id": "8" 403 | }, 404 | { 405 | "language": "Italian", 406 | "language_id": "16" 407 | }, 408 | ... 409 | ] 410 | ``` 411 | -------------------------------------------------------------------------------- /airbnbapi/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask_restful import Resource, Api 3 | app = Flask(__name__) 4 | api = Api(app) 5 | 6 | import airbnbapi.resources 7 | -------------------------------------------------------------------------------- /airbnbapi/controllers.py: -------------------------------------------------------------------------------- 1 | import json, requests, pprint, time, re 2 | from bs4 import BeautifulSoup 3 | from selenium import webdriver 4 | from selenium.webdriver.chrome.options import Options 5 | from . 
import helpers


# Root of every Airbnb URL built in this module.
_AIRBNB_ROOT = 'https://www.airbnb.com'

# Search page used to reach the filter dialog for lookups that do not depend
# on a location. Can use any city/state.
_DEFAULT_SEARCH_URL = 'https://www.airbnb.com/s/homes?query=' + 'Boston' + '%2C%20' + 'MA'

# Dangerous, location of filter button may change
_FILTER_BUTTON_XPATH = '//*[@id="filter-menu-chip-group"]/div[2]/button'

# Dangerous, classnames automatically change based on window dimensions, they
# might also rotate every once and a while for airbnb security
_SHOW_ALL_CLASS = '_6lth7f'

# Checkbox names that are house rules rather than amenities.
_HOUSE_RULE_NAMES = ('Pets allowed', 'Smoking allowed')

# Patterns used to pull the coordinates out of a listing page's source.
_LAT_PATTERN = re.compile(r'"lat":([-0-9.]+),')
_LNG_PATTERN = re.compile(r'"lng":([-0-9.]+),')
_COORDINATE_ATTEMPTS = 10


def _node_text(node):
    """Return the text of a parsed node, falling back to str() for plain strings."""
    get_text = getattr(node, 'get_text', None)
    return get_text() if callable(get_text) else str(node)


def _nth_parent(node, levels):
    """Walk *levels* parents up from *node*; return None if the chain ends early."""
    for _ in range(levels):
        if node is None:
            return None
        node = node.parent
    return node


def _load_filter_page(url, show_all_steps=()):
    """Open *url* in headless Chrome, open the filter dialog and parse the page.

    Args:
        url: Search page to load.
        show_all_steps: Iterable of ``(index, label)`` pairs. For each pair the
            "show all" element at ``index`` (among elements with class
            ``_SHOW_ALL_CLASS``) is clicked; ``label`` is used in the error
            message when that element does not exist.

    Returns:
        ``(soup, None)`` on success or ``(None, error_message)`` when the
        filter button or a "show all" element cannot be found.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Manually set window size so we can find by class name later
        driver.set_window_size(500, 951)
        driver.get(url)
        time.sleep(1)  # The page's javascript takes time to run, give it time

        # The locator strategy is passed by its string value so no extra
        # import is needed; the generic find_elements() call works where the
        # find_elements_by_* shortcuts are no longer available.
        filter_buttons = driver.find_elements('xpath', _FILTER_BUTTON_XPATH)
        if not filter_buttons:
            return None, 'Unable to access filter button'
        filter_buttons[0].click()
        time.sleep(1)  # Waiting for page's js to run

        for index, label in show_all_steps:
            # Re-query after every click because the dialog re-renders.
            show_all_links = driver.find_elements('class name', _SHOW_ALL_CLASS)
            if len(show_all_links) <= index:
                return None, 'Unable to access ' + label
            show_all_links[index].click()

        return BeautifulSoup(driver.page_source, 'html.parser'), None
    finally:
        # Always close the driver so a failure does not leave an idle
        # Chrome process behind.
        driver.quit()


def _collect_inputs(soup, id_marker, name_key, id_key, excluded_names=()):
    """Build ``{name_key: name, id_key: id}`` dicts from matching <input> tags.

    An input matches when its ``id`` attribute contains *id_marker*; the
    returned id is the attribute value with ``id_marker + '-'`` removed.
    Inputs whose ``name`` contains any of *excluded_names* are skipped.
    """
    prefix = id_marker + '-'
    results = []
    for element in soup.find_all('input'):
        element_id = element.get('id')
        if not element_id or id_marker not in element_id:
            continue
        name = element.get('name')
        # A missing name attribute is treated as an empty name for filtering.
        if any(term in (name or '') for term in excluded_names):
            continue
        results.append({name_key: name, id_key: element_id.replace(prefix, '')})
    return results


def get_listings(args):
    """Scrape one page of Airbnb search results.

    Args:
        args: Parsed request arguments, passed unchanged to
            ``helpers.build_url``.

    Returns:
        ``(listings, 200)`` where ``listings`` is a list of dicts. Each dict
        always has ``listing_name`` and ``url``; ``total_price``,
        ``is_superhost``, ``listing_type``, ``rating``, ``num_reviews``,
        ``price_per_night``, ``amenities`` and ``housing_info`` are added only
        when the matching markup is found on the page.
    """
    # Build the URL
    URL = helpers.build_url(args)

    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'html.parser')

    # GET LISTING NAME AND URL
    listings = []
    for link in soup.find_all('a'):
        href = link.get('href')
        # We just want to add real listings, not all link names. Links without
        # an href are skipped because no URL can be built for them.
        if link.get('data-check-info-section') and href:
            listings.append({
                'listing_name': link.get('aria-label'),
                'url': _AIRBNB_ROOT + href,
            })

    buttons = soup.find_all('button')

    # GET TOTAL PRICE
    counter = 0
    for button in buttons:
        if counter >= len(listings):
            break  # More price buttons than listings; nothing left to fill
        text = button.get_text()
        if text and 'total' in text:
            listings[counter]['total_price'] = text.replace('$', '').replace(' total', '')
            counter += 1

    # GET SUPERHOST, LISTING_TYPE, RATING, NUM_REVIEWS
    # This only works sometimes, airbnb must load their frontend slightly
    # differently every fetch
    counter = 0
    for div in soup.find_all('div'):
        if counter >= len(listings):
            break
        if div.get_text() != listings[counter]['listing_name']:
            continue

        is_superhost = 'False'
        listing_type = ''
        rating = None
        num_reviews = '0'

        # The element just before the title div holds the badges and rating.
        listing_info = div.previous_sibling
        for child in getattr(listing_info, 'contents', None) or []:
            child_text = _node_text(child)
            if 'Entire ' in child_text or 'Private ' in child_text:
                listing_type = child_text
            elif 'SUPERHOST' in child_text:
                is_superhost = 'True'
            elif '(' in child_text and ')' in child_text:
                # Rating text looks like "4.89 (434)".
                for piece in getattr(child, 'contents', None) or []:
                    tokens = _node_text(piece).split()
                    if len(tokens) >= 2:
                        rating = tokens[0]
                        num_reviews = tokens[1].replace('(', '').replace(')', '')

        listings[counter]['is_superhost'] = is_superhost
        listings[counter]['listing_type'] = listing_type
        listings[counter]['rating'] = rating
        listings[counter]['num_reviews'] = num_reviews
        counter += 1

    # GET PRICE PER NIGHT, AMENITIES, HOUSING_INFO
    counter = 0
    for button in buttons:
        if counter >= len(listings):
            break
        text = button.get_text()
        if not text or '/ night' not in text or 'total' in text:
            continue

        # Some have a discounted price so we only want the actual price per
        # night, i.e. whatever follows the last '$'.
        price_per_night = None
        if '$' in text:
            price_per_night = text.rsplit('$', 1)[1].replace(' / night', '')
            price_per_night = ' '.join(price_per_night.split())

        amenities = []
        housing_info = []
        card = _nth_parent(button, 3)
        # Gets amenities like Wifi/Kitchen/Free Parking
        amenities_element = card.previous_sibling if card is not None else None
        if amenities_element:
            amenities = _node_text(amenities_element).split(' · ')
            # Gets guests, bedrooms, baths
            housing_info_element = amenities_element.previous_sibling
            if housing_info_element:
                housing_info = _node_text(housing_info_element).split(' · ')

        listings[counter]['price_per_night'] = price_per_night
        listings[counter]['amenities'] = amenities
        listings[counter]['housing_info'] = housing_info
        counter += 1

    return listings, 200


def get_coordinates(listing_id):
    """Look up the latitude and longitude of a listing.

    The listing page does not always contain the coordinates, so the page is
    requested up to ``_COORDINATE_ATTEMPTS`` times.

    Args:
        listing_id: Airbnb listing id; converted with ``str()``.

    Returns:
        ``({'latitude': ..., 'longitude': ...}, 200)`` with the values as the
        strings found in the page, or ``({'error': ...}, 400)`` when no
        attempt yields both coordinates.
    """
    URL = _AIRBNB_ROOT + '/rooms/' + str(listing_id)

    for _ in range(_COORDINATE_ATTEMPTS):
        try:
            response = requests.get(URL)
        except requests.RequestException:
            continue  # Network problem, use up one attempt and retry

        lat_match = _LAT_PATTERN.search(response.text)
        lng_match = _LNG_PATTERN.search(response.text)
        if lat_match and lng_match:
            return {'latitude': lat_match.group(1), 'longitude': lng_match.group(1)}, 200
        # Page loaded without coordinates so we will retry

    return {'error': 'Unable to get the coordinates'}, 400


def get_amenities():
    """Return ``(amenities, 200)`` or ``({'error': ...}, 400)``.

    Each amenity is ``{'amenity': name, 'amenity_id': id}``; house rules
    (pets/smoking) share the same id prefix on the page and are left out.
    """
    soup, error_message = _load_filter_page(_DEFAULT_SEARCH_URL, [(1, 'amenities')])
    if error_message:
        return {'error': error_message}, 400
    return _collect_inputs(soup, 'amenities', 'amenity', 'amenity_id', _HOUSE_RULE_NAMES), 200


def get_accessibilities():
    """Return ``(accessibilities, 200)`` or ``({'error': ...}, 400)``.

    Each entry is ``{'accessibility': name, 'accessibility_id': id}``.
    """
    soup, error_message = _load_filter_page(_DEFAULT_SEARCH_URL, [(0, 'accessibilities')])
    if error_message:
        return {'error': error_message}, 400
    return _collect_inputs(
        soup, 'amenities', 'accessibility', 'accessibility_id', _HOUSE_RULE_NAMES), 200


def get_facilities():
    """Return ``(facilities, 200)`` or ``({'error': ...}, 400)``.

    Each entry is ``{'facility': name, 'facility_id': id}``.
    """
    soup, error_message = _load_filter_page(_DEFAULT_SEARCH_URL, [(2, 'facilities')])
    if error_message:
        return {'error': error_message}, 400
    return _collect_inputs(soup, 'amenities', 'facility', 'facility_id', _HOUSE_RULE_NAMES), 200


def get_property_types():
    """Return ``(property_types, 200)`` or ``({'error': ...}, 400)``.

    Both the "unique stays" and the "property types" sections are expanded
    before the page is parsed. Each entry is
    ``{'property_type': name, 'property_type_id': id}``.
    """
    soup, error_message = _load_filter_page(
        _DEFAULT_SEARCH_URL, [(4, 'unique stays'), (3, 'property types')])
    if error_message:
        return {'error': error_message}, 400
    return _collect_inputs(soup, 'property_type_id', 'property_type', 'property_type_id'), 200


def get_house_rules():
    """Return ``(house_rules, 200)`` or ``({'error': ...}, 400)``.

    Each entry is ``{'house_rule': name, 'house_rule_id': id}``. No section is
    expanded; the filter dialog is parsed as it first appears.
    """
    soup, error_message = _load_filter_page(_DEFAULT_SEARCH_URL)
    # Return error message if we cannot access house rules
    if error_message:
        return {'error': error_message}, 400
    # NOTE(review): every checkbox whose id contains 'amenities' is returned,
    # not only the pets/smoking ones - confirm whether that is intended.
    return _collect_inputs(soup, 'amenities', 'house_rule', 'house_rule_id'), 200


def get_neighborhoods(args):
    """Return the neighborhoods Airbnb offers as filters for a search.

    Args:
        args: Parsed request arguments, passed unchanged to
            ``helpers.build_url``.

    Returns:
        ``(neighborhoods, 200)`` with entries of the form
        ``{'neighborhood': name, 'neighborhood_id': id}``, or
        ``({'error': ...}, 400)``.
    """
    # Build the URL
    URL = helpers.build_url(args)

    soup, error_message = _load_filter_page(URL, [(5, 'neighborhoods')])
    if error_message:
        return {'error': error_message}, 400
    return _collect_inputs(soup, 'neighborhood_ids', 'neighborhood', 'neighborhood_id'), 200


def get_languages():
    # Build URL
    base_url = 'https://www.airbnb.com/s/homes?query='
    URL = base_url + 'Boston' + '%2C%20' + 'MA' # Can use any city/state

    # Set up headless chrome driver
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    driver = webdriver.Chrome(options=chrome_options)
    driver.set_window_size(500, 951) # Manually set window size so we can find by class name later

    # Control page to show languages
    driver.get(URL)
    time.sleep(1) # Since we are in a browser, the javascript takes time to run so let's give it time
    error_message = None
    more_filters_button = driver.find_elements_by_xpath('//*[@id="filter-menu-chip-group"]/div[2]/button')[0]
    if more_filters_button:
        more_filters_button.click()
        time.sleep(1) # Waiting for page's js to run
        show_all_languages_button = driver.find_elements_by_class_name('_6lth7f')[6] # Dangerous, classnames automatically change based on window dimensions, they might also rotate every once and a while for airbnb security
        if show_all_languages_button:
            show_all_languages_button.click()
            soup = BeautifulSoup(driver.page_source, 'html.parser')
        else:
            error_message = 'Unable to access languages'
    else:
        error_message = 'Unable to access filter button'

    driver.quit()
from urllib.parse import urlencode, quote

# (request argument, Airbnb query parameter) pairs copied through verbatim,
# in the order they appear in the generated URL.
_SCALAR_PARAMS = (
    ('adults', 'adults'),
    ('min_price', 'min_price'),
    ('max_price', 'max_price'),
    ('min_beds', 'min_beds'),
    ('min_bedrooms', 'min_bedrooms'),
    ('min_bathrooms', 'min_bathrooms'),
    ('flexible_cancellation', 'flexible_cancellation'),
    ('instant_booking', 'ib'),
    ('work_trip', 'work_trip'),
    ('superhost', 'superhost'),
)

# (request argument, Airbnb query parameter) pairs whose value is a
# comma-separated id list. Amenities, accessibilities, facilities and house
# rules all end up in the same 'amenities[]' parameter.
_LIST_PARAMS = (
    ('amenities', 'amenities[]'),
    ('accessibilities', 'amenities[]'),
    ('facilities', 'amenities[]'),
    ('property_types', 'property_type_id[]'),
    ('house_rules', 'amenities[]'),
    ('neighborhoods', 'neighborhood_ids[]'),
    ('languages', 'languages[]'),
)


def build_url(args):
    """Build the Airbnb search URL for the given request arguments.

    Args:
        args: Mapping of argument names to values. Missing keys and falsy
            values are both treated as "not supplied", so callers that only
            provide ``city`` and ``state`` work as well.

    Returns:
        The search URL as a string; it is also printed for debugging.

    Raises:
        ValueError: If ``page`` is supplied but is not an integer string.
    """
    params = {}

    # Add pagination
    search_type = args.get('search_type')
    if search_type:
        params['search_type'] = search_type
        page = args.get('page')
        if search_type == 'pagination' and page:
            params['items_offset'] = str(int(page) * 20)

    # Add location (only when both parts are present)
    city, state = args.get('city'), args.get('state')
    if city and state:
        params['query'] = city + ', ' + state

    # Add logistics (only when both dates are present)
    checkin, checkout = args.get('checkin'), args.get('checkout')
    if checkin and checkout:
        params['checkin'] = checkin
        params['checkout'] = checkout

    for arg_name, param_name in _SCALAR_PARAMS:
        value = args.get(arg_name)
        if value:
            params[param_name] = value

    # setdefault keeps the position of a list parameter at its first use, so
    # the parameter order of the URL does not depend on later additions.
    for arg_name, param_name in _LIST_PARAMS:
        raw_ids = args.get(arg_name)
        if raw_ids:
            params.setdefault(param_name, []).extend(raw_ids.split(','))

    URL = 'https://www.airbnb.com/s/homes?' + urlencode(params, True, quote_via=quote)
    # For debugging let's see the URL
    print(URL, flush=True)
    return URL
from airbnbapi import api
from flask_restful import Resource, reqparse
from flask import jsonify
from . import controllers

# Optional listing filters that are accepted as plain strings without a default.
_LISTING_FILTER_ARGS = (
    'min_price',
    'max_price',
    'min_beds',
    'min_bedrooms',
    'min_bathrooms',
    'flexible_cancellation',
    'instant_booking',
    'work_trip',
    'superhost',
    'amenities',
    'accessibilities',
    'facilities',
    'property_types',
    'house_rules',
    'neighborhoods',
    'languages',
)


class Index(Resource):
    """Landing endpoint that points to the project documentation."""

    def get(self):
        return jsonify({'message': 'Unofficial Airbnb API, visit https://github.com/HalmonLui/airbnb-api for more information'})


# Get Airbnb Listings
class ListingsAPI(Resource):
    """Search listings; ``city`` and ``state`` are required query arguments."""

    def get(self):
        parser = reqparse.RequestParser()
        parser.add_argument('city', required=True)
        parser.add_argument('state', required=True)
        parser.add_argument('checkin')
        parser.add_argument('checkout')
        parser.add_argument('adults', default='1')
        parser.add_argument('page', default='0')
        parser.add_argument('search_type', default='pagination')
        for name in _LISTING_FILTER_ARGS:
            parser.add_argument(name)
        # strict=True rejects requests that carry unknown arguments.
        args = parser.parse_args(strict=True)
        return controllers.get_listings(args)


# Get Listing latitude and longitude coordinates from listing_id
class CoordinatesAPI(Resource):
    """Look up the coordinates of the listing named in the URL."""

    def get(self, listing_id):
        return controllers.get_coordinates(listing_id)


# Get Amenities and IDs
class AmenitiesAPI(Resource):
    def get(self):
        return controllers.get_amenities()


# Get Accessibilities and IDs
class AccessibilitiesAPI(Resource):
    def get(self):
        return controllers.get_accessibilities()


# Get Facilities and IDs
class FacilitiesAPI(Resource):
    def get(self):
        return controllers.get_facilities()


# Get Property Types and IDs
class PropertyTypesAPI(Resource):
    def get(self):
        return controllers.get_property_types()


# Get House Rules and IDs
class HouseRulesAPI(Resource):
    def get(self):
        return controllers.get_house_rules()


# Get Neighborhoods and IDs
class NeighborhoodsAPI(Resource):
    """List neighborhoods; ``city`` and ``state`` are required query arguments."""

    def get(self):
        parser = reqparse.RequestParser()
        parser.add_argument('city', required=True)
        parser.add_argument('state', required=True)
        args = parser.parse_args(strict=True)
        return controllers.get_neighborhoods(args)


# Get Languages and IDs
class LanguagesAPI(Resource):
    def get(self):
        return controllers.get_languages()


# adding the defined resources along with their corresponding urls
api.add_resource(Index, '/')
api.add_resource(ListingsAPI, '/getListings')
# The handler takes listing_id, so the route must declare it as a URL variable.
api.add_resource(CoordinatesAPI, '/getListingCoordinates/<listing_id>')
# api.add_resource(SpecificListingAPI, '/getListing/<listing_id>')
api.add_resource(AmenitiesAPI, '/getAmenities')
api.add_resource(AccessibilitiesAPI, '/getAccessibilities')
api.add_resource(FacilitiesAPI, '/getFacilities')
api.add_resource(PropertyTypesAPI, '/getPropertyTypes')
api.add_resource(HouseRulesAPI, '/getHouseRules')
api.add_resource(NeighborhoodsAPI, '/getNeighborhoods')
api.add_resource(LanguagesAPI, '/getLanguages')
from setuptools import setup

setup(
    name='airbnbapi',
    packages=['airbnbapi'],
    include_package_data=True,
    # Everything the package needs at runtime, so that installing it yields an
    # importable package; exact versions stay pinned in requirements.txt.
    install_requires=[
        'flask',
        'flask-restful',
        'beautifulsoup4',
        # NOTE(review): pinned in requirements.txt; presumably used by the
        # listing scraper - confirm.
        'requests',
        # The controllers call the find_elements_by_* helpers, which newer
        # Selenium 4 releases no longer provide.
        'selenium<4',
    ],
)