├── .ipynb_checkpoints └── Zillow_Arlington_tx-checkpoint.ipynb ├── README.md ├── Zillow_Arlington_tx.ipynb └── chromedriver /.ipynb_checkpoints/Zillow_Arlington_tx-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Project Goal: Scrape Zillow's website for the name, details, and address of each rental listing in Arlington, TX, and write the results to a CSV file using pandas." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 81, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from selenium import webdriver\n", 17 | "from selenium.webdriver.common.by import By\n", 18 | "from selenium.webdriver.support.ui import WebDriverWait" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 86, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# Folder path for your chrome web browser driver\n", 28 | "PATH = ''\n", 29 | "\n", 30 | "# Passes your path to a chrome driver\n", 31 | "driver = webdriver.Chrome(PATH)\n", 32 | "\n", 33 | "# implicitly_wait() returns None and applies to the whole session, so it is set\n", 34 | "# once here: every element lookup below polls for up to 60 s before failing.\n", 35 | "driver.implicitly_wait(60)\n", 36 | "\n", 37 | "# Zillow rental search URL; {} is filled in with the page number\n", 38 | "request_url = \"https://www.zillow.com/arlington-tx/rentals/{}_p\"\n", 39 | "\n", 40 | "name = []\n", 41 | "details = []\n", 42 | "address = []\n", 43 | "\n", 44 | "# Iterates over result pages 1-5 and grabs name, details, address.\n", 45 | "for page in range(1,6):\n", 46 | "    # Formats the Zillow link with the page number\n", 47 | "    base_url = request_url.format(page)\n", 48 | "    driver.get(base_url)\n", 49 | "\n", 50 | "    # Grabs the element that holds all the search results on the page\n", 51 | "    main = driver.find_element(By.ID, \"search-page-list-container\")\n",
52 | "\n", 53 | "    # find_element(s)(By...) replaces find_element(s)_by_*, removed in Selenium 4.3\n", 54 | "    card_info = main.find_elements(By.CLASS_NAME, 'list-card-info')\n", 55 | "\n", 56 | "    # Reads all three fields first so a card missing one of them raises before\n", 57 | "    # any list grows, keeping name/address/details aligned row for row.\n", 58 | "    for listing in card_info:\n", 59 | "        listing_name, listing_address, listing_details = (listing.find_element(By.CLASS_NAME, css_class).text\n", 60 | "            for css_class in ('list-card-footer', 'list-card-addr', 'list-card-heading'))\n", 61 | "        name.append(listing_name)\n", 62 | "        address.append(listing_address)\n", 63 | "        details.append(listing_details)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 87, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# Quits your session.\n", 73 | "driver.quit()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 88, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# Dictionary that stores the listing scraped\n", 83 | "zillow_data = {}\n", 84 | "\n", 85 | "zillow_data['Name'],zillow_data['Details'],zillow_data['Address'] = name,details,address" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "# Writing the listing info out to a CSV file." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 89, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "import pandas as pd" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 90, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "df = pd.DataFrame.from_dict(zillow_data, orient='index').T  # dict keys become the columns\n", 111 | "df.to_csv('zillow_arlington_tx_rentals.csv', index=False)  # writes the listings to disk as CSV" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 91, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/html": [ 122 | "
\n", 123 | "\n", 136 | "\n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | "
NameDetailsAddress
0House for rent$1,895/mo\\n4 bds2 ba1,937 sqft8111 Mosstree Dr, Arlington, TX 76001
1Apartment for rent$1,425/mo\\n3 bds2 ba1,201 sqft2303 Kingsford Ct, Arlington, TX 76017
2Apartment for rent$1,525/mo\\n3 bds2 ba904 sqft2305 Kingsford Ct, Arlington, TX 76017
3Wimbledon Oaks Apartment Homes$1,035+ 1 bd$1,425+ 2 bdsWimbledon Oaks Apartment Homes | 1802 Wimbledo...
4House for rent$1,600/mo\\n3 bds2 ba1,536 sqft7210 Fossil Rim Trl, Arlington, TX 76002
............
195Woodwind$735+ 1 bd$875+ 2 bdsWoodwind | 1605 S Cooper St, Arlington, TX
196The Enclave at Arlington$800+ 1 bd$1,050+ 2 bds$1,368+ 3 bdsThe Enclave at Arlington | 1249 Enclave Cir, A...
197House for rent$1,800/mo\\n3 bds2 ba1,870 sqft2622 Meadowview Dr, Arlington, TX 76016
198House for rent$1,575/mo\\n3 bds2 ba1,338 sqft2507 Arapaho Dr, Arlington, TX 76018
199House for rent$2,750/mo\\n3 bds2 ba1,219 sqft1006 Andrews St, Arlington, TX 76011
\n", 214 | "

200 rows × 3 columns

\n", 215 | "
" 216 | ], 217 | "text/plain": [ 218 | " Name Details \\\n", 219 | "0 House for rent $1,895/mo\\n4 bds2 ba1,937 sqft \n", 220 | "1 Apartment for rent $1,425/mo\\n3 bds2 ba1,201 sqft \n", 221 | "2 Apartment for rent $1,525/mo\\n3 bds2 ba904 sqft \n", 222 | "3 Wimbledon Oaks Apartment Homes $1,035+ 1 bd$1,425+ 2 bds \n", 223 | "4 House for rent $1,600/mo\\n3 bds2 ba1,536 sqft \n", 224 | ".. ... ... \n", 225 | "195 Woodwind $735+ 1 bd$875+ 2 bds \n", 226 | "196 The Enclave at Arlington $800+ 1 bd$1,050+ 2 bds$1,368+ 3 bds \n", 227 | "197 House for rent $1,800/mo\\n3 bds2 ba1,870 sqft \n", 228 | "198 House for rent $1,575/mo\\n3 bds2 ba1,338 sqft \n", 229 | "199 House for rent $2,750/mo\\n3 bds2 ba1,219 sqft \n", 230 | "\n", 231 | " Address \n", 232 | "0 8111 Mosstree Dr, Arlington, TX 76001 \n", 233 | "1 2303 Kingsford Ct, Arlington, TX 76017 \n", 234 | "2 2305 Kingsford Ct, Arlington, TX 76017 \n", 235 | "3 Wimbledon Oaks Apartment Homes | 1802 Wimbledo... \n", 236 | "4 7210 Fossil Rim Trl, Arlington, TX 76002 \n", 237 | ".. ... \n", 238 | "195 Woodwind | 1605 S Cooper St, Arlington, TX \n", 239 | "196 The Enclave at Arlington | 1249 Enclave Cir, A... 
\n", 240 | "197 2622 Meadowview Dr, Arlington, TX 76016 \n", 241 | "198 2507 Arapaho Dr, Arlington, TX 76018 \n", 242 | "199 1006 Andrews St, Arlington, TX 76011 \n", 243 | "\n", 244 | "[200 rows x 3 columns]" 245 | ] 246 | }, 247 | "execution_count": 91, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "# Visualization of CSV file.\n", 254 | "df" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [] 263 | } 264 | ], 265 | "metadata": { 266 | "kernelspec": { 267 | "display_name": "Python 3", 268 | "language": "python", 269 | "name": "python3" 270 | }, 271 | "language_info": { 272 | "codemirror_mode": { 273 | "name": "ipython", 274 | "version": 3 275 | }, 276 | "file_extension": ".py", 277 | "mimetype": "text/x-python", 278 | "name": "python", 279 | "nbconvert_exporter": "python", 280 | "pygments_lexer": "ipython3", 281 | "version": "3.7.6" 282 | } 283 | }, 284 | "nbformat": 4, 285 | "nbformat_minor": 4 286 | } 287 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Zillow_Scraper 2 | I recently completed a Python course on Udemy and I wanted to build my own project from scratch. I wanted the project to be meaningful and impactful to me, and since I am considering moving from my current apartment, I wanted to know what other apartments/homes are renting for in the city I live in (Arlington, TX). I decided to build a web scraper that grabs all the listing info for places being put up for rent on Zillow's website and writes it all out to a CSV file using Pandas. 3 | 4 | ## Features 5 | The code is currently hard-coded to Arlington, TX, but it can easily be adapted to the city and state you want to scrape.
6 | * Grabs all the listing info in specified city 7 | * Writes the Name,Details(price,bedroom,sqft), Address to a CSV file 8 | 9 | ## Install 10 | Use the following pip command: 11 | * pip install selenium 12 | * pip install pandas 13 | * chromedriver 14 | -------------------------------------------------------------------------------- /Zillow_Arlington_tx.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Project Goal: Scrape Zillow's website for the name, details, and address of each rental listing in Arlington, TX, and write the results to a CSV file using pandas." 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 81, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from selenium import webdriver\n", 17 | "from selenium.webdriver.common.by import By\n", 18 | "from selenium.webdriver.support.ui import WebDriverWait" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 86, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# Folder path for your chrome web browser driver\n", 28 | "PATH = ''\n", 29 | "\n", 30 | "# Passes your path to a chrome driver\n", 31 | "driver = webdriver.Chrome(PATH)\n", 32 | "\n", 33 | "# implicitly_wait() returns None and applies to the whole session, so it is set\n", 34 | "# once here: every element lookup below polls for up to 60 s before failing.\n", 35 | "driver.implicitly_wait(60)\n", 36 | "\n", 37 | "# Zillow rental search URL; {} is filled in with the page number\n", 38 | "request_url = \"https://www.zillow.com/arlington-tx/rentals/{}_p\"\n", 39 | "\n", 40 | "name = []\n", 41 | "details = []\n", 42 | "address = []\n", 43 | "\n", 44 | "# Iterates over result pages 1-5 and grabs name, details, address.\n", 45 | "for page in range(1,6):\n", 46 | "    # Formats the Zillow link with the page number\n", 47 | "    base_url = request_url.format(page)\n", 48 | "    driver.get(base_url)\n", 49 | "\n", 50 | "    # Grabs the element that holds all the search results on the page\n", 51 | "    main = driver.find_element(By.ID, \"search-page-list-container\")\n",
52 | "\n", 53 | "    # find_element(s)(By...) replaces find_element(s)_by_*, removed in Selenium 4.3\n", 54 | "    card_info = main.find_elements(By.CLASS_NAME, 'list-card-info')\n", 55 | "\n", 56 | "    # Reads all three fields first so a card missing one of them raises before\n", 57 | "    # any list grows, keeping name/address/details aligned row for row.\n", 58 | "    for listing in card_info:\n", 59 | "        listing_name, listing_address, listing_details = (listing.find_element(By.CLASS_NAME, css_class).text\n", 60 | "            for css_class in ('list-card-footer', 'list-card-addr', 'list-card-heading'))\n", 61 | "        name.append(listing_name)\n", 62 | "        address.append(listing_address)\n", 63 | "        details.append(listing_details)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 87, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# Quits your session.\n", 73 | "driver.quit()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 88, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# Dictionary that stores the listing scraped\n", 83 | "zillow_data = {}\n", 84 | "\n", 85 | "zillow_data['Name'],zillow_data['Details'],zillow_data['Address'] = name,details,address" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "# Writing the listing info out to a CSV file." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 89, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "import pandas as pd" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 90, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "df = pd.DataFrame.from_dict(zillow_data, orient='index').T  # dict keys become the columns\n", 111 | "df.to_csv('zillow_arlington_tx_rentals.csv', index=False)  # writes the listings to disk as CSV" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 91, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/html": [ 122 | "
\n", 123 | "\n", 136 | "\n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | "
NameDetailsAddress
0House for rent$1,895/mo\\n4 bds2 ba1,937 sqft8111 Mosstree Dr, Arlington, TX 76001
1Apartment for rent$1,425/mo\\n3 bds2 ba1,201 sqft2303 Kingsford Ct, Arlington, TX 76017
2Apartment for rent$1,525/mo\\n3 bds2 ba904 sqft2305 Kingsford Ct, Arlington, TX 76017
3Wimbledon Oaks Apartment Homes$1,035+ 1 bd$1,425+ 2 bdsWimbledon Oaks Apartment Homes | 1802 Wimbledo...
4House for rent$1,600/mo\\n3 bds2 ba1,536 sqft7210 Fossil Rim Trl, Arlington, TX 76002
............
195Woodwind$735+ 1 bd$875+ 2 bdsWoodwind | 1605 S Cooper St, Arlington, TX
196The Enclave at Arlington$800+ 1 bd$1,050+ 2 bds$1,368+ 3 bdsThe Enclave at Arlington | 1249 Enclave Cir, A...
197House for rent$1,800/mo\\n3 bds2 ba1,870 sqft2622 Meadowview Dr, Arlington, TX 76016
198House for rent$1,575/mo\\n3 bds2 ba1,338 sqft2507 Arapaho Dr, Arlington, TX 76018
199House for rent$2,750/mo\\n3 bds2 ba1,219 sqft1006 Andrews St, Arlington, TX 76011
\n", 214 | "

200 rows × 3 columns

\n", 215 | "
" 216 | ], 217 | "text/plain": [ 218 | " Name Details \\\n", 219 | "0 House for rent $1,895/mo\\n4 bds2 ba1,937 sqft \n", 220 | "1 Apartment for rent $1,425/mo\\n3 bds2 ba1,201 sqft \n", 221 | "2 Apartment for rent $1,525/mo\\n3 bds2 ba904 sqft \n", 222 | "3 Wimbledon Oaks Apartment Homes $1,035+ 1 bd$1,425+ 2 bds \n", 223 | "4 House for rent $1,600/mo\\n3 bds2 ba1,536 sqft \n", 224 | ".. ... ... \n", 225 | "195 Woodwind $735+ 1 bd$875+ 2 bds \n", 226 | "196 The Enclave at Arlington $800+ 1 bd$1,050+ 2 bds$1,368+ 3 bds \n", 227 | "197 House for rent $1,800/mo\\n3 bds2 ba1,870 sqft \n", 228 | "198 House for rent $1,575/mo\\n3 bds2 ba1,338 sqft \n", 229 | "199 House for rent $2,750/mo\\n3 bds2 ba1,219 sqft \n", 230 | "\n", 231 | " Address \n", 232 | "0 8111 Mosstree Dr, Arlington, TX 76001 \n", 233 | "1 2303 Kingsford Ct, Arlington, TX 76017 \n", 234 | "2 2305 Kingsford Ct, Arlington, TX 76017 \n", 235 | "3 Wimbledon Oaks Apartment Homes | 1802 Wimbledo... \n", 236 | "4 7210 Fossil Rim Trl, Arlington, TX 76002 \n", 237 | ".. ... \n", 238 | "195 Woodwind | 1605 S Cooper St, Arlington, TX \n", 239 | "196 The Enclave at Arlington | 1249 Enclave Cir, A... 
\n", 240 | "197 2622 Meadowview Dr, Arlington, TX 76016 \n", 241 | "198 2507 Arapaho Dr, Arlington, TX 76018 \n", 242 | "199 1006 Andrews St, Arlington, TX 76011 \n", 243 | "\n", 244 | "[200 rows x 3 columns]" 245 | ] 246 | }, 247 | "execution_count": 91, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "# Visualization of CSV file.\n", 254 | "df" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [] 263 | } 264 | ], 265 | "metadata": { 266 | "kernelspec": { 267 | "display_name": "Python 3", 268 | "language": "python", 269 | "name": "python3" 270 | }, 271 | "language_info": { 272 | "codemirror_mode": { 273 | "name": "ipython", 274 | "version": 3 275 | }, 276 | "file_extension": ".py", 277 | "mimetype": "text/x-python", 278 | "name": "python", 279 | "nbconvert_exporter": "python", 280 | "pygments_lexer": "ipython3", 281 | "version": "3.7.6" 282 | } 283 | }, 284 | "nbformat": 4, 285 | "nbformat_minor": 4 286 | } 287 | -------------------------------------------------------------------------------- /chromedriver: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faithfulalabi/Zillow_Scraper/9f7725ac4b709780f486539d4a41314b2f7ec1bb/chromedriver --------------------------------------------------------------------------------