├── .ipynb_checkpoints
└── Zillow_Arlington_tx-checkpoint.ipynb
├── README.md
├── Zillow_Arlington_tx.ipynb
└── chromedriver
/.ipynb_checkpoints/Zillow_Arlington_tx-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Project Goal: Scrape Zillow's website for the name, details, and address of each place listed for rent in Arlington, TX, and write the results out to a CSV file using pandas."
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 81,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# Imported libraries needed \n",
17 | "from selenium import webdriver\n",
18 | "from selenium.webdriver.support.ui import WebDriverWait"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 86,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "# Path to the chromedriver executable for your Chrome browser.\n",
28 | "PATH = ''\n",
29 | "\n",
30 | "# Start a Chrome session using the driver located at PATH.\n",
31 | "driver = webdriver.Chrome(PATH)\n",
32 | "\n",
33 | "# implicitly_wait() configures the driver and returns None, so it is set once\n",
34 | "# up front: element lookups below poll for up to 60 seconds before failing.\n",
35 | "driver.implicitly_wait(60)\n",
36 | "\n",
37 | "# Zillow rental search URL for Arlington, TX; {} is filled with the page number.\n",
38 | "request_url = \"https://www.zillow.com/arlington-tx/rentals/{}_p\"\n",
39 | "\n",
40 | "name = []\n",
41 | "details = []\n",
42 | "address = []\n",
43 | "\n",
44 | "# Visit result pages 1-5 and collect the name, details and address of each listing.\n",
45 | "for page in range(1,6):\n",
46 | "\n",
47 | "    # Build the URL for this result page.\n",
48 | "    base_url = request_url.format(page)\n",
49 | "\n",
50 | "    driver.get(base_url)\n",
51 | "\n",
52 | "    # Container element that holds the search results on the page.\n",
53 | "    main = driver.find_element_by_id(\"search-page-list-container\")\n",
54 | "\n",
55 | "    # One 'list-card-info' element per listing card.\n",
56 | "    card_info = main.find_elements_by_class_name('list-card-info')\n",
57 | "\n",
58 | "    # 'name' is read from the card footer, 'address' from the address line\n",
59 | "    # and 'details' from the card heading.\n",
60 | "    for listing in card_info:\n",
61 | "        name.append(listing.find_element_by_class_name('list-card-footer').text)\n",
62 | "        address.append(listing.find_element_by_class_name('list-card-addr').text)\n",
63 | "        details.append(listing.find_element_by_class_name('list-card-heading').text)"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 87,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "# Quits your session.\n",
73 | "driver.quit()"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 88,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "# Dictionary that stores the listing scraped\n",
83 | "zillow_data = {}\n",
84 | "\n",
85 | "zillow_data['Name'],zillow_data['Details'],zillow_data['Address'] = name,details,address"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "# Writing the listing info out to a CSV file."
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 89,
98 | "metadata": {},
99 | "outputs": [],
100 | "source": [
101 | "import pandas as pd"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 90,
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "# Build a DataFrame from the dictionary; orient='index' plus the transpose turns each key into a column.\n",
111 | "df = pd.DataFrame.from_dict(zillow_data, orient='index').T"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 91,
117 | "metadata": {},
118 | "outputs": [
119 | {
120 | "data": {
121 | "text/html": [
122 | "
\n",
123 | "\n",
136 | "
\n",
137 | " \n",
138 | " \n",
139 | " | \n",
140 | " Name | \n",
141 | " Details | \n",
142 | " Address | \n",
143 | "
\n",
144 | " \n",
145 | " \n",
146 | " \n",
147 | " 0 | \n",
148 | " House for rent | \n",
149 | " $1,895/mo\\n4 bds2 ba1,937 sqft | \n",
150 | " 8111 Mosstree Dr, Arlington, TX 76001 | \n",
151 | "
\n",
152 | " \n",
153 | " 1 | \n",
154 | " Apartment for rent | \n",
155 | " $1,425/mo\\n3 bds2 ba1,201 sqft | \n",
156 | " 2303 Kingsford Ct, Arlington, TX 76017 | \n",
157 | "
\n",
158 | " \n",
159 | " 2 | \n",
160 | " Apartment for rent | \n",
161 | " $1,525/mo\\n3 bds2 ba904 sqft | \n",
162 | " 2305 Kingsford Ct, Arlington, TX 76017 | \n",
163 | "
\n",
164 | " \n",
165 | " 3 | \n",
166 | " Wimbledon Oaks Apartment Homes | \n",
167 | " $1,035+ 1 bd$1,425+ 2 bds | \n",
168 | " Wimbledon Oaks Apartment Homes | 1802 Wimbledo... | \n",
169 | "
\n",
170 | " \n",
171 | " 4 | \n",
172 | " House for rent | \n",
173 | " $1,600/mo\\n3 bds2 ba1,536 sqft | \n",
174 | " 7210 Fossil Rim Trl, Arlington, TX 76002 | \n",
175 | "
\n",
176 | " \n",
177 | " ... | \n",
178 | " ... | \n",
179 | " ... | \n",
180 | " ... | \n",
181 | "
\n",
182 | " \n",
183 | " 195 | \n",
184 | " Woodwind | \n",
185 | " $735+ 1 bd$875+ 2 bds | \n",
186 | " Woodwind | 1605 S Cooper St, Arlington, TX | \n",
187 | "
\n",
188 | " \n",
189 | " 196 | \n",
190 | " The Enclave at Arlington | \n",
191 | " $800+ 1 bd$1,050+ 2 bds$1,368+ 3 bds | \n",
192 | " The Enclave at Arlington | 1249 Enclave Cir, A... | \n",
193 | "
\n",
194 | " \n",
195 | " 197 | \n",
196 | " House for rent | \n",
197 | " $1,800/mo\\n3 bds2 ba1,870 sqft | \n",
198 | " 2622 Meadowview Dr, Arlington, TX 76016 | \n",
199 | "
\n",
200 | " \n",
201 | " 198 | \n",
202 | " House for rent | \n",
203 | " $1,575/mo\\n3 bds2 ba1,338 sqft | \n",
204 | " 2507 Arapaho Dr, Arlington, TX 76018 | \n",
205 | "
\n",
206 | " \n",
207 | " 199 | \n",
208 | " House for rent | \n",
209 | " $2,750/mo\\n3 bds2 ba1,219 sqft | \n",
210 | " 1006 Andrews St, Arlington, TX 76011 | \n",
211 | "
\n",
212 | " \n",
213 | "
\n",
214 | "
200 rows × 3 columns
\n",
215 | "
"
216 | ],
217 | "text/plain": [
218 | " Name Details \\\n",
219 | "0 House for rent $1,895/mo\\n4 bds2 ba1,937 sqft \n",
220 | "1 Apartment for rent $1,425/mo\\n3 bds2 ba1,201 sqft \n",
221 | "2 Apartment for rent $1,525/mo\\n3 bds2 ba904 sqft \n",
222 | "3 Wimbledon Oaks Apartment Homes $1,035+ 1 bd$1,425+ 2 bds \n",
223 | "4 House for rent $1,600/mo\\n3 bds2 ba1,536 sqft \n",
224 | ".. ... ... \n",
225 | "195 Woodwind $735+ 1 bd$875+ 2 bds \n",
226 | "196 The Enclave at Arlington $800+ 1 bd$1,050+ 2 bds$1,368+ 3 bds \n",
227 | "197 House for rent $1,800/mo\\n3 bds2 ba1,870 sqft \n",
228 | "198 House for rent $1,575/mo\\n3 bds2 ba1,338 sqft \n",
229 | "199 House for rent $2,750/mo\\n3 bds2 ba1,219 sqft \n",
230 | "\n",
231 | " Address \n",
232 | "0 8111 Mosstree Dr, Arlington, TX 76001 \n",
233 | "1 2303 Kingsford Ct, Arlington, TX 76017 \n",
234 | "2 2305 Kingsford Ct, Arlington, TX 76017 \n",
235 | "3 Wimbledon Oaks Apartment Homes | 1802 Wimbledo... \n",
236 | "4 7210 Fossil Rim Trl, Arlington, TX 76002 \n",
237 | ".. ... \n",
238 | "195 Woodwind | 1605 S Cooper St, Arlington, TX \n",
239 | "196 The Enclave at Arlington | 1249 Enclave Cir, A... \n",
240 | "197 2622 Meadowview Dr, Arlington, TX 76016 \n",
241 | "198 2507 Arapaho Dr, Arlington, TX 76018 \n",
242 | "199 1006 Andrews St, Arlington, TX 76011 \n",
243 | "\n",
244 | "[200 rows x 3 columns]"
245 | ]
246 | },
247 | "execution_count": 91,
248 | "metadata": {},
249 | "output_type": "execute_result"
250 | }
251 | ],
252 | "source": [
253 | "# Display the scraped listings DataFrame.\n",
254 | "df"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": null,
260 | "metadata": {},
261 | "outputs": [],
262 | "source": []
263 | }
264 | ],
265 | "metadata": {
266 | "kernelspec": {
267 | "display_name": "Python 3",
268 | "language": "python",
269 | "name": "python3"
270 | },
271 | "language_info": {
272 | "codemirror_mode": {
273 | "name": "ipython",
274 | "version": 3
275 | },
276 | "file_extension": ".py",
277 | "mimetype": "text/x-python",
278 | "name": "python",
279 | "nbconvert_exporter": "python",
280 | "pygments_lexer": "ipython3",
281 | "version": "3.7.6"
282 | }
283 | },
284 | "nbformat": 4,
285 | "nbformat_minor": 4
286 | }
287 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Zillow_Scraper
2 | I recently completed a Python course on Udemy and wanted to build my own project from scratch. I wanted the project to be meaningful and impactful to me, and since I am considering moving from my current apartment, I wanted to know what other apartments/homes are renting for in the city I live in (Arlington, TX). I decided to build a web scraper that grabs the listing info for places put up for rent on Zillow's website and writes it all out to a CSV file using pandas.
3 |
4 | ## Features
5 | The code is currently hard-coded to Arlington, TX, but it can easily be reformatted for the city and state you want to scrape.
6 | * Grabs all the listing info in the specified city
7 | * Writes the Name, Details (price, bedrooms, sqft), and Address to a CSV file
8 |
9 | ## Install
10 | Install the following Python packages with pip, and provide a chromedriver executable for your Chrome browser:
11 | * pip install selenium
12 | * pip install pandas
13 | * chromedriver
14 |
--------------------------------------------------------------------------------
/Zillow_Arlington_tx.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Project Goal: Scrape Zillow's website for the name, details, and address of each place listed for rent in Arlington, TX, and write the results out to a CSV file using pandas."
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 81,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "# Imported libraries needed \n",
17 | "from selenium import webdriver\n",
18 | "from selenium.webdriver.support.ui import WebDriverWait"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 86,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "# Path to the chromedriver executable for your Chrome browser.\n",
28 | "PATH = ''\n",
29 | "\n",
30 | "# Start a Chrome session using the driver located at PATH.\n",
31 | "driver = webdriver.Chrome(PATH)\n",
32 | "\n",
33 | "# implicitly_wait() configures the driver and returns None, so it is set once\n",
34 | "# up front: element lookups below poll for up to 60 seconds before failing.\n",
35 | "driver.implicitly_wait(60)\n",
36 | "\n",
37 | "# Zillow rental search URL for Arlington, TX; {} is filled with the page number.\n",
38 | "request_url = \"https://www.zillow.com/arlington-tx/rentals/{}_p\"\n",
39 | "\n",
40 | "name = []\n",
41 | "details = []\n",
42 | "address = []\n",
43 | "\n",
44 | "# Visit result pages 1-5 and collect the name, details and address of each listing.\n",
45 | "for page in range(1,6):\n",
46 | "\n",
47 | "    # Build the URL for this result page.\n",
48 | "    base_url = request_url.format(page)\n",
49 | "\n",
50 | "    driver.get(base_url)\n",
51 | "\n",
52 | "    # Container element that holds the search results on the page.\n",
53 | "    main = driver.find_element_by_id(\"search-page-list-container\")\n",
54 | "\n",
55 | "    # One 'list-card-info' element per listing card.\n",
56 | "    card_info = main.find_elements_by_class_name('list-card-info')\n",
57 | "\n",
58 | "    # 'name' is read from the card footer, 'address' from the address line\n",
59 | "    # and 'details' from the card heading.\n",
60 | "    for listing in card_info:\n",
61 | "        name.append(listing.find_element_by_class_name('list-card-footer').text)\n",
62 | "        address.append(listing.find_element_by_class_name('list-card-addr').text)\n",
63 | "        details.append(listing.find_element_by_class_name('list-card-heading').text)"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 87,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "# Quits your session.\n",
73 | "driver.quit()"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 88,
79 | "metadata": {},
80 | "outputs": [],
81 | "source": [
82 | "# Dictionary that stores the listing scraped\n",
83 | "zillow_data = {}\n",
84 | "\n",
85 | "zillow_data['Name'],zillow_data['Details'],zillow_data['Address'] = name,details,address"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 | "# Writing the listing info out to a CSV file."
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 89,
98 | "metadata": {},
99 | "outputs": [],
100 | "source": [
101 | "import pandas as pd"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 90,
107 | "metadata": {},
108 | "outputs": [],
109 | "source": [
110 | "# Build a DataFrame from the dictionary; orient='index' plus the transpose turns each key into a column.\n",
111 | "df = pd.DataFrame.from_dict(zillow_data, orient='index').T"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 91,
117 | "metadata": {},
118 | "outputs": [
119 | {
120 | "data": {
121 | "text/html": [
122 | "\n",
123 | "\n",
136 | "
\n",
137 | " \n",
138 | " \n",
139 | " | \n",
140 | " Name | \n",
141 | " Details | \n",
142 | " Address | \n",
143 | "
\n",
144 | " \n",
145 | " \n",
146 | " \n",
147 | " 0 | \n",
148 | " House for rent | \n",
149 | " $1,895/mo\\n4 bds2 ba1,937 sqft | \n",
150 | " 8111 Mosstree Dr, Arlington, TX 76001 | \n",
151 | "
\n",
152 | " \n",
153 | " 1 | \n",
154 | " Apartment for rent | \n",
155 | " $1,425/mo\\n3 bds2 ba1,201 sqft | \n",
156 | " 2303 Kingsford Ct, Arlington, TX 76017 | \n",
157 | "
\n",
158 | " \n",
159 | " 2 | \n",
160 | " Apartment for rent | \n",
161 | " $1,525/mo\\n3 bds2 ba904 sqft | \n",
162 | " 2305 Kingsford Ct, Arlington, TX 76017 | \n",
163 | "
\n",
164 | " \n",
165 | " 3 | \n",
166 | " Wimbledon Oaks Apartment Homes | \n",
167 | " $1,035+ 1 bd$1,425+ 2 bds | \n",
168 | " Wimbledon Oaks Apartment Homes | 1802 Wimbledo... | \n",
169 | "
\n",
170 | " \n",
171 | " 4 | \n",
172 | " House for rent | \n",
173 | " $1,600/mo\\n3 bds2 ba1,536 sqft | \n",
174 | " 7210 Fossil Rim Trl, Arlington, TX 76002 | \n",
175 | "
\n",
176 | " \n",
177 | " ... | \n",
178 | " ... | \n",
179 | " ... | \n",
180 | " ... | \n",
181 | "
\n",
182 | " \n",
183 | " 195 | \n",
184 | " Woodwind | \n",
185 | " $735+ 1 bd$875+ 2 bds | \n",
186 | " Woodwind | 1605 S Cooper St, Arlington, TX | \n",
187 | "
\n",
188 | " \n",
189 | " 196 | \n",
190 | " The Enclave at Arlington | \n",
191 | " $800+ 1 bd$1,050+ 2 bds$1,368+ 3 bds | \n",
192 | " The Enclave at Arlington | 1249 Enclave Cir, A... | \n",
193 | "
\n",
194 | " \n",
195 | " 197 | \n",
196 | " House for rent | \n",
197 | " $1,800/mo\\n3 bds2 ba1,870 sqft | \n",
198 | " 2622 Meadowview Dr, Arlington, TX 76016 | \n",
199 | "
\n",
200 | " \n",
201 | " 198 | \n",
202 | " House for rent | \n",
203 | " $1,575/mo\\n3 bds2 ba1,338 sqft | \n",
204 | " 2507 Arapaho Dr, Arlington, TX 76018 | \n",
205 | "
\n",
206 | " \n",
207 | " 199 | \n",
208 | " House for rent | \n",
209 | " $2,750/mo\\n3 bds2 ba1,219 sqft | \n",
210 | " 1006 Andrews St, Arlington, TX 76011 | \n",
211 | "
\n",
212 | " \n",
213 | "
\n",
214 | "
200 rows × 3 columns
\n",
215 | "
"
216 | ],
217 | "text/plain": [
218 | " Name Details \\\n",
219 | "0 House for rent $1,895/mo\\n4 bds2 ba1,937 sqft \n",
220 | "1 Apartment for rent $1,425/mo\\n3 bds2 ba1,201 sqft \n",
221 | "2 Apartment for rent $1,525/mo\\n3 bds2 ba904 sqft \n",
222 | "3 Wimbledon Oaks Apartment Homes $1,035+ 1 bd$1,425+ 2 bds \n",
223 | "4 House for rent $1,600/mo\\n3 bds2 ba1,536 sqft \n",
224 | ".. ... ... \n",
225 | "195 Woodwind $735+ 1 bd$875+ 2 bds \n",
226 | "196 The Enclave at Arlington $800+ 1 bd$1,050+ 2 bds$1,368+ 3 bds \n",
227 | "197 House for rent $1,800/mo\\n3 bds2 ba1,870 sqft \n",
228 | "198 House for rent $1,575/mo\\n3 bds2 ba1,338 sqft \n",
229 | "199 House for rent $2,750/mo\\n3 bds2 ba1,219 sqft \n",
230 | "\n",
231 | " Address \n",
232 | "0 8111 Mosstree Dr, Arlington, TX 76001 \n",
233 | "1 2303 Kingsford Ct, Arlington, TX 76017 \n",
234 | "2 2305 Kingsford Ct, Arlington, TX 76017 \n",
235 | "3 Wimbledon Oaks Apartment Homes | 1802 Wimbledo... \n",
236 | "4 7210 Fossil Rim Trl, Arlington, TX 76002 \n",
237 | ".. ... \n",
238 | "195 Woodwind | 1605 S Cooper St, Arlington, TX \n",
239 | "196 The Enclave at Arlington | 1249 Enclave Cir, A... \n",
240 | "197 2622 Meadowview Dr, Arlington, TX 76016 \n",
241 | "198 2507 Arapaho Dr, Arlington, TX 76018 \n",
242 | "199 1006 Andrews St, Arlington, TX 76011 \n",
243 | "\n",
244 | "[200 rows x 3 columns]"
245 | ]
246 | },
247 | "execution_count": 91,
248 | "metadata": {},
249 | "output_type": "execute_result"
250 | }
251 | ],
252 | "source": [
253 | "# Display the scraped listings DataFrame.\n",
254 | "df"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": null,
260 | "metadata": {},
261 | "outputs": [],
262 | "source": []
263 | }
264 | ],
265 | "metadata": {
266 | "kernelspec": {
267 | "display_name": "Python 3",
268 | "language": "python",
269 | "name": "python3"
270 | },
271 | "language_info": {
272 | "codemirror_mode": {
273 | "name": "ipython",
274 | "version": 3
275 | },
276 | "file_extension": ".py",
277 | "mimetype": "text/x-python",
278 | "name": "python",
279 | "nbconvert_exporter": "python",
280 | "pygments_lexer": "ipython3",
281 | "version": "3.7.6"
282 | }
283 | },
284 | "nbformat": 4,
285 | "nbformat_minor": 4
286 | }
287 |
--------------------------------------------------------------------------------
/chromedriver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/faithfulalabi/Zillow_Scraper/9f7725ac4b709780f486539d4a41314b2f7ec1bb/chromedriver
--------------------------------------------------------------------------------