├── src
│   └── pyzill
│       ├── __init__.py
│       ├── utils.py
│       ├── parse.py
│       ├── details.py
│       └── search.py
├── pyproject.toml
├── LICENSE
├── test.py
├── .gitignore
└── README.md

--------------------------------------------------------------------------------
/src/pyzill/__init__.py:
--------------------------------------------------------------------------------
from pyzill.details import get_from_home_id, get_from_deparment_id, get_from_deparment_url, get_from_home_url
from pyzill.search import for_sale, for_rent, sold
from pyzill.utils import parse_proxy

--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "pyzill"
version = "1.0.3"
description = "Zillow scraper in Python"
authors = [
    { name="John Balvin", email="johnchristian@hotmail.es" },
]
readme = "README.md"
license = { text = "MIT" }
keywords = ["zillow", "scraper", "crawler"]
dependencies = ["bs4", "requests", "curl_cffi"]

[project.urls]
Homepage = "https://github.com/johnbalvin/pyzill"

--------------------------------------------------------------------------------
/src/pyzill/utils.py:
--------------------------------------------------------------------------------
from re import compile
from urllib.parse import quote

regex_space = compile(r"\s+")
regex_price = compile(r"\d+")


def remove_space(value: str) -> str:
    """Collapse runs of whitespace in the given string into single spaces.

    Args:
        value (str): input string with unwanted spaces

    Returns:
        str: string with single spaces
    """
    return regex_space.sub(" ", value.strip())


def get_nested_value(dic, key_path, default=None):
    """Look up a dotted key path (e.g. "a.b.c") in a nested dictionary."""
    keys = key_path.split(".")
    current = dic
    for key in keys:
        # stop early if an intermediate value is not a dictionary
        if not isinstance(current, dict):
            return default
        current = current.get(key, {})
        if current == {} or current is None:
            return default
    return current


def parse_proxy(ip_or_domain: str, port: str, username: str, password: str) -> str:
    """Build an HTTP proxy URL, percent-encoding the credentials."""
    # safe="" also encodes "/" and ":" so they cannot break the URL
    encoded_username = quote(username, safe="")
    encoded_password = quote(password, safe="")
    proxy_url = f"http://{encoded_username}:{encoded_password}@{ip_or_domain}:{port}"
    return proxy_url

--------------------------------------------------------------------------------
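A quick illustration of the two helpers above (the values are made up; `parse_proxy` assumes an HTTP proxy):

```Python
from pyzill.utils import get_nested_value, parse_proxy

data = {"props": {"pageProps": {"componentProps": {"zpid": 123}}}}
print(get_nested_value(data, "props.pageProps.componentProps.zpid"))  # 123
print(get_nested_value(data, "props.missing.path", default="n/a"))    # n/a

# credentials are percent-encoded, so special characters can't break the URL
print(parse_proxy("proxy.example.com", "8080", "user", "p@ss:word"))
# http://user:p%40ss%3Aword@proxy.example.com:8080
```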
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import json

import pyzill

# use your own rotating residential proxy credentials here
proxy_url = pyzill.parse_proxy("[proxy_ip or proxy_domain]", "[proxy_port]", "[proxy_username]", "[proxy_password]")
ne_lat = 38.602951833355434
ne_long = -87.22283859375
sw_lat = 23.42674607019482
sw_long = -112.93084640625
# pagination is for the list you see on the right-hand side when searching;
# you don't need to iterate over all the pages because zillow sends the whole
# dataset in mapResults at once on the first page. However, the maximum number
# of results zillow returns is 500, so if mapResults has 500 entries, try
# adjusting the zoom or moving the coordinates; pagination won't help because
# you will always get at most 500 results
pagination = 1

results_rent = pyzill.for_rent(pagination,
                search_value="", is_entire_place=False, is_room=True,
                min_beds=1, max_beds=None,
                min_bathrooms=None, max_bathrooms=None,
                min_price=10000, max_price=None,
                ne_lat=ne_lat, ne_long=ne_long, sw_lat=sw_lat, sw_long=sw_long,
                zoom_value=15,
                proxy_url=proxy_url)
jsondata_rent = json.dumps(results_rent)
with open("./jsondata_rent2.json", "w") as f:
    f.write(jsondata_rent)

--------------------------------------------------------------------------------
/src/pyzill/parse.py:
--------------------------------------------------------------------------------
from html import unescape
from json import loads
from typing import Any

from bs4 import BeautifulSoup  # type: ignore

from pyzill.utils import remove_space, get_nested_value


def parse_body_home(body: bytes) -> dict[str, Any]:
    """Parse the HTML of a home-details page and extract the property JSON.

    Args:
        body (bytes): HTML content of web page

    Returns:
        dict[str, Any]: parsed property information
    """
    component_props = parse_body(body)
    data_raw = get_nested_value(component_props, "gdpClientCache")
    property_json = loads(data_raw)
    parsed_data = {}
    for data in property_json.values():
        if "property" in str(data):
            parsed_data = data.get("property")
    return parsed_data


def parse_body_deparments(body: bytes) -> dict[str, Any]:
    """Parse the HTML of an apartment-building page and extract its JSON data.

    Args:
        body (bytes): HTML content of web page

    Returns:
        dict[str, Any]: parsed building information
    """
    component_props = parse_body(body)
    department_json = get_nested_value(component_props, "initialReduxState.gdp")
    return department_json


def parse_body(body: bytes) -> dict[str, Any]:
    """Extract the JSON payload that Next.js embeds in the page's __NEXT_DATA__ script tag.

    Args:
        body (bytes): HTML content of web page

    Returns:
        dict[str, Any]: the componentProps object of the page props
    """
    soup = BeautifulSoup(body, "html.parser")
    selection = soup.select_one("#__NEXT_DATA__")
    if not selection:
        return {}
    html_data = remove_space(unescape(selection.getText()))
    data = loads(html_data)
    return get_nested_value(data, "props.pageProps.componentProps")

--------------------------------------------------------------------------------
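A minimal illustration of what `parse_body` extracts, using a synthetic page (real Zillow pages embed a much larger payload under the same `__NEXT_DATA__` id):

```Python
from pyzill.parse import parse_body

html = b'''<html><body>
<script id="__NEXT_DATA__" type="application/json">
{"props": {"pageProps": {"componentProps": {"zpid": 123}}}}
</script>
</body></html>'''
print(parse_body(html))  # {'zpid': 123}
```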
/src/pyzill/details.py:
--------------------------------------------------------------------------------
from typing import Any

from curl_cffi import requests

from pyzill.parse import parse_body_home, parse_body_deparments

headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Language": "en",
    "Cache-Control": "no-cache",
    "Pragma": "no-cache",
    "Sec-Ch-Ua": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
}

def get_from_home_id(
    property_id: int, proxy_url: str | None = None
) -> dict[str, Any]:
    """Scrape data for a property from zillow based on its property ID (zpid)

    Args:
        property_id (int): ID for any property from zillow
        proxy_url (str | None, optional): proxy URL for masking the request. Defaults to None.

    Returns:
        dict[str, Any]: parsed property information
    """
    # zillow routes on the trailing zpid, so the title segment can be anything
    home_url = f"https://www.zillow.com/homedetails/any-title/{property_id}_zpid/"
    data = get_from_home_url(home_url, proxy_url)
    return data

def get_from_deparment_id(
    deparment_id: str, proxy_url: str | None = None
) -> dict[str, Any]:
    """Scrape data for an apartment building from zillow based on its department ID

    Args:
        deparment_id (str): department ID of the building from zillow
        proxy_url (str | None, optional): proxy URL for masking the request. Defaults to None.

    Returns:
        dict[str, Any]: parsed building information
    """
    # zillow resolves the listing from the trailing ID, so the state/name
    # segments of the path are placeholders
    home_url = f"https://www.zillow.com/apartments/texas/the-lennox/{deparment_id}"
    proxies = {"http": proxy_url, "https": proxy_url} if proxy_url else None
    response = requests.get(url=home_url, headers=headers, proxies=proxies, impersonate="chrome124")
    response.raise_for_status()
    data = parse_body_deparments(response.content)
    return data

def get_from_deparment_url(
    deparment_url: str, proxy_url: str | None = None
) -> dict[str, Any]:
    """Scrape data for an apartment building from zillow based on its URL

    Args:
        deparment_url (str): URL of the building's page on zillow
        proxy_url (str | None, optional): proxy URL for masking the request. Defaults to None.

    Returns:
        dict[str, Any]: parsed building information
    """
    proxies = {"http": proxy_url, "https": proxy_url} if proxy_url else None
    response = requests.get(url=deparment_url, headers=headers, proxies=proxies, impersonate="chrome124")
    response.raise_for_status()
    data = parse_body_deparments(response.content)
    return data

def get_from_home_url(home_url: str, proxy_url: str | None = None) -> dict[str, Any]:
    """Scrape the given URL and parse the home details

    Args:
        home_url (str): URL for the property
        proxy_url (str | None, optional): proxy URL for masking the request. Defaults to None.

    Returns:
        dict[str, Any]: parsed property information
    """
    proxies = {"http": proxy_url, "https": proxy_url} if proxy_url else None
    response = requests.get(url=home_url, headers=headers, proxies=proxies, impersonate="chrome124")
    response.raise_for_status()
    data = parse_body_home(response.content)
    return data

--------------------------------------------------------------------------------
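Zillow blocks repeated requests from a single IP (see the README note on rotating residential proxies), so a caller may want retries. A minimal sketch, not part of the library — the helper name, attempt count, and backoff are arbitrary choices:

```Python
import time

import pyzill


def get_home_with_retries(home_url: str, proxy_url: str | None = None, max_attempts: int = 3) -> dict:
    """Retry transient failures with a simple linear backoff."""
    for attempt in range(1, max_attempts + 1):
        try:
            return pyzill.get_from_home_url(home_url, proxy_url)
        except Exception:  # broad on purpose: blocked/failed requests raise in different ways
            if attempt == max_attempts:
                raise
            time.sleep(2 * attempt)  # wait a bit longer after each failure
```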
/.gitignore:
--------------------------------------------------------------------------------
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml

# ruff
.ruff_cache/

# LSP config files
pyrightconfig.json

# End of https://www.toptal.com/developers/gitignore/api/python

*.DS_Store

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Zillow scraper in Python

## Overview
This project is an open-source tool developed in Python for extracting property information from Zillow. It's designed to be easy to use, making it an ideal solution for developers looking for Zillow listing data.
## Features
- Full search support
- Extracts detailed property information from Zillow
- Implemented in Python just because it's popular
- Easy to integrate with existing Python projects

### Important
- Use rotating residential proxies; Zillow will block you if you make multiple requests from the same IP.

### Install

```bash
$ pip install pyzill
```
## Examples

```Python
import pyzill
import json
ne_lat = 38.602951833355434
ne_long = -87.22283859375
sw_lat = 23.42674607019482
sw_long = -112.93084640625
# pagination is for the list that you see on the right-hand side when searching;
# you don't need to iterate over all the pages because zillow sends the whole
# dataset in mapResults at once on the first page. However, the maximum number
# of results zillow returns is 500, so if mapResults has 500 entries, try
# adjusting the zoom or moving the coordinates; pagination won't help because
# you will always get at most 500 results
pagination = 1
proxy_url = pyzill.parse_proxy("[proxy_ip or proxy_domain]","[proxy_port]","[proxy_username]","[proxy_password]")
results_sold = pyzill.sold(pagination,
                search_value="miami",
                min_beds=1, max_beds=1,
                min_bathrooms=None, max_bathrooms=None,
                min_price=10000, max_price=None,
                ne_lat=ne_lat, ne_long=ne_long, sw_lat=sw_lat, sw_long=sw_long,
                zoom_value=5,
                proxy_url=proxy_url)

results_sale = pyzill.for_sale(pagination,
                search_value="",
                min_beds=None, max_beds=None,
                min_bathrooms=3, max_bathrooms=None,
                min_price=None, max_price=None,
                ne_lat=ne_lat, ne_long=ne_long, sw_lat=sw_lat, sw_long=sw_long,
                zoom_value=10,
                proxy_url=proxy_url)

results_rent = pyzill.for_rent(pagination,
                search_value="", is_entire_place=False, is_room=True,
                min_beds=1, max_beds=None,
                min_bathrooms=None, max_bathrooms=None,
                min_price=10000, max_price=None,
                ne_lat=ne_lat, ne_long=ne_long, sw_lat=sw_lat, sw_long=sw_long,
                zoom_value=15,
                proxy_url=proxy_url)
jsondata_sold = json.dumps(results_sold)
jsondata_sale = json.dumps(results_sale)
jsondata_rent = json.dumps(results_rent)
with open("./jsondata_sold.json", "w") as f:
    f.write(jsondata_sold)
with open("./jsondata_sale.json", "w") as f:
    f.write(jsondata_sale)
with open("./jsondata_rent.json", "w") as f:
    f.write(jsondata_rent)
```
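When `mapResults` comes back with exactly 500 entries, the area likely holds more listings than Zillow will return in one response. One workaround, sketched below (the helper and the 2x2 split are illustrative choices, not part of pyzill), is to recursively split the bounding box into quadrants until each tile comes in under the cap:

```Python
import pyzill

def search_area(ne_lat, ne_long, sw_lat, sw_long, zoom_value, proxy_url=None):
    """Recursively split the bounding box until each tile returns under the 500 cap."""
    results = pyzill.for_sale(1, search_value="",
                              min_beds=None, max_beds=None,
                              min_bathrooms=None, max_bathrooms=None,
                              min_price=None, max_price=None,
                              ne_lat=ne_lat, ne_long=ne_long,
                              sw_lat=sw_lat, sw_long=sw_long,
                              zoom_value=zoom_value, proxy_url=proxy_url)
    map_results = results.get("mapResults", [])
    if len(map_results) < 500:
        return map_results
    mid_lat = (ne_lat + sw_lat) / 2
    mid_long = (ne_long + sw_long) / 2
    tiles = [
        (ne_lat, ne_long, mid_lat, mid_long),   # north-east quadrant
        (ne_lat, mid_long, mid_lat, sw_long),   # north-west quadrant
        (mid_lat, ne_long, sw_lat, mid_long),   # south-east quadrant
        (mid_lat, mid_long, sw_lat, sw_long),   # south-west quadrant
    ]
    listings = []
    for n_lat, n_long, s_lat, s_long in tiles:
        listings += search_area(n_lat, n_long, s_lat, s_long, zoom_value + 1, proxy_url)
    # listings on tile borders may appear twice; dedupe by zpid if needed
    return listings
```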
## For homes

```Python
import pyzill
import json
property_url="https://www.zillow.com/homedetails/858-Shady-Grove-Ln-Harrah-OK-73045/339897685_zpid/"
proxy_url = pyzill.parse_proxy("[proxy_ip or proxy_domain]","[proxy_port]","[proxy_username]","[proxy_password]")
data = pyzill.get_from_home_url(property_url, proxy_url)
jsondata = json.dumps(data)
with open("details.json", "w") as f:
    f.write(jsondata)
```

```Python
import pyzill
import json
property_id=2056016566
proxy_url = pyzill.parse_proxy("[proxy_ip or proxy_domain]","[proxy_port]","[proxy_username]","[proxy_password]")
data = pyzill.get_from_home_id(property_id, proxy_url)
jsondata = json.dumps(data)
with open("details.json", "w") as f:
    f.write(jsondata)
```

## For departments (apartment buildings)

```Python
import pyzill
import json
property_url="https://www.zillow.com/apartments/kissimmee-fl/the-nexus-at-overbrook/9DSWrh/"
proxy_url = pyzill.parse_proxy("[proxy_ip or proxy_domain]","[proxy_port]","[proxy_username]","[proxy_password]")
data = pyzill.get_from_deparment_url(property_url, proxy_url)
jsondata = json.dumps(data)
with open("details.json", "w") as f:
    f.write(jsondata)
```

```Python
import pyzill
import json
property_id="CgKZT4"
proxy_url = pyzill.parse_proxy("[proxy_ip or proxy_domain]","[proxy_port]","[proxy_username]","[proxy_password]")
data = pyzill.get_from_deparment_id(property_id, proxy_url)
jsondata = json.dumps(data)
with open("details.json", "w") as f:
    f.write(jsondata)
```

--------------------------------------------------------------------------------
/src/pyzill/search.py:
--------------------------------------------------------------------------------
from typing import Any

from curl_cffi import requests


def for_sale(
    pagination: int,
    search_value: str,
    min_beds: int,
    max_beds: int,
    min_bathrooms: int,
    max_bathrooms: int,
    min_price: int,
    max_price: int,
    ne_lat: float,
    ne_long: float,
    sw_lat: float,
    sw_long: float,
    zoom_value: int,
    proxy_url: str | None = None,
) -> dict[str, Any]:
    """Get the listings that are for sale. The result is a dictionary with the keys
    mapResults and listResults; use mapResults, which contains all the listings across
    all pages, while listResults only feeds the sidebar you see when searching on zillow.
    Be aware that mapResults holds at most 500 entries, so if you get exactly 500
    results and want the complete result set for an area, adjust the zoom or the
    coordinates. Paginating over all pages won't recover the missing listings with
    either mapResults or listResults, so avoid pagination: mapResults already has
    everything (up to the 500 cap).

    Args:
        pagination (int): page number in the pagination
        search_value (str): search term, e.g. a city name
        min_beds/max_beds, min_bathrooms/max_bathrooms, min_price/max_price: optional range filters; pass None to leave a bound open
        ne_lat (float): ne latitude value
        ne_long (float): ne longitude value
        sw_lat (float): sw latitude value
        sw_long (float): sw longitude value
        zoom_value (int): map zoom level
        proxy_url (str | None, optional): proxy URL for masking the request. Defaults to None.

    Returns:
        dict[str, Any]: listing of properties in JSON format
    """
    filter_state = {
        "sortSelection": {"value": "globalrelevanceex"},
        "isAllHomes": {"value": True},
    }
    return search(pagination, search_value, min_beds, max_beds, min_bathrooms, max_bathrooms,
                  min_price, max_price, ne_lat, ne_long, sw_lat, sw_long, zoom_value,
                  filter_state, proxy_url)
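# Illustration only (not used by the library): combined with the optional
# bed/bath/price arguments, the filter_state above is what search() sends to
# zillow as "filterState". For example,
#     for_sale(1, "miami", 1, 1, None, None, 10000, None, ...)
# ends up sending:
#     {
#         "sortSelection": {"value": "globalrelevanceex"},
#         "isAllHomes": {"value": True},
#         "beds": {"min": 1, "max": 1},
#         "price": {"min": 10000},
#     }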
def for_rent(
    pagination: int,
    search_value: str,
    is_entire_place: bool,
    is_room: bool,
    min_beds: int,
    max_beds: int,
    min_bathrooms: int,
    max_bathrooms: int,
    min_price: int,
    max_price: int,
    ne_lat: float,
    ne_long: float,
    sw_lat: float,
    sw_long: float,
    zoom_value: int,
    proxy_url: str | None = None,
) -> dict[str, Any]:
    """Get the listings that are for rent. The result is a dictionary with the keys
    mapResults and listResults; use mapResults, which contains all the listings across
    all pages, while listResults only feeds the sidebar you see when searching on zillow.
    Be aware that mapResults holds at most 500 entries, so if you get exactly 500
    results and want the complete result set for an area, adjust the zoom or the
    coordinates. Paginating over all pages won't recover the missing listings with
    either mapResults or listResults, so avoid pagination: mapResults already has
    everything (up to the 500 cap).

    Args:
        pagination (int): page number in the pagination
        search_value (str): search term, e.g. a city name
        is_entire_place (bool): whether to search for entire units
        is_room (bool): whether to search for single rooms for rent
        min_beds/max_beds, min_bathrooms/max_bathrooms, min_price/max_price: optional range filters; pass None to leave a bound open
        ne_lat (float): ne latitude value
        ne_long (float): ne longitude value
        sw_lat (float): sw latitude value
        sw_long (float): sw longitude value
        zoom_value (int): map zoom level
        proxy_url (str | None, optional): proxy URL for masking the request. Defaults to None.

    Returns:
        dict[str, Any]: listing of properties in JSON format
    """
    filter_state = {
        "sortSelection": {"value": "priorityscore"},
        "isNewConstruction": {"value": False},
        "isForSaleForeclosure": {"value": False},
        "isForSaleByOwner": {"value": False},
        "isForSaleByAgent": {"value": False},
        "isForRent": {"value": True},
        "isComingSoon": {"value": False},
        "isAuction": {"value": False},
        "isAllHomes": {"value": True},
    }
    if is_room:
        filter_state["isRoomForRent"] = {"value": True}
    if not is_entire_place:
        filter_state["isEntirePlaceForRent"] = {"value": False}
    return search(pagination, search_value, min_beds, max_beds, min_bathrooms, max_bathrooms,
                  min_price, max_price, ne_lat, ne_long, sw_lat, sw_long, zoom_value,
                  filter_state, proxy_url)
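# Illustration only: for_rent(..., is_entire_place=False, is_room=True, ...)
# adds
#     "isRoomForRent": {"value": True}
#     "isEntirePlaceForRent": {"value": False}
# to the rental filter_state above, narrowing the search from whole units to
# single rooms for rent.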
def sold(
    pagination: int,
    search_value: str,
    min_beds: int,
    max_beds: int,
    min_bathrooms: int,
    max_bathrooms: int,
    min_price: int,
    max_price: int,
    ne_lat: float,
    ne_long: float,
    sw_lat: float,
    sw_long: float,
    zoom_value: int,
    proxy_url: str | None = None,
) -> dict[str, Any]:
    """Get the listings that were recently sold. The result is a dictionary with the keys
    mapResults and listResults; use mapResults, which contains all the listings across
    all pages, while listResults only feeds the sidebar you see when searching on zillow.
    Be aware that mapResults holds at most 500 entries, so if you get exactly 500
    results and want the complete result set for an area, adjust the zoom or the
    coordinates. Paginating over all pages won't recover the missing listings with
    either mapResults or listResults, so avoid pagination: mapResults already has
    everything (up to the 500 cap).

    Args:
        pagination (int): page number in the pagination
        search_value (str): search term, e.g. a city name
        min_beds/max_beds, min_bathrooms/max_bathrooms, min_price/max_price: optional range filters; pass None to leave a bound open
        ne_lat (float): ne latitude value
        ne_long (float): ne longitude value
        sw_lat (float): sw latitude value
        sw_long (float): sw longitude value
        zoom_value (int): map zoom level
        proxy_url (str | None, optional): proxy URL for masking the request. Defaults to None.

    Returns:
        dict[str, Any]: listing of properties in JSON format
    """
    filter_state = {
        "sortSelection": {"value": "globalrelevanceex"},
        "isNewConstruction": {"value": False},
        "isForSaleForeclosure": {"value": False},
        "isForSaleByOwner": {"value": False},
        "isForSaleByAgent": {"value": False},
        "isForRent": {"value": False},
        "isComingSoon": {"value": False},
        "isAuction": {"value": False},
        "isAllHomes": {"value": True},
        "isRecentlySold": {"value": True},
    }
    return search(pagination, search_value, min_beds, max_beds, min_bathrooms, max_bathrooms,
                  min_price, max_price, ne_lat, ne_long, sw_lat, sw_long, zoom_value,
                  filter_state, proxy_url)


def search(
    pagination: int,
    search_value: str,
    min_beds: int,
    max_beds: int,
    min_bathrooms: int,
    max_bathrooms: int,
    min_price: int,
    max_price: int,
    ne_lat: float,
    ne_long: float,
    sw_lat: float,
    sw_long: float,
    zoom_value: int,
    filter_state: dict[str, Any],
    proxy_url: str | None = None,
) -> dict[str, Any]:
    """Query zillow's search endpoint and return the results for the given page number.

    Args:
        pagination (int): page number in the pagination
        search_value (str): search term, e.g. a city name
        min_beds/max_beds, min_bathrooms/max_bathrooms, min_price/max_price: optional range filters; pass None to leave a bound open
        ne_lat (float): ne latitude value
        ne_long (float): ne longitude value
        sw_lat (float): sw latitude value
        sw_long (float): sw longitude value
        zoom_value (int): map zoom level
        filter_state (dict[str, Any]): filter flags for making the search
        proxy_url (str | None, optional): proxy URL for masking the request. Defaults to None.

    Returns:
        dict[str, Any]: listing of properties in JSON format
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "en",
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
        "origin": "https://www.zillow.com",
        "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    }
    input_data = {
        "searchQueryState": {
            "isMapVisible": True,
            "isListVisible": True,
            "mapBounds": {
                "north": ne_lat,
                "east": ne_long,
                "south": sw_lat,
                "west": sw_long,
            },
            "filterState": filter_state,
            "mapZoom": zoom_value,
            "pagination": {
                "currentPage": pagination,
            },
        },
        "wants": {
            "cat1": ["listResults", "mapResults"],
            "cat2": ["total"],
        },
        "requestId": 10,
        "isDebugRequest": False,
    }
    if search_value is not None:
        input_data["searchQueryState"]["usersSearchTerm"] = search_value

    if min_beds is not None or max_beds is not None:
        beds = {}
        if min_beds is not None:
            beds["min"] = min_beds
        if max_beds is not None:
            beds["max"] = max_beds
        input_data["searchQueryState"]["filterState"]["beds"] = beds

    if min_bathrooms is not None or max_bathrooms is not None:
        baths = {}
        if min_bathrooms is not None:
            baths["min"] = min_bathrooms
        if max_bathrooms is not None:
            baths["max"] = max_bathrooms
        input_data["searchQueryState"]["filterState"]["baths"] = baths
    if min_price is not None or max_price is not None:
        price = {}
        if min_price is not None:
            price["min"] = min_price
        if max_price is not None:
            price["max"] = max_price
        input_data["searchQueryState"]["filterState"]["price"] = price

    proxies = {"http": proxy_url, "https": proxy_url} if proxy_url else None
    # impersonate makes curl_cffi mimic a real Chrome TLS fingerprint
    response = requests.put(
        url="https://www.zillow.com/async-create-search-page-state",
        json=input_data,
        headers=headers,
        proxies=proxies,
        impersonate="chrome124",
    )
    response.raise_for_status()
    data = response.json()
    return data.get("cat1", {}).get("searchResults", {})

--------------------------------------------------------------------------------