├── src └── oxylabs │ ├── sources │ ├── __init__.py │ ├── bing │ │ ├── __init__.py │ │ └── bing.py │ ├── amazon │ │ └── __init__.py │ ├── google │ │ └── __init__.py │ ├── kroger │ │ ├── __init__.py │ │ └── kroger.py │ ├── wayfair │ │ ├── __init__.py │ │ └── wayfair.py │ ├── universal │ │ ├── __init__.py │ │ └── universal.py │ ├── google_shopping │ │ ├── __init__.py │ │ └── google_shopping.py │ ├── youtube_transcript │ │ ├── __init__.py │ │ └── youtube_transcript.py │ └── response.py │ ├── utils │ ├── types │ │ ├── __init__.py │ │ ├── render.py │ │ ├── locale.py │ │ ├── user_agent_type.py │ │ ├── fn_name.py │ │ ├── source.py │ │ └── domain.py │ ├── __init__.py │ ├── defaults.py │ └── utils.py │ ├── _version.py │ ├── proxy │ ├── __init__.py │ └── proxy.py │ ├── internal │ ├── __init__.py │ ├── client.py │ └── api.py │ └── __init__.py ├── pyproject.toml ├── scripts ├── publish.sh ├── fmt.sh └── tests.sh ├── .gitignore ├── requirements.txt ├── LICENSE ├── setup.py ├── CHANGELOG.md ├── tests ├── proxy │ └── test_proxy.py └── sources │ ├── wayfair │ └── test_wayfair.py │ └── bing │ └── test_bing.py ├── CONTRIBUTING.md └── README.md /src/oxylabs/sources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/oxylabs/utils/types/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/oxylabs/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "2.0.0" 2 | -------------------------------------------------------------------------------- /src/oxylabs/proxy/__init__.py: -------------------------------------------------------------------------------- 1 | from .proxy import ProxyClient 2 | -------------------------------------------------------------------------------- /src/oxylabs/sources/bing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bing import Bing, BingAsync -------------------------------------------------------------------------------- /src/oxylabs/utils/types/render.py: -------------------------------------------------------------------------------- 1 | PNG = "png" 2 | HTML = "html" 3 | -------------------------------------------------------------------------------- /src/oxylabs/sources/amazon/__init__.py: -------------------------------------------------------------------------------- 1 | from .amazon import Amazon, AmazonAsync -------------------------------------------------------------------------------- /src/oxylabs/sources/google/__init__.py: -------------------------------------------------------------------------------- 1 | from .google import Google, GoogleAsync -------------------------------------------------------------------------------- /src/oxylabs/sources/kroger/__init__.py: -------------------------------------------------------------------------------- 1 | from .kroger import Kroger, KrogerAsync -------------------------------------------------------------------------------- /src/oxylabs/sources/wayfair/__init__.py: -------------------------------------------------------------------------------- 1 | from .wayfair import Wayfair, WayfairAsync -------------------------------------------------------------------------------- /src/oxylabs/internal/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .client import AsyncClient, RealtimeClient 2 | -------------------------------------------------------------------------------- /src/oxylabs/sources/universal/__init__.py: -------------------------------------------------------------------------------- 1 | from .universal import Universal, UniversalAsync -------------------------------------------------------------------------------- /src/oxylabs/sources/google_shopping/__init__.py: -------------------------------------------------------------------------------- 1 | from .google_shopping import GoogleShopping, GoogleShoppingAsync -------------------------------------------------------------------------------- /src/oxylabs/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .types import domain, fn_name, locale, render, source, user_agent_type 2 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /src/oxylabs/__init__.py: -------------------------------------------------------------------------------- 1 | from .internal import AsyncClient, RealtimeClient 2 | from .proxy.proxy import ProxyClient 3 | -------------------------------------------------------------------------------- /src/oxylabs/sources/youtube_transcript/__init__.py: -------------------------------------------------------------------------------- 1 | from .youtube_transcript import YoutubeTranscript, YoutubeTranscriptAsync -------------------------------------------------------------------------------- /scripts/publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -rf dist/ build/ oxylabs.egg-info/ 4 | python setup.py sdist bdist_wheel 5 | twine upload dist/* 6 | -------------------------------------------------------------------------------- /scripts/fmt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run isort on the src directory 4 | isort src 5 | 6 | # Run black on the src directory 7 | black --line-length 79 src 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | __pycache__/ 6 | *.egg-info/ 7 | dist/ 8 | build/ 9 | .venv/ 10 | *.json 11 | *.html 12 | .env 13 | .idea 14 | virtual-env -------------------------------------------------------------------------------- /src/oxylabs/utils/types/locale.py: -------------------------------------------------------------------------------- 1 | EN = "en" 2 | RU = "ru" 3 | BY = "by" 4 | DE = "de" 5 | FR = "fr" 6 | ID = "id" 7 | KK = "kk" 8 | TT = "tt" 9 | TR = "tr" 10 | UK = "uk" 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.9.4 2 | aiosignal==1.3.1 3 | async-timeout==4.0.3 4 | asyncio==3.4.3; python_version < '3.7' 5 | attrs==23.2.0 6 | certifi==2024.7.4 7 | charset-normalizer==3.3.2 8 | frozenlist==1.4.1 9 | idna==3.7 10 | 
multidict==6.0.5 11 | requests==2.32.2 12 | urllib3==2.2.2 13 | yarl==1.9.4 14 | -------------------------------------------------------------------------------- /src/oxylabs/utils/types/user_agent_type.py: -------------------------------------------------------------------------------- 1 | MOBILE = "mobile" 2 | TABLET = "tablet" 3 | DESKTOP = "desktop" 4 | MOBILE_IOS = "mobile_ios" 5 | TABLET_IOS = "tablet_ios" 6 | DESKTOP_EDGE = "desktop_edge" 7 | DESKTOP_OPERA = "desktop_opera" 8 | DESKTOP_SAFARI = "desktop_safari" 9 | MOBILE_ANDROID = "mobile_android" 10 | DESKTOP_CHROME = "desktop_chrome" 11 | TABLET_ANDROID = "tablet_android" 12 | DESKTOP_FIREFOX = "desktop_firefox" 13 | -------------------------------------------------------------------------------- /src/oxylabs/utils/defaults.py: -------------------------------------------------------------------------------- 1 | SYNC_BASE_URL = "https://realtime.oxylabs.io/v1/queries" 2 | ASYNC_BASE_URL = "https://data.oxylabs.io/v1/queries" 3 | 4 | PROXY_BASE_URL = "realtime.oxylabs.io" 5 | PROXY_PORT = 60000 6 | NON_UNIVERSAL_DOMAINS = {"google", "bing", "amazon", "wayfair"} 7 | 8 | 9 | DEFAULT_REQUEST_TIMEOUT = 165 10 | DEFAULT_POLL_INTERVAL = 5 11 | DEFAULT_REQUEST_TIMEOUT_ASYNC = 105 12 | DEFAULT_JOB_COMPLETION_TIMEOUT = 50 13 | -------------------------------------------------------------------------------- /src/oxylabs/utils/types/fn_name.py: -------------------------------------------------------------------------------- 1 | ELEMENT_TEXT = "element_text" 2 | XPATH = "xpath" 3 | XPATH_ONE = "xpath_one" 4 | CSS = "css" 5 | CSS_ONE = "css_one" 6 | AMOUNT_FROM_STRING = "amount_from_string" 7 | AMOUNT_RANGE_FROM_STRING = "amount_range_from_string" 8 | JOIN = "join" 9 | REGEX_FIND_ALL = "regex_find_all" 10 | REGEX_SEARCH = "regex_search" 11 | REGEX_SUBSTRING = "regex_substring" 12 | LENGTH = "length" 13 | SELECT_NTH = "select_nth" 14 | CONVERT_TO_FLOAT = "convert_to_float" 15 | CONVERT_TO_INT = "convert_to_int" 16 | CONVERT_TO_STR = "convert_to_str" 17 | AVERAGE = "average" 18 | MAX = "max" 19 | MIN = "min" 20 | PRODUCT = "product" 21 | -------------------------------------------------------------------------------- /scripts/tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run source tests 4 | python -m unittest tests.sources.bing.test_bing.TestBingSearchSync 5 | python -m unittest tests.sources.bing.test_bing.TestBingSearchAsync 6 | python -m unittest tests.sources.bing.test_bing.TestBingUrlSync 7 | python -m unittest tests.sources.bing.test_bing.TestBingUrlAsync 8 | 9 | python -m unittest tests.sources.wayfair.test_wayfair.TestWayfairSearchSync 10 | python -m unittest tests.sources.wayfair.test_wayfair.TestWayfairSearchAsync 11 | python -m unittest tests.sources.wayfair.test_wayfair.TestWayfairUrlSync 12 | python -m unittest tests.sources.wayfair.test_wayfair.TestWayfairUrlAsync 13 | 14 | # Run proxy tests 15 | python -m unittest tests.proxy.test_proxy.TestProxyGet 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024-now Oxylabs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | 4 | def get_version(rel_path): 5 | with open(rel_path, "r") as file: 6 | for line in file.read().splitlines(): 7 | if line.startswith("__version__"): 8 | delim = '"' if '"' in line else "'" 9 | return line.split(delim)[1] 10 | else: 11 | raise RuntimeError("Unable to find version string.") 12 | 13 | 14 | long_description = """ 15 | The official Python library for the Oxylabs Scraper APIs. 16 | 17 | Collect public data at scale with industry-leading web scraping solutions and the world's largest ethical proxy network. 18 | Documentation can be found at https://github.com/oxylabs/oxylabs-sdk-python. 19 | """ 20 | 21 | setup( 22 | name="oxylabs", 23 | version=get_version("src/oxylabs/_version.py"), 24 | description="Official Python library for Oxylabs Scraper APIs", 25 | long_description=long_description, 26 | url="https://oxylabs.io/", 27 | author="Oxylabs", 28 | author_email="support@oxylabs.io", 29 | license="MIT", 30 | package_dir={"": "src"}, 31 | packages=find_packages(where="src"), 32 | install_requires=["aiohttp", "requests"], 33 | ) 34 | -------------------------------------------------------------------------------- /src/oxylabs/utils/types/source.py: -------------------------------------------------------------------------------- 1 | GOOGLE_URL = "google" 2 | GOOGLE_ADS = "google_ads" 3 | GOOGLE_SEARCH = "google_search" 4 | GOOGLE_IMAGES = "google_images" 5 | GOOGLE_SUGGESTIONS = "google_suggest" 6 | GOOGLE_TRAVEL_HOTELS = "google_travel_hotels" 7 | GOOGLE_TRENDS_EXPLORE = "google_trends_explore" 8 | GOOGLE_MAPS = "google_maps" 9 | GOOGLE_LENS = "google_lens" 10 | 11 | BING_URL = "bing" 12 | BING_SEARCH = "bing_search" 13 | 14 | YANDEX_URL = "yandex" 15 | YANDEX_SEARCH = "yandex_search" 16 | 17 | BAIDU_URL = "baidu" 18 | BAIDU_SEARCH = "baidu_search" 19 | 20 | GOOGLE_SHOPPING_URL = "google_shopping" 21 | GOOGLE_SHOPPING_SEARCH = "google_shopping_search" 22 | GOOGLE_SHOPPING_PRODUCT = "google_shopping_product" 23 | GOOGLE_SHOPPING_PRICING = "google_shopping_pricing" 24 | 25 | WAYFAIR = "wayfair" 26 | WAYFAIR_SEARCH = "wayfair_search" 27 | 28 | UNIVERSAL = "universal_ecommerce" 29 | 30 | AMAZON_URL = "amazon" 31 | AMAZON_SEARCH = "amazon_search" 32 | AMAZON_PRODUCT = "amazon_product" 33 | AMAZON_PRICING = "amazon_pricing" 34 | AMAZON_REVIEWS = "amazon_reviews" 35 | AMAZON_QUESTIONS = "amazon_questions" 36 | AMAZON_BEST_SELLERS = "amazon_bestsellers" 37 | AMAZON_SELLERS = "amazon_sellers" 38 | 39 | KROGER = "kroger" 40 | KROGER_PRODUCT = "kroger_product" 
41 | KROGER_SEARCH = "kroger_search" 42 | 43 | YOUTUBE_TRANSCRIPT = "youtube_transcript" 44 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2.0.0 4 | - SERP and Ecommerce source split has been removed. New common modules have been created: API, Client and Response. 5 | - Support for new sources has been added: google_maps, google_lens, kroger, kroger_product, kroger_search, youtube_transcript. 6 | - Added browser_instructions parameter for universal source. 7 | - Docs update. 8 | 9 | ## 1.0.7 10 | - Add an SDK version identifier to all requests. 11 | 12 | ## 1.0.6 13 | 14 | - Security updates in 3rd party libraries. 15 | - Docs update. 16 | 17 | ## 1.0.5 18 | 19 | - Cleaned up tests from obsolete credentials. 20 | 21 | ## 1.0.4 22 | 23 | - Security updates in 3rd party libraries. 24 | 25 | ## 1.0.3 26 | 27 | - Updated import paths to resolve module not found errors. 28 | - Scraper methods now accept direct parameters and include a response object for easier access to results and metadata. 29 | - Replaced print statements with the logging module for better error handling. 30 | - Introduced AsyncClient, RealtimeClient, and ProxyClient to support all sources, providing a more organized structure. 31 | 32 | ## 1.0.2 33 | 34 | - Fixed function and class naming. 35 | - Added request timeout in proxy. 36 | - Removed Yandex and Baidu sources from SERP. 37 | 38 | ## 1.0.1 39 | 40 | - Fixed issue with uploaded package. 41 | 42 | ## 1.0.0 43 | 44 | - Initial release of Oxylabs SDK. 45 | - Scraper APIs: 46 | - SERP 47 | - Ecommerce 48 | - Integration methods: 49 | - Proxy 50 | - Push-Pull 51 | - Realtime 52 | -------------------------------------------------------------------------------- /src/oxylabs/internal/client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from oxylabs.internal.api import APICredentials, RealtimeAPI, AsyncAPI 3 | from oxylabs.sources.amazon import Amazon, AmazonAsync 4 | from oxylabs.sources.bing import Bing, BingAsync 5 | from oxylabs.sources.google import Google, GoogleAsync 6 | from oxylabs.sources.google_shopping import GoogleShopping, GoogleShoppingAsync 7 | from oxylabs.sources.kroger import Kroger, KrogerAsync 8 | from oxylabs.sources.universal import Universal, UniversalAsync 9 | from oxylabs.sources.wayfair import Wayfair, WayfairAsync 10 | from oxylabs.sources.youtube_transcript import YoutubeTranscript, YoutubeTranscriptAsync 11 | 12 | # Configure logging 13 | logging.basicConfig(level=logging.INFO) 14 | logger = logging.getLogger(__name__) 15 | 16 | class RealtimeClient: 17 | def __init__(self, username: str, password: str, **kwargs) -> None: 18 | """ 19 | Initializes an instance of the RealtimeClient class. 20 | 21 | Args: 22 | username (str): The username for API authentication. 23 | password (str): The password for API authentication. 
24 | """ 25 | api = RealtimeAPI(APICredentials(username, password), **kwargs) 26 | self.amazon = Amazon(api) 27 | self.bing = Bing(api) 28 | self.google = Google(api) 29 | self.google_shopping = GoogleShopping(api) 30 | self.kroger = Kroger(api) 31 | self.universal = Universal(api) 32 | self.wayfair = Wayfair(api) 33 | self.youtube_transcript = YoutubeTranscript(api) 34 | 35 | class AsyncClient: 36 | def __init__(self, username: str, password: str, **kwargs) -> None: 37 | """ 38 | Initializes an instance of the AsyncClient class. 39 | 40 | Args: 41 | username (str): The username for API authentication. 42 | password (str): The password for API authentication. 43 | """ 44 | api = AsyncAPI(APICredentials(username, password), **kwargs) 45 | self.amazon = AmazonAsync(api) 46 | self.bing = BingAsync(api) 47 | self.google = GoogleAsync(api) 48 | self.google_shopping = GoogleShoppingAsync(api) 49 | self.kroger = KrogerAsync(api) 50 | self.universal = UniversalAsync(api) 51 | self.wayfair = WayfairAsync(api) 52 | self.youtube_transcript = YoutubeTranscriptAsync(api) 53 | 54 | 55 | -------------------------------------------------------------------------------- /tests/proxy/test_proxy.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest.mock import patch, Mock 3 | from oxylabs.proxy import ProxyClient 4 | 5 | class TestProxyGet(unittest.TestCase): 6 | @patch('requests.Session') 7 | def test_proxy_get_with_timeout(self, MockSession): 8 | """ 9 | Tests the Proxy.get method for correct timeout handling and header 10 | setup. 11 | 12 | This test uses a mocked requests.Session to simulate HTTP responses and 13 | validate 14 | the interaction, ensuring the Proxy class constructs requests with the 15 | correct headers and timeout. 16 | 17 | Args: 18 | MockSession (MagicMock): A mock of the requests.Session to verify 19 | request execution. 20 | 21 | Steps: 22 | 1. Set up a mock response to simulate an HTTP response. 23 | 2. Configure Proxy instance with headers. 24 | 3. Make a request using Proxy.get with a timeout and verify the method 25 | call and response. 26 | 27 | Assertions: 28 | - Verify correct URL and timeout parameters are passed to the session's 29 | get method. 30 | - Check the response text matches expected content. 
31 | """ 32 | 33 | # Setup the mock response object with desired properties (like .text) 34 | mock_response = Mock() 35 | mock_response.status_code = 200 36 | mock_response.text = "Mock response content" 37 | 38 | # Setup the mock session to return the mock response on .get 39 | session_instance = MockSession.return_value 40 | session_instance.get.return_value = mock_response 41 | 42 | # Initialize the Proxy with credentials 43 | proxy = ProxyClient("CHANGEME", "CHANGEME") 44 | 45 | # Customize headers (optional) 46 | proxy.add_user_agent_header("desktop_chrome") 47 | proxy.add_geo_location_header("Germany") 48 | proxy.add_render_header("html") 49 | 50 | # Make the request using the proxy to the test URL 51 | result = proxy.get("https://www.example.com", request_timeout=10) 52 | 53 | # Assertions to ensure the request was made correctly 54 | session_instance.get.assert_called_with( 55 | "https://www.example.com", timeout=10 56 | ) 57 | self.assertEqual(result.text, "Mock response content") 58 | -------------------------------------------------------------------------------- /src/oxylabs/utils/types/domain.py: -------------------------------------------------------------------------------- 1 | RU = "ru" 2 | UA = "ua" 3 | TR = "tr" 4 | CN = "cn" 5 | COM_AI = "com.ai" 6 | COM_PR = "com.pr" 7 | SR = "sr" 8 | ML = "ml" 9 | COM_LB = "com.lb" 10 | BF = "bf" 11 | FM = "fm" 12 | COM_MX = "com.mx" 13 | BJ = "bj" 14 | EE = "ee" 15 | MV = "mv" 16 | NE = "ne" 17 | AT = "at" 18 | GG = "gg" 19 | AE = "ae" 20 | CO_UZ = "co.uz" 21 | AM = "am" 22 | COM_SA = "com.sa" 23 | TL = "tl" 24 | COM_NA = "com.na" 25 | COM_BH = "com.bh" 26 | DK = "dk" 27 | COM_SB = "com.sb" 28 | RO = "ro" 29 | BY = "by" 30 | COM_CO = "com.co" 31 | COM_GI = "com.gi" 32 | CO_ID = "co.id" 33 | MS = "ms" 34 | COM_NG = "com.ng" 35 | IS = "is" 36 | COM_EG = "com.eg" 37 | COM_ET = "com.et" 38 | COM_AF = "com.af" 39 | CH = "ch" 40 | CO_AO = "co.ao" 41 | CL = "cl" 42 | CO_ZA = "co.za" 43 | COM_NF = "com.nf" 44 | DK_RO = "ro" 45 | MD = "md" 46 | ES = "es" 47 | BJ_YO = "bj" 48 | HU = "hu" 49 | DJ = "dj" 50 | COM_MT = "com.mt" 51 | COM_EC = "com.ec" 52 | CO_IN = "co.in" 53 | LK = "lk" 54 | CO_KE = "co.ke" 55 | GY = "gy" 56 | BE = "be" 57 | VG = "vg" 58 | CO_BW = "co.bw" 59 | COM_VN = "com.vn" 60 | CO_TZ = "co.tz" 61 | NE_HA = "ne" 62 | CO_ZW = "co.zw" 63 | TO = "to" 64 | KZ = "kz" 65 | COM_UY = "com.uy" 66 | IQ = "iq" 67 | COM_TW = "com.tw" 68 | RW = "rw" 69 | AD = "ad" 70 | COM_LY = "com.ly" 71 | AL = "al" 72 | CO_IL = "co.il" 73 | KI = "ki" 74 | COM = "com" 75 | MU = "mu" 76 | SC = "sc" 77 | COM_HK = "com.hk" 78 | COM_PA = "com.pa" 79 | CA = "ca" 80 | GE = "ge" 81 | COM_GT = "com.gt" 82 | LI = "li" 83 | COM_KH = "com.kh" 84 | CO_CR = "co.cr" 85 | COM_BO = "com.bo" 86 | CO_VE = "co.ve" 87 | COM_NI = "com.ni" 88 | TD = "td" 89 | CF = "cf" 90 | TK = "tk" 91 | BI = "bi" 92 | MG = "mg" 93 | COM_BD = "com.bd" 94 | COM_BZ = "com.bz" 95 | GM = "gm" 96 | LA = "la" 97 | COM_KW = "com.kw" 98 | CM = "cm" 99 | HT = "ht" 100 | NO = "no" 101 | COM_FJ = "com.fj" 102 | TM = "tm" 103 | COM_SL = "com.sl" 104 | COM_MM = "com.mm" 105 | IM = "im" 106 | SI = "si" 107 | COM_QA = "com.qa" 108 | COM_PE = "com.pe" 109 | CD = "cd" 110 | TT = "tt" 111 | COM_TR = "com.tr" 112 | TG = "tg" 113 | CO_LS = "co.ls" 114 | GR = "gr" 115 | GL = "gl" 116 | MK = "mk" 117 | CO_ZM = "co.zm" 118 | COM_PH = "com.ph" 119 | IT = "it" 120 | CO_JP = "co.jp" 121 | WS = "ws" 122 | COM_AR = "com.ar" 123 | CO_MZ = "co.mz" 124 | AZ = "az" 125 | CO_CK = "co.ck" 126 | FI = "fi" 127 | COM_BN = 
"com.bn" 128 | PT = "pt" 129 | COM_TJ = "com.tj" 130 | COM_CY = "com.cy" 131 | CV = "cv" 132 | COM_MY = "com.my" 133 | IE = "ie" 134 | COM_SG = "com.sg" 135 | DE = "de" 136 | BA = "ba" 137 | LU = "lu" 138 | BG = "bg" 139 | CO_VI = "co.vi" 140 | COM_OM = "com.om" 141 | AS = "as" 142 | DZ = "dz" 143 | FR = "fr" 144 | LV = "lv" 145 | LT = "lt" 146 | PS = "ps" 147 | SE = "se" 148 | CG = "cg" 149 | NR = "nr" 150 | CO_UG = "co.ug" 151 | COM_VC = "com.vc" 152 | JO = "jo" 153 | CO_TH = "co.th" 154 | RS = "rs" 155 | BS = "bs" 156 | COM_PK = "com.pk" 157 | CO_UK = "co.uk" 158 | SO = "so" 159 | GA = "ga" 160 | COM_UA = "com.ua" 161 | HR = "hr" 162 | COM_CU = "com.cu" 163 | SK = "sk" 164 | COM_NP = "com.np" 165 | NU = "nu" 166 | MN = "mn" 167 | VU = "vu" 168 | NL = "nl" 169 | PT_ST = "st" 170 | COM_BR = "com.br" 171 | TH = "co.th" 172 | MW = "mw" 173 | COM_PG = "com.pg" 174 | PL = "pl" 175 | CO_NZ = "co.nz" 176 | KG = "kg" 177 | CI = "ci" 178 | SH = "sh" 179 | COM_DO = "com.do" 180 | SN = "sn" 181 | COM_JM = "com.jm" 182 | CO_MA = "co.ma" 183 | COM_TN = "com.tn" 184 | DM = "dm" 185 | COM_SV = "com.sv" 186 | COM_SG_2 = "com.sg" 187 | GP = "gp" 188 | ME = "me" 189 | COM_AG = "com.ag" 190 | CZ = "cz" 191 | COM_PY = "com.py" 192 | MR_IN = "co.in" 193 | COM_GH = "com.gh" 194 | ST_LS = "co.ls" 195 | BT = "bt" 196 | RU_KZ = "kz" 197 | IT_SM = "sm" 198 | JE = "je" 199 | TN = "tn" 200 | COM_AU = "com.au" 201 | ME_ME = "me" 202 | PN = "pn" 203 | HN = "hn" 204 | CO_KR = "co.kr" 205 | AR = "com.ar" 206 | BO = "com.bo" 207 | BZ = "com.bz" 208 | UY = "com.uy" 209 | COM_VE = "com.ve" 210 | ID_TL = "tl" 211 | -------------------------------------------------------------------------------- /tests/sources/wayfair/test_wayfair.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from oxylabs.internal.api import RealtimeAPI, AsyncAPI, APICredentials 3 | from oxylabs.sources.wayfair import Wayfair, WayfairAsync 4 | from oxylabs.utils.types import user_agent_type 5 | 6 | class TestWayfairSearchSync(unittest.TestCase): 7 | def test_wayfair_search_sync(self): 8 | """ 9 | Tests synchronous search functionality for Wayfair to ensure 10 | it returns expected results. 11 | 12 | This test mocks the get_response method to simulate the API responses 13 | and checks that the method handles the search query 14 | correctly and returns the correct mock response. 15 | """ 16 | api = RealtimeAPI(APICredentials('user', 'pass')) 17 | api.get_response = lambda payload, config: {"mocked_response": "search_results"} 18 | wayfair = Wayfair(api) 19 | query = "furniture" 20 | opts = {"start_page": 1, "pages": 1, "limit": 24} 21 | 22 | result = wayfair.scrape_search(query, opts) 23 | self.assertIn("mocked_response", result.raw) 24 | self.assertEqual(result.raw["mocked_response"], "search_results") 25 | 26 | class TestWayfairUrlSync(unittest.TestCase): 27 | def test_wayfair_url_sync(self): 28 | """ 29 | Tests the Wayfair URL scraping functionality in a 30 | synchronous manner. 31 | 32 | This test mocks the get_response method to return controlled responses, 33 | ensuring that the method correctly processes the URL and user agent 34 | type, returning the expected data. 
35 | """ 36 | api = RealtimeAPI(APICredentials('user', 'pass')) 37 | api.get_response = lambda payload, config: {"mocked_response": "url_results"} 38 | wayfair = Wayfair(api) 39 | url = "https://www.wayfair.com/furniture/sb0/sofas-c413892.html" 40 | opts = {"user_agent_type": user_agent_type.DESKTOP} 41 | 42 | result = wayfair.scrape_url(url, opts) 43 | self.assertIn("mocked_response", result.raw) 44 | self.assertEqual(result.raw["mocked_response"], "url_results") 45 | 46 | class TestWayfairSearchAsync(unittest.IsolatedAsyncioTestCase): 47 | async def test_wayfair_search_async(self): 48 | """ 49 | Asynchronously tests Wayfair search to validate the async 50 | API handling. 51 | 52 | Uses a mocked asynchronous response to verify that the search query 53 | processing is handled correctly and that the async functionality 54 | returns expected results. 55 | """ 56 | api = AsyncAPI(APICredentials('user', 'pass')) 57 | async def mock_get_resp(payload, config): 58 | return {"mocked_response": "async_search_results"} 59 | api.get_response = mock_get_resp 60 | wayfair = WayfairAsync(api) 61 | query = "furniture" 62 | opts = {"start_page": 1, "pages": 1, "limit": 24} 63 | 64 | result = await wayfair.scrape_search(query, opts) 65 | self.assertIn("mocked_response", result.raw) 66 | self.assertEqual(result.raw["mocked_response"], "async_search_results") 67 | 68 | class TestWayfairUrlAsync(unittest.IsolatedAsyncioTestCase): 69 | async def test_wayfair_url_async(self): 70 | """ 71 | Asynchronously tests Wayfair URL scraping functionality. 72 | 73 | This test mocks the get_response method to provide controlled async 74 | responses, verifying that the URL and user agent options are processed 75 | correctly and yield expected outcomes. 76 | """ 77 | api = AsyncAPI(APICredentials('user', 'pass')) 78 | async def mock_get_resp(payload, config): 79 | return {"mocked_response": "async_url_results"} 80 | api.get_response = mock_get_resp 81 | wayfair = WayfairAsync(api) 82 | url = "https://www.wayfair.com/furniture/sb0/sofas-c413892.html" 83 | opts = {"user_agent_type": user_agent_type.DESKTOP} 84 | 85 | result = await wayfair.scrape_url(url, opts) 86 | self.assertIn("mocked_response", result.raw) 87 | self.assertEqual(result.raw["mocked_response"], "async_url_results") 88 | -------------------------------------------------------------------------------- /src/oxylabs/sources/youtube_transcript/youtube_transcript.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from oxylabs.internal.api import RealtimeAPI, AsyncAPI 4 | from oxylabs.sources.response import Response 5 | from oxylabs.utils.types import source 6 | from oxylabs.utils.utils import prepare_config 7 | 8 | 9 | class YoutubeTranscript: 10 | def __init__(self, api_instance:RealtimeAPI) -> None: 11 | """ 12 | Initializes an instance of the YoutubeTranscript class. 13 | 14 | Args: 15 | api_instance: An instance of the RealtimeAPI class used for making requests. 16 | """ 17 | self._api_instance = api_instance 18 | 19 | def scrape_transcript( 20 | self, 21 | query: str, 22 | context: Optional[list] = None, 23 | callback_url: Optional[str] = None, 24 | request_timeout: Optional[int] = 165, 25 | **kwargs 26 | ) -> Response: 27 | """ 28 | Scrapes a YouTube video transcript for a given query. 29 | 30 | Args: 31 | query (str): A YouTube video ID 32 | context: Optional[list], 33 | callback_url (Optional[str]): URL to your callback endpoint. 
34 | request_timeout (int | 165, optional): The interval in seconds for 35 | the request to time out if no response is returned. 36 | Defaults to 165. 37 | 38 | Returns: 39 | Response: The response from the server after the job is completed. 40 | """ 41 | 42 | config = prepare_config(request_timeout=request_timeout) 43 | payload = { 44 | "source": source.YOUTUBE_TRANSCRIPT, 45 | "query": query, 46 | "context": context, 47 | "callback_url": callback_url, 48 | **kwargs, 49 | } 50 | api_response = self._api_instance.get_response(payload, config) 51 | return Response(api_response) 52 | 53 | class YoutubeTranscriptAsync: 54 | def __init__(self, api_instance:AsyncAPI) -> None: 55 | """ 56 | Initializes an instance of the YoutubeTranscriptAsync class. 57 | 58 | Args: 59 | api_instance: An instance of the AsyncAPI class used for making requests. 60 | """ 61 | self._api_instance = api_instance 62 | 63 | async def scrape_transcript( 64 | self, 65 | query: str, 66 | context: Optional[list] = None, 67 | callback_url: Optional[str] = None, 68 | request_timeout: Optional[int] = 165, 69 | job_completion_timeout: Optional[int] = None, 70 | poll_interval: Optional[int] = None, 71 | **kwargs 72 | ) -> Response: 73 | """ 74 | Asynchronously scrapes a YouTube video transcript for a given query. 75 | 76 | Args: 77 | query (str): A YouTube video ID 78 | context: Optional[list], 79 | callback_url (Optional[str]): URL to your callback endpoint. 80 | request_timeout (int | 165, optional): The interval in seconds for 81 | the request to time out if no response is returned. 82 | Defaults to 165. 83 | poll_interval (Optional[int]): The interval in seconds to poll 84 | the server for a response. 85 | job_completion_timeout (Optional[int]): The interval in 86 | seconds for the job to time out if no response is returned. 87 | 88 | Returns: 89 | Response: The response from the server after the job is completed. 90 | """ 91 | 92 | config = prepare_config( 93 | request_timeout=request_timeout, 94 | poll_interval=poll_interval, 95 | job_completion_timeout=job_completion_timeout, 96 | async_integration=True, 97 | ) 98 | payload = { 99 | "source": source.YOUTUBE_TRANSCRIPT, 100 | "query": query, 101 | "context": context, 102 | "callback_url": callback_url, 103 | **kwargs, 104 | } 105 | api_response = await self._api_instance.get_response(payload, config) 106 | return Response(api_response) 107 | -------------------------------------------------------------------------------- /tests/sources/bing/test_bing.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from oxylabs.utils.types import user_agent_type 3 | from oxylabs.internal import AsyncClient, RealtimeClient 4 | 5 | class TestBingSearchSync(unittest.TestCase): 6 | """ 7 | Test case for synchronous Bing search. 8 | 9 | This test case tests the functionality of the synchronous Bing search 10 | in the RealtimeClient class. It uses a mock response to simulate the 11 | behavior of the Bing search. 12 | """ 13 | 14 | def test_bing_search_sync(self): 15 | """ 16 | Test the synchronous Bing search. 17 | 18 | This test creates a RealtimeClient, finds an api instance that is used for requests and 19 | sets its get_response method to a lambda function that returns a mock response. 20 | It then calls the scrape_search method with a query and checks that the returned result 21 | contains the mock response. 
22 | """ 23 | client = RealtimeClient('user', 'pass') 24 | api = client.bing._api_instance 25 | api._get_http_response = lambda payload, method, config: {"mocked_response": "search_results"} 26 | query = "nike" 27 | 28 | result = client.bing.scrape_search(query, domain="com", limit=10) 29 | self.assertIn("mocked_response", result.raw) 30 | self.assertEqual(result.raw["mocked_response"], "search_results") 31 | 32 | class TestBingUrlSync(unittest.TestCase): 33 | """ 34 | Test case for synchronous Bing URL scraping. 35 | 36 | This test case tests the functionality of the synchronous Bing URL scraping 37 | in the RealtimeClient class. It uses a mock response to simulate the 38 | behavior of the Bing URL scraping. 39 | """ 40 | 41 | def test_bing_url_sync(self): 42 | """ 43 | Test the synchronous Bing URL scraping. 44 | 45 | This test creates a RealtimeClient, finds an api instance that is used for requests and 46 | sets its get_response method to a lambda function that returns a mock response. 47 | It then calls the scrape_url method with a URL and checks that the returned result 48 | contains the mock response. 49 | """ 50 | client = RealtimeClient('user', 'pass') 51 | api = client.bing._api_instance 52 | api._get_http_response = lambda payload, method, config: {"mocked_response": "url_results"} 53 | url = "https://www.bing.com/search?q=nike" 54 | opts = {"user_agent_type": user_agent_type.DESKTOP} 55 | 56 | result = client.bing.scrape_url(url, opts) 57 | self.assertIn("mocked_response", result.raw) 58 | self.assertEqual(result.raw["mocked_response"], "url_results") 59 | 60 | 61 | class TestBingSearchAsync(unittest.IsolatedAsyncioTestCase): 62 | """ 63 | Test case for asynchronous Bing search. 64 | 65 | This test case tests the functionality of the asynchronous Bing search 66 | in the AsyncClient class. It uses a mock response to simulate the 67 | behavior of the Bing search. 68 | """ 69 | 70 | async def test_bing_search_async(self): 71 | """ 72 | Test the asynchronous Bing search. 73 | 74 | This test creates an AsyncClient, finds an api instance that is used for requests and 75 | sets its get_response method to a mock function that returns a mock response. 76 | It then calls the scrape_search method with a query and checks that the returned result 77 | contains the mock response. 78 | """ 79 | client = AsyncClient('user', 'pass') 80 | api = client.bing._api_instance 81 | async def mock_get_resp(payload, config): 82 | return {"mocked_response": "async_search_results"} 83 | api.get_response = mock_get_resp 84 | query = "nike" 85 | opts = {"domain": "com", "limit": 10} 86 | 87 | result = await client.bing.scrape_search(query, opts) 88 | self.assertIn("mocked_response", result.raw) 89 | self.assertEqual(result.raw["mocked_response"], "async_search_results") 90 | 91 | class TestBingUrlAsync(unittest.IsolatedAsyncioTestCase): 92 | """ 93 | Test case for asynchronous Bing URL scraping. 94 | 95 | This test case tests the functionality of the asynchronous Bing URL scraping 96 | in the AsyncClient class. It uses a mock response to simulate the 97 | behavior of the Bing URL scraping. 98 | """ 99 | 100 | async def test_bing_url_async(self): 101 | """ 102 | Test the asynchronous Bing URL scraping. 103 | 104 | This test creates an AsyncClient, finds an api instance that is used for requests and 105 | sets its get_response method to a mock function that returns a mock response. 106 | It then calls the scrape_url method with a URL and checks that the returned result 107 | contains the mock response. 
108 | """ 109 | client = AsyncClient('user', 'pass') 110 | api = client.bing._api_instance 111 | async def mock_get_resp(payload, config): 112 | return {"mocked_response": "async_url_results"} 113 | api.get_response = mock_get_resp 114 | url = "https://www.bing.com/search?q=nike" 115 | opts = {"user_agent_type": user_agent_type.DESKTOP} 116 | 117 | result = await client.bing.scrape_url(url, opts) 118 | self.assertIn("mocked_response", result.raw) 119 | self.assertEqual(result.raw["mocked_response"], "async_url_results") 120 | -------------------------------------------------------------------------------- /src/oxylabs/proxy/proxy.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from platform import python_version, architecture 3 | from typing import Optional 4 | from urllib.parse import quote, urlparse 5 | 6 | import requests 7 | 8 | from oxylabs.utils.defaults import ( 9 | NON_UNIVERSAL_DOMAINS, 10 | PROXY_BASE_URL, 11 | PROXY_PORT, 12 | ) 13 | from oxylabs.utils.utils import prepare_config 14 | from oxylabs._version import __version__ 15 | 16 | # Configure logging 17 | logging.basicConfig(level=logging.INFO) 18 | logger = logging.getLogger(__name__) 19 | 20 | 21 | class ProxyClient: 22 | def __init__(self, username: str, password: str) -> None: 23 | """ 24 | Initializes a ProxyClient object with the provided username and password. 25 | 26 | Args: 27 | username (str): The username for the proxy authentication. 28 | password (str): The password for the proxy authentication. 29 | """ 30 | self._username = quote(username) 31 | self._password = quote(password) 32 | self._proxy_url = self._build_proxy_url() 33 | self._session = requests.Session() 34 | self._session.proxies = { 35 | "http": self._proxy_url, 36 | "https": self._proxy_url, 37 | } 38 | self._session.verify = False 39 | self._url_to_scrape = None 40 | bits, _ = architecture() 41 | self._session.headers["x-oxylabs-sdk"] = f"oxylabs-sdk-python/{__version__} ({python_version()}; {bits})" 42 | 43 | 44 | def _build_proxy_url(self) -> str: 45 | """ 46 | Build the proxy URL using configured constants. 47 | 48 | Returns: 49 | str: The constructed proxy URL. 50 | """ 51 | return f"http://{self._username}:{self._password}@{PROXY_BASE_URL}:{PROXY_PORT}" 52 | 53 | def get( 54 | self, url: str, request_timeout: Optional[int] = None 55 | ) -> Optional[requests.Response]: 56 | """ 57 | Sends a GET request to the specified URL using the session object. 58 | 59 | Args: 60 | url (str): The URL to send the GET request to. 61 | request_timeout (Optional[int]): The request timeout in seconds. 62 | Defaults to None (no timeout). 63 | 64 | Returns: 65 | Optional[requests.Response]: The response object returned by the 66 | GET request, or None if an error occurred. 67 | """ 68 | try: 69 | config = prepare_config(request_timeout=request_timeout) 70 | self._url_to_scrape = url 71 | response = self._session.get( 72 | url, timeout=config["request_timeout"] 73 | ) 74 | response.raise_for_status() 75 | return response 76 | except requests.exceptions.Timeout: 77 | logger.error( 78 | f"Timeout error. The request to {url} has timed out after {request_timeout} seconds." 79 | ) 80 | return None 81 | except requests.exceptions.RequestException as e: 82 | logger.error(f"Request failed: {e}") 83 | return None 84 | 85 | def add_user_agent_header(self, user_agent_type: str) -> None: 86 | """ 87 | Adds a user agent header to the session headers. 
88 | There is no way to indicate a specific User-Agent, but you can let us 89 | know which user-agent type you would like us to use. 90 | 91 | Args: 92 | user_agent_type (str): The user agent to add. Must be one of the 93 | following: 94 | - "desktop" - A User-Agent of a desktop browser. 95 | - "desktop_chrome" - A User-Agent of one of the latest versions of 96 | a desktop Chrome browser. 97 | - "desktop_edge" - A User-Agent of one of the latest versions of 98 | a desktop Edge browser. 99 | - "desktop_firefox" - A User-Agent of one of the latest versions of 100 | a desktop Firefox browser. 101 | - "desktop_opera" - A User-Agent of one of the latest versions of 102 | a desktop Opera browser. 103 | - "desktop_safari" - A User-Agent of one of the latest versions of 104 | a desktop Safari browser. 105 | - "mobile" - A User-Agent of a mobile browser. 106 | - "mobile_android" - A User-Agent of one of the latest versions of 107 | an Android mobile browser. 108 | - "mobile_ios" - A User-Agent of one of the latest versions of 109 | an iOS mobile browser. 110 | - "tablet" - A User-Agent of a tablet browser. 111 | - "tablet_android" - A User-Agent of one of the latest versions of 112 | an Android tablet browser. 113 | - "tablet_ios" - A User-Agent of one of the latest versions of 114 | an iOS tablet browser. 115 | 116 | Returns: 117 | None 118 | """ 119 | self._session.headers["x-oxylabs-user-agent-type"] = user_agent_type 120 | 121 | def add_render_header(self, render: str) -> None: 122 | """ 123 | Adds a render header to the session headers. 124 | 125 | Args: 126 | render (str): The render type to add. Must be one of the following: 127 | - "html" - The output will include an HTML result. 128 | - "png" - The output will include a PNG screenshot of the result. 129 | 130 | Returns: 131 | None 132 | """ 133 | self._session.headers["x-oxylabs-render"] = render 134 | 135 | def add_parse_header( 136 | self, parse: bool = False, parsing_instructions: Optional[dict] = None 137 | ) -> None: 138 | """ 139 | Adds a parse header to the session headers. 140 | 141 | Args: 142 | parse (bool, optional): Whether to enable parsing. Defaults to 143 | False. 144 | parsing_instructions (dict, optional): Instructions for parsing. 145 | Defaults to None. 146 | 147 | Returns: 148 | None 149 | """ 150 | 151 | if parse or parsing_instructions: 152 | self._session.headers["x-oxylabs-parse"] = "1" 153 | if self._is_universal_source(): 154 | self._session.headers["x-oxylabs-parser-type"] = ( 155 | "universal_ecommerce" 156 | ) 157 | else: 158 | self._session.headers.pop("x-oxylabs-parser-type", None) 159 | else: 160 | self._session.headers.pop("x-oxylabs-parse", None) 161 | 162 | def _is_universal_source(self) -> bool: 163 | """ 164 | Checks if the URL to scrape belongs to a universal source. 165 | 166 | Returns: 167 | bool: True if the URL belongs to a universal source, False 168 | otherwise. 169 | """ 170 | parsed_url = urlparse(self._url_to_scrape) 171 | if any( 172 | domain in parsed_url.netloc 173 | for domain in NON_UNIVERSAL_DOMAINS 174 | ): 175 | return False 176 | 177 | return True 178 | 179 | def add_geo_location_header(self, geo_location: str) -> None: 180 | """ 181 | Adds a geo location header to the session headers. 182 | In some cases, you may need to indicate the geographical location that 183 | the result should be adapted for. 184 | 185 | Args: 186 | geo_location (str): The geo location to add. Accepted values depend 187 | on the URL you would like us to scrape.
188 | 189 | Returns: 190 | None 191 | """ 192 | self._session.headers["x-oxylabs-geo-location"] = geo_location 193 | -------------------------------------------------------------------------------- /src/oxylabs/sources/universal/universal.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from oxylabs.internal.api import RealtimeAPI, AsyncAPI 4 | from oxylabs.sources.response import Response 5 | from oxylabs.utils.types import source 6 | from oxylabs.utils.utils import ( 7 | check_parsing_instructions_validity, 8 | prepare_config, 9 | ) 10 | 11 | 12 | class Universal: 13 | def __init__(self, api_instance:RealtimeAPI) -> None: 14 | """ 15 | Initializes an instance of the Universal class. 16 | 17 | Args: 18 | api_instance: An instance of the RealtimeAPI class used for making requests. 19 | """ 20 | self._api_instance = api_instance 21 | 22 | def scrape_url( 23 | self, 24 | url: str, 25 | user_agent_type: Optional[str] = None, 26 | geo_location: Optional[str] = None, 27 | locale: Optional[str] = None, 28 | render: Optional[str] = None, 29 | content_encoding: Optional[str] = None, 30 | context: Optional[list] = None, 31 | callback_url: Optional[str] = None, 32 | parse: Optional[bool] = None, 33 | parser_type: Optional[str] = None, 34 | parsing_instructions: Optional[dict] = None, 35 | browser_instructions: Optional[dict] = None, 36 | request_timeout: Optional[int] = 165, 37 | **kwargs 38 | ) -> Response: 39 | """ 40 | Scrapes Universal search results for a given URL. 41 | 42 | Args: 43 | url (str): The URL to be scraped. 44 | user_agent_type (Optional[str]): Device type and browser. 45 | geo_location (Optional[str]): None, 46 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 47 | render (Optional[str]): Enables JavaScript rendering. 48 | content_encoding: Add this parameter if you are downloading images. 49 | context: Optional[list], 50 | callback_url (Optional[str]): URL to your callback endpoint. 51 | parse (Optional[bool]): true will return structured data. 52 | parser_type: Set the value to ecommerce_product to access our AI-powered Adaptive Parser. 53 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 54 | browser_instructions (Optional[dict]): Browser instructions that are executed when rendering JavaScript. 55 | request_timeout (int | 165, optional): The interval in seconds for 56 | the request to time out if no response is returned. 57 | Defaults to 165. 58 | 59 | Returns: 60 | Response: The response from the server after the job is completed. 61 | """ 62 | 63 | config = prepare_config(request_timeout=request_timeout) 64 | payload = { 65 | "source": source.UNIVERSAL, 66 | "url": url, 67 | "user_agent_type": user_agent_type, 68 | "geo_location": geo_location, 69 | "locale": locale, 70 | "render": render, 71 | "content_encoding": content_encoding, 72 | "context": context, 73 | "callback_url": callback_url, 74 | "parse": parse, 75 | "parser_type": parser_type, 76 | "parsing_instructions": parsing_instructions, 77 | "browser_instructions": browser_instructions, 78 | **kwargs, 79 | } 80 | check_parsing_instructions_validity(parsing_instructions) 81 | api_response = self._api_instance.get_response(payload, config) 82 | return Response(api_response) 83 | 84 | class UniversalAsync: 85 | def __init__(self, api_instance:AsyncAPI) -> None: 86 | """ 87 | Initializes an instance of the Universal class. 
88 | 89 | Args: 90 | api_instance: An instance of the AsyncAPI class used for making requests. 91 | """ 92 | self._api_instance = api_instance 93 | 94 | async def scrape_url( 95 | self, 96 | url: str, 97 | user_agent_type: Optional[str] = None, 98 | geo_location: Optional[str] = None, 99 | locale: Optional[str] = None, 100 | render: Optional[str] = None, 101 | content_encoding: Optional[str] = None, 102 | context: Optional[list] = None, 103 | callback_url: Optional[str] = None, 104 | parse: Optional[bool] = None, 105 | parser_type: Optional[str] = None, 106 | parsing_instructions: Optional[dict] = None, 107 | browser_instructions: Optional[dict] = None, 108 | request_timeout: Optional[int] = 165, 109 | job_completion_timeout: Optional[int] = None, 110 | poll_interval: Optional[int] = None, 111 | **kwargs 112 | ) -> Response: 113 | """ 114 | Asynchronously scrapes Universal search results for a given URL. 115 | 116 | Args: 117 | url (str): The URL to be scraped. 118 | user_agent_type (Optional[str]): Device type and browser. 119 | geo_location (Optional[str]): None, 120 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 121 | render (Optional[str]): Enables JavaScript rendering. 122 | content_encoding: Add this parameter if you are downloading images. 123 | context: Optional[list], 124 | callback_url (Optional[str]): URL to your callback endpoint. 125 | parse (Optional[bool]): true will return structured data. 126 | parser_type: Set the value to ecommerce_product to access our AI-powered Adaptive Parser. 127 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 128 | browser_instructions (Optional[dict]): Browser instructions that are executed when rendering JavaScript. 129 | request_timeout (int | 165, optional): The interval in seconds for 130 | the request to time out if no response is returned. 131 | Defaults to 165. 132 | poll_interval (Optional[int]): The interval in seconds to poll 133 | the server for a response. 134 | job_completion_timeout (Optional[int]): The interval in 135 | seconds for the job to time out if no response is returned. 136 | 137 | Returns: 138 | Response: The response from the server after the job is completed. 139 | """ 140 | 141 | config = prepare_config( 142 | request_timeout=request_timeout, 143 | poll_interval=poll_interval, 144 | job_completion_timeout=job_completion_timeout, 145 | async_integration=True, 146 | ) 147 | payload = { 148 | "source": source.UNIVERSAL, 149 | "url": url, 150 | "user_agent_type": user_agent_type, 151 | "geo_location": geo_location, 152 | "locale": locale, 153 | "render": render, 154 | "content_encoding": content_encoding, 155 | "context": context, 156 | "callback_url": callback_url, 157 | "parse": parse, 158 | "parser_type": parser_type, 159 | "parsing_instructions": parsing_instructions, 160 | "browser_instructions": browser_instructions, 161 | **kwargs, 162 | } 163 | check_parsing_instructions_validity(parsing_instructions) 164 | api_response = await self._api_instance.get_response(payload, config) 165 | return Response(api_response) 166 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Thanks for taking the time to contribute! 4 | 5 | All types of contributions are encouraged and valued. Please try to read the 6 | relevant sections in this document before making your contribution. 
It will 7 | make it a lot easier for us maintainers and smooth out the experience for all 8 | involved. The community looks forward to your contributions. 9 | 10 | > And if you like the project, but just don't have time to contribute, that's 11 | > fine. There are other easy ways to support the project and show your 12 | > appreciation, which we would also be very happy about: 13 | > - Star the project 14 | > - Post about it on social media 15 | > - Refer this project in your own project's README 16 | > - Mention the project at local meetups and tell your friends/colleagues 17 | 18 | ## I Have a Question 19 | 20 | > Please ensure you've already read the available 21 | > [Documentation](https://developers.oxylabs.io/), which may have 22 | > answered your question. 23 | 24 | Before you ask a question, it is best to search for existing 25 | [Issues](https://github.com/oxylabs/oxylabs-sdk-python/issues) that might help 26 | you. In case you have found a suitable issue and still need clarification, you 27 | can write your question in this issue. It is also advisable to search the 28 | internet for answers first. 29 | 30 | If you then still feel the need to ask a question and need clarification, we 31 | recommend the following: 32 | 33 | - Open an [Issue](https://github.com/oxylabs/oxylabs-sdk-python/issues/new). 34 | - Provide as much context as you can about what you're running into. 35 | - Provide project and platform versions (nodejs, npm, etc), depending on what 36 | seems relevant. 37 | 38 | We will then take care of the issue as soon as possible. 39 | 40 | ## I Want To Contribute 41 | 42 | > ### Legal Notice 43 | > 44 | > When contributing to this project, you must agree that you have authored 100% 45 | > of the content, that you have the necessary rights to the content and that 46 | > the content you contribute may be provided under the project licence. 47 | 48 | ## Code Formatting 49 | 50 | This project uses [Black](https://black.readthedocs.io/en/stable/) and 51 | [isort](https://pycqa.github.io/isort/) for code formatting. Before you submit 52 | your contribution, please make sure your code is formatted according to these 53 | style guides. 54 | 55 | First, you will need to install the required tools if you haven't already: 56 | 57 | ```bash 58 | pip install black isort 59 | ``` 60 | 61 | You can use the provided `fmt.sh` script to automatically format your code. 62 | This script runs `isort` and `black` on the `src` directory. Here's how you can 63 | run it: 64 | 65 | ```bash 66 | scripts/fmt.sh 67 | ``` 68 | 69 | ## Running Tests 70 | 71 | To ensure the quality of the code, we encourage you to run tests after making 72 | any changes and before submitting a contribution. We have a script that 73 | facilitates running the unit tests for the project. 74 | 75 | To run the tests, use the `tests.sh` script located in the `scripts` directory. 76 | This will execute all the unit tests and report any failures. 77 | 78 | Here's how you can run it: 79 | 80 | ```bash 81 | scripts/tests.sh 82 | ``` 83 | 84 | ### Reporting Bugs 85 | 86 | #### Before Submitting a Bug Report 87 | 88 | A good bug report shouldn't leave others needing to chase you up for more 89 | information. Therefore, we ask you to investigate carefully, collect 90 | information and describe the issue in detail in your report. Please complete 91 | the following steps in advance to help us fix any potential bug as fast as 92 | possible. 93 | 94 | - Make sure that you are using the latest version. 
95 | - Determine if your bug is really a bug and not an error on your side e.g. 96 | using incompatible environment components/versions (Make sure that you have 97 | read the 98 | [documentation](https://developers.oxylabs.io/). If you 99 | are looking for support, you might want to check [this 100 | section](#i-have-a-question)). 101 | - To see if other users have experienced (and potentially already solved) the 102 | same issue you are having, check if there is not already a bug report 103 | existing for your bug or error in the [bug 104 | tracker](https://github.com/oxylabs/oxylabs-sdk-python/issues?q=label%3Abug). 105 | - Also make sure to search the internet (including Stack Overflow) to see if 106 | users outside of the GitHub community have discussed the issue. 107 | - Collect information about the bug: 108 | - Stack trace (Traceback) 109 | - OS, Platform and Version (Windows, Linux, macOS, x86, ARM) 110 | - Version of the interpreter, compiler, SDK, runtime environment, package 111 | manager, depending on what seems relevant. 112 | - Possibly your input and the output 113 | - Can you reliably reproduce the issue? And can you also reproduce it with 114 | older versions? 115 | 116 | #### How Do I Submit a Good Bug Report? 117 | 118 | > You must never report security related issues, vulnerabilities or bugs 119 | > including sensitive information to the issue tracker, or elsewhere in public. 120 | > Instead sensitive bugs must be sent by email to . 121 | 122 | We use GitHub issues to track bugs and errors. If you run into an issue with the 123 | project: 124 | 125 | - Open an [Issue](https://github.com/oxylabs/oxylabs-sdk-python/issues/new). 126 | (Since we can't be sure at this point whether it is a bug or not, we ask you 127 | not to talk about a bug yet and not to label the issue.) 128 | - Explain the behavior you would expect and the actual behavior. 129 | - Please provide as much context as possible and describe the *reproduction 130 | steps* that someone else can follow to recreate the issue on their own. This 131 | usually includes your code. For good bug reports you should isolate the 132 | problem and create a reduced test case. 133 | - Provide the information you collected in the previous section. 134 | 135 | Once it's filed: 136 | 137 | - The project team will label the issue accordingly. 138 | - A team member will try to reproduce the issue with your provided steps. If 139 | there are no reproduction steps or no obvious way to reproduce the issue, the 140 | team will ask you for those steps and mark the issue as `needs-repro`. Bugs 141 | with the `needs-repro` tag will not be addressed until they are reproduced. 142 | - If the team is able to reproduce the issue, it will be marked `needs-fix`, as 143 | well as possibly other tags (such as `critical`), and the issue will be left 144 | to be [implemented by someone](#your-first-code-contribution). 145 | 146 | ### Suggesting Enhancements 147 | 148 | This section guides you through submitting an enhancement suggestion, 149 | **including completely new features and minor improvements to existing 150 | functionality**. Following these guidelines will help maintainers and the 151 | community to understand your suggestion and find related suggestions. 152 | 153 | #### Before Submitting an Enhancement 154 | 155 | - Make sure that you are using the latest version. 
156 | - Read the [documentation](https://developers.oxylabs.io/) carefully and 157 | find out if the functionality is already covered, maybe by an individual 158 | configuration. 159 | - Perform a [search](https://github.com/oxylabs/oxylabs-sdk-python/issues) to see 160 | if the enhancement has already been suggested. If it has, add a comment to the 161 | existing issue instead of opening a new one. 162 | - Find out whether your idea fits with the scope and aims of the project. It's 163 | up to you to make a strong case to convince the project's developers of the 164 | merits of this feature. Keep in mind that we want features that will be 165 | useful to the majority of our users and not just a small subset. If you're 166 | just targeting a minority of users, consider writing an add-on/plugin 167 | library. 168 | 169 | #### How Do I Submit a Good Enhancement Suggestion? 170 | 171 | Enhancement suggestions are tracked as 172 | [GitHub issues](https://github.com/oxylabs/oxylabs-sdk-python/issues). 173 | 174 | - Use a **clear and descriptive title** for the issue to identify the 175 | suggestion. 176 | - Provide a **step-by-step description of the suggested enhancement** in as 177 | many details as possible. 178 | - **Describe the current behavior** and **explain which behavior you expected 179 | to see instead** and why. At this point you can also tell which alternatives 180 | do not work for you. 181 | - You may want to **include screenshots or screen recordings** which help you 182 | demonstrate the steps or point out the part which the suggestion is related 183 | to. 184 | - **Explain why this enhancement would be useful** to most users. You may also 185 | want to point out the other projects that solved it better and which could 186 | serve as inspiration. 187 | 188 | ## Security Issue Notifications 189 | 190 | Please see Oxylabs' [Vulnerability Disclosure 191 | Policy](https://oxylabs.io/legal/vulnerability-disclosure-policy) for details. 192 | -------------------------------------------------------------------------------- /src/oxylabs/sources/wayfair/wayfair.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from oxylabs.internal.api import RealtimeAPI, AsyncAPI 4 | from oxylabs.sources.response import Response 5 | from oxylabs.utils.types import source 6 | from oxylabs.utils.utils import prepare_config 7 | 8 | 9 | class Wayfair: 10 | def __init__(self, api_instance:RealtimeAPI) -> None: 11 | """ 12 | Initializes an instance of the Wayfair class. 13 | 14 | Args: 15 | api_instance: An instance of the RealtimeAPI class used for making requests. 16 | """ 17 | self._api_instance = api_instance 18 | 19 | def scrape_search( 20 | self, 21 | query: str, 22 | start_page: Optional[int] = None, 23 | pages: Optional[int] = None, 24 | limit: Optional[int] = None, 25 | user_agent_type: Optional[str] = None, 26 | callback_url: Optional[str] = None, 27 | request_timeout: Optional[int] = 165, 28 | **kwargs 29 | ) -> Response: 30 | """ 31 | Scrapes Wayfair search results for a given query. 32 | 33 | Args: 34 | query (str): The search query. 35 | start_page (Optional[int]): The starting page number. 36 | pages (Optional[int]): The number of pages to scrape. 37 | limit (Optional[int]): Number of results to retrieve in each page. 38 | user_agent_type (Optional[str]): Device type and browser. 39 | callback_url (Optional[str]): URL to your callback endpoint. 
40 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 41 | request_timeout (int | 165, optional): The interval in seconds for 42 | the request to time out if no response is returned. 43 | Defaults to 165. 44 | 45 | Returns: 46 | Response: The response from the server after the job is completed. 47 | """ 48 | 49 | config = prepare_config(request_timeout=request_timeout) 50 | payload = { 51 | "source": source.WAYFAIR_SEARCH, 52 | "query": query, 53 | "start_page": start_page, 54 | "pages": pages, 55 | "limit": limit, 56 | "user_agent_type": user_agent_type, 57 | "callback_url": callback_url, 58 | **kwargs, 59 | } 60 | api_response = self._api_instance.get_response(payload, config) 61 | return Response(api_response) 62 | 63 | def scrape_url( 64 | self, 65 | url: str, 66 | user_agent_type: Optional[str] = None, 67 | callback_url: Optional[str] = None, 68 | request_timeout: Optional[int] = 165, 69 | **kwargs 70 | ) -> Response: 71 | """ 72 | Scrapes Wayfair search results for a given URL. 73 | 74 | Args: 75 | url (str): The URL to be scraped. 76 | user_agent_type (Optional[str]): Device type and browser. 77 | callback_url (Optional[str]): URL to your callback endpoint. 78 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 79 | request_timeout (int | 165, optional): The interval in seconds for 80 | the request to time out if no response is returned. 81 | Defaults to 165. 82 | 83 | Returns: 84 | Response: The response from the server after the job is completed. 85 | """ 86 | 87 | config = prepare_config(request_timeout=request_timeout) 88 | payload = { 89 | "source": source.WAYFAIR, 90 | "url": url, 91 | "user_agent_type": user_agent_type, 92 | "callback_url": callback_url, 93 | **kwargs, 94 | } 95 | api_response = self._api_instance.get_response(payload, config) 96 | return Response(api_response) 97 | 98 | 99 | class WayfairAsync: 100 | def __init__(self, api_instance:AsyncAPI) -> None: 101 | """ 102 | Initializes an instance of the Wayfair class. 103 | 104 | Args: 105 | api_instance: An instance of the AsyncAPI class used for making requests. 106 | """ 107 | self._api_instance = api_instance 108 | 109 | async def scrape_search( 110 | self, 111 | query: str, 112 | start_page: Optional[int] = None, 113 | pages: Optional[int] = None, 114 | limit: Optional[int] = None, 115 | user_agent_type: Optional[str] = None, 116 | callback_url: Optional[str] = None, 117 | request_timeout: Optional[int] = 165, 118 | job_completion_timeout: Optional[int] = None, 119 | poll_interval: Optional[int] = None, 120 | **kwargs 121 | ) -> Response: 122 | """ 123 | Asynchronously scrapes Wayfair search results for a given query. 124 | 125 | Args: 126 | query (str): The search query. 127 | start_page (Optional[int]): The starting page number. 128 | pages (Optional[int]): The number of pages to scrape. 129 | limit (Optional[int]): Number of results to retrieve in each page 130 | user_agent_type (Optional[str]): Device type and browser. 131 | callback_url (Optional[str]): URL to your callback endpoint. 132 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 133 | request_timeout (int | 165, optional): The interval in seconds for 134 | the request to time out if no response is returned. 135 | Defaults to 165. 136 | poll_interval (Optional[int]): The interval in seconds to poll 137 | the server for a response. 138 | job_completion_timeout (Optional[int]): The interval in 139 | seconds for the job to time out if no response is returned. 
140 | 141 | Returns: 142 | Response: The response from the server after the job is completed. 143 | """ 144 | 145 | config = prepare_config( 146 | request_timeout=request_timeout, 147 | poll_interval=poll_interval, 148 | job_completion_timeout=job_completion_timeout, 149 | async_integration=True, 150 | ) 151 | payload = { 152 | "source": source.WAYFAIR_SEARCH, 153 | "query": query, 154 | "start_page": start_page, 155 | "pages": pages, 156 | "limit": limit, 157 | "user_agent_type": user_agent_type, 158 | "callback_url": callback_url, 159 | **kwargs, 160 | } 161 | api_response = await self._api_instance.get_response(payload, config) 162 | return Response(api_response) 163 | 164 | async def scrape_url( 165 | self, 166 | url: str, 167 | user_agent_type: Optional[str] = None, 168 | callback_url: Optional[str] = None, 169 | request_timeout: Optional[int] = 165, 170 | job_completion_timeout: Optional[int] = None, 171 | poll_interval: Optional[int] = None, 172 | **kwargs 173 | ) -> Response: 174 | """ 175 | Asynchronously scrapes Wayfair search results for a given URL. 176 | 177 | Args: 178 | url (str): The URL to be scraped. 179 | user_agent_type (Optional[str]): Device type and browser. 180 | callback_url (Optional[str]): URL to your callback endpoint. 181 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 182 | request_timeout (int | 165, optional): The interval in seconds for 183 | the request to time out if no response is returned. 184 | Defaults to 165. 185 | poll_interval (Optional[int]): The interval in seconds to poll 186 | the server for a response. 187 | job_completion_timeout (Optional[int]): The interval in 188 | seconds for the job to time out if no response is returned. 189 | 190 | Returns: 191 | Response: The response from the server after the job is completed. 192 | """ 193 | 194 | config = prepare_config( 195 | request_timeout=request_timeout, 196 | poll_interval=poll_interval, 197 | job_completion_timeout=job_completion_timeout, 198 | async_integration=True, 199 | ) 200 | payload = { 201 | "source": source.WAYFAIR, 202 | "url": url, 203 | "user_agent_type": user_agent_type, 204 | "callback_url": callback_url, 205 | **kwargs, 206 | } 207 | api_response = await self._api_instance.get_response(payload, config) 208 | return Response(api_response) 209 | -------------------------------------------------------------------------------- /src/oxylabs/internal/api.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import logging 3 | import requests 4 | import aiohttp 5 | import asyncio 6 | from platform import python_version, architecture 7 | from oxylabs._version import __version__ 8 | from oxylabs.utils.defaults import ASYNC_BASE_URL, SYNC_BASE_URL 9 | from oxylabs.utils.utils import ensure_session, close_session 10 | 11 | # Configure logging 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | class APICredentials: 16 | def __init__(self, username: str, password: str) -> None: 17 | """ 18 | Initializes an instance of the ApiCredentials class. 19 | 20 | Args: 21 | username (str): The username for API authentication. 22 | password (str): The password for API authentication. 
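        Note:
            Illustrative sketch only (the values below are placeholders, not
            real credentials): the username and password are Base64-encoded so
            they can be sent as an HTTP Basic Authorization header, which is
            how BaseAPI uses them when building request headers.

                creds = APICredentials("YOUR_USERNAME", "YOUR_PASSWORD")
                auth_header = f"Basic {creds.encoded_credentials}"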
23 | """ 24 | credentials = f"{username}:{password}" 25 | self.encoded_credentials = base64.b64encode(credentials.encode()).decode() 26 | 27 | class BaseAPI: 28 | def __init__(self, base_url: str, api_credentials: APICredentials, **kwargs) -> None: 29 | """ 30 | Initializes an instance of the BaseAPI class. 31 | 32 | Args: 33 | base_url (str): The URL of the API. 34 | api_credentials (APICredentials): An instance of APICredentials used for authentication. 35 | """ 36 | self._base_url = base_url 37 | bits, _ = architecture() 38 | sdk_type = kwargs.get("sdk_type", f"oxylabs-sdk-python/{__version__} ({python_version()}; {bits})") 39 | self._headers = { 40 | "Content-Type": "application/json", 41 | "Authorization": f"Basic {api_credentials.encoded_credentials}", 42 | "x-oxylabs-sdk": sdk_type, 43 | } 44 | 45 | class RealtimeAPI(BaseAPI): 46 | def __init__(self, api_credentials: APICredentials, **kwargs) -> None: 47 | """ 48 | Initializes an instance of the RealtimeAPI class. 49 | 50 | Args: 51 | api_credentials (APICredentials): An instance of APICredentials used for authentication. 52 | """ 53 | super().__init__(SYNC_BASE_URL, api_credentials, **kwargs) 54 | 55 | def get_response(self, payload:dict, config:dict) -> dict: 56 | """ 57 | Sends the payload synchronously and fetches the response. 58 | 59 | Args: 60 | payload (dict): The payload for the request. 61 | config (dict): The configuration for the request. 62 | 63 | Returns: 64 | dict: The response from the server after the job is completed. 65 | """ 66 | # Remove empty or null values from the payload 67 | payload = {k: v for k, v in payload.items() if v is not None} 68 | 69 | return self._get_http_response(payload, "POST", config) 70 | 71 | def _get_http_response(self, payload: dict, method: str, config: dict) -> dict | None: 72 | """ 73 | Sends an HTTP request to the specified URL with the given payload 74 | and method. 75 | 76 | Args: 77 | payload (dict): The payload to be sent with the request. 78 | method (str): The HTTP method to be used for the request 79 | (e.g., "POST", "GET"). 80 | config (dict): Additional configuration options for the 81 | request. 82 | 83 | Returns: 84 | dict: The JSON response from the server, if the request is 85 | successful. 86 | None, if an error occurs during the request. 87 | 88 | Raises: 89 | requests.exceptions.Timeout: If the request times out. 90 | requests.exceptions.HTTPError: If an HTTP error occurs. 91 | requests.exceptions.RequestException: If a general request 92 | error occurs. 93 | """ 94 | try: 95 | if method == "POST": 96 | response = requests.post( 97 | self._base_url, 98 | headers=self._headers, 99 | json=payload, 100 | timeout=config["request_timeout"], 101 | ) 102 | else: 103 | logger.error(f"Unsupported method: {method}") 104 | return None 105 | 106 | response.raise_for_status() 107 | 108 | if response.status_code == 200: 109 | return response.json() 110 | else: 111 | logger.error(f"Error occurred: {response.status_code}") 112 | return None 113 | 114 | except requests.exceptions.Timeout: 115 | logger.error( 116 | f"Timeout error. The request to {self._base_url} with method {method} has timed out." 
117 | ) 118 | return None 119 | except requests.exceptions.HTTPError as err: 120 | logger.error(f"HTTP error occurred: {err}") 121 | logger.error(response.text) 122 | return None 123 | except requests.exceptions.RequestException as err: 124 | logger.error(f"Error occurred: {err}") 125 | return None 126 | 127 | class AsyncAPI(BaseAPI): 128 | def __init__(self, api_credentials: APICredentials, **kwargs) -> None: 129 | """ 130 | Initializes an instance of the AsyncAPI class. 131 | 132 | Args: 133 | api_credentials (APICredentials): An instance of APICredentials used for authentication. 134 | """ 135 | super().__init__(ASYNC_BASE_URL, api_credentials, **kwargs) 136 | self._session = None 137 | self._requests = 0 138 | 139 | async def get_response(self, payload: dict, config: dict) -> dict | None: 140 | """ 141 | Processes the payload asynchronously and fetches the response. 142 | 143 | Args: 144 | payload (dict): The payload for the request. 145 | config (dict): The configuration for the request. 146 | 147 | Returns: 148 | dict: The response from the server after the job is completed. 149 | """ 150 | # Remove empty or null values from the payload 151 | payload = {k: v for k, v in payload.items() if v is not None} 152 | 153 | result = None 154 | self._requests += 1 155 | 156 | try: 157 | self._session = await ensure_session(self._session) 158 | 159 | result = await self._execute_with_timeout( 160 | payload, config, self._session 161 | ) 162 | return result 163 | 164 | except Exception as e: 165 | logger.error(f"An error occurred: {e}") 166 | 167 | finally: 168 | self._requests -= 1 169 | if self._requests == 0: 170 | await close_session(self._session) 171 | return None 172 | 173 | async def _get_job_id( 174 | self, 175 | payload: dict, 176 | user_session: aiohttp.ClientSession, 177 | request_timeout: int, 178 | ) -> str | None: 179 | try: 180 | async with user_session.post( 181 | self._base_url, 182 | headers=self._headers, 183 | json=payload, 184 | timeout=request_timeout, 185 | ) as response: 186 | data = await response.json() 187 | response.raise_for_status() 188 | return data["id"] 189 | except aiohttp.ClientResponseError as e: 190 | logger.error( 191 | f"HTTP error occurred: {e.status} - {e.message} - {data['message']}" 192 | ) 193 | except aiohttp.ClientConnectionError as e: 194 | logger.error(f"Connection error occurred: {e}") 195 | except asyncio.TimeoutError: 196 | logger.error( 197 | f"Timeout error. The request to {self._base_url} has timed out." 
198 | ) 199 | except Exception as e: 200 | logger.error(f"Error occurred: {str(e)}") 201 | return None 202 | 203 | async def _poll_job_status( 204 | self, 205 | job_id: str, 206 | poll_interval: int, 207 | user_session: aiohttp.ClientSession, 208 | timeout: int, 209 | ) -> bool: 210 | job_status_url = f"{self._base_url}/{job_id}" 211 | end_time = asyncio.get_event_loop().time() + timeout 212 | while asyncio.get_event_loop().time() < end_time: 213 | try: 214 | async with user_session.get( 215 | job_status_url, 216 | headers=self._headers, 217 | timeout=poll_interval, 218 | ) as response: 219 | data = await response.json() 220 | response.raise_for_status() 221 | if data["status"] == "done": 222 | return True 223 | elif data["status"] == "faulted": 224 | raise Exception("Job faulted") 225 | except Exception as e: 226 | logger.error(f"Error occurred: {str(e)}") 227 | return False 228 | await asyncio.sleep(poll_interval) 229 | 230 | logger.info("Job completion timeout exceeded") 231 | return False 232 | 233 | async def _get_http_response( 234 | self, job_id: str, user_session: aiohttp.ClientSession 235 | ) -> dict | None: 236 | """ 237 | Retrieves the HTTP response for a given job ID. 238 | 239 | Args: 240 | job_id (str): The ID of the job. 241 | user_session (aiohttp.ClientSession): The client session used for 242 | making the request. 243 | 244 | Returns: 245 | dict: The JSON response data. 246 | 247 | Raises: 248 | aiohttp.ClientResponseError: If a client response error occurs. 249 | aiohttp.ClientConnectionError: If a client connection error occurs. 250 | asyncio.TimeoutError: If the request times out. 251 | Exception: If any other error occurs. 252 | """ 253 | result_url = f"{self._base_url}/{job_id}/results" 254 | try: 255 | async with user_session.get( 256 | result_url, headers=self._headers 257 | ) as response: 258 | data = await response.json() 259 | response.raise_for_status() 260 | return data 261 | except aiohttp.ClientResponseError as e: 262 | logger.error( 263 | f"HTTP error occurred: {e.status} - {e.message} - {data['message']}" 264 | ) 265 | except aiohttp.ClientConnectionError as e: 266 | logger.error(f"Connection error occurred: {e}") 267 | except asyncio.TimeoutError: 268 | logger.error( 269 | f"Timeout error. The request to {result_url} has timed out." 
270 | ) 271 | except Exception as e: 272 | logger.error(f"An error occurred: {e} - {data['message']}") 273 | return None 274 | 275 | async def _execute_with_timeout( 276 | self, payload: dict, config: dict, user_session: aiohttp.ClientSession 277 | ) -> dict: 278 | 279 | request_timeout = config["request_timeout"] 280 | job_completion_timeout = config["job_completion_timeout"] 281 | poll_interval = config["poll_interval"] 282 | 283 | job_id = await self._get_job_id(payload, user_session, request_timeout) 284 | if not job_id: 285 | logger.error("Failed to get job ID") 286 | 287 | job_completed = await self._poll_job_status( 288 | job_id, poll_interval, user_session, job_completion_timeout 289 | ) 290 | if not job_completed: 291 | logger.error("Job did not complete successfully") 292 | 293 | result = await self._get_http_response(job_id, user_session) 294 | return result -------------------------------------------------------------------------------- /src/oxylabs/utils/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | from urllib.parse import urlparse 3 | 4 | import aiohttp 5 | 6 | from .defaults import ( 7 | DEFAULT_JOB_COMPLETION_TIMEOUT, 8 | DEFAULT_POLL_INTERVAL, 9 | DEFAULT_REQUEST_TIMEOUT, 10 | DEFAULT_REQUEST_TIMEOUT_ASYNC, 11 | ) 12 | from .types import fn_name 13 | 14 | 15 | def get_valid_values(module: object) -> list: 16 | """ 17 | Returns a list of valid values from the given module. 18 | 19 | Args: 20 | module (object): The module to retrieve valid values from. 21 | 22 | Returns: 23 | list: A list of valid values from the module. 24 | """ 25 | return [ 26 | getattr(module, name) 27 | for name in dir(module) 28 | if not name.startswith("__") 29 | ] 30 | 31 | 32 | VALID_FN_NAMES = get_valid_values(fn_name) 33 | 34 | 35 | def prepare_config(**kwargs): 36 | """ 37 | Prepare a configuration dictionary based on the provided keyword arguments. 38 | 39 | Args: 40 | request_timeout (int, optional): The timeout value in seconds. Defaults 41 | to None. 42 | poll_interval (int, optional): The poll interval value in seconds. 43 | Defaults to None. 44 | job_completion_timeout (int, optional): The job completion timeout 45 | value in seconds. Defaults to None. 46 | 47 | Returns: 48 | dict: The prepared configuration dictionary. 49 | 50 | """ 51 | config = {} 52 | config["request_timeout"] = ( 53 | kwargs["request_timeout"] 54 | if kwargs.get("request_timeout") is not None 55 | else ( 56 | DEFAULT_REQUEST_TIMEOUT_ASYNC 57 | if kwargs.get("async_integration") is not None 58 | else DEFAULT_REQUEST_TIMEOUT 59 | ) 60 | ) 61 | config["poll_interval"] = ( 62 | kwargs["poll_interval"] 63 | if kwargs.get("poll_interval") is not None 64 | else DEFAULT_POLL_INTERVAL 65 | ) 66 | config["job_completion_timeout"] = ( 67 | kwargs["job_completion_timeout"] 68 | if kwargs.get("job_completion_timeout") is not None 69 | else DEFAULT_JOB_COMPLETION_TIMEOUT 70 | ) 71 | 72 | return config 73 | 74 | 75 | def validate_url(input_url: str, host: str) -> None: 76 | """ 77 | Validates if the given URL is valid and belongs to the specified host. 78 | 79 | Args: 80 | input_url (str): The URL to be validated. 81 | host (str): The expected domain or host. 82 | 83 | Raises: 84 | ValueError: If the URL parameter is empty, missing scheme, missing 85 | host, or does not belong to the specified host. 
86 | 87 | Returns: 88 | None 89 | """ 90 | # Check if the URL is empty 91 | if not input_url: 92 | raise ValueError("URL parameter is empty") 93 | 94 | # Parse the URL 95 | parsed_url = urlparse(input_url) 96 | 97 | # Check if the scheme (protocol) is present and not empty 98 | if not parsed_url.scheme: 99 | raise ValueError("URL is missing scheme") 100 | 101 | # Check if the host is present and not empty 102 | if not parsed_url.netloc: 103 | raise ValueError("URL is missing a host") 104 | 105 | # Check if the host matches the expected domain or host 106 | if host not in parsed_url.netloc: 107 | raise ValueError(f"URL does not belong to {host}") 108 | 109 | return None 110 | 111 | 112 | async def ensure_session(session) -> aiohttp.ClientSession: 113 | """ 114 | Ensure the provided session is valid and return a valid session. 115 | 116 | Args: 117 | session: The session to ensure. 118 | 119 | Returns: 120 | A valid aiohttp.ClientSession object. 121 | 122 | """ 123 | if session is None or session.closed: 124 | session = aiohttp.ClientSession() 125 | return session 126 | 127 | 128 | async def close_session(user_session: aiohttp.ClientSession) -> None: 129 | """ 130 | Closes the user session. 131 | 132 | Args: 133 | user_session: The user session to be closed. 134 | 135 | Returns: 136 | None 137 | """ 138 | 139 | if user_session: 140 | await user_session.close() 141 | 142 | 143 | def check_parsing_instructions_validity(instructions: dict) -> None: 144 | """ 145 | Check the validity of parsing instructions. 146 | 147 | Args: 148 | instructions (dict): The parsing instructions to be validated. 149 | 150 | Raises: 151 | Exception: If the parsing instructions have an invalid structure. 152 | 153 | Returns: 154 | None 155 | """ 156 | if instructions is None: 157 | return 158 | 159 | if "_fns" in instructions: 160 | validate_fns(instructions["_fns"]) 161 | else: 162 | for key, value in instructions.items(): 163 | if isinstance(value, dict): 164 | check_parsing_instructions_validity(value) 165 | else: 166 | raise Exception(f"Invalid structure for key: {key}") 167 | 168 | 169 | def validate_fns(fns: List[Any]) -> None: 170 | """ 171 | Validates a list of functions. 172 | 173 | Args: 174 | fns (list): A list of functions to validate. 175 | 176 | Raises: 177 | Exception: If `fns` is None or not a list. 178 | """ 179 | if fns is None: 180 | raise Exception("_fns cannot be nil") 181 | if not isinstance(fns, list): 182 | raise Exception("_fns must be a list") 183 | 184 | for fn in fns: 185 | validate_fn(fn) 186 | 187 | 188 | def validate_fn(fn: dict) -> None: 189 | """ 190 | Validates the given function dictionary. 191 | 192 | Args: 193 | fn: A dictionary representing the function. 194 | 195 | Raises: 196 | ValueError: If the function dictionary is not valid. 197 | """ 198 | if not isinstance(fn, dict): 199 | raise ValueError("Each item in _fns must be a dictionary") 200 | if "_fn" not in fn: 201 | raise ValueError("_fn must be set in each function") 202 | if fn["_fn"] not in VALID_FN_NAMES: 203 | raise ValueError(f"_fn must be a valid function name, got {fn['_fn']}") 204 | 205 | # Delegate to specific argument validators 206 | validate_fn_args(fn["_fn"], fn.get("_args")) 207 | 208 | 209 | def validate_fn_args(function: str, args: Any) -> None: 210 | """ 211 | Validate the arguments for a given function. 212 | 213 | Args: 214 | function (str): The name of the function to validate. 215 | args (Any): The arguments to validate. 
216 | 217 | Raises: 218 | ValueError: If there is no validator for the given function name. 219 | """ 220 | # Map function name to validator function 221 | validators = { 222 | fn_name.ELEMENT_TEXT: validate_empty, 223 | fn_name.LENGTH: validate_empty, 224 | fn_name.CONVERT_TO_FLOAT: validate_empty, 225 | fn_name.CONVERT_TO_INT: validate_empty, 226 | fn_name.CONVERT_TO_STR: validate_empty, 227 | fn_name.MAX: validate_empty, 228 | fn_name.MIN: validate_empty, 229 | fn_name.PRODUCT: validate_empty, 230 | fn_name.XPATH: validate_string_array, 231 | fn_name.XPATH_ONE: validate_string_array, 232 | fn_name.CSS: validate_string_array, 233 | fn_name.CSS_ONE: validate_string_array, 234 | fn_name.AMOUNT_FROM_STRING: validate_string, 235 | fn_name.AMOUNT_RANGE_FROM_STRING: validate_string, 236 | fn_name.REGEX_FIND_ALL: validate_string, 237 | fn_name.JOIN: validate_optional_string, 238 | fn_name.REGEX_SEARCH: validate_list_string_optional_int, 239 | fn_name.REGEX_SUBSTRING: validate_list_string_optional_int, 240 | fn_name.SELECT_NTH: validate_non_zero_int, 241 | fn_name.AVERAGE: validate_optional_int, 242 | } 243 | 244 | if function not in validators: 245 | raise ValueError(f"No validator for function name: {function}") 246 | 247 | # Call the appropriate validator 248 | validator = validators[function] 249 | validator(args) 250 | 251 | 252 | def validate_empty(args: Any) -> None: 253 | """ 254 | Validates if the given argument is empty. 255 | 256 | Args: 257 | args: The argument to be validated. 258 | 259 | Raises: 260 | ValueError: If the argument is not empty. 261 | """ 262 | if args: 263 | raise ValueError("_args must be empty") 264 | 265 | 266 | def validate_string_array(args: List[str]) -> None: 267 | """ 268 | Validates a list of non-empty strings. 269 | 270 | Args: 271 | args: A list of strings to be validated. 272 | 273 | Raises: 274 | ValueError: If `args` is not a list of non-empty strings. 275 | """ 276 | if not isinstance(args, list) or not all( 277 | isinstance(elem, str) and elem for elem in args 278 | ): 279 | raise ValueError("_args must be a list of non-empty strings") 280 | 281 | 282 | def validate_string(args: str) -> None: 283 | """ 284 | Validates if the given argument is a non-empty string. 285 | 286 | Args: 287 | args: The argument to be validated. 288 | 289 | Raises: 290 | ValueError: If the argument is not a non-empty string. 291 | """ 292 | if not isinstance(args, str) or not args: 293 | raise ValueError("_args must be a non-empty string") 294 | 295 | 296 | def validate_optional_string(args: str) -> None: 297 | """ 298 | Validates if the given argument is a non-empty string or None. 299 | 300 | Args: 301 | args (str): The argument to be validated. 302 | 303 | Raises: 304 | ValueError: If the argument is not a non-empty string or None. 305 | """ 306 | if args is not None and (not isinstance(args, str) or not args): 307 | raise ValueError("_args must be a non-empty string or None") 308 | 309 | 310 | def validate_non_zero_int(args: int) -> None: 311 | """ 312 | Validates if the given argument is a non-zero integer. 313 | 314 | Args: 315 | args (int): The argument to be validated. 316 | 317 | Raises: 318 | ValueError: If the argument is not a non-zero integer. 319 | 320 | Returns: 321 | None 322 | """ 323 | if not isinstance(args, int) or args == 0: 324 | raise ValueError("_args must be a non-zero integer") 325 | 326 | 327 | def validate_optional_int(args: int) -> None: 328 | """ 329 | Validates if the given argument is a non-zero integer or None. 
330 | 331 | Args: 332 | args (int): The argument to be validated. 333 | 334 | Raises: 335 | ValueError: If the argument is not a non-zero integer or None. 336 | """ 337 | if args is not None and (not isinstance(args, int) or args == 0): 338 | raise ValueError("_args must be a non-zero integer or None") 339 | 340 | 341 | def validate_list_string_optional_int(args: list) -> None: 342 | """ 343 | Validates the input arguments. 344 | 345 | Args: 346 | args: A list containing the arguments to be validated. 347 | 348 | Raises: 349 | ValueError: If the first argument is not a non-empty string or if the 350 | second argument is not a non-zero integer when present. 351 | """ 352 | if ( 353 | not isinstance(args, list) 354 | or len(args) < 1 355 | or not isinstance(args[0], str) 356 | or not args[0] 357 | ): 358 | raise ValueError("_args first argument must be a non-empty string") 359 | if len(args) > 1 and (not isinstance(args[1], int) or args[1] == 0): 360 | raise ValueError( 361 | "_args second argument must be a non-zero integer when present" 362 | ) 363 | -------------------------------------------------------------------------------- /src/oxylabs/sources/bing/bing.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from oxylabs.internal.api import RealtimeAPI, AsyncAPI 4 | from oxylabs.sources.response import Response 5 | from oxylabs.utils.types import source 6 | from oxylabs.utils.utils import ( 7 | check_parsing_instructions_validity, 8 | prepare_config, 9 | ) 10 | 11 | 12 | class Bing: 13 | def __init__(self, api_instance:RealtimeAPI) -> None: 14 | """ 15 | Initializes an instance of the Bing class. 16 | 17 | Args: 18 | api_instance: An instance of the RealtimeAPI class used for making requests. 19 | """ 20 | self._api_instance = api_instance 21 | 22 | def scrape_search( 23 | self, 24 | query: str, 25 | domain: Optional[str] = None, 26 | start_page: Optional[int] = None, 27 | pages: Optional[int] = None, 28 | limit: Optional[int] = None, 29 | user_agent_type: Optional[str] = None, 30 | callback_url: Optional[str] = None, 31 | locale: Optional[str] = None, 32 | geo_location: Optional[str] = None, 33 | render: Optional[str] = None, 34 | parse: Optional[bool] = None, 35 | parsing_instructions: Optional[dict] = None, 36 | request_timeout: Optional[int] = 165, 37 | **kwargs, 38 | ) -> Response: 39 | """ 40 | Scrapes search results from Bing. 41 | 42 | Args: 43 | query (str): UTF-encoded keyword. 44 | domain (Optional[str]): The domain to limit the search results to. 45 | start_page (Optional[int]): The starting page number. 46 | pages (Optional[int]): The number of pages to scrape. 47 | limit (Optional[int]): Number of results to retrieve in each page. 48 | user_agent_type (Optional[str]): Device type and browser. 49 | callback_url (Optional[str]): URL to your callback endpoint. 50 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 51 | geo_location (Optional[str]): It goes like this: City,Region,Country. 52 | render (Optional[str]): Enables JavaScript rendering. 53 | parse (Optional[bool]): true will return structured data. 54 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 55 | request_timeout (Optional[int]): The timeout for the request in seconds. 56 | **kwargs: Additional keyword arguments. 57 | Returns: 58 | Response: The response containing the scraped results. 
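        Example:
            A minimal, illustrative sketch (not an official snippet): it
            assumes a RealtimeClient has been created with valid credentials,
            as in the README Quick Start, and uses placeholder arguments.

                client = RealtimeClient("YOUR_USERNAME", "YOUR_PASSWORD")
                result = client.bing.scrape_search("nike", domain="com", limit=10)
                print(result.raw)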
59 | """ 60 | 61 | config = prepare_config(request_timeout=request_timeout) 62 | payload = { 63 | "source": source.BING_SEARCH, 64 | "domain": domain, 65 | "query": query, 66 | "start_page": start_page, 67 | "pages": pages, 68 | "limit": limit, 69 | "locale": locale, 70 | "geo_location": geo_location, 71 | "user_agent_type": user_agent_type, 72 | "callback_url": callback_url, 73 | "render": render, 74 | "parse": parse, 75 | "parsing_instructions": parsing_instructions, 76 | **kwargs, 77 | } 78 | 79 | check_parsing_instructions_validity(parsing_instructions) 80 | api_response = self._api_instance.get_response(payload, config) 81 | return Response(api_response) 82 | 83 | def scrape_url( 84 | self, 85 | url: str, 86 | user_agent_type: Optional[str] = None, 87 | geo_location: Optional[str] = None, 88 | callback_url: Optional[str] = None, 89 | render: Optional[str] = None, 90 | parse: Optional[bool] = None, 91 | parsing_instructions: Optional[dict] = None, 92 | request_timeout: Optional[int] = 165, 93 | **kwargs, 94 | ) -> Response: 95 | """ 96 | Scrapes Bing search results for a given URL. 97 | 98 | Args: 99 | url (str): The URL to be scraped. 100 | user_agent_type (Optional[str]): Device type and browser. 101 | geo_location (Optional[str]): The API uses Canonical Geo Location format to 102 | determine request location. It goes like this: City,Region,Country 103 | callback_url (Optional[str]): URL to your callback endpoint. 104 | render (Optional[str]): Enables JavaScript rendering. 105 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 106 | parse (Optional[bool]): true will return structured data. 107 | request_timeout (int | 165, optional): The interval in seconds for 108 | the request to time out if no response is returned. 109 | Defaults to 165. 110 | 111 | Returns: 112 | Response: The response containing the scraped results. 113 | """ 114 | 115 | config = prepare_config(request_timeout=request_timeout) 116 | payload = { 117 | "source": source.BING_URL, 118 | "url": url, 119 | "user_agent_type": user_agent_type, 120 | "geo_location": geo_location, 121 | "callback_url": callback_url, 122 | "render": render, 123 | "parse": parse, 124 | "parsing_instructions": parsing_instructions, 125 | **kwargs, 126 | } 127 | check_parsing_instructions_validity(parsing_instructions) 128 | api_response = self._api_instance.get_response(payload, config) 129 | return Response(api_response) 130 | 131 | 132 | class BingAsync: 133 | def __init__(self, api_instance:AsyncAPI) -> None: 134 | """ 135 | Initializes an instance of the Bing class. 136 | 137 | Args: 138 | api_instance: An instance of the AsyncAPI class used for making requests. 139 | """ 140 | self._api_instance = api_instance 141 | 142 | async def scrape_search( 143 | self, 144 | query: str, 145 | domain: Optional[str] = None, 146 | start_page: Optional[int] = None, 147 | pages: Optional[int] = None, 148 | limit: Optional[int] = None, 149 | user_agent_type: Optional[str] = None, 150 | callback_url: Optional[str] = None, 151 | locale: Optional[str] = None, 152 | geo_location: Optional[str] = None, 153 | render: Optional[str] = None, 154 | parse: Optional[bool] = None, 155 | parsing_instructions: Optional[dict] = None, 156 | request_timeout: Optional[int] = 165, 157 | job_completion_timeout: Optional[int] = None, 158 | poll_interval: Optional[int] = None, 159 | **kwargs, 160 | ) -> Response: 161 | """ 162 | Asynchronously scrapes Bing search results for a given query. 163 | 164 | Args: 165 | query (str): The search query. 
166 | domain (Optional[str]): The domain to limit the search results to. 167 | start_page (Optional[int]): The starting page number. 168 | pages (Optional[int]): The number of pages to scrape. 169 | limit (Optional[int]): Number of results to retrieve in each page. 170 | user_agent_type (Optional[str]): Device type and browser. 171 | callback_url (Optional[str]): URL to your callback endpoint. 172 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 173 | geo_location (Optional[str]): The API uses Canonical Geo Location format to 174 | determine request location. It goes like this: City,Region,Country 175 | render (Optional[str]): Enables JavaScript rendering. 176 | parse (Optional[bool]): true will return structured data. 177 | request_timeout (int | 165, optional): The interval in seconds for 178 | the request to time out if no response is returned. 179 | Defaults to 165. 180 | poll_interval (Optional[int]): The interval in seconds to poll 181 | the server for a response. 182 | job_completion_timeout (Optional[int]): The interval in 183 | seconds for the job to time out if no response is returned. 184 | 185 | Returns: 186 | Response: The response containing the scraped results. 187 | """ 188 | 189 | config = prepare_config( 190 | request_timeout=request_timeout, 191 | poll_interval=poll_interval, 192 | job_completion_timeout=job_completion_timeout, 193 | async_integration=True, 194 | ) 195 | 196 | payload = { 197 | "source": source.BING_SEARCH, 198 | "domain": domain, 199 | "query": query, 200 | "start_page": start_page, 201 | "pages": pages, 202 | "limit": limit, 203 | "locale": locale, 204 | "geo_location": geo_location, 205 | "user_agent_type": user_agent_type, 206 | "callback_url": callback_url, 207 | "render": render, 208 | "parse": parse, 209 | "parsing_instructions": parsing_instructions, 210 | **kwargs, 211 | } 212 | check_parsing_instructions_validity(parsing_instructions) 213 | api_response = await self._api_instance.get_response(payload, config) 214 | return Response(api_response) 215 | 216 | async def scrape_url( 217 | self, 218 | url: str, 219 | user_agent_type: Optional[str] = None, 220 | geo_location: Optional[str] = None, 221 | callback_url: Optional[str] = None, 222 | render: Optional[str] = None, 223 | parse: Optional[bool] = None, 224 | parsing_instructions: Optional[dict] = None, 225 | request_timeout: Optional[int] = 165, 226 | job_completion_timeout: Optional[int] = None, 227 | poll_interval: Optional[int] = None, 228 | **kwargs, 229 | ) -> Response: 230 | """ 231 | Asynchronously scrapes Bing search results for a given URL. 232 | 233 | Args: 234 | url (str): The URL to be scraped. 235 | 236 | user_agent_type (Optional[str]): Device type and browser. 237 | geo_location (Optional[str]): The API uses Canonical Geo Location format to 238 | determine request location. It goes like this: City,Region,Country 239 | callback_url (Optional[str]): URL to your callback endpoint. 240 | render (Optional[str]): Enables JavaScript rendering. 241 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 242 | parse (Optional[bool]): true will return structured data. 243 | request_timeout (int | 165, optional): The interval in seconds for 244 | the request to time out if no response is returned. 245 | Defaults to 165. 246 | poll_interval (Optional[int]): The interval in seconds to poll 247 | the server for a response. 
248 | job_completion_timeout (Optional[int]): The interval in 249 | seconds for the job to time out if no response is returned. 250 | 251 | Returns: 252 | Response: The response containing the scraped results. 253 | """ 254 | 255 | config = prepare_config( 256 | request_timeout=request_timeout, 257 | poll_interval=poll_interval, 258 | job_completion_timeout=job_completion_timeout, 259 | async_integration=True, 260 | ) 261 | 262 | payload = { 263 | "source": source.BING_URL, 264 | "url": url, 265 | "user_agent_type": user_agent_type, 266 | "geo_location": geo_location, 267 | "callback_url": callback_url, 268 | "render": render, 269 | "parse": parse, 270 | "parsing_instructions": parsing_instructions, 271 | } 272 | check_parsing_instructions_validity(parsing_instructions) 273 | api_response = await self._api_instance.get_response(payload, config) 274 | return Response(api_response) 275 | -------------------------------------------------------------------------------- /src/oxylabs/sources/kroger/kroger.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from oxylabs.internal.api import RealtimeAPI, AsyncAPI 4 | from oxylabs.sources.response import Response 5 | from oxylabs.utils.types import source 6 | from oxylabs.utils.utils import prepare_config 7 | 8 | 9 | class Kroger: 10 | def __init__(self, api_instance:RealtimeAPI) -> None: 11 | """ 12 | Initializes an instance of the Kroger class. 13 | 14 | Args: 15 | api_instance: An instance of the RealtimeAPI class used for making requests. 16 | """ 17 | self._api_instance = api_instance 18 | 19 | def scrape_product( 20 | self, 21 | product_id: str, 22 | render: Optional[str] = None, 23 | callback_url: Optional[str] = None, 24 | user_agent_type: Optional[str] = None, 25 | store_id: Optional[int] = None, 26 | delivery_zip: Optional[str] = None, 27 | fulfillment_type: Optional[str] = None, 28 | request_timeout: Optional[int] = 165, 29 | **kwargs 30 | ) -> Response: 31 | """ 32 | Scrapes Kroger product page for a given query and product ID. 33 | 34 | Args: 35 | product_id (str): The product ID. 36 | render (Optional[str]): Enables JavaScript rendering. 37 | callback_url (Optional[str]): URL to your callback endpoint. 38 | user_agent_type (Optional[str]): Device type and browser. 39 | store_id (Optional[int]): The store ID. 40 | delivery_zip (Optional[str]): The delivery location ZIP code. 41 | fulfillment_type (Optional[str]): The Fulfillment method. 42 | request_timeout (int | 165, optional): The interval in seconds for 43 | the request to time out if no response is returned. 44 | Defaults to 165. 45 | 46 | Returns: 47 | Response: The response from the server after the job is completed. 
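        Example:
            An illustrative sketch only: it assumes the realtime client
            exposes this source as client.kroger (by analogy with client.bing
            in the README) and uses a made-up product ID and ZIP code.

                client = RealtimeClient("YOUR_USERNAME", "YOUR_PASSWORD")
                result = client.kroger.scrape_product("1234567890", delivery_zip="10001")
                print(result.raw)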
48 | """ 49 | 50 | config = prepare_config(request_timeout=request_timeout) 51 | payload = { 52 | "source": source.KROGER_PRODUCT, 53 | "product_id": product_id, 54 | "render": render, 55 | "callback_url": callback_url, 56 | "user_agent_type": user_agent_type, 57 | "store_id": store_id, 58 | "delivery_zip": delivery_zip, 59 | "fulfillment_type": fulfillment_type, 60 | **kwargs, 61 | } 62 | api_response = self._api_instance.get_response(payload, config) 63 | return Response(api_response) 64 | 65 | def scrape_search( 66 | self, 67 | query: str, 68 | render: Optional[str] = None, 69 | callback_url: Optional[str] = None, 70 | user_agent_type: Optional[str] = None, 71 | store_id: Optional[int] = None, 72 | delivery_zip: Optional[str] = None, 73 | fulfillment_type: Optional[str] = None, 74 | request_timeout: Optional[int] = 165, 75 | **kwargs 76 | ) -> Response: 77 | """ 78 | Scrapes Kroger search page for a given query. 79 | 80 | Args: 81 | query (str): The search query. 82 | render (Optional[str]): Enables JavaScript rendering. 83 | callback_url (Optional[str]): URL to your callback endpoint. 84 | user_agent_type (Optional[str]): Device type and browser. 85 | store_id (Optional[int]): The store ID. 86 | delivery_zip (Optional[str]): The delivery location ZIP code. 87 | fulfillment_type (Optional[str]): The Fulfillment method. 88 | request_timeout (int | 165, optional): The interval in seconds for 89 | the request to time out if no response is returned. 90 | Defaults to 165. 91 | 92 | Returns: 93 | Response: The response from the server after the job is completed. 94 | """ 95 | 96 | config = prepare_config(request_timeout=request_timeout) 97 | payload = { 98 | "source": source.KROGER_SEARCH, 99 | "query": query, 100 | "render": render, 101 | "callback_url": callback_url, 102 | "user_agent_type": user_agent_type, 103 | "store_id": store_id, 104 | "delivery_zip": delivery_zip, 105 | "fulfillment_type": fulfillment_type, 106 | **kwargs, 107 | } 108 | api_response = self._api_instance.get_response(payload, config) 109 | return Response(api_response) 110 | 111 | def scrape_url( 112 | self, 113 | url: str, 114 | render: Optional[str] = None, 115 | callback_url: Optional[str] = None, 116 | user_agent_type: Optional[str] = None, 117 | store_id: Optional[int] = None, 118 | delivery_zip: Optional[str] = None, 119 | fulfillment_type: Optional[str] = None, 120 | request_timeout: Optional[int] = 165, 121 | **kwargs 122 | ) -> Response: 123 | """ 124 | Scrapes Kroger page for a given URL. 125 | 126 | Args: 127 | url (str): Direct URL (link) to Kroger page. 128 | render (Optional[str]): Enables JavaScript rendering. 129 | callback_url (Optional[str]): URL to your callback endpoint. 130 | user_agent_type (Optional[str]): Device type and browser. 131 | store_id (Optional[int]): The store ID. 132 | delivery_zip (Optional[str]): The delivery location ZIP code. 133 | fulfillment_type (Optional[str]): The Fulfillment method. 134 | request_timeout (int | 165, optional): The interval in seconds for 135 | the request to time out if no response is returned. 136 | Defaults to 165. 137 | 138 | Returns: 139 | Response: The response from the server after the job is completed. 
140 | """ 141 | 142 | config = prepare_config(request_timeout=request_timeout) 143 | payload = { 144 | "source": source.KROGER, 145 | "url": url, 146 | "render": render, 147 | "callback_url": callback_url, 148 | "user_agent_type": user_agent_type, 149 | "store_id": store_id, 150 | "delivery_zip": delivery_zip, 151 | "fulfillment_type": fulfillment_type, 152 | **kwargs, 153 | } 154 | api_response = self._api_instance.get_response(payload, config) 155 | return Response(api_response) 156 | 157 | 158 | class KrogerAsync: 159 | def __init__(self, api_instance:AsyncAPI) -> None: 160 | """ 161 | Initializes an instance of the Kroger class. 162 | 163 | Args: 164 | api_instance: An instance of the AsyncAPI class used for making requests. 165 | """ 166 | self._api_instance = api_instance 167 | 168 | async def scrape_product( 169 | self, 170 | product_id: str, 171 | render: Optional[str] = None, 172 | callback_url: Optional[str] = None, 173 | user_agent_type: Optional[str] = None, 174 | store_id: Optional[int] = None, 175 | delivery_zip: Optional[str] = None, 176 | fulfillment_type: Optional[str] = None, 177 | request_timeout: Optional[int] = 165, 178 | job_completion_timeout: Optional[int] = None, 179 | poll_interval: Optional[int] = None, 180 | **kwargs 181 | ) -> Response: 182 | """ 183 | Asynchronously scrapes Kroger product page for a given query and product ID. 184 | 185 | Args: 186 | product_id (str): The product ID. 187 | render (Optional[str]): Enables JavaScript rendering. 188 | callback_url (Optional[str]): URL to your callback endpoint. 189 | user_agent_type (Optional[str]): Device type and browser. 190 | store_id (Optional[int]): The store ID. 191 | delivery_zip (Optional[str]): The delivery location ZIP code. 192 | fulfillment_type (Optional[str]): The Fulfillment method. 193 | request_timeout (int | 165, optional): The interval in seconds for 194 | the request to time out if no response is returned. 195 | Defaults to 165. 196 | poll_interval (Optional[int]): The interval in seconds to poll 197 | the server for a response. 198 | job_completion_timeout (Optional[int]): The interval in 199 | seconds for the job to time out if no response is returned. 200 | 201 | Returns: 202 | Response: The response from the server after the job is completed. 203 | """ 204 | 205 | config = prepare_config( 206 | request_timeout=request_timeout, 207 | poll_interval=poll_interval, 208 | job_completion_timeout=job_completion_timeout, 209 | async_integration=True, 210 | ) 211 | payload = { 212 | "source": source.KROGER_PRODUCT, 213 | "product_id": product_id, 214 | "render": render, 215 | "callback_url": callback_url, 216 | "user_agent_type": user_agent_type, 217 | "store_id": store_id, 218 | "delivery_zip": delivery_zip, 219 | "fulfillment_type": fulfillment_type, 220 | **kwargs, 221 | } 222 | api_response = await self._api_instance.get_response(payload, config) 223 | return Response(api_response) 224 | 225 | async def scrape_search( 226 | self, 227 | query: str, 228 | render: Optional[str] = None, 229 | callback_url: Optional[str] = None, 230 | user_agent_type: Optional[str] = None, 231 | store_id: Optional[int] = None, 232 | delivery_zip: Optional[str] = None, 233 | fulfillment_type: Optional[str] = None, 234 | request_timeout: Optional[int] = 165, 235 | job_completion_timeout: Optional[int] = None, 236 | poll_interval: Optional[int] = None, 237 | **kwargs 238 | ) -> Response: 239 | """ 240 | Asynchronously scrapes Kroger search page for a given query. 241 | 242 | Args: 243 | query (str): The search query. 
244 | render (Optional[str]): Enables JavaScript rendering. 245 | callback_url (Optional[str]): URL to your callback endpoint. 246 | user_agent_type (Optional[str]): Device type and browser. 247 | store_id (Optional[int]): The store ID. 248 | delivery_zip (Optional[str]): The delivery location ZIP code. 249 | fulfillment_type (Optional[str]): The Fulfillment method. 250 | request_timeout (int | 165, optional): The interval in seconds for 251 | the request to time out if no response is returned. 252 | Defaults to 165. 253 | poll_interval (Optional[int]): The interval in seconds to poll 254 | the server for a response. 255 | job_completion_timeout (Optional[int]): The interval in 256 | seconds for the job to time out if no response is returned. 257 | 258 | Returns: 259 | Response: The response from the server after the job is completed. 260 | """ 261 | 262 | config = prepare_config( 263 | request_timeout=request_timeout, 264 | poll_interval=poll_interval, 265 | job_completion_timeout=job_completion_timeout, 266 | async_integration=True, 267 | ) 268 | payload = { 269 | "source": source.KROGER_SEARCH, 270 | "query": query, 271 | "render": render, 272 | "callback_url": callback_url, 273 | "user_agent_type": user_agent_type, 274 | "store_id": store_id, 275 | "delivery_zip": delivery_zip, 276 | "fulfillment_type": fulfillment_type, 277 | **kwargs, 278 | } 279 | api_response = await self._api_instance.get_response(payload, config) 280 | return Response(api_response) 281 | 282 | async def scrape_url( 283 | self, 284 | url: str, 285 | render: Optional[str] = None, 286 | callback_url: Optional[str] = None, 287 | user_agent_type: Optional[str] = None, 288 | store_id: Optional[int] = None, 289 | delivery_zip: Optional[str] = None, 290 | fulfillment_type: Optional[str] = None, 291 | request_timeout: Optional[int] = 165, 292 | job_completion_timeout: Optional[int] = None, 293 | poll_interval: Optional[int] = None, 294 | **kwargs 295 | ) -> Response: 296 | """ 297 | Asynchronously scrapes Kroger page for a given URL. 298 | 299 | Args: 300 | url (str): Direct URL (link) to Kroger page. 301 | render (Optional[str]): Enables JavaScript rendering. 302 | callback_url (Optional[str]): URL to your callback endpoint. 303 | user_agent_type (Optional[str]): Device type and browser. 304 | store_id (Optional[int]): The store ID. 305 | delivery_zip (Optional[str]): The delivery location ZIP code. 306 | fulfillment_type (Optional[str]): The Fulfillment method. 307 | request_timeout (int | 165, optional): The interval in seconds for 308 | the request to time out if no response is returned. 309 | Defaults to 165. 310 | poll_interval (Optional[int]): The interval in seconds to poll 311 | the server for a response. 312 | job_completion_timeout (Optional[int]): The interval in 313 | seconds for the job to time out if no response is returned. 314 | 315 | Returns: 316 | Response: The response from the server after the job is completed. 
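        Example:
            An illustrative sketch of the asynchronous (Push-Pull) flow: it
            assumes AsyncClient exposes this source as client.kroger
            (mirroring client.bing in the README) and uses a placeholder URL;
            poll_interval and job_completion_timeout are the parameters
            documented above.

                import asyncio
                from oxylabs import AsyncClient

                async def main():
                    client = AsyncClient("YOUR_USERNAME", "YOUR_PASSWORD")
                    result = await client.kroger.scrape_url(
                        "https://www.kroger.com/p/some-product",
                        job_completion_timeout=300,
                        poll_interval=5,
                    )
                    print(result.raw)

                asyncio.run(main())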
317 | """ 318 | 319 | config = prepare_config( 320 | request_timeout=request_timeout, 321 | poll_interval=poll_interval, 322 | job_completion_timeout=job_completion_timeout, 323 | async_integration=True, 324 | ) 325 | payload = { 326 | "source": source.KROGER, 327 | "url": url, 328 | "render": render, 329 | "callback_url": callback_url, 330 | "user_agent_type": user_agent_type, 331 | "store_id": store_id, 332 | "delivery_zip": delivery_zip, 333 | "fulfillment_type": fulfillment_type, 334 | **kwargs, 335 | } 336 | api_response = await self._api_instance.get_response(payload, config) 337 | return Response(api_response) 338 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Oxylabs Python SDK 2 | 3 | [![Oxylabs promo code](https://raw.githubusercontent.com/oxylabs/product-integrations/refs/heads/master/Affiliate-Universal-1090x275.png)](https://oxylabs.io/pages/gitoxy?utm_source=877&utm_medium=affiliate&groupid=877&utm_content=oxylabs-sdk-python-github&transaction_id=102f49063ab94276ae8f116d224b67) 4 | 5 | [![](https://dcbadge.limes.pink/api/server/Pds3gBmKMH?style=for-the-badge&theme=discord)](https://discord.gg/Pds3gBmKMH) [![YouTube](https://img.shields.io/badge/YouTube-Oxylabs-red?style=for-the-badge&logo=youtube&logoColor=white)](https://www.youtube.com/@oxylabs) 6 | 7 | This is a Python SDK for the [Oxylabs](https://oxylabs.io) 8 | [Scraper APIs](https://developers.oxylabs.io/scraper-apis/web-scraper-api#getting-started). 9 | 10 | This SDK helps integrate with Oxylabs’ all-in-one Web Scraper API. 11 | It can help you retrieve data from e-commerce websites, search engines (SERP), 12 | real estate platforms, and more. 13 | 14 | The Python SDK provides you with several benefits over using the raw APIs 15 | directly: 16 | 17 | - **Simplified Interface**: abstracts away complexities, offering a 18 | straightforward user interface for interacting with the Oxylabs API. 19 | - **Automated Request Management**: streamlines the handling of API requests and 20 | responses for enhanced efficiency and reliability. 21 | - **Error Handling**: provides meaningful error messages and handles common API 22 | errors, simplifying troubleshooting. 23 | - **Result Parsing**: streamlines the process of extracting relevant data from HTML results, 24 | allowing developers to focus on application logic. 25 | 26 | ## Requirements 27 | 28 | - Python 3.5 or above. 29 | 30 | You can check your Python version by running the following command in your 31 | preferred terminal: 32 | 33 | ```sh 34 | python --version 35 | ``` 36 | 37 | Or, for systems with multiple Python versions installed: 38 | 39 | ```sh 40 | python3 --version 41 | ``` 42 | 43 | If you need to install or update python you can do so by following the steps 44 | mentioned [here](https://www.python.org/downloads/). 45 | 46 | ## Authentication 47 | 48 | You will need an Oxylabs API username and password which you can get by signing 49 | up at https://oxylabs.io. You can check things out with a free trial at 50 | https://oxylabs.io/products/scraper-api. 51 | 52 | ## Installation 53 | 54 | ```bash 55 | pip install oxylabs 56 | ``` 57 | 58 | ### Quick Start 59 | 60 | ```python 61 | from oxylabs import RealtimeClient 62 | 63 | # Set your Oxylabs API Credentials. 64 | username = "username" 65 | password = "password" 66 | 67 | # Initialize the Realtime client with your credentials. 
68 | client = RealtimeClient(username, password) 69 | 70 | # Use `bing_search` as a source to scrape Bing with nike as a query. 71 | result = client.bing.scrape_search("nike") 72 | 73 | print(result.raw) 74 | ``` 75 | 76 | ### Integration Methods 77 | 78 | There are three integration methods for the Oxylabs SERP API, each exposed via 79 | different packages: 80 | 81 | - Realtime (Sync) - `RealtimeClient(username, password)` 82 | - Push-Pull (Async) - `AsyncClient(username, password)` 83 | - Proxy Endpoint - `ProxyClient(username, password)` 84 | 85 | Learn more about integration methods [on the official documentation](https://developers.oxylabs.io/scraper-apis/web-scraper-api/integration-methods) 86 | and how this SDK uses them [here](#integration-methods-1). 87 | 88 | ### Sources 89 | 90 | The Oxylabs API scrapes according to the sources provided via the API: 91 | 92 | | Target | Sources 93 | |------------------------| -------------- 94 | | **Amazon** | `amazon`, `amazon_product`, `amazon_search`, `amazon_pricing`, `amazon_sellers`, `amazon_bestsellers`, `amazon_reviews`, `amazon_questions` 95 | | **Google** | `google`, `google_search`, `google_ads`, `google_travel_hotels`, `google_suggest`,`google_trends_explore`,`google_maps`,`google_lens` 96 | | **Google Shopping** | `google_shopping`, `google_shopping_product`, `google_shopping_search`, `google_shopping_pricing` 97 | | **Bing** | `bing`, `bing_search` 98 | | **Kroger** | `kroger`, `kroger_product`, `kroger_search` 99 | | **Wayfair** | `wayfair`, `wayfair_search` 100 | | **Youtube Transcript** | `youtube_transcript` 101 | | **Other Websites** | `universal` 102 | 103 | These are the equivalent targets and methods available for scraping in the Python SDK: 104 | 105 | | Target | Methods 106 | |------------------------| -------------- 107 | | **amazon** | `scrape_search`, `scrape_url`, `scrape_product`, `scrape_pricing`, `scrape_reviews`, `scrape_questions`, `scrape_bestsellers`, `scrape_sellers` 108 | | **bing** | `scrape_search`, `scrape_url` 109 | | **google** | `scrape_search`, `scrape_url`, `scrape_ads`, `scrape_suggestions`, `scrape_travel_hotels`, `scrape_images`, `scrape_trends_explore`, `scrape_maps`, `scrape_lens` 110 | | **google_shopping** | `scrape_shopping_search`, `scrape_shopping_url`, `scrape_shopping_products`, `scrape_product_pricing` 111 | | **kroger** | `scrape_product`, `scrape_search`, `scrape_url` 112 | | **wayfair** | `scrape_search`, `scrape_url` 113 | | **youtube_transcript** | `scrape_transcript` 114 | | **universal** | `scrape_url` 115 | 116 | In the SDK you'll just need to call the relevant method name from the client. 117 | 118 | For example if you wish to scrape Bing search you can do it with the following code: 119 | 120 | ```python 121 | client = RealtimeClient(username, password) 122 | result = client.bing.scrape_search("football") 123 | ``` 124 | 125 | ### Query Parameters 126 | 127 | Each source has different accepted query parameters. For a detailed list of 128 | accepted parameters by each source you can head over to 129 | https://developers.oxylabs.io/scraper-apis/web-scraper-api. 130 | 131 | By default, scrape functions will use default parameters. 
If you need to send 132 | specific query parameters, here is an example of how to do it: 133 | 134 | ```python 135 | client = RealtimeClient(username, password) 136 | result = client.bing.scrape_search( 137 | "football", 138 | start_page=1, 139 | pages=3, 140 | limit=4, 141 | domain="com", 142 | ) 143 | ``` 144 | 145 | ### Configurable Options 146 | 147 | For consistency and ease of use, this SDK provides a list of pre-defined 148 | commonly used parameter values as constants in our library. You can use them by 149 | importing the oxylabs type module. 150 | 151 | ```python 152 | from oxylabs.utils.types import user_agent_type, render, domain 153 | ``` 154 | 155 | For the full list you can check the `types` directory. You can send in these 156 | values as strings too. 157 | 158 | These can be used as follows: 159 | 160 | ```python 161 | from oxylabs import RealtimeClient 162 | from oxylabs.utils.types import user_agent_type, render, domain 163 | 164 | client = RealtimeClient(username, password) 165 | result = client.google.scrape_search( 166 | "adidas", 167 | user_agent_type=user_agent_type.DESKTOP, 168 | render=render.HTML, 169 | domain=domain.COM, 170 | ) 171 | ``` 172 | 173 | ### Context Options for Google sources 174 | 175 | You can send in context options relevant to `google`, `amazon` and `universal` 176 | sources. Here are the [supported context values for google search](https://developers.oxylabs.io/scraper-apis/web-scraper-api/google/search). 177 | Similarly you can find supported context values for other sources in the 178 | documentation. 179 | Here's an example for Google Search scraping: 180 | 181 | ```python 182 | client = RealtimeClient(username, password) 183 | result = client.google.scrape_search( 184 | "adidas", 185 | parse=True, 186 | context=[ 187 | {"key": "results_language", "value": "en"}, 188 | {"key": "filter", "value": 0}, 189 | {"key": "tbm", "value": "isch"}, 190 | { 191 | "key": "limit_per_page", 192 | "value": [ 193 | {"page": 1, "limit": 10}, 194 | {"page": 2, "limit": 10}, 195 | ], 196 | }, 197 | ], 198 | ) 199 | ``` 200 | 201 | ### Parse instructions 202 | 203 | SDK supports [custom parsing](https://developers.oxylabs.io/scraper-apis/custom-parser) which lets 204 | you define your own parsing and data processing logic that is executed on a raw scraping result. 205 | 206 | ```python 207 | # Use `bing_search` as a source to scrape Bing using custom parsing 208 | # instructions. 209 | client = RealtimeClient(username, password) 210 | result = client.bing.scrape_url( 211 | "https://www.bing.com/search?q=nike", 212 | parse=True, 213 | parsing_instructions={ 214 | "number_of_results": { 215 | "_fns": [ 216 | { 217 | "_fn": "xpath_one", 218 | "_args": [".//span[@class='sb_count']/text()"], 219 | } 220 | ] 221 | } 222 | }, 223 | ) 224 | ``` 225 | 226 | ### Browser instructions 227 | 228 | SDK allows you to define your own [browser instructions](https://developers.oxylabs.io/scraper-apis/web-scraper-api/features/browser-instructions) 229 | that are executed when rendering JavaScript. 
230 | 231 | ```python 232 | client = RealtimeClient(username, password) 233 | result = client.universal.scrape_url( 234 | "https://www.ebay.com/", 235 | render="html", 236 | browser_instructions=[ 237 | { 238 | "type": "input", 239 | "value": "pizza boxes", 240 | "selector": { 241 | "type": "xpath", 242 | "value": "//input[@class='gh-tb ui-autocomplete-input']" 243 | } 244 | }, 245 | { 246 | "type": "click", 247 | "selector": { 248 | "type": "xpath", 249 | "value": "//input[@type='submit']" 250 | } 251 | }, 252 | { 253 | "type": "wait", 254 | "wait_time_s": 10 255 | } 256 | ]) 257 | ``` 258 | 259 | ### Dedicated parsers 260 | Oxylabs' Web Scraper API has dedicated parsers for some sources. You can find a list of available 261 | dedicated parsers [here](https://developers.oxylabs.io/scraper-apis/web-scraper-api/features/dedicated-parsers). If you want to use a dedicated parser to get structured data, 262 | add the **parse=True** parameter when calling a scrape method. 263 | 264 | Here is an example of using a dedicated parser: 265 | 266 | ```python 267 | # Scrape Amazon search results for the keyword "headset" 268 | # Then print a list of products including their ASIN and title 269 | client = RealtimeClient(username, password) 270 | response = client.amazon.scrape_search("headset", parse=True) 271 | 272 | for result in response.results: 273 | for item in result.content["results"]["organic"]: 274 | print(f"{item['asin']}: {item['title']}") 275 | ``` 276 | 277 | ## Integration Methods 278 | 279 | ### Realtime Integration 280 | 281 | Realtime is a synchronous integration method. This means that upon sending your 282 | job submission request, **you will have to keep the connection open** until we 283 | successfully finish your job or return an error. 284 | 285 | The **TTL** of Realtime connections is **150 seconds**. There may be rare cases 286 | where your connection times out before you receive a response from us, for 287 | example, if our system is under heavier-than-usual load or the job you submitted 288 | was extremely hard to complete. 289 | 290 | ### Push-Pull (Polling) Integration 291 | 292 | Push-Pull is an asynchronous integration method. This SDK implements this 293 | integration by polling the endpoint for results at a set 294 | interval. 295 | 296 | Using it is as straightforward as using the Realtime integration. The only 297 | difference is that it will return an asyncio Task that will eventually contain 298 | the Response. Below is an example of this integration method: 299 | 300 | ```python 301 | import asyncio 302 | from oxylabs import AsyncClient 303 | 304 | async def main(): 305 | # Set your Oxylabs API Credentials. 306 | username = "username" 307 | password = "password" 308 | 309 | # Initialize the async client with your credentials. 310 | client = AsyncClient(username, password) 311 | 312 | # 'timeout' specifies the maximum time (in seconds) to wait for the scraping 313 | # job to complete. 314 | # It is applicable for both Realtime and Push-Pull integrations. 315 | # 'poll_interval' is used only in Push-Pull integrations to set the delay 316 | # (in seconds) 317 | # between consecutive status checks of the job.
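    # A single job can also be awaited directly (a minimal sketch reusing the
    # client above; the keyword arguments mirror the scrape_url calls below):
    #   single = await client.bing.scrape_search("nike", timeout=35, poll_interval=3)
    #   print(single.raw)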
318 | tasks = [ 319 | client.bing.scrape_url( 320 | "https://www.bing.com/search?q=adidas", 321 | parse=True, 322 | timeout=35, 323 | poll_interval=3, 324 | ), 325 | client.bing.scrape_url( 326 | "https://www.bing.com/search?q=puma", 327 | parse=True, 328 | timeout=45, 329 | poll_interval=5, 330 | ), 331 | ] 332 | 333 | for future in asyncio.as_completed(tasks): 334 | result = await future 335 | 336 | 337 | if __name__ == "__main__": 338 | asyncio.run(main()) 339 | ``` 340 | 341 | ### Proxy Endpoint 342 | 343 | This method is also synchronous (like Realtime), but instead of using our 344 | service via a RESTful interface, you **can use our endpoint like a proxy**. Use 345 | Proxy Endpoint if you've used proxies before and would just like to get 346 | unblocked content from us. 347 | 348 | Since the parameters in this method are sent as headers, there are only a few 349 | parameters that this integration method accepts. You can find those parameters 350 | at 351 | https://developers.oxylabs.io/scraper-apis/web-scraper-api/integration-methods/proxy-endpoint#accepted-parameters. 352 | 353 | The Proxy Endpoint integration is very open-ended, allowing many different use 354 | cases: 355 | 356 | ```python 357 | from oxylabs import ProxyClient 358 | 359 | # Set your Oxylabs API Credentials. 360 | username = "username" 361 | password = "password" 362 | 363 | # Initialize the ProxyClient with your credentials. 364 | proxy = ProxyClient(username, password) 365 | 366 | # Customize headers for specific requirements (optional). 367 | proxy.add_user_agent_header("desktop_chrome") 368 | proxy.add_geo_location_header("Germany") 369 | proxy.add_render_header("html") 370 | 371 | # Use the proxy to make a request. 372 | result = proxy.get("https://www.example.com") 373 | 374 | print(result.text) 375 | ``` 376 | 377 | ## Additional Resources 378 | 379 | See the official [API Documentation](https://developers.oxylabs.io/) for 380 | details on each API's actual interface, which is implemented by this SDK. 381 | 382 | ## Contributing 383 | 384 | See [CONTRIBUTING](CONTRIBUTING.md) for more information. 385 | 386 | ## Security 387 | 388 | See [Security Issue 389 | Notifications](CONTRIBUTING.md#security-issue-notifications) for more 390 | information. 391 | 392 | ## License 393 | 394 | This project is licensed under the [MIT License](LICENSE). 395 | 396 | ## About Oxylabs 397 | 398 | Established in 2015, Oxylabs is a market-leading web intelligence collection 399 | platform, driven by the highest business, ethics, and compliance standards, 400 | enabling companies worldwide to unlock data-driven insights. 401 | 402 | [![image](https://oxylabs.io/images/og-image.png)](https://oxylabs.io/) 403 | -------------------------------------------------------------------------------- /src/oxylabs/sources/google_shopping/google_shopping.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from oxylabs.internal.api import RealtimeAPI, AsyncAPI 4 | from oxylabs.sources.response import Response 5 | from oxylabs.utils.types import source 6 | from oxylabs.utils.utils import ( 7 | check_parsing_instructions_validity, 8 | prepare_config, 9 | ) 10 | 11 | 12 | class GoogleShopping: 13 | def __init__(self, api_instance:RealtimeAPI) -> None: 14 | """ 15 | Initializes an instance of the Google Shopping class. 16 | 17 | Args: 18 | api_instance: An instance of the RealtimeAPI class used for making requests.
19 | """ 20 | self._api_instance = api_instance 21 | 22 | def scrape_shopping_search( 23 | self, 24 | query: str, 25 | domain: Optional[str] = None, 26 | start_page: Optional[int] = None, 27 | pages: Optional[int] = None, 28 | locale: Optional[str] = None, 29 | results_language: Optional[str] = None, 30 | geo_location: Optional[str] = None, 31 | user_agent_type: Optional[str] = None, 32 | callback_url: Optional[str] = None, 33 | render: Optional[str] = None, 34 | parse: Optional[bool] = None, 35 | context: Optional[list] = None, 36 | parsing_instructions: Optional[dict] = None, 37 | request_timeout: Optional[int] = 165, 38 | **kwargs 39 | ) -> Response: 40 | """ 41 | Scrapes Google Shopping search results for a given query. 42 | 43 | Args: 44 | query (str): UTF-encoded keyword 45 | domain (Optional[str]): The domain to limit the search results to. 46 | start_page (Optional[int]): The starting page number. 47 | pages (Optional[int]): The number of pages to scrape. 48 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 49 | results_language (Optional[str]): None, 50 | geo_location (Optional[str]): None, 51 | user_agent_type (Optional[str]): Device type and browser. 52 | callback_url (Optional[str]): URL to your callback endpoint. 53 | render (Optional[str]): Enables JavaScript rendering. 54 | parse (Optional[bool]): true will return structured data. 55 | context: Optional[list], 56 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 57 | request_timeout (int | 165, optional): The interval in seconds for 58 | the request to time out if no response is returned. 59 | Defaults to 165. 60 | 61 | Returns: 62 | Response: The response from the server after the job is completed. 63 | """ 64 | 65 | config = prepare_config(request_timeout=request_timeout) 66 | payload = { 67 | "source": source.GOOGLE_SHOPPING_SEARCH, 68 | "domain": domain, 69 | "query": query, 70 | "start_page": start_page, 71 | "pages": pages, 72 | "locale": locale, 73 | "results_language": results_language, 74 | "geo_location": geo_location, 75 | "user_agent_type": user_agent_type, 76 | "render": render, 77 | "callback_url": callback_url, 78 | "context": context, 79 | "parse": parse, 80 | "parsing_instructions": parsing_instructions, 81 | **kwargs, 82 | } 83 | check_parsing_instructions_validity(parsing_instructions) 84 | api_response = self._api_instance.get_response(payload, config) 85 | return Response(api_response) 86 | 87 | def scrape_shopping_url( 88 | self, 89 | url: str, 90 | user_agent_type: Optional[str] = None, 91 | render: Optional[str] = None, 92 | callback_url: Optional[str] = None, 93 | geo_location: Optional[str] = None, 94 | parse: Optional[bool] = None, 95 | parsing_instructions: Optional[dict] = None, 96 | request_timeout: Optional[int] = 165, 97 | **kwargs 98 | ) -> Response: 99 | """ 100 | Scrapes Google Shopping search results for a given URL. 101 | 102 | Args: 103 | url (str): Direct URL (link) to Google page 104 | the search. 105 | user_agent_type (Optional[str]): Device type and browser. 106 | render (Optional[str]): Enables JavaScript rendering. 107 | callback_url (Optional[str]): URL to your callback endpoint. 108 | geo_location (Optional[str]): None, 109 | parse (Optional[bool]): true will return structured data. 110 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 111 | request_timeout (int | 165, optional): The interval in seconds for 112 | the request to time out if no response is returned. 
113 | Defaults to 165. 114 | 115 | Returns: 116 | Response: The response from the server after the job is completed. 117 | """ 118 | 119 | config = prepare_config(request_timeout=request_timeout) 120 | payload = { 121 | "source": source.GOOGLE_SHOPPING_URL, 122 | "url": url, 123 | "user_agent_type": user_agent_type, 124 | "render": render, 125 | "callback_url": callback_url, 126 | "geo_location": geo_location, 127 | "parse": parse, 128 | "parsing_instructions": parsing_instructions, 129 | **kwargs, 130 | } 131 | check_parsing_instructions_validity(parsing_instructions) 132 | api_response = self._api_instance.get_response(payload, config) 133 | return Response(api_response) 134 | 135 | def scrape_shopping_products( 136 | self, 137 | query: str, 138 | domain: Optional[str] = None, 139 | locale: Optional[str] = None, 140 | results_language: Optional[str] = None, 141 | geo_location: Optional[str] = None, 142 | user_agent_type: Optional[str] = None, 143 | render: Optional[str] = None, 144 | callback_url: Optional[str] = None, 145 | parse: Optional[bool] = None, 146 | parsing_instructions: Optional[dict] = None, 147 | request_timeout: Optional[int] = 165, 148 | **kwargs 149 | ) -> Response: 150 | """ 151 | Scrapes Google Shopping product results for a given query. 152 | 153 | Args: 154 | query (str): UTF-encoded product code. 155 | domain (Optional[str]): The domain to limit the search results to. 156 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 157 | "results_language": None, 158 | geo_location (Optional[str]): None, 159 | user_agent_type (Optional[str]): Device type and browser. 160 | render (Optional[str]): Enables JavaScript rendering. 161 | callback_url (Optional[str]): URL to your callback endpoint. 162 | parse (Optional[bool]): true will return structured data. 163 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 164 | request_timeout (int | 165, optional): The interval in seconds for 165 | the request to time out if no response is returned. 166 | Defaults to 165. 167 | Returns: 168 | Response: The response from the server after the job is completed. 169 | """ 170 | 171 | config = prepare_config(request_timeout=request_timeout) 172 | payload = { 173 | "source": source.GOOGLE_SHOPPING_PRODUCT, 174 | "query": query, 175 | "domain": domain, 176 | "locale": locale, 177 | "results_language": results_language, 178 | "geo_location": geo_location, 179 | "user_agent_type": user_agent_type, 180 | "render": render, 181 | "callback_url": callback_url, 182 | "parse": parse, 183 | "parsing_instructions": parsing_instructions, 184 | **kwargs, 185 | } 186 | check_parsing_instructions_validity(parsing_instructions) 187 | api_response = self._api_instance.get_response(payload, config) 188 | return Response(api_response) 189 | 190 | def scrape_product_pricing( 191 | self, 192 | query: str, 193 | domain: Optional[str] = None, 194 | start_page: Optional[int] = None, 195 | pages: Optional[int] = None, 196 | locale: Optional[str] = None, 197 | results_language: Optional[str] = None, 198 | geo_location: Optional[str] = None, 199 | user_agent_type: Optional[str] = None, 200 | render: Optional[str] = None, 201 | callback_url: Optional[str] = None, 202 | parse: Optional[bool] = None, 203 | parsing_instructions: Optional[dict] = None, 204 | request_timeout: Optional[int] = 165, 205 | **kwargs 206 | ) -> Response: 207 | """ 208 | Scrapes Google Shopping product pricing results for a given product code. 
209 | 210 | Args: 211 | query (str): UTF-encoded product code. 212 | domain (Optional[str]): The domain to limit the search results to. 213 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 214 | start_page (Optional[int]): The starting page number. 215 | pages (Optional[int]): The number of pages to scrape. 216 | "results_language": None, 217 | geo_location (Optional[str]): None, 218 | user_agent_type (Optional[str]): Device type and browser. 219 | render (Optional[str]): Enables JavaScript rendering. 220 | callback_url (Optional[str]): URL to your callback endpoint. 221 | parse (Optional[bool]): true will return structured data. 222 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 223 | request_timeout (int | 165, optional): The interval in seconds for 224 | the request to time out if no response is returned. 225 | Defaults to 165. 226 | Returns: 227 | Response: The response from the server after the job is completed. 228 | """ 229 | 230 | config = prepare_config(request_timeout=request_timeout) 231 | payload = { 232 | "source": source.GOOGLE_SHOPPING_PRICING, 233 | "domain": domain, 234 | "query": query, 235 | "start_page": start_page, 236 | "pages": pages, 237 | "locale": locale, 238 | "results_language": results_language, 239 | "geo_location": geo_location, 240 | "user_agent_type": user_agent_type, 241 | "render": render, 242 | "callback_url": callback_url, 243 | "parse": parse, 244 | "parsing_instructions": parsing_instructions, 245 | **kwargs, 246 | } 247 | check_parsing_instructions_validity(parsing_instructions) 248 | api_response = self._api_instance.get_response(payload, config) 249 | return Response(api_response) 250 | 251 | class GoogleShoppingAsync: 252 | def __init__(self, api_instance:AsyncAPI) -> None: 253 | """ 254 | Initializes an instance of the Google Shopping class. 255 | 256 | Args: 257 | api_instance: An instance of the AsyncAPI class used for making requests. 258 | """ 259 | self._api_instance = api_instance 260 | 261 | async def scrape_shopping_search( 262 | self, 263 | query: str, 264 | domain: Optional[str] = None, 265 | start_page: Optional[int] = None, 266 | pages: Optional[int] = None, 267 | locale: Optional[str] = None, 268 | results_language: Optional[str] = None, 269 | geo_location: Optional[str] = None, 270 | user_agent_type: Optional[str] = None, 271 | callback_url: Optional[str] = None, 272 | render: Optional[str] = None, 273 | parse: Optional[bool] = None, 274 | context: Optional[list] = None, 275 | parsing_instructions: Optional[dict] = None, 276 | request_timeout: Optional[int] = 165, 277 | job_completion_timeout: Optional[int] = None, 278 | poll_interval: Optional[int] = None, 279 | **kwargs 280 | ) -> Response: 281 | """ 282 | Scrapes Google Shopping search results for a given query. 283 | 284 | Args: 285 | query (str): UTF-encoded keyword. 286 | domain (Optional[str]): The domain to limit the search results to. 287 | start_page (Optional[int]): The starting page number. 288 | pages (Optional[int]): The number of pages to scrape. 289 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 290 | "results_language": None, 291 | geo_location (Optional[str]): None, 292 | user_agent_type (Optional[str]): Device type and browser. 293 | callback_url (Optional[str]): URL to your callback endpoint. 294 | render (Optional[str]): Enables JavaScript rendering. 295 | parse (Optional[bool]): true will return structured data. 
296 | context: Optional[list], 297 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 298 | request_timeout (int | 165, optional): The interval in seconds for 299 | the request to time out if no response is returned. 300 | Defaults to 165. 301 | poll_interval (Optional[int]): The interval in seconds to poll 302 | the server for a response. 303 | job_completion_timeout (Optional[int]): The interval in 304 | seconds for the job to time out if no response is returned. 305 | 306 | Returns: 307 | Response: The response from the server after the job is completed. 308 | """ 309 | 310 | config = prepare_config( 311 | request_timeout=request_timeout, 312 | poll_interval=poll_interval, 313 | job_completion_timeout=job_completion_timeout, 314 | async_integration=True, 315 | ) 316 | payload = { 317 | "source": source.GOOGLE_SHOPPING_SEARCH, 318 | "domain": domain, 319 | "query": query, 320 | "start_page": start_page, 321 | "pages": pages, 322 | "locale": locale, 323 | "results_language": results_language, 324 | "geo_location": geo_location, 325 | "user_agent_type": user_agent_type, 326 | "render": render, 327 | "callback_url": callback_url, 328 | "context": context, 329 | "parse": parse, 330 | "parsing_instructions": parsing_instructions, 331 | **kwargs, 332 | } 333 | check_parsing_instructions_validity(parsing_instructions) 334 | api_response = await self._api_instance.get_response(payload, config) 335 | return Response(api_response) 336 | 337 | async def scrape_shopping_url( 338 | self, 339 | url: str, 340 | user_agent_type: Optional[str] = None, 341 | render: Optional[str] = None, 342 | callback_url: Optional[str] = None, 343 | geo_location: Optional[str] = None, 344 | parse: Optional[bool] = None, 345 | parsing_instructions: Optional[dict] = None, 346 | request_timeout: Optional[int] = 165, 347 | job_completion_timeout: Optional[int] = None, 348 | poll_interval: Optional[int] = None, 349 | **kwargs 350 | ) -> Response: 351 | """ 352 | Scrapes Google Shopping search results for a given URL. 353 | 354 | Args: 355 | url (str): Direct URL (link) to Google page. 356 | user_agent_type (Optional[str]): Device type and browser. 357 | render (Optional[str]): Enables JavaScript rendering. 358 | callback_url (Optional[str]): URL to your callback endpoint. 359 | geo_location (Optional[str]): None, 360 | parse (Optional[bool]): true will return structured data. 361 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 362 | request_timeout (int | 165, optional): The interval in seconds for 363 | the request to time out if no response is returned. 364 | Defaults to 165. 365 | poll_interval (Optional[int]): The interval in seconds to poll 366 | the server for a response. 367 | job_completion_timeout (Optional[int]): The interval in 368 | seconds for the job to time out if no response is returned. 369 | Returns: 370 | Response: The response from the server after the job is completed. 
371 | """ 372 | 373 | config = prepare_config( 374 | request_timeout=request_timeout, 375 | poll_interval=poll_interval, 376 | job_completion_timeout=job_completion_timeout, 377 | async_integration=True, 378 | ) 379 | payload = { 380 | "source": source.GOOGLE_SHOPPING_URL, 381 | "url": url, 382 | "user_agent_type": user_agent_type, 383 | "render": render, 384 | "callback_url": callback_url, 385 | "geo_location": geo_location, 386 | "parse": parse, 387 | "parsing_instructions": parsing_instructions, 388 | **kwargs, 389 | } 390 | check_parsing_instructions_validity(parsing_instructions) 391 | api_response = await self._api_instance.get_response(payload, config) 392 | return Response(api_response) 393 | 394 | async def scrape_shopping_products( 395 | self, 396 | query: str, 397 | domain: Optional[str] = None, 398 | locale: Optional[str] = None, 399 | results_language: Optional[str] = None, 400 | geo_location: Optional[str] = None, 401 | user_agent_type: Optional[str] = None, 402 | render: Optional[str] = None, 403 | callback_url: Optional[str] = None, 404 | parse: Optional[bool] = None, 405 | parsing_instructions: Optional[dict] = None, 406 | request_timeout: Optional[int] = 165, 407 | job_completion_timeout: Optional[int] = None, 408 | poll_interval: Optional[int] = None, 409 | **kwargs 410 | ) -> Response: 411 | """ 412 | Scrapes Google Shopping product results for a given query. 413 | 414 | Args: 415 | query (str): UTF-encoded product code. 416 | domain (Optional[str]): The domain to limit the search results to. 417 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 418 | "results_language": None, 419 | geo_location (Optional[str]): None, 420 | user_agent_type (Optional[str]): Device type and browser. 421 | render (Optional[str]): Enables JavaScript rendering. 422 | callback_url (Optional[str]): URL to your callback endpoint. 423 | parse (Optional[bool]): true will return structured data. 424 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 425 | request_timeout (int | 165, optional): The interval in seconds for 426 | the request to time out if no response is returned. 427 | Defaults to 165. 428 | poll_interval (Optional[int]): The interval in seconds to poll 429 | the server for a response. 430 | job_completion_timeout (Optional[int]): The interval in 431 | seconds for the job to time out if no response is returned. 432 | Returns: 433 | Response: The response from the server after the job is completed. 
434 | """ 435 | 436 | config = prepare_config( 437 | request_timeout=request_timeout, 438 | poll_interval=poll_interval, 439 | job_completion_timeout=job_completion_timeout, 440 | async_integration=True, 441 | ) 442 | payload = { 443 | "source": source.GOOGLE_SHOPPING_PRODUCT, 444 | "query": query, 445 | "domain": domain, 446 | "locale": locale, 447 | "results_language": results_language, 448 | "geo_location": geo_location, 449 | "user_agent_type": user_agent_type, 450 | "render": render, 451 | "callback_url": callback_url, 452 | "parse": parse, 453 | "parsing_instructions": parsing_instructions, 454 | **kwargs, 455 | } 456 | check_parsing_instructions_validity(parsing_instructions) 457 | api_response = await self._api_instance.get_response(payload, config) 458 | return Response(api_response) 459 | 460 | async def scrape_product_pricing( 461 | self, 462 | query: str, 463 | domain: Optional[str] = None, 464 | start_page: Optional[int] = None, 465 | pages: Optional[int] = None, 466 | locale: Optional[str] = None, 467 | results_language: Optional[str] = None, 468 | geo_location: Optional[str] = None, 469 | user_agent_type: Optional[str] = None, 470 | render: Optional[str] = None, 471 | callback_url: Optional[str] = None, 472 | parse: Optional[bool] = None, 473 | parsing_instructions: Optional[dict] = None, 474 | request_timeout: Optional[int] = 165, 475 | job_completion_timeout: Optional[int] = None, 476 | poll_interval: Optional[int] = None, 477 | **kwargs 478 | ) -> Response: 479 | """ 480 | Scrapes Google Shopping product pricing results for a given product code. 481 | 482 | Args: 483 | url (str): UTF-encoded product code. 484 | domain (Optional[str]): The domain to limit the search results to. 485 | start_page (Optional[int]): The starting page number. 486 | pages (Optional[int]): The number of pages to scrape. 487 | locale (Optional[str]): Accept-Language header value which changes page web interface language. 488 | "results_language": None, 489 | geo_location (Optional[str]): None, 490 | user_agent_type (Optional[str]): Device type and browser. 491 | render (Optional[str]): Enables JavaScript rendering. 492 | callback_url (Optional[str]): URL to your callback endpoint. 493 | parse (Optional[bool]): true will return structured data. 494 | parsing_instructions (Optional[dict]): Instructions for parsing the results. 495 | request_timeout (int | 165, optional): The interval in seconds for 496 | the request to time out if no response is returned. 497 | Defaults to 165. 498 | poll_interval (Optional[int]): The interval in seconds to poll 499 | the server for a response. 500 | job_completion_timeout (Optional[int]): The interval in 501 | seconds for the job to time out if no response is returned. 502 | Returns: 503 | Response: The response from the server after the job is completed. 
504 | """ 505 | 506 | config = prepare_config( 507 | request_timeout=request_timeout, 508 | poll_interval=poll_interval, 509 | job_completion_timeout=job_completion_timeout, 510 | async_integration=True, 511 | ) 512 | payload = { 513 | "source": source.GOOGLE_SHOPPING_PRICING, 514 | "domain": domain, 515 | "query": query, 516 | "start_page": start_page, 517 | "pages": pages, 518 | "locale": locale, 519 | "results_language": results_language, 520 | "geo_location": geo_location, 521 | "user_agent_type": user_agent_type, 522 | "render": render, 523 | "callback_url": callback_url, 524 | "parse": parse, 525 | "parsing_instructions": parsing_instructions, 526 | **kwargs, 527 | } 528 | check_parsing_instructions_validity(parsing_instructions) 529 | api_response = await self._api_instance.get_response(payload, config) 530 | return Response(api_response) 531 | -------------------------------------------------------------------------------- /src/oxylabs/sources/response.py: -------------------------------------------------------------------------------- 1 | 2 | class Response: 3 | def __init__(self, data): 4 | if data is None: 5 | data = {} 6 | self.raw = data 7 | self.results = [Results(item) for item in data.get("results", [])] 8 | self.job = Job(data.get("job", {})) 9 | 10 | 11 | class Results: 12 | def __init__(self, data): 13 | if data is None: 14 | data = {} 15 | self.custom_content_parsed = data.get("custom_content_parsed", {}) 16 | self.content_parsed = Content(data.get("content_parsed", {})) 17 | self.content = data.get("content") 18 | self.created_at = data.get("created_at") 19 | self.updated_at = data.get("updated_at") 20 | self.page = data.get("page") 21 | self.url = data.get("url") 22 | self.job_id = data.get("job_id") 23 | self.status_code = data.get("status_code") 24 | self.parser_type = data.get("parser_type") 25 | 26 | 27 | class Content: 28 | def __init__(self, data): 29 | if data is None: 30 | data = {} 31 | self.raw = data 32 | self.url = data.get("url") 33 | self.title = data.get("title") 34 | self.pages = data.get("pages") 35 | self.query = data.get("query") 36 | self.images = data.get("images") 37 | self.variants = Variants(data.get("variants", {})) 38 | self.highlights = data.get("highlights", []) 39 | self.description = data.get("description") 40 | self.related_items = RelatedItems(data.get("related_items", {})) 41 | self.specifications = Specifications(data.get("specifications", {})) 42 | self.page = data.get("page") 43 | self.errors = data.get("_errors") 44 | self.results = Result(data.get("results", {})) 45 | self.rating = data.get("rating") 46 | self.pricing = [Pricing(item) for item in data.get("pricing", [])] 47 | self.ads = [AmazonProductAds(item) for item in data.get("ads", [])] 48 | self.asin = data.get("asin") 49 | self.price = data.get("price") 50 | self.stock = data.get("stock") 51 | self.coupon = data.get("coupon") 52 | self.category = [ 53 | AmazonProductCategory(item) for item in data.get("category", []) 54 | ] 55 | self.currency = data.get("currency") 56 | self.delivery = [ 57 | AmazonProductDelivery(item) for item in data.get("delivery", []) 58 | ] 59 | self.warnings = data.get("_warnings", []) 60 | self.deal_type = data.get("deal_type") 61 | self.page_type = data.get("page_type") 62 | self.price_sns = data.get("price_sns") 63 | self.variation = data.get("variation") 64 | self.has_videos = data.get("has_videos") 65 | self.sales_rank = [ 66 | AmazonProductSalesRank(item) for item in data.get("sales_rank", []) 67 | ] 68 | self.top_review = 
data.get("top_review") 69 | self.asin_in_url = data.get("asin_in_url") 70 | self.price_upper = data.get("price_upper") 71 | self.pricing_str = data.get("pricing_str") 72 | self.pricing_url = data.get("pricing_url") 73 | self.discount_end = data.get("discount_end") 74 | self.manufacturer = data.get("manufacturer") 75 | self.max_quantity = data.get("max_quantity") 76 | self.price_buybox = data.get("price_buybox") 77 | self.product_name = data.get("product_name") 78 | self.bullet_points = data.get("bullet_points") 79 | self.is_addon_item = data.get("is_addon_item") 80 | self.price_initial = data.get("price_initial") 81 | self.pricing_count = data.get("pricing_count") 82 | self.reviews_count = data.get("reviews_count") 83 | self.sns_discounts = data.get("sns_discounts", []) 84 | self.developer_info = data.get("developer_info", []) 85 | self.lightning_deal = data.get("lightning_deal") 86 | self.price_shipping = data.get("price_shipping") 87 | self.is_prime_pantry = data.get("is_prime_pantry") 88 | self.product_details = ProductDetails(data.get("product_details", {})) 89 | self.featured_merchant = data.get("featured_merchant", []) 90 | self.is_prime_eligible = data.get("is_prime_eligible") 91 | self.product_dimensions = data.get("product_dimensions") 92 | self.refurbished_product = AmazonRefurbishedProduct( 93 | data.get("refurbished_product", {}) 94 | ) 95 | self.answered_questions_count = data.get("answered_questions_count") 96 | self.rating_star_distribution = [ 97 | AmazonRatingStarDistribution(item) 98 | for item in data.get("rating_star_distribution", []) 99 | ] 100 | self.reviews = [ 101 | AmazonReviews(item) for item in data.get("reviews", []) 102 | ] 103 | self.questions = AmazonQuestions(data.get("questions", {})) 104 | self.questions_total = data.get("questions_total") 105 | self.business_name = data.get("business_name") 106 | self.recent_feedback = [ 107 | RecentFeedback(item) for item in data.get("recent_feedback", []) 108 | ] 109 | self.business_address = data.get("business_address") 110 | self.feedback_summary_table = FeedbackSummaryTable( 111 | data.get("feedback_summary_table", {}) 112 | ) 113 | self.review_count = data.get("review_count") 114 | self.last_visible_page = data.get("last_visible_page") 115 | self.parse_status_code = data.get("parse_status_code") 116 | 117 | 118 | class Result: 119 | def __init__(self, data): 120 | if data is None: 121 | data = {} 122 | self.raw = data 123 | self.paid = [Paid(item) for item in data.get("paid", [])] 124 | self.filters = [Filters(item) for item in data.get("filters", [])] 125 | self.search_information = SearchInformation( 126 | data.get("search_information") 127 | ) 128 | self.suggested = [ 129 | SuggestedAmazonSearch(item) for item in data.get("suggested", []) 130 | ] 131 | self.amazon_choices = [ 132 | AmazonChoices(item) for item in data.get("amazon_choices", []) 133 | ] 134 | self.instant_recommendations = [ 135 | InstantRecommendations(item) 136 | for item in data.get("instant_recommendations", []) 137 | ] 138 | self.pos = data.get("pos") 139 | self.url = data.get("url") 140 | self.asin = data.get("asin") 141 | self.price = data.get("price") 142 | self.title = data.get("title") 143 | self.rating = data.get("rating") 144 | self.currency = data.get("currency") 145 | self.is_prime = data.get("is_prime") 146 | self.price_str = data.get("price_str") 147 | self.price_upper = data.get("price_upper") 148 | self.ratings_count = data.get("ratings_count") 149 | self.pla = Pla(data.get("pla", {})) 150 | self.images = Image(data.get("images", 
{})) 151 | self.twitter = Twitter(data.get("twitter", {})) 152 | self.knowledge = Knowledge(data.get("knowledge", {})) 153 | self.local_pack = LocalPack(data.get("local_pack", {})) 154 | self.top_stories = TopStory(data.get("top_stories", {})) 155 | self.popular_products = [ 156 | PopularProducts(item) for item in data.get("popular_products", []) 157 | ] 158 | self.related_searches = RelatedSearches( 159 | data.get("related_searches", {}) 160 | ) 161 | self.related_questions = RelatedQuestions( 162 | data.get("related_questions", {}) 163 | ) 164 | self.item_carousel = ItemCarousel(data.get("item_carousel", {})) 165 | self.recipes = Recipes(data.get("recipes", {})) 166 | self.videos = Videos(data.get("videos", {})) 167 | self.featured_snippet = [ 168 | FeaturedSnippet(item) for item in data.get("featured_snippet", []) 169 | ] 170 | self.related_searches_categorized = [ 171 | RelatedSearchesCategorized(item) 172 | for item in data.get("related_searches_categorized", []) 173 | ] 174 | self.hotels = Hotels(data.get("hotels", {})) 175 | self.flights = Flights(data.get("flights", {})) 176 | self.video_box = VideoBox(data.get("video_box", {})) 177 | self.local_service_ads = LocalServiceAds( 178 | data.get("local_service_ads", {}) 179 | ) 180 | self.navigation = [ 181 | Navigation(item) for item in data.get("navigation", []) 182 | ] 183 | self.instant_answers = [ 184 | InstantAnswers(item) for item in data.get("instant_answers", []) 185 | ] 186 | self.visually_similar_images = VisuallySimilarImages( 187 | data.get("visually_similar_images", {}) 188 | ) 189 | self.total_results_count = data.get("total_results_count") 190 | 191 | 192 | class Paid: 193 | def __init__(self, data): 194 | if data is None: 195 | data = {} 196 | self.raw = data 197 | self.pos = data.get("pos") 198 | self.url = data.get("url") 199 | self.desc = data.get("desc") 200 | self.title = data.get("title") 201 | self.data_rw = data.get("data_rw") 202 | self.data_pcu = data.get("data_pcu") 203 | self.sitelinks = PaidSitelinks(data.get("sitelinks", {})) 204 | self.url_shown = data.get("url_shown") 205 | self.asin = data.get("asin") 206 | self.price = data.get("price") 207 | self.rating = data.get("rating") 208 | self.rel_pos = data.get("rel_pos") 209 | self.currency = data.get("currency") 210 | self.url_image = data.get("url_image") 211 | self.best_seller = data.get("best_seller") 212 | self.price_upper = data.get("price_upper") 213 | self.is_sponsored = data.get("is_sponsored") 214 | self.manufacturer = data.get("manufacturer") 215 | self.pricing_count = data.get("pricing_count") 216 | self.reviews_count = data.get("reviews_count") 217 | self.is_amazons_choice = data.get("is_amazons_choice") 218 | self.no_price_reason = data.get("no_price_reason") 219 | self.sales_volume = data.get("sales_volume") 220 | self.is_prime = data.get("is_prime") 221 | self.shipping_information = data.get("shipping_information") 222 | self.pos_overall = data.get("pos_overall") 223 | 224 | 225 | class PaidSitelinks: 226 | def __init__(self, data): 227 | if data is None: 228 | data = {} 229 | self.raw = data 230 | self.expanded = [Expanded(item) for item in data.get("expanded", [])] 231 | self.inline = [Inline(item) for item in data.get("inline", [])] 232 | 233 | 234 | class Expanded: 235 | def __init__(self, data): 236 | if data is None: 237 | data = {} 238 | self.raw = data 239 | self.url = data.get("url") 240 | self.desc = data.get("desc") 241 | self.title = data.get("title") 242 | 243 | 244 | class Inline: 245 | def __init__(self, data): 246 | if data is 
None: 247 | data = {} 248 | self.raw = data 249 | self.url = data.get("url") 250 | self.desc = data.get("desc") 251 | self.title = data.get("title") 252 | 253 | 254 | class Filters: 255 | def __init__(self, data): 256 | if data is None: 257 | data = {} 258 | self.raw = data 259 | self.name = data.get("name") 260 | self.values = [FilterValues(item) for item in data.get("values", [])] 261 | 262 | 263 | class FilterValues: 264 | def __init__(self, data): 265 | if data is None: 266 | data = {} 267 | self.raw = data 268 | self.url = data.get("url") 269 | self.value = data.get("value") 270 | 271 | 272 | class Organic: 273 | def __init__(self, data): 274 | if data is None: 275 | data = {} 276 | self.raw = data 277 | self.pos = data.get("pos") 278 | self.url = data.get("url") 279 | self.desc = data.get("desc") 280 | self.type = data.get("type") 281 | self.price = data.get("price") 282 | self.title = data.get("title") 283 | self.currency = data.get("currency") 284 | self.merchant = Merchant(data.get("merchant", {})) 285 | self.price_str = data.get("price_str") 286 | self.product_id = data.get("product_id") 287 | self.asin = data.get("asin") 288 | self.rating = data.get("rating") 289 | self.url_image = data.get("url_image") 290 | self.best_seller = data.get("best_seller") 291 | self.price_upper = data.get("price_upper") 292 | self.is_sponsored = data.get("is_sponsored") 293 | self.manufacturer = data.get("manufacturer") 294 | self.pricing_count = data.get("pricing_count") 295 | self.reviews_count = data.get("reviews_count") 296 | self.is_amazons_choice = data.get("is_amazons_choice") 297 | self.no_price_reason = data.get("no_price_reason") 298 | self.is_prime = data.get("is_prime") 299 | self.sales_volume = data.get("sales_volume") 300 | self.variations = [ 301 | Variations(item) for item in data.get("variations", []) 302 | ] 303 | self.images = [item for item in data.get("images", [])] 304 | self.site_links = OrganicSitelinks(data.get("sitelinks", {})) 305 | self.url_shown = data.get("url_shown") 306 | self.pos_overall = data.get("pos_overall") 307 | 308 | 309 | class Merchant: 310 | def __init__(self, data): 311 | if data is None: 312 | data = {} 313 | self.raw = data 314 | self.url = data.get("url") 315 | self.name = data.get("name") 316 | 317 | 318 | class Variations: 319 | def __init__(self, data): 320 | if data is None: 321 | data = {} 322 | self.raw = data 323 | self.asin = data.get("asin") 324 | self.title = data.get("title") 325 | self.price = data.get("price") 326 | self.price_strikethrough = data.get("price_strikethrough") 327 | self.not_available = data.get("not_available") 328 | 329 | 330 | class SearchInformation: 331 | def __init__(self, data): 332 | if data is None: 333 | data = {} 334 | self.raw = data 335 | self.query = data.get("query") 336 | self.showing_results_for = data.get("showing_results_for") 337 | self.image = SearchInformationImage(data.get("image", {})) 338 | self.total_results_count = data.get("total_results_count") 339 | 340 | 341 | class Variants: 342 | def __init__(self, data): 343 | if data is None: 344 | data = {} 345 | self.raw = data 346 | self.type = data.get("type") 347 | self.items = [VariantItem(item) for item in data.get("items", [])] 348 | 349 | 350 | class VariantItem: 351 | def __init__(self, data): 352 | if data is None: 353 | data = {} 354 | self.raw = data 355 | self.value = data.get("value") 356 | self.selected = data.get("selected") 357 | self.available = data.get("available") 358 | self.product_id = data.get("product_id") 359 | 360 | 361 | class 
RelatedItems: 362 | def __init__(self, data): 363 | if data is None: 364 | data = {} 365 | self.raw = data 366 | self.items = [RelatedItem(item) for item in data.get("items", [])] 367 | 368 | 369 | class RelatedItem: 370 | def __init__(self, data): 371 | if data is None: 372 | data = {} 373 | self.raw = data 374 | self.url = data.get("url") 375 | self.price = data.get("price") 376 | self.title = data.get("title") 377 | self.rating = data.get("rating") 378 | self.currency = data.get("currency") 379 | self.reviews_count = data.get("reviews_count") 380 | 381 | 382 | class Specifications: 383 | def __init__(self, data): 384 | if data is None: 385 | data = {} 386 | self.raw = data 387 | self.items = [ 388 | SpecificationItem(item) for item in data.get("items", []) 389 | ] 390 | self.section_title = data.get("section_title") 391 | 392 | 393 | class SpecificationItem: 394 | def __init__(self, data): 395 | if data is None: 396 | data = {} 397 | self.raw = data 398 | self.title = data.get("title") 399 | self.value = data.get("value") 400 | 401 | 402 | class Pricing: 403 | def __init__(self, data): 404 | if data is None: 405 | data = {} 406 | self.raw = data 407 | self.price = data.get("price") 408 | self.seller = data.get("seller") 409 | self.details = data.get("details") 410 | self.currency = data.get("currency") 411 | self.condition = data.get("condition") 412 | self.price_tax = data.get("price_tax") 413 | self.price_total = data.get("price_total") 414 | self.seller_link = data.get("seller_link") 415 | self.price_shipping = data.get("price_shipping") 416 | self.delivery = data.get("delivery") 417 | self.seller_id = data.get("seller_id") 418 | self.rating_count = data.get("rating_count") 419 | self.delivery_options = data.get("delivery_options") 420 | 421 | 422 | class SuggestedAmazonSearch: 423 | def __init__(self, data): 424 | if data is None: 425 | data = {} 426 | self.raw = data 427 | self.url = data.get("url") 428 | self.asin = data.get("asin") 429 | self.price = data.get("price") 430 | self.title = data.get("title") 431 | self.rating = data.get("rating") 432 | self.currency = data.get("currency") 433 | self.url_image = data.get("url_image") 434 | self.best_seller = data.get("best_seller") 435 | self.price_upper = data.get("price_upper") 436 | self.is_sponsored = data.get("is_sponsored") 437 | self.manufacturer = data.get("manufacturer") 438 | self.pricing_count = data.get("pricing_count") 439 | self.reviews_count = data.get("reviews_count") 440 | self.is_amazons_choice = data.get("is_amazons_choice") 441 | self.pos = data.get("pos") 442 | self.shipping_information = data.get("shipping_information") 443 | self.sales_volume = data.get("sales_volume") 444 | self.no_price_reason = data.get("no_price_reason") 445 | self.suggested_query = data.get("suggested_query") 446 | 447 | 448 | class AmazonChoices: 449 | def __init__(self, data): 450 | if data is None: 451 | data = {} 452 | self.raw = data 453 | self.url = data.get("url") 454 | self.asin = data.get("asin") 455 | self.price = data.get("price") 456 | self.title = data.get("title") 457 | self.rating = data.get("rating") 458 | self.currency = data.get("currency") 459 | self.url_image = data.get("url_image") 460 | self.best_seller = data.get("best_seller") 461 | self.price_upper = data.get("price_upper") 462 | self.is_sponsored = data.get("is_sponsored") 463 | self.manufacturer = data.get("manufacturer") 464 | self.pricing_count = data.get("pricing_count") 465 | self.reviews_count = data.get("reviews_count") 466 | self.is_amazons_choice = 
data.get("is_amazons_choice") 467 | self.pos = data.get("pos") 468 | self.is_prime = data.get("is_prime") 469 | self.shipping_information = data.get("shipping_information") 470 | self.sales_volume = data.get("sales_volume") 471 | self.no_price_reason = data.get("no_price_reason") 472 | self.variations = [ 473 | Variations(item) for item in data.get("variations", []) 474 | ] 475 | 476 | 477 | class InstantRecommendations: 478 | def __init__(self, data): 479 | if data is None: 480 | data = {} 481 | self.raw = data 482 | self.url = data.get("url") 483 | self.asin = data.get("asin") 484 | self.price = data.get("price") 485 | self.title = data.get("title") 486 | self.rating = data.get("rating") 487 | self.currency = data.get("currency") 488 | self.url_image = data.get("url_image") 489 | self.best_seller = data.get("best_seller") 490 | self.price_upper = data.get("price_upper") 491 | self.is_sponsored = data.get("is_sponsored") 492 | self.manufacturer = data.get("manufacturer") 493 | self.pricing_count = data.get("pricing_count") 494 | self.reviews_count = data.get("reviews_count") 495 | self.is_amazons_choice = data.get("is_amazons_choice") 496 | self.pos = data.get("pos") 497 | self.sales_volume = data.get("sales_volume") 498 | self.no_price_reason = data.get("no_price_reason") 499 | 500 | 501 | class AmazonProductAds: 502 | def __init__(self, data): 503 | if data is None: 504 | data = {} 505 | self.raw = data 506 | self.pos = data.get("pos") 507 | self.asin = data.get("asin") 508 | self.type = data.get("type") 509 | self.price = data.get("price") 510 | self.title = data.get("title") 511 | self.images = data.get("images", []) 512 | self.rating = data.get("rating") 513 | self.location = data.get("location") 514 | self.price_upper = data.get("price_upper") 515 | self.reviews_count = data.get("reviews_count") 516 | self.is_prime_eligible = data.get("is_prime_eligible") 517 | 518 | 519 | class AmazonProductCategory: 520 | def __init__(self, data): 521 | if data is None: 522 | data = {} 523 | self.raw = data 524 | self.ladder = [ 525 | {"url": item.get("url"), "name": item.get("name")} 526 | for item in data.get("ladder", []) 527 | ] 528 | 529 | 530 | class AmazonProductDelivery: 531 | def __init__(self, data): 532 | if data is None: 533 | data = {} 534 | self.raw = data 535 | self.date = Date(data.get("date", {})) 536 | self.type = data.get("type") 537 | 538 | 539 | class Date: 540 | def __init__(self, data): 541 | if data is None: 542 | data = {} 543 | self.raw = data 544 | self.by = data.get("by") 545 | self.from_date = data.get("from") 546 | 547 | 548 | class AmazonProductSalesRank: 549 | def __init__(self, data): 550 | if data is None: 551 | data = {} 552 | self.raw = data 553 | self.rank = data.get("rank") 554 | self.ladder = [ 555 | {"url": item.get("url"), "name": item.get("name")} 556 | for item in data.get("ladder", []) 557 | ] 558 | 559 | 560 | class ProductDetails: 561 | def __init__(self, data): 562 | if data is None: 563 | data = {} 564 | self.raw = data 565 | self.asin = data.get("asin") 566 | self.batteries = data.get("batteries") 567 | self.item_weight = data.get("item_weight") 568 | self.manufacturer = data.get("manufacturer") 569 | self.customer_reviews = data.get("customer_reviews") 570 | self.best_sellers_rank = data.get("best_sellers_rank") 571 | self.country_of_origin = data.get("country_of_origin") 572 | self.item_model_number = data.get("item_model_number") 573 | self.product_dimensions = data.get("product_dimensions") 574 | self.date_first_available = 
data.get("date_first_available") 575 | self.is_discontinued_by_manufacturer = data.get( 576 | "is_discontinued_by_manufacturer" 577 | ) 578 | 579 | 580 | class AmazonRefurbishedProduct: 581 | def __init__(self, data): 582 | if data is None: 583 | data = {} 584 | self.raw = data 585 | self.link = Link(data.get("link", {})) 586 | self.condition_title = data.get("condition_title") 587 | 588 | 589 | class Link: 590 | def __init__(self, data): 591 | if data is None: 592 | data = {} 593 | self.raw = data 594 | self.url = data.get("url") 595 | self.title = data.get("title") 596 | 597 | 598 | class AmazonRatingStarDistribution: 599 | def __init__(self, data): 600 | if data is None: 601 | data = {} 602 | self.raw = data 603 | self.rating = data.get("rating") 604 | self.percentage = data.get("percentage") 605 | 606 | 607 | class AmazonReviews: 608 | def __init__(self, data): 609 | if data is None: 610 | data = {} 611 | self.raw = data 612 | self.id = data.get("id") 613 | self.title = data.get("title") 614 | self.author = data.get("author") 615 | self.rating = data.get("rating") 616 | self.content = data.get("content") 617 | self.timestamp = data.get("timestamp") 618 | self.is_verified = data.get("is_verified") 619 | self.product_attributes = data.get("product_attributes") 620 | 621 | 622 | class AmazonQuestions: 623 | def __init__(self, data): 624 | if data is None: 625 | data = {} 626 | self.raw = data 627 | self.title = data.get("title") 628 | self.votes = data.get("votes") 629 | self.answers = [Answer(item) for item in data.get("answers", [])] 630 | 631 | 632 | class Answer: 633 | def __init__(self, data): 634 | if data is None: 635 | data = {} 636 | self.raw = data 637 | self.author = data.get("author") 638 | self.content = data.get("content") 639 | self.timestamp = data.get("timestamp") 640 | 641 | 642 | class RecentFeedback: 643 | def __init__(self, data): 644 | if data is None: 645 | data = {} 646 | self.raw = data 647 | self.feedback = data.get("feedback") 648 | self.rated_by = data.get("rated_by") 649 | self.rating_stars = data.get("rating_stars") 650 | 651 | 652 | class FeedbackSummaryTable: 653 | def __init__(self, data): 654 | if data is None: 655 | data = {} 656 | self.raw = data 657 | self.counts = Counts(data.get("counts", {})) 658 | self.neutral = Counts(data.get("neutral", {})) 659 | self.negative = Counts(data.get("negative", {})) 660 | self.positive = Counts(data.get("positive", {})) 661 | 662 | 663 | class Counts: 664 | def __init__(self, data): 665 | if data is None: 666 | data = {} 667 | self.raw = data 668 | self.thirty_days = data.get("30_days") 669 | self.ninety_days = data.get("90_days") 670 | self.all_time = data.get("all_time") 671 | self.twelve_months = data.get("12_months") 672 | 673 | 674 | class Job: 675 | def __init__(self, data): 676 | if data is None: 677 | data = {} 678 | self.raw = data 679 | self.callback_url = data.get("callback_url") 680 | self.client_id = data.get("client_id") 681 | self.context = [Context(item) for item in data.get("context", [])] 682 | self.created_at = data.get("created_at") 683 | self.domain = data.get("domain") 684 | self.geo_location = data.get("geo_location") 685 | self.id = data.get("id") 686 | self.limit = data.get("limit") 687 | self.locale = data.get("locale") 688 | self.pages = data.get("pages") 689 | self.parse = data.get("parse") 690 | self.parser_type = data.get("parser_type") 691 | self.parsing_instructions = data.get("parsing_instructions") 692 | self.browser_instructions = data.get("browser_instructions") 693 | self.render = 
data.get("render") 694 | self.url = data.get("url") 695 | self.query = data.get("query") 696 | self.source = data.get("source") 697 | self.start_page = data.get("start_page") 698 | self.status = data.get("status") 699 | self.storage_type = data.get("storage_type") 700 | self.storage_url = data.get("storage_url") 701 | self.subdomain = data.get("subdomain") 702 | self.content_encoding = data.get("content_encoding") 703 | self.updated_at = data.get("updated_at") 704 | self.user_agent_type = data.get("user_agent_type") 705 | self.session_info = data.get("session_info") 706 | self.statuses = data.get("statuses") 707 | self.client_notes = data.get("client_notes") 708 | self.links = [JobLink(item) for item in data.get("_links", [])] 709 | 710 | 711 | class Context: 712 | def __init__(self, data): 713 | if data is None: 714 | data = {} 715 | self.raw = data 716 | self.key = data.get("key") 717 | self.value = data.get("value") 718 | 719 | 720 | class JobLink: 721 | def __init__(self, data): 722 | if data is None: 723 | data = {} 724 | self.raw = data 725 | self.rel = data.get("rel") 726 | self.href = data.get("href") 727 | self.method = data.get("method") 728 | 729 | class Pla: 730 | def __init__(self, data): 731 | if data is None: 732 | data = {} 733 | self.raw = data 734 | self.items = [PlaItem(item) for item in data.get("items", [])] 735 | self.pos_overall = data.get("pos_overall") 736 | 737 | class PlaItem: 738 | def __init__(self, data): 739 | if data is None: 740 | data = {} 741 | self.raw = data 742 | self.pos = data.get("pos") 743 | self.url = data.get("url") 744 | self.price = data.get("price") 745 | self.title = data.get("title") 746 | self.seller = data.get("seller") 747 | self.url_image = data.get("url_image") 748 | self.image_data = data.get("image_data") 749 | 750 | class Image: 751 | def __init__(self, data): 752 | if data is None: 753 | data = {} 754 | self.items = [ImageItem(item) for item in data.get("items", [])] 755 | self.pos_overall = data.get("pos_overall") 756 | 757 | 758 | class ImageItem: 759 | def __init__(self, data): 760 | if data is None: 761 | data = {} 762 | self.raw = data 763 | self.alt = data.get("alt") 764 | self.pos = data.get("pos") 765 | self.url = data.get("url") 766 | self.data = data.get("data") 767 | self.source = data.get("source") 768 | 769 | 770 | class OrganicSitelinks: 771 | def __init__(self, data): 772 | if data is None: 773 | data = {} 774 | self.raw = data 775 | self.expanded = [Expanded(item) for item in data.get("expanded", [])] 776 | self.inline = [Inline(item) for item in data.get("inline", [])] 777 | 778 | 779 | class Twitter: 780 | def __init__(self, data): 781 | if data is None: 782 | data = {} 783 | self.raw = data 784 | self.pos = data.get("pos") 785 | self.url = data.get("url") 786 | self.items = [TwitterItem(item) for item in data.get("items", [])] 787 | self.title = data.get("title") 788 | self.pos_overall = data.get("pos_overall") 789 | 790 | 791 | class TwitterItem: 792 | def __init__(self, data): 793 | if data is None: 794 | data = {} 795 | self.raw = data 796 | self.pos = data.get("pos") 797 | self.url = data.get("url") 798 | self.content = data.get("content") 799 | self.time_frame = data.get("time_frame") 800 | 801 | 802 | class Knowledge: 803 | def __init__(self, data): 804 | if data is None: 805 | data = {} 806 | self.raw = data 807 | self.title = data.get("title") 808 | self.images = [item for item in data.get("images", [])] 809 | self.factoids = [Factoid(item) for item in data.get("factoids", [])] 810 | self.profiles = 
[Profile(item) for item in data.get("profiles", [])] 811 | self.subtitle = data.get("subtitle") 812 | self.description = data.get("description") 813 | self.related_searches = [ 814 | RelatedSearches(item) for item in data.get("related_searches", []) 815 | ] 816 | 817 | 818 | class Factoid: 819 | def __init__(self, data): 820 | if data is None: 821 | data = {} 822 | self.raw = data 823 | self.links = [LinkElement(item) for item in data.get("links", [])] 824 | self.title = data.get("title") 825 | self.content = data.get("content") 826 | 827 | 828 | class LinkElement: 829 | def __init__(self, data): 830 | if data is None: 831 | data = {} 832 | self.raw = data 833 | self.href = data.get("href") 834 | self.title = data.get("title") 835 | 836 | 837 | class Profile: 838 | def __init__(self, data): 839 | if data is None: 840 | data = {} 841 | self.raw = data 842 | self.url = data.get("url") 843 | self.title = data.get("title") 844 | 845 | 846 | class RelatedSearches: 847 | def __init__(self, data): 848 | if data is None: 849 | data = {} 850 | self.raw = data 851 | self.url = data.get("url") 852 | self.title = data.get("title") 853 | self.section_title = data.get("section_title") 854 | self.pos_overall = data.get("pos_overall") 855 | self.related_searches = [ 856 | item for item in data.get("related_searches", []) 857 | ] 858 | 859 | 860 | class LocalPack: 861 | def __init__(self, data): 862 | if data is None: 863 | data = {} 864 | self.raw = data 865 | self.items = [LocalPackItem(item) for item in data.get("items", [])] 866 | self.pos_overall = data.get("pos_overall") 867 | 868 | 869 | class LocalPackItem: 870 | def __init__(self, data): 871 | if data is None: 872 | data = {} 873 | self.raw = data 874 | self.cid = data.get("cid") 875 | self.pos = data.get("pos") 876 | self.links = [LocalPackLink(item) for item in data.get("links", [])] 877 | self.phone = data.get("phone") 878 | self.title = data.get("title") 879 | self.rating = data.get("rating") 880 | self.address = data.get("address") 881 | self.subtitle = data.get("subtitle") 882 | self.rating_count = data.get("rating_count") 883 | 884 | 885 | class LocalPackLink: 886 | def __init__(self, data): 887 | if data is None: 888 | data = {} 889 | self.raw = data 890 | self.href = data.get("href") 891 | self.title = data.get("title") 892 | 893 | 894 | class TopStory: 895 | def __init__(self, data): 896 | if data is None: 897 | data = {} 898 | self.raw = data 899 | self.items = [TopStoryItem(item) for item in data.get("items", [])] 900 | self.pos_overall = data.get("pos_overall") 901 | 902 | 903 | class TopStoryItem: 904 | def __init__(self, data): 905 | if data is None: 906 | data = {} 907 | self.pos = data.get("pos") 908 | self.url = data.get("url") 909 | self.title = data.get("title") 910 | self.source = data.get("source") 911 | self.time_frame = data.get("time_frame") 912 | 913 | 914 | class PopularProducts: 915 | def __init__(self, data): 916 | if data is None: 917 | data = {} 918 | self.raw = data 919 | self.pos = data.get("pos") 920 | self.price = data.get("price") 921 | self.rating = data.get("rating") 922 | self.seller = data.get("seller") 923 | self.title = data.get("title") 924 | self.image_data = data.get("image_data") 925 | 926 | class RelatedQuestions: 927 | def __init__(self, data): 928 | if data is None: 929 | data = {} 930 | self.raw = data 931 | self.pos_overall = data.get("pos_overall") 932 | self.related_questions = [ 933 | RelatedQuestionsItem(item) 934 | for item in data.get("related_questions", []) 935 | ] 936 | 937 | 938 | class 
class RelatedQuestionsItem:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.pos = data.get("pos")
        self.answer = data.get("answer")
        self.source = data.get("source")
        self.question = data.get("question")


class Source:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.url = data.get("url")
        self.title = data.get("title")
        self.url_shown = data.get("url_shown")


class SearchInformationImage:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.url = data.get("url")
        self.width = data.get("width")
        self.height = data.get("height")
        self.other_sizes = data.get("other_sizes")


class ItemCarousel:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.items = [ItemCarouselItem(item) for item in data.get("items", [])]
        self.pos_overall = data.get("pos_overall")
        self.title = data.get("title")


class ItemCarouselItem:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.pos = data.get("pos")
        self.href = data.get("href")
        self.title = data.get("title")
        self.subtitle = data.get("subtitle")


class Recipes:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.items = [RecipesItem(item) for item in data.get("items", [])]
        self.pos_overall = data.get("pos_overall")


class RecipesItem:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.pos = data.get("pos")
        self.url = data.get("url")
        self.title = data.get("title")
        self.rating = data.get("rating")
        self.source = data.get("source")
        self.duration = data.get("duration")


class Videos:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.items = [VideosItem(item) for item in data.get("items", [])]
        self.pos_overall = data.get("pos_overall")


class VideosItem:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.pos = data.get("pos")
        self.url = data.get("url")
        self.title = data.get("title")
        self.author = data.get("author")
        self.source = data.get("source")


class FeaturedSnippet:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.url = data.get("url")
        self.desc = data.get("desc")
        self.title = data.get("title")
        self.url_shown = data.get("url_shown")
        self.pos_overall = data.get("pos_overall")


class RelatedSearchesCategorized:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.items = [
            RelatedSearchesCategorizedItem(item)
            for item in data.get("items", [])
        ]
        self.category = data.get("category")
        self.pos_overall = data.get("pos_overall")


class RelatedSearchesCategorizedItem:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.url = data.get("url")
        self.title = data.get("title")


class Category:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.name = data.get("name")
        self.type = data.get("type")


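# A small usage note with hypothetical values, not taken from a real
# response: HotelsResult and Flights below expose the upstream "from"
# key as .from_location, because "from" is a reserved keyword in Python
# and cannot appear after the attribute dot.
#
#     Flights({"from": "VNO", "to": "LHR"}).from_location   # "VNO"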
class Hotels:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.date_to = data.get("date_to")
        self.results = [HotelsResult(item) for item in data.get("results", [])]
        self.date_from = data.get("date_from")
        self.pos_overall = data.get("pos_overall")


class HotelsResult:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.price = data.get("price")
        self.title = data.get("title")
        self.from_location = data.get("from")


class Flights:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.to = data.get("to")
        self.from_location = data.get("from")
        self.results = [
            FlightsResult(item) for item in data.get("results", [])
        ]
        self.date_from = data.get("date_from")
        self.pos_overall = data.get("pos_overall")


class FlightsResult:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.url = data.get("url")
        self.type = data.get("type")
        self.price = data.get("price")
        self.airline = data.get("airline")
        self.duration = data.get("duration")


class VideoBox:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.url = data.get("url")
        self.title = data.get("title")
        self.pos_overall = data.get("pos_overall")


class LocalServiceAds:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.pos_overall = data.get("pos_overall")
        self.items = [
            LocalServiceAdsItem(item) for item in data.get("items", [])
        ]


class LocalServiceAdsItem:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.pos = data.get("pos")
        self.url = data.get("url")
        self.title = data.get("title")
        self.rating = data.get("rating")
        self.reviews_count = data.get("reviews_count")
        self.google_guaranteed = data.get("google_guaranteed")


class Navigation:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.url = data.get("url")
        self.title = data.get("title")
        self.pos = data.get("pos")


class InstantAnswers:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.type = data.get("type")
        self.parsed = data.get("_parsed")
        self.pos_overall = data.get("pos_overall")


class VisuallySimilarImages:
    def __init__(self, data):
        if data is None:
            data = {}
        self.raw = data
        self.all_images_url = data.get("all_images_url")
        self.featured_images = data.get("featured_images")
--------------------------------------------------------------------------------