├── .gitignore
├── LICENSE
├── MANIFEST.in
├── dribbble_py
│   ├── __init__.py
│   ├── cli.py
│   ├── dribbble_user.py
│   ├── silent_selector.py
│   └── utils.py
├── readme.md
├── requirements.txt
├── setup.py
└── test
    ├── __init__.py
    └── test_dribbble_user.py

/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | dist
3 | dribbble_py.egg-info
4 | proto
5 | venv
6 | __pycache__
7 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 rand-net
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.txt
2 | 
--------------------------------------------------------------------------------
/dribbble_py/__init__.py:
--------------------------------------------------------------------------------
1 | from .dribbble_user import *
2 | 
--------------------------------------------------------------------------------
/dribbble_py/cli.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import argparse
4 | import textwrap
5 | from art import tprint
6 | 
7 | from .dribbble_user import *
8 | 
9 | __version__ = "0.0.1"
10 | 
11 | 
12 | def main(argv=None):
13 |     argv = sys.argv if argv is None else argv
14 |     argparser = argparse.ArgumentParser(
15 |         prog="drbl_py",
16 |         formatter_class=argparse.RawTextHelpFormatter,
17 |         description=textwrap.dedent(
18 |             """
19 |             Dribbble-py {}
20 | 
21 |             Program to scrape dribbble user information
22 |             """.format(__version__)
23 |         ),
24 |         epilog=textwrap.dedent(
25 |             """
26 |             Example usage
27 |             -------------
28 | 
29 |             Download info about a user.
30 |             $ drbl_py -u JohnDoe
31 | 
32 |             Download info about a user to a custom JSON file.
33 |             $ drbl_py -u JohnDoe -j John
34 |             """
35 |         ),
36 |     )
37 | 
38 |     # User arguments
39 |     argparser.add_argument(
40 |         "-u",
41 |         "--username",
42 |         help="Enter username to scrape.\n\n",
43 |         dest="username",
44 |     )
45 |     argparser.add_argument(
46 |         "-m",
47 |         "--get-metadata",
48 |         help="Get metadata about every user shot.\nTakes longer to scrape.\nDefault = No metadata about user shots\n\n",
49 |         action="store_true",
50 |     )
51 |     argparser.add_argument(
52 |         "-j",
53 |         "--json-file",
54 |         help="Name of output JSON filename.\nDefault = username.json\n\n",
55 |         dest="json_file",
56 |     )
57 |     argparser.add_argument(
58 |         "--version", action="version", version="%(prog)s {}".format(__version__)
59 |     )
60 |     args = argparser.parse_args(argv[1:])
61 | 
62 |     if not args.username:
63 |         argparser.print_help()
64 |         sys.exit(1)
65 | 
66 |     # Set the output JSON filename
67 |     if args.json_file is None:
68 |         json_file = args.username + ".json"
69 |     else:
70 |         json_file = args.json_file + ".json"
71 | 
72 |     tprint("DRIBBBLE-PY")
73 |     print("version {}".format(__version__))
74 | 
75 |     # Time the full scrape
76 |     t1 = time.perf_counter()
77 |     try:
78 |         dribbble_user = DribbbleUser(args.username, json_file)
79 |         dribbble_user.check_user()
80 |         if args.get_metadata:
81 |             dribbble_user.run_nursery_with_metadata_scraper()
82 |         else:
83 |             dribbble_user.run_nursery_without_metadata_scraper()
84 |         dribbble_user.export_to_json()
85 | 
86 |         t2 = time.perf_counter()
87 |         print(f"\nScraping took {t2-t1:0.2f} second(s)...\n")
88 |     except KeyboardInterrupt:
89 |         print("Exiting dribbble-py...\n")
90 |         sys.exit(0)
91 | 
--------------------------------------------------------------------------------
/dribbble_py/dribbble_user.py:
--------------------------------------------------------------------------------
1 | import chompjs
2 | import re
3 | import json
4 | import trio
5 | import httpx
6 | from datetime import datetime
7 | from bs4 import BeautifulSoup
8 | import sys
9 | from dribbble_py.silent_selector import SilentSelector
10 | from dribbble_py.utils import int_k, get_redirect_url, string_to_number
11 | 
12 | sys.path.append("../dribbble_py")
13 | 
14 | 
15 | DRIBBBLE_URL = "https://dribbble.com"
16 | 
17 | 
18 | class DribbbleUser:
19 |     """
20 |     Scrapes available data of a Dribbble user
21 | 
22 |     Arguments:
23 |         username: string
24 |         json_file: string
25 | 
26 |     """
27 | 
28 |     def __init__(self, username: str, json_file: str):
29 |         self.username = username
30 | 
31 |         # Set JSON file name; default to "<username>.json"
32 |         if json_file is None:
33 |             self.json_file = username + ".json"
34 |         else:
35 |             self.json_file = json_file
36 |         self.dribbble_user_data = {}
37 | 
38 |         self.join_date_format = "%b %Y"
39 |         self.shot_published_date_format = "%b %d, %Y"
40 |         self.preferred_time_format = "%Y-%m-%d"
41 | 
42 |         self.shots_per_page = 8
43 |         self.project_shots_per_page = 8
44 |         self.members_per_page = 6
45 | 
46 |         self.scraper_header = {
47 |             "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4690.0 Safari/537.36/QInxREvS-38"
48 |         }
49 |         # Construct URLs for the various profile pages
50 |         self.user_pages = {
51 |             "main": "/",
52 |             "shots": "/shots",
53 |             "about": "/about",
54 |             "projects": "/projects",
55 |             "goods": "/goods",
56 |             "collections": "/collections",
57 |             "members": "/members?page=",
58 |         }
59 | 
60 |         self.user_pages = {
61 |             key: DRIBBBLE_URL + "/" + self.username + value
62 |             for key, value in self.user_pages.items()
63 |         }
64 | 
65 |     def check_user(self):
66 |         """
67 |         Check whether a dribbble user exists or not
68 |         """
69 |         try:
70 |             print("\n🔍 Searching for user " + self.username + "...\n")
71 |             user_page = 
httpx.get(self.user_pages["main"]) 72 | user_page_soup = BeautifulSoup(user_page.text, "lxml") 73 | 74 | sselect = SilentSelector(user_page_soup) 75 | if ( 76 | sselect.select_one("section.message-404", False, None) 77 | and sselect.select_one("section.collage-404", False, None) 78 | and sselect.select_one("div.collage-404-images", False, None) 79 | ): 80 | 81 | self.dribbble_user_data["user_exists"] = "No" 82 | print("✗ {} not found\n".format(self.username)) 83 | else: 84 | self.dribbble_user_data["user_exists"] = "Yes" 85 | print("✓ {} found\n".format(self.username)) 86 | print("Profile URL : {}".format(self.user_pages["main"])) 87 | 88 | except httpx.RequestError as ex: 89 | print(f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}") 90 | 91 | except httpx.HTTPStatusError as ex: 92 | print( 93 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 94 | ) 95 | 96 | async def scrape_user_pages_with_metadata_nursery(self): 97 | """ 98 | Scrape all available dribbble user pages with trio nursery 99 | """ 100 | async with trio.open_nursery() as nursery: 101 | nursery.start_soon(self.scrape_main_page) 102 | nursery.start_soon(self.scrape_about_page) 103 | nursery.start_soon(self.scrape_projects_page) 104 | nursery.start_soon(self.scrape_goods_page) 105 | nursery.start_soon(self.scrape_members_page) 106 | nursery.start_soon(self.scrape_collections_page) 107 | nursery.start_soon(self.scrape_shots_with_metadata_page) 108 | 109 | def run_nursery_with_metadata_scraper(self): 110 | """ 111 | Run the trio nursery for scraping pages of a dribbble user 112 | """ 113 | trio.run(self.scrape_user_pages_with_metadata_nursery) 114 | 115 | async def scrape_user_pages_without_metadata_nursery(self): 116 | """ 117 | Scrape all available dribbble user pages with trio nursery 118 | """ 119 | async with trio.open_nursery() as nursery: 120 | nursery.start_soon(self.scrape_main_page) 121 | nursery.start_soon(self.scrape_about_page) 122 | nursery.start_soon(self.scrape_projects_page) 123 | nursery.start_soon(self.scrape_goods_page) 124 | nursery.start_soon(self.scrape_members_page) 125 | nursery.start_soon(self.scrape_collections_page) 126 | nursery.start_soon(self.scrape_shots_without_metadata_page) 127 | 128 | def run_nursery_without_metadata_scraper(self): 129 | """ 130 | Run the trio nursery for scraping pages of a dribbble user 131 | """ 132 | trio.run(self.scrape_user_pages_without_metadata_nursery) 133 | 134 | async def scrape_main_page(self): 135 | """ 136 | Scrape data from the main page of a dribbble user 137 | """ 138 | 139 | async with httpx.AsyncClient() as client: 140 | 141 | try: 142 | user_page = await client.get( 143 | self.user_pages["main"], headers=self.scraper_header 144 | ) 145 | user_page_soup = BeautifulSoup(user_page.text, "lxml") 146 | sselect = SilentSelector(user_page_soup) 147 | 148 | # shots count 149 | shots_count = sselect.select_one("li.shots a span.count", True, None) 150 | self.dribbble_user_data["shots_count"] = string_to_number(shots_count) 151 | 152 | # projects count 153 | projects_count = sselect.select_one( 154 | "li.projects a span.count", True, None 155 | ) 156 | self.dribbble_user_data["projects_count"] = string_to_number( 157 | projects_count 158 | ) 159 | 160 | # collections count 161 | collections_count = sselect.select_one( 162 | "li.collections a span.count", True, None 163 | ) 164 | self.dribbble_user_data["collections_count"] = string_to_number( 165 | collections_count 166 | ) 167 | 168 | # liked shots count 169 | 
liked_shots = sselect.select_one("li.liked a span.count", True, None) 170 | self.dribbble_user_data["liked_shots"] = string_to_number(liked_shots) 171 | 172 | # user description 173 | self.dribbble_user_data["user_description"] = sselect.select_one( 174 | "div.masthead-intro h2", True, None 175 | ) 176 | 177 | # hire status 178 | self.dribbble_user_data["hire_status"] = bool( 179 | sselect.select_one( 180 | "div.hire-prompt-trigger.profile-action-item", False, None 181 | ) 182 | ) 183 | 184 | # members count 185 | members_count = sselect.select_one("li.members span.count", True, None) 186 | self.dribbble_user_data["members_count"] = string_to_number( 187 | members_count 188 | ) 189 | 190 | # team profile 191 | team_profile = sselect.select_one( 192 | "div.masthead-teams a.team-avatar-link[href]", False, "href" 193 | ) 194 | 195 | if team_profile is not None: 196 | self.dribbble_user_data["team_url"] = DRIBBBLE_URL + team_profile 197 | else: 198 | self.dribbble_user_data["team_url"] = None 199 | 200 | # print some of the info 201 | print( 202 | "Shots : {}".format( 203 | self.dribbble_user_data["shots_count"] 204 | ) 205 | ) 206 | print( 207 | "Projects : {}".format( 208 | self.dribbble_user_data["projects_count"] 209 | ) 210 | ) 211 | print( 212 | "Collections : {}".format( 213 | self.dribbble_user_data["collections_count"] 214 | ) 215 | ) 216 | print( 217 | "Liked Shots : {}".format( 218 | self.dribbble_user_data["liked_shots"] 219 | ) 220 | ) 221 | print("\n✓ Main page scraped...") 222 | 223 | except httpx.RequestError as ex: 224 | print( 225 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 226 | ) 227 | 228 | except httpx.HTTPStatusError as ex: 229 | print( 230 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
231 | ) 232 | 233 | async def scrape_about_page(self): 234 | """ 235 | Retrieves data from the about page of a dribbble user 236 | """ 237 | 238 | async with httpx.AsyncClient() as client: 239 | try: 240 | about_page = await client.get( 241 | self.user_pages["about"], headers=self.scraper_header 242 | ) 243 | about_page_soup = BeautifulSoup(about_page.text, "lxml") 244 | sselect = SilentSelector(about_page_soup) 245 | 246 | # profile stats - following, followers, tags 247 | profile_stats = [ 248 | stat.find("span", class_="count").text 249 | for stat in sselect.select( 250 | "section.content-section.profile-stats-section.medium-screens-only a " 251 | ) 252 | ] 253 | 254 | # user followers 255 | user_followers_count = profile_stats[0].replace(",", "") 256 | self.dribbble_user_data["followers"] = string_to_number( 257 | user_followers_count 258 | ) 259 | 260 | # user following 261 | user_following_count = profile_stats[1].replace(",", "") 262 | self.dribbble_user_data["following"] = string_to_number( 263 | user_following_count 264 | ) 265 | 266 | # user tags 267 | try: 268 | self.dribbble_user_data["tags"] = profile_stats[2] 269 | except IndexError: 270 | self.dribbble_user_data["tags"] = None 271 | 272 | # user location 273 | self.dribbble_user_data["location"] = ( 274 | str(sselect.select_one("p.location", True, None)) 275 | .replace("\n", "") 276 | .strip() 277 | ) 278 | 279 | # user bio 280 | self.dribbble_user_data["bio"] = str( 281 | sselect.select_one("p.bio-text", True, None) 282 | ).replace("\n", "") 283 | 284 | # user pro status 285 | self.dribbble_user_data["is_pro"] = bool( 286 | sselect.select_one("p.info-item.pro", False, None) 287 | ) 288 | 289 | # user join date 290 | join_date_string = ( 291 | str(sselect.select_one("p.info-item.created span", True, None)) 292 | .replace("Member since", "") 293 | .strip() 294 | ) 295 | join_date = datetime.strptime(join_date_string, self.join_date_format) 296 | self.dribbble_user_data["join_date"] = join_date.strftime( 297 | self.preferred_time_format 298 | ) 299 | 300 | # user skills 301 | skills_list = [ 302 | skill.text for skill in sselect.select("ul.skills-list a") 303 | ] 304 | self.dribbble_user_data["skills"] = skills_list 305 | 306 | # social media profiles 307 | self.dribbble_user_data["social_media_profiles"] = {} 308 | social_media_redirect_urls = [ 309 | DRIBBBLE_URL + anchor["href"] 310 | for anchor in sselect.select("ul.social-links-list a") 311 | ] 312 | 313 | for url in social_media_redirect_urls: 314 | profile_url, site = get_redirect_url(url) 315 | self.dribbble_user_data["social_media_profiles"][site] = profile_url 316 | 317 | # Print some of the info 318 | print("Followers :", self.dribbble_user_data["followers"]) 319 | print("Following :", self.dribbble_user_data["following"]) 320 | print("Location :", self.dribbble_user_data["location"]) 321 | print("Pro Status :", self.dribbble_user_data["is_pro"]) 322 | print("Join Date :", self.dribbble_user_data["join_date"]) 323 | print("Skills :", self.dribbble_user_data["skills"]) 324 | 325 | print("\n✓ About page scraped...") 326 | except httpx.RequestError as ex: 327 | print( 328 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 329 | ) 330 | 331 | except httpx.HTTPStatusError as ex: 332 | print( 333 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
334 | ) 335 | 336 | async def scrape_shots_without_metadata_page(self): 337 | """ 338 | Retrieves data from the shots page of a dribbble user 339 | """ 340 | 341 | user_shots = {} 342 | 343 | async with httpx.AsyncClient() as client: 344 | try: 345 | shots_page = await client.get( 346 | self.user_pages["main"], headers=self.scraper_header 347 | ) 348 | shots_page_soup = BeautifulSoup(shots_page.text, "lxml") 349 | sselect = SilentSelector(shots_page_soup) 350 | 351 | # total shots 352 | shots_count = string_to_number( 353 | sselect.select_one("li.shots a span.count", True, None) 354 | ) 355 | user_shots["shots_count"] = shots_count 356 | user_shots["shots"] = {} 357 | shots_urls = [] 358 | shot_names = [] 359 | 360 | # number of pages to scrape 361 | page_counter = 0 362 | max_pages = (shots_count // self.shots_per_page) + 5 363 | 364 | # iterate over pages 365 | while page_counter <= max_pages: 366 | 367 | current_shots_page = ( 368 | self.user_pages["shots"] 369 | + "?page=" 370 | + str(page_counter) 371 | + "&per_page=" 372 | + str(self.shots_per_page) 373 | ) 374 | async with httpx.AsyncClient() as client_i: 375 | 376 | # grab all shots info from current page 377 | shots_page = await client_i.get( 378 | current_shots_page, headers=self.scraper_header 379 | ) 380 | shots_page_soup = BeautifulSoup(shots_page.text, "lxml") 381 | sselect_shots = SilentSelector(shots_page_soup) 382 | 383 | # loop through each found shot 384 | for shot_soup in sselect_shots.find_all( 385 | "li", "shot-thumbnail", None, False, None 386 | ): 387 | current_shot = {} 388 | 389 | sselect_current_shot = SilentSelector(shot_soup) 390 | 391 | # shot titles 392 | current_shot_title = sselect_current_shot.select_one( 393 | "div.shot-title", True, None 394 | ) 395 | shot_names.append(current_shot_title) 396 | 397 | # shot URL 398 | current_shot_url = DRIBBBLE_URL + str( 399 | sselect_current_shot.select_one( 400 | "a.shot-thumbnail-link", False, "href" 401 | ) 402 | ) 403 | shots_urls.append(current_shot_url) 404 | 405 | current_shot["shot_url"] = current_shot_url 406 | 407 | # shot alt description 408 | current_shot[ 409 | "alt_description" 410 | ] = sselect_current_shot.select_one("img", False, "alt") 411 | user_shots["shots"][current_shot_title] = current_shot 412 | page_counter += 1 413 | 414 | except httpx.RequestError as ex: 415 | print( 416 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 417 | ) 418 | 419 | except httpx.HTTPStatusError as ex: 420 | print( 421 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
422 | ) 423 | 424 | self.dribbble_user_data["shots"] = user_shots 425 | print("\n✓ Shots page scraped...") 426 | 427 | async def scrape_shots_with_metadata_page(self): 428 | """ 429 | Retrieves data from the shots page of a dribbble user 430 | """ 431 | 432 | user_shots = {} 433 | 434 | async with httpx.AsyncClient() as client: 435 | try: 436 | shots_page = await client.get( 437 | self.user_pages["main"], headers=self.scraper_header 438 | ) 439 | shots_page_soup = BeautifulSoup(shots_page.text, "lxml") 440 | sselect = SilentSelector(shots_page_soup) 441 | 442 | # total shots 443 | shots_count = string_to_number( 444 | sselect.select_one("li.shots a span.count", True, None) 445 | ) 446 | user_shots["shots_count"] = shots_count 447 | user_shots["shots"] = {} 448 | shots_urls = [] 449 | shot_names = [] 450 | 451 | # number of pages to scrape 452 | page_counter = 0 453 | max_pages = (shots_count // self.shots_per_page) + 5 454 | 455 | # iterate over pages 456 | while page_counter <= max_pages: 457 | 458 | current_shots_page = ( 459 | self.user_pages["shots"] 460 | + "?page=" 461 | + str(page_counter) 462 | + "&per_page=" 463 | + str(self.shots_per_page) 464 | ) 465 | async with httpx.AsyncClient() as client_i: 466 | 467 | # grab all shots info from current page 468 | shots_page = await client_i.get( 469 | current_shots_page, headers=self.scraper_header 470 | ) 471 | shots_page_soup = BeautifulSoup(shots_page.text, "lxml") 472 | sselect_shots = SilentSelector(shots_page_soup) 473 | 474 | # loop through each found shot 475 | for shot_soup in sselect_shots.find_all( 476 | "li", "shot-thumbnail", None, False, None 477 | ): 478 | current_shot = {} 479 | 480 | sselect_current_shot = SilentSelector(shot_soup) 481 | 482 | # shot titles 483 | current_shot_title = sselect_current_shot.select_one( 484 | "div.shot-title", True, None 485 | ) 486 | shot_names.append(current_shot_title) 487 | 488 | # shot URL 489 | current_shot_url = DRIBBBLE_URL + str( 490 | sselect_current_shot.select_one( 491 | "a.shot-thumbnail-link", False, "href" 492 | ) 493 | ) 494 | shots_urls.append(current_shot_url) 495 | 496 | current_shot["shot_url"] = current_shot_url 497 | 498 | # shot alt description 499 | current_shot[ 500 | "alt_description" 501 | ] = sselect_current_shot.select_one("img", False, "alt") 502 | user_shots["shots"][current_shot_title] = current_shot 503 | page_counter += 1 504 | 505 | # Get more data about the shots 506 | user_shots = await self.get_shots_data( 507 | shots_urls, shot_names, user_shots["shots"] 508 | ) 509 | 510 | except httpx.RequestError as ex: 511 | print( 512 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 513 | ) 514 | 515 | except httpx.HTTPStatusError as ex: 516 | print( 517 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
518 |                 )
519 | 
520 |         self.dribbble_user_data["shots"] = user_shots
521 |         print("\n✓ Shots page scraped...")
522 | 
523 |     async def scrape_projects_page(self):
524 |         """
525 |         Retrieves data from the projects' page of a dribbble user
526 |         """
527 |         user_projects = {}
528 | 
529 |         async with httpx.AsyncClient() as client:
530 |             try:
531 |                 # scrape projects page
532 |                 projects_page = await client.get(
533 |                     self.user_pages["projects"], headers=self.scraper_header
534 |                 )
535 |                 projects_page_soup = BeautifulSoup(projects_page.text, "lxml")
536 |                 sselect = SilentSelector(projects_page_soup)
537 | 
538 |                 # project titles
539 |                 project_titles = [
540 |                     str(project.text).strip()
541 |                     for project in sselect.select("div.collection-name")
542 |                 ]
543 | 
544 |                 # project shot count
545 |                 project_shots_count = [
546 |                     int(
547 |                         str(project_shots.text)
548 |                         .replace("Shots", "")
549 |                         .replace("Shot", "")
550 |                         .strip()
551 |                     )
552 |                     for project_shots in sselect.select(
553 |                         "div.shots-group-meta>span.shots-count"
554 |                     )
555 |                 ]
556 | 
557 |                 # project updated date
558 |                 project_updated_dates = [
559 |                     datetime.strptime(
560 |                         str(project_updated_date.text).replace("Updated", "").strip(),
561 |                         "%B %d, %Y",
562 |                     ).strftime(self.preferred_time_format)
563 |                     for project_updated_date in sselect.select("span.timestamp")
564 |                 ]
565 | 
566 |                 # project urls
567 |                 project_urls = [
568 |                     DRIBBBLE_URL + str(anchor["href"])
569 |                     for anchor in sselect.select("a.shots-group")
570 |                 ]
571 | 
572 |                 # retrieve data about each project and its shots
573 |                 for (
574 |                     project_title,
575 |                     project_url,
576 |                     shots_count,
577 |                     project_updated_date,
578 |                 ) in zip(
579 |                     project_titles,
580 |                     project_urls,
581 |                     project_shots_count,
582 |                     project_updated_dates,
583 |                 ):
584 | 
585 |                     max_pages = (shots_count // self.project_shots_per_page) + 1
586 |                     page_number = 1
587 | 
588 |                     project_shots = {}
589 | 
590 |                     # loop through all pages of a project
591 |                     while page_number <= max_pages:
592 |                         project_page_url = project_url + "?page=" + str(page_number)
593 |                         page_number += 1
594 | 
595 |                         # get current project page soup
596 |                         async with httpx.AsyncClient() as client_i:
597 |                             try:
598 |                                 individual_project_page = await client_i.get(
599 |                                     project_page_url
600 |                                 )
601 |                                 individual_project_page_soup = BeautifulSoup(
602 |                                     individual_project_page.text, "lxml"
603 |                                 )
604 |                                 sselect_project = SilentSelector(
605 |                                     individual_project_page_soup
606 |                                 )
607 | 
608 |                                 # loop through each found shot
609 |                                 for shot_soup in sselect_project.find_all(
610 |                                     "div", "shot-section-item", None, False, None
611 |                                 ):
612 | 
613 |                                     current_shot = {}
614 |                                     sselect_shot = SilentSelector(shot_soup)
615 | 
616 |                                     # shot title
617 |                                     current_shot_title = sselect_shot.select_one(
618 |                                         "h3.shot-title a", True, None
619 |                                     )
620 | 
621 |                                     # shot published date
622 |                                     shot_pub_date = sselect_shot.select_one(
623 |                                         "p.shot-date", True, None
624 |                                     )
625 |                                     current_shot["shot_pub_date"] = datetime.strptime(
626 |                                         shot_pub_date, "%B %d, %Y"
627 |                                     ).strftime(self.preferred_time_format)
628 | 
629 |                                     # shot description
630 |                                     current_shot[
631 |                                         "shot_description"
632 |                                     ] = sselect_shot.select_one(
633 |                                         "p.shot-description", True, None
634 |                                     )
635 | 
636 |                                     # shot URL
637 |                                     current_shot[
638 |                                         "shot_url"
639 |                                     ] = DRIBBBLE_URL + sselect_shot.select_one(
640 |                                         "a.shot-link", False, "href"
641 |                                     )
642 | 
643 |                                     project_shots[current_shot_title] = current_shot
644 | 
645 |                                 # Assign the projects' shots to the project
646 |                                 user_projects[project_title] = {}
647 |                                 user_projects[project_title][
648 | "updated_date" 649 | ] = project_updated_date 650 | user_projects[project_title]["shots"] = project_shots 651 | 652 | except httpx.RequestError as ex: 653 | print( 654 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 655 | ) 656 | 657 | except httpx.HTTPStatusError as ex: 658 | print( 659 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 660 | ) 661 | 662 | except httpx.RequestError as ex: 663 | print( 664 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 665 | ) 666 | 667 | except httpx.HTTPStatusError as ex: 668 | print( 669 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 670 | ) 671 | 672 | # Add projects to main dict 673 | self.dribbble_user_data["projects"] = user_projects 674 | print("\n✓ Projects page scraped...") 675 | 676 | async def scrape_collections_page(self): 677 | """ 678 | Retrieves data from the collections' page of a dribbble user 679 | """ 680 | user_collections = {} 681 | 682 | async with httpx.AsyncClient() as client: 683 | 684 | try: 685 | collections_page = await client.get( 686 | self.user_pages["collections"], headers=self.scraper_header 687 | ) 688 | collections_page_soup = BeautifulSoup(collections_page.text, "lxml") 689 | sselect = SilentSelector(collections_page_soup) 690 | 691 | # loop through each collection 692 | for collection in sselect.find_all( 693 | "li", "shots-group-item", None, False, None 694 | ): 695 | current_collection = {} 696 | 697 | sselect_collection = SilentSelector(collection) 698 | 699 | # collections' name 700 | current_collection_name = str( 701 | sselect_collection.find( 702 | "div", "collection-name", None, True, None 703 | ) 704 | ).strip() 705 | 706 | # collections' shots count 707 | shots_count = str( 708 | sselect_collection.find("span", "shots-count", None, True, None) 709 | ).strip() 710 | shots_count = str(re.sub("Shot*.", "", shots_count)).strip() 711 | current_collection["shots_count"] = int(shots_count) 712 | 713 | # collections' designer Count 714 | designer_count = str( 715 | sselect_collection.find( 716 | "span", "designers-count", None, True, None 717 | ) 718 | ).strip() 719 | designer_count = str( 720 | re.sub("Designer*.", "", designer_count) 721 | ).strip() 722 | current_collection["designers_count"] = int(designer_count) 723 | 724 | # collections' URL 725 | collection_url = DRIBBBLE_URL + sselect_collection.find( 726 | "a", "shots-group", None, False, "href" 727 | ) 728 | current_collection["collection_url"] = collection_url 729 | 730 | # assign the collections' data to dict 731 | user_collections[current_collection_name] = current_collection 732 | 733 | # get current collections' page soup 734 | async with httpx.AsyncClient() as client_ii: 735 | 736 | try: 737 | collection_shots_page = await client_ii.get( 738 | collection_url, headers=self.scraper_header 739 | ) 740 | collection_shots_page_soup = BeautifulSoup( 741 | collection_shots_page.text, "lxml" 742 | ) 743 | user_collections[current_collection_name]["shots"] = {} 744 | 745 | sselect_current_collection = SilentSelector( 746 | collection_shots_page_soup 747 | ) 748 | 749 | # loop through each found shot 750 | for shot in sselect_current_collection.find_all( 751 | "li", "shot-thumbnail", None, False, None 752 | ): 753 | 754 | current_shot = {} 755 | sselect_shot = SilentSelector(shot) 756 | 757 | # shot title 758 | shot_title = str( 759 | sselect_shot.find( 760 | "div", "shot-title", None, True, None 761 | ) 762 | ).strip() 763 | 764 | # shot 
designer profile URL 765 | current_shot[ 766 | "designer_profile_url" 767 | ] = DRIBBBLE_URL + str( 768 | sselect_shot.select_one( 769 | "a.hoverable.url", False, "href" 770 | ) 771 | ) 772 | # shot designer username 773 | current_shot["designer_name"] = str( 774 | sselect_shot.find( 775 | "span", "display-name", None, True, None 776 | ) 777 | ).strip() 778 | 779 | # shot likes 780 | shot_likes = str( 781 | sselect_shot.find( 782 | "span", "js-shot-likes-count", None, True, None 783 | ) 784 | ).strip() 785 | if "k" in shot_likes or "K" in shot_likes: 786 | current_shot["shot_likes"] = int_k(shot_likes) 787 | else: 788 | current_shot["shot_likes"] = int(shot_likes) 789 | 790 | # shot views 791 | shot_views = str( 792 | ( 793 | sselect_shot.find( 794 | "span", 795 | "js-shot-views-count", 796 | None, 797 | True, 798 | None, 799 | ) 800 | ) 801 | ).strip() 802 | 803 | if "k" in shot_views or "K" in shot_views: 804 | current_shot["shot_views"] = int_k(shot_views) 805 | else: 806 | current_shot["shot_views"] = int(shot_views) 807 | 808 | # designer pro status 809 | if ( 810 | sselect_shot.find( 811 | "span", "badge-pro", None, False, None 812 | ) 813 | is not None 814 | ): 815 | is_pro = True 816 | else: 817 | is_pro = False 818 | current_shot["is_pro"] = is_pro 819 | 820 | # shot URL 821 | current_shot["shot_url"] = sselect_shot.find( 822 | "img", None, None, False, "src" 823 | ) 824 | 825 | # assign current collection dict to collections 826 | user_collections[current_collection_name]["shots"][ 827 | shot_title 828 | ] = current_shot 829 | 830 | except httpx.RequestError as ex: 831 | print( 832 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 833 | ) 834 | 835 | except httpx.HTTPStatusError as ex: 836 | print( 837 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 838 | ) 839 | 840 | except httpx.RequestError as ex: 841 | print( 842 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 843 | ) 844 | 845 | except httpx.HTTPStatusError as ex: 846 | print( 847 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
848 |                 )
849 | 
850 |         self.dribbble_user_data["collections"] = user_collections
851 |         print("\n✓ Collections page scraped...")
852 | 
853 |     async def scrape_members_page(self):
854 |         """
855 |         Retrieves data from the members' page of a dribbble user
856 |         """
857 | 
858 |         user_members = {}
859 | 
860 |         async with httpx.AsyncClient() as client:
861 |             try:
862 | 
863 |                 # get members count
864 |                 member_page = await client.get(
865 |                     self.user_pages["main"], headers=self.scraper_header
866 |                 )
867 |                 member_page_soup = BeautifulSoup(member_page.text, "lxml")
868 |                 sselect = SilentSelector(member_page_soup)
869 | 
870 |                 members_count = sselect.select_one(
871 |                     "li.members a span.count", True, None
872 |                 )
873 | 
874 |                 if members_count is not None:
875 |                     members_count = int(members_count)
876 |                 else:
877 |                     members_count = 0
878 | 
879 |                 # scrape if members are available
880 |                 if members_count > 0:
881 |                     member_page_count = 1
882 | 
883 |                     if members_count <= self.members_per_page:
884 |                         max_pages = 1
885 |                     else:
886 |                         max_pages = (members_count // self.members_per_page) + 1
887 | 
888 |                     while member_page_count <= max_pages:
889 | 
890 |                         # construct members page URL
891 |                         current_user_members_page_url = (
892 |                             self.user_pages["members"]
893 |                             + str(member_page_count)
894 |                             + "&per_page="
895 |                             + str(self.members_per_page)
896 |                         )
897 | 
898 |                         # get current member page soup
899 |                         async with httpx.AsyncClient() as client_i:
900 |                             try:
901 |                                 members_page = await client_i.get(
902 |                                     current_user_members_page_url,
903 |                                     headers=self.scraper_header,
904 |                                 )
905 |                                 members_page_soup = BeautifulSoup(
906 |                                     members_page.text, "lxml"
907 |                                 )
908 |                                 sselect = SilentSelector(members_page_soup)
909 | 
910 |                                 # loop through each found member
911 |                                 if sselect.find_all(
912 |                                     "li", "scrolling-row", None, False, None
913 |                                 ):
914 |                                     for member in sselect.find_all(
915 |                                         "li", "scrolling-row", None, False, None
916 |                                     ):
917 |                                         current_member = {}
918 | 
919 |                                         sselect_member = SilentSelector(member)
920 | 
921 |                                         # member username
922 |                                         member_username = str(
923 |                                             sselect_member.select_one(
924 |                                                 "span.designer-card-username a.designer-link",
925 |                                                 False,
926 |                                                 "href",
927 |                                             )
928 |                                         ).replace("/", "")
929 | 
930 |                                         # member profile URL
931 |                                         current_member["profile_url"] = (
932 |                                             DRIBBBLE_URL + "/" + member_username
933 |                                         )
934 | 
935 |                                         # member profile name
936 |                                         profile_name = sselect_member.select_one(
937 |                                             "span.designer-card-username a.designer-link",
938 |                                             True,
939 |                                             None,
940 |                                         )
941 | 
942 |                                         current_member["profile_name"] = profile_name
943 | 
944 |                                         # member location
945 |                                         current_member[
946 |                                             "location"
947 |                                         ] = sselect_member.select_one(
948 |                                             "span.designer-card-location", True, None
949 |                                         )
950 | 
951 |                                         # member pro status
952 |                                         current_member["is_pro"] = bool(
953 |                                             sselect_member.select_one(
954 |                                                 "span.badge.badge-pro", False, None
955 |                                             )
956 |                                         )
957 | 
958 |                                         user_members[member_username] = current_member
959 |                                     member_page_count += 1
960 |                                 else:
961 |                                     break
962 | 
963 |                             except httpx.RequestError as ex:
964 |                                 print(
965 |                                     f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}"
966 |                                 )
967 | 
968 |                             except httpx.HTTPStatusError as ex:
969 |                                 print(
970 |                                     f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}."
971 | ) 972 | 973 | user_members["members_count"] = members_count 974 | self.dribbble_user_data["members"] = user_members 975 | 976 | else: 977 | self.dribbble_user_data["members"] = None 978 | 979 | print("\n✓ Members page scraped...") 980 | except httpx.RequestError as ex: 981 | print( 982 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 983 | ) 984 | 985 | except httpx.HTTPStatusError as ex: 986 | print( 987 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 988 | ) 989 | 990 | async def scrape_goods_page(self): 991 | """ 992 | Retrieves data from the goods' page of a dribbble user 993 | 994 | """ 995 | 996 | user_goods = {} 997 | async with httpx.AsyncClient() as client: 998 | try: 999 | goods_page = await client.get( 1000 | self.user_pages["goods"], headers=self.scraper_header 1001 | ) 1002 | goods_page_soup = BeautifulSoup(goods_page.text, "lxml") 1003 | sselect = SilentSelector(goods_page_soup) 1004 | 1005 | # goods' Names 1006 | goods_names = [ 1007 | name.text 1008 | for name in sselect.select( 1009 | "div.shot-details-container>div.font-label" 1010 | ) 1011 | ] 1012 | 1013 | # goods' prices 1014 | goods_prices = [ 1015 | str(price.text).strip() 1016 | for price in sselect.select( 1017 | "div.shot-details-container>div.price-label>span" 1018 | ) 1019 | ] 1020 | 1021 | # goods' urls 1022 | goods_urls = [ 1023 | DRIBBBLE_URL 1024 | + "/shots/" 1025 | + str(goods_id_soup.get("data-thumbnail-id")) 1026 | for goods_id_soup in sselect.find_all( 1027 | "li", "shot-thumbnail-container", None, False, None 1028 | ) 1029 | ] 1030 | 1031 | for goods_url, goods_name, goods_price in zip( 1032 | goods_urls, goods_names, goods_prices 1033 | ): 1034 | current_user_good = {} 1035 | 1036 | # Construct Goods shot URL 1037 | current_user_good["url"] = goods_url 1038 | current_user_good["price"] = goods_price 1039 | user_goods[goods_name] = current_user_good 1040 | 1041 | # Get more data about the goods on sale 1042 | user_goods = await self.get_shots_data( 1043 | goods_urls, goods_names, user_goods 1044 | ) 1045 | 1046 | except httpx.RequestError as ex: 1047 | print( 1048 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 1049 | ) 1050 | 1051 | except httpx.HTTPStatusError as ex: 1052 | print( 1053 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
1054 |                 )
1055 | 
1056 |         self.dribbble_user_data["goods_for_sale"] = user_goods
1057 |         print("\n✓ Goods page scraped...")
1058 | 
1059 |     async def get_shots_data(
1060 |         self, shot_urls: list, shot_names: list, shots_dict: dict
1061 |     ) -> dict:
1062 |         """
1063 |         Retrieves data about a list of given shots
1064 |         """
1065 |         for shot_url, shot_name in zip(shot_urls, shot_names):
1066 |             # initialised before the request so a failed fetch still
1067 |             # leaves a dict to attach below
1068 |             current_shot_data = {}
1069 | 
1070 |             # get current shot page HTML
1071 |             async with httpx.AsyncClient() as client:
1072 |                 try:
1073 |                     shot_page = await client.get(shot_url, headers=self.scraper_header)
1074 |                     shot_page_soup = BeautifulSoup(shot_page.text, "lxml")
1075 |                     sselect = SilentSelector(shot_page_soup)
1076 | 
1077 |                     # get shot color palette
1078 |                     shot_color_palette = [
1079 |                         color.find("a").text
1080 |                         for color in sselect.select("ul.color-chips.group li")
1081 |                     ]
1082 |                     current_shot_data["color_palette"] = shot_color_palette
1083 | 
1084 |                     # extract the embedded JSON from the shot page's script tag
1085 |                     shot_data_script = sselect.select("body script")[6]
1086 |                     shot_data_js = shot_data_script.text
1087 |                     shot_data_js = "".join(shot_data_js.split("\n")[3:])
1088 |                     shot_data_json = chompjs.parse_js_object(
1089 |                         shot_data_js, json_params={"strict": False}
1090 |                     )
1091 |                     shot_data_dict = dict(shot_data_json)
1092 | 
1093 |                     # shot metadata
1094 |                     current_shot_data["likes"] = shot_data_dict["shotData"][
1095 |                         "likesCount"
1096 |                     ]
1097 |                     shot_published_date = datetime.strptime(
1098 |                         shot_data_dict["shotData"]["postedOn"],
1099 |                         self.shot_published_date_format,
1100 |                     ).strftime(self.preferred_time_format)
1101 | 
1102 |                     current_shot_data["published_date"] = shot_published_date
1103 |                     current_shot_data["saves_count"] = shot_data_dict["shotData"][
1104 |                         "savesCount"
1105 |                     ]
1106 |                     current_shot_data["isAnimated"] = shot_data_dict["shotData"][
1107 |                         "isAnimated"
1108 |                     ]
1109 |                     current_shot_data["isAnimatedGif"] = shot_data_dict["shotData"][
1110 |                         "isAnimatedGif"
1111 |                     ]
1112 |                     current_shot_data["tags"] = shot_data_dict["shotData"]["tags"]
1113 |                     current_shot_data["views_count"] = shot_data_dict["shotData"][
1114 |                         "viewsCount"
1115 |                     ]
1116 | 
1117 |                 except httpx.RequestError as ex:
1118 |                     print(
1119 |                         f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}"
1120 |                     )
1121 | 
1122 |                 except httpx.HTTPStatusError as ex:
1123 |                     print(
1124 |                         f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}."
1125 |                     )
1126 | 
1127 |             shots_dict.setdefault(shot_name, {})["metadata"] = current_shot_data
1128 |         return shots_dict
1129 | 
1130 |     def export_to_json(self):
1131 |         """
1132 |         Exports the scraped user data as a JSON file
1133 |         """
1134 | 
1135 |         # Convert dict to JSON
1136 |         dribbble_json = json.dumps(self.dribbble_user_data)
1137 | 
1138 |         # Write to JSON file
1139 |         with open(self.json_file, "w") as json_file:
1140 |             json_file.write(dribbble_json)
1141 | 
1142 |         print("\nResults saved to {}".format(self.json_file))
1143 | 
--------------------------------------------------------------------------------
/dribbble_py/silent_selector.py:
--------------------------------------------------------------------------------
1 | class SilentSelector:
2 |     """
3 |     Wrapper around a bs4 page soup that silences scraping exceptions
4 |     """
5 | 
6 |     def __init__(self, page_soup):
7 |         self.page_soup = page_soup
8 | 
9 |     def select_one(self, query_string: str, get_text: bool, attribute: str):
10 |         """
11 |         Wrapper for select_one() of bs4 with exception handling.
12 | 
13 |         Arguments:
14 |             query_string: string
15 |             get_text: bool
16 |             attribute: string
17 | 
18 |         Returns:
19 |             The matched text, attribute value or tag, or None on failure.
20 |         """
21 |         try:
22 |             if get_text:
23 |                 return self.page_soup.select_one(query_string).text
24 |             elif attribute is not None:
25 |                 return self.page_soup.select_one(query_string).get(attribute)
26 |             else:
27 |                 return self.page_soup.select_one(query_string)
28 |         except (AttributeError, TypeError):
29 |             return None
30 | 
31 |     def select(self, query_string: str):
32 |         """
33 |         Wrapper for select() of bs4 with exception handling.
34 | 
35 |         Arguments:
36 |             query_string: string
37 | 
38 |         Returns:
39 |             A list of matching tags, or None on failure.
40 |         """
41 |         try:
42 |             return self.page_soup.select(query_string)
43 |         except (AttributeError, TypeError):
44 |             return None
45 | 
46 |     def find_all(
47 |         self,
48 |         query_tag: str,
49 |         tag_class: str,
50 |         tag_id: str,
51 |         get_string: bool,
52 |         attribute: str,
53 |     ):
54 |         """
55 |         Wrapper for find_all() of bs4 with exception handling.
56 | 
57 |         Arguments:
58 |             query_tag: string
59 |             tag_class: string
60 |             tag_id: string
61 |             get_string: bool
62 |             attribute: string
63 | 
64 |         Returns:
65 |             A list of tags, strings or attribute values, or None on failure.
66 |         """
67 |         try:
68 |             # Narrow the search by class or id when one is given
69 |             if tag_class is not None:
70 |                 results = self.page_soup.find_all(query_tag, class_=tag_class)
71 |             elif tag_id is not None:
72 |                 results = self.page_soup.find_all(query_tag, id=tag_id)
73 |             else:
74 |                 results = self.page_soup.find_all(query_tag)
75 | 
76 |             # find_all() returns a ResultSet, so text and attribute
77 |             # extraction has to happen per tag
78 |             if get_string:
79 |                 return [tag.text for tag in results]
80 |             elif attribute is not None:
81 |                 return [tag.get(attribute) for tag in results]
82 |             else:
83 |                 return results
84 |         except (AttributeError, TypeError):
85 |             return None
86 | 
87 |     def find(
88 |         self,
89 |         query_tag: str,
90 |         tag_class: str,
91 |         tag_id: str,
92 |         get_string: bool,
93 |         attribute: str,
94 |     ):
95 |         """
96 |         Wrapper for find() of bs4 with exception handling.
97 | 
98 |         Arguments:
99 |             query_tag: string
100 |             tag_class: string
101 |             tag_id: string
102 |             get_string: bool
103 |             attribute: string
104 | 
105 |         Returns:
106 |             The matched text, attribute value or tag, or None on failure.
107 |         """
108 |         try:
109 |             if tag_class is not None:
110 |                 result = self.page_soup.find(query_tag, class_=tag_class)
111 |             elif tag_id is not None:
112 |                 result = self.page_soup.find(query_tag, id=tag_id)
113 |             else:
114 |                 result = self.page_soup.find(query_tag)
115 | 
116 |             if get_string:
117 |                 return result.text
118 |             elif attribute is not None:
119 |                 return result.get(attribute)
120 |             else:
121 |                 return result
122 |         except (AttributeError, TypeError):
123 |             return None
124 | 
--------------------------------------------------------------------------------
/dribbble_py/utils.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | 
3 | 
4 | def int_k(string_k: str):
5 |     """
6 |     Convert strings ending with 'k' or 'K' to an integer
7 | 
8 |     Arguments:
9 |         string_k: string
10 | 
11 |     Returns:
12 |         number_k: int
13 |     """
14 |     number_k = float(string_k.replace("k", "").replace("K", ""))
15 |     number_k = int(number_k * 1000)
16 |     return number_k
17 | 
18 | 
19 | def get_redirect_url(query_url: str):
20 |     """
21 |     Returns the last URL in the history of redirects
22 |     API:
23 |     https://github.com/encode/httpx/blob/master/httpx/_models.py
24 | 
25 |     Arguments:
26 |         query_url: string
27 | 
28 |     Returns:
29 |         [redirected_url, response_site]: list
30 |     """
31 |     response = httpx.get(
32 |         query_url,
33 |         timeout=10,
34 |         follow_redirects=True,
35 |     )
36 |     response_site = response.url.host
37 | 
38 |     # Get the history of redirect URLs, ending with the final response URL
39 |     history_urls = [str(history.url) for history in response.history]
40 |     history_urls.append(str(response.url))
41 | 
42 |     # Discard login redirect URLs of facebook, instagram, etc.
43 |     if "login" in history_urls[-1] or "authwall" in history_urls[-1]:
44 |         redirected_url = history_urls[-2]
45 |     else:
46 |         redirected_url = history_urls[-1]
47 | 
48 |     return [redirected_url, response_site]
49 | 
50 | 
51 | def string_to_number(number_string: str) -> int:
52 |     """
53 |     Convert a count like "1,234" to an int; returns 0 when parsing fails
54 |     """
55 |     try:
56 |         if number_string is not None:
57 |             int_number = int(number_string.replace(",", ""))
58 |         else:
59 |             int_number = 0
60 |     except ValueError:
61 |         int_number = 0
62 |     return int_number
63 | 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Dribbble-py
2 | 
3 | A Python scraper for dribbble.com
4 | 
5 | ![PyPI](https://img.shields.io/pypi/v/dribbble-py?style=flat-square)
6 | ![GitHub](https://img.shields.io/github/license/rand-net/dribbble-py?style=flat-square)
7 | 
8 | ## Disclaimer
9 | 
10 | Any legal issues arising from the downloading of a Dribbble user's graphic assets should be taken up with the forks and content abusers actually responsible; this project is not affiliated with them.
11 | 
12 | Dribbble-py does not support or promote downloading a Dribbble user's graphic assets or using them without permission.
13 | 
14 | In case of copyright infringement, please directly contact the responsible forks or the individuals responsible for the abuse.
15 | 
16 | This app merely scrapes information about a Dribbble user that is publicly available on the website.
17 | 
18 | ## Installation
19 | 
20 | ```
21 | $ pip install -U dribbble-py
22 | ```
23 | 
24 | ## Usage
25 | 
26 | ```
27 | $ drbl_py -h
28 | 
29 | usage: drbl_py [-h] [-u USERNAME] [-m] [-j JSON_FILE] [--version]
30 | 
31 | Dribbble-py 0.0.1
32 | 
33 | Program to scrape dribbble user information
34 | 
35 | optional arguments:
36 |   -h, --help            show this help message and exit
37 |   -u USERNAME, --username USERNAME
38 |                         Enter username to scrape.
39 | 
40 |   -m, --get-metadata    Get metadata about every user shot.
41 |                         Takes longer to scrape.
42 |                         Default = No metadata about user shots
43 | 
44 |   -j JSON_FILE, --json-file JSON_FILE
45 |                         Name of output JSON filename.
46 |                         Default = username.json
47 | 
48 |   --version             show program's version number and exit
49 | 
50 | Example usage
51 | -------------
52 | 
53 | Download info about a user.
54 | $ drbl_py -u JohnDoe
55 | 
56 | Download info about a user to a custom JSON file.
57 | $ drbl_py -u JohnDoe -j John
58 | ```
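59 | 
60 | ## Library usage
61 | 
62 | The scraper can also be driven from Python directly. A minimal sketch, using the methods defined in `dribbble_py/dribbble_user.py` (the username is a placeholder):
63 | 
64 | ```python
65 | from dribbble_py import DribbbleUser
66 | 
67 | # Passing None as json_file writes the output to "<username>.json"
68 | user = DribbbleUser("JohnDoe", None)
69 | user.check_user()
70 | 
71 | # Scrapes all profile pages concurrently; the with-metadata variant
72 | # also visits every shot page and takes longer.
73 | user.run_nursery_without_metadata_scraper()
74 | user.export_to_json()
75 | ```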
76 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | anyio==3.5.0
2 | art==5.4
3 | async-generator==1.10
4 | attrs==21.4.0
5 | beautifulsoup4==4.10.0
6 | certifi==2021.10.8
7 | charset-normalizer==2.0.11
8 | chompjs==1.1.6
9 | h11==0.12.0
10 | httpcore==0.14.6
11 | httpx==0.22.0
12 | idna==3.3
13 | outcome==1.1.0
14 | rfc3986==1.5.0
15 | sniffio==1.2.0
16 | sortedcontainers==2.4.0
17 | soupsieve==2.3.1
18 | trio==0.19.0
19 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | from setuptools import setup
3 | 
4 | HERE = pathlib.Path(__file__).parent
5 | 
6 | README = (HERE / "readme.md").read_text()
7 | 
8 | setup(
9 |     name="dribbble-py",
10 |     version="0.0.1",
11 |     description="Dribbble user scraper",
12 |     long_description=README,
13 |     long_description_content_type="text/markdown",
14 |     url="https://github.com/rand-net/dribbble-py",
15 |     author="rand-net",
16 |     license="MIT",
17 |     classifiers=[
18 |         "License :: OSI Approved :: MIT License",
19 |         "Programming Language :: Python :: 3",
20 |         "Programming Language :: Python :: 3.8",
21 |     ],
22 |     packages=["dribbble_py"],
23 |     include_package_data=True,
24 |     entry_points={"console_scripts": ["drbl_py = dribbble_py.cli:main"]},
25 |     install_requires=[
26 |         "art",
27 |         "beautifulsoup4",
28 |         "chompjs",
29 |         "lxml",
30 |         "httpx",
31 |         "trio",
32 |     ],
33 |     keywords=["dribbble", "dribbble-scraper", "scraper", "graphic-design", "design"],
34 | )
35 | 
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rand-net/dribbble-py/c8f309bdbdf19659cdeb6dbb70b495e98804e2b4/test/__init__.py
--------------------------------------------------------------------------------
/test/test_dribbble_user.py:
--------------------------------------------------------------------------------
1 | from unittest import IsolatedAsyncioTestCase
2 | import unittest
3 | import sys
4 | 
5 | 
6 | 
sys.path.append("../dribbble_py")
7 | from dribbble_py import *
8 | 
9 | 
10 | class TestDribbbleUser(IsolatedAsyncioTestCase):
11 |     def test_check_user(self):
12 |         print("Testing check user...")
13 |         drbl_usr = DribbbleUser("theosm", None)
14 |         drbl_usr.check_user()
15 |         # check_user() records "Yes" or "No" under the "user_exists" key
16 |         self.assertIn(drbl_usr.dribbble_user_data["user_exists"], ["Yes", "No"])
17 | 
18 |     async def test_scrape_main_page(self):
19 |         print("Testing scrape_main_page... ")
20 |         drbl_usr = DribbbleUser("TonyBabel", None)
21 |         await drbl_usr.scrape_main_page()
22 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["projects_count"], 0)
23 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["shots_count"], 0)
24 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["collections_count"], 0)
25 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["liked_shots"], 0)
26 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["members_count"], 0)
27 | 
28 |     async def test_scrape_about_page(self):
29 |         print("Testing scrape_about_page... ")
30 |         drbl_usr = DribbbleUser("TonyBabel", None)
31 |         await drbl_usr.scrape_about_page()
32 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["followers"], 0)
33 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["following"], 0)
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     unittest.main()
38 | 
--------------------------------------------------------------------------------
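For reference, a trimmed sketch of the JSON document that export_to_json() produces. The key names below are taken from the assignments in dribbble_py/dribbble_user.py; the values are illustrative placeholders, not real scraped data:

{
    "user_exists": "Yes",
    "shots_count": 120,
    "projects_count": 4,
    "collections_count": 2,
    "liked_shots": 350,
    "user_description": "Product designer",
    "hire_status": true,
    "members_count": 0,
    "team_url": null,
    "followers": 1500,
    "following": 80,
    "tags": null,
    "location": "Berlin, Germany",
    "bio": "Hello!",
    "is_pro": true,
    "join_date": "2018-03-01",
    "skills": ["illustration", "branding"],
    "social_media_profiles": {"twitter.com": "https://twitter.com/JohnDoe"},
    "shots": {"shots_count": 120, "shots": {}},
    "projects": {},
    "collections": {},
    "members": null,
    "goods_for_sale": {}
}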