├── .gitignore
├── LICENSE
├── MANIFEST.in
├── dribbble_py
│   ├── __init__.py
│   ├── cli.py
│   ├── dribbble_user.py
│   ├── silent_selector.py
│   └── utils.py
├── readme.md
├── requirements.txt
├── setup.py
└── test
    ├── __init__.py
    └── test_dribbble_user.py

/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | dist
3 | dribbble_py.egg-info
4 | proto
5 | venv
6 | __pycache__
7 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 rand-net
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.txt
2 | 
--------------------------------------------------------------------------------
/dribbble_py/__init__.py:
--------------------------------------------------------------------------------
1 | from .dribbble_user import *
2 | 
--------------------------------------------------------------------------------
/dribbble_py/cli.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | import argparse
4 | import textwrap
5 | from art import tprint
6 | 
7 | from .dribbble_user import *
8 | 
9 | __version__ = "0.0.1"
10 | 
11 | 
12 | def main(argv=None):
13 |     argv = sys.argv if argv is None else argv
14 |     argparser = argparse.ArgumentParser(
15 |         prog="drbl_py",
16 |         formatter_class=argparse.RawTextHelpFormatter,
17 |         description=textwrap.dedent(
18 |             """
19 |             Dribbble-py {}
20 | 
21 |             Program to scrape dribbble user information
22 |             """.format(__version__)
23 |         ),
24 |         epilog=textwrap.dedent(
25 |             """
26 |             Example usage
27 |             -------------
28 | 
29 |             Download info about a user.
30 |             $ drbl_py -u JohnDoe
31 | 
32 |             Download info about a user to a custom JSON file.
33 |             $ drbl_py -u JohnDoe -j John
34 |             """
35 |         ),
36 |     )
37 | 
38 |     # User arguments
39 |     argparser.add_argument(
40 |         "-u",
41 |         "--username",
42 |         help="Enter username to scrape.\n\n",
43 |         dest="username",
44 |     )
45 |     argparser.add_argument(
46 |         "-m",
47 |         "--get-metadata",
48 |         help="Get metadata about every user shot.\nTakes longer to scrape.\nDefault = No metadata about user shots\n\n",
49 |         action="store_true",
50 |     )
51 |     argparser.add_argument(
52 |         "-j",
53 |         "--json-file",
54 |         help="Name of output JSON filename.\nDefault = username.json\n\n",
55 |         dest="json_file",
56 |     )
57 |     argparser.add_argument(
58 |         "--version", action="version", version="%(prog)s {}".format(__version__)
59 |     )
60 |     args = argparser.parse_args(argv[1:])
61 | 
62 |     if not args.username:
63 |         argparser.print_help()
64 |         sys.exit(1)
65 | 
66 |     # Set the output JSON filename
67 |     if args.json_file is None:
68 |         json_file = args.username + ".json"
69 |     else:
70 |         json_file = args.json_file + ".json"
71 | 
72 |     tprint("DRIBBBLE-PY")
73 |     print("version {}".format(__version__))
74 | 
75 |     # Time the full scrape
76 |     t1 = time.perf_counter()
77 |     try:
78 |         dribbble_user = DribbbleUser(args.username, json_file)
79 |         dribbble_user.check_user()
80 |         if args.get_metadata:
81 |             dribbble_user.run_nursery_with_metadata_scraper()
82 |         else:
83 |             dribbble_user.run_nursery_without_metadata_scraper()
84 |         dribbble_user.export_to_json()
85 | 
86 |         t2 = time.perf_counter()
87 |         print(f"\nScraping took {t2-t1:0.2f} second(s)...\n")
88 |     except KeyboardInterrupt:
89 |         print("Exiting dribbble-py...\n")
90 |         sys.exit(0)
91 | 
--------------------------------------------------------------------------------
/dribbble_py/dribbble_user.py:
--------------------------------------------------------------------------------
1 | import chompjs
2 | import re
3 | import json
4 | import trio
5 | import httpx
6 | from datetime import datetime
7 | from bs4 import BeautifulSoup
8 | import sys
9 | from dribbble_py.silent_selector import SilentSelector
10 | from dribbble_py.utils import int_k, get_redirect_url, string_to_number
11 | 
12 | sys.path.append("../dribbble_py")
13 | 
14 | 
15 | DRIBBBLE_URL = "https://dribbble.com"
16 | 
17 | 
18 | class DribbbleUser:
19 |     """
20 |     Scrapes available data of a Dribbble user
21 | 
22 |     Arguments:
23 |         username: string
24 |         json_file: string
25 | 
26 |     """
27 | 
28 |     def __init__(self, username: str, json_file: str):
29 |         self.username = username
30 | 
31 |         # Set JSON file name; default to "<username>.json"
32 |         if json_file is None:
33 |             self.json_file = username + ".json"
34 |         else:
35 |             self.json_file = json_file
36 |         self.dribbble_user_data = {}
37 | 
38 |         self.join_date_format = "%b %Y"
39 |         self.shot_published_date_format = "%b %d, %Y"
40 |         self.preferred_time_format = "%Y-%m-%d"
41 | 
42 |         self.shots_per_page = 8
43 |         self.project_shots_per_page = 8
44 |         self.members_per_page = 6
45 | 
46 |         self.scraper_header = {
47 |             "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4690.0 Safari/537.36/QInxREvS-38"
48 |         }
49 |         # Construct URLs for the various profile pages
50 |         self.user_pages = {
51 |             "main": "/",
52 |             "shots": "/shots",
53 |             "about": "/about",
54 |             "projects": "/projects",
55 |             "goods": "/goods",
56 |             "collections": "/collections",
57 |             "members": "/members?page=",
58 |         }
59 | 
60 |         self.user_pages = {
61 |             key: DRIBBBLE_URL + "/" + self.username + value
62 |             for key, value in self.user_pages.items()
63 |         }
64 | 
65 |     def check_user(self):
66 |         """
67 |         Check whether a dribbble user exists or not
68 |         """
69 |         try:
70 |             print("\n🔍 Searching for user " + self.username + "...\n")
71 |             user_page = 
httpx.get(self.user_pages["main"]) 72 | user_page_soup = BeautifulSoup(user_page.text, "lxml") 73 | 74 | sselect = SilentSelector(user_page_soup) 75 | if ( 76 | sselect.select_one("section.message-404", False, None) 77 | and sselect.select_one("section.collage-404", False, None) 78 | and sselect.select_one("div.collage-404-images", False, None) 79 | ): 80 | 81 | self.dribbble_user_data["user_exists"] = "No" 82 | print("✗ {} not found\n".format(self.username)) 83 | else: 84 | self.dribbble_user_data["user_exists"] = "Yes" 85 | print("✓ {} found\n".format(self.username)) 86 | print("Profile URL : {}".format(self.user_pages["main"])) 87 | 88 | except httpx.RequestError as ex: 89 | print(f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}") 90 | 91 | except httpx.HTTPStatusError as ex: 92 | print( 93 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 94 | ) 95 | 96 | async def scrape_user_pages_with_metadata_nursery(self): 97 | """ 98 | Scrape all available dribbble user pages with trio nursery 99 | """ 100 | async with trio.open_nursery() as nursery: 101 | nursery.start_soon(self.scrape_main_page) 102 | nursery.start_soon(self.scrape_about_page) 103 | nursery.start_soon(self.scrape_projects_page) 104 | nursery.start_soon(self.scrape_goods_page) 105 | nursery.start_soon(self.scrape_members_page) 106 | nursery.start_soon(self.scrape_collections_page) 107 | nursery.start_soon(self.scrape_shots_with_metadata_page) 108 | 109 | def run_nursery_with_metadata_scraper(self): 110 | """ 111 | Run the trio nursery for scraping pages of a dribbble user 112 | """ 113 | trio.run(self.scrape_user_pages_with_metadata_nursery) 114 | 115 | async def scrape_user_pages_without_metadata_nursery(self): 116 | """ 117 | Scrape all available dribbble user pages with trio nursery 118 | """ 119 | async with trio.open_nursery() as nursery: 120 | nursery.start_soon(self.scrape_main_page) 121 | nursery.start_soon(self.scrape_about_page) 122 | nursery.start_soon(self.scrape_projects_page) 123 | nursery.start_soon(self.scrape_goods_page) 124 | nursery.start_soon(self.scrape_members_page) 125 | nursery.start_soon(self.scrape_collections_page) 126 | nursery.start_soon(self.scrape_shots_without_metadata_page) 127 | 128 | def run_nursery_without_metadata_scraper(self): 129 | """ 130 | Run the trio nursery for scraping pages of a dribbble user 131 | """ 132 | trio.run(self.scrape_user_pages_without_metadata_nursery) 133 | 134 | async def scrape_main_page(self): 135 | """ 136 | Scrape data from the main page of a dribbble user 137 | """ 138 | 139 | async with httpx.AsyncClient() as client: 140 | 141 | try: 142 | user_page = await client.get( 143 | self.user_pages["main"], headers=self.scraper_header 144 | ) 145 | user_page_soup = BeautifulSoup(user_page.text, "lxml") 146 | sselect = SilentSelector(user_page_soup) 147 | 148 | # shots count 149 | shots_count = sselect.select_one("li.shots a span.count", True, None) 150 | self.dribbble_user_data["shots_count"] = string_to_number(shots_count) 151 | 152 | # projects count 153 | projects_count = sselect.select_one( 154 | "li.projects a span.count", True, None 155 | ) 156 | self.dribbble_user_data["projects_count"] = string_to_number( 157 | projects_count 158 | ) 159 | 160 | # collections count 161 | collections_count = sselect.select_one( 162 | "li.collections a span.count", True, None 163 | ) 164 | self.dribbble_user_data["collections_count"] = string_to_number( 165 | collections_count 166 | ) 167 | 168 | # liked shots count 169 | 
liked_shots = sselect.select_one("li.liked a span.count", True, None) 170 | self.dribbble_user_data["liked_shots"] = string_to_number(liked_shots) 171 | 172 | # user description 173 | self.dribbble_user_data["user_description"] = sselect.select_one( 174 | "div.masthead-intro h2", True, None 175 | ) 176 | 177 | # hire status 178 | self.dribbble_user_data["hire_status"] = bool( 179 | sselect.select_one( 180 | "div.hire-prompt-trigger.profile-action-item", False, None 181 | ) 182 | ) 183 | 184 | # members count 185 | members_count = sselect.select_one("li.members span.count", True, None) 186 | self.dribbble_user_data["members_count"] = string_to_number( 187 | members_count 188 | ) 189 | 190 | # team profile 191 | team_profile = sselect.select_one( 192 | "div.masthead-teams a.team-avatar-link[href]", False, "href" 193 | ) 194 | 195 | if team_profile is not None: 196 | self.dribbble_user_data["team_url"] = DRIBBBLE_URL + team_profile 197 | else: 198 | self.dribbble_user_data["team_url"] = None 199 | 200 | # print some of the info 201 | print( 202 | "Shots : {}".format( 203 | self.dribbble_user_data["shots_count"] 204 | ) 205 | ) 206 | print( 207 | "Projects : {}".format( 208 | self.dribbble_user_data["projects_count"] 209 | ) 210 | ) 211 | print( 212 | "Collections : {}".format( 213 | self.dribbble_user_data["collections_count"] 214 | ) 215 | ) 216 | print( 217 | "Liked Shots : {}".format( 218 | self.dribbble_user_data["liked_shots"] 219 | ) 220 | ) 221 | print("\n✓ Main page scraped...") 222 | 223 | except httpx.RequestError as ex: 224 | print( 225 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 226 | ) 227 | 228 | except httpx.HTTPStatusError as ex: 229 | print( 230 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
231 | ) 232 | 233 | async def scrape_about_page(self): 234 | """ 235 | Retrieves data from the about page of a dribbble user 236 | """ 237 | 238 | async with httpx.AsyncClient() as client: 239 | try: 240 | about_page = await client.get( 241 | self.user_pages["about"], headers=self.scraper_header 242 | ) 243 | about_page_soup = BeautifulSoup(about_page.text, "lxml") 244 | sselect = SilentSelector(about_page_soup) 245 | 246 | # profile stats - following, followers, tags 247 | profile_stats = [ 248 | stat.find("span", class_="count").text 249 | for stat in sselect.select( 250 | "section.content-section.profile-stats-section.medium-screens-only a " 251 | ) 252 | ] 253 | 254 | # user followers 255 | user_followers_count = profile_stats[0].replace(",", "") 256 | self.dribbble_user_data["followers"] = string_to_number( 257 | user_followers_count 258 | ) 259 | 260 | # user following 261 | user_following_count = profile_stats[1].replace(",", "") 262 | self.dribbble_user_data["following"] = string_to_number( 263 | user_following_count 264 | ) 265 | 266 | # user tags 267 | try: 268 | self.dribbble_user_data["tags"] = profile_stats[2] 269 | except IndexError: 270 | self.dribbble_user_data["tags"] = None 271 | 272 | # user location 273 | self.dribbble_user_data["location"] = ( 274 | str(sselect.select_one("p.location", True, None)) 275 | .replace("\n", "") 276 | .strip() 277 | ) 278 | 279 | # user bio 280 | self.dribbble_user_data["bio"] = str( 281 | sselect.select_one("p.bio-text", True, None) 282 | ).replace("\n", "") 283 | 284 | # user pro status 285 | self.dribbble_user_data["is_pro"] = bool( 286 | sselect.select_one("p.info-item.pro", False, None) 287 | ) 288 | 289 | # user join date 290 | join_date_string = ( 291 | str(sselect.select_one("p.info-item.created span", True, None)) 292 | .replace("Member since", "") 293 | .strip() 294 | ) 295 | join_date = datetime.strptime(join_date_string, self.join_date_format) 296 | self.dribbble_user_data["join_date"] = join_date.strftime( 297 | self.preferred_time_format 298 | ) 299 | 300 | # user skills 301 | skills_list = [ 302 | skill.text for skill in sselect.select("ul.skills-list a") 303 | ] 304 | self.dribbble_user_data["skills"] = skills_list 305 | 306 | # social media profiles 307 | self.dribbble_user_data["social_media_profiles"] = {} 308 | social_media_redirect_urls = [ 309 | DRIBBBLE_URL + anchor["href"] 310 | for anchor in sselect.select("ul.social-links-list a") 311 | ] 312 | 313 | for url in social_media_redirect_urls: 314 | profile_url, site = get_redirect_url(url) 315 | self.dribbble_user_data["social_media_profiles"][site] = profile_url 316 | 317 | # Print some of the info 318 | print("Followers :", self.dribbble_user_data["followers"]) 319 | print("Following :", self.dribbble_user_data["following"]) 320 | print("Location :", self.dribbble_user_data["location"]) 321 | print("Pro Status :", self.dribbble_user_data["is_pro"]) 322 | print("Join Date :", self.dribbble_user_data["join_date"]) 323 | print("Skills :", self.dribbble_user_data["skills"]) 324 | 325 | print("\n✓ About page scraped...") 326 | except httpx.RequestError as ex: 327 | print( 328 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 329 | ) 330 | 331 | except httpx.HTTPStatusError as ex: 332 | print( 333 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
334 | ) 335 | 336 | async def scrape_shots_without_metadata_page(self): 337 | """ 338 | Retrieves data from the shots page of a dribbble user 339 | """ 340 | 341 | user_shots = {} 342 | 343 | async with httpx.AsyncClient() as client: 344 | try: 345 | shots_page = await client.get( 346 | self.user_pages["main"], headers=self.scraper_header 347 | ) 348 | shots_page_soup = BeautifulSoup(shots_page.text, "lxml") 349 | sselect = SilentSelector(shots_page_soup) 350 | 351 | # total shots 352 | shots_count = string_to_number( 353 | sselect.select_one("li.shots a span.count", True, None) 354 | ) 355 | user_shots["shots_count"] = shots_count 356 | user_shots["shots"] = {} 357 | shots_urls = [] 358 | shot_names = [] 359 | 360 | # number of pages to scrape 361 | page_counter = 0 362 | max_pages = (shots_count // self.shots_per_page) + 5 363 | 364 | # iterate over pages 365 | while page_counter <= max_pages: 366 | 367 | current_shots_page = ( 368 | self.user_pages["shots"] 369 | + "?page=" 370 | + str(page_counter) 371 | + "&per_page=" 372 | + str(self.shots_per_page) 373 | ) 374 | async with httpx.AsyncClient() as client_i: 375 | 376 | # grab all shots info from current page 377 | shots_page = await client_i.get( 378 | current_shots_page, headers=self.scraper_header 379 | ) 380 | shots_page_soup = BeautifulSoup(shots_page.text, "lxml") 381 | sselect_shots = SilentSelector(shots_page_soup) 382 | 383 | # loop through each found shot 384 | for shot_soup in sselect_shots.find_all( 385 | "li", "shot-thumbnail", None, False, None 386 | ): 387 | current_shot = {} 388 | 389 | sselect_current_shot = SilentSelector(shot_soup) 390 | 391 | # shot titles 392 | current_shot_title = sselect_current_shot.select_one( 393 | "div.shot-title", True, None 394 | ) 395 | shot_names.append(current_shot_title) 396 | 397 | # shot URL 398 | current_shot_url = DRIBBBLE_URL + str( 399 | sselect_current_shot.select_one( 400 | "a.shot-thumbnail-link", False, "href" 401 | ) 402 | ) 403 | shots_urls.append(current_shot_url) 404 | 405 | current_shot["shot_url"] = current_shot_url 406 | 407 | # shot alt description 408 | current_shot[ 409 | "alt_description" 410 | ] = sselect_current_shot.select_one("img", False, "alt") 411 | user_shots["shots"][current_shot_title] = current_shot 412 | page_counter += 1 413 | 414 | except httpx.RequestError as ex: 415 | print( 416 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 417 | ) 418 | 419 | except httpx.HTTPStatusError as ex: 420 | print( 421 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
422 | ) 423 | 424 | self.dribbble_user_data["shots"] = user_shots 425 | print("\n✓ Shots page scraped...") 426 | 427 | async def scrape_shots_with_metadata_page(self): 428 | """ 429 | Retrieves data from the shots page of a dribbble user 430 | """ 431 | 432 | user_shots = {} 433 | 434 | async with httpx.AsyncClient() as client: 435 | try: 436 | shots_page = await client.get( 437 | self.user_pages["main"], headers=self.scraper_header 438 | ) 439 | shots_page_soup = BeautifulSoup(shots_page.text, "lxml") 440 | sselect = SilentSelector(shots_page_soup) 441 | 442 | # total shots 443 | shots_count = string_to_number( 444 | sselect.select_one("li.shots a span.count", True, None) 445 | ) 446 | user_shots["shots_count"] = shots_count 447 | user_shots["shots"] = {} 448 | shots_urls = [] 449 | shot_names = [] 450 | 451 | # number of pages to scrape 452 | page_counter = 0 453 | max_pages = (shots_count // self.shots_per_page) + 5 454 | 455 | # iterate over pages 456 | while page_counter <= max_pages: 457 | 458 | current_shots_page = ( 459 | self.user_pages["shots"] 460 | + "?page=" 461 | + str(page_counter) 462 | + "&per_page=" 463 | + str(self.shots_per_page) 464 | ) 465 | async with httpx.AsyncClient() as client_i: 466 | 467 | # grab all shots info from current page 468 | shots_page = await client_i.get( 469 | current_shots_page, headers=self.scraper_header 470 | ) 471 | shots_page_soup = BeautifulSoup(shots_page.text, "lxml") 472 | sselect_shots = SilentSelector(shots_page_soup) 473 | 474 | # loop through each found shot 475 | for shot_soup in sselect_shots.find_all( 476 | "li", "shot-thumbnail", None, False, None 477 | ): 478 | current_shot = {} 479 | 480 | sselect_current_shot = SilentSelector(shot_soup) 481 | 482 | # shot titles 483 | current_shot_title = sselect_current_shot.select_one( 484 | "div.shot-title", True, None 485 | ) 486 | shot_names.append(current_shot_title) 487 | 488 | # shot URL 489 | current_shot_url = DRIBBBLE_URL + str( 490 | sselect_current_shot.select_one( 491 | "a.shot-thumbnail-link", False, "href" 492 | ) 493 | ) 494 | shots_urls.append(current_shot_url) 495 | 496 | current_shot["shot_url"] = current_shot_url 497 | 498 | # shot alt description 499 | current_shot[ 500 | "alt_description" 501 | ] = sselect_current_shot.select_one("img", False, "alt") 502 | user_shots["shots"][current_shot_title] = current_shot 503 | page_counter += 1 504 | 505 | # Get more data about the shots 506 | user_shots = await self.get_shots_data( 507 | shots_urls, shot_names, user_shots["shots"] 508 | ) 509 | 510 | except httpx.RequestError as ex: 511 | print( 512 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 513 | ) 514 | 515 | except httpx.HTTPStatusError as ex: 516 | print( 517 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
518 |                 )
519 | 
520 |         self.dribbble_user_data["shots"] = user_shots
521 |         print("\n✓ Shots page scraped...")
522 | 
523 |     async def scrape_projects_page(self):
524 |         """
525 |         Retrieves data from the projects' page of a dribbble user
526 |         """
527 |         user_projects = {}
528 | 
529 |         async with httpx.AsyncClient() as client:
530 |             try:
531 |                 # scrape projects page
532 |                 projects_page = await client.get(
533 |                     self.user_pages["projects"], headers=self.scraper_header
534 |                 )
535 |                 projects_page_soup = BeautifulSoup(projects_page.text, "lxml")
536 |                 sselect = SilentSelector(projects_page_soup)
537 | 
538 |                 # project titles
539 |                 project_titles = [
540 |                     str(project.text).strip()
541 |                     for project in sselect.select("div.collection-name")
542 |                 ]
543 | 
544 |                 # project shot count
545 |                 project_shots_count = [
546 |                     int(
547 |                         str(project_shots.text)
548 |                         .replace("Shots", "")
549 |                         .replace("Shot", "")
550 |                         .strip()
551 |                     )
552 |                     for project_shots in sselect.select(
553 |                         "div.shots-group-meta>span.shots-count"
554 |                     )
555 |                 ]
556 | 
557 |                 # project updated date
558 |                 project_updated_dates = [
559 |                     datetime.strptime(
560 |                         str(project_updated_date.text).replace("Updated", "").strip(),
561 |                         "%B %d, %Y",
562 |                     ).strftime(self.preferred_time_format)
563 |                     for project_updated_date in sselect.select("span.timestamp")
564 |                 ]
565 | 
566 |                 # project urls
567 |                 project_urls = [
568 |                     DRIBBBLE_URL + str(anchor["href"])
569 |                     for anchor in sselect.select("a.shots-group")
570 |                 ]
571 | 
572 |                 # retrieve data about each project and its shots
573 |                 for (
574 |                     project_title,
575 |                     project_url,
576 |                     shots_count,
577 |                     project_updated_date,
578 |                 ) in zip(
579 |                     project_titles,
580 |                     project_urls,
581 |                     project_shots_count,
582 |                     project_updated_dates,
583 |                 ):
584 | 
585 |                     max_pages = (shots_count // self.project_shots_per_page) + 1
586 |                     page_number = 1
587 | 
588 |                     project_shots = {}
589 | 
590 |                     # loop through all pages of a project
591 |                     while page_number <= max_pages:
592 |                         project_page_url = project_url + "?page=" + str(page_number)
593 |                         page_number += 1
594 | 
595 |                         # get current project page soup
596 |                         async with httpx.AsyncClient() as client_i:
597 |                             try:
598 |                                 individual_project_page = await client_i.get(
599 |                                     project_page_url
600 |                                 )
601 |                                 individual_project_page_soup = BeautifulSoup(
602 |                                     individual_project_page.text, "lxml"
603 |                                 )
604 |                                 sselect_project = SilentSelector(
605 |                                     individual_project_page_soup
606 |                                 )
607 | 
608 |                                 # loop through each found shot
609 |                                 for shot_soup in sselect_project.find_all(
610 |                                     "div", "shot-section-item", None, False, None
611 |                                 ):
612 | 
613 |                                     current_shot = {}
614 |                                     sselect_shot = SilentSelector(shot_soup)
615 | 
616 |                                     # shot title
617 |                                     current_shot_title = sselect_shot.select_one(
618 |                                         "h3.shot-title a", True, None
619 |                                     )
620 | 
621 |                                     # shot published date
622 |                                     shot_pub_date = sselect_shot.select_one(
623 |                                         "p.shot-date", True, None
624 |                                     )
625 |                                     current_shot["shot_pub_date"] = datetime.strptime(
626 |                                         shot_pub_date, "%B %d, %Y"
627 |                                     ).strftime(self.preferred_time_format)
628 | 
629 |                                     # shot description
630 |                                     current_shot[
631 |                                         "shot_description"
632 |                                     ] = sselect_shot.select_one(
633 |                                         "p.shot-description", True, None
634 |                                     )
635 | 
636 |                                     # shot URL
637 |                                     current_shot[
638 |                                         "shot_url"
639 |                                     ] = DRIBBBLE_URL + sselect_shot.select_one(
640 |                                         "a.shot-link", False, "href"
641 |                                     )
642 | 
643 |                                     project_shots[current_shot_title] = current_shot
644 | 
645 |                                 # Assign the projects' shots to the project
646 |                                 user_projects[project_title] = {}
647 |                                 user_projects[project_title][
648 | "updated_date" 649 | ] = project_updated_date 650 | user_projects[project_title]["shots"] = project_shots 651 | 652 | except httpx.RequestError as ex: 653 | print( 654 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 655 | ) 656 | 657 | except httpx.HTTPStatusError as ex: 658 | print( 659 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 660 | ) 661 | 662 | except httpx.RequestError as ex: 663 | print( 664 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 665 | ) 666 | 667 | except httpx.HTTPStatusError as ex: 668 | print( 669 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 670 | ) 671 | 672 | # Add projects to main dict 673 | self.dribbble_user_data["projects"] = user_projects 674 | print("\n✓ Projects page scraped...") 675 | 676 | async def scrape_collections_page(self): 677 | """ 678 | Retrieves data from the collections' page of a dribbble user 679 | """ 680 | user_collections = {} 681 | 682 | async with httpx.AsyncClient() as client: 683 | 684 | try: 685 | collections_page = await client.get( 686 | self.user_pages["collections"], headers=self.scraper_header 687 | ) 688 | collections_page_soup = BeautifulSoup(collections_page.text, "lxml") 689 | sselect = SilentSelector(collections_page_soup) 690 | 691 | # loop through each collection 692 | for collection in sselect.find_all( 693 | "li", "shots-group-item", None, False, None 694 | ): 695 | current_collection = {} 696 | 697 | sselect_collection = SilentSelector(collection) 698 | 699 | # collections' name 700 | current_collection_name = str( 701 | sselect_collection.find( 702 | "div", "collection-name", None, True, None 703 | ) 704 | ).strip() 705 | 706 | # collections' shots count 707 | shots_count = str( 708 | sselect_collection.find("span", "shots-count", None, True, None) 709 | ).strip() 710 | shots_count = str(re.sub("Shot*.", "", shots_count)).strip() 711 | current_collection["shots_count"] = int(shots_count) 712 | 713 | # collections' designer Count 714 | designer_count = str( 715 | sselect_collection.find( 716 | "span", "designers-count", None, True, None 717 | ) 718 | ).strip() 719 | designer_count = str( 720 | re.sub("Designer*.", "", designer_count) 721 | ).strip() 722 | current_collection["designers_count"] = int(designer_count) 723 | 724 | # collections' URL 725 | collection_url = DRIBBBLE_URL + sselect_collection.find( 726 | "a", "shots-group", None, False, "href" 727 | ) 728 | current_collection["collection_url"] = collection_url 729 | 730 | # assign the collections' data to dict 731 | user_collections[current_collection_name] = current_collection 732 | 733 | # get current collections' page soup 734 | async with httpx.AsyncClient() as client_ii: 735 | 736 | try: 737 | collection_shots_page = await client_ii.get( 738 | collection_url, headers=self.scraper_header 739 | ) 740 | collection_shots_page_soup = BeautifulSoup( 741 | collection_shots_page.text, "lxml" 742 | ) 743 | user_collections[current_collection_name]["shots"] = {} 744 | 745 | sselect_current_collection = SilentSelector( 746 | collection_shots_page_soup 747 | ) 748 | 749 | # loop through each found shot 750 | for shot in sselect_current_collection.find_all( 751 | "li", "shot-thumbnail", None, False, None 752 | ): 753 | 754 | current_shot = {} 755 | sselect_shot = SilentSelector(shot) 756 | 757 | # shot title 758 | shot_title = str( 759 | sselect_shot.find( 760 | "div", "shot-title", None, True, None 761 | ) 762 | ).strip() 763 | 764 | # shot 
designer profile URL 765 | current_shot[ 766 | "designer_profile_url" 767 | ] = DRIBBBLE_URL + str( 768 | sselect_shot.select_one( 769 | "a.hoverable.url", False, "href" 770 | ) 771 | ) 772 | # shot designer username 773 | current_shot["designer_name"] = str( 774 | sselect_shot.find( 775 | "span", "display-name", None, True, None 776 | ) 777 | ).strip() 778 | 779 | # shot likes 780 | shot_likes = str( 781 | sselect_shot.find( 782 | "span", "js-shot-likes-count", None, True, None 783 | ) 784 | ).strip() 785 | if "k" in shot_likes or "K" in shot_likes: 786 | current_shot["shot_likes"] = int_k(shot_likes) 787 | else: 788 | current_shot["shot_likes"] = int(shot_likes) 789 | 790 | # shot views 791 | shot_views = str( 792 | ( 793 | sselect_shot.find( 794 | "span", 795 | "js-shot-views-count", 796 | None, 797 | True, 798 | None, 799 | ) 800 | ) 801 | ).strip() 802 | 803 | if "k" in shot_views or "K" in shot_views: 804 | current_shot["shot_views"] = int_k(shot_views) 805 | else: 806 | current_shot["shot_views"] = int(shot_views) 807 | 808 | # designer pro status 809 | if ( 810 | sselect_shot.find( 811 | "span", "badge-pro", None, False, None 812 | ) 813 | is not None 814 | ): 815 | is_pro = True 816 | else: 817 | is_pro = False 818 | current_shot["is_pro"] = is_pro 819 | 820 | # shot URL 821 | current_shot["shot_url"] = sselect_shot.find( 822 | "img", None, None, False, "src" 823 | ) 824 | 825 | # assign current collection dict to collections 826 | user_collections[current_collection_name]["shots"][ 827 | shot_title 828 | ] = current_shot 829 | 830 | except httpx.RequestError as ex: 831 | print( 832 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 833 | ) 834 | 835 | except httpx.HTTPStatusError as ex: 836 | print( 837 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 838 | ) 839 | 840 | except httpx.RequestError as ex: 841 | print( 842 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 843 | ) 844 | 845 | except httpx.HTTPStatusError as ex: 846 | print( 847 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
848 |                 )
849 | 
850 |         self.dribbble_user_data["collections"] = user_collections
851 |         print("\n✓ Collections page scraped...")
852 | 
853 |     async def scrape_members_page(self):
854 |         """
855 |         Retrieves data from the members' page of a dribbble user
856 |         """
857 | 
858 |         user_members = {}
859 | 
860 |         async with httpx.AsyncClient() as client:
861 |             try:
862 | 
863 |                 # get members count
864 |                 member_page = await client.get(
865 |                     self.user_pages["main"], headers=self.scraper_header
866 |                 )
867 |                 member_page_soup = BeautifulSoup(member_page.text, "lxml")
868 |                 sselect = SilentSelector(member_page_soup)
869 | 
870 |                 members_count = sselect.select_one(
871 |                     "li.members a span.count", True, None
872 |                 )
873 | 
874 |                 if members_count is not None:
875 |                     members_count = int(members_count)
876 |                 else:
877 |                     members_count = 0
878 | 
879 |                 # scrape if members are available
880 |                 if members_count > 0:
881 |                     member_page_count = 1
882 | 
883 |                     if members_count <= self.members_per_page:
884 |                         max_pages = 1
885 |                     else:
886 |                         max_pages = (members_count // self.members_per_page) + 1
887 | 
888 |                     while member_page_count <= max_pages:
889 | 
890 |                         # construct members page URL
891 |                         current_user_members_page_url = (
892 |                             self.user_pages["members"]
893 |                             + str(member_page_count)
894 |                             + "&per_page="
895 |                             + str(self.members_per_page)
896 |                         )
897 | 
898 |                         # get current member page soup
899 |                         async with httpx.AsyncClient() as client_i:
900 |                             try:
901 |                                 members_page = await client_i.get(
902 |                                     current_user_members_page_url,
903 |                                     headers=self.scraper_header,
904 |                                 )
905 |                                 members_page_soup = BeautifulSoup(
906 |                                     members_page.text, "lxml"
907 |                                 )
908 |                                 sselect = SilentSelector(members_page_soup)
909 | 
910 |                                 # loop through each found member
911 |                                 if sselect.find_all(
912 |                                     "li", "scrolling-row", None, False, None
913 |                                 ):
914 |                                     for member in sselect.find_all(
915 |                                         "li", "scrolling-row", None, False, None
916 |                                     ):
917 |                                         current_member = {}
918 | 
919 |                                         sselect_member = SilentSelector(member)
920 | 
921 |                                         # member username
922 |                                         member_username = str(
923 |                                             sselect_member.select_one(
924 |                                                 "span.designer-card-username a.designer-link",
925 |                                                 False,
926 |                                                 "href",
927 |                                             )
928 |                                         ).replace("/", "")
929 | 
930 |                                         # member profile URL
931 |                                         current_member["profile_url"] = (
932 |                                             DRIBBBLE_URL + "/" + member_username
933 |                                         )
934 | 
935 |                                         # member profile name
936 |                                         profile_name = sselect_member.select_one(
937 |                                             "span.designer-card-username a.designer-link",
938 |                                             True,
939 |                                             None,
940 |                                         )
941 | 
942 |                                         current_member["profile_name"] = profile_name
943 | 
944 |                                         # member location
945 |                                         current_member[
946 |                                             "location"
947 |                                         ] = sselect_member.select_one(
948 |                                             "span.designer-card-location", True, None
949 |                                         )
950 | 
951 |                                         # member pro status
952 |                                         current_member["is_pro"] = bool(
953 |                                             sselect_member.select_one(
954 |                                                 "span.badge.badge-pro", False, None
955 |                                             )
956 |                                         )
957 | 
958 |                                         user_members[member_username] = current_member
959 |                                     member_page_count += 1
960 |                                 else:
961 |                                     break
962 | 
963 |                             except httpx.RequestError as ex:
964 |                                 print(
965 |                                     f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}"
966 |                                 )
967 | 
968 |                             except httpx.HTTPStatusError as ex:
969 |                                 print(
970 |                                     f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}."
971 | ) 972 | 973 | user_members["members_count"] = members_count 974 | self.dribbble_user_data["members"] = user_members 975 | 976 | else: 977 | self.dribbble_user_data["members"] = None 978 | 979 | print("\n✓ Members page scraped...") 980 | except httpx.RequestError as ex: 981 | print( 982 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 983 | ) 984 | 985 | except httpx.HTTPStatusError as ex: 986 | print( 987 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 988 | ) 989 | 990 | async def scrape_goods_page(self): 991 | """ 992 | Retrieves data from the goods' page of a dribbble user 993 | 994 | """ 995 | 996 | user_goods = {} 997 | async with httpx.AsyncClient() as client: 998 | try: 999 | goods_page = await client.get( 1000 | self.user_pages["goods"], headers=self.scraper_header 1001 | ) 1002 | goods_page_soup = BeautifulSoup(goods_page.text, "lxml") 1003 | sselect = SilentSelector(goods_page_soup) 1004 | 1005 | # goods' Names 1006 | goods_names = [ 1007 | name.text 1008 | for name in sselect.select( 1009 | "div.shot-details-container>div.font-label" 1010 | ) 1011 | ] 1012 | 1013 | # goods' prices 1014 | goods_prices = [ 1015 | str(price.text).strip() 1016 | for price in sselect.select( 1017 | "div.shot-details-container>div.price-label>span" 1018 | ) 1019 | ] 1020 | 1021 | # goods' urls 1022 | goods_urls = [ 1023 | DRIBBBLE_URL 1024 | + "/shots/" 1025 | + str(goods_id_soup.get("data-thumbnail-id")) 1026 | for goods_id_soup in sselect.find_all( 1027 | "li", "shot-thumbnail-container", None, False, None 1028 | ) 1029 | ] 1030 | 1031 | for goods_url, goods_name, goods_price in zip( 1032 | goods_urls, goods_names, goods_prices 1033 | ): 1034 | current_user_good = {} 1035 | 1036 | # Construct Goods shot URL 1037 | current_user_good["url"] = goods_url 1038 | current_user_good["price"] = goods_price 1039 | user_goods[goods_name] = current_user_good 1040 | 1041 | # Get more data about the goods on sale 1042 | user_goods = await self.get_shots_data( 1043 | goods_urls, goods_names, user_goods 1044 | ) 1045 | 1046 | except httpx.RequestError as ex: 1047 | print( 1048 | f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}" 1049 | ) 1050 | 1051 | except httpx.HTTPStatusError as ex: 1052 | print( 1053 | f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}." 
1054 |                 )
1055 | 
1056 |         self.dribbble_user_data["goods_for_sale"] = user_goods
1057 |         print("\n✓ Goods page scraped...")
1058 | 
1059 |     async def get_shots_data(
1060 |         self, shot_urls: list, shot_names: list, shots_dict: dict
1061 |     ) -> dict:
1062 |         """
1063 |         Retrieves data about a list of given shots
1064 |         """
1065 |         for shot_url, shot_name in zip(shot_urls, shot_names):
1066 |             # initialised before the request so a failed fetch still
1067 |             # leaves a dict to attach below
1068 |             current_shot_data = {}
1069 | 
1070 |             # get current shot page HTML
1071 |             async with httpx.AsyncClient() as client:
1072 |                 try:
1073 |                     shot_page = await client.get(shot_url, headers=self.scraper_header)
1074 |                     shot_page_soup = BeautifulSoup(shot_page.text, "lxml")
1075 |                     sselect = SilentSelector(shot_page_soup)
1076 | 
1077 |                     # get shot color palette
1078 |                     shot_color_palette = [
1079 |                         color.find("a").text
1080 |                         for color in sselect.select("ul.color-chips.group li")
1081 |                     ]
1082 |                     current_shot_data["color_palette"] = shot_color_palette
1083 | 
1084 |                     # extract the embedded JSON from the shot page's script tag
1085 |                     shot_data_script = sselect.select("body script")[6]
1086 |                     shot_data_js = shot_data_script.text
1087 |                     shot_data_js = "".join(shot_data_js.split("\n")[3:])
1088 |                     shot_data_json = chompjs.parse_js_object(
1089 |                         shot_data_js, json_params={"strict": False}
1090 |                     )
1091 |                     shot_data_dict = dict(shot_data_json)
1092 | 
1093 |                     # shot metadata
1094 |                     current_shot_data["likes"] = shot_data_dict["shotData"][
1095 |                         "likesCount"
1096 |                     ]
1097 |                     shot_published_date = datetime.strptime(
1098 |                         shot_data_dict["shotData"]["postedOn"],
1099 |                         self.shot_published_date_format,
1100 |                     ).strftime(self.preferred_time_format)
1101 | 
1102 |                     current_shot_data["published_date"] = shot_published_date
1103 |                     current_shot_data["saves_count"] = shot_data_dict["shotData"][
1104 |                         "savesCount"
1105 |                     ]
1106 |                     current_shot_data["isAnimated"] = shot_data_dict["shotData"][
1107 |                         "isAnimated"
1108 |                     ]
1109 |                     current_shot_data["isAnimatedGif"] = shot_data_dict["shotData"][
1110 |                         "isAnimatedGif"
1111 |                     ]
1112 |                     current_shot_data["tags"] = shot_data_dict["shotData"]["tags"]
1113 |                     current_shot_data["views_count"] = shot_data_dict["shotData"][
1114 |                         "viewsCount"
1115 |                     ]
1116 | 
1117 |                 except httpx.RequestError as ex:
1118 |                     print(
1119 |                         f"\nAn error occurred while requesting {ex.request.url!r}.\n {ex}"
1120 |                     )
1121 | 
1122 |                 except httpx.HTTPStatusError as ex:
1123 |                     print(
1124 |                         f"\nError response {ex.response.status_code} while requesting {ex.request.url!r}."
1125 |                     )
1126 | 
1127 |             shots_dict.setdefault(shot_name, {})["metadata"] = current_shot_data
1128 |         return shots_dict
1129 | 
1130 |     def export_to_json(self):
1131 |         """
1132 |         Exports the scraped user data as a JSON file
1133 |         """
1134 | 
1135 |         # Convert dict to JSON
1136 |         dribbble_json = json.dumps(self.dribbble_user_data)
1137 | 
1138 |         # Write to JSON file
1139 |         with open(self.json_file, "w") as json_file:
1140 |             json_file.write(dribbble_json)
1141 | 
1142 |         print("\nResults saved to {}".format(self.json_file))
1143 | 
--------------------------------------------------------------------------------
/dribbble_py/silent_selector.py:
--------------------------------------------------------------------------------
1 | class SilentSelector:
2 |     """
3 |     Wrapper around a bs4 page soup that silences scraping exceptions
4 |     """
5 | 
6 |     def __init__(self, page_soup):
7 |         self.page_soup = page_soup
8 | 
9 |     def select_one(self, query_string: str, get_text: bool, attribute: str):
10 |         """
11 |         Wrapper for select_one() of bs4 with exception handling.
12 | 
13 |         Arguments:
14 |             query_string: string
15 |             get_text: bool
16 |             attribute: string
17 | 
18 |         Returns:
19 |             The matched text, attribute value or tag, or None on failure.
20 |         """
21 |         try:
22 |             if get_text:
23 |                 return self.page_soup.select_one(query_string).text
24 |             elif attribute is not None:
25 |                 return self.page_soup.select_one(query_string).get(attribute)
26 |             else:
27 |                 return self.page_soup.select_one(query_string)
28 |         except (AttributeError, TypeError):
29 |             return None
30 | 
31 |     def select(self, query_string: str):
32 |         """
33 |         Wrapper for select() of bs4 with exception handling.
34 | 
35 |         Arguments:
36 |             query_string: string
37 | 
38 |         Returns:
39 |             A list of matching tags, or None on failure.
40 |         """
41 |         try:
42 |             return self.page_soup.select(query_string)
43 |         except (AttributeError, TypeError):
44 |             return None
45 | 
46 |     def find_all(
47 |         self,
48 |         query_tag: str,
49 |         tag_class: str,
50 |         tag_id: str,
51 |         get_string: bool,
52 |         attribute: str,
53 |     ):
54 |         """
55 |         Wrapper for find_all() of bs4 with exception handling.
56 | 
57 |         Arguments:
58 |             query_tag: string
59 |             tag_class: string
60 |             tag_id: string
61 |             get_string: bool
62 |             attribute: string
63 | 
64 |         Returns:
65 |             A list of tags, strings or attribute values, or None on failure.
66 |         """
67 |         try:
68 |             # Narrow the search by class or id when one is given
69 |             if tag_class is not None:
70 |                 results = self.page_soup.find_all(query_tag, class_=tag_class)
71 |             elif tag_id is not None:
72 |                 results = self.page_soup.find_all(query_tag, id=tag_id)
73 |             else:
74 |                 results = self.page_soup.find_all(query_tag)
75 | 
76 |             # find_all() returns a ResultSet, so text and attribute
77 |             # extraction has to happen per tag
78 |             if get_string:
79 |                 return [tag.text for tag in results]
80 |             elif attribute is not None:
81 |                 return [tag.get(attribute) for tag in results]
82 |             else:
83 |                 return results
84 |         except (AttributeError, TypeError):
85 |             return None
86 | 
87 |     def find(
88 |         self,
89 |         query_tag: str,
90 |         tag_class: str,
91 |         tag_id: str,
92 |         get_string: bool,
93 |         attribute: str,
94 |     ):
95 |         """
96 |         Wrapper for find() of bs4 with exception handling.
97 | 
98 |         Arguments:
99 |             query_tag: string
100 |             tag_class: string
101 |             tag_id: string
102 |             get_string: bool
103 |             attribute: string
104 | 
105 |         Returns:
106 |             The matched text, attribute value or tag, or None on failure.
107 |         """
108 |         try:
109 |             if tag_class is not None:
110 |                 result = self.page_soup.find(query_tag, class_=tag_class)
111 |             elif tag_id is not None:
112 |                 result = self.page_soup.find(query_tag, id=tag_id)
113 |             else:
114 |                 result = self.page_soup.find(query_tag)
115 | 
116 |             if get_string:
117 |                 return result.text
118 |             elif attribute is not None:
119 |                 return result.get(attribute)
120 |             else:
121 |                 return result
122 |         except (AttributeError, TypeError):
123 |             return None
124 | 
--------------------------------------------------------------------------------
/dribbble_py/utils.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | 
3 | 
4 | def int_k(string_k: str):
5 |     """
6 |     Convert strings ending with 'k' or 'K' to an integer
7 | 
8 |     Arguments:
9 |         string_k: string
10 | 
11 |     Returns:
12 |         number_k: int
13 |     """
14 |     number_k = float(string_k.replace("k", "").replace("K", ""))
15 |     number_k = int(number_k * 1000)
16 |     return number_k
17 | 
18 | 
19 | def get_redirect_url(query_url: str):
20 |     """
21 |     Returns the last URL in the history of redirects
22 |     API:
23 |     https://github.com/encode/httpx/blob/master/httpx/_models.py
24 | 
25 |     Arguments:
26 |         query_url: string
27 | 
28 |     Returns:
29 |         [redirected_url, response_site]: list
30 |     """
31 |     response = httpx.get(
32 |         query_url,
33 |         timeout=10,
34 |         follow_redirects=True,
35 |     )
36 |     response_site = response.url.host
37 | 
38 |     # Get the history of redirect URLs, ending with the final response URL
39 |     history_urls = [str(history.url) for history in response.history]
40 |     history_urls.append(str(response.url))
41 | 
42 |     # Discard login redirect URLs of facebook, instagram, etc.
43 |     if "login" in history_urls[-1] or "authwall" in history_urls[-1]:
44 |         redirected_url = history_urls[-2]
45 |     else:
46 |         redirected_url = history_urls[-1]
47 | 
48 |     return [redirected_url, response_site]
49 | 
50 | 
51 | def string_to_number(number_string: str) -> int:
52 |     """
53 |     Convert a count like "1,234" to an int; returns 0 when parsing fails
54 |     """
55 |     try:
56 |         if number_string is not None:
57 |             int_number = int(number_string.replace(",", ""))
58 |         else:
59 |             int_number = 0
60 |     except ValueError:
61 |         int_number = 0
62 |     return int_number
63 | 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Dribbble-py
2 | 
3 | A Python scraper for dribbble.com
4 | 
5 | ![PyPI](https://img.shields.io/pypi/v/dribbble-py?style=flat-square)
6 | ![GitHub](https://img.shields.io/github/license/rand-net/dribbble-py?style=flat-square)
7 | 
8 | ## Disclaimer
9 | 
10 | Any legal issues arising from the downloading of a Dribbble user's graphic assets should be taken up with the forks and content abusers actually responsible; this project is not affiliated with them.
11 | 
12 | Dribbble-py does not support or promote downloading a Dribbble user's graphic assets or using them without permission.
13 | 
14 | In case of copyright infringement, please directly contact the responsible forks or the individuals responsible for the abuse.
15 | 
16 | This app merely scrapes information about a Dribbble user that is publicly available on the website.
17 | 
18 | ## Installation
19 | 
20 | ```
21 | $ pip install -U dribbble-py
22 | ```
23 | 
24 | ## Usage
25 | 
26 | ```
27 | $ drbl_py -h
28 | 
29 | usage: drbl_py [-h] [-u USERNAME] [-m] [-j JSON_FILE] [--version]
30 | 
31 | Dribbble-py 0.0.1
32 | 
33 | Program to scrape dribbble user information
34 | 
35 | optional arguments:
36 |   -h, --help            show this help message and exit
37 |   -u USERNAME, --username USERNAME
38 |                         Enter username to scrape.
39 | 
40 |   -m, --get-metadata    Get metadata about every user shot.
41 |                         Takes longer to scrape.
42 |                         Default = No metadata about user shots
43 | 
44 |   -j JSON_FILE, --json-file JSON_FILE
45 |                         Name of output JSON filename.
46 |                         Default = username.json
47 | 
48 |   --version             show program's version number and exit
49 | 
50 | Example usage
51 | -------------
52 | 
53 | Download info about a user.
54 | $ drbl_py -u JohnDoe
55 | 
56 | Download info about a user to a custom JSON file.
57 | $ drbl_py -u JohnDoe -j John
58 | ```
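59 | 
60 | ## Library usage
61 | 
62 | The scraper can also be driven from Python directly. A minimal sketch, using the methods defined in `dribbble_py/dribbble_user.py` (the username is a placeholder):
63 | 
64 | ```python
65 | from dribbble_py import DribbbleUser
66 | 
67 | # Passing None as json_file writes the output to "<username>.json"
68 | user = DribbbleUser("JohnDoe", None)
69 | user.check_user()
70 | 
71 | # Scrapes all profile pages concurrently; the with-metadata variant
72 | # also visits every shot page and takes longer.
73 | user.run_nursery_without_metadata_scraper()
74 | user.export_to_json()
75 | ```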
76 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | anyio==3.5.0
2 | art==5.4
3 | async-generator==1.10
4 | attrs==21.4.0
5 | beautifulsoup4==4.10.0
6 | certifi==2021.10.8
7 | charset-normalizer==2.0.11
8 | chompjs==1.1.6
9 | h11==0.12.0
10 | httpcore==0.14.6
11 | httpx==0.22.0
12 | idna==3.3
13 | outcome==1.1.0
14 | rfc3986==1.5.0
15 | sniffio==1.2.0
16 | sortedcontainers==2.4.0
17 | soupsieve==2.3.1
18 | trio==0.19.0
19 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | from setuptools import setup
3 | 
4 | HERE = pathlib.Path(__file__).parent
5 | 
6 | README = (HERE / "readme.md").read_text()
7 | 
8 | setup(
9 |     name="dribbble-py",
10 |     version="0.0.1",
11 |     description="Dribbble user scraper",
12 |     long_description=README,
13 |     long_description_content_type="text/markdown",
14 |     url="https://github.com/rand-net/dribbble-py",
15 |     author="rand-net",
16 |     license="MIT",
17 |     classifiers=[
18 |         "License :: OSI Approved :: MIT License",
19 |         "Programming Language :: Python :: 3",
20 |         "Programming Language :: Python :: 3.8",
21 |     ],
22 |     packages=["dribbble_py"],
23 |     include_package_data=True,
24 |     entry_points={"console_scripts": ["drbl_py = dribbble_py.cli:main"]},
25 |     install_requires=[
26 |         "art",
27 |         "beautifulsoup4",
28 |         "chompjs",
29 |         "lxml",
30 |         "httpx",
31 |         "trio",
32 |     ],
33 |     keywords=["dribbble", "dribbble-scraper", "scraper", "graphic-design", "design"],
34 | )
35 | 
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rand-net/dribbble-py/c8f309bdbdf19659cdeb6dbb70b495e98804e2b4/test/__init__.py
--------------------------------------------------------------------------------
/test/test_dribbble_user.py:
--------------------------------------------------------------------------------
1 | from unittest import IsolatedAsyncioTestCase
2 | import unittest
3 | import sys
4 | 
5 | 
6 | 
sys.path.append("../dribbble_py")
7 | from dribbble_py import *
8 | 
9 | 
10 | class TestDribbbleUser(IsolatedAsyncioTestCase):
11 |     def test_check_user(self):
12 |         print("Testing check user...")
13 |         drbl_usr = DribbbleUser("theosm", None)
14 |         drbl_usr.check_user()
15 |         # check_user() records "Yes" or "No" under the "user_exists" key
16 |         self.assertIn(drbl_usr.dribbble_user_data["user_exists"], ["Yes", "No"])
17 | 
18 |     async def test_scrape_main_page(self):
19 |         print("Testing scrape_main_page... ")
20 |         drbl_usr = DribbbleUser("TonyBabel", None)
21 |         await drbl_usr.scrape_main_page()
22 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["projects_count"], 0)
23 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["shots_count"], 0)
24 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["collections_count"], 0)
25 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["liked_shots"], 0)
26 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["members_count"], 0)
27 | 
28 |     async def test_scrape_about_page(self):
29 |         print("Testing scrape_about_page... ")
30 |         drbl_usr = DribbbleUser("TonyBabel", None)
31 |         await drbl_usr.scrape_about_page()
32 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["followers"], 0)
33 |         self.assertGreaterEqual(drbl_usr.dribbble_user_data["following"], 0)
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     unittest.main()
38 | 
--------------------------------------------------------------------------------
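For reference, a trimmed sketch of the JSON document that export_to_json() produces. The key names below are taken from the assignments in dribbble_py/dribbble_user.py; the values are illustrative placeholders, not real scraped data:

{
    "user_exists": "Yes",
    "shots_count": 120,
    "projects_count": 4,
    "collections_count": 2,
    "liked_shots": 350,
    "user_description": "Product designer",
    "hire_status": true,
    "members_count": 0,
    "team_url": null,
    "followers": 1500,
    "following": 80,
    "tags": null,
    "location": "Berlin, Germany",
    "bio": "Hello!",
    "is_pro": true,
    "join_date": "2018-03-01",
    "skills": ["illustration", "branding"],
    "social_media_profiles": {"twitter.com": "https://twitter.com/JohnDoe"},
    "shots": {"shots_count": 120, "shots": {}},
    "projects": {},
    "collections": {},
    "members": null,
    "goods_for_sale": {}
}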