├── README.md
├── scrape_with.py
└── scrape_with.yml

/README.md:
--------------------------------------------------------------------------------
1 | # Stash plugin scrape with
2 | 
3 | This is a plugin for Stash that automatically runs scrapers for scenes and performers.
4 | 
5 | It works with fragment based scrapers such as the xbvrdb scraper.
6 | Tag a scene with scrape_with_xbvrdb, then go to Tasks and run the "run fragment scrapers with tag" task.
7 | This runs the scraper and creates performers and tags as needed.
8 | 
9 | The plugin also provides performer tasks.
10 | Trigger the "run performer scrapers" task to run a series of scrapers, in a preferred order, on performers that are missing a URL.
11 | "run performer image scraper on all performers" runs the performer-image-dir scraper on all performers.
--------------------------------------------------------------------------------
/scrape_with.py:
--------------------------------------------------------------------------------
1 | # Stash "scrape with" plugin: drives Stash's GraphQL API to run scrapers automatically.
2 | import requests
3 | import sys
4 | import json
5 | 
6 | 
7 | class scrape_with:
8 |     headers = {
9 |         "Accept-Encoding": "gzip, deflate, br",
10 |         "Content-Type": "application/json",
11 |         "Accept": "application/json",
12 |         "Connection": "keep-alive",
13 |         "DNT": "1"
14 |     }
15 | 
16 |     def __init__(self, url):
17 |         self.url = url
18 | 
19 |     def __prefix(self, levelChar):
20 |         startLevelChar = b'\x01'
21 |         endLevelChar = b'\x02'
22 | 
23 |         ret = startLevelChar + levelChar + endLevelChar
24 |         return ret.decode()
25 | 
26 |     def __log(self, levelChar, s):
27 |         if levelChar == "":
28 |             return
29 | 
30 |         print(self.__prefix(levelChar) + s + "\n", file=sys.stderr, flush=True)
31 | 
32 |     def trace(self, s):
33 |         self.__log(b't', s)
34 | 
35 |     def debug(self, s):
36 |         self.__log(b'd', s)
37 | 
38 |     def info(self, s):
39 |         self.__log(b'i', s)
40 | 
41 |     def warning(self, s):
42 |         self.__log(b'w', s)
43 | 
44 |     def error(self, s):
45 |         self.__log(b'e', s)
46 | 
47 |     def progress(self, p):
48 |         progress = min(max(0, p), 1)
49 |         self.__log(b'p', str(progress))
50 | 
51 |     def __callGraphQL(self, query, variables=None):
52 |         payload = {}
53 |         payload['query'] = query
54 |         if variables is not None:
55 |             payload['variables'] = variables
56 | 
57 |         # note: cookies and authentication are not handled; the API must be reachable without a session
58 |         response = requests.post(self.url, json=payload, headers=self.headers)
59 | 
60 |         if response.status_code == 200:
61 |             result = response.json()
62 |             if result.get("errors", None):
63 |                 for error in result["errors"]:
64 |                     raise Exception("GraphQL error: {}".format(error))
65 |             if result.get("data", None):
66 |                 return result.get("data")
67 |         else:
68 |             raise Exception(
69 |                 "GraphQL query failed:{} - {}. Query: {}. Variables: {}".format(response.status_code, response.content,
70 |                                                                                 query, variables))
71 | 
72 |     def listTags(self):
73 |         query = """
74 |         query {
75 |           allTags {
76 |             id
77 |             name
78 |           }
79 |         }"""
80 | 
81 |         result = self.__callGraphQL(query)
82 |         return result["allTags"]
83 | 
84 |     def findTagIdWithName(self, name):
85 |         query = """
86 |         query {
87 |           allTags {
88 |             id
89 |             name
90 |           }
91 |         }
92 |         """
93 | 
94 |         result = self.__callGraphQL(query)
95 | 
96 |         for tag in result["allTags"]:
97 |             if tag["name"] == name:
98 |                 return tag["id"]
99 |         return None
100 | 
101 | 
102 |     def createTagWithName(self, name):
103 |         query = """
104 |         mutation tagCreate($input:TagCreateInput!) {
105 |             tagCreate(input: $input){
106 |                 id
107 |             }
108 |         }
109 |         """
110 |         variables = {'input': {
111 |             'name': name
112 |         }}
113 | 
114 |         result = self.__callGraphQL(query, variables)
115 |         return result["tagCreate"]["id"]
116 | 
117 |     def destroyTag(self, id):
118 |         query = """
119 |         mutation tagDestroy($input: TagDestroyInput!) {
120 |             tagDestroy(input: $input)
121 |         }
122 |         """
123 |         variables = {'input': {
124 |             'id': id
125 |         }}
126 |         self.__callGraphQL(query, variables)
127 | 
128 |     def findRandomSceneId(self):
129 |         query = """
130 |         query findScenes($filter: FindFilterType!) {
131 |             findScenes(filter: $filter) {
132 |                 count
133 |                 scenes {
134 |                     id
135 |                     tags {
136 |                         id
137 |                     }
138 |                 }
139 |             }
140 |         }
141 |         """
142 | 
143 |         variables = {'filter': {
144 |             'per_page': 1,
145 |             'sort': 'random'
146 |         }}
147 | 
148 |         result = self.__callGraphQL(query, variables)
149 | 
150 |         if result["findScenes"]["count"] == 0:
151 |             return None
152 | 
153 |         return result["findScenes"]["scenes"][0]
154 | 
155 |     def updateScene(self, sceneData):
156 |         query = """
157 |         mutation sceneUpdate($input:SceneUpdateInput!) {
158 |             sceneUpdate(input: $input) {
159 |                 id
160 |             }
161 |         }
162 |         """
163 |         variables = {'input': sceneData}
164 | 
165 |         self.__callGraphQL(query, variables)
166 | 
167 |     def updateGallery(self, galleryData):
168 |         query = """mutation GalleryUpdate($input: GalleryUpdateInput!) {
169 |             galleryUpdate(input: $input) {
170 |                 id
171 |             }
172 |         }"""
173 |         variables = {'input': galleryData}
174 | 
175 |         self.__callGraphQL(query, variables)
176 | 
177 |     def list_scene_scrapers(self, scrape_type):
178 |         query = """
179 |         query listSceneScrapers {
180 |             listSceneScrapers {
181 |               id
182 |               name
183 |               scene{
184 |                 supported_scrapes
185 |               }
186 |               gallery{
187 |                 supported_scrapes
188 |               }
189 |             }
190 |         }"""
191 |         ret = []
192 |         result = self.__callGraphQL(query)
193 |         for r in result["listSceneScrapers"]:
194 |             if scrape_type in r["scene"]["supported_scrapes"]:
195 |                 ret.append(r["id"])
196 |         return ret
197 | 
198 |     def list_gallery_scrapers(self, scrape_type):
199 |         query = """
200 |         query listGalleryScrapers {
201 |             listGalleryScrapers {
202 |               id
203 |               name
204 |               scene{
205 |                 supported_scrapes
206 |               }
207 |               gallery{
208 |                 supported_scrapes
209 |               }
210 |             }
211 |         }"""
212 |         ret = []
213 |         result = self.__callGraphQL(query)
214 |         for r in result["listGalleryScrapers"]:
215 |             if scrape_type in r["gallery"]["supported_scrapes"]:
216 |                 ret.append(r["id"])
217 |         return ret
218 | 
219 |     def get_scenes_with_tag(self, tag):
220 |         tagID = self.findTagIdWithName(tag)
221 |         query = """query findScenes($scene_filter: SceneFilterType!) {
222 |             findScenes(scene_filter: $scene_filter filter: {per_page: -1}) {
223 |                 count
224 |                 scenes {
225 |                     id
226 |                     checksum
227 |                     oshash
228 |                     title
229 |                     details
230 |                     url
231 |                     date
232 |                     rating
233 |                     organized
234 |                     o_counter
235 |                     path
236 |                     file {
237 |                         size
238 |                         duration
239 |                         video_codec
240 |                         audio_codec
241 |                         width
242 |                         height
243 |                         framerate
244 |                         bitrate
245 |                     }
246 |                     galleries {
247 |                         id
248 |                         checksum
249 |                         path
250 |                         title
251 |                         url
252 |                         date
253 |                         details
254 |                         rating
255 |                         organized
256 |                         studio {
257 |                             id
258 |                             name
259 |                             url
260 |                         }
261 |                         image_count
262 |                         tags {
263 |                             id
264 |                             name
265 |                             image_path
266 |                             scene_count
267 |                         }
268 |                     }
269 |                     performers {
270 |                         id
271 |                         name
272 |                         gender
273 |                         url
274 |                         twitter
275 |                         instagram
276 |                         birthdate
277 |                         ethnicity
278 |                         country
279 |                         eye_color
280 |                         country
281 |                         height
282 |                         measurements
283 |                         fake_tits
284 |                         career_length
285 |                         tattoos
286 |                         piercings
287 |                         aliases
288 |                     }
289 |                     studio{
290 |                         id
291 |                         name
292 |                         url
293 |                         stash_ids{
294 |                             endpoint
295 |                             stash_id
296 |                         }
297 |                     }
298 |                     stash_ids{
299 |                         endpoint
300 |                         stash_id
301 |                     }
302 |                 }
303 |             }
304 |         }"""
305 | 
306 |         variables = {"scene_filter": {"tags": {"value": [tagID], "modifier": "INCLUDES", "depth": 1}}}
307 |         result = self.__callGraphQL(query, variables)
308 |         return result["findScenes"]["scenes"]
309 | 
310 |     def get_galleries_with_tag(self, tag):
311 |         tagID = self.findTagIdWithName(tag)
312 |         query = """query findGalleries($galleries_filter: GalleryFilterType!) {
313 |             findGalleries(gallery_filter: $galleries_filter filter: {per_page: -1}) {
314 |                 count
315 |                 galleries {
316 |                     id
317 |                     checksum
318 |                     path
319 |                     title
320 |                     url
321 |                     date
322 |                     details
323 |                     rating
324 |                     organized
325 |                     created_at
326 |                     updated_at
327 |                     file_mod_time
328 |                     scenes{
329 |                         id
330 |                         title
331 |                     }
332 |                     performers {
333 |                         id
334 |                         name
335 |                         gender
336 |                         url
337 |                         twitter
338 |                         instagram
339 |                         birthdate
340 |                         ethnicity
341 |                         country
342 |                         eye_color
343 |                         country
344 |                         height
345 |                         measurements
346 |                         fake_tits
347 |                         career_length
348 |                         tattoos
349 |                         piercings
350 |                         aliases
351 |                     }
352 |                     studio{
353 |                         id
354 |                         name
355 |                         url
356 |                         stash_ids{
357 |                             endpoint
358 |                             stash_id
359 |                         }
360 |                     }
361 |                 }
362 |             }
363 |         }"""
364 | 
365 |         variables = {"galleries_filter": {"tags": {"value": [tagID], "modifier": "INCLUDES", "depth": 1}}}
366 |         result = self.__callGraphQL(query, variables)
367 |         return result["findGalleries"]["galleries"]
368 | 
369 |     def scrapeScene(self, scraper, scene):
370 |         query = """query ScrapeScene($scraper_id: ID!, $scene: SceneUpdateInput!) {
371 |             scrapeScene(scraper_id: $scraper_id, scene: $scene) {
372 |                 title
373 |                 details
374 |                 url
375 |                 date
376 |                 image
377 |                 file{
378 |                     size
379 |                     duration
380 |                     video_codec
381 |                     audio_codec
382 |                     width
383 |                     height
384 |                     framerate
385 |                     bitrate
386 |                 }
387 |                 studio{
388 |                     stored_id
389 |                     name
390 |                     url
391 |                     remote_site_id
392 |                 }
393 |                 tags{
394 |                     stored_id
395 |                     name
396 |                 }
397 |                 performers{
398 |                     stored_id
399 |                     name
400 |                     gender
401 |                     url
402 |                     twitter
403 |                     instagram
404 |                     birthdate
405 |                     ethnicity
406 |                     country
407 |                     eye_color
408 |                     country
409 |                     height
410 |                     measurements
411 |                     fake_tits
412 |                     career_length
413 |                     tattoos
414 |                     piercings
415 |                     aliases
416 |                     remote_site_id
417 |                     images
418 |                 }
419 |                 movies{
420 |                     stored_id
421 |                     name
422 |                     aliases
423 |                     duration
424 |                     date
425 |                     rating
426 |                     director
427 |                     synopsis
428 |                     url
429 |                 }
430 |                 remote_site_id
431 |                 duration
432 |                 fingerprints{
433 |                     algorithm
434 |                     hash
435 |                     duration
436 |                 }
437 |                 __typename
438 |             }
439 |         }"""
440 |         variables = {"scraper_id": scraper,
441 |                      "scene": {"title": scene["title"], "date": scene["date"], "details": scene["details"],
442 |                                "gallery_ids": [], "id": scene["id"], "movies": None, "performer_ids": [],
443 |                                "rating": scene["rating"], "stash_ids": scene["stash_ids"], "studio_id": None,
444 |                                "tag_ids": None, "url": scene["url"]}}
445 |         result = self.__callGraphQL(query, variables)
446 |         return result["scrapeScene"]
447 | 
448 |     def scrapeGallery(self, scraper, gallery_id):
449 |         query = """query ScrapeSingleGallery($source: ScraperSourceInput!, $input: ScrapeSingleGalleryInput!) {
450 |             scrapeSingleGallery(source: $source, input: $input) {
451 |                 ...ScrapedGalleryData
452 |                 __typename
453 |             }
454 |         }
455 | 
456 |         fragment ScrapedGalleryData on ScrapedGallery {
457 |             title
458 |             details
459 |             url
460 |             date
461 |             studio {
462 |                 ...ScrapedSceneStudioData
463 |                 __typename
464 |             }
465 |             tags {
466 |                 ...ScrapedSceneTagData
467 |                 __typename
468 |             }
469 |             performers {
470 |                 ...ScrapedScenePerformerData
471 |                 __typename
472 |             }
473 |             __typename
474 |         }
475 | 
476 |         fragment ScrapedSceneStudioData on ScrapedStudio {
477 |             stored_id
478 |             name
479 |             url
480 |             remote_site_id
481 |             __typename
482 |         }
483 | 
484 |         fragment ScrapedSceneTagData on ScrapedTag {
485 |             stored_id
486 |             name
487 |             __typename
488 |         }
489 | 
490 |         fragment ScrapedScenePerformerData on ScrapedPerformer {
491 |             stored_id
492 |             name
493 |             gender
494 |             url
495 |             twitter
496 |             instagram
497 |             birthdate
498 |             ethnicity
499 |             country
500 |             eye_color
501 |             height
502 |             measurements
503 |             fake_tits
504 |             career_length
505 |             tattoos
506 |             piercings
507 |             aliases
508 |             tags {
509 |                 ...ScrapedSceneTagData
510 |                 __typename
511 |             }
512 |             remote_site_id
513 |             images
514 |             details
515 |             death_date
516 |             hair_color
517 |             weight
518 |             __typename
519 |         }"""
520 | 
521 |         variables = {"input": {"gallery_id": gallery_id}, "source": {"scraper_id": scraper}}
522 |         result = self.__callGraphQL(query, variables)
523 |         return result["scrapeSingleGallery"][0] if result["scrapeSingleGallery"] else None
524 | 
525 |     def findStudioIdWithName(self, name):
526 |         query = """query {
527 |             allStudios {
528 |               id
529 |               name
530 |             }
531 |         }"""
532 |         result = self.__callGraphQL(query)
533 | 
534 |         for studio in result["allStudios"]:
535 |             if studio["name"] == name:
536 |                 return studio["id"]
537 |         return None
538 | 
539 |     def findPerformersByName(self, name):
540 |         query = """query FindPerformers(
541 |             $filter: FindFilterType
542 |             $performer_filter: PerformerFilterType
543 |         ) {
544 |             findPerformers(filter: $filter, performer_filter: $performer_filter) {
545 |                 count
546 |                 performers {
547 |                     ...PerformerData
548 |                     __typename
549 |                 }
550 |                 __typename
551 |             }
552 |         }
553 |         fragment PerformerData on Performer {
554 |             id
555 |             checksum
556 |             name
557 |             url
558 |             gender
559 |             twitter
560 |             instagram
561 |             birthdate
562 |             ethnicity
563 |             country
564 |             eye_color
565 |             height
566 |             measurements
567 |             fake_tits
568 |             career_length
569 |             tattoos
570 |             piercings
571 |             aliases
572 |             favorite
573 |             image_path
574 |             scene_count
575 |             stash_ids {
576 |                 stash_id
577 |                 endpoint
578 |                 __typename
579 |             }
580 |             __typename
581 |         }"""
582 | 
583 |         variables = {
584 |             "filter":
585 |             {
586 |                 "q": name,
587 |                 "page": 1,
588 |                 "per_page": 100,
589 |                 "sort": "name",
590 |                 "direction": "ASC"
591 |             },
592 |             "performer_filter": {}
593 |         }
594 |         result = self.__callGraphQL(query, variables)
595 |         return result["findPerformers"]["performers"]
596 | 
597 |     def findPerformer(self, name):
598 |         for performer in self.findPerformersByName(name):
599 |             self.debug("finding performer: " + name + ", candidate: " + str(performer["name"]))
600 |             if performer["name"].lower() == name.lower():
601 |                 self.debug("Found performer")
602 |                 return performer
603 |             if performer.get("aliases"):  # aliases is a comma separated string
604 |                 if name.lower() in [a.strip().lower() for a in performer["aliases"].split(",")]:
605 |                     return performer
606 |         return None
607 | 
608 |     def createPerformer(self, performer):
609 |         query = """
610 |         mutation performerCreate($input:PerformerCreateInput!) {
611 |             performerCreate(input: $input){
612 |                 id
613 |             }
614 |         }
615 |         """
616 |         new_performer = {}
617 |         if "name" in performer:
618 |             new_performer["name"] = performer["name"]
619 |         if "url" in performer:
620 |             new_performer["url"] = performer["url"]
621 |         else:
622 |             new_performer["url"] = None
623 |         if "gender" in performer:
624 |             new_performer["gender"] = performer["gender"]
625 |         else:
626 |             new_performer["gender"] = None
627 |         if "birthdate" in performer:
628 |             new_performer["birthdate"] = performer["birthdate"]
629 |         else:
630 |             new_performer["birthdate"] = None
631 |         if "ethnicity" in performer:
632 |             new_performer["ethnicity"] = performer["ethnicity"]
633 |         else:
634 |             new_performer["ethnicity"] = None
635 |         if "country" in performer:
636 |             new_performer["country"] = performer["country"]
637 |         else:
638 |             new_performer["country"] = None
639 |         if "eye_color" in performer:
640 |             new_performer["eye_color"] = performer["eye_color"]
641 |         else:
642 |             new_performer["eye_color"] = None
643 | 
644 |         if "height" in performer:
645 |             new_performer["height"] = performer["height"]
646 |         else:
647 |             new_performer["height"] = None
648 |         if "measurements" in performer:
649 |             new_performer["measurements"] = performer["measurements"]
650 |         else:
651 |             new_performer["measurements"] = None
652 | 
653 |         if "fake_tits" in performer:
654 |             new_performer["fake_tits"] = performer["fake_tits"]
655 |         else:
656 |             new_performer["fake_tits"] = None
657 |         if "career_length" in performer:
658 |             new_performer["career_length"] = performer["career_length"]
659 |         else:
660 |             new_performer["career_length"] = None
661 | 
662 |         if "tattoos" in performer:
663 |             new_performer["tattoos"] = performer["tattoos"]
664 |         else:
665 |             new_performer["tattoos"] = None
666 |         if "piercings" in performer:
667 |             new_performer["piercings"] = performer["piercings"]
668 |         else:
669 |             new_performer["piercings"] = None
670 |         if "aliases" in performer:
671 |             new_performer["aliases"] = performer["aliases"]
672 |         else:
673 |             new_performer["aliases"] = None
674 | 
675 |         if "twitter" in performer:
676 |             new_performer["twitter"] = performer["twitter"]
677 |         else:
678 |             new_performer["twitter"] = None
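        # the remaining optional fields follow the same pattern: copy the scraped value
        # when present, otherwise default to None so PerformerCreateInput is fully populated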
new_performer["twitter"] = None 679 | if "instagram" in performer: 680 | new_performer["instagram"] = performer["instagram"] 681 | else: 682 | new_performer["instagram"] = None 683 | if "favorite" in performer: 684 | new_performer["favorite"] = performer["favorite"] 685 | else: 686 | performer["favorite"] = None 687 | if "image" in performer: 688 | new_performer["image"] = performer["image"] 689 | elif "images" in performer: 690 | if performer["images"] is not None: 691 | if len(performer["images"]) > 0: 692 | new_performer["image"] = performer["images"][0] 693 | else: 694 | new_performer["image"] = None 695 | else: 696 | new_performer["image"] = None 697 | else: 698 | performer["image"] = None 699 | if "stash_ids" in performer: 700 | new_performer["stash_ids"] = performer["stash_ids"] 701 | else: 702 | new_performer["stash_ids"] = [] 703 | 704 | variables = {'input': new_performer} 705 | 706 | result = self.__callGraphQL(query, variables) 707 | return result["performerCreate"] 708 | 709 | 710 | 711 | 712 | def setup_tags(self): 713 | scrapers=self.list_scene_scrapers('FRAGMENT') 714 | for s in scrapers: 715 | tagName='scrape_with_'+s 716 | tagID = self.findTagIdWithName(tagName) 717 | if tagID == None: 718 | tagID = self.createTagWithName(tagName) 719 | self.debug("adding tag "+tagName) 720 | else: 721 | self.debug("tag exists, "+tagName) 722 | 723 | 724 | def update_with_tag(self,tag): 725 | 726 | scenes=self.get_scenes_with_tag(tag) 727 | #get rid of scrape_with_ 728 | scraper=tag[12:] 729 | for s in scenes: 730 | 731 | self.info("running scraper on scene: "+s["id"] +" title: "+ s["title"]) 732 | res=self.scrapeScene(scraper,s) 733 | if res is None: 734 | self.info("scraper did not return a result") 735 | newscene={} 736 | newscene["id"]=s["id"] 737 | new_tags=[] 738 | new_id=self.findTagIdWithName("unscrapable") 739 | if new_id==None: 740 | self.info("creating tag: unscrapable") 741 | new_id=self.createTagWithName("unscrapable") 742 | new_tags.append(new_id) 743 | newscene["tag_ids"]=new_tags 744 | self.debug("Saving scene: "+str(s["title"])) 745 | self.updateScene(newscene) 746 | else: 747 | self.info("Scraper returned something " ) 748 | newscene={} 749 | newscene["id"]=s["id"] 750 | if "title" in res: 751 | newscene["title"]=res["title"] 752 | if "details" in res: 753 | newscene["details"]=res["details"] 754 | if "url" in res: 755 | newscene["url"]=res["url"] 756 | if "date" in res: 757 | newscene["date"]=res["date"] 758 | if "rating" in res: 759 | newscene["rating"]=res["rating"] 760 | if "organized" in res: 761 | newscene["organized"]=res["organized"] 762 | if "studio" in res: 763 | if res["studio"] is None: 764 | True 765 | elif "stored_id" in res["studio"]: 766 | newscene["studio_id"]=res["studio"]["stored_id"] 767 | elif "name" in res["studio"]: 768 | studio_id=self.findStudioIdWithName(res["studio"]["name"]) 769 | newscene["studio_id"]=studio_id 770 | if "image" in res: 771 | newscene["cover_image"]=res["image"] 772 | if "tags" in res: 773 | new_tags=[] 774 | if res["tags"] is not None: 775 | for tag in res["tags"]: 776 | if "stored_id" in tag: 777 | if tag["stored_id"] is not None: 778 | new_tags.append(tag["stored_id"]) 779 | elif "name" in tag: 780 | new_id = self.findTagIdWithName(tag["name"]) 781 | if new_id == None: 782 | self.trace("creating tag: "+ tag["name"]) 783 | new_id = self.createTagWithName(tag["name"]) 784 | new_tags.append(new_id) 785 | elif "name" in tag: 786 | new_id=self.findTagIdWithName(tag["name"]) 787 | if new_id==None: 788 | self.info("creating tag: 
"+tag["name"]) 789 | new_id=self.createTagWithName(tag["name"]) 790 | new_tags.append(new_id) 791 | newscene["tag_ids"]=new_tags 792 | if "performers" in res: 793 | if res["performers"] is not None: 794 | self.debug(str(res["performers"])) 795 | performer_list=[] 796 | for p in res["performers"]: 797 | self.debug(str(p)) 798 | if "stored_id" in p: 799 | if p["stored_id"] != None: 800 | performer_list.append(p["stored_id"]) 801 | elif "name" in p: 802 | new_performer=self.findPerformer(p["name"]) 803 | if new_performer==None: 804 | self.info("Creating a new performer: "+ p["name"]) 805 | new_performer=self.createPerformer(p) 806 | performer_list.append(new_performer["id"]) 807 | newscene["performer_ids"]=performer_list 808 | self.debug("Saving scene: "+str(newscene["title"])) 809 | self.updateScene(newscene) 810 | 811 | def update_gallery_with_tag(self,tag): 812 | 813 | scenes=self.get_galleries_with_tag(tag) 814 | #get rid of scrape_with_ 815 | scraper=tag[12:] 816 | for s in scenes: 817 | 818 | self.info("running scraper on gallery: "+s["id"]) 819 | res=self.scrapeGallery(scraper,s["id"]) 820 | if res["title"] is None: 821 | self.info("gallery did not return a result") 822 | newscene={} 823 | newscene["id"]=s["id"] 824 | new_tags=[] 825 | new_id=self.findTagIdWithName("unscrapable") 826 | if new_id==None: 827 | self.info("creating tag: unscrapable") 828 | new_id=self.createTagWithName("unscrapable") 829 | new_tags.append(new_id) 830 | newscene["tag_ids"]=new_tags 831 | self.debug("Saving scene: "+str(s["title"])) 832 | self.updateScene(newscene) 833 | else: 834 | self.info("Scraper returned something " ) 835 | newGallery={} 836 | newGallery["id"]=s["id"] 837 | if "title" in res: 838 | newGallery["title"]=res["title"] 839 | if "details" in res: 840 | newGallery["details"]=res["details"] 841 | if "url" in res: 842 | newGallery["url"]=res["url"] 843 | if "date" in res: 844 | newGallery["date"]=res["date"] 845 | if "rating" in res: 846 | newGallery["rating"]=res["rating"] 847 | if "organized" in res: 848 | newGallery["organized"]=res["organized"] 849 | if "studio" in res: 850 | if res["studio"] is None: 851 | True 852 | elif "stored_id" in res["studio"]: 853 | newGallery["studio_id"]=res["studio"]["stored_id"] 854 | elif "name" in res["studio"]: 855 | studio_id=self.findStudioIdWithName(res["studio"]["name"]) 856 | newGallery["studio_id"]=studio_id 857 | if "tags" in res: 858 | new_tags=[] 859 | if res["tags"] is not None: 860 | for tag in res["tags"]: 861 | if "stored_id" in tag: 862 | if tag["stored_id"] is not None: 863 | new_tags.append(tag["stored_id"]) 864 | elif "name" in tag: 865 | new_id = self.findTagIdWithName(tag["name"]) 866 | if new_id == None: 867 | self.trace("creating tag: "+ tag["name"]) 868 | new_id = self.createTagWithName(tag["name"]) 869 | new_tags.append(new_id) 870 | elif "name" in tag: 871 | new_id=self.findTagIdWithName(tag["name"]) 872 | if new_id==None: 873 | self.info("creating tag: "+tag["name"]) 874 | new_id=self.createTagWithName(tag["name"]) 875 | new_tags.append(new_id) 876 | newGallery["tag_ids"]=new_tags 877 | if "performers" in res: 878 | if res["performers"] is not None: 879 | self.debug(str(res["performers"])) 880 | performer_list=[] 881 | for p in res["performers"]: 882 | self.debug(str(p)) 883 | if "stored_id" in p: 884 | if p["stored_id"] != None: 885 | performer_list.append(p["stored_id"]) 886 | elif "name" in p: 887 | new_performer=self.findPerformer(p["name"]) 888 | if new_performer==None: 889 | self.info("Creating a new performer: "+ p["name"]) 
890 |                                         new_performer = self.createPerformer(p)
891 |                                     performer_list.append(new_performer["id"])
892 |                         newGallery["performer_ids"] = performer_list
893 |                 self.debug("Saving gallery: " + str(newGallery.get("title")))
894 |                 self.updateGallery(newGallery)
895 | 
896 | 
897 |     def update_all_scenes_with_tags(self):
898 |         tags = self.listTags()
899 |         for tag in tags:
900 |             if tag["name"].startswith("scrape_with_"):
901 |                 self.info("scraping all scenes with tag: " + str(tag["name"]))
902 |                 scrapers = self.list_scene_scrapers('FRAGMENT')
903 |                 if tag["name"][12:] in scrapers:
904 |                     self.update_with_tag(tag["name"])
905 | 
906 |     def update_all_galleries_with_tags(self):
907 |         tags = self.listTags()
908 |         for tag in tags:
909 |             if tag["name"].startswith("scrape_with_"):
910 |                 self.info("scraping all galleries with tag: " + str(tag["name"]))
911 |                 scrapers = self.list_gallery_scrapers('FRAGMENT')
912 |                 if tag["name"][12:] in scrapers:
913 |                     self.update_gallery_with_tag(tag["name"])
914 | 
915 | 
916 |     def scrape_performer_list(self, scraper_id, performer):
917 |         query = """query scrapePerformerList($scraper_id: ID!, $performer: String!) {
918 |             scrapePerformerList(scraper_id: $scraper_id, query: $performer) {
919 |                 name
920 |                 url
921 |                 gender
922 |                 twitter
923 |                 instagram
924 |                 birthdate
925 |                 ethnicity
926 |                 country
927 |                 eye_color
928 |                 height
929 |                 measurements
930 |                 fake_tits
931 |                 career_length
932 |                 tattoos
933 |                 piercings
934 |                 aliases
935 |                 image
936 |             }
937 |         }"""
938 | 
939 |         variables = {'scraper_id': scraper_id, 'performer': performer}
940 |         result = self.__callGraphQL(query, variables)
941 |         if result is not None:
942 |             return result["scrapePerformerList"]
943 |         return None
944 | 
945 |     def scrape_performer(self, scraper_id, performer):
946 |         query = """query scrapePerformer($scraper_id: ID!, $performer: ScrapedPerformerInput!) {
947 |             scrapePerformer(scraper_id: $scraper_id, scraped_performer: $performer) {
948 |                 name
949 |                 url
950 |                 gender
951 |                 twitter
952 |                 instagram
953 |                 birthdate
954 |                 ethnicity
955 |                 country
956 |                 eye_color
957 |                 height
958 |                 measurements
959 |                 fake_tits
960 |                 career_length
961 |                 tattoos
962 |                 piercings
963 |                 aliases
964 |                 details
965 |                 image
966 |                 death_date
967 |                 hair_color
968 |                 weight
969 |             }
970 |         }"""
971 |         performer.pop("image", None)  # the image field is not accepted as scraper input, so drop it before querying
972 |         variables = {'scraper_id': scraper_id, 'performer': performer}
973 |         result = self.__callGraphQL(query, variables)
974 |         return result["scrapePerformer"]
975 | 
976 |     def listPerformerScrapers(self):
977 |         query = """{
978 |             listPerformerScrapers {
979 |               id
980 |               name
981 |               performer {
982 |                 urls
983 |                 supported_scrapes
984 |               }
985 |             }
986 |         }"""
987 |         result = self.__callGraphQL(query)
988 |         return result["listPerformerScrapers"]
989 | 
990 |     def allPerformers(self):
991 |         query = """{
992 |             allPerformers {
993 |               id
994 |               checksum
995 |               name
996 |               url
997 |               gender
998 |               twitter
999 |               instagram
1000 |               birthdate
1001 |               ethnicity
1002 |               country
1003 |               eye_color
1004 |               height
1005 |               measurements
1006 |               fake_tits
1007 |               career_length
1008 |               tattoos
1009 |               piercings
1010 |               aliases
1011 |               favorite
1012 |               image_path
1013 |               scene_count
1014 |               stash_ids {
1015 |                 endpoint
1016 |                 stash_id
1017 |               }
1018 |             }
1019 |         }"""
1020 |         result = self.__callGraphQL(query)
1021 |         return result["allPerformers"]
1022 | 
1023 |     def performer_update(self, performer):
1024 |         query = """
1025 |         mutation performerUpdate($input: PerformerUpdateInput!) {
1026 |             performerUpdate(input: $input) {
1027 |                 id
1028 |                 checksum
1029 |                 name
1030 |                 url
1031 |                 gender
1032 |                 twitter
1033 |                 instagram
1034 |                 birthdate
1035 |                 ethnicity
1036 |                 country
1037 |                 eye_color
1038 |                 height
1039 |                 measurements
1040 |                 fake_tits
1041 |                 career_length
1042 |                 tattoos
1043 |                 piercings
1044 |                 aliases
1045 |                 favorite
1046 |                 image_path
1047 |                 scene_count
1048 |                 stash_ids {
1049 |                     endpoint
1050 |                     stash_id
1051 |                 }
1052 |             }
1053 |         }
1054 |         """
1055 |         variables = {'input': performer}
1056 |         return self.__callGraphQL(query, variables)
1057 | 
1058 |     def run_update_performers(self, scraper_preference):
1059 |         performers = self.allPerformers()
1060 |         index = 0
1061 |         for p in performers:
1062 |             index = index + 1
1063 |             self.progress(index / len(performers))
1064 |             if p["url"] is None:
1065 |                 self.debug("need to scrape performer " + p["name"])
1066 |                 found = False
1067 |                 for scraper in scraper_preference:
1068 |                     scraped_list = self.scrape_performer_list(scraper, p["name"].lower())
1069 |                     if scraped_list is not None:
1070 |                         self.debug("scraping " + p["name"] + " with scraper: " + scraper)
1071 |                         for s in scraped_list:
1072 | 
1073 |                             if (not found) and s.get("name") and s["name"].lower() == p["name"].lower():
1074 |                                 sp = self.scrape_performer(scraper, s)
1075 |                                 if sp is not None:
1076 |                                     if sp["name"].lower() == p["name"].lower():
1077 |                                         found = True
1078 |                                         self.info("Found performer " + sp["name"] + " with scraper: " + scraper)
1079 |                                     if sp["name"] is not None:
1080 |                                         p["name"] = sp["name"]
1081 |                                     if sp["url"] is not None:
1082 |                                         p["url"] = sp["url"]
1083 |                                     if sp["gender"] is not None:
1084 |                                         p["gender"] = sp["gender"].upper()
1085 |                                     if sp["twitter"] is not None:
1086 |                                         p["twitter"] = sp["twitter"]
1087 |                                     if sp["instagram"] is not None:
1088 |                                         p["instagram"] = sp["instagram"]
1089 |                                     if sp["birthdate"] is not None:
1090 |                                         p["birthdate"] = sp["birthdate"]
1091 |                                     if sp["ethnicity"] is not None:
1092 |                                         p["ethnicity"] = sp["ethnicity"]
1093 |                                     if sp["country"] is not None:
1094 |                                         p["country"] = sp["country"]
1095 |                                     if sp["eye_color"] is not None:
1096 |                                         p["eye_color"] = sp["eye_color"]
1097 |                                     if sp["height"] is not None:
1098 |                                         p["height"] = sp["height"]
1099 |                                     if sp["measurements"] is not None:
1100 |                                         p["measurements"] = sp["measurements"]
1101 |                                     if sp["fake_tits"] is not None:
1102 |                                         p["fake_tits"] = sp["fake_tits"]
1103 |                                     if sp["career_length"] is not None:
1104 |                                         p["career_length"] = sp["career_length"]
1105 |                                     if sp["tattoos"] is not None:
1106 |                                         p["tattoos"] = sp["tattoos"]
1107 |                                     if sp["piercings"] is not None:
1108 |                                         p["piercings"] = sp["piercings"]
1109 |                                     if sp["aliases"] is not None:
1110 |                                         p["aliases"] = sp["aliases"]
1111 |                                     if sp["image"] is not None:
1112 |                                         p["image"] = sp["image"]
1113 |                                     if sp["details"] is not None:
1114 |                                         p["details"] = sp["details"]
1115 |                                 else:
1116 |                                     self.info("Looking up entry did not return a result: " + s["name"])
1117 |                 if found:
1118 |                     del p["image_path"]
1119 |                     del p["checksum"]
1120 |                     del p["scene_count"]
1121 |                     self.info("updating performer " + p["name"])
1122 |                     self.debug("===name: " + str(p["name"]) + " url: " + str(p["url"]) + " gender " + str(p["gender"]))
1123 |                     u = self.performer_update(p)
1124 |                     if u is not None:
1125 |                         self.info("update successful")
1126 | 
1127 |     def run_scraper_performers(self, scraper):
1128 |         performers = self.allPerformers()
1129 |         index = 0
1130 |         for p in performers:
1131 |             index = index + 1
1132 |             found = False
1133 |             self.progress(index / len(performers))
1134 |             scraped_list = self.scrape_performer_list(scraper, p["name"].lower())
1135 |             if scraped_list is not None:
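                # update the performer from the first search result whose name matches case-insensitively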
1136 |                 for s in scraped_list:
1137 |                     if s.get("name") and s["name"].lower() == p["name"].lower() and not found:
1138 |                         sp = self.scrape_performer(scraper, s)
1139 |                         if sp is not None:
1140 |                             if sp["name"].lower() == p["name"].lower():
1141 |                                 found = True
1142 |                                 self.info("Found performer " + sp["name"] + " with scraper: " + scraper)
1143 |                             if sp["name"] is not None:
1144 |                                 p["name"] = sp["name"]
1145 |                             if sp["url"] is not None:
1146 |                                 p["url"] = sp["url"]
1147 |                             if sp["gender"] is not None:
1148 |                                 p["gender"] = sp["gender"].upper()
1149 |                             if sp["twitter"] is not None:
1150 |                                 p["twitter"] = sp["twitter"]
1151 |                             if sp["instagram"] is not None:
1152 |                                 p["instagram"] = sp["instagram"]
1153 |                             if sp["birthdate"] is not None:
1154 |                                 p["birthdate"] = sp["birthdate"]
1155 |                             if sp["ethnicity"] is not None:
1156 |                                 p["ethnicity"] = sp["ethnicity"]
1157 |                             if sp["country"] is not None:
1158 |                                 p["country"] = sp["country"]
1159 |                             if sp["eye_color"] is not None:
1160 |                                 p["eye_color"] = sp["eye_color"]
1161 |                             if sp["height"] is not None:
1162 |                                 p["height"] = sp["height"]
1163 |                             if sp["measurements"] is not None:
1164 |                                 p["measurements"] = sp["measurements"]
1165 |                             if sp["fake_tits"] is not None:
1166 |                                 p["fake_tits"] = sp["fake_tits"]
1167 |                             if sp["career_length"] is not None:
1168 |                                 p["career_length"] = sp["career_length"]
1169 |                             if sp["tattoos"] is not None:
1170 |                                 p["tattoos"] = sp["tattoos"]
1171 |                             if sp["piercings"] is not None:
1172 |                                 p["piercings"] = sp["piercings"]
1173 |                             if sp["aliases"] is not None:
1174 |                                 p["aliases"] = sp["aliases"]
1175 |                             if sp["image"] is not None:
1176 |                                 p["image"] = sp["image"]
1177 |                             if sp["death_date"] is not None:
1178 |                                 p["death_date"] = sp["death_date"]
1179 |                             if sp["hair_color"] is not None:
1180 |                                 p["hair_color"] = sp["hair_color"]
1181 |                             if sp["weight"] is not None:
1182 |                                 p["weight"] = sp["weight"]
1183 | 
1184 |             if found:
1185 |                 del p["image_path"]
1186 |                 del p["checksum"]
1187 |                 del p["scene_count"]
1188 |                 self.info("updating performer " + p["name"])
1189 |                 self.debug("===name: " + str(p["name"]) + " url: " + str(p["url"]) + " gender " + str(p["gender"]))
1190 |                 u = self.performer_update(p)
1191 |                 if u is not None:
1192 |                     self.info("update successful")
1193 | 
1194 | scraper_preference = ["Babepedia", "stash-sqlite", "IAFD", "ManyVids", "Pornhub", "SARJ LLC", "ThePornDB", "performer-image-dir"]
1195 | # scraper_preference = ["Iafd", "stash-sqlite", "performer-image-dir"]
1196 | 
1197 | 
1198 | # Entry point: dispatch on the first command line argument or, in "api" mode, on the plugin arguments from Stash.
1199 | if __name__ == '__main__':
1200 |     if len(sys.argv) > 1:
1201 |         url = "http://localhost:9999/graphql"
1202 |         if len(sys.argv) > 2:
1203 |             url = sys.argv[2]
1204 | 
1205 |         if sys.argv[1] == "setup":
1206 |             client = scrape_with(url)
1207 |             client.setup_tags()
1208 |         elif sys.argv[1] == "scrape":
1209 |             client = scrape_with(url)
1210 |             tagName = sys.argv[3]
1211 |             client.update_with_tag(tagName)
1212 |         elif sys.argv[1] == "scrape_all":
1213 |             client = scrape_with(url)
1214 |             client.update_all_scenes_with_tags()
1215 |         elif sys.argv[1] == "gallery_all":
1216 |             client = scrape_with(url)
1217 |             client.update_all_galleries_with_tags()
1218 |         elif sys.argv[1] == "performers":
1219 |             client = scrape_with(url)
1220 |             client.run_update_performers(scraper_preference)
1221 |         elif sys.argv[1] == "runperformers":
1222 |             client = scrape_with(url)
1223 |             client.run_scraper_performers("performer-image-dir")
1224 |         elif sys.argv[1] == "api":
1225 |             fragment = json.loads(sys.stdin.read())
1226 |             scheme = fragment["server_connection"]["Scheme"]
1227 |             port = fragment["server_connection"]["Port"]
1228 |             domain = "localhost"
1229 |             if "Domain" in fragment["server_connection"]:
1230 |                 domain = fragment["server_connection"]["Domain"]
1231 |             if not domain:
1232 |                 domain = 'localhost'
1233 |             url = scheme + "://" + domain + ":" + str(port) + "/graphql"
1234 | 
1235 |             client = scrape_with(url)
1236 |             mode = fragment["args"]["mode"]
1237 |             client.debug("Mode: " + mode)
1238 |             if mode == "setup":
1239 |                 client.setup_tags()
1240 |             elif mode == "scrape_all":
1241 |                 client.update_all_scenes_with_tags()
1242 |             elif mode == "performers":
1243 |                 client.run_update_performers(scraper_preference)
1244 |             elif mode.startswith('performers_'):
1245 |                 client.run_scraper_performers(mode[11:])
1246 |             elif mode.startswith("gallery_all"):
1247 |                 client.update_all_galleries_with_tags()
1248 |         else:
1249 |             print("usage: scrape_with.py setup|scrape|scrape_all|gallery_all|performers|runperformers|api [url] [tag]")
1250 | 
--------------------------------------------------------------------------------
/scrape_with.yml:
--------------------------------------------------------------------------------
1 | name: Scrape With
2 | description: Run fragment based scrapers on tagged scenes and galleries, e.g. tag a scene with scrape_with_xbvr and run the matching task.
3 | version: 0.4
4 | url: https://github.com/tweeticoats/scrape-with-plugin
5 | exec:
6 |   - python3
7 |   - "{pluginDir}/scrape_with.py"
8 |   - api
9 | interface: raw
10 | tasks:
11 |   - name: Setup tags
12 |     description: Create the tags used by the plugin, e.g. scrape_with_xbvr
13 |     defaultArgs:
14 |       mode: setup
15 |   - name: run fragment scrapers with tag
16 |     description: Run fragment scrapers on tagged scenes
17 |     defaultArgs:
18 |       mode: scrape_all
19 |   - name: run fragment galleries scraper with tag
20 |     description: Run gallery scrapers on tagged galleries
21 |     defaultArgs:
22 |       mode: gallery_all
23 |   - name: run performer scrapers
24 |     description: Run performer scrapers on performers missing a URL, in the preferred order
25 |     defaultArgs:
26 |       mode: performers
27 |   - name: run performer image scraper on all performers
28 |     description: Run the "performer-image-dir" scraper on all performers
29 |     defaultArgs:
30 |       mode: performers_performer-image-dir
31 |   - name: run wikidata scraper on all performers
32 |     description: Run the "WikiData" scraper on all performers
33 |     defaultArgs:
34 |       mode: performers_WikiData
35 | 
36 | 
37 | 
38 | 
--------------------------------------------------------------------------------
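Programmatic use (a minimal sketch, assuming a local Stash instance on the default port with no authentication; the class, methods, and scraper_preference list are from scrape_with.py above):

    client = scrape_with("http://localhost:9999/graphql")
    client.setup_tags()                               # create the scrape_with_* tags
    client.update_all_scenes_with_tags()              # scrape every scene carrying a scrape_with_* tag
    client.run_update_performers(scraper_preference)  # fill in performers that are missing a URL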