├── IMDBspider ├── __init__.py ├── spiders │ ├── __init__.py │ ├── movies_to_search_for.txt │ └── spider.py ├── pipelines.py ├── items.py ├── settings.py └── middlewares.py ├── Pipfile ├── .gitignore ├── scrapy.cfg ├── README.md └── Pipfile.lock /IMDBspider/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /IMDBspider/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | Scrapy = "==1.7.3" 10 | 11 | [requires] 12 | python_version = "3.7" 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### PyCharm ### 2 | .idea 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # Scrapy stuff: 11 | .scrapy 12 | 13 | 14 | # Environments 15 | .env 16 | .venv 17 | env/ 18 | venv/ 19 | ENV/ 20 | env.bak/ 21 | venv.bak/ -------------------------------------------------------------------------------- /scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html 5 | 6 | [settings] 7 | default = IMDBspider.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = IMDBspider 12 | -------------------------------------------------------------------------------- /IMDBspider/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class ImdbspiderPipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | -------------------------------------------------------------------------------- /IMDBspider/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # https://docs.scrapy.org/en/latest/topics/items.html 7 | 8 | from scrapy import Item, Field 9 | import re 10 | from scrapy.loader.processors import TakeFirst, MapCompose, Join 11 | 12 | 13 | def clean_input(value): 14 | value = value.strip() 15 | if value != '': 16 | return value 17 | 18 | 19 | def clean_release_date(value): 20 | return re.sub('\(|\)', '', value) 21 | 22 | 23 | def clean_cast_members(value): 24 | if value != 'See full cast & crew': 25 | return value 26 | 27 | 28 | class MovieItem(Item): 29 | title = Field(input_processor=MapCompose(clean_input), output_processor=TakeFirst()) 30 | imdb_rating = Field(output_processor=TakeFirst()) 31 | release_date = Field(input_processor=MapCompose(clean_input, clean_release_date), output_processor=TakeFirst()) 32 | director = Field(output_processor=TakeFirst()) 33 | writers = Field(output_processor=Join(', ')) 34 | world_wide_box_office = Field(input_processor=MapCompose(clean_input), output_processor=TakeFirst()) 35 | budget = Field(input_processor=MapCompose(clean_input), output_processor=TakeFirst()) 36 | language = Field(output_processor=TakeFirst()) 37 | runtime = Field(input_processor=MapCompose(clean_input), output_processor=TakeFirst()) 38 | mpaa_rating = Field(input_processor=MapCompose(clean_input), output_processor=TakeFirst()) 39 | main_cast_members = Field(input_processor=MapCompose(clean_cast_members), output_processor=Join(', ')) 40 | meta_score = Field(output_processor=TakeFirst()) 41 | countries = Field(input_processor=MapCompose(clean_input), output_processor=Join(', ')) 42 | genres = Field(input_processor=MapCompose(clean_input), output_processor=Join(', ')) 43 | -------------------------------------------------------------------------------- /IMDBspider/spiders/movies_to_search_for.txt: -------------------------------------------------------------------------------- 1 | The Martian 2 | The Lord of the Rings: The Fellowship of the Ring 3 | The Lord of the Rings: The Two Towers 4 | The Lord of the Rings: The Return of the King 5 | Finding Nemo 6 | The Incredibles 7 | Brooklyn 8 | The Secret Life of Walter Mitty 9 | The Thin Red Line 10 | First Man 11 | The Nice Guys 12 | The Big Short 13 | Memento 14 | The Pianist 15 | Ex Machina 16 | The Social Network 17 | Mindhunter 18 | Zodiac 19 | The Game 20 | Fight Club 21 | Birdman or (The Unexpected Virtue of Ignorance) 22 | The Grand Budapest Hotel 23 | The Founder 24 | Snowden 25 | Whiplash 26 | Chef 27 | Delivery Man 28 | The Departed 29 | There Will Be Blood 30 | All 3 Batman Movies 31 | Django Unchained 32 | The Hateful Eight 33 | Pulp Fiction 34 | Vanilla Sky 35 | A Few Good Men 36 | The Firm 37 | The Darjeeling Limited 38 | Hotel Chevalier 39 | Fantastic Mr. Fox 40 | Moonrise Kingdom 41 | Cousin Ben Troop Screening with Jason Schwartzman 42 | Isle of Dogs 43 | Rushmore 44 | Blood Diamond 45 | Catch Me If You Can 46 | Schindler's List 47 | Spirited Away 48 | Saving Private Ryan 49 | Taxi Driver 50 | Requiem for a Dream 51 | Eternal Sunshine of the Spotless Mind 52 | The Truman Show 53 | Gladiator 54 | Gran Torino 55 | oh brother where art thou 56 | Donnie Darko 57 | Into the Wild 58 | Punch-Drunk Love 59 | Doubt 60 | In Bruges 61 | Good Will Hunting 62 | Ratatouille 63 | A Beautiful Mind 64 | Dead Poets Society 65 | Big Fish 66 | Children of Men 67 | Monsters, Inc. 68 | Mulholland Drive 69 | Little Miss Sunshine 70 | Drive 71 | Moon 72 | The Sixth Sense 73 | The Machinist 74 | Stand by Me 75 | Collateral 76 | Gravity 77 | The Big Lebowski 78 | The Revenant 79 | Slumdog Millionaire 80 | Gone Girl 81 | Gattaca 82 | 3:10 to Yuma 83 | Cinderella Man 84 | The Last King of Scotland 85 | Ben-Hur 86 | Philadelphia 87 | JFK 88 | Deadpool 89 | The Adventures of Tintin 90 | American History X 91 | Three Billboards Outside Ebbing, Missouri 92 | Chinatown 93 | 2001: A Space Odyssey 94 | A Serious Man 95 | Barton Fink 96 | A Clockwork Orange 97 | Full Metal Jacket 98 | Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb 99 | The English Patient 100 | Days of Heaven 101 | The Assassination of Jesse James by the Coward Robert Ford 102 | The Talented Mr. Ripley -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | IMDB Spider 2 | =========== 3 | 4 | ## Notes 5 | This spider is written in python using the Scrapy framework. It is licensed under the MIT lisense. Do with it what you will. If you'd like to contribute please send me a PR. 6 | 7 | One thing to note if you do use this project, be aware that I'm not sure of the legality of this in a professional setting, so use it at your own risk. Also be aware that you can get your ip address banned from websites if you scrap. There are ways of mitigating this risk such as adding delay between requests, which I've built into this by default, but nonetheless there is still risk. 8 | 9 | To run this code you need to have [Python 3.7](https://www.python.org/downloads/) installed on your computer and you need to have [Scrapy 1.7 installed](http://scrapy.org/). Also, included is a pipfile so you can use pip env with this project. install by running pipenv install inside the project directory. 10 | 11 | ## The Current State 12 | This spider was last updated in 2019 to search for a list of movies using the IMDB search engine and pull data into a pipeline. 13 | This data can then be exported as a CSV or as JSON using Scrapy's built in export system using `scrapy crawl -o file.csv -t csv` 14 | 15 | 16 | ## License: 17 | 18 | The MIT License 19 | 20 | The MIT License (MIT) 21 | Copyright (c) 2014 Alex Black 22 | 23 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 24 | 25 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 26 | 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 28 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 29 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 30 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 31 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 32 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 33 | THE SOFTWARE. 34 | -------------------------------------------------------------------------------- /IMDBspider/spiders/spider.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from scrapy.loader import ItemLoader 3 | from ..items import MovieItem 4 | 5 | 6 | class IMDBspider(scrapy.Spider): 7 | name = "IMDBspider" 8 | allowed_domains = ["imdb.com"] 9 | 10 | def start_requests(self): 11 | # movie_titles = ['The Incredibles'] 12 | with open('/Users/alexwhb/Desktop/IMDBspider/IMDBspider/spiders/movies_to_search_for.txt') as f: 13 | movie_titles = list(f) 14 | 15 | for title in movie_titles: 16 | search_url = f'https://imdb.com/find?q={title.strip()}' 17 | 18 | yield scrapy.Request(url=search_url, callback=self.parse_search_results) 19 | 20 | # for now this method just parses the top 250 IMDB page and has a callbck request to each title link 21 | # so I can parse more film info per film. 22 | def parse_search_results(self, response): 23 | l = ItemLoader(item=MovieItem(), response=response) 24 | l.add_xpath('title', '//*[@id="main"]/div/div[2]/table/tr[1]/td[2]/a/text()') 25 | l.add_xpath('release_date', '//*[@id="main"]/div/div[2]/table/tr[1]/td[2]/text()') 26 | 27 | href = response.xpath('//*[@id="main"]/div/div[2]/table/tr[1]/td[2]/a/@href').extract()[0] 28 | url_to_film_long_description = f'http://imdb.com{href}' 29 | 30 | yield scrapy.Request(url=url_to_film_long_description, callback=self.parse_film_description, 31 | meta={'movie_item': l.load_item()}) 32 | 33 | def parse_film_description(self, response): 34 | l = ItemLoader(item=response.meta['movie_item'], response=response) 35 | 36 | l.add_xpath('director', '//*[@id="title-overview-widget"]/div[2]/div[1]/div[2]/a/text()') 37 | l.add_xpath('writers', '//*[@id="title-overview-widget"]/div[2]/div[1]/div[3]/a/text()') 38 | l.add_xpath('main_cast_members', '//*[@id="title-overview-widget"]/div[2]/div[1]/div[4]/a/text()') 39 | l.add_xpath('meta_score', '//*[@id="title-overview-widget"]/div[2]/div[3]/div[1]/a/div/span/text()') 40 | l.add_xpath('mpaa_rating', '//*[@id="title-overview-widget"]/div[1]/div[2]/div/div[2]/div[2]/div/text()') 41 | l.add_xpath('runtime', '//*[@id="title-overview-widget"]/div[1]/div[2]/div/div[2]/div[2]/div/time/text()') 42 | l.add_xpath('imdb_rating', 43 | '//*[@id="title-overview-widget"]/div[1]/div[2]/div/div[1]/div[1]/div[1]/strong/span/text()') 44 | l.add_xpath('genres', '//*[@id="titleStoryLine"]/div[4]/a/text()') 45 | l.add_xpath('budget', '//*[@id="titleDetails"]/div[7]/text()') 46 | l.add_xpath('world_wide_box_office', '//*[@id="titleDetails"]/div[10]/text()') 47 | l.add_xpath('language', '//*[@id="titleDetails"]/div[3]/a[1]/text()') 48 | l.add_xpath('countries', '//*[@id="titleDetails"]/div[2]/a/text()') 49 | yield l.load_item() 50 | -------------------------------------------------------------------------------- /IMDBspider/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for IMDBspider project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # https://docs.scrapy.org/en/latest/topics/settings.html 9 | # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html 10 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'IMDBspider' 13 | 14 | SPIDER_MODULES = ['IMDBspider.spiders'] 15 | NEWSPIDER_MODULE = 'IMDBspider.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'IMDBspider (+http://www.yourdomain.com)' 20 | 21 | # Obey robots.txt rules 22 | ROBOTSTXT_OBEY = False 23 | 24 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 25 | #CONCURRENT_REQUESTS = 32 26 | 27 | # Configure a delay for requests for the same website (default: 0) 28 | # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay 29 | # See also autothrottle settings and docs 30 | DOWNLOAD_DELAY = 3 31 | # The download delay setting will honor only one of: 32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 33 | #CONCURRENT_REQUESTS_PER_IP = 16 34 | 35 | # Disable cookies (enabled by default) 36 | #COOKIES_ENABLED = False 37 | 38 | # Disable Telnet Console (enabled by default) 39 | #TELNETCONSOLE_ENABLED = False 40 | 41 | # Override the default request headers: 42 | #DEFAULT_REQUEST_HEADERS = { 43 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 44 | # 'Accept-Language': 'en', 45 | #} 46 | 47 | # Enable or disable spider middlewares 48 | # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html 49 | #SPIDER_MIDDLEWARES = { 50 | # 'IMDBspider.middlewares.ImdbspiderSpiderMiddleware': 543, 51 | #} 52 | 53 | # Enable or disable downloader middlewares 54 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html 55 | #DOWNLOADER_MIDDLEWARES = { 56 | # 'IMDBspider.middlewares.ImdbspiderDownloaderMiddleware': 543, 57 | #} 58 | 59 | # Enable or disable extensions 60 | # See https://docs.scrapy.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html 67 | #ITEM_PIPELINES = { 68 | # 'IMDBspider.pipelines.ImdbspiderPipeline': 300, 69 | #} 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See https://docs.scrapy.org/en/latest/topics/autothrottle.html 73 | #AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | #AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | -------------------------------------------------------------------------------- /IMDBspider/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class ImdbspiderSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(self, response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(self, response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Request, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | 58 | 59 | class ImdbspiderDownloaderMiddleware(object): 60 | # Not all methods need to be defined. If a method is not defined, 61 | # scrapy acts as if the downloader middleware does not modify the 62 | # passed objects. 63 | 64 | @classmethod 65 | def from_crawler(cls, crawler): 66 | # This method is used by Scrapy to create your spiders. 67 | s = cls() 68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 69 | return s 70 | 71 | def process_request(self, request, spider): 72 | # Called for each request that goes through the downloader 73 | # middleware. 74 | 75 | # Must either: 76 | # - return None: continue processing this request 77 | # - or return a Response object 78 | # - or return a Request object 79 | # - or raise IgnoreRequest: process_exception() methods of 80 | # installed downloader middleware will be called 81 | return None 82 | 83 | def process_response(self, request, response, spider): 84 | # Called with the response returned from the downloader. 85 | 86 | # Must either; 87 | # - return a Response object 88 | # - return a Request object 89 | # - or raise IgnoreRequest 90 | return response 91 | 92 | def process_exception(self, request, exception, spider): 93 | # Called when a download handler or a process_request() 94 | # (from other downloader middleware) raises an exception. 95 | 96 | # Must either: 97 | # - return None: continue processing this exception 98 | # - return a Response object: stops process_exception() chain 99 | # - return a Request object: stops process_exception() chain 100 | pass 101 | 102 | def spider_opened(self, spider): 103 | spider.logger.info('Spider opened: %s' % spider.name) 104 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "6a5bdd9ce75b9b99da35626782a182dac290b169e7aede6365d42885d60c7db6" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.7" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "attrs": { 20 | "hashes": [ 21 | "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", 22 | "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" 23 | ], 24 | "version": "==20.3.0" 25 | }, 26 | "automat": { 27 | "hashes": [ 28 | "sha256:7979803c74610e11ef0c0d68a2942b152df52da55336e0c9d58daf1831cbdf33", 29 | "sha256:b6feb6455337df834f6c9962d6ccf771515b7d939bca142b29c20c2376bc6111" 30 | ], 31 | "version": "==20.2.0" 32 | }, 33 | "cffi": { 34 | "hashes": [ 35 | "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813", 36 | "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06", 37 | "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea", 38 | "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee", 39 | "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396", 40 | "sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73", 41 | "sha256:29314480e958fd8aab22e4a58b355b629c59bf5f2ac2492b61e3dc06d8c7a315", 42 | "sha256:34eff4b97f3d982fb93e2831e6750127d1355a923ebaeeb565407b3d2f8d41a1", 43 | "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49", 44 | "sha256:3d3dd4c9e559eb172ecf00a2a7517e97d1e96de2a5e610bd9b68cea3925b4892", 45 | "sha256:43e0b9d9e2c9e5d152946b9c5fe062c151614b262fda2e7b201204de0b99e482", 46 | "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058", 47 | "sha256:51182f8927c5af975fece87b1b369f722c570fe169f9880764b1ee3bca8347b5", 48 | "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53", 49 | "sha256:5de7970188bb46b7bf9858eb6890aad302577a5f6f75091fd7cdd3ef13ef3045", 50 | "sha256:65fa59693c62cf06e45ddbb822165394a288edce9e276647f0046e1ec26920f3", 51 | "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5", 52 | "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e", 53 | "sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c", 54 | "sha256:72d8d3ef52c208ee1c7b2e341f7d71c6fd3157138abf1a95166e6165dd5d4369", 55 | "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827", 56 | "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053", 57 | "sha256:99cd03ae7988a93dd00bcd9d0b75e1f6c426063d6f03d2f90b89e29b25b82dfa", 58 | "sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4", 59 | "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322", 60 | "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132", 61 | "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62", 62 | "sha256:a465da611f6fa124963b91bf432d960a555563efe4ed1cc403ba5077b15370aa", 63 | "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0", 64 | "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396", 65 | "sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e", 66 | "sha256:bb89f306e5da99f4d922728ddcd6f7fcebb3241fc40edebcb7284d7514741991", 67 | "sha256:cbde590d4faaa07c72bf979734738f328d239913ba3e043b1e98fe9a39f8b2b6", 68 | "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1", 69 | "sha256:d42b11d692e11b6634f7613ad8df5d6d5f8875f5d48939520d351007b3c13406", 70 | "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d", 71 | "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c" 72 | ], 73 | "version": "==1.14.5" 74 | }, 75 | "constantly": { 76 | "hashes": [ 77 | "sha256:586372eb92059873e29eba4f9dec8381541b4d3834660707faf8ba59146dfc35", 78 | "sha256:dd2fa9d6b1a51a83f0d7dd76293d734046aa176e384bf6e33b7e44880eb37c5d" 79 | ], 80 | "version": "==15.1.0" 81 | }, 82 | "cryptography": { 83 | "hashes": [ 84 | "sha256:0f1212a66329c80d68aeeb39b8a16d54ef57071bf22ff4e521657b27372e327d", 85 | "sha256:1e056c28420c072c5e3cb36e2b23ee55e260cb04eee08f702e0edfec3fb51959", 86 | "sha256:240f5c21aef0b73f40bb9f78d2caff73186700bf1bc6b94285699aff98cc16c6", 87 | "sha256:26965837447f9c82f1855e0bc8bc4fb910240b6e0d16a664bb722df3b5b06873", 88 | "sha256:37340614f8a5d2fb9aeea67fd159bfe4f5f4ed535b1090ce8ec428b2f15a11f2", 89 | "sha256:3d10de8116d25649631977cb37da6cbdd2d6fa0e0281d014a5b7d337255ca713", 90 | "sha256:3d8427734c781ea5f1b41d6589c293089704d4759e34597dce91014ac125aad1", 91 | "sha256:7ec5d3b029f5fa2b179325908b9cd93db28ab7b85bb6c1db56b10e0b54235177", 92 | "sha256:8e56e16617872b0957d1c9742a3f94b43533447fd78321514abbe7db216aa250", 93 | "sha256:de4e5f7f68220d92b7637fc99847475b59154b7a1b3868fb7385337af54ac9ca", 94 | "sha256:eb8cc2afe8b05acbd84a43905832ec78e7b3873fb124ca190f574dca7389a87d", 95 | "sha256:ee77aa129f481be46f8d92a1a7db57269a2f23052d5f2433b4621bb457081cc9" 96 | ], 97 | "version": "==3.4.7" 98 | }, 99 | "cssselect": { 100 | "hashes": [ 101 | "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf", 102 | "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc" 103 | ], 104 | "version": "==1.1.0" 105 | }, 106 | "hyperlink": { 107 | "hashes": [ 108 | "sha256:427af957daa58bc909471c6c40f74c5450fa123dd093fc53efd2e91d2705a56b", 109 | "sha256:e6b14c37ecb73e89c77d78cdb4c2cc8f3fb59a885c5b3f819ff4ed80f25af1b4" 110 | ], 111 | "version": "==21.0.0" 112 | }, 113 | "idna": { 114 | "hashes": [ 115 | "sha256:5205d03e7bcbb919cc9c19885f9920d622ca52448306f2377daede5cf3faac16", 116 | "sha256:c5b02147e01ea9920e6b0a3f1f7bb833612d507592c837a6c49552768f4054e1" 117 | ], 118 | "version": "==3.1" 119 | }, 120 | "incremental": { 121 | "hashes": [ 122 | "sha256:02f5de5aff48f6b9f665d99d48bfc7ec03b6e3943210de7cfc88856d755d6f57", 123 | "sha256:92014aebc6a20b78a8084cdd5645eeaa7f74b8933f70fa3ada2cfbd1e3b54321" 124 | ], 125 | "version": "==21.3.0" 126 | }, 127 | "lxml": { 128 | "hashes": [ 129 | "sha256:079f3ae844f38982d156efce585bc540c16a926d4436712cf4baee0cce487a3d", 130 | "sha256:0fbcf5565ac01dff87cbfc0ff323515c823081c5777a9fc7703ff58388c258c3", 131 | "sha256:122fba10466c7bd4178b07dba427aa516286b846b2cbd6f6169141917283aae2", 132 | "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f", 133 | "sha256:26e761ab5b07adf5f555ee82fb4bfc35bf93750499c6c7614bd64d12aaa67927", 134 | "sha256:289e9ca1a9287f08daaf796d96e06cb2bc2958891d7911ac7cae1c5f9e1e0ee3", 135 | "sha256:2a9d50e69aac3ebee695424f7dbd7b8c6d6eb7de2a2eb6b0f6c7db6aa41e02b7", 136 | "sha256:33bb934a044cf32157c12bfcfbb6649807da20aa92c062ef51903415c704704f", 137 | "sha256:3439c71103ef0e904ea0a1901611863e51f50b5cd5e8654a151740fde5e1cade", 138 | "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468", 139 | "sha256:4289728b5e2000a4ad4ab8da6e1db2e093c63c08bdc0414799ee776a3f78da4b", 140 | "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4", 141 | "sha256:542d454665a3e277f76954418124d67516c5f88e51a900365ed54a9806122b83", 142 | "sha256:5a0a14e264069c03e46f926be0d8919f4105c1623d620e7ec0e612a2e9bf1c04", 143 | "sha256:66e575c62792c3f9ca47cb8b6fab9e35bab91360c783d1606f758761810c9791", 144 | "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51", 145 | "sha256:7610b8c31688f0b1be0ef882889817939490a36d0ee880ea562a4e1399c447a1", 146 | "sha256:76fa7b1362d19f8fbd3e75fe2fb7c79359b0af8747e6f7141c338f0bee2f871a", 147 | "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f", 148 | "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee", 149 | "sha256:820628b7b3135403540202e60551e741f9b6d3304371712521be939470b454ec", 150 | "sha256:884ab9b29feaca361f7f88d811b1eea9bfca36cf3da27768d28ad45c3ee6f969", 151 | "sha256:89b8b22a5ff72d89d48d0e62abb14340d9e99fd637d046c27b8b257a01ffbe28", 152 | "sha256:92e821e43ad382332eade6812e298dc9701c75fe289f2a2d39c7960b43d1e92a", 153 | "sha256:b007cbb845b28db4fb8b6a5cdcbf65bacb16a8bd328b53cbc0698688a68e1caa", 154 | "sha256:bc4313cbeb0e7a416a488d72f9680fffffc645f8a838bd2193809881c67dd106", 155 | "sha256:bccbfc27563652de7dc9bdc595cb25e90b59c5f8e23e806ed0fd623755b6565d", 156 | "sha256:c4f05c5a7c49d2fb70223d0d5bcfbe474cf928310ac9fa6a7c6dddc831d0b1d4", 157 | "sha256:ce256aaa50f6cc9a649c51be3cd4ff142d67295bfc4f490c9134d0f9f6d58ef0", 158 | "sha256:d2e35d7bf1c1ac8c538f88d26b396e73dd81440d59c1ef8522e1ea77b345ede4", 159 | "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2", 160 | "sha256:efac139c3f0bf4f0939f9375af4b02c5ad83a622de52d6dfa8e438e8e01d0eb0", 161 | "sha256:efd7a09678fd8b53117f6bae4fa3825e0a22b03ef0a932e070c0bdbb3a35e654", 162 | "sha256:f2380a6376dfa090227b663f9678150ef27543483055cc327555fb592c5967e2", 163 | "sha256:f8380c03e45cf09f8557bdaa41e1fa7c81f3ae22828e1db470ab2a6c96d8bc23", 164 | "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586" 165 | ], 166 | "index": "pypi", 167 | "version": "==4.6.3" 168 | }, 169 | "parsel": { 170 | "hashes": [ 171 | "sha256:70efef0b651a996cceebc69e55a85eb2233be0890959203ba7c3a03c72725c79", 172 | "sha256:9e1fa8db1c0b4a878bf34b35c043d89c9d1cbebc23b4d34dbc3c0ec33f2e087d" 173 | ], 174 | "version": "==1.6.0" 175 | }, 176 | "pyasn1": { 177 | "hashes": [ 178 | "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", 179 | "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba" 180 | ], 181 | "version": "==0.4.8" 182 | }, 183 | "pyasn1-modules": { 184 | "hashes": [ 185 | "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e", 186 | "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74" 187 | ], 188 | "version": "==0.2.8" 189 | }, 190 | "pycparser": { 191 | "hashes": [ 192 | "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", 193 | "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" 194 | ], 195 | "version": "==2.20" 196 | }, 197 | "pydispatcher": { 198 | "hashes": [ 199 | "sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf", 200 | "sha256:5be4a8be12805ef7d712dd9a93284fb8bc53f309867e573f653a72e5fd10e433" 201 | ], 202 | "version": "==2.0.5" 203 | }, 204 | "pyopenssl": { 205 | "hashes": [ 206 | "sha256:4c231c759543ba02560fcd2480c48dcec4dae34c9da7d3747c508227e0624b51", 207 | "sha256:818ae18e06922c066f777a33f1fca45786d85edfe71cd043de6379337a7f274b" 208 | ], 209 | "version": "==20.0.1" 210 | }, 211 | "queuelib": { 212 | "hashes": [ 213 | "sha256:42b413295551bdc24ed9376c1a2cd7d0b1b0fa4746b77b27ca2b797a276a1a17", 214 | "sha256:ff43b5b74b9266f8df4232a8f768dc4d67281a271905e2ed4a3689d4d304cd02" 215 | ], 216 | "version": "==1.5.0" 217 | }, 218 | "scrapy": { 219 | "hashes": [ 220 | "sha256:1d2b2672049c3e7ebcab9736bbf759ec32d4cbf598292067737806453c208f95", 221 | "sha256:fe5a40177960e97d42d1c752a73edb40f76a85a24076dec8535cffa499eb08c8" 222 | ], 223 | "index": "pypi", 224 | "version": "==1.7.3" 225 | }, 226 | "service-identity": { 227 | "hashes": [ 228 | "sha256:001c0707759cb3de7e49c078a7c0c9cd12594161d3bf06b9c254fdcb1a60dc36", 229 | "sha256:0858a54aabc5b459d1aafa8a518ed2081a285087f349fe3e55197989232e2e2d" 230 | ], 231 | "version": "==18.1.0" 232 | }, 233 | "six": { 234 | "hashes": [ 235 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", 236 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" 237 | ], 238 | "version": "==1.15.0" 239 | }, 240 | "twisted": { 241 | "hashes": [ 242 | "sha256:77544a8945cf69b98d2946689bbe0c75de7d145cdf11f391dd487eae8fc95a12", 243 | "sha256:aab38085ea6cda5b378b519a0ec99986874921ee8881318626b0a3414bb2631e" 244 | ], 245 | "markers": "python_version != '3.4'", 246 | "version": "==21.2.0" 247 | }, 248 | "w3lib": { 249 | "hashes": [ 250 | "sha256:0161d55537063e00d95a241663ede3395c4c6d7b777972ba2fd58bbab2001e53", 251 | "sha256:0ad6d0203157d61149fd45aaed2e24f53902989c32fc1dccc2e2bfba371560df" 252 | ], 253 | "version": "==1.22.0" 254 | }, 255 | "zope.interface": { 256 | "hashes": [ 257 | "sha256:02d3535aa18e34ce97c58d241120b7554f7d1cf4f8002fc9675cc7e7745d20e8", 258 | "sha256:0378a42ec284b65706d9ef867600a4a31701a0d6773434e6537cfc744e3343f4", 259 | "sha256:07d289358a8c565ea09e426590dd1179f93cf5ac3dd17d43fcc4fc63c1a9d275", 260 | "sha256:0e6cdbdd94ae94d1433ab51f46a76df0f2cd041747c31baec1c1ffa4e76bd0c1", 261 | "sha256:11354fb8b8bdc5cdd66358ed4f1f0ce739d78ff6d215d33b8f3ae282258c0f11", 262 | "sha256:12588a46ae0a99f172c4524cbbc3bb870f32e0f8405e9fa11a5ef3fa3a808ad7", 263 | "sha256:16caa44a06f6b0b2f7626ced4b193c1ae5d09c1b49c9b4962c93ae8aa2134f55", 264 | "sha256:18c478b89b6505756f007dcf76a67224a23dcf0f365427742ed0c0473099caa4", 265 | "sha256:221b41442cf4428fcda7fc958c9721c916709e2a3a9f584edd70f1493a09a762", 266 | "sha256:26109c50ccbcc10f651f76277cfc05fba8418a907daccc300c9247f24b3158a2", 267 | "sha256:28d8157f8c77662a1e0796a7d3cfa8910289131d4b4dd4e10b2686ab1309b67b", 268 | "sha256:2c51689b7b40c7d9c7e8a678350e73dc647945a13b4e416e7a02bbf0c37bdb01", 269 | "sha256:2ec58e1e1691dde4fbbd97f8610de0f8f1b1a38593653f7d3b8e931b9cd6d67f", 270 | "sha256:416feb6500f7b6fc00d32271f6b8495e67188cb5eb51fc8e289b81fdf465a9cb", 271 | "sha256:520352b18adea5478bbf387e9c77910a914985671fe36bc5ef19fdcb67a854bc", 272 | "sha256:527415b5ca201b4add44026f70278fbc0b942cf0801a26ca5527cb0389b6151e", 273 | "sha256:54243053316b5eec92affe43bbace7c8cd946bc0974a4aa39ff1371df0677b22", 274 | "sha256:61b8454190b9cc87279232b6de28dee0bad040df879064bb2f0e505cda907918", 275 | "sha256:672668729edcba0f2ee522ab177fcad91c81cfce991c24d8767765e2637d3515", 276 | "sha256:67aa26097e194947d29f2b5a123830e03da1519bcce10cac034a51fcdb99c34f", 277 | "sha256:6e7305e42b5f54e5ccf51820de46f0a7c951ba7cb9e3f519e908545b0f5628d0", 278 | "sha256:7234ac6782ca43617de803735949f79b894f0c5d353fbc001d745503c69e6d1d", 279 | "sha256:7426bea25bdf92f00fa52c7b30fcd2a2f71c21cf007178971b1f248b6c2d3145", 280 | "sha256:74b331c5d5efdddf5bbd9e1f7d8cb91a0d6b9c4ba45ca3e9003047a84dca1a3b", 281 | "sha256:79b6db1a18253db86e9bf1e99fa829d60fd3fc7ac04f4451c44e4bdcf6756a42", 282 | "sha256:7d79cd354ae0a033ac7b86a2889c9e8bb0bb48243a6ed27fc5064ce49b842ada", 283 | "sha256:823d1b4a6a028b8327e64865e2c81a8959ae9f4e7c9c8e0eec814f4f9b36b362", 284 | "sha256:8715717a5861932b7fe7f3cbd498c82ff4132763e2fea182cc95e53850394ec1", 285 | "sha256:89a6091f2d07936c8a96ce56f2000ecbef20fb420a94845e7d53913c558a6378", 286 | "sha256:8af4b3116e4a37059bc8c7fe36d4a73d7c1d8802a1d8b6e549f1380d13a40160", 287 | "sha256:8b4b0034e6c7f30133fa64a1cc276f8f1a155ef9529e7eb93a3c1728b40c0f5c", 288 | "sha256:92195df3913c1de80062635bf64cd7bd0d0934a7fa1689b6d287d1cbbd16922c", 289 | "sha256:96c2e68385f3848d58f19b2975a675532abdb65c8fa5f04d94b95b27b6b1ffa7", 290 | "sha256:9c7044dbbf8c58420a9ef4ed6901f5a8b7698d90cd984d7f57a18c78474686f6", 291 | "sha256:a1937efed7e3fe0ee74630e1960df887d8aa83c571e1cf4db9d15b9c181d457d", 292 | "sha256:a38c10423a475a1658e2cb8f52cf84ec20a4c0adff724dd43a6b45183f499bc1", 293 | "sha256:a413c424199bcbab71bf5fa7538246f27177fbd6dd74b2d9c5f34878658807f8", 294 | "sha256:b18a855f8504743e0a2d8b75d008c7720d44e4c76687e13f959e35d9a13eb397", 295 | "sha256:b4d59ab3608538e550a72cea13d3c209dd72b6e19e832688da7884081c01594e", 296 | "sha256:b51d3f1cd87f488455f43046d72003689024b0fa9b2d53635db7523033b19996", 297 | "sha256:c02105deda867d09cdd5088d08708f06d75759df6f83d8f7007b06f422908a30", 298 | "sha256:c7b6032dc4490b0dcaf078f09f5b382dc35493cb7f473840368bf0de3196c2b6", 299 | "sha256:c95b355dba2aaf5177dff943b25ded0529a7feb80021d5fdb114a99f0a1ef508", 300 | "sha256:c980ae87863d76b1ea9a073d6d95554b4135032d34bc541be50c07d4a085821b", 301 | "sha256:d12895cd083e35e9e032eb4b57645b91116f8979527381a8d864d1f6b8cb4a2e", 302 | "sha256:d3cd9bad547a8e5fbe712a1dc1413aff1b917e8d39a2cd1389a6f933b7a21460", 303 | "sha256:e8809b01f27f679e3023b9e2013051e0a3f17abff4228cb5197663afd8a0f2c7", 304 | "sha256:f3c37b0dc1898e305aad4f7a1d75f6da83036588c28a9ce0afc681ff5245a601", 305 | "sha256:f966765f54b536e791541458de84a737a6adba8467190f17a8fe7f85354ba908", 306 | "sha256:fa939c2e2468142c9773443d4038e7c915b0cc1b670d3c9192bdc503f7ea73e9", 307 | "sha256:fcc5c1f95102989d2e116ffc8467963554ce89f30a65a3ea86a4d06849c498d8" 308 | ], 309 | "version": "==5.3.0" 310 | } 311 | }, 312 | "develop": {} 313 | } 314 | --------------------------------------------------------------------------------