├── .DS_Store ├── .gitignore ├── Daguerre ├── .DS_Store ├── Daguerre │ ├── __init__.py │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── superspider.py ├── Run.py ├── out │ └── .DS_Store └── scrapy.cfg ├── LICENSE ├── README.md └── image ├── sbs002.png ├── sbs003.png ├── sbs004.png ├── sbs005.png ├── sbs006.png ├── sbs007.png ├── sbs008.png ├── sbs009.png ├── sbs01.png ├── sbs010.png ├── sbs011.png ├── sbs012.png ├── sbs013.png ├── sbs014.png ├── sbs015.png ├── sbs016.png ├── sbs017.png ├── sbs018.png ├── sbs019.png ├── sbs020.png ├── sbs021.png ├── sbs022.png ├── sbs023.png ├── sbs024.gif ├── sbs025.png ├── sbs026.png ├── sbs027.png ├── sbs028.png ├── sbs029.png ├── sbs030.png ├── sbs031.png ├── sbs032.png ├── sbs033.png ├── sbs034.png ├── sbs035.png ├── sbs036.png ├── sbs037.png ├── sbs038.png ├── sbs039.png ├── sbs040.gif ├── sbs0401.gif ├── sbs041.png ├── sbs042.png ├── sbs043.png ├── sbs044.png ├── sbs045.png ├── sbs046.jpg ├── sbs047.jpg ├── sbs099.gif └── sbs0991.gif /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .idea/ -------------------------------------------------------------------------------- /Daguerre/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/Daguerre/.DS_Store -------------------------------------------------------------------------------- /Daguerre/Daguerre/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/Daguerre/Daguerre/__init__.py -------------------------------------------------------------------------------- /Daguerre/Daguerre/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | from scrapy import Item, Field 9 | 10 | 11 | class DaguerrePostItem(Item): 12 | post_id = Field() 13 | post_title = Field() 14 | post_url = Field() 15 | post_image_list = Field() 16 | -------------------------------------------------------------------------------- /Daguerre/Daguerre/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class DaguerreSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 
28 |         return None
29 | 
30 |     def process_spider_output(self, response, result, spider):
31 |         # Called with the results returned from the Spider, after
32 |         # it has processed the response.
33 | 
34 |         # Must return an iterable of Request, dict or Item objects.
35 |         for i in result:
36 |             yield i
37 | 
38 |     def process_spider_exception(self, response, exception, spider):
39 |         # Called when a spider or process_spider_input() method
40 |         # (from other spider middleware) raises an exception.
41 | 
42 |         # Should return either None or an iterable of Response, dict
43 |         # or Item objects.
44 |         pass
45 | 
46 |     def process_start_requests(self, start_requests, spider):
47 |         # Called with the start requests of the spider, and works
48 |         # similarly to the process_spider_output() method, except
49 |         # that it doesn't have a response associated.
50 | 
51 |         # Must return only requests (not items).
52 |         for r in start_requests:
53 |             yield r
54 | 
55 |     def spider_opened(self, spider):
56 |         spider.logger.info('Spider opened: %s' % spider.name)
57 | 
58 | 
59 | class DaguerreDownloaderMiddleware(object):
60 |     # Not all methods need to be defined. If a method is not defined,
61 |     # scrapy acts as if the downloader middleware does not modify the
62 |     # passed objects.
63 | 
64 |     @classmethod
65 |     def from_crawler(cls, crawler):
66 |         # This method is used by Scrapy to create your spiders.
67 |         s = cls()
68 |         crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
69 |         return s
70 | 
71 |     def process_request(self, request, spider):
72 |         # Called for each request that goes through the downloader
73 |         # middleware.
74 | 
75 |         # Must either:
76 |         # - return None: continue processing this request
77 |         # - or return a Response object
78 |         # - or return a Request object
79 |         # - or raise IgnoreRequest: process_exception() methods of
80 |         #   installed downloader middleware will be called
81 |         return None
82 | 
83 |     def process_response(self, request, response, spider):
84 |         # Called with the response returned from the downloader.
85 | 
86 |         # Must either:
87 |         # - return a Response object
88 |         # - return a Request object
89 |         # - or raise IgnoreRequest
90 |         return response
91 | 
92 |     def process_exception(self, request, exception, spider):
93 |         # Called when a download handler or a process_request()
94 |         # (from other downloader middleware) raises an exception.
95 | 
96 |         # Must either:
97 |         # - return None: continue processing this exception
98 |         # - return a Response object: stops process_exception() chain
99 |         # - return a Request object: stops process_exception() chain
100 |         pass
101 | 
102 |     def spider_opened(self, spider):
103 |         spider.logger.info('Spider opened: %s' % spider.name)
104 | 
--------------------------------------------------------------------------------
/Daguerre/Daguerre/pipelines.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define your item pipelines here
 4 | #
 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
 7 | 
 8 | import pymongo
 9 | import logging
10 | from Daguerre.items import DaguerrePostItem
11 | 
12 | 
13 | class DaguerrePipeline(object):
14 |     def __init__(self):
15 |         # Connect to the local MongoDB instance and use the "postTable"
16 |         # collection of the "Daguerre" database.
17 |         client = pymongo.MongoClient("localhost", 27017)
18 |         db = client["Daguerre"]
19 |         self.table = db["postTable"]
20 | 
21 |     def process_item(self, item, spider):
22 |         if isinstance(item, DaguerrePostItem):
23 |             try:
24 |                 # insert_one() replaces the deprecated Collection.insert().
25 |                 self.table.insert_one(dict(item))
26 |             except Exception as e:
27 |                 logging.error("PIPELINE EXCEPTION: " + str(e))
28 |         return item
--------------------------------------------------------------------------------
/Daguerre/Daguerre/settings.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Scrapy settings for Daguerre project
 4 | #
 5 | # For simplicity, this file contains only settings considered important or
 6 | # commonly used. You can find more settings consulting the documentation:
 7 | #
 8 | #     https://doc.scrapy.org/en/latest/topics/settings.html
 9 | #     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
10 | #     https://doc.scrapy.org/en/latest/topics/spider-middleware.html
11 | 
12 | BOT_NAME = 'Daguerre'
13 | 
14 | SPIDER_MODULES = ['Daguerre.spiders']
15 | NEWSPIDER_MODULE = 'Daguerre.spiders'
16 | 
17 | 
18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
19 | #USER_AGENT = 'Daguerre (+http://www.yourdomain.com)'
20 | 
21 | # Obey robots.txt rules
22 | ROBOTSTXT_OBEY = True
23 | 
24 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
25 | CONCURRENT_REQUESTS = 32
26 | 
27 | # Configure a delay for requests for the same website (default: 0)
28 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
29 | # See also autothrottle settings and docs
30 | DOWNLOAD_DELAY = 1
31 | # The download delay setting will honor only one of:
32 | # CONCURRENT_REQUESTS_PER_DOMAIN = 16
33 | #CONCURRENT_REQUESTS_PER_IP = 16
34 | 
35 | # Disable cookies (enabled by default)
36 | #COOKIES_ENABLED = False
37 | 
38 | # Disable Telnet Console (enabled by default)
39 | #TELNETCONSOLE_ENABLED = False
40 | 
41 | # Override the default request headers:
42 | #DEFAULT_REQUEST_HEADERS = {
43 | #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
44 | #   'Accept-Language': 'en',
45 | #}
46 | 
47 | # Enable or disable spider middlewares
48 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
49 | #SPIDER_MIDDLEWARES = {
50 | #    'Daguerre.middlewares.DaguerreSpiderMiddleware': 543,
51 | #}
52 | 
53 | # Enable or disable downloader middlewares
54 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
55 | #DOWNLOADER_MIDDLEWARES = {
56 | #    'Daguerre.middlewares.DaguerreDownloaderMiddleware':
543, 57 | #} 58 | 59 | # Enable or disable extensions 60 | # See https://doc.scrapy.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html 67 | ITEM_PIPELINES = { 68 | 'Daguerre.pipelines.DaguerrePipeline': 300, 69 | } 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html 73 | #AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | #AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | #AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | 92 | 93 | ROOT_URL = "http://bc.ghuws.men/" 94 | 95 | LOCAL_FILE_ROOT = "/Users/XXX/XXX/DaguerreSpider/Daguerre/out/" 96 | 97 | MAX_PAGES = 3 -------------------------------------------------------------------------------- /Daguerre/Daguerre/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
5 | 
--------------------------------------------------------------------------------
/Daguerre/Daguerre/spiders/superspider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import scrapy
 3 | import os
 4 | from scrapy import Request
 5 | from bs4 import BeautifulSoup
 6 | from Daguerre.settings import ROOT_URL, LOCAL_FILE_ROOT, MAX_PAGES
 7 | from Daguerre.items import DaguerrePostItem
 8 | 
 9 | 
10 | class SuperspiderSpider(scrapy.Spider):
11 |     root_url = ROOT_URL
12 |     local_file_root = LOCAL_FILE_ROOT
13 |     max_pages = MAX_PAGES
14 |     name = 'superspider'
15 |     allowed_domains = ['bc.ghuws.men']
16 |     start_urls = ['http://bc.ghuws.men/thread0806.php?fid=16&search=&page=1']
17 | 
18 |     def parse(self, response):
19 |         content = response.body
20 |         soup = BeautifulSoup(content, "html.parser")
21 |         # Post entries are <a> tags that carry both an id and an href.
22 |         a_list = soup.find_all('a', attrs={'href': True, 'id': True})
23 |         for item in a_list:
24 |             temp_result = item['href'].split('/')
25 |             if len(temp_result) == 4:
26 |                 year_month = temp_result[2]
27 |                 post_id = temp_result[3].split('.')[0]
28 |                 # Keep 2018 posts only; real post ids are longer than 6 digits.
29 |                 if int(year_month) > 1800 and len(post_id) > 6:
30 |                     post_url = self.root_url + item['href']
31 |                     yield Request(url=post_url, callback=self.parse_post_page, meta={'post_id': post_id})
32 |         # Follow the listing pages until MAX_PAGES is reached.
33 |         cur_page = int(response.url.split('=')[-1])
34 |         next_page = cur_page + 1
35 |         if next_page <= MAX_PAGES:
36 |             next_page_url = response.url[:-len(str(cur_page))] + str(next_page)
37 |             yield Request(url=next_page_url, callback=self.parse)
38 | 
39 |     def parse_post_page(self, response):
40 |         content = response.body
41 |         soup = BeautifulSoup(content, "html.parser")
42 |         temp_title_list = soup.find_all('h4')
43 |         post_id = response.meta['post_id']
44 |         post_url = response.url
45 |         post_title = ""
46 |         if len(temp_title_list) != 0:
47 |             post_title = temp_title_list[0].text
48 |         # Images are <input type="image"> tags. The first one is sometimes an
49 |         # ad served from a different image host, so drop it when its host
50 |         # differs from the second image's host.
51 |         temp_img_list = soup.find_all('input', attrs={'type': 'image'})
52 |         img_list = []
53 |         post_image_list = []
54 |         for i in range(len(temp_img_list)):
55 |             if i == 1:
56 |                 src1 = temp_img_list[0]['src'].split('/')[2]
57 |                 src2 = temp_img_list[1]['src'].split('/')[2]
58 |                 if src1 != src2:
59 |                     img_list.remove(temp_img_list[0])
60 |             img_list.append(temp_img_list[i])
61 |         # enumerate() gives a stable index even when two images share the same
62 |         # src (list.index() would return the first match for both).
63 |         for index, item in enumerate(img_list):
64 |             image_url = item['src']
65 |             post_image_list.append(image_url)
66 |             # The image hosts are not in allowed_domains, so dont_filter=True
67 |             # is required or the OffsiteMiddleware drops these requests.
68 |             yield Request(url=image_url, callback=self.down_load_image,
69 |                           meta={'post_id': post_id, 'post_title': post_title, 'index': index},
70 |                           dont_filter=True)
71 |         item = DaguerrePostItem()
72 |         item['post_id'] = post_id
73 |         item['post_title'] = post_title
74 |         item['post_url'] = post_url
75 |         item['post_image_list'] = post_image_list
76 |         yield item
77 | 
78 |     def down_load_image(self, response):
79 |         content = response.body
80 |         index = response.meta['index']
81 |         post_title = response.meta['post_title']
82 |         pic_format = response.url.split('.')[-1]
83 |         if response.status == 200:
84 |             # Build the local directory from the post title; strip path
85 |             # separators so a title cannot escape the output folder.
86 |             file_dir = self.local_file_root + post_title.replace('/', '_') + "/"
87 |             filename = file_dir + str(index) + "." + pic_format
88 |             if not os.path.exists(file_dir):
89 |                 os.makedirs(file_dir)
90 |             # 'wb' overwrites on re-runs; the original 'xb' mode would raise
91 |             # FileExistsError for any file downloaded before.
92 |             with open(filename, 'wb') as file:
93 |                 file.write(content)
94 | 
95 | 
--------------------------------------------------------------------------------
/Daguerre/Run.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | __author__ = 'lianggao'
 3 | __date__ = '2018/5/11 上午11:27'
 4 | 
 5 | from scrapy import cmdline
 6 | 
 7 | 
 8 | def main():
 9 |     cmdline.execute("scrapy crawl superspider".split())
10 | 
11 | 
12 | if __name__ == '__main__':
13 |     main()
--------------------------------------------------------------------------------
/Daguerre/out/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/Daguerre/out/.DS_Store
--------------------------------------------------------------------------------
/Daguerre/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = Daguerre.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = Daguerre
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 SwyftG
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
> After the first two hands-on Python articles went out, some readers told me: **"Your ideas are great, but I just can't understand what you wrote. I'm not familiar with Python, and reading it is a struggle."** Thinking it over, that is a fair point. Readers who know Python can follow my train of thought, but for those with little or no Python background, throwing the material at them like that would only send them away disappointed. So this time I have prepared a hands-on, step-by-step tutorial, and the concepts that come up in the article are accompanied by reference links. It is completely beginner-friendly.

The first two hands-on Python posts, ["Accessing the 1024 Site with Code"](https://mp.weixin.qq.com/s?__biz=MzI2ODYwNjE5NQ==&mid=2247483753&idx=1&sn=8df6c2a190201826f6f860659ad4af9e&chksm=eaec4ef5dd9bc7e39e8d48134795f6c0173c4614c615d0dcaaa38d937f4394aee77a978d70b1#rd) and ["Writing a '1024 Torrent-Devouring Spider' with Scrapy"](https://mp.weixin.qq.com/s?__biz=MzI2ODYwNjE5NQ==&mid=2247483776&idx=1&sn=50609d6dcf9c2c2fd80addb2d190fff2&chksm=eaec4e1cdd9bc70ab1ef74f7ae64bb3d619c9a09f2e2f0a676bbac33aa66260b7d566cb85154#rd), were well received, but they were written in a hurry and some of the terminology may be hard for novice Python players to digest. So I prepared extra carefully this time: ultra-detailed explanations, refined down to every single step, with lookup links provided along the way. For novice Python players, complete Python beginners, and anyone unfamiliar with the Scrapy framework, this is a hands-on Python tutorial made for you: crawling and downloading the resources of the Daguerre community with Scrapy.

Enough talk. Code is learned in order to be used; don't write it once and let it gather dust. Follow along with me below.

The grass site is a fine site. The outcome of this exercise: crawl the posts in the "Daguerre Flag" board, save each post's images locally, and write each post's related information into a local MongoDB at the same time. At first hearing that may sound like a lot of work; don't panic, I will take you through it slowly, one step at a time. It is not a big problem.

### Hands-On, Step By Step
Scrapy can be installed via pip:
```bash
$ pip install scrapy
```
Next, we go into the project directory we prepared in advance and create the Scrapy project. First, let's see how Scrapy's command line is used: entering `$ scrapy --help` prints

![Scrapy's help text](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs01.png)

We can see that the command for creating a Scrapy project is `$ scrapy startproject <project_name>`. The result of creating one looks like this:

![Project created successfully](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs002.png)

OK, at this point our directory content has turned into the following structure:

![Project directory structure](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs003.png)

The next step is to create our spider, again relying on Scrapy's own built-in command. Scrapy ships with four spider templates: **basic**, **crawl**, **csvfeed**, and **xmlfeed**. We choose basic here.
```
$ scrapy genspider --template=basic superspider bc.ghuws.men
```
If creation succeeds, the following message appears:

![Spider created](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs004.png)

At this point our project directory has become:

![Project directory with the spider](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs005.png)

We can see a new spiders folder in the project, containing a `superspider.py` file, and that file is the protagonist of this program. Freshly hatched, our cute little bug looks like this:

![The spider](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs006.png)

A brief rundown:
- **name** - our spider's name, used mainly when running the spider.
- **allowed_domains** - used by Scrapy's built-in OffsiteMiddleware. Scrapy enables the OffsiteMiddleware plugin by default, and domains outside this allowed range are filtered out and never crawled.
- **start_urls** - the URLs the spider starts crawling from.
- **parse() method** - this processes the result of a request. Most of our concrete crawling logic is written here.

All right, since start_urls holds the first URLs the spider crawls, we should replace its value with the Daguerre community's address, and then look at what is returned inside `parse()`. To run it, simply enter the `$ scrapy crawl superspider` command:

![Spider v0.1](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs007.png)

![The response object](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs008.png)

![The response object's body](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs009.png)

We can see that the response is an HtmlResponse class, and the string in its text attribute is the HTML of the web page. OK, with this step done, the next task is to figure out how to parse the HTML. Scrapy does provide HTML parsing of its own: it has a Selector class that parses HTML and supports both XPath and CSS lookups. Personally I don't find it very pleasant to use, though, and I recommend the BeautifulSoup4 library instead; installing it only takes `$ pip install beautifulsoup4`. We need it to parse the HTML here, so we import BeautifulSoup4, parse with it, and obtain a BeautifulSoup object; afterwards, we search inside this object for the things we need to extract.

![The BeautifulSoup object](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs010.png)
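To make this concrete, here is a minimal sketch of that parsing step, condensed from the `parse()` method in this repo's Daguerre/Daguerre/spiders/superspider.py:

```python
from bs4 import BeautifulSoup

def parse(self, response):
    content = response.body                       # the page's raw HTML
    soup = BeautifulSoup(content, "html.parser")  # parse with the stdlib parser
    # soup is now a BeautifulSoup object we can query with find()/find_all()
```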
![Parsing with BeautifulSoup](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs011.png)

The page is now parsed; the next step is to find each post's information inside the HTML file. Looking back at the HTML source, we can see that every post actually lives inside a `<tr>` tag, and what we really need is the `<a>` tag circled in the red box in the picture below.

![The HTML page](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs012.png)

Here we find that each post's entry link, that `<a>` tag, has two characteristics: it has an id value, and it has an `href` value. So, on the soup object, we call the `find_all()` method to find all tags with exactly this content.

![Grabbing all the a tags](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs013.png)

![The a-tag results](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs014.png)

We obtain an `a_list` result, a list object of length 102. Among this data, some results are not what we want; for example, the entries at positions 000 through 007 correspond to board rules and similar posts on the page, not the kind of thing we are after. So, having got this `a_list` data, we need to do some filtering.

The filtering step is indispensable, and there are many ways to filter. We keep it simple here and select only the posts from 2018. Why the 2018 posts? Young man, look at this column of href values:

![The difference in href values](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs015.png)

The second number, "1805", should be "year + month". If you don't believe it, jump to, say, page 100 of the forum, where the posts are from March 2016, and inspect the href value of any link there: it reads "1603". That confirms our idea. Good; following this keep-the-2018-posts line of thought, let's filter `a_list`.

![The filtering code](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs016.png)

![The filtered results](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs017.png)

The printed results are indeed posts from 2018. But the current href is not yet a post's real URL. A real URL looks like this:
```html
http://bc.ghuws.men/htm_data/16/1805/3126577.html
```
So we have to concatenate. Comparing against the URL above, we currently only have the second half; the first half is actually the community site's root URL. So we add a `ROOT_URL` variable in the settings.py file and import that variable into our spider. The code then becomes the following. For convenience, we also pick out the post's id, the number in front of `.html`, for later use.

![Assembling the post URL](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs018.png)

So far, we have each post's id and each post's URL. Our final goal is to download the images, so we have to make the spider crawl along each post's URL: the crawl must enter a second level. Here we need the `yield` keyword, calling the `scrapy.Request` method and passing in a callback, inside which we do the parsing.

![The second-level crawl](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs019.png)
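Assembled, the first level of `parse()` in this repo's superspider.py comes down to the following (the screenshots above show the same logic):

```python
def parse(self, response):
    soup = BeautifulSoup(response.body, "html.parser")
    # Every post entry is an <a> tag that carries both an id and an href.
    a_list = soup.find_all('a', attrs={'href': True, 'id': True})
    for item in a_list:
        temp_result = item['href'].split('/')  # e.g. ['htm_data', '16', '1805', '3126577.html']
        if len(temp_result) == 4:
            year_month = temp_result[2]             # "1805" means May 2018
            post_id = temp_result[3].split('.')[0]  # "3126577"
            # Keep 2018 posts only; real post ids are longer than 6 digits.
            if int(year_month) > 1800 and len(post_id) > 6:
                post_url = self.root_url + item['href']  # ROOT_URL + relative href
                # Second level: follow the post, carrying its id along in meta.
                yield Request(url=post_url, callback=self.parse_post_page,
                              meta={'post_id': post_id})
```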
Now we have entered the inside of each post. The information we have not yet captured is the post's title and the post's images. Just as with the steps of parse(), at this point we should analyze the post's HTML file.
Let's find the title first. Looking at the HTML file, the title corresponds to an `<h4>` tag.

![The post's HTML](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs020.png)

Then it is simple: we just find all the `<h4>` tags and check which one holds the title, like this:
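As it appears inside `parse_post_page()` in this repo's superspider.py:

```python
# The post title is the first <h4> on the page; some posts have none at all.
temp_title_list = soup.find_all('h4')
post_title = ""
if len(temp_title_list) != 0:
    post_title = temp_title_list[0].text
```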
Next come the images. Every post uses a different image host, so for the image part, let's look at the structure first:

![Image tag, variant 1](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs021.png)

![Image tag, variant 2](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs022.png)

Those are roughly the two variants. We can see that the image tag is an `<input>` tag, and the key point lies in `type=image`, so let's try to find the image addresses based on that.

![Second level finished](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs023.png)

A quick test to see how it runs:

![Crawling a post page's images](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs024.gif)

No problem whatsoever; very satisfying to watch. Looking at the results at this point, though, we find that among the images we grabbed, one or two come from a different image host.

![Run results (excerpt)](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs025.png)

Opening one shows an image whose content also differs from the other images, and it is not an image we want. So we have to do some filtering here. My method is to remove, from the `image_list` we found, the minority of image URLs whose host differs. Generally it is the first image found that we don't want, so it suffices here to check whether the first and the second hosts match.

![Filtering the images](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs026.png)

This way the printed results are problem-free.

Haha, now we hold the post's id, its title, its URL, and the URLs of the images inside the post. One step closer to the target. As I said before, our target is to save every image locally; so far we only hold each image's URL, so we still need to download them all.

In fact, when we take an image's URL and request it, the data returned over HTTP, although it arrives as a string of bytes, only needs to be saved under the matching image format, and locally it will open as a regular image. So, holding a post's `image_list`, we can yield out one more layer of requests: this is the third level of the crawl.

![The red box is the key](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs027.png)

At the same time, inside this third-level crawl, we also have to save the fetched image into the local directory. The code then looks like this:

![The third-level crawl](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs028.png)

At the end of the second-level crawl function above, there is one spot to watch out for: the place circled by the red box in the picture. `dont_filter=True` must be added there, otherwise Scrapy filters the requests out, because the image hosts' addresses are not in the `allowed_domains` we declared at the beginning. With this flag added, the requests proceed normally.

Running it once through, our local directory now holds the downloaded photos (the condensed code is sketched below).

![Excerpt of the image saving](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs029.png)

![Downloaded images saved in the local folder](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs030.png)
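Condensed from this repo's superspider.py, the image filtering, the third-level request, and the download callback look like this:

```python
def parse_post_page(self, response):
    soup = BeautifulSoup(response.body, "html.parser")
    post_id = response.meta['post_id']
    title_tags = soup.find_all('h4')              # title extraction, shown earlier
    post_title = title_tags[0].text if title_tags else ""
    # Collect <input type="image"> tags; drop the first one when it comes
    # from a different host than the second (it is usually an ad).
    temp_img_list = soup.find_all('input', attrs={'type': 'image'})
    img_list = []
    for i in range(len(temp_img_list)):
        if i == 1:
            src1 = temp_img_list[0]['src'].split('/')[2]  # host of image 1
            src2 = temp_img_list[1]['src'].split('/')[2]  # host of image 2
            if src1 != src2:
                img_list.remove(temp_img_list[0])
        img_list.append(temp_img_list[i])
    for index, item in enumerate(img_list):
        # Third level: image hosts are outside allowed_domains,
        # so dont_filter=True is needed or Scrapy drops the request.
        yield Request(url=item['src'], callback=self.down_load_image,
                      meta={'post_id': post_id, 'post_title': post_title,
                            'index': index},
                      dont_filter=True)

def down_load_image(self, response):
    if response.status == 200:
        file_dir = self.local_file_root + response.meta['post_title'] + "/"
        if not os.path.exists(file_dir):
            os.makedirs(file_dir)
        pic_format = response.url.split('.')[-1]  # jpg, png, gif, ...
        filename = file_dir + str(response.meta['index']) + "." + pic_format
        with open(filename, 'wb') as f:
            f.write(response.body)                # raw image bytes, written as-is
```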
We still have one problem: we need to save each post's information (*id, title, url,* and *image*) into the local database. How do we do that?

Don't panic; it is actually very simple.

First, for each post, we have to build a Scrapy item, which requires writing some code in items.py:

![The Scrapy item](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs031.png)

Once that is written, we import this class in the spider, build the item in the second-level parse function, and finally yield it out. Yielded out, it is handed to Scrapy's `pipeline` for handling.

![Generating the item](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs032.png)

The yielded item enters the pipeline, but with one precondition: the pipeline has to be configured in settings.py.

![Adding the pipeline to settings](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs033.png)

In the pipeline we first print the post's id, to check whether the data can make it to this point.

![The pipeline](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs034.png)

Run:

![The pipeline runs perfectly](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs035.png)

We see the data comes through without problems, and Scrapy's log prints the information inside every item.

If we want to save the data into **MongoDB**, that operation should be completed in the pipeline. The reason Scrapy establishes the pipeline is exactly this: any special handling aimed at individual items should be completed here. So, we should first import the `pymongo` library; then, in the pipeline's `__init__()`, we connect to the database. Assembled, the pipeline looks like this:

![The pipeline code](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs036.png)

Let's test whether the data can be written into MongoDB. First, in the terminal, start MongoDB with the command `$ sudo mongod`.

![Starting MongoDB](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs037.png)

Then run the spider and look at the effect:

![The data saved in MongoDB](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs038.png)

We can see, on the left, a database named `Daguerre` containing a table named `postTable`, and our data was written into the database successfully. The data format is as displayed in the picture, the same as the result we expected.

Up to now we have completed: from one page, obtain the URLs of all posts; then enter each post; inside each post, crawl its images and download them locally; and at the same time store the post's information in the database.

But have you noticed a problem here? We only crawled the first page's data. How can we crawl the data of page two, page three, page N?

Don't panic; it only takes a few extra lines of code. Below the `parse()` method in our spider file, add a call to the method itself, except that the URL passed in has to be the next page's URL. So we assemble the next page's URL here and then call `parse()` once more. To avoid an infinite loop, we set a maximum page count `MAX_PAGES` of 3, that is, we crawl the first three pages of data.

![Crawling the next page](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs039.png)

OK, with that it is all done; the Daguerre Flag spider is written. Let's run it and have a look:

![Boom, explosive run](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs040.gif)

Isn't that spectacular? Now look at our run results:

![Spider run results 1](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs041.png)

![Spider run results 2](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs042.png)

![Spider run results 3](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs043.png)

**Let's just say: a rich haul, and the pictures prove it.**

Actually, this program could also take a middleware that provides Cookies and User-Agents for the HTTP requests, to keep the website from banning us. At the same time, in the settings.py file, we can set `DOWNLOAD_DELAY` to lower the individual access rate a little and `CONCURRENT_REQUESTS` to raise the overall access speed.

![DOWNLOAD_DELAY](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs044.png)

![CONCURRENT_REQUESTS](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs045.png)
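Both knobs live in settings.py; the values this project ships with are:

```python
# settings.py (excerpt): politeness vs. throughput knobs of this project.
ROBOTSTXT_OBEY = True      # obey robots.txt rules
CONCURRENT_REQUESTS = 32   # raise parallelism (Scrapy's default is 16)
DOWNLOAD_DELAY = 1         # wait one second between requests to the same site
```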
Just like in the earlier EpicScrapy1024 project. Interested readers can borrow that project's code, digest it thoroughly, develop a style of their own, and crawl every website under the sky. Invincibility is such loneliness~~~~


Well, if you have read this far, young man, it shows you are attentive and hard-working, promising material indeed.

Without further ado, reading to this point earns a reward. Follow "**皮克啪的铲屎官**" and reply "**达盖尔**" to receive the project's source code and documentation. You can also find the "**Python实战**" button in the menu below to browse past articles, every one of them excellent~

A bit of idle talk: I think the greatest motivation for learning to program is loving it, and really that goes for everything. Love provides boundless drive and keeps a person charging ahead full of energy. Code should serve its author and serve everyone; code that gets written should be useful rather than gather dust. That is what good code is. You are welcome to follow my public account, "皮克啪的铲屎官"; going forward I will roll out Python data analysis content, possibly combined with things like quantitative trading.

Finally, a picture of Daguerre, in memory of a man who made an outstanding contribution to humanity.

![Daguerre himself](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs046.jpg)


#### Recommended reading:
[【Python实战】Writing a "1024 torrent-devouring spider" with Scrapy, freebies included](https://mp.weixin.qq.com/s?__biz=MzI2ODYwNjE5NQ==&mid=2247483776&idx=1&sn=50609d6dcf9c2c2fd80addb2d190fff2&chksm=eaec4e1cdd9bc70ab1ef74f7ae64bb3d619c9a09f2e2f0a676bbac33aa66260b7d566cb85154#rd)
[【Python实战】Accessing the 1024 site with code, freebies included](https://mp.weixin.qq.com/s?__biz=MzI2ODYwNjE5NQ==&mid=2247483753&idx=1&sn=8df6c2a190201826f6f860659ad4af9e&chksm=eaec4ef5dd9bc7e39e8d48134795f6c0173c4614c615d0dcaaa38d937f4394aee77a978d70b1#rd)


![Follow the public account "皮克啪的铲屎官" and reply "达盖尔" for a surprise](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs047.jpg)
--------------------------------------------------------------------------------
/image/sbs002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs002.png
--------------------------------------------------------------------------------
/image/sbs003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs003.png
--------------------------------------------------------------------------------
/image/sbs004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs004.png
--------------------------------------------------------------------------------
/image/sbs005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs005.png
--------------------------------------------------------------------------------
/image/sbs006.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs006.png
--------------------------------------------------------------------------------
/image/sbs007.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs007.png
--------------------------------------------------------------------------------
/image/sbs008.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs008.png
--------------------------------------------------------------------------------
/image/sbs009.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs009.png
--------------------------------------------------------------------------------
/image/sbs01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs01.png
--------------------------------------------------------------------------------
/image/sbs010.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs010.png
-------------------------------------------------------------------------------- /image/sbs011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs011.png -------------------------------------------------------------------------------- /image/sbs012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs012.png -------------------------------------------------------------------------------- /image/sbs013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs013.png -------------------------------------------------------------------------------- /image/sbs014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs014.png -------------------------------------------------------------------------------- /image/sbs015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs015.png -------------------------------------------------------------------------------- /image/sbs016.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs016.png -------------------------------------------------------------------------------- /image/sbs017.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs017.png -------------------------------------------------------------------------------- /image/sbs018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs018.png -------------------------------------------------------------------------------- /image/sbs019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs019.png -------------------------------------------------------------------------------- /image/sbs020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs020.png -------------------------------------------------------------------------------- /image/sbs021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs021.png -------------------------------------------------------------------------------- /image/sbs022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs022.png 
-------------------------------------------------------------------------------- /image/sbs023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs023.png -------------------------------------------------------------------------------- /image/sbs024.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs024.gif -------------------------------------------------------------------------------- /image/sbs025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs025.png -------------------------------------------------------------------------------- /image/sbs026.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs026.png -------------------------------------------------------------------------------- /image/sbs027.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs027.png -------------------------------------------------------------------------------- /image/sbs028.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs028.png -------------------------------------------------------------------------------- /image/sbs029.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs029.png -------------------------------------------------------------------------------- /image/sbs030.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs030.png -------------------------------------------------------------------------------- /image/sbs031.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs031.png -------------------------------------------------------------------------------- /image/sbs032.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs032.png -------------------------------------------------------------------------------- /image/sbs033.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs033.png -------------------------------------------------------------------------------- /image/sbs034.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs034.png 
-------------------------------------------------------------------------------- /image/sbs035.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs035.png -------------------------------------------------------------------------------- /image/sbs036.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs036.png -------------------------------------------------------------------------------- /image/sbs037.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs037.png -------------------------------------------------------------------------------- /image/sbs038.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs038.png -------------------------------------------------------------------------------- /image/sbs039.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs039.png -------------------------------------------------------------------------------- /image/sbs040.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs040.gif -------------------------------------------------------------------------------- /image/sbs0401.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs0401.gif -------------------------------------------------------------------------------- /image/sbs041.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs041.png -------------------------------------------------------------------------------- /image/sbs042.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs042.png -------------------------------------------------------------------------------- /image/sbs043.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs043.png -------------------------------------------------------------------------------- /image/sbs044.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs044.png -------------------------------------------------------------------------------- /image/sbs045.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs045.png 
-------------------------------------------------------------------------------- /image/sbs046.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs046.jpg -------------------------------------------------------------------------------- /image/sbs047.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs047.jpg -------------------------------------------------------------------------------- /image/sbs099.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs099.gif -------------------------------------------------------------------------------- /image/sbs0991.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs0991.gif --------------------------------------------------------------------------------