├── .DS_Store ├── .gitignore ├── Daguerre ├── .DS_Store ├── Daguerre │ ├── __init__.py │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── superspider.py ├── Run.py ├── out │ └── .DS_Store └── scrapy.cfg ├── LICENSE ├── README.md └── image ├── sbs002.png ├── sbs003.png ├── sbs004.png ├── sbs005.png ├── sbs006.png ├── sbs007.png ├── sbs008.png ├── sbs009.png ├── sbs01.png ├── sbs010.png ├── sbs011.png ├── sbs012.png ├── sbs013.png ├── sbs014.png ├── sbs015.png ├── sbs016.png ├── sbs017.png ├── sbs018.png ├── sbs019.png ├── sbs020.png ├── sbs021.png ├── sbs022.png ├── sbs023.png ├── sbs024.gif ├── sbs025.png ├── sbs026.png ├── sbs027.png ├── sbs028.png ├── sbs029.png ├── sbs030.png ├── sbs031.png ├── sbs032.png ├── sbs033.png ├── sbs034.png ├── sbs035.png ├── sbs036.png ├── sbs037.png ├── sbs038.png ├── sbs039.png ├── sbs040.gif ├── sbs0401.gif ├── sbs041.png ├── sbs042.png ├── sbs043.png ├── sbs044.png ├── sbs045.png ├── sbs046.jpg ├── sbs047.jpg ├── sbs099.gif └── sbs0991.gif /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .idea/ -------------------------------------------------------------------------------- /Daguerre/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/Daguerre/.DS_Store -------------------------------------------------------------------------------- /Daguerre/Daguerre/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/Daguerre/Daguerre/__init__.py -------------------------------------------------------------------------------- /Daguerre/Daguerre/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | from scrapy import Item, Field 9 | 10 | 11 | class DaguerrePostItem(Item): 12 | post_id = Field() 13 | post_title = Field() 14 | post_url = Field() 15 | post_image_list = Field() 16 | -------------------------------------------------------------------------------- /Daguerre/Daguerre/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class DaguerreSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 
28 |         return None
29 | 
30 |     def process_spider_output(self, response, result, spider):
31 |         # Called with the results returned from the Spider, after
32 |         # it has processed the response.
33 | 
34 |         # Must return an iterable of Request, dict or Item objects.
35 |         for i in result:
36 |             yield i
37 | 
38 |     def process_spider_exception(self, response, exception, spider):
39 |         # Called when a spider or process_spider_input() method
40 |         # (from other spider middleware) raises an exception.
41 | 
42 |         # Should return either None or an iterable of Response, dict
43 |         # or Item objects.
44 |         pass
45 | 
46 |     def process_start_requests(self, start_requests, spider):
47 |         # Called with the start requests of the spider, and works
48 |         # similarly to the process_spider_output() method, except
49 |         # that it doesn't have a response associated.
50 | 
51 |         # Must return only requests (not items).
52 |         for r in start_requests:
53 |             yield r
54 | 
55 |     def spider_opened(self, spider):
56 |         spider.logger.info('Spider opened: %s' % spider.name)
57 | 
58 | 
59 | class DaguerreDownloaderMiddleware(object):
60 |     # Not all methods need to be defined. If a method is not defined,
61 |     # scrapy acts as if the downloader middleware does not modify the
62 |     # passed objects.
63 | 
64 |     @classmethod
65 |     def from_crawler(cls, crawler):
66 |         # This method is used by Scrapy to create your spiders.
67 |         s = cls()
68 |         crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
69 |         return s
70 | 
71 |     def process_request(self, request, spider):
72 |         # Called for each request that goes through the downloader
73 |         # middleware.
74 | 
75 |         # Must either:
76 |         # - return None: continue processing this request
77 |         # - or return a Response object
78 |         # - or return a Request object
79 |         # - or raise IgnoreRequest: process_exception() methods of
80 |         #   installed downloader middleware will be called
81 |         return None
82 | 
83 |     def process_response(self, request, response, spider):
84 |         # Called with the response returned from the downloader.
85 | 
86 |         # Must either:
87 |         # - return a Response object
88 |         # - return a Request object
89 |         # - or raise IgnoreRequest
90 |         return response
91 | 
92 |     def process_exception(self, request, exception, spider):
93 |         # Called when a download handler or a process_request()
94 |         # (from other downloader middleware) raises an exception.
95 | 
96 |         # Must either:
97 |         # - return None: continue processing this exception
98 |         # - return a Response object: stops process_exception() chain
99 |         # - return a Request object: stops process_exception() chain
100 |         pass
101 | 
102 |     def spider_opened(self, spider):
103 |         spider.logger.info('Spider opened: %s' % spider.name)
104 | 
--------------------------------------------------------------------------------
/Daguerre/Daguerre/pipelines.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define your item pipelines here
 4 | #
 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
 7 | 
 8 | import pymongo
 9 | import logging
10 | from Daguerre.items import DaguerrePostItem
11 | 
12 | 
13 | class DaguerrePipeline(object):
14 |     def __init__(self):
15 |         # Connect to the local MongoDB instance and use the "postTable"
16 |         # collection of the "Daguerre" database.
17 |         client = pymongo.MongoClient("localhost", 27017)
18 |         db = client["Daguerre"]
19 |         self.table = db["postTable"]
20 | 
21 |     def process_item(self, item, spider):
22 |         if isinstance(item, DaguerrePostItem):
23 |             try:
24 |                 # insert_one() replaces the deprecated Collection.insert().
25 |                 self.table.insert_one(dict(item))
26 |             except Exception as e:
27 |                 logging.error("PIPELINE EXCEPTION: " + str(e))
28 |         return item
--------------------------------------------------------------------------------
/Daguerre/Daguerre/settings.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Scrapy settings for Daguerre project
 4 | #
 5 | # For simplicity, this file contains only settings considered important or
 6 | # commonly used. You can find more settings consulting the documentation:
 7 | #
 8 | #     https://doc.scrapy.org/en/latest/topics/settings.html
 9 | #     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
10 | #     https://doc.scrapy.org/en/latest/topics/spider-middleware.html
11 | 
12 | BOT_NAME = 'Daguerre'
13 | 
14 | SPIDER_MODULES = ['Daguerre.spiders']
15 | NEWSPIDER_MODULE = 'Daguerre.spiders'
16 | 
17 | 
18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
19 | #USER_AGENT = 'Daguerre (+http://www.yourdomain.com)'
20 | 
21 | # Obey robots.txt rules
22 | ROBOTSTXT_OBEY = True
23 | 
24 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
25 | CONCURRENT_REQUESTS = 32
26 | 
27 | # Configure a delay for requests for the same website (default: 0)
28 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
29 | # See also autothrottle settings and docs
30 | DOWNLOAD_DELAY = 1
31 | # The download delay setting will honor only one of:
32 | # CONCURRENT_REQUESTS_PER_DOMAIN = 16
33 | #CONCURRENT_REQUESTS_PER_IP = 16
34 | 
35 | # Disable cookies (enabled by default)
36 | #COOKIES_ENABLED = False
37 | 
38 | # Disable Telnet Console (enabled by default)
39 | #TELNETCONSOLE_ENABLED = False
40 | 
41 | # Override the default request headers:
42 | #DEFAULT_REQUEST_HEADERS = {
43 | #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
44 | #   'Accept-Language': 'en',
45 | #}
46 | 
47 | # Enable or disable spider middlewares
48 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
49 | #SPIDER_MIDDLEWARES = {
50 | #    'Daguerre.middlewares.DaguerreSpiderMiddleware': 543,
51 | #}
52 | 
53 | # Enable or disable downloader middlewares
54 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
55 | #DOWNLOADER_MIDDLEWARES = {
56 | #    'Daguerre.middlewares.DaguerreDownloaderMiddleware':
543, 57 | #} 58 | 59 | # Enable or disable extensions 60 | # See https://doc.scrapy.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html 67 | ITEM_PIPELINES = { 68 | 'Daguerre.pipelines.DaguerrePipeline': 300, 69 | } 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html 73 | #AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | #AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | #AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | 92 | 93 | ROOT_URL = "http://bc.ghuws.men/" 94 | 95 | LOCAL_FILE_ROOT = "/Users/XXX/XXX/DaguerreSpider/Daguerre/out/" 96 | 97 | MAX_PAGES = 3 -------------------------------------------------------------------------------- /Daguerre/Daguerre/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
5 | 
--------------------------------------------------------------------------------
/Daguerre/Daguerre/spiders/superspider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import scrapy
 3 | import os
 4 | from scrapy import Request
 5 | from bs4 import BeautifulSoup
 6 | from Daguerre.settings import ROOT_URL, LOCAL_FILE_ROOT, MAX_PAGES
 7 | from Daguerre.items import DaguerrePostItem
 8 | 
 9 | 
10 | class SuperspiderSpider(scrapy.Spider):
11 |     root_url = ROOT_URL
12 |     local_file_root = LOCAL_FILE_ROOT
13 |     max_pages = MAX_PAGES
14 |     name = 'superspider'
15 |     allowed_domains = ['bc.ghuws.men']
16 |     start_urls = ['http://bc.ghuws.men/thread0806.php?fid=16&search=&page=1']
17 | 
18 |     def parse(self, response):
19 |         content = response.body
20 |         soup = BeautifulSoup(content, "html.parser")
21 |         # Post entries are <a> tags that carry both an id and an href.
22 |         a_list = soup.find_all('a', attrs={'href': True, 'id': True})
23 |         for item in a_list:
24 |             temp_result = item['href'].split('/')
25 |             if len(temp_result) == 4:
26 |                 year_month = temp_result[2]
27 |                 post_id = temp_result[3].split('.')[0]
28 |                 # Keep 2018 posts only; real post ids are longer than 6 digits.
29 |                 if int(year_month) > 1800 and len(post_id) > 6:
30 |                     post_url = self.root_url + item['href']
31 |                     yield Request(url=post_url, callback=self.parse_post_page, meta={'post_id': post_id})
32 |         # Follow the listing pages until MAX_PAGES is reached.
33 |         cur_page = int(response.url.split('=')[-1])
34 |         next_page = cur_page + 1
35 |         if next_page <= MAX_PAGES:
36 |             next_page_url = response.url[:-len(str(cur_page))] + str(next_page)
37 |             yield Request(url=next_page_url, callback=self.parse)
38 | 
39 |     def parse_post_page(self, response):
40 |         content = response.body
41 |         soup = BeautifulSoup(content, "html.parser")
42 |         temp_title_list = soup.find_all('h4')
43 |         post_id = response.meta['post_id']
44 |         post_url = response.url
45 |         post_title = ""
46 |         if len(temp_title_list) != 0:
47 |             post_title = temp_title_list[0].text
48 |         # Images are <input type="image"> tags. The first one is sometimes an
49 |         # ad served from a different image host, so drop it when its host
50 |         # differs from the second image's host.
51 |         temp_img_list = soup.find_all('input', attrs={'type': 'image'})
52 |         img_list = []
53 |         post_image_list = []
54 |         for i in range(len(temp_img_list)):
55 |             if i == 1:
56 |                 src1 = temp_img_list[0]['src'].split('/')[2]
57 |                 src2 = temp_img_list[1]['src'].split('/')[2]
58 |                 if src1 != src2:
59 |                     img_list.remove(temp_img_list[0])
60 |             img_list.append(temp_img_list[i])
61 |         # enumerate() gives a stable index even when two images share the same
62 |         # src (list.index() would return the first match for both).
63 |         for index, item in enumerate(img_list):
64 |             image_url = item['src']
65 |             post_image_list.append(image_url)
66 |             # The image hosts are not in allowed_domains, so dont_filter=True
67 |             # is required or the OffsiteMiddleware drops these requests.
68 |             yield Request(url=image_url, callback=self.down_load_image,
69 |                           meta={'post_id': post_id, 'post_title': post_title, 'index': index},
70 |                           dont_filter=True)
71 |         item = DaguerrePostItem()
72 |         item['post_id'] = post_id
73 |         item['post_title'] = post_title
74 |         item['post_url'] = post_url
75 |         item['post_image_list'] = post_image_list
76 |         yield item
77 | 
78 |     def down_load_image(self, response):
79 |         content = response.body
80 |         index = response.meta['index']
81 |         post_title = response.meta['post_title']
82 |         pic_format = response.url.split('.')[-1]
83 |         if response.status == 200:
84 |             # Build the local directory from the post title; strip path
85 |             # separators so a title cannot escape the output folder.
86 |             file_dir = self.local_file_root + post_title.replace('/', '_') + "/"
87 |             filename = file_dir + str(index) + "." + pic_format
88 |             if not os.path.exists(file_dir):
89 |                 os.makedirs(file_dir)
90 |             # 'wb' overwrites on re-runs; the original 'xb' mode would raise
91 |             # FileExistsError for any file downloaded before.
92 |             with open(filename, 'wb') as file:
93 |                 file.write(content)
94 | 
95 | 
--------------------------------------------------------------------------------
/Daguerre/Run.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | __author__ = 'lianggao'
 3 | __date__ = '2018/5/11 上午11:27'
 4 | 
 5 | from scrapy import cmdline
 6 | 
 7 | 
 8 | def main():
 9 |     cmdline.execute("scrapy crawl superspider".split())
10 | 
11 | 
12 | if __name__ == '__main__':
13 |     main()
--------------------------------------------------------------------------------
/Daguerre/out/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/Daguerre/out/.DS_Store
--------------------------------------------------------------------------------
/Daguerre/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = Daguerre.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = Daguerre
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 SwyftG
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
> After the first two hands-on Python articles went out, some readers told me: **"Your ideas are great, but I just can't understand what you wrote. I'm not familiar with Python, and reading it is a struggle."** Thinking it over, that is a fair point. Readers who know Python can follow my train of thought, but for those with little or no Python background, throwing the material at them like that would only send them away disappointed. So this time I have prepared a hands-on, step-by-step tutorial, and the concepts that come up in the article are accompanied by reference links. It is completely beginner-friendly.

The first two hands-on Python posts, ["Accessing the 1024 Site with Code"](https://mp.weixin.qq.com/s?__biz=MzI2ODYwNjE5NQ==&mid=2247483753&idx=1&sn=8df6c2a190201826f6f860659ad4af9e&chksm=eaec4ef5dd9bc7e39e8d48134795f6c0173c4614c615d0dcaaa38d937f4394aee77a978d70b1#rd) and ["Writing a '1024 Torrent-Devouring Spider' with Scrapy"](https://mp.weixin.qq.com/s?__biz=MzI2ODYwNjE5NQ==&mid=2247483776&idx=1&sn=50609d6dcf9c2c2fd80addb2d190fff2&chksm=eaec4e1cdd9bc70ab1ef74f7ae64bb3d619c9a09f2e2f0a676bbac33aa66260b7d566cb85154#rd), were well received, but they were written in a hurry and some of the terminology may be hard for novice Python players to digest. So I prepared extra carefully this time: ultra-detailed explanations, refined down to every single step, with lookup links provided along the way. For novice Python players, complete Python beginners, and anyone unfamiliar with the Scrapy framework, this is a hands-on Python tutorial made for you: crawling and downloading the resources of the Daguerre community with Scrapy.

Enough talk. Code is learned in order to be used; don't write it once and let it gather dust. Follow along with me below.

The grass site is a fine site. The outcome of this exercise: crawl the posts in the "Daguerre Flag" board, save each post's images locally, and write each post's related information into a local MongoDB at the same time. At first hearing that may sound like a lot of work; don't panic, I will take you through it slowly, one step at a time. It is not a big problem.

### Hands-On, Step By Step
Scrapy can be installed via pip:
```bash
$ pip install scrapy
```
Next, we go into the project directory we prepared in advance and create the Scrapy project. First, let's see how Scrapy's command line is used: entering `$ scrapy --help` prints

![Scrapy's help text](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs01.png)

We can see that the command for creating a Scrapy project is `$ scrapy startproject <project_name>`. The result of creating one looks like this:

![Project created successfully](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs002.png)

OK, at this point our directory content has turned into the following structure:

![Project directory structure](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs003.png)

The next step is to create our spider, again relying on Scrapy's own built-in command. Scrapy ships with four spider templates: **basic**, **crawl**, **csvfeed**, and **xmlfeed**. We choose basic here.
```
$ scrapy genspider --template=basic superspider bc.ghuws.men
```
If creation succeeds, the following message appears:

![Spider created](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs004.png)

At this point our project directory has become:

![Project directory with the spider](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs005.png)

We can see a new spiders folder in the project, containing a `superspider.py` file, and that file is the protagonist of this program. Freshly hatched, our cute little bug looks like this:

![The spider](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs006.png)

A brief rundown:
- **name** - our spider's name, used mainly when running the spider.
- **allowed_domains** - used by Scrapy's built-in OffsiteMiddleware. Scrapy enables the OffsiteMiddleware plugin by default, and domains outside this allowed range are filtered out and never crawled.
- **start_urls** - the URLs the spider starts crawling from.
- **parse() method** - this processes the result of a request. Most of our concrete crawling logic is written here.

All right, since start_urls holds the first URLs the spider crawls, we should replace its value with the Daguerre community's address, and then look at what is returned inside `parse()`. To run it, simply enter the `$ scrapy crawl superspider` command:

![Spider v0.1](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs007.png)

![The response object](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs008.png)

![The response object's body](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs009.png)

We can see that the response is an HtmlResponse class, and the string in its text attribute is the HTML of the web page. OK, with this step done, the next task is to figure out how to parse the HTML. Scrapy does provide HTML parsing of its own: it has a Selector class that parses HTML and supports both XPath and CSS lookups. Personally I don't find it very pleasant to use, though, and I recommend the BeautifulSoup4 library instead; installing it only takes `$ pip install beautifulsoup4`. We need it to parse the HTML here, so we import BeautifulSoup4, parse with it, and obtain a BeautifulSoup object; afterwards, we search inside this object for the things we need to extract.

![The BeautifulSoup object](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs010.png)
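To make this concrete, here is a minimal sketch of that parsing step, condensed from the `parse()` method in this repo's Daguerre/Daguerre/spiders/superspider.py:

```python
from bs4 import BeautifulSoup

def parse(self, response):
    content = response.body                       # the page's raw HTML
    soup = BeautifulSoup(content, "html.parser")  # parse with the stdlib parser
    # soup is now a BeautifulSoup object we can query with find()/find_all()
```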
![Parsing with BeautifulSoup](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs011.png)

The page is now parsed; the next step is to find each post's information inside the HTML file. Looking back at the HTML source, we can see that every post actually lives inside a `<tr>` tag, and what we really need is the `<a>` tag circled in the red box in the picture below.

![The HTML page](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs012.png)

Here we find that each post's entry link, that `<a>` tag, has two characteristics: it has an id value, and it has an `href` value. So, on the soup object, we call the `find_all()` method to find all tags with exactly this content.

![Grabbing all the a tags](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs013.png)

![The a-tag results](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs014.png)

We obtain an `a_list` result, a list object of length 102. Among this data, some results are not what we want; for example, the entries at positions 000 through 007 correspond to board rules and similar posts on the page, not the kind of thing we are after. So, having got this `a_list` data, we need to do some filtering.

The filtering step is indispensable, and there are many ways to filter. We keep it simple here and select only the posts from 2018. Why the 2018 posts? Young man, look at this column of href values:

![The difference in href values](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs015.png)

The second number, "1805", should be "year + month". If you don't believe it, jump to, say, page 100 of the forum, where the posts are from March 2016, and inspect the href value of any link there: it reads "1603". That confirms our idea. Good; following this keep-the-2018-posts line of thought, let's filter `a_list`.

![The filtering code](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs016.png)

![The filtered results](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs017.png)

The printed results are indeed posts from 2018. But the current href is not yet a post's real URL. A real URL looks like this:
```html
http://bc.ghuws.men/htm_data/16/1805/3126577.html
```
So we have to concatenate. Comparing against the URL above, we currently only have the second half; the first half is actually the community site's root URL. So we add a `ROOT_URL` variable in the settings.py file and import that variable into our spider. The code then becomes the following. For convenience, we also pick out the post's id, the number in front of `.html`, for later use.

![Assembling the post URL](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs018.png)

So far, we have each post's id and each post's URL. Our final goal is to download the images, so we have to make the spider crawl along each post's URL: the crawl must enter a second level. Here we need the `yield` keyword, calling the `scrapy.Request` method and passing in a callback, inside which we do the parsing.

![The second-level crawl](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs019.png)
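Assembled, the first level of `parse()` in this repo's superspider.py comes down to the following (the screenshots above show the same logic):

```python
def parse(self, response):
    soup = BeautifulSoup(response.body, "html.parser")
    # Every post entry is an <a> tag that carries both an id and an href.
    a_list = soup.find_all('a', attrs={'href': True, 'id': True})
    for item in a_list:
        temp_result = item['href'].split('/')  # e.g. ['htm_data', '16', '1805', '3126577.html']
        if len(temp_result) == 4:
            year_month = temp_result[2]             # "1805" means May 2018
            post_id = temp_result[3].split('.')[0]  # "3126577"
            # Keep 2018 posts only; real post ids are longer than 6 digits.
            if int(year_month) > 1800 and len(post_id) > 6:
                post_url = self.root_url + item['href']  # ROOT_URL + relative href
                # Second level: follow the post, carrying its id along in meta.
                yield Request(url=post_url, callback=self.parse_post_page,
                              meta={'post_id': post_id})
```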
Now we have entered the inside of each post. The information we have not yet captured is the post's title and the post's images. Just as with the steps of parse(), at this point we should analyze the post's HTML file.
Let's find the title first. Looking at the HTML file, the title corresponds to an `<h4>` tag.

![The post's HTML](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs020.png)

Then it is simple: we just find all the `<h4>` tags and check which one holds the title, like this:
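As it appears inside `parse_post_page()` in this repo's superspider.py:

```python
# The post title is the first <h4> on the page; some posts have none at all.
temp_title_list = soup.find_all('h4')
post_title = ""
if len(temp_title_list) != 0:
    post_title = temp_title_list[0].text
```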
Next come the images. Every post uses a different image host, so for the image part, let's look at the structure first:

![Image tag, variant 1](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs021.png)

![Image tag, variant 2](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs022.png)

Those are roughly the two variants. We can see that the image tag is an `<input>` tag, and the key point lies in `type=image`, so let's try to find the image addresses based on that.

![Second level finished](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs023.png)

A quick test to see how it runs:

![Crawling a post page's images](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs024.gif)

No problem whatsoever; very satisfying to watch. Looking at the results at this point, though, we find that among the images we grabbed, one or two come from a different image host.

![Run results (excerpt)](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs025.png)

Opening one shows an image whose content also differs from the other images, and it is not an image we want. So we have to do some filtering here. My method is to remove, from the `image_list` we found, the minority of image URLs whose host differs. Generally it is the first image found that we don't want, so it suffices here to check whether the first and the second hosts match.

![Filtering the images](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs026.png)

This way the printed results are problem-free.

Haha, now we hold the post's id, its title, its URL, and the URLs of the images inside the post. One step closer to the target. As I said before, our target is to save every image locally; so far we only hold each image's URL, so we still need to download them all.

In fact, when we take an image's URL and request it, the data returned over HTTP, although it arrives as a string of bytes, only needs to be saved under the matching image format, and locally it will open as a regular image. So, holding a post's `image_list`, we can yield out one more layer of requests: this is the third level of the crawl.

![The red box is the key](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs027.png)

At the same time, inside this third-level crawl, we also have to save the fetched image into the local directory. The code then looks like this:

![The third-level crawl](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs028.png)

At the end of the second-level crawl function above, there is one spot to watch out for: the place circled by the red box in the picture. `dont_filter=True` must be added there, otherwise Scrapy filters the requests out, because the image hosts' addresses are not in the `allowed_domains` we declared at the beginning. With this flag added, the requests proceed normally.

Running it once through, our local directory now holds the downloaded photos (the condensed code is sketched below).

![Excerpt of the image saving](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs029.png)

![Downloaded images saved in the local folder](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs030.png)
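Condensed from this repo's superspider.py, the image filtering, the third-level request, and the download callback look like this:

```python
def parse_post_page(self, response):
    soup = BeautifulSoup(response.body, "html.parser")
    post_id = response.meta['post_id']
    title_tags = soup.find_all('h4')              # title extraction, shown earlier
    post_title = title_tags[0].text if title_tags else ""
    # Collect <input type="image"> tags; drop the first one when it comes
    # from a different host than the second (it is usually an ad).
    temp_img_list = soup.find_all('input', attrs={'type': 'image'})
    img_list = []
    for i in range(len(temp_img_list)):
        if i == 1:
            src1 = temp_img_list[0]['src'].split('/')[2]  # host of image 1
            src2 = temp_img_list[1]['src'].split('/')[2]  # host of image 2
            if src1 != src2:
                img_list.remove(temp_img_list[0])
        img_list.append(temp_img_list[i])
    for index, item in enumerate(img_list):
        # Third level: image hosts are outside allowed_domains,
        # so dont_filter=True is needed or Scrapy drops the request.
        yield Request(url=item['src'], callback=self.down_load_image,
                      meta={'post_id': post_id, 'post_title': post_title,
                            'index': index},
                      dont_filter=True)

def down_load_image(self, response):
    if response.status == 200:
        file_dir = self.local_file_root + response.meta['post_title'] + "/"
        if not os.path.exists(file_dir):
            os.makedirs(file_dir)
        pic_format = response.url.split('.')[-1]  # jpg, png, gif, ...
        filename = file_dir + str(response.meta['index']) + "." + pic_format
        with open(filename, 'wb') as f:
            f.write(response.body)                # raw image bytes, written as-is
```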
We still have one problem: we need to save each post's information (*id, title, url,* and *image*) into the local database. How do we do that?

Don't panic; it is actually very simple.

First, for each post, we have to build a Scrapy item, which requires writing some code in items.py:

![The Scrapy item](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs031.png)

Once that is written, we import this class in the spider, build the item in the second-level parse function, and finally yield it out. Yielded out, it is handed to Scrapy's `pipeline` for handling.

![Generating the item](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs032.png)

The yielded item enters the pipeline, but with one precondition: the pipeline has to be configured in settings.py.

![Adding the pipeline to settings](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs033.png)

In the pipeline we first print the post's id, to check whether the data can make it to this point.

![The pipeline](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs034.png)

Run:

![The pipeline runs perfectly](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs035.png)

We see the data comes through without problems, and Scrapy's log prints the information inside every item.

If we want to save the data into **MongoDB**, that operation should be completed in the pipeline. The reason Scrapy establishes the pipeline is exactly this: any special handling aimed at individual items should be completed here. So, we should first import the `pymongo` library; then, in the pipeline's `__init__()`, we connect to the database. Assembled, the pipeline looks like this:

![The pipeline code](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs036.png)

Let's test whether the data can be written into MongoDB. First, in the terminal, start MongoDB with the command `$ sudo mongod`.

![Starting MongoDB](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs037.png)

Then run the spider and look at the effect:

![The data saved in MongoDB](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs038.png)

We can see, on the left, a database named `Daguerre` containing a table named `postTable`, and our data was written into the database successfully. The data format is as displayed in the picture, the same as the result we expected.

Up to now we have completed: from one page, obtain the URLs of all posts; then enter each post; inside each post, crawl its images and download them locally; and at the same time store the post's information in the database.

But have you noticed a problem here? We only crawled the first page's data. How can we crawl the data of page two, page three, page N?

Don't panic; it only takes a few extra lines of code. Below the `parse()` method in our spider file, add a call to the method itself, except that the URL passed in has to be the next page's URL. So we assemble the next page's URL here and then call `parse()` once more. To avoid an infinite loop, we set a maximum page count `MAX_PAGES` of 3, that is, we crawl the first three pages of data.

![Crawling the next page](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs039.png)

OK, with that it is all done; the Daguerre Flag spider is written. Let's run it and have a look:

![Boom, explosive run](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs040.gif)

Isn't that spectacular? Now look at our run results:

![Spider run results 1](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs041.png)

![Spider run results 2](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs042.png)

![Spider run results 3](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs043.png)

**Let's just say: a rich haul, and the pictures prove it.**

Actually, this program could also take a middleware that provides Cookies and User-Agents for the HTTP requests, to keep the website from banning us. At the same time, in the settings.py file, we can set `DOWNLOAD_DELAY` to lower the individual access rate a little and `CONCURRENT_REQUESTS` to raise the overall access speed.

![DOWNLOAD_DELAY](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs044.png)

![CONCURRENT_REQUESTS](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs045.png)
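Both knobs live in settings.py; the values this project ships with are:

```python
# settings.py (excerpt): politeness vs. throughput knobs of this project.
ROBOTSTXT_OBEY = True      # obey robots.txt rules
CONCURRENT_REQUESTS = 32   # raise parallelism (Scrapy's default is 16)
DOWNLOAD_DELAY = 1         # wait one second between requests to the same site
```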
Just like in the earlier EpicScrapy1024 project. Interested readers can borrow that project's code, digest it thoroughly, develop a style of their own, and crawl every website under the sky. Invincibility is such loneliness~~~~


Well, if you have read this far, young man, it shows you are attentive and hard-working, promising material indeed.

Without further ado, reading to this point earns a reward. Follow "**皮克啪的铲屎官**" and reply "**达盖尔**" to receive the project's source code and documentation. You can also find the "**Python实战**" button in the menu below to browse past articles, every one of them excellent~

A bit of idle talk: I think the greatest motivation for learning to program is loving it, and really that goes for everything. Love provides boundless drive and keeps a person charging ahead full of energy. Code should serve its author and serve everyone; code that gets written should be useful rather than gather dust. That is what good code is. You are welcome to follow my public account, "皮克啪的铲屎官"; going forward I will roll out Python data analysis content, possibly combined with things like quantitative trading.

Finally, a picture of Daguerre, in memory of a man who made an outstanding contribution to humanity.

![Daguerre himself](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs046.jpg)


#### Recommended reading:
[【Python实战】Writing a "1024 torrent-devouring spider" with Scrapy, freebies included](https://mp.weixin.qq.com/s?__biz=MzI2ODYwNjE5NQ==&mid=2247483776&idx=1&sn=50609d6dcf9c2c2fd80addb2d190fff2&chksm=eaec4e1cdd9bc70ab1ef74f7ae64bb3d619c9a09f2e2f0a676bbac33aa66260b7d566cb85154#rd)
[【Python实战】Accessing the 1024 site with code, freebies included](https://mp.weixin.qq.com/s?__biz=MzI2ODYwNjE5NQ==&mid=2247483753&idx=1&sn=8df6c2a190201826f6f860659ad4af9e&chksm=eaec4ef5dd9bc7e39e8d48134795f6c0173c4614c615d0dcaaa38d937f4394aee77a978d70b1#rd)


![Follow the public account "皮克啪的铲屎官" and reply "达盖尔" for a surprise](https://github.com/SwyftG/DaguerreSpider/blob/master/image/sbs047.jpg)
--------------------------------------------------------------------------------
/image/sbs002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs002.png
--------------------------------------------------------------------------------
/image/sbs003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs003.png
--------------------------------------------------------------------------------
/image/sbs004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs004.png
--------------------------------------------------------------------------------
/image/sbs005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs005.png
--------------------------------------------------------------------------------
/image/sbs006.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs006.png
--------------------------------------------------------------------------------
/image/sbs007.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs007.png
--------------------------------------------------------------------------------
/image/sbs008.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs008.png
--------------------------------------------------------------------------------
/image/sbs009.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs009.png
--------------------------------------------------------------------------------
/image/sbs01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs01.png
--------------------------------------------------------------------------------
/image/sbs010.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs010.png
-------------------------------------------------------------------------------- /image/sbs011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs011.png -------------------------------------------------------------------------------- /image/sbs012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs012.png -------------------------------------------------------------------------------- /image/sbs013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs013.png -------------------------------------------------------------------------------- /image/sbs014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs014.png -------------------------------------------------------------------------------- /image/sbs015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs015.png -------------------------------------------------------------------------------- /image/sbs016.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs016.png -------------------------------------------------------------------------------- /image/sbs017.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs017.png -------------------------------------------------------------------------------- /image/sbs018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs018.png -------------------------------------------------------------------------------- /image/sbs019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs019.png -------------------------------------------------------------------------------- /image/sbs020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs020.png -------------------------------------------------------------------------------- /image/sbs021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs021.png -------------------------------------------------------------------------------- /image/sbs022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs022.png 
-------------------------------------------------------------------------------- /image/sbs023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs023.png -------------------------------------------------------------------------------- /image/sbs024.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs024.gif -------------------------------------------------------------------------------- /image/sbs025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs025.png -------------------------------------------------------------------------------- /image/sbs026.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs026.png -------------------------------------------------------------------------------- /image/sbs027.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs027.png -------------------------------------------------------------------------------- /image/sbs028.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs028.png -------------------------------------------------------------------------------- /image/sbs029.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs029.png -------------------------------------------------------------------------------- /image/sbs030.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs030.png -------------------------------------------------------------------------------- /image/sbs031.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs031.png -------------------------------------------------------------------------------- /image/sbs032.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs032.png -------------------------------------------------------------------------------- /image/sbs033.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs033.png -------------------------------------------------------------------------------- /image/sbs034.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs034.png 
-------------------------------------------------------------------------------- /image/sbs035.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs035.png -------------------------------------------------------------------------------- /image/sbs036.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs036.png -------------------------------------------------------------------------------- /image/sbs037.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs037.png -------------------------------------------------------------------------------- /image/sbs038.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs038.png -------------------------------------------------------------------------------- /image/sbs039.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs039.png -------------------------------------------------------------------------------- /image/sbs040.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs040.gif -------------------------------------------------------------------------------- /image/sbs0401.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs0401.gif -------------------------------------------------------------------------------- /image/sbs041.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs041.png -------------------------------------------------------------------------------- /image/sbs042.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs042.png -------------------------------------------------------------------------------- /image/sbs043.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs043.png -------------------------------------------------------------------------------- /image/sbs044.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs044.png -------------------------------------------------------------------------------- /image/sbs045.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs045.png 
-------------------------------------------------------------------------------- /image/sbs046.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs046.jpg -------------------------------------------------------------------------------- /image/sbs047.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs047.jpg -------------------------------------------------------------------------------- /image/sbs099.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs099.gif -------------------------------------------------------------------------------- /image/sbs0991.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SwyftG/DaguerreSpider/d285b71bfa2149c46d869f3706985cfbaf1e24eb/image/sbs0991.gif --------------------------------------------------------------------------------