├── .gitignore
├── Amazon
│   ├── Amazon
│   │   ├── __init__.py
│   │   ├── images
│   │   │   └── default.jpg
│   │   ├── items.py
│   │   ├── middlewares.py
│   │   ├── pipelines.py
│   │   ├── settings.py
│   │   └── spiders
│   │       ├── __init__.py
│   │       └── amazon.py
│   └── scrapy.cfg
├── LICENSE
├── README.md
├── requirements.txt
└── scpture.jpg
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/.vagrant
/scrapy.iml
*.pyc
_trial_temp*
dropin.cache
docs/build
*egg-info
.tox
venv
build
dist
.idea
htmlcov/
.coverage
.coverage.*
.cache/

# Windows
Thumbs.db
--------------------------------------------------------------------------------
/Amazon/Amazon/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OFZFZS/scrapy-amazon/3befb32289ccd548fa74cead2e359be848b01fbc/Amazon/Amazon/__init__.py
--------------------------------------------------------------------------------
/Amazon/Amazon/images/default.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OFZFZS/scrapy-amazon/3befb32289ccd548fa74cead2e359be848b01fbc/Amazon/Amazon/images/default.jpg
--------------------------------------------------------------------------------
/Amazon/Amazon/items.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import scrapy


class AmazonItem(scrapy.Item):
    # Define the fields for your item here:
    title = scrapy.Field()
    image_url = scrapy.Field()
    asin = scrapy.Field()
    price = scrapy.Field()
    url = scrapy.Field()
    description = scrapy.Field()
    features = scrapy.Field()
    # Positive reviews
    review_good_titles = scrapy.Field()
    review_good_contents = scrapy.Field()

    # Negative reviews
    review_bad_titles = scrapy.Field()
    review_bad_contents = scrapy.Field()
--------------------------------------------------------------------------------
/Amazon/Amazon/middlewares.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define here the models for your spider middleware
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html


from scrapy import signals
from Amazon.settings import USER_AGENT


class AmazonSpiderMiddleware(object):
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    @classmethod
    def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        # Called for each response that goes through the spider
        # middleware and into the spider.

        # Should return None or raise an exception.
        return None

    def process_spider_output(self, response, result, spider):
        # Called with the results returned from the Spider, after
        # it has processed the response.

        # Must return an iterable of Request, dict or Item objects.
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.

        # Should return either None or an iterable of Response, dict
        # or Item objects.
        pass

    def process_start_requests(self, start_requests, spider):
        # Called with the start requests of the spider, and works
        # similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.

        # Must return only requests (not items).
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)


class RandomUserAgent(object):
    """Attach the User-Agent header from settings to every request.

    Despite the name, this always sends the single USER_AGENT string;
    a randomizing variant is sketched after this file.
    """

    def process_request(self, request, spider):
        request.headers['User-Agent'] = USER_AGENT


class RandomProxyMiddleware(object):
    """Route requests through a proxy."""
    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the downloader middleware does not modify the
    # passed objects.

    def process_request(self, request, spider):
        request.meta['proxy'] = "http://10.122.141.184:16816"
--------------------------------------------------------------------------------
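Note on middlewares.py above: the README recommends rotating User-Agent strings, but RandomUserAgent always sends the single USER_AGENT value from settings. A minimal sketch of a randomizing variant (the class name is illustrative, and the list simply reuses the two User-Agent strings that already appear in this project):

import random


class RotatingUserAgentMiddleware(object):
    """Downloader middleware that picks a User-Agent at random per request."""

    # Illustrative list; supply your own strings.
    USER_AGENTS = [
        'Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 '
        'MQQBrowser/8.9 Mobile Safari/537.36',
        'Mozilla/5.0 (Linux; Android 7.0; SM-A520F Build/NRD90M; wv) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Version/4.0 Chrome/65.0.3325.109 Mobile Safari/537.36',
    ]

    def process_request(self, request, spider):
        request.headers['User-Agent'] = random.choice(self.USER_AGENTS)

To use it, register 'Amazon.middlewares.RotatingUserAgentMiddleware' in DOWNLOADER_MIDDLEWARES instead of (or alongside) RandomUserAgent.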
/Amazon/Amazon/pipelines.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
from Amazon.items import AmazonItem

from pymongo import MongoClient


class AmazonGoodsPipeline(object):
    """Save product details to MongoDB."""

    def open_spider(self, spider):
        self.client = MongoClient(host="127.0.0.1", port=27017)
        self.collection = self.client.Amazon.Pipa

    def process_item(self, item, spider):
        if isinstance(item, AmazonItem):
            _item = dict(item)
            _item['_id'] = _item['asin']
            try:
                # The ASIN is used as the primary key, so inserting a
                # duplicate raises an error, which is swallowed to skip it.
                self.collection.insert_one(_item)
            except Exception:
                pass

        return item
--------------------------------------------------------------------------------
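An alternative to the insert-and-ignore-duplicates approach in AmazonGoodsPipeline above is to express the dedup-by-ASIN behaviour as an upsert, so re-crawling a product updates the stored document instead of being silently dropped. A minimal sketch, assuming the same local MongoDB instance and Amazon.Pipa collection:

from pymongo import MongoClient

collection = MongoClient(host='127.0.0.1', port=27017).Amazon.Pipa


def save_product(product):
    # product is the dict(item) built in process_item, keyed by ASIN.
    collection.update_one(
        {'_id': product['asin']},
        {'$set': product},
        upsert=True,
    )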
/Amazon/Amazon/settings.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

BOT_NAME = 'Amazon'

SPIDER_MODULES = ['Amazon.spiders']
NEWSPIDER_MODULE = 'Amazon.spiders'

USER_AGENT = "Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36"

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
CONCURRENT_REQUESTS = 1

# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,'
              '*/*;q=0.8',
    'Accept-Language': 'en',
    'User-Agent': USER_AGENT,
}

# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#     'Amazon.middlewares.AmazonSpiderMiddleware': 543,
#     'Amazon.middlewares.RandomUserAgentMiddleware': 543,
# }

# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'Amazon.middlewares.RandomUserAgent': 543,
}

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#     'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'Amazon.pipelines.AmazonGoodsPipeline': 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Cookies intended for Amazon requests; note that this dict is defined here
# but is not imported by any other module in this repository.
COOKIES = {
    'session-id-time': '2082787201l',
    'i18n-prefs': 'USD',
    'session-id': '142-9723185-7096359',
    'csm-hit': 'tb:s-TBNBSGGG2DTDACWFT2JG|1572104186431&t:1572104193942&adb:adblk_yes',
    'sp-cdn': '"L5Z9:CN"',
    'session-token': 'NtXSk4TNeLL1ywfKV+TvuhmxatgSa0yrUMVDxOzt0g6CAMeI6LkpgnQrcoU1asoE+pKF7ldrZnErq1dycNPGtszkRh03Wmo07Omhxs4OsROir2zQn4T5AtJAkn+RqVL8XB6izSJHsI0OWrp6to8bsr9AAw/4tLCFpEsnIh7nzYE0aDnZRQdyKCRbZbIxQTZg42jrFYHQH21c0ePPk9d0oC3feWEYOqh5KmCr5RWv8+xnCTX7kqpCELI9Qbsz1VKR',
    'ubid-main': '135-5055030-7258235',
    'x-wl-uid': '1iBt/JjYEoFYF+hGe2aCjWjyE0SGZ8B4QyX2KaTJl47LFamTRWYPbh4mcm/D2kLypor/oEsLBxqI',
    'lc-main': 'zh_CN',
}
--------------------------------------------------------------------------------
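Note on settings.py above: RandomProxyMiddleware (defined in middlewares.py) and the COOKIES dict are present but not wired into any request. A sketch of how they could be hooked up; the priority value 544 is illustrative, and the proxy URL hard-coded in middlewares.py is a placeholder you would replace with your own:

# settings.py: enable the proxy middleware alongside the User-Agent one.
DOWNLOADER_MIDDLEWARES = {
    'Amazon.middlewares.RandomUserAgent': 543,
    'Amazon.middlewares.RandomProxyMiddleware': 544,
}

# In the spider, the cookies have to be attached explicitly, e.g.:
#   from Amazon.settings import COOKIES
#   yield scrapy.Request(url, headers=self.headers, cookies=COOKIES,
#                        callback=self.parse)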
/Amazon/Amazon/spiders/__init__.py:
--------------------------------------------------------------------------------
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
--------------------------------------------------------------------------------
/Amazon/Amazon/spiders/amazon.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import scrapy
import requests

from Amazon.items import AmazonItem
from Amazon.settings import DEFAULT_REQUEST_HEADERS

from bs4 import BeautifulSoup

BASE_URL = 'https://www.amazon.com'


class AmazonSpider(scrapy.Spider):
    name = 'amazon'
    allowed_domains = ['amazon.com']
    page = 1
    lost_item = 0
    keyword = 'Pipa'
    rh = 'n%3A11091801'
    # Note: this cookies dict is never attached to any request below.
    cookies = {
        "anonymid": "j7wsz80ibwp8x3",
        "_r01_": "1",
        "ln_uact": "mr_mao_hacker@163.com",
        "_de": "BF09EE3A28DED52E6B65F6A4705D973F1383380866D39FF5",
        "depovince": "GW",
        "jebecookies": "2fb888d1-e16c-4e95-9e59-66e4a6ce1eae|||||",
        "ick_login": "1c2c11f1-50ce-4f8c-83ef-c1e03ae47add",
        "p": "158304820d08f48402be01f0545f406d9",
        "first_login_flag": "1",
        "ln_hurl": "http://hdn.xnimg.cn/photos/hdn521/20180711/2125/main_SDYi_ae9c0000bf9e1986.jpg",
        "t": "adb2270257904fff59f082494aa7f27b9",
        "societyguester": "adb2270257904fff59f082494aa7f27b9",
        "id": "327550029",
        "xnsid": "4a536121",
        "loginfrom": "syshome",
        "wp_fold": "0"
    }

    headers = {
        'Host': 'www.amazon.com',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 7.0; '
                      'SM-A520F Build/NRD90M; wv) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Version/4.0 '
                      'Chrome/65.0.3325.109 Mobile Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,'
                  'application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    }

    def start_requests(self):
        """
        start_requests is the spider's entry point. Override it to build the
        first batch of requests and, if needed, attach headers, cookies or
        dont_filter=True.
        """
        start_urls = [
            'https://www.amazon.com/s?k=' + self.keyword + '&page=' + str(
                self.page) + '&rh=' + self.rh,
            # 'https://www.amazon.com/s?k=' + keyword + '&page=' + str(page) + '&rh=n%3A1055398'
        ]

        for url in start_urls:
            yield scrapy.Request(url, headers=self.headers,
                                 callback=self.parse)

    def parse(self, response):
        url_list = response.xpath('//a[@title="status-badge"]/@href').extract()
        last = response.xpath('//li[@class="a-last"]').extract()

        product_url_list = [BASE_URL + x for x in url_list]

        # Stop if this is the last results page (or the page-5 cap is hit).
        if not last or self.page >= 5:
            print('Pagination finished. Current page: %s, products without '
                  'description/features: %s' % (self.page, self.lost_item))
            return

        for product_url in product_url_list:
            yield scrapy.Request(url=product_url,
                                 callback=self._get_product_details,
                                 headers=DEFAULT_REQUEST_HEADERS)
        self.page += 1
        yield scrapy.Request(
            url='https://www.amazon.com/s?k=' + self.keyword + '&page=' +
                str(self.page) + '&rh=' + self.rh + '&ref=is_pn_' +
                str(self.page - 1),
            callback=self.parse)

    def _get_product_details(self, response):
        # Work around Amazon's anti-crawl text in the response body.
        # The marker strings to strip are empty here, so these replace()
        # calls are currently no-ops; fill them in as needed.
        res_body = response.text
        _res = res_body.replace('', '').replace('', '')
        response = response.replace(body=_res)

        title = response.xpath('//span[@id="title"]/text()').extract_first()
        if not title:
            print('Your IP has been rate-limited by Amazon; '
                  'switch IP and retry.')
            return
        title = title.replace('\n', '')
        # Product image URL
        image_url = response.xpath(
            '//img[@data-fling-refmarker="detail_main_image_block"]/@data-midres-replacement').extract_first()  # noqa: E501
        # Unique product identifier (ASIN)
        asin = response.xpath(
            '//div[@id="cerberus-data-metrics"]/@data-asin').extract_first()
        # Price
        price = response.xpath(
            '//div[@id="cerberus-data-metrics"]/@data-asin-price').extract_first()  # noqa: E501
        # Description
        description = response.xpath(
            '//*[@id="productDescription_fullView"]').extract_first()
        if description:
            # Strip the HTML tags
            description = BeautifulSoup(description, 'lxml').get_text()
        # Feature bullets
        features = response.xpath(
            '//div[@id="feature-bullets"]//span[@class="a-list-item"]/text()') \
            .extract()

        # If neither a description nor features were extracted, drop the item
        if not description and not features:
            self.lost_item += 1
            print('No description and no features, skipping. '
                  '%s items filtered so far.' % self.lost_item)
            return

        item = AmazonItem()
        item['title'] = title
        item['asin'] = asin
        item['image_url'] = image_url
        item['url'] = response.url
        item['price'] = price
        item['description'] = description
        item['features'] = features

        # Save the product image locally
        try:
            self.save_image(image_url, asin)
        except Exception:
            print('Failed to download or save the image.')

        comments_url = 'https://www.amazon.com/kinery-Concentrator-Generator' \
                       '-Adjustable-Humidifiers/product-reviews/%s/ref=cm_cr' \
                       '_unknown?ie=UTF8&reviewerType=all_reviews&filterBy' \
                       'Star=five_star&pageNumber=1' % asin
        yield scrapy.Request(
            url=comments_url, callback=self._get_good_comments,
            meta={"item": item})

    def save_image(self, img_url, img_name):
        response = requests.get(img_url)
        # The response content is the raw image bytes
        img = response.content
        # Write the bytes to disk ('wb' = write binary); the path is
        # relative to the process working directory.
        path = '../images/%s.jpg' % img_name
        with open(path, 'wb') as f:
            f.write(img)

    def _get_good_comments(self, response):
        """Collect positive reviews: one page of five-star reviews only."""
        review_titles = response.xpath(
            '//span[@data-hook="review-title"]/span/text()').extract()
        review_contents = response.xpath(
            '//div[@aria-expanded="false"]/span/text()').extract()

        item = response.meta["item"]
        item["review_good_titles"] = review_titles
        item["review_good_contents"] = review_contents

        comments_url = 'https://www.amazon.com/kinery-Concentrator-' \
                       'Generator-Adjustable-Humidifiers/product-reviews/%s' \
                       '/ref=cm_cr_unknown?ie=UTF8&reviewerType=all_reviews' \
                       '&filterByStar=one_star&pageNumber=1' % item.get('asin')
        yield scrapy.Request(
            url=comments_url, callback=self._get_bad_comments,
            meta={"item": item})

    def _get_bad_comments(self, response):
        """Collect negative reviews: one page of one-star reviews only."""
        review_titles = response.xpath(
            '//span[@data-hook="review-title"]/span/text()').extract()
        review_contents = response.xpath(
            '//div[@aria-expanded="false"]/span/text()').extract()

        item = response.meta["item"]
        item["review_bad_titles"] = review_titles
        item["review_bad_contents"] = review_contents

        yield item
--------------------------------------------------------------------------------
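With the spider above in place, the usual way to start a crawl is `scrapy crawl amazon` from the Amazon/ directory that contains scrapy.cfg. A minimal sketch of driving it from a script instead (the file name run.py is illustrative; it assumes the script is executed from that same directory and that MongoDB is running locally for the pipeline):

# run.py -- execute from the directory containing scrapy.cfg
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from Amazon.spiders.amazon import AmazonSpider


if __name__ == '__main__':
    process = CrawlerProcess(get_project_settings())
    process.crawl(AmazonSpider)
    process.start()  # blocks until the crawl finishes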
/Amazon/scrapy.cfg:
--------------------------------------------------------------------------------
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html

[settings]
default = Amazon.settings

[deploy]
#url = http://localhost:6800/
project = Amazon

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 2-Clause License

Copyright (c) 2018, YaCheng
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
==========================

# **scrapy-amazon** (Amazon crawler)

An Amazon crawler built on Scrapy.

Targets Python 3 by default; Python 2 is untested.
- Crawls the mobile version of Amazon by default
- Collects every product for a given Amazon search keyword
- Captured fields include product title, URL, image URL, ASIN, product description, reviews, and more
- Scraped data is stored in a MongoDB database

Additional features in the closed-source version:
- Crawl the Amazon site of a chosen country (US Amazon, Japan Amazon, etc.)
- Route requests through specified proxy IPs to reduce the chance of Amazon's Robot Check
- Save crawl and publishing logs to files for easy inspection
- Integration with Baidu, Youdao and Tencent translation APIs to spin content into a chosen language
- One-click cleaning, spinning and publishing of scraped data to WordPress (with featured images)
- Deduplicated publishing to reduce the risk of the site being penalized by search engines

Note: it is recommended to supply your own IP pool and rotate User-Agent strings to avoid being blocked.



Screenshot
=======

![Sample data](https://github.com/OFZFZS/scrapy-amazon/blob/master/scpture.jpg?raw=true)



Contact
-------

QQ: 1498066696 (replies may be slow); feel free to open an issue directly.
--------------------------------------------------------------------------------
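Since the README states that scraped data is stored in MongoDB, a quick way to inspect the result of a crawl, assuming the defaults used by AmazonGoodsPipeline (local MongoDB, database Amazon, collection Pipa):

from pymongo import MongoClient

collection = MongoClient('127.0.0.1', 27017).Amazon.Pipa

# Documents are keyed by ASIN (_id); show a few titles and prices.
for doc in collection.find({}, {'title': 1, 'price': 1}).limit(5):
    print(doc['_id'], doc.get('title'), doc.get('price'))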
/requirements.txt:
--------------------------------------------------------------------------------
beautifulsoup4==4.8.1
lxml==4.4.1
pymongo==3.9.0
requests  # used by the spider's save_image helper
Scrapy==1.7.3
--------------------------------------------------------------------------------
/scpture.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OFZFZS/scrapy-amazon/3befb32289ccd548fa74cead2e359be848b01fbc/scpture.jpg
--------------------------------------------------------------------------------