├── price_monitor
│   ├── __init__.py
│   ├── spiders
│   │   ├── __init__.py
│   │   ├── amazon.py
│   │   ├── bestbuy.py
│   │   ├── base_spider.py
│   │   └── ebay.py
│   ├── items.py
│   ├── templates
│   │   └── email.html
│   ├── pipelines.py
│   ├── settings.py
│   ├── utils.py
│   └── resources
│       └── urls.json
├── requirements.txt
├── scrapinghub.yml
├── scrapy.cfg
├── setup.py
├── .gitignore
├── bin
│   └── monitor.py
└── README.md

--------------------------------------------------------------------------------
/price_monitor/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
scrapy
boto
extruct
w3lib
jinja2
hubstorage

--------------------------------------------------------------------------------
/scrapinghub.yml:
--------------------------------------------------------------------------------
requirements_file: requirements.txt
stacks:
  default: scrapy:1.1-py3

--------------------------------------------------------------------------------
/price_monitor/spiders/__init__.py:
--------------------------------------------------------------------------------
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.org/en/latest/deploy.html

[settings]
default = price_monitor.settings

[deploy]
#url = http://localhost:6800/
project = price_monitor

--------------------------------------------------------------------------------
/price_monitor/items.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class PriceMonitorItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    pass

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# Automatically created by: shub deploy

from setuptools import setup, find_packages

setup(
    name='project',
    version='1.0',
    packages=find_packages(),
    package_data={'price_monitor': ['resources/*.json', 'templates/*.html']},
    scripts=['bin/monitor.py'],
    entry_points={'scrapy': ['settings = price_monitor.settings']},
)

--------------------------------------------------------------------------------
/price_monitor/spiders/amazon.py:
--------------------------------------------------------------------------------
from .base_spider import BaseSpider


class AmazonSpider(BaseSpider):
    name = "amazon.com"

    def parse(self, response):
        item = response.meta.get('item', {})
        item['url'] = response.url
        item['title'] = response.css("span#productTitle::text").extract_first("").strip()
        item['price'] = float(
            response.css("span#priceblock_ourprice::text").re_first(r"\$(.*)") or 0
        )
        yield item

--------------------------------------------------------------------------------
/price_monitor/templates/email.html:
--------------------------------------------------------------------------------
<table>
    <tr>
        <th colspan="2"><h2>🎉 Hey, we found a good deal! 🎁</h2></th>
    </tr>
    {% for item in items %}
    <tr><td>Product: {{item.title}}</td></tr>
    <tr><td>Price: {{item.price}}</td></tr>
    <tr><td>Store: {{item.retailer}}</td></tr>
    <tr><td>Price obtained at: {{item.when}}</td></tr>
    <tr><td>Visit the product page at {{item.retailer}}: {{item.url}}</td></tr>
    {% endfor %}
</table>

--------------------------------------------------------------------------------
/price_monitor/spiders/bestbuy.py:
--------------------------------------------------------------------------------
from .base_spider import BaseSpider


class BestbuySpider(BaseSpider):
    name = "bestbuy.com"

    def parse(self, response):
        item = response.meta.get('item', {})
        item['url'] = response.url
        item['title'] = response.css("div#sku-title > h1 ::text").extract_first("").strip()
        item['price'] = float(
            response.css('div.price-block ::attr(data-customer-price)').extract_first(default=0)
        )
        yield item

--------------------------------------------------------------------------------
/price_monitor/spiders/base_spider.py:
--------------------------------------------------------------------------------
import json
import pkgutil
import scrapy
from datetime import datetime


class BaseSpider(scrapy.Spider):

    def start_requests(self):
        # Read the monitored products from resources/urls.json and generate one
        # request for each URL that belongs to this spider's retailer.
        products = json.loads(pkgutil.get_data('price_monitor', 'resources/urls.json').decode())
        for name, urls in products.items():
            for url in urls:
                if self.name in url:
                    now = datetime.now().strftime('%Y/%m/%d %H:%M:%S')
                    item = {'product_name': name, 'retailer': self.name, 'when': now}
                    yield scrapy.Request(url, meta={'item': item})

--------------------------------------------------------------------------------
/price_monitor/spiders/ebay.py:
--------------------------------------------------------------------------------
from extruct.w3cmicrodata import MicrodataExtractor
from .base_spider import BaseSpider


class EbaySpider(BaseSpider):
    name = "ebay.com"

    def parse(self, response):
        extractor = MicrodataExtractor()
        properties = extractor.extract(response.body_as_unicode()).get('items')[0].get('properties', {})
        item = response.meta.get('item', {})
        item['url'] = response.url
        item['title'] = properties.get('name', '').replace('Details about', '').strip()
        item['price'] = float(
            properties.get('offers', {}).get('properties', {}).get('price', 0)
        )
        yield item

--------------------------------------------------------------------------------
/price_monitor/pipelines.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from price_monitor import settings
from hubstorage import HubstorageClient
from price_monitor.utils import reversed_timestamp, get_product_names


class CollectionStoragePipeline(object):

    def open_spider(self, spider):
        # One Scrapy Cloud collection per monitored product.
        client = HubstorageClient(auth=settings.SHUB_KEY)
        project = client.get_project(settings.SHUB_PROJ_ID)
        self.data_stores = {}
        for product_name in get_product_names():
            self.data_stores[product_name] = project.collections.new_store(product_name)

    def process_item(self, item, spider):
        key = "{}-{}-{}".format(
            reversed_timestamp(), item.get('product_name'), item.get('retailer')
        )
        self.data_stores[item['product_name']].set({'_key': key, 'value': item})
        return item

--------------------------------------------------------------------------------
/price_monitor/settings.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import os

BOT_NAME = 'price_monitor'
SPIDER_MODULES = ['price_monitor.spiders']
NEWSPIDER_MODULE = 'price_monitor.spiders'

ROBOTSTXT_OBEY = True

SHUB_KEY = os.getenv('SHUB_KEY')
# if you want to run it locally, replace '999999' with your Scrapy Cloud project ID below
SHUB_PROJ_ID = os.getenv('SHUB_JOBKEY', '999999').split('/')[0]


# settings for the Amazon SES email service
AWS_ACCESS_KEY = os.getenv('AWS_ACCESS_KEY')
AWS_SECRET_KEY = os.getenv('AWS_SECRET_KEY')
EMAIL_ALERT_FROM = 'Price Monitor <SENDER_EMAIL@provider.com>'
EMAIL_ALERT_TO = ['RECEIVER_EMAIL@provider.com']

# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'price_monitor.pipelines.CollectionStoragePipeline': 400,
}

AUTOTHROTTLE_ENABLED = True
# HTTPCACHE_ENABLED = True

--------------------------------------------------------------------------------
/price_monitor/utils.py:
--------------------------------------------------------------------------------
import json
import pkgutil
from datetime import datetime, timedelta


def timestamp_from_reversed(reversed_ts):
    return datetime(5000, 1, 1) - timedelta(seconds=float(reversed_ts))


def reversed_timestamp():
    # Reversed timestamps shrink as time passes, so items stored with them as
    # collection keys come back newest-first when read in key order.
    return str((datetime(5000, 1, 1) - datetime.now()).total_seconds())


def normalize_name(name):
    return name.replace('-', '')


def get_product_names():
    return [
        normalize_name(name)
        for name in json.loads(
            pkgutil.get_data("price_monitor", "resources/urls.json").decode()
        ).keys()
    ]


def get_retailer_name_from_url(url):
    return url.split("://")[1].split("/")[0].replace("www.", "")


def get_retailers_for_product(product_name):
    data = json.loads(
        pkgutil.get_data("price_monitor", "resources/urls.json").decode()
    )
    return {get_retailer_name_from_url(url) for url in data[product_name]}

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
.venv/
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

--------------------------------------------------------------------------------
/price_monitor/resources/urls.json:
--------------------------------------------------------------------------------
{
    "headsetlogitech": [
        "https://www.amazon.com/Logitech-Wireless-Headset-Over-Design/dp/B005GTO07O/",
        "http://www.bestbuy.com/site/logitech-h600-wireless-headset-black/3436118.p",
        "http://www.ebay.com/itm/N-Logitech-Wireless-Headset-H600-Over-The-Head-Design-981-000341-/110985874014"
    ],
    "webcamlogitech": [
        "https://www.amazon.com/Logitech-Widescreen-Calling-Recording-Desktop/dp/B006JH8T3S/",
        "http://www.bestbuy.com/site/logitech-c920-pro-webcam-black/4612476.p?skuId=4612476",
        "http://www.ebay.com/itm/Logitech-HD-Pro-Webcam-C920-1080p-Widescreen-Video-Calling-and-Recording-/272381890214"
    ],
    "amazonechodot": [
        "https://www.amazon.com/dp/B01DFKC2SO",
        "http://www.bestbuy.com/site/amazon-echo-dot/5578851.p?skuId=5578851",
        "http://www.ebay.com/itm/Amazon-Echo-Dot-2nd-Generation-w-Alexa-Voice-Media-Device-All-New-2016-/201668562192"
    ],
    "nikoncoolpix": [
        "https://www.amazon.com/Nikon-COOLPIX-B500-Digital-Camera/dp/B01C3LEE9G/",
        "http://www.bestbuy.com/site/nikon-coolpix-b500-16-0-megapixel-digital-camera-red/4997500.p?skuId=4997500",
        "http://www.ebay.com/itm/Nikon-COOLPIX-B500-Digital-Camera-Red-/162225974018"
    ],
    "bluemicrophone": [
        "https://www.amazon.com/Blue-Snowball-iCE-Condenser-Microphone/dp/B014PYGTUQ/",
        "http://www.bestbuy.com/site/blue-microphones-snowball-usb-cardioid-and-omnidirectional-electret-condenser-vocal-microphone-black/9918056.p?skuId=9918056",
        "http://www.ebay.com/itm/Blue-Microphones-Snowball-Black-iCE-Condenser-Microphone-/172260373002"
    ]
}

--------------------------------------------------------------------------------
/bin/monitor.py:
--------------------------------------------------------------------------------
"""Simple price monitor built with Scrapy and Scrapy Cloud
"""
import argparse
import os
from datetime import datetime, timedelta

import boto
from hubstorage import HubstorageClient
from jinja2 import Environment, PackageLoader
from price_monitor import settings
from price_monitor.utils import get_product_names, get_retailers_for_product
from w3lib.html import remove_tags

jinja_env = Environment(loader=PackageLoader('price_monitor', 'templates'))


class DealsChecker(object):

    def __init__(self, latest_deals, previous_deals, price_threshold=0):
        self.price_threshold = price_threshold
        self.latest_deals = latest_deals
        self.previous_deals = previous_deals

    def is_from_latest_crawl(self, deal):
        """Checks whether the given deal is from the most recent execution.
        """
        return deal in self.latest_deals

    def get_best_deal(self):
        """Returns the item with the best overall price. self.price_threshold can be set
        to avoid considering minor price drops.
        """
        best_so_far = min(self.previous_deals, key=lambda x: x.get('price'))
        best_from_last = min(self.latest_deals, key=lambda x: x.get('price'))
        if best_from_last.get('price') + self.price_threshold < best_so_far.get('price'):
            return best_from_last
        else:
            return best_so_far


class DealsFetcher(object):

    def __init__(self, product_name, apikey, project_id, hours):
        self.product_name = product_name
        project = HubstorageClient(apikey).get_project(project_id)
        self.item_store = project.collections.new_store(product_name)
        self.load_items_from_last_n_hours(hours)

    def load_items_from_last_n_hours(self, n=24):
        """Load items from the last n hours, from the newest to the oldest.
        """
        since_time = int((datetime.now() - timedelta(hours=n)).timestamp() * 1000)
        self.deals = [item.get('value') for item in self.fetch_deals_newer_than(since_time)]

    def fetch_deals_newer_than(self, since_time):
        return list(self.item_store.get(meta=['_key', '_ts'], startts=since_time))

    def get_latest_deal_from_retailer(self, retailer):
        """Returns the most recently extracted deal from a given retailer.
        """
        for deal in self.deals:
            if retailer in deal.get('url'):
                return deal

    def get_deals(self):
        """Returns a tuple with (deals from latest crawl, deals from previous crawls)
        """
        latest_deals = [
            self.get_latest_deal_from_retailer(retailer)
            for retailer in get_retailers_for_product(self.product_name)
        ]
        previous_deals = [
            deal for deal in self.deals if deal not in latest_deals
        ]
        return latest_deals, previous_deals


def send_email_alert(items):
    ses = boto.connect_ses(settings.AWS_ACCESS_KEY, settings.AWS_SECRET_KEY)
    html_body = jinja_env.get_template('email.html').render(items=items)

    ses.send_email(
        settings.EMAIL_ALERT_FROM,
        'Price drop alert',
        remove_tags(html_body),
        settings.EMAIL_ALERT_TO,
        html_body=html_body
    )


def main(args):
    items = []
    for prod_name in get_product_names():
        fetcher = DealsFetcher(prod_name, args.apikey, args.project, args.days * 24)
        checker = DealsChecker(*fetcher.get_deals(), args.threshold)
        best_deal = checker.get_best_deal()
        if checker.is_from_latest_crawl(best_deal):
            items.append(best_deal)

    if items:
        send_email_alert(items)


def parse_args():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--apikey', default=settings.SHUB_KEY or os.getenv('SHUB_KEY'),
                        help='API key to use for Scrapinghub (falls back to the SHUB_KEY env variable)')
    parser.add_argument('--days', type=int, default=1,
                        help='How many days back to compare with the last price')
    parser.add_argument('--threshold', type=float, default=0,
                        help='A margin to avoid raising alerts with minor price drops')
    parser.add_argument('--project', type=int, default=settings.SHUB_PROJ_ID,
                        help='Project ID to get info from')

    return parser.parse_args()


if __name__ == '__main__':
    main(parse_args())

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Scrapy Price Monitor
====================

This is a simple price monitor built with [Scrapy](https://github.com/scrapy/scrapy)
and [Scrapy Cloud](https://scrapinghub.com/scrapy-cloud).

It is basically a Scrapy project with one spider for each online retailer that
we want to monitor prices from. In addition to the spiders, there's a Python
script that is scheduled to run periodically on Scrapy Cloud, checking whether
the latest prices are the best ones in a given time span. If so, the monitor
sends an email alerting you about the price drops.


## Including Products to Monitor

There's a `resources/urls.json` file that lists the URLs of the products that
we want to monitor. If you just want to monitor a new product from the
retailers that are already supported, add a new key for that product and list
its URLs as the value, such as:

    {
        "headsetlogitech": [
            "https://www.amazon.com/.../B005GTO07O/",
            "http://www.bestbuy.com/.../3436118.p",
            "http://www.ebay.com/.../110985874014"
        ],
        "NewProduct": [
            "http://url.for.retailer.x",
            "http://url.for.retailer.y",
            "http://url.for.retailer.z"
        ]
    }


## Supporting Further Retailers

This project currently supports only 3 online retailers, which you can list by
running:

    $ scrapy list
    amazon.com
    bestbuy.com
    ebay.com

If the retailer that you want to monitor is not yet supported, just create a
spider to handle its product pages. To include a spider for samsclub.com, you
could run:

    $ scrapy genspider samsclub.com samsclub.com

Then open the spider and add the extraction rules:

    $ scrapy edit samsclub.com

Have a look at the current spiders and implement the new ones using the same
structure, subclassing `BaseSpider` instead of `scrapy.Spider`. This way, your
spiders will automatically read the URLs list from `resources/urls.json`.
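
For instance, a spider for a new retailer could look roughly like the sketch
below. The `samsclub.com` name and the CSS selectors are placeholders — you
have to inspect the retailer's product pages and adjust them:

    from .base_spider import BaseSpider


    class SamsclubSpider(BaseSpider):
        name = "samsclub.com"

        def parse(self, response):
            # BaseSpider pre-fills the item with product_name, retailer and
            # the timestamp of the request.
            item = response.meta.get('item', {})
            item['url'] = response.url
            # Placeholder selectors: replace them with the real ones.
            item['title'] = response.css("h1.product-title::text").extract_first("").strip()
            item['price'] = float(
                response.css("span.price::text").re_first(r"\$?([\d.]+)") or 0
            )
            yield item

Since `start_requests` comes from `BaseSpider`, the new spider only needs a
`parse` method — as long as `urls.json` contains samsclub.com URLs for at
least one product.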

## Customizing the Price Monitor

The price monitor sends an email using the Amazon SES service, so to run it you
have to set both the `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` environment
variables, which are read in `price_monitor/settings.py`. If you want to use
another email service, you have to rewrite the `send_email_alert` function in
`bin/monitor.py`.

The price monitor can be further customized via parameters passed to the
`bin/monitor.py` script. We will dig into those parameters later, when showing
how to schedule the project on Scrapy Cloud.
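
As a reference, a `send_email_alert` replacement based on plain SMTP could
look like the sketch below. It reuses `jinja_env` and `settings` from
`bin/monitor.py`; the SMTP host, port and credentials are placeholders you
would have to fill in for your own provider:

    import smtplib
    from email.mime.text import MIMEText

    def send_email_alert(items):
        html_body = jinja_env.get_template('email.html').render(items=items)
        msg = MIMEText(html_body, 'html')
        msg['Subject'] = 'Price drop alert'
        msg['From'] = settings.EMAIL_ALERT_FROM
        msg['To'] = ', '.join(settings.EMAIL_ALERT_TO)

        # Placeholder SMTP settings: replace with your provider's host and credentials.
        with smtplib.SMTP('smtp.example.com', 587) as server:
            server.starttls()
            server.login('SMTP_USER', 'SMTP_PASSWORD')
            server.sendmail(settings.EMAIL_ALERT_FROM, settings.EMAIL_ALERT_TO, msg.as_string())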

## Installing and Running

1. Clone this repo:

        $ git clone git@github.com:stummjr/scrapy_price_monitor.git

2. Enter the folder and install the project dependencies:

        $ cd scrapy_price_monitor
        $ pip install -r requirements.txt

3. Create a free forever account on Scrapy Cloud:
   https://app.scrapinghub.com/account/signup/.

4. Create a Scrapy project on Scrapy Cloud and copy the project id from the project URL.

5. Install the [Scrapinghub command line tool (shub)](https://github.com/scrapinghub/shub):

        $ pip install shub

6. Authenticate using your Scrapinghub API key:

        $ shub login

7. Finally, deploy the local project to your Scrapy Cloud project:

        $ shub deploy

This video also explains how to deploy a Scrapy project to Scrapy Cloud:
https://youtu.be/JYch0zRmcgU


## How to Schedule on Scrapy Cloud

After you have deployed the project to Scrapy Cloud, it's time to schedule its
periodic execution.

This project has two main components:

- the [**spiders**](https://github.com/stummjr/scrapy_price_monitor/tree/master/price_monitor/spiders) that collect prices from the retailers' websites
- the [**price monitor script**](https://github.com/stummjr/scrapy_price_monitor/blob/master/bin/monitor.py) that checks whether there's a new deal in the latest prices

You have to schedule both the spiders and the monitor to run periodically on
Scrapy Cloud. It's a good idea to schedule all the spiders to run at the same
time and schedule the monitor to run about 15 minutes after the spiders.

Take a look at this video to learn how to schedule periodic jobs on Scrapy Cloud:
https://youtu.be/JYch0zRmcgU?t=1m51s


### Parameters for the Monitor Script

The monitor script takes these parameters, and you can pass them via the
parameters box in the scheduling dialog:

- `--days`: how many days back of collected prices to compare with the most recently scraped ones.
- `--threshold`: a margin that you can set to avoid getting alerts from minor price changes. For example, if you set it to 1.0, you will only get alerts when the price drop is bigger than $1.00.
- `--apikey`: your Scrapy Cloud API key. You can get it at: https://app.scrapinghub.com/account/apikey.
- `--project`: the Scrapy Cloud project where the monitor is deployed (you can grab it from your project URL at Scrapy Cloud).


## Running in a Local Environment

You can run this project on Scrapy Cloud or in your local environment. The only
dependency on Scrapy Cloud is the [Collections API](https://doc.scrapinghub.com/api/collections.html),
but the spiders and the monitor can be executed locally.

To do that, first add your Scrapy Cloud project id to the [`SHUB_PROJ_ID` variable
in settings.py](https://github.com/stummjr/scrapy_price_monitor/blob/master/price_monitor/settings.py#L11).

Then run the spiders via command line:

    $ scrapy crawl bestbuy.com

This will run the spider named `bestbuy.com` and store the scraped data in a
Scrapy Cloud collection, under the project you set in the previous step.

You can also run the price monitor via command line:

    $ python bin/monitor.py --apikey <SCRAPINGHUB_API_KEY> --days 2 --threshold 1 --project <PROJECT_ID>
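
Putting it all together, a local run could look roughly like this. The exported
variable names are the ones read by `price_monitor/settings.py`, and the values
are placeholders:

    $ export SHUB_KEY=<SCRAPINGHUB_API_KEY>
    $ export AWS_ACCESS_KEY=<AWS_ACCESS_KEY>
    $ export AWS_SECRET_KEY=<AWS_SECRET_KEY>
    $ scrapy crawl amazon.com
    $ scrapy crawl bestbuy.com
    $ scrapy crawl ebay.com
    $ python bin/monitor.py --days 2 --threshold 1 --project <PROJECT_ID>

With `SHUB_KEY` exported, the `--apikey` argument can be omitted, since the
monitor falls back to that environment variable.
--------------------------------------------------------------------------------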