├── youtubeCrawler
│   ├── __init__.py
│   ├── spiders
│   │   ├── __init__.py
│   │   └── youtubecrawler.py
│   ├── pipelines.py
│   ├── items.py
│   └── settings.py
├── youtubecrawler.sh
└── scrapy.cfg

/youtubeCrawler/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/youtubeCrawler/spiders/__init__.py:
--------------------------------------------------------------------------------
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
--------------------------------------------------------------------------------
/youtubecrawler.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Quick shell alternative to the Scrapy spider: fetch a watch page and pull
# the view, like and dislike counts out of the classic YouTube page markup.
curl --silent https://www.youtube.com/watch?v=SYdGDH1KrnM \
    | egrep "watch-view|yt-uix-button-content" \
    | tr -s " " "\n" \
    | egrep "watch-view|yt-uix-button-content" \
    | tr -s ">|<" "\n" \
    | grep "^[0-9]" \
    | tr -s "\n" " " \
    | cut -d " " -f1,2,4 \
    | tr -s " " "\n"
--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.org/en/latest/deploy.html

[settings]
default = youtubeCrawler.settings

[deploy]
#url = http://localhost:6800/
project = youtubeCrawler
--------------------------------------------------------------------------------
/youtubeCrawler/pipelines.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html


class YoutubecrawlerPipeline(object):
    def process_item(self, item, spider):
        return item
--------------------------------------------------------------------------------
/youtubeCrawler/items.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class YoutubecrawlerItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    pass
--------------------------------------------------------------------------------
/youtubeCrawler/spiders/youtubecrawler.py:
--------------------------------------------------------------------------------
# Spider (not CrawlSpider) is the right base class here: the spider only
# scrapes a fixed start URL, and overriding parse() on a CrawlSpider would
# break its rule-based link following. The deprecated scrapy.contrib import
# path is replaced by scrapy.spiders.
from scrapy.spiders import Spider


class YoutubeCrawler(Spider):

    name = "youtube"
    allowed_domains = ["youtube.com"]
    start_urls = [
        "https://www.youtube.com/watch?v=bAxlsajbcUg"
    ]

    def parse(self, response):
        # These XPaths target the classic YouTube watch-page markup: the
        # view-counter div and the like/dislike button renderer spans.
        # extract_first() returns None instead of raising IndexError when
        # an element is missing.
        visits = response.xpath(
            "//div[@class='watch-view-count']/text()").extract_first()

        likes = response.xpath(
            "//span[contains(@class, 'like-button-renderer')]"
            "/span[position()=1]/button/span/text()").extract_first()

        dislikes = response.xpath(
            "//span[contains(@class, 'like-button-renderer')]"
            "/span[position()=3]/button/span/text()").extract_first()

        print("visits: %s" % visits)
        print("likes: %s" % likes)
        print("dislikes: %s" % dislikes)
--------------------------------------------------------------------------------
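The spider above only prints the counts to stdout, while items.py and
pipelines.py are still the empty startproject stubs. As a sketch of how the
counts could flow through Scrapy's item machinery instead (the VideoStatsItem
name and its fields are illustrative assumptions, not part of the original
project), items.py could define:

    import scrapy


    class VideoStatsItem(scrapy.Item):
        # one field per counter scraped from the watch page (assumed names)
        visits = scrapy.Field()
        likes = scrapy.Field()
        dislikes = scrapy.Field()

The spider's parse() would then end with
yield VideoStatsItem(visits=visits, likes=likes, dislikes=dislikes) instead of
the print calls, and running scrapy crawl youtube -o stats.json from the
directory containing scrapy.cfg would dump the items to a file.
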
"visits: %s" % visits 24 | print "likes: %s" % likes 25 | print "dislikes: %s" % dislikes 26 | -------------------------------------------------------------------------------- /youtubeCrawler/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for youtubeCrawler project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'youtubeCrawler' 13 | 14 | SPIDER_MODULES = ['youtubeCrawler.spiders'] 15 | NEWSPIDER_MODULE = 'youtubeCrawler.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'youtubeCrawler (+http://www.yourdomain.com)' 20 | 21 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 22 | #CONCURRENT_REQUESTS=32 23 | 24 | # Configure a delay for requests for the same website (default: 0) 25 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 26 | # See also autothrottle settings and docs 27 | #DOWNLOAD_DELAY=3 28 | # The download delay setting will honor only one of: 29 | #CONCURRENT_REQUESTS_PER_DOMAIN=16 30 | #CONCURRENT_REQUESTS_PER_IP=16 31 | 32 | # Disable cookies (enabled by default) 33 | #COOKIES_ENABLED=False 34 | 35 | # Disable Telnet Console (enabled by default) 36 | #TELNETCONSOLE_ENABLED=False 37 | 38 | # Override the default request headers: 39 | #DEFAULT_REQUEST_HEADERS = { 40 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 41 | # 'Accept-Language': 'en', 42 | #} 43 | 44 | # Enable or disable spider middlewares 45 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 46 | #SPIDER_MIDDLEWARES = { 47 | # 'youtubeCrawler.middlewares.MyCustomSpiderMiddleware': 543, 48 | #} 49 | 50 | # Enable or disable downloader middlewares 51 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 52 | #DOWNLOADER_MIDDLEWARES = { 53 | # 'youtubeCrawler.middlewares.MyCustomDownloaderMiddleware': 543, 54 | #} 55 | 56 | # Enable or disable extensions 57 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html 58 | #EXTENSIONS = { 59 | # 'scrapy.telnet.TelnetConsole': None, 60 | #} 61 | 62 | # Configure item pipelines 63 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html 64 | #ITEM_PIPELINES = { 65 | # 'youtubeCrawler.pipelines.SomePipeline': 300, 66 | #} 67 | 68 | # Enable and configure the AutoThrottle extension (disabled by default) 69 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html 70 | # NOTE: AutoThrottle will honour the standard settings for concurrency and delay 71 | #AUTOTHROTTLE_ENABLED=True 72 | # The initial download delay 73 | #AUTOTHROTTLE_START_DELAY=5 74 | # The maximum download delay to be set in case of high latencies 75 | #AUTOTHROTTLE_MAX_DELAY=60 76 | # Enable showing throttling stats for every response received: 77 | #AUTOTHROTTLE_DEBUG=False 78 | 79 | # Enable and configure HTTP caching (disabled by default) 80 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 81 | #HTTPCACHE_ENABLED=True 82 | 
#HTTPCACHE_EXPIRATION_SECS=0
#HTTPCACHE_DIR='httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES=[]
#HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage'
--------------------------------------------------------------------------------
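The YoutubecrawlerPipeline in pipelines.py above just passes items through.
As a minimal sketch of a pipeline that persists scraped items, assuming the
spider yields items as suggested earlier (the JsonWriterPipeline class name
and the videostats.jl filename are assumptions, not part of the original
project):

    import json


    class JsonWriterPipeline(object):
        # open_spider/close_spider are standard Scrapy pipeline hooks,
        # called once when the spider starts and finishes.
        def open_spider(self, spider):
            self.file = open('videostats.jl', 'w')

        def close_spider(self, spider):
            self.file.close()

        def process_item(self, item, spider):
            # one JSON object per line, one line per scraped item
            self.file.write(json.dumps(dict(item)) + "\n")
            return item

To activate it, the ITEM_PIPELINES block in settings.py would be uncommented
and pointed at 'youtubeCrawler.pipelines.JsonWriterPipeline' with a priority
such as 300.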