├── douyin
│   ├── __init__.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── tools.cpython-36.pyc
│   │   │   └── __init__.cpython-36.pyc
│   │   └── tools.py
│   ├── spiders
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── dyspider.cpython-36.pyc
│   │   │   ├── categoryspider.cpython-36.pyc
│   │   │   └── categoryvideospider.cpython-36.pyc
│   │   ├── __init__.py
│   │   ├── categoryspider.py
│   │   ├── dyspider.py
│   │   └── categoryvideospider.py
│   ├── maintest.py
│   ├── items.py
│   ├── user_agents.py
│   ├── pipelines.py
│   ├── main.py
│   ├── settings.py
│   └── middlewares.py
├── .idea
│   ├── vcs.xml
│   ├── misc.xml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── modules.xml
│   ├── douyin.iml
│   └── workspace.xml
├── scrapy.cfg
├── README.md
└── LICENSE
/douyin/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/douyin/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
--------------------------------------------------------------------------------
/douyin/utils/__pycache__/tools.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gisShield/douyin/HEAD/douyin/utils/__pycache__/tools.cpython-36.pyc
--------------------------------------------------------------------------------
/douyin/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gisShield/douyin/HEAD/douyin/utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/douyin/spiders/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gisShield/douyin/HEAD/douyin/spiders/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/douyin/spiders/__pycache__/dyspider.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gisShield/douyin/HEAD/douyin/spiders/__pycache__/dyspider.cpython-36.pyc
--------------------------------------------------------------------------------
/douyin/spiders/__pycache__/categoryspider.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gisShield/douyin/HEAD/douyin/spiders/__pycache__/categoryspider.cpython-36.pyc
--------------------------------------------------------------------------------
/douyin/spiders/__pycache__/categoryvideospider.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gisShield/douyin/HEAD/douyin/spiders/__pycache__/categoryvideospider.cpython-36.pyc
--------------------------------------------------------------------------------
/douyin/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
5 |
6 | [settings]
7 | default = douyin.settings
8 |
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = douyin
12 |
--------------------------------------------------------------------------------
/.idea/douyin.iml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## [Deprecated] Douyin crawler
2 |
3 | ------
4 |
5 | ### Background
6 | A personal practice project from when I was first learning Python and the Scrapy framework. **This crawler is for learning purposes only** and must not be used for any other purpose.
7 |
8 | ### Dependencies
9 |
10 | - python 3.6.1
11 | - scrapy 1.5.0
12 | - MongoDB
13 | - APScheduler
14 |
15 | ### Overview
16 |
17 | The crawler fetches the search listings for "hot challenges" (热门挑战) and "hot music" (热门音乐), sorts them by participant count, and then crawls the video data of the more popular entries, storing each set in MongoDB. Data is refreshed daily in the early morning. The data set is currently incomplete; there is no full-refresh feature yet.
18 |
19 | ### Future plans (no promises...)
20 |
21 | 1. Build a standalone client and a web front end for browsing and filtering the data
22 |
23 | ### Changelog
24 | - 20180914
25 |   The crawl endpoints seem to have stopped working; I will update them soon.
26 |
--------------------------------------------------------------------------------
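Note: besides `scrapy crawl <name>`, the spiders can be launched programmatically. A minimal sketch, assuming the project package is importable and the MongoDB instance configured in settings.py is reachable:

```python
# Run the list spider through Scrapy's own API instead of the CLI.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())  # loads douyin/settings.py
process.crawl('categorySpider')  # spider name as defined in spiders/categoryspider.py
process.start()                  # blocks until the crawl finishes
```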
/douyin/maintest.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | from apscheduler.schedulers.background import BackgroundScheduler
4 | import subprocess
5 | import os
6 | from datetime import datetime
7 | import time
8 | import logging
9 |
10 | logging.basicConfig(level=logging.DEBUG)
11 |
12 |
13 | def tick_list():
14 |     logging.debug('Starting the list spider! The time is: %s' % datetime.now())
15 | app_path = os.path.dirname(os.path.realpath(__file__))
16 | subprocess.Popen("scrapy crawl categorySpider", shell=True, cwd=app_path)
17 |
18 |
19 | def tick_challenge():
20 |     logging.debug('Starting the hot-challenge spider! The time is: %s' % datetime.now())
21 |     subprocess.Popen("scrapy crawl categoryVideoSpider", shell=True)
22 |
23 |
24 | def tick_music():
25 |     logging.debug('Starting the hot-music spider! The time is: %s' % datetime.now())
26 |     subprocess.Popen("scrapy crawl douyinSpider", shell=True)
27 |
28 |
29 | if __name__ == '__main__':
30 | logging.debug(
31 |         '====================== Program started! The time is: %s =======================' %
32 | datetime.now())
33 | tick_list()
34 |
--------------------------------------------------------------------------------
/douyin/utils/tools.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | import pymongo
4 | import json
5 | import random
6 | import time
7 | from scrapy.conf import settings
8 |
9 |
10 | class DBTools:
11 |     '''
12 |     MongoDB connection helper.
13 |     '''
14 |     host = settings['MONGODB_HOST']  # values defined in settings.py, used by the pipelines
15 |     port = settings['MONGODB_PORT']
16 |     dbName = settings['MONGODB_DBNAME']  # database name
17 |     client = pymongo.MongoClient(host=host, port=port)  # connect to MongoDB
18 |     tdb = client[dbName]
19 |
20 |     def __init__(self, name):
21 |         print('name:', name)
22 |         self.post = self.tdb[name]  # the collection to read/write
23 |
24 |     def get_db_con(self):
25 |         return self.post
26 |
27 |
28 | class MyTools:
29 |     '''
30 |     Basic utility helpers.
31 |     '''
32 |     @staticmethod
33 |     def init_device_id():
34 |         # random ten-digit device id, returned as a string
35 |         value = random.randint(1000000000, 9999999999)
36 |         return str(value)
37 |
38 |     @staticmethod
39 |     def transform_time(u_time):
40 |         # unix timestamp -> 'YYYY/MM/DD HH:MM:SS' local-time string
41 |         timeArray = time.localtime(u_time)
42 |         otherStyleTime = time.strftime("%Y/%m/%d %H:%M:%S", timeArray)
43 |         return otherStyleTime
44 |
--------------------------------------------------------------------------------
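To see what the two MyTools helpers produce without touching Scrapy or MongoDB, here is a self-contained sketch that re-implements them inline:

```python
import random
import time

# Mirrors MyTools.init_device_id: a random ten-digit numeric string.
device_id = str(random.randint(1000000000, 9999999999))
print(device_id)  # e.g. '4839201746'

# Mirrors MyTools.transform_time: unix timestamp -> 'YYYY/MM/DD HH:MM:SS'.
print(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(1526536130)))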
/douyin/items.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define here the models for your scraped items
4 | #
5 | # See documentation in:
6 | # https://doc.scrapy.org/en/latest/topics/items.html
7 | import scrapy
8 |
9 |
10 | class DouyinItem(scrapy.Item):
11 |     # define the fields for your item here like:
12 |     # name = scrapy.Field()
13 |
14 |     # unique id of the challenge/music category
15 |     category_id = scrapy.Field()
16 |     # author info
17 |     # user id
18 |     user_uid = scrapy.Field()
19 |     # user short id
20 |     user_sid = scrapy.Field()
21 |     # birthday given at registration
22 |     user_birthday = scrapy.Field()
23 |     # gender
24 |     user_gender = scrapy.Field()
25 |
26 |     # video info
27 |     # video id
28 |     video_id = scrapy.Field()
29 |     # video description
30 |     video_desc = scrapy.Field()
31 |     # play count
32 |     video_play = scrapy.Field()
33 |     # comment count
34 |     video_comment = scrapy.Field()
35 |     # share count
36 |     video_share = scrapy.Field()
37 |     # like count
38 |     video_digg = scrapy.Field()
39 |     # video download URL
40 |     video_durl = scrapy.Field()
41 |     # video cover URL
42 |     video_gurl = scrapy.Field()
43 |
44 |     video_time = scrapy.Field()
45 |
46 |
47 | class DouyinCategoryItem(scrapy.Item):
48 |     # category type, from the API's desc field: 热门音乐 (hot music) / 热门挑战 (hot challenge)
49 |     category_type = scrapy.Field()
50 |     category_title = scrapy.Field()
51 |     category_id = scrapy.Field()
52 |     category_url = scrapy.Field()
53 |     category_desc = scrapy.Field()
54 |     category_user_count = scrapy.Field()
55 |
--------------------------------------------------------------------------------
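Scrapy items behave like dicts with a fixed set of allowed keys; a short sketch of how a spider fills one (all values here are made up for illustration):

```python
from douyin.items import DouyinCategoryItem

item = DouyinCategoryItem()
item['category_type'] = '热门挑战'   # taken from the API's desc field
item['category_id'] = '12345'        # hypothetical challenge id
item['category_title'] = 'demo'      # hypothetical title
item['category_user_count'] = 1000   # hypothetical participant count
print(dict(item))                    # the pipeline persists items via dict(item)
```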
/douyin/user_agents.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 |
4 | agents = [
5 |     "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
6 |     "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A405 Safari/600.1.4",
7 |     "Mozilla/5.0 (Linux; U; Android 7.0; zh-CN; SM-G9550 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/40.0.2214.89 UCBrowser/11.7.0.953 Mobile Safari/537.36",
8 |     "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
9 |     "Mozilla/5.0 (Linux; Android 6.0; HUAWEI VIE-AL10 Build/HUAWEIVIE-AL10) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Mobile Safari/537.36",
10 |     "Mozilla/5.0 (Linux; U; Android 4.4.4; zh-cn; MI 4LTE Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/39.0.0.0 Mobile Safari/537.36 XiaoMi/MiuiBrowser/2.1.1",
11 |     "Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19",
12 |     "Mozilla/5.0 (Linux; U; Android 4.1.1; zh-cn; SCH-N719 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
13 | ]
--------------------------------------------------------------------------------
/douyin/pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define your item pipelines here
4 | #
5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
7 | from douyin.utils.tools import DBTools
8 | # Persists category and video items to MongoDB
9 |
10 |
11 | class DouyinPipeline(object):
12 | def __init__(self):
13 |
14 | db_con_video = DBTools('video')
15 | db_con_category = DBTools('category')
16 | self.post_video = db_con_video.get_db_con()
17 | self.post_category = db_con_category.get_db_con()
18 |
19 |     def process_item(self, item, spider):
20 |         if spider.name == 'categorySpider':
21 |             result = self.post_category.find(
22 |                 {'category_id': item['category_id']})
23 |             print("Saving category item")
24 |             if result.count() > 0:
25 |                 self.post_category.update(
26 |                     {'category_id': item['category_id']}, dict(item))  # update existing document
27 |             else:
28 |                 self.post_category.insert(dict(item))  # insert new document
29 |
30 |             return item
31 |         elif spider.name == 'categoryVideoSpider' or spider.name == 'douyinSpider':
32 |             result = self.post_video.find({'video_id': item['video_id']})
33 |             print("Saving video item")
34 |             if result.count() > 0:
35 |                 self.post_video.update(
36 |                     {'video_id': item['video_id']}, dict(item))  # update existing document
37 |             else:
38 |                 self.post_video.insert(dict(item))  # insert new document
39 |
40 |             return item
41 |
--------------------------------------------------------------------------------
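The find/count, then update-or-insert sequence in process_item can be collapsed into a single upsert. A sketch using PyMongo's replace_one (a newer API than the update/insert calls above), with the host and database names taken from settings.py:

```python
import pymongo

client = pymongo.MongoClient('192.168.197.128', 27017)  # MONGODB_HOST / MONGODB_PORT
videos = client['douyin']['video']                      # MONGODB_DBNAME, 'video' collection

doc = {'video_id': '123', 'video_play': 42}  # hypothetical item dict
# Insert if missing, replace if present: one round trip instead of find + update/insert.
videos.replace_one({'video_id': doc['video_id']}, doc, upsert=True)
```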
/douyin/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | from apscheduler.schedulers.background import BackgroundScheduler
4 | import subprocess
5 | import os
6 | from datetime import datetime
7 | import time
8 | import logging
9 |
10 | logging.basicConfig(level=logging.DEBUG)
11 |
12 |
13 | def tick_list():
14 |     logging.debug('Starting the list spider! The time is: %s' % datetime.now())
15 | app_path = os.path.dirname(os.path.realpath(__file__))
16 | subprocess.Popen("scrapy crawl categorySpider", shell=True, cwd=app_path)
17 |
18 |
19 | def tick_challenge():
20 |     logging.debug('Starting the hot-challenge spider! The time is: %s' % datetime.now())
21 | app_path = os.path.dirname(os.path.realpath(__file__))
22 | subprocess.Popen(
23 | "scrapy crawl categoryVideoSpider",
24 | shell=True,
25 | cwd=app_path)
26 |
27 |
28 | def tick_music():
29 |     logging.debug('Starting the hot-music spider! The time is: %s' % datetime.now())
30 | app_path = os.path.dirname(os.path.realpath(__file__))
31 | subprocess.Popen("scrapy crawl douyinSpider", shell=True, cwd=app_path)
32 |
33 |
34 | if __name__ == '__main__':
35 | logging.debug(
36 |         '====================== Program started! The time is: %s =======================' %
37 | datetime.now())
38 | scheduler = BackgroundScheduler()
39 |     scheduler.add_job(tick_list, 'cron', day='*', hour=0, minute=30)  # refresh daily after midnight
40 |
41 | scheduler.add_job(tick_challenge, 'cron', day='*', hour=2, minute=0)
42 | scheduler.add_job(tick_music, 'cron', day='*', hour=3, minute=0)
43 |     scheduler.start()  # scheduled jobs run on their own threads
44 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
45 |
46 | try:
47 | # This is here to simulate application activity (which keeps the main
48 | # thread alive).
49 | while True:
50 |             time.sleep(2)  # keep the main thread alive; jobs run on separate threads
51 | print('sleep!')
52 | except (KeyboardInterrupt, SystemExit):
53 | # Not strictly necessary if daemonic mode is enabled but should be done
54 | # if possible
55 | scheduler.shutdown()
56 | logging.debug('Exit The Job!')
57 | print('Exit The Job!')
58 |
--------------------------------------------------------------------------------
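Since the `__main__` block only sleeps in a loop after starting the scheduler, APScheduler's BlockingScheduler achieves the same with less code. A sketch with the same cron trigger and a stand-in job:

```python
from apscheduler.schedulers.blocking import BlockingScheduler

def job():
    print('tick')  # stand-in for one of the tick_* functions

scheduler = BlockingScheduler()
scheduler.add_job(job, 'cron', day='*', hour=0, minute=30)  # daily at 00:30
scheduler.start()  # blocks the main thread; Ctrl+C stops it
```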
/douyin/spiders/categoryspider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | '''
4 | Crawls the category lists ("hot challenges" and "hot music").
5 | '''
6 | import json
7 |
8 | from scrapy.http import Request
9 | from scrapy.spiders import CrawlSpider
10 |
11 | from douyin.items import DouyinCategoryItem
12 |
13 |
14 | class categorySpider(CrawlSpider):
15 | name = 'categorySpider'
16 | redis_key = 'categorySpider'
17 | cursor_num = 0
18 | count_size = 10
19 | url = "https://aweme.snssdk.com/aweme/v1/category/list/?version_code=181&count=10&cursor="
20 | start_urls = [url + str(cursor_num)]
21 |
22 | def parse(self, response):
23 | jsonresp = json.loads(response.body_as_unicode())
24 | if jsonresp['status_code'] == 0:
25 | if jsonresp['has_more'] == 1:
26 | aweme_list = list(jsonresp['category_list'])
27 | for jsonobj in aweme_list:
28 | item = self.init_item(jsonobj)
29 | yield item
30 | self.cursor_num += self.count_size
31 | nexturl = self.url + str(self.cursor_num)
32 | yield Request(nexturl, callback=self.parse)
33 | else:
34 | aweme_list = list(jsonresp['category_list'])
35 | for jsonobj in aweme_list:
36 | item = self.init_item(jsonobj)
37 | yield item
38 |
39 | def init_item(self, jsonobj):
40 | item = DouyinCategoryItem()
41 |         if str(jsonobj['desc']) == "热门挑战":  # "hot challenge"; the literal must match the API value
42 | item['category_type'] = jsonobj['desc']
43 | item['category_id'] = jsonobj['challenge_info']['cid']
44 | item['category_desc'] = jsonobj['challenge_info']['desc']
45 | item['category_title'] = jsonobj['challenge_info']['cha_name']
46 | item['category_url'] = jsonobj['challenge_info']['schema']
47 | item['category_user_count'] = jsonobj['challenge_info']['user_count']
48 | else:
49 | # print("执行热门音乐赋值")
50 | item['category_type'] = jsonobj['desc']
51 | item['category_title'] = jsonobj['music_info']['title']
52 | item['category_id'] = jsonobj['music_info']['mid']
53 | item['category_url'] = 'https://api.amemv.com/aweme/v1/music/aweme/?music_id=' + \
54 | str(jsonobj['music_info']['mid'])
55 | item['category_desc'] = jsonobj['music_info']['offline_desc']
56 | item['category_user_count'] = jsonobj['music_info']['user_count']
57 | return item
58 |
--------------------------------------------------------------------------------
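For orientation, the keys that parse() and init_item() read imply a response of roughly this shape. This is an illustrative skeleton reconstructed from the code, not an API reference; all values are placeholders:

```python
sample_response = {
    "status_code": 0,      # 0 means success
    "has_more": 1,         # 1 -> request the next page by advancing the cursor
    "category_list": [
        {
            "desc": "热门挑战",  # the branch key: hot challenge vs. hot music
            "challenge_info": {
                "cid": "12345",
                "desc": "placeholder description",
                "cha_name": "placeholder name",
                "schema": "placeholder url",
                "user_count": 100000,
            },
        },
    ],
}
```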
/douyin/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Scrapy settings for douyin project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | # https://doc.scrapy.org/en/latest/topics/settings.html
9 | # https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
10 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
11 |
12 | BOT_NAME = 'douyin'
13 |
14 | SPIDER_MODULES = ['douyin.spiders']
15 | NEWSPIDER_MODULE = 'douyin.spiders'
16 |
17 | # MongoDB connection settings (read by douyin/utils/tools.py)
18 | MONGODB_HOST = '192.168.197.128'
19 | MONGODB_PORT = 27017
20 | MONGODB_DBNAME = 'douyin'
21 |
22 |
23 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
24 | # Samsung GALAXY S8+ / UC Browser identifier (user agents are rotated in middlewares.py)
25 |
26 | # Obey robots.txt rules
27 | ROBOTSTXT_OBEY = True
28 |
29 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
30 | #CONCURRENT_REQUESTS = 32
31 |
32 | # Configure a delay for requests for the same website (default: 0)
33 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
34 | # See also autothrottle settings and docs
35 | #DOWNLOAD_DELAY = 3
36 | # The download delay setting will honor only one of:
37 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16
38 | #CONCURRENT_REQUESTS_PER_IP = 16
39 |
40 | # Disable cookies (enabled by default)
41 | #COOKIES_ENABLED = False
42 |
43 | # Disable Telnet Console (enabled by default)
44 | #TELNETCONSOLE_ENABLED = False
45 |
46 | # Override the default request headers:
47 | #DEFAULT_REQUEST_HEADERS = {
48 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
49 | # 'Accept-Language': 'en',
50 | #}
51 |
52 | # Enable or disable spider middlewares
53 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
54 | #SPIDER_MIDDLEWARES = {
55 | # 'douyin.middlewares.DouyinSpiderMiddleware': 543,
56 | #}
57 |
58 | # Enable or disable downloader middlewares
59 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
60 | #DOWNLOADER_MIDDLEWARES = {
61 | # 'douyin.middlewares.DouyinDownloaderMiddleware': 543,
62 | #}
63 |
64 | # Enable or disable extensions
65 | # See https://doc.scrapy.org/en/latest/topics/extensions.html
66 | #EXTENSIONS = {
67 | # 'scrapy.extensions.telnet.TelnetConsole': None,
68 | #}
69 |
70 | # Configure item pipelines
71 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
72 | ITEM_PIPELINES = {
73 |     # 'douyin.pipelines.DouyinCategoryPipeline': 402,
74 |     'douyin.pipelines.DouyinPipeline': 402,
75 | }
76 |
77 | DOWNLOADER_MIDDLEWARES = {
78 |     'douyin.middlewares.UserAgentMiddleware': 401,
79 | }
80 |
81 | # Enable and configure the AutoThrottle extension (disabled by default)
82 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
83 | #AUTOTHROTTLE_ENABLED = True
84 | # The initial download delay
85 | #AUTOTHROTTLE_START_DELAY = 5
86 | # The maximum download delay to be set in case of high latencies
87 | #AUTOTHROTTLE_MAX_DELAY = 60
88 | # The average number of requests Scrapy should be sending in parallel to
89 | # each remote server
90 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
91 | # Enable showing throttling stats for every response received:
92 | #AUTOTHROTTLE_DEBUG = False
93 |
94 | # Enable and configure HTTP caching (disabled by default)
95 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
96 | #HTTPCACHE_ENABLED = True
97 | #HTTPCACHE_EXPIRATION_SECS = 0
98 | #HTTPCACHE_DIR = 'httpcache'
99 | #HTTPCACHE_IGNORE_HTTP_CODES = []
100 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
101 |
--------------------------------------------------------------------------------
/douyin/middlewares.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Define here the models for your spider middleware
4 | #
5 | # See documentation in:
6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html
7 | import random
8 |
9 | from scrapy import signals
10 |
11 | from douyin.user_agents import agents
12 |
13 |
14 | class DouyinSpiderMiddleware(object):
15 | # Not all methods need to be defined. If a method is not defined,
16 | # scrapy acts as if the spider middleware does not modify the
17 | # passed objects.
18 |
19 | @classmethod
20 | def from_crawler(cls, crawler):
21 | # This method is used by Scrapy to create your spiders.
22 | s = cls()
23 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
24 | return s
25 |
26 | def process_spider_input(self, response, spider):
27 | # Called for each response that goes through the spider
28 | # middleware and into the spider.
29 |
30 | # Should return None or raise an exception.
31 | return None
32 |
33 | def process_spider_output(self, response, result, spider):
34 | # Called with the results returned from the Spider, after
35 | # it has processed the response.
36 |
37 | # Must return an iterable of Request, dict or Item objects.
38 | for i in result:
39 | yield i
40 |
41 | def process_spider_exception(self, response, exception, spider):
42 | # Called when a spider or process_spider_input() method
43 | # (from other spider middleware) raises an exception.
44 |
45 | # Should return either None or an iterable of Response, dict
46 | # or Item objects.
47 | pass
48 |
49 | def process_start_requests(self, start_requests, spider):
50 | # Called with the start requests of the spider, and works
51 | # similarly to the process_spider_output() method, except
52 | # that it doesn’t have a response associated.
53 |
54 | # Must return only requests (not items).
55 | for r in start_requests:
56 | yield r
57 |
58 | def spider_opened(self, spider):
59 | spider.logger.info('Spider opened: %s' % spider.name)
60 |
61 |
62 | class DouyinDownloaderMiddleware(object):
63 | # Not all methods need to be defined. If a method is not defined,
64 | # scrapy acts as if the downloader middleware does not modify the
65 | # passed objects.
66 |
67 | @classmethod
68 | def from_crawler(cls, crawler):
69 | # This method is used by Scrapy to create your spiders.
70 | s = cls()
71 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
72 | return s
73 |
74 | def process_request(self, request, spider):
75 | # Called for each request that goes through the downloader
76 | # middleware.
77 |
78 | # Must either:
79 | # - return None: continue processing this request
80 | # - or return a Response object
81 | # - or return a Request object
82 | # - or raise IgnoreRequest: process_exception() methods of
83 | # installed downloader middleware will be called
84 | return None
85 |
86 | def process_response(self, request, response, spider):
87 | # Called with the response returned from the downloader.
88 |
89 | # Must either;
90 | # - return a Response object
91 | # - return a Request object
92 | # - or raise IgnoreRequest
93 | return response
94 |
95 | def process_exception(self, request, exception, spider):
96 | # Called when a download handler or a process_request()
97 | # (from other downloader middleware) raises an exception.
98 |
99 | # Must either:
100 | # - return None: continue processing this exception
101 | # - return a Response object: stops process_exception() chain
102 | # - return a Request object: stops process_exception() chain
103 | pass
104 |
105 | def spider_opened(self, spider):
106 | spider.logger.info('Spider opened: %s' % spider.name)
107 |
108 |
109 | class UserAgentMiddleware(object):
110 | """ 换User-Agent """
111 |
112 | def process_request(self, request, spider):
113 | agent = random.choice(agents)
114 | print('User-Agent:' + agent)
115 | request.headers["User-Agent"] = agent
--------------------------------------------------------------------------------
/douyin/spiders/dyspider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 |
4 | import json
5 |
6 | from douyin.items import DouyinItem
7 | from scrapy.http import Request
8 | from scrapy.spiders import CrawlSpider
9 |
10 | from douyin.utils.tools import DBTools
11 | from douyin.utils.tools import MyTools
12 |
13 |
14 | class douyinSpider(CrawlSpider):
15 | name = 'douyinSpider'
16 | redis_key = 'douyinSpider'
17 | cursor_num = 0
18 | count_size = 20
19 | i = 0
20 | urls = 'https://api.amemv.com/aweme/v1/music/aweme/?count=20&cursor=%d&music_id=%s&device_id=%s'
21 | ids = []
22 | new_video_id = ''
23 | flag = False
24 |
25 |     def __init__(self):
26 |         # query the qualifying category ids from MongoDB and build the start urls
27 |         super(douyinSpider, self).__init__()
28 |         db_con = DBTools('category')
29 |         self.post = db_con.get_db_con()
30 |
31 |         db_video_con = DBTools('video')
32 |         self.video_post = db_video_con.get_db_con()
33 |
34 |         results = self.post.find({"category_type": "热门音乐"}).sort([("category_user_count", -1)])
35 |         for obj in results:
36 |             self.ids.append(str(obj['category_id']))
37 |
38 |         self.start_urls = [self.urls % (self.cursor_num, str(
39 |             self.ids[self.i]), MyTools.init_device_id())]
40 |         # self.new_video_id = self.getNewVideoId(str(self.ids[self.i]))
41 |
42 | def parse(self, response):
43 |         print('Starting data crawl...')
44 | jsonresp = json.loads(response.body_as_unicode())
45 | if jsonresp['status_code'] == 0:
46 | if jsonresp['has_more'] == 1:
47 | aweme_list = list(jsonresp['aweme_list'])
48 | for jsonobj in aweme_list:
49 |                     # if self.notUpdate(self.ids[self.i], jsonobj):
50 |                     #     break
51 |                     # else:
52 | item = self.init_item(jsonobj, self.ids[self.i])
53 | yield item
54 | self.cursor_num += self.count_size
55 | nexturl = self.urls % (self.cursor_num,
56 | self.ids[self.i],
57 | MyTools.init_device_id())
58 | yield Request(nexturl, callback=self.parse)
59 | else:
60 | aweme_list = list(jsonresp['aweme_list'])
61 | for jsonobj in aweme_list:
62 |                     # if self.notUpdate(self.ids[self.i], jsonobj):
63 |                     #     break
64 |                     # else:
65 | item = self.init_item(jsonobj, self.ids[self.i])
66 | yield item
67 | self.i += 1
68 | self.cursor_num = 0
69 | if self.i < len(self.ids):
70 | # self.getNewVideoId(str(self.ids[self.i]))
71 | nexturl = self.urls % (
72 | self.cursor_num, self.ids[self.i], MyTools.init_device_id())
73 | yield Request(nexturl, callback=self.parse)
74 |
75 | else:
76 | self.i += 1
77 | self.cursor_num = 0
78 | if self.i < len(self.ids):
79 | # self.getNewVideoId(str(self.ids[self.i]))
80 | nexturl = self.urls % (self.cursor_num,
81 | self.ids[self.i],
82 | MyTools.init_device_id())
83 | yield Request(nexturl, callback=self.parse)
84 | else:
85 | pass
86 |
87 | def init_item(self, jsonobj, category_uid):
88 | item = DouyinItem()
89 | item['category_id'] = str(category_uid)
90 | item['user_uid'] = str(jsonobj['author']['uid'])
91 | item['user_sid'] = str(jsonobj['author']['short_id'])
92 | item['user_birthday'] = str(jsonobj['author']['birthday'])
93 | item['user_gender'] = jsonobj['author']['gender']
94 |
95 | item['video_id'] = str(jsonobj['aweme_id'])
96 | item['video_desc'] = str(jsonobj['desc'])
97 | item['video_play'] = jsonobj['statistics']['play_count']
98 | item['video_comment'] = jsonobj['statistics']['comment_count']
99 | item['video_share'] = jsonobj['statistics']['share_count']
100 | item['video_digg'] = jsonobj['statistics']['digg_count']
101 | item['video_durl'] = str(
102 | jsonobj['video']['download_addr']['url_list'][0])
103 | item['video_gurl'] = str(
104 | jsonobj['video']['dynamic_cover']['url_list'][0])
105 | item['video_time'] = MyTools.transform_time(jsonobj['create_time'])
106 | return item
107 |
108 | def notUpdate(self, categoryid, jsonobj):
109 | if str(categoryid) == str(jsonobj['aweme_id']):
110 | return True
111 | else:
112 | return False
113 |
114 |     def getNewVideoId(self, categoryid):
115 |         results = self.video_post.find(
116 |             {"category_id": str(categoryid)}).sort([("video_time", -1)])
117 |         for obj in results:
118 |             self.new_video_id = obj['video_id']
119 |             break
120 |
--------------------------------------------------------------------------------
/douyin/spiders/categoryvideospider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 |
4 | import json
5 |
6 | from douyin.items import DouyinItem
7 | from scrapy.http import Request
8 | from scrapy.spiders import CrawlSpider
9 |
10 | from douyin.utils.tools import DBTools
11 | from douyin.utils.tools import MyTools
12 |
13 |
14 | class categoryVideoSpider(CrawlSpider):
15 |
16 | name = 'categoryVideoSpider'
17 | redis_key = 'categoryVideoSpider'
18 | cursor_num = 0
19 | count_size = 20
20 | i = 0
21 | urls = 'https://aweme.snssdk.com/aweme/v1/challenge/aweme/?query_type=0&count=20&aid=1128&cursor=%d&ch_id=%s&device_id=%s'
22 | ids = []
23 | new_video_id = ''
24 | flag = False
25 |
26 |     def __init__(self):
27 |         # query the qualifying category ids from MongoDB and build the start urls
28 |         super(categoryVideoSpider, self).__init__()
29 |         db_con = DBTools('category')
30 |         self.post = db_con.get_db_con()
31 |
32 |         db_video_con = DBTools('video')
33 |         self.video_post = db_video_con.get_db_con()
34 |
35 |         results = self.post.find({"category_type": "热门挑战"}).sort(
36 |             [("category_user_count", -1)])
37 |         for obj in results:
38 |             self.ids.append(str(obj['category_id']))
39 |         self.start_urls = [self.urls % (self.cursor_num, str(
40 |             self.ids[self.i]), MyTools.init_device_id())]
41 |         # self.new_video_id = self.getNewVideoId(str(self.ids[self.i]))
42 |
43 | def parse(self, response):
44 |         print('Starting data crawl...')
45 | jsonresp = json.loads(response.body_as_unicode())
46 | if jsonresp['status_code'] == 0:
47 | if jsonresp['has_more'] == 1:
48 | aweme_list = list(jsonresp['aweme_list'])
49 | for jsonobj in aweme_list:
50 |                     # if self.notUpdate(self.ids[self.i], jsonobj):
51 |                     #     break
52 |                     # else:
53 | item = self.init_item(jsonobj, self.ids[self.i])
54 | yield item
55 | self.cursor_num += self.count_size
56 | nexturl = self.urls % (self.cursor_num,
57 | self.ids[self.i],
58 | MyTools.init_device_id())
59 | yield Request(nexturl, callback=self.parse)
60 | else:
61 | aweme_list = list(jsonresp['aweme_list'])
62 | for jsonobj in aweme_list:
63 |                     # if self.notUpdate(self.ids[self.i], jsonobj):
64 |                     #     break
65 |                     # else:
66 | item = self.init_item(jsonobj, self.ids[self.i])
67 | yield item
68 | self.i += 1
69 | self.cursor_num = 0
70 | if self.i < len(self.ids):
71 | #self.getNewVideoId(str(self.ids[self.i]))
72 | nexturl = self.urls % (
73 | self.cursor_num, self.ids[self.i], MyTools.init_device_id())
74 | yield Request(nexturl, callback=self.parse)
75 |
76 | else:
77 | self.i += 1
78 | self.cursor_num = 0
79 | if self.i < len(self.ids):
80 | #self.getNewVideoId(str(self.ids[self.i]))
81 | nexturl = self.urls % (self.cursor_num,
82 | self.ids[self.i],
83 | MyTools.init_device_id())
84 | yield Request(nexturl, callback=self.parse)
85 |
86 | def init_item(self, jsonobj, category_uid):
87 | item = DouyinItem()
88 | item['category_id'] = str(category_uid)
89 | item['user_uid'] = str(jsonobj['author']['uid'])
90 | item['user_sid'] = str(jsonobj['author']['short_id'])
91 | item['user_birthday'] = str(jsonobj['author']['birthday'])
92 | item['user_gender'] = jsonobj['author']['gender']
93 |
94 | item['video_id'] = str(jsonobj['aweme_id'])
95 | item['video_desc'] = str(jsonobj['desc'])
96 | item['video_play'] = jsonobj['statistics']['play_count']
97 | item['video_comment'] = jsonobj['statistics']['comment_count']
98 | item['video_share'] = jsonobj['statistics']['share_count']
99 | item['video_digg'] = jsonobj['statistics']['digg_count']
100 | item['video_durl'] = str(
101 | jsonobj['video']['download_addr']['url_list'][0])
102 | item['video_gurl'] = str(
103 | jsonobj['video']['dynamic_cover']['url_list'][0])
104 | item['video_time'] = MyTools.transform_time(jsonobj['create_time'])
105 | return item
106 |
107 | def notUpdate(self, categoryid, jsonobj):
108 | if str(categoryid) == str(jsonobj['aweme_id']):
109 | return True
110 | else:
111 | return False
112 |
113 |     def getNewVideoId(self, categoryid):
114 |         results = self.video_post.find(
115 |             {"category_id": str(categoryid)}).sort([("video_time", -1)])
116 |         for obj in results:
117 |             self.new_video_id = obj['video_id']
118 |             break
119 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------