├── douyin
│   ├── __init__.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── tools.py
│   ├── spiders
│   │   ├── __init__.py
│   │   ├── categoryspider.py
│   │   ├── dyspider.py
│   │   └── categoryvideospider.py
│   ├── maintest.py
│   ├── items.py
│   ├── user_agents.py
│   ├── pipelines.py
│   ├── main.py
│   ├── settings.py
│   └── middlewares.py
├── .idea
│   ├── vcs.xml
│   ├── misc.xml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── modules.xml
│   ├── douyin.iml
│   └── workspace.xml
├── scrapy.cfg
├── README.md
└── LICENSE
--------------------------------------------------------------------------------
/douyin/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/douyin/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
--------------------------------------------------------------------------------
/douyin/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
5 | 
6 | [settings]
7 | default = douyin.settings
8 | 
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = douyin
--------------------------------------------------------------------------------
/.idea/douyin.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## [Deprecated] Douyin crawler
2 | 
3 | ------
4 | 
5 | ### Background
6 | A small practice project for learning Python and the Scrapy framework. **This crawler is for learning purposes only** and must not be used for anything else.
7 | 
8 | ### Dependencies
9 | 
10 | - python3.6.1
11 | - scrapy1.5.0
12 | - mongoDB
13 | - APScheduler
14 | 
15 | ### Overview
16 | 
17 | The crawler first scrapes the "hot challenge" (热门挑战) and "hot music" (热门音乐) lists from the search API, sorts them by participant count, and then crawls the video data of the most popular entries. Lists and videos are stored in separate MongoDB collections (see the query example at the end of this README). The data is refreshed every night after midnight; it is not complete, and there is no full re-sync yet.
18 | 
19 | ### Roadmap (no promises...)
20 | 
21 | 1. A standalone client and a web UI for browsing and filtering the data
22 | 
23 | ### Changelog
24 | - 20180914
25 |   The API endpoints the crawler relies on seem to have stopped working; an update is planned.
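26 | 
27 | ### Example: querying the scraped data
28 | 
29 | A minimal sketch of reading the results back with pymongo. The host, port, database
30 | name and collection names ('category', 'video') follow settings.py and pipelines.py;
31 | the category id below is a hypothetical placeholder, substitute one from your own database.
32 | 
33 | ```python
34 | import pymongo
35 | 
36 | client = pymongo.MongoClient(host='192.168.197.128', port=27017)
37 | db = client['douyin']
38 | 
39 | # The ten most popular categories scraped by categorySpider.
40 | for cat in db['category'].find().sort([('category_user_count', -1)]).limit(10):
41 |     print(cat['category_type'], cat['category_title'], cat['category_user_count'])
42 | 
43 | # Videos collected for one category, newest first ('CATEGORY_ID' is a placeholder).
44 | for video in db['video'].find({'category_id': 'CATEGORY_ID'}).sort([('video_time', -1)]):
45 |     print(video['video_desc'], video['video_play'])
46 | ```
47 | 
48 | Sorting on `video_time` stays chronological because it is stored as a zero-padded `%Y/%m/%d %H:%M:%S` string.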
--------------------------------------------------------------------------------
/douyin/maintest.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | import subprocess
4 | import os
5 | from datetime import datetime
6 | import logging
7 | 
8 | logging.basicConfig(level=logging.DEBUG)
9 | 
10 | 
11 | def tick_list():
12 |     logging.debug('Starting the category spider! The time is: %s' % datetime.now())
13 |     app_path = os.path.dirname(os.path.realpath(__file__))
14 |     subprocess.Popen("scrapy crawl categorySpider", shell=True, cwd=app_path)
15 | 
16 | 
17 | def tick_challenge():
18 |     logging.debug('Starting the hot-challenge spider! The time is: %s' % datetime.now())
19 |     app_path = os.path.dirname(os.path.realpath(__file__))
20 |     subprocess.Popen("scrapy crawl categoryVideoSpider", shell=True, cwd=app_path)
21 | 
22 | 
23 | def tick_music():
24 |     logging.debug('Starting the hot-music spider! The time is: %s' % datetime.now())
25 |     app_path = os.path.dirname(os.path.realpath(__file__))
26 |     subprocess.Popen("scrapy crawl douyinSpider", shell=True, cwd=app_path)
27 | 
28 | 
29 | if __name__ == '__main__':
30 |     logging.debug(
31 |         '====================== Test run started! The time is: %s =======================' %
32 |         datetime.now())
33 |     tick_list()
--------------------------------------------------------------------------------
/douyin/utils/tools.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | import pymongo
4 | import random
5 | import time
6 | from scrapy.utils.project import get_project_settings
7 | 
8 | settings = get_project_settings()
9 | 
10 | 
11 | class DBTools:
12 |     '''
13 |     MongoDB connection helper.
14 |     '''
15 |     host = settings['MONGODB_HOST']  # values come from settings.py, used by the pipelines
16 |     port = settings['MONGODB_PORT']
17 |     dbName = settings['MONGODB_DBNAME']  # database name
18 |     client = pymongo.MongoClient(host=host, port=port)  # connect to the database
19 |     tdb = client[dbName]
20 | 
21 |     def __init__(self, name):
22 |         print('name:', name)
23 |         self.post = self.tdb[name]
24 | 
25 |     def get_db_con(self):
26 |         return self.post
27 | 
28 | 
29 | class MyTools:
30 |     '''
31 |     Basic utility helpers.
32 |     '''
33 |     @staticmethod
34 |     def init_device_id():
35 |         # Fake a ten-digit device id for the API requests.
36 |         value = random.randint(1000000000, 9999999999)
37 |         return str(value)
38 | 
39 |     @staticmethod
40 |     def transform_time(u_time):
41 |         # Convert a unix timestamp into a human-readable string.
42 |         timeArray = time.localtime(u_time)
43 |         otherStyleTime = time.strftime("%Y/%m/%d %H:%M:%S", timeArray)
44 |         return otherStyleTime
--------------------------------------------------------------------------------
/douyin/items.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Define here the models for your scraped items
4 | #
5 | # See documentation in:
6 | # https://doc.scrapy.org/en/latest/topics/items.html
7 | import scrapy
8 | 
9 | 
10 | class DouyinItem(scrapy.Item):
11 |     # id of the challenge/music category this video belongs to
12 |     category_id = scrapy.Field()
13 | 
14 |     # author information
15 |     # user id
16 |     user_uid = scrapy.Field()
17 |     # short user id
18 |     user_sid = scrapy.Field()
19 |     # birthday given at registration
20 |     user_birthday = scrapy.Field()
21 |     # gender
22 |     user_gender = scrapy.Field()
23 | 
24 |     # video information
25 |     # video id
26 |     video_id = scrapy.Field()
27 |     # video description
28 |     video_desc = scrapy.Field()
29 |     # play count
30 |     video_play = scrapy.Field()
31 |     # comment count
32 |     video_comment = scrapy.Field()
33 |     # share count
34 |     video_share = scrapy.Field()
35 |     # like ("digg") count
36 |     video_digg = scrapy.Field()
37 |     # video download url
38 |     video_durl = scrapy.Field()
39 |     # video cover url
40 |     video_gurl = scrapy.Field()
41 |     # creation time
42 |     video_time = scrapy.Field()
43 | 
44 | 
45 | class DouyinCategoryItem(scrapy.Item):
46 |     # category type, taken from the desc field: 热门音乐 (hot music) / 热门挑战 (hot challenge)
47 |     category_type = scrapy.Field()
48 |     category_title = scrapy.Field()
49 |     category_id = scrapy.Field()
50 |     category_url = scrapy.Field()
51 |     category_desc = scrapy.Field()
52 |     category_user_count = scrapy.Field()
--------------------------------------------------------------------------------
/douyin/user_agents.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | 
4 | # Mobile User-Agent pool used by UserAgentMiddleware (see middlewares.py).
5 | agents = [
6 |     "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
7 |     "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A405 Safari/600.1.4",
8 |     "Mozilla/5.0 (Linux; U; Android 7.0; zh-CN; SM-G9550 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/40.0.2214.89 UCBrowser/11.7.0.953 Mobile Safari/537.36",
9 |     "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
10 |     "Mozilla/5.0 (Linux; Android 6.0; HUAWEI VIE-AL10 Build/HUAWEIVIE-AL10) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Mobile Safari/537.36",
11 |     "Mozilla/5.0 (Linux; U; Android 4.4.4; zh-cn; MI 4LTE Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/39.0.0.0 Mobile Safari/537.36 XiaoMi/MiuiBrowser/2.1.1",
12 |     "Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19",
13 |     "Mozilla/5.0 (Linux; U; Android 4.1.1; zh-cn; SCH-N719 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
14 | ]
--------------------------------------------------------------------------------
/douyin/pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Define your item pipelines here
4 | #
5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
7 | from douyin.utils.tools import DBTools
8 | 
9 | 
10 | class DouyinPipeline(object):
11 |     """Stores category and video items in their MongoDB collections."""
12 | 
13 |     def __init__(self):
14 |         db_con_video = DBTools('video')
15 |         db_con_category = DBTools('category')
16 |         self.post_video = db_con_video.get_db_con()
17 |         self.post_category = db_con_category.get_db_con()
18 | 
19 |     def process_item(self, item, spider):
20 |         if spider.name == 'categorySpider':
21 |             print("Saving category item")
22 |             if self.post_category.find_one({'category_id': item['category_id']}):
23 |                 # update the existing record
24 |                 self.post_category.update(
25 |                     {'category_id': item['category_id']}, dict(item))
26 |             else:
27 |                 # insert a new record
28 |                 self.post_category.insert(dict(item))
29 |             return item
30 |         elif spider.name == 'categoryVideoSpider' or spider.name == 'douyinSpider':
31 |             print("Saving video item")
32 |             if self.post_video.find_one({'video_id': item['video_id']}):
33 |                 # update the existing record, keyed on the video id
34 |                 self.post_video.update(
35 |                     {'video_id': item['video_id']}, dict(item))
36 |             else:
37 |                 # insert a new record
38 |                 self.post_video.insert(dict(item))
39 |             return item
--------------------------------------------------------------------------------
/douyin/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | from apscheduler.schedulers.background import BackgroundScheduler
4 | import subprocess
5 | import os
6 | from datetime import datetime
7 | import time
8 | import logging
9 | 
10 | logging.basicConfig(level=logging.DEBUG)
11 | 
12 | 
13 | def tick_list():
14 |     logging.debug('Starting the category spider! The time is: %s' % datetime.now())
15 |     app_path = os.path.dirname(os.path.realpath(__file__))
16 |     subprocess.Popen("scrapy crawl categorySpider", shell=True, cwd=app_path)
17 | 
18 | 
19 | def tick_challenge():
20 |     logging.debug('Starting the hot-challenge spider! The time is: %s' % datetime.now())
21 |     app_path = os.path.dirname(os.path.realpath(__file__))
22 |     subprocess.Popen(
23 |         "scrapy crawl categoryVideoSpider",
24 |         shell=True,
25 |         cwd=app_path)
26 | 
27 | 
28 | def tick_music():
29 |     logging.debug('Starting the hot-music spider! The time is: %s' % datetime.now())
30 |     app_path = os.path.dirname(os.path.realpath(__file__))
31 |     subprocess.Popen("scrapy crawl douyinSpider", shell=True, cwd=app_path)
32 | 
33 | 
34 | if __name__ == '__main__':
35 |     logging.debug(
36 |         '====================== Application started! The time is: %s =======================' %
37 |         datetime.now())
38 |     scheduler = BackgroundScheduler()
39 |     scheduler.add_job(tick_list, 'cron', day='*', hour=0, minute=30)  # refresh the category lists after midnight
40 |     scheduler.add_job(tick_challenge, 'cron', day='*', hour=2, minute=0)
41 |     scheduler.add_job(tick_music, 'cron', day='*', hour=3, minute=0)
42 |     scheduler.start()  # the scheduler runs its jobs on separate threads
43 |     print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
44 | 
45 |     try:
46 |         # This is here to simulate application activity (which keeps the main
47 |         # thread alive).
48 |         while True:
49 |             time.sleep(2)
50 |     except (KeyboardInterrupt, SystemExit):
51 |         # Not strictly necessary if daemonic mode is enabled but should be done
52 |         # if possible
53 |         scheduler.shutdown()
54 |         logging.debug('Exit The Job!')
55 |         print('Exit The Job!')
--------------------------------------------------------------------------------
/douyin/spiders/categoryspider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | '''
4 | Crawls the category lists: "hot challenge" (热门挑战) and "hot music" (热门音乐).
5 | '''
6 | import json
7 | 
8 | from scrapy.http import Request
9 | from scrapy.spiders import CrawlSpider
10 | 
11 | from douyin.items import DouyinCategoryItem
12 | 
13 | 
14 | class categorySpider(CrawlSpider):
15 |     name = 'categorySpider'
16 |     cursor_num = 0
17 |     count_size = 10
18 |     url = "https://aweme.snssdk.com/aweme/v1/category/list/?version_code=181&count=10&cursor="
19 |     start_urls = [url + str(cursor_num)]
20 | 
21 |     def parse(self, response):
22 |         jsonresp = json.loads(response.body_as_unicode())
23 |         if jsonresp['status_code'] == 0:
24 |             # Emit an item for every category on this page.
25 |             for jsonobj in list(jsonresp['category_list']):
26 |                 yield self.init_item(jsonobj)
27 |             # Follow the cursor until the API reports no more pages.
28 |             if jsonresp['has_more'] == 1:
29 |                 self.cursor_num += self.count_size
30 |                 nexturl = self.url + str(self.cursor_num)
31 |                 yield Request(nexturl, callback=self.parse)
32 | 
33 |     def init_item(self, jsonobj):
34 |         item = DouyinCategoryItem()
35 |         if str(jsonobj['desc']) == "热门挑战":
36 |             # hot challenge entry
37 |             item['category_type'] = jsonobj['desc']
38 |             item['category_id'] = jsonobj['challenge_info']['cid']
39 |             item['category_desc'] = jsonobj['challenge_info']['desc']
40 |             item['category_title'] = jsonobj['challenge_info']['cha_name']
41 |             item['category_url'] = jsonobj['challenge_info']['schema']
42 |             item['category_user_count'] = jsonobj['challenge_info']['user_count']
43 |         else:
44 |             # hot music entry
45 |             item['category_type'] = jsonobj['desc']
46 |             item['category_title'] = jsonobj['music_info']['title']
47 |             item['category_id'] = jsonobj['music_info']['mid']
48 |             item['category_url'] = 'https://api.amemv.com/aweme/v1/music/aweme/?music_id=' + \
49 |                 str(jsonobj['music_info']['mid'])
50 |             item['category_desc'] = jsonobj['music_info']['offline_desc']
51 |             item['category_user_count'] = jsonobj['music_info']['user_count']
52 |         return item
--------------------------------------------------------------------------------
/douyin/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Scrapy settings for douyin project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | #     https://doc.scrapy.org/en/latest/topics/settings.html
9 | #     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
10 | #     https://doc.scrapy.org/en/latest/topics/spider-middleware.html
11 | 
12 | BOT_NAME = 'douyin'
13 | 
14 | SPIDER_MODULES = ['douyin.spiders']
15 | NEWSPIDER_MODULE = 'douyin.spiders'
16 | 
17 | # MongoDB connection settings (read by douyin/utils/tools.py)
18 | MONGODB_HOST = '192.168.197.128'
19 | MONGODB_PORT = 27017
20 | MONGODB_DBNAME = 'douyin'
21 | 
22 | 
23 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
24 | # Mobile User-Agents (e.g. Samsung GALAXY S8+ with UC Browser) are rotated by UserAgentMiddleware
25 | 
26 | # Obey robots.txt rules
27 | ROBOTSTXT_OBEY = True
28 | 
29 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
30 | #CONCURRENT_REQUESTS = 32
31 | 
32 | # Configure a delay for requests for the same website (default: 0)
33 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
34 | # See also autothrottle settings and docs
35 | #DOWNLOAD_DELAY = 3
36 | # The download delay setting will honor only one of:
37 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16
38 | #CONCURRENT_REQUESTS_PER_IP = 16
39 | 
40 | # Disable cookies (enabled by default)
41 | #COOKIES_ENABLED = False
42 | 
43 | # Disable Telnet Console (enabled by default)
44 | #TELNETCONSOLE_ENABLED = False
45 | 
46 | # Override the default request headers:
47 | #DEFAULT_REQUEST_HEADERS = {
48 | #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
49 | #   'Accept-Language': 'en',
50 | #}
51 | 
52 | # Enable or disable spider middlewares
53 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
54 | #SPIDER_MIDDLEWARES = {
55 | #    'douyin.middlewares.DouyinSpiderMiddleware': 543,
56 | #}
57 | 
58 | # Enable or disable downloader middlewares
59 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
60 | #DOWNLOADER_MIDDLEWARES = {
61 | #    'douyin.middlewares.DouyinDownloaderMiddleware': 543,
62 | #}
63 | 
64 | # Enable or disable extensions
65 | # See https://doc.scrapy.org/en/latest/topics/extensions.html
66 | #EXTENSIONS = {
67 | #    'scrapy.extensions.telnet.TelnetConsole': None,
68 | #}
69 | 
70 | # Configure item pipelines
71 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
72 | ITEM_PIPELINES = {
73 |     #'douyin.pipelines.DouyinCategoryPipeline': 402,
74 |     'douyin.pipelines.DouyinPipeline': 402,
75 | }
76 | DOWNLOADER_MIDDLEWARES = {
77 |     'douyin.middlewares.UserAgentMiddleware': 401,
78 | }
79 | 
80 | # Enable and configure the AutoThrottle extension (disabled by default)
81 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
82 | #AUTOTHROTTLE_ENABLED = True
83 | # The initial download delay
84 | #AUTOTHROTTLE_START_DELAY = 5
85 | # The maximum download delay to be set in case of high latencies
86 | #AUTOTHROTTLE_MAX_DELAY = 60
87 | # The average number of requests Scrapy should be sending in parallel to
88 | # each remote server
89 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
90 | # Enable showing throttling stats for every response received:
91 | #AUTOTHROTTLE_DEBUG = False
92 | 
93 | # Enable and configure HTTP caching (disabled by default)
94 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
95 | #HTTPCACHE_ENABLED = True
96 | #HTTPCACHE_EXPIRATION_SECS = 0
97 | #HTTPCACHE_DIR = 'httpcache'
98 | #HTTPCACHE_IGNORE_HTTP_CODES = []
99 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
100 | 
-------------------------------------------------------------------------------- /douyin/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | import random 8 | 9 | from scrapy import signals 10 | 11 | from douyin.user_agents import agents 12 | 13 | 14 | class DouyinSpiderMiddleware(object): 15 | # Not all methods need to be defined. If a method is not defined, 16 | # scrapy acts as if the spider middleware does not modify the 17 | # passed objects. 18 | 19 | @classmethod 20 | def from_crawler(cls, crawler): 21 | # This method is used by Scrapy to create your spiders. 22 | s = cls() 23 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 24 | return s 25 | 26 | def process_spider_input(self, response, spider): 27 | # Called for each response that goes through the spider 28 | # middleware and into the spider. 29 | 30 | # Should return None or raise an exception. 31 | return None 32 | 33 | def process_spider_output(self, response, result, spider): 34 | # Called with the results returned from the Spider, after 35 | # it has processed the response. 36 | 37 | # Must return an iterable of Request, dict or Item objects. 38 | for i in result: 39 | yield i 40 | 41 | def process_spider_exception(self, response, exception, spider): 42 | # Called when a spider or process_spider_input() method 43 | # (from other spider middleware) raises an exception. 44 | 45 | # Should return either None or an iterable of Response, dict 46 | # or Item objects. 47 | pass 48 | 49 | def process_start_requests(self, start_requests, spider): 50 | # Called with the start requests of the spider, and works 51 | # similarly to the process_spider_output() method, except 52 | # that it doesn’t have a response associated. 53 | 54 | # Must return only requests (not items). 55 | for r in start_requests: 56 | yield r 57 | 58 | def spider_opened(self, spider): 59 | spider.logger.info('Spider opened: %s' % spider.name) 60 | 61 | 62 | class DouyinDownloaderMiddleware(object): 63 | # Not all methods need to be defined. If a method is not defined, 64 | # scrapy acts as if the downloader middleware does not modify the 65 | # passed objects. 66 | 67 | @classmethod 68 | def from_crawler(cls, crawler): 69 | # This method is used by Scrapy to create your spiders. 70 | s = cls() 71 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 72 | return s 73 | 74 | def process_request(self, request, spider): 75 | # Called for each request that goes through the downloader 76 | # middleware. 77 | 78 | # Must either: 79 | # - return None: continue processing this request 80 | # - or return a Response object 81 | # - or return a Request object 82 | # - or raise IgnoreRequest: process_exception() methods of 83 | # installed downloader middleware will be called 84 | return None 85 | 86 | def process_response(self, request, response, spider): 87 | # Called with the response returned from the downloader. 88 | 89 | # Must either; 90 | # - return a Response object 91 | # - return a Request object 92 | # - or raise IgnoreRequest 93 | return response 94 | 95 | def process_exception(self, request, exception, spider): 96 | # Called when a download handler or a process_request() 97 | # (from other downloader middleware) raises an exception. 
98 | 
99 |         # Must either:
100 |         # - return None: continue processing this exception
101 |         # - return a Response object: stops process_exception() chain
102 |         # - return a Request object: stops process_exception() chain
103 |         pass
104 | 
105 |     def spider_opened(self, spider):
106 |         spider.logger.info('Spider opened: %s' % spider.name)
107 | 
108 | 
109 | class UserAgentMiddleware(object):
110 |     """Attach a randomly chosen mobile User-Agent to every request."""
111 | 
112 |     def process_request(self, request, spider):
113 |         agent = random.choice(agents)
114 |         spider.logger.debug('User-Agent: %s' % agent)
115 |         request.headers["User-Agent"] = agent
--------------------------------------------------------------------------------
/douyin/spiders/dyspider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | '''
4 | Crawls the videos of every "hot music" (热门音乐) category found by categorySpider.
5 | '''
6 | import json
7 | 
8 | from scrapy.http import Request
9 | from scrapy.spiders import CrawlSpider
10 | 
11 | from douyin.items import DouyinItem
12 | from douyin.utils.tools import DBTools
13 | from douyin.utils.tools import MyTools
14 | 
15 | 
16 | class douyinSpider(CrawlSpider):
17 |     name = 'douyinSpider'
18 |     cursor_num = 0
19 |     count_size = 20
20 |     i = 0
21 |     urls = 'https://api.amemv.com/aweme/v1/music/aweme/?count=20&cursor=%d&music_id=%s&device_id=%s'
22 | 
23 |     def __init__(self):
24 |         # Load the music category ids from MongoDB, most popular first, and
25 |         # build the first request url.
26 |         super(douyinSpider, self).__init__()
27 |         self.post = DBTools('category').get_db_con()
28 |         self.video_post = DBTools('video').get_db_con()
29 | 
30 |         self.ids = []
31 |         self.new_video_id = ''
32 |         categories = self.post.find({"category_type": "热门音乐"}).sort(
33 |             [("category_user_count", -1)])
34 |         for obj in categories:
35 |             self.ids.append(str(obj['category_id']))
36 | 
37 |         self.start_urls = [self.urls % (self.cursor_num,
38 |                                         str(self.ids[self.i]),
39 |                                         MyTools.init_device_id())]
40 | 
41 |     def parse(self, response):
42 |         print('Crawling started...')
43 |         jsonresp = json.loads(response.body_as_unicode())
44 |         if jsonresp['status_code'] == 0:
45 |             for jsonobj in list(jsonresp['aweme_list']):
46 |                 yield self.init_item(jsonobj, self.ids[self.i])
47 |             if jsonresp['has_more'] == 1:
48 |                 # More pages for the current music id: advance the cursor.
49 |                 self.cursor_num += self.count_size
50 |                 yield Request(self.urls % (self.cursor_num,
51 |                                            self.ids[self.i],
52 |                                            MyTools.init_device_id()),
53 |                               callback=self.parse)
54 |                 return
55 |         # Current id exhausted (or the request failed): start on the next id.
56 |         self.i += 1
57 |         self.cursor_num = 0
58 |         if self.i < len(self.ids):
59 |             yield Request(self.urls % (self.cursor_num,
60 |                                        self.ids[self.i],
61 |                                        MyTools.init_device_id()),
62 |                           callback=self.parse)
63 | 
64 |     def init_item(self, jsonobj, category_uid):
65 |         # Map one aweme JSON object onto a DouyinItem.
66 |         item = DouyinItem()
67 |         item['category_id'] = str(category_uid)
68 |         item['user_uid'] = str(jsonobj['author']['uid'])
69 |         item['user_sid'] = str(jsonobj['author']['short_id'])
70 |         item['user_birthday'] = str(jsonobj['author']['birthday'])
71 |         item['user_gender'] = jsonobj['author']['gender']
72 | 
73 |         item['video_id'] = str(jsonobj['aweme_id'])
74 |         item['video_desc'] = str(jsonobj['desc'])
75 |         item['video_play'] = jsonobj['statistics']['play_count']
76 |         item['video_comment'] = jsonobj['statistics']['comment_count']
77 |         item['video_share'] = jsonobj['statistics']['share_count']
78 |         item['video_digg'] = jsonobj['statistics']['digg_count']
79 |         item['video_durl'] = str(jsonobj['video']['download_addr']['url_list'][0])
80 |         item['video_gurl'] = str(jsonobj['video']['dynamic_cover']['url_list'][0])
81 |         item['video_time'] = MyTools.transform_time(jsonobj['create_time'])
82 |         return item
83 | 
84 |     def notUpdate(self, categoryid, jsonobj):
85 |         # Part of a planned incremental update; not wired into parse() yet.
86 |         return str(categoryid) == str(jsonobj['aweme_id'])
87 | 
88 |     def getNewVideoId(self, categoryid):
89 |         # Remember the newest stored video id of a category; also part of the
90 |         # planned incremental update.
91 |         newest = self.video_post.find_one({"category_id": str(categoryid)},
92 |                                           sort=[("video_time", -1)])
93 |         if newest:
94 |             self.new_video_id = newest['video_id']
--------------------------------------------------------------------------------
/douyin/spiders/categoryvideospider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | '''
4 | Crawls the videos of every "hot challenge" (热门挑战) category found by categorySpider.
5 | '''
6 | import json
7 | 
8 | from scrapy.http import Request
9 | from scrapy.spiders import CrawlSpider
10 | 
11 | from douyin.items import DouyinItem
12 | from douyin.utils.tools import DBTools
13 | from douyin.utils.tools import MyTools
14 | 
15 | 
16 | class categoryVideoSpider(CrawlSpider):
17 |     name = 'categoryVideoSpider'
18 |     cursor_num = 0
19 |     count_size = 20
20 |     i = 0
21 |     urls = 'https://aweme.snssdk.com/aweme/v1/challenge/aweme/?query_type=0&count=20&aid=1128&cursor=%d&ch_id=%s&device_id=%s'
22 | 
23 |     def __init__(self):
24 |         # Load the challenge category ids from MongoDB, most popular first,
25 |         # and build the first request url.
26 |         super(categoryVideoSpider, self).__init__()
27 |         self.post = DBTools('category').get_db_con()
28 |         self.video_post = DBTools('video').get_db_con()
29 | 
30 |         self.ids = []
31 |         self.new_video_id = ''
32 |         categories = self.post.find({"category_type": "热门挑战"}).sort(
33 |             [("category_user_count", -1)])
34 |         for obj in categories:
35 |             self.ids.append(str(obj['category_id']))
36 | 
37 |         self.start_urls = [self.urls % (self.cursor_num,
38 |                                         str(self.ids[self.i]),
39 |                                         MyTools.init_device_id())]
40 | 
41 |     def parse(self, response):
42 |         print('Crawling started...')
43 |         jsonresp = json.loads(response.body_as_unicode())
44 |         if jsonresp['status_code'] == 0:
45 |             for jsonobj in list(jsonresp['aweme_list']):
46 |                 yield self.init_item(jsonobj, self.ids[self.i])
47 |             if jsonresp['has_more'] == 1:
48 |                 # More pages for the current challenge id: advance the cursor.
49 |                 self.cursor_num += self.count_size
50 |                 yield Request(self.urls % (self.cursor_num,
51 |                                            self.ids[self.i],
52 |                                            MyTools.init_device_id()),
53 |                               callback=self.parse)
54 |                 return
55 |         # Current id exhausted (or the request failed): start on the next id.
56 |         self.i += 1
57 |         self.cursor_num = 0
58 |         if self.i < len(self.ids):
59 |             yield Request(self.urls % (self.cursor_num,
60 |                                        self.ids[self.i],
61 |                                        MyTools.init_device_id()),
62 |                           callback=self.parse)
63 | 
64 |     def init_item(self, jsonobj, category_uid):
65 |         # Map one aweme JSON object onto a DouyinItem.
66 |         item = DouyinItem()
67 |         item['category_id'] = str(category_uid)
68 |         item['user_uid'] = str(jsonobj['author']['uid'])
69 |         item['user_sid'] = str(jsonobj['author']['short_id'])
70 |         item['user_birthday'] = str(jsonobj['author']['birthday'])
71 |         item['user_gender'] = jsonobj['author']['gender']
72 | 
73 |         item['video_id'] = str(jsonobj['aweme_id'])
74 |         item['video_desc'] = str(jsonobj['desc'])
75 |         item['video_play'] = jsonobj['statistics']['play_count']
76 |         item['video_comment'] = jsonobj['statistics']['comment_count']
77 |         item['video_share'] = jsonobj['statistics']['share_count']
78 |         item['video_digg'] = jsonobj['statistics']['digg_count']
79 |         item['video_durl'] = str(jsonobj['video']['download_addr']['url_list'][0])
80 |         item['video_gurl'] = str(jsonobj['video']['dynamic_cover']['url_list'][0])
81 |         item['video_time'] = MyTools.transform_time(jsonobj['create_time'])
82 |         return item
83 | 
84 |     def notUpdate(self, categoryid, jsonobj):
85 |         # Part of a planned incremental update; not wired into parse() yet.
86 |         return str(categoryid) == str(jsonobj['aweme_id'])
87 | 
88 |     def getNewVideoId(self, categoryid):
89 |         # Remember the newest stored video id of a category; also part of the
90 |         # planned incremental update.
91 |         newest = self.video_post.find_one({"category_id": str(categoryid)},
92 |                                           sort=[("video_time", -1)])
93 |         if newest:
94 |             self.new_video_id = newest['video_id']
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |                                  Apache License
2 |                            Version 2.0, January 2004
3 |                         http://www.apache.org/licenses/
4 | 
5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 |    1. Definitions.
8 | 
9 |       "License" shall mean the terms and conditions for use, reproduction,
10 |       and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 |       "Licensor" shall mean the copyright owner or entity authorized by
13 |       the copyright owner that is granting the License.
14 | 
15 |       "Legal Entity" shall mean the union of the acting entity and all
16 |       other entities that control, are controlled by, or are under common
17 |       control with that entity. For the purposes of this definition,
18 |       "control" means (i) the power, direct or indirect, to cause the
19 |       direction or management of such entity, whether by contract or
20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 |       outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 |       "You" (or "Your") shall mean an individual or Legal Entity
24 |       exercising permissions granted by this License.
25 | 
26 |       "Source" form shall mean the preferred form for making modifications,
27 |       including but not limited to software source code, documentation
28 |       source, and configuration files.
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------