├── SnrksBot
│   └── demo.py
├── SnrksMonitor
│   ├── img
│   │   └── go.jpg
│   ├── SnrksDataBase.db
│   ├── main.py
│   ├── new_ios_push.py
│   ├── wechatnotice.py
│   ├── run.py
│   ├── log.py
│   ├── run_spider.py
│   ├── config.yaml
│   ├── webspider.py
│   ├── db.py
│   └── appspider.py
├── dependency.txt
├── SnrksDataBase.db
├── chromedriver.exe
├── .idea
│   ├── encodings.xml
│   ├── sqldialects.xml
│   ├── vcs.xml
│   ├── dictionaries
│   │   ├── EAST.xml
│   │   └── xiaodongyan.xml
│   ├── modules.xml
│   ├── misc.xml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── snrksMonitor.iml
│   ├── dataSources.xml
│   ├── codeStyles
│   │   └── Project.xml
│   └── dbnavigator.xml
├── README.md
├── utils.py
├── demo.py
├── IPPoolForSnrks
│   ├── runippool.py
│   ├── spiders.py
│   ├── validate.py
│   ├── CheckFromDb.py
│   └── config.yaml
├── log.py
└── config.yaml

/SnrksBot/demo.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/SnrksMonitor/img/go.jpg:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/dependency.txt:
--------------------------------------------------------------------------------
1 | requests==2.21.2
2 | PyYAML
3 | lxml==4.3.2
4 | itchat==1.3.10
--------------------------------------------------------------------------------
/SnrksDataBase.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastcn/snrksMonitor/HEAD/SnrksDataBase.db
--------------------------------------------------------------------------------
/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastcn/snrksMonitor/HEAD/chromedriver.exe
--------------------------------------------------------------------------------
/SnrksMonitor/SnrksDataBase.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastcn/snrksMonitor/HEAD/SnrksMonitor/SnrksDataBase.db
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
--------------------------------------------------------------------------------
/SnrksMonitor/main.py:
--------------------------------------------------------------------------------
1 | """
2 | east
3 | """
4 | from SnrksMonitor.run import run
5 | 
6 | 
7 | if __name__ == '__main__':
8 |     """
9 |     Start the whole script.
10 |     """
11 |     run()
--------------------------------------------------------------------------------
/.idea/sqldialects.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | 
6 | 
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | 
6 | 
--------------------------------------------------------------------------------
/.idea/dictionaries/EAST.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | configdata
5 | groupid
6 | readyaml
7 | useragent
8 | 
9 | 
10 | 
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | 
6 | 
7 | 
8 | 
-------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # snrksMonitor 2 | 主要功能模块为:监控snkrs是否有上新(后续会加入对库存量的更新),为访问nike接口的IP池建立,以及后续有可能更新的抢购功能。 3 | 4 | 监控模块主要流程为: 5 | 爬取四个国区的数据-保存到数据库-再次爬取-进行比较-得出结果-进入休眠 6 | 7 | 数据库采用的是sqlite,因此也一并上传了。 8 | 9 | # 2019-5-21更新 10 | 增加了一种推送方式,bark。目前只支持在IOS上使用。 11 | 在APPStore中下载 barK APP,获取自己设备的token。 12 | 在SnrksMonitor/new_ios_push.py文件的PushToIos.Push_list中加入自己的token即可。 13 | -------------------------------------------------------------------------------- /.idea/dictionaries/xiaodongyan.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | chatroomid 5 | chatroomnickname 6 | fileindex 7 | fileurl 8 | imgage 9 | loggerlevel 10 | loglevel 11 | maxtimeout 12 | monitortime 13 | sleeptime 14 | snrks 15 | yaml 16 | 17 | 18 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | 工具类 3 | """ 4 | import yaml 5 | import traceback 6 | 7 | config_url = '../config.yaml' 8 | 9 | 10 | class utils: 11 | def __init__(self): 12 | pass 13 | 14 | def readconfig(self): 15 | """ 16 | 读取配置 17 | :return:配置字典 18 | """ 19 | try: 20 | f = open(config_url, 'r', encoding='UTF-8') 21 | global configdata 22 | configdict = yaml.load(f) 23 | except IOError as e: 24 | # logging.log('open config failed') 25 | configdict = {} 26 | print('open config failed\n {}'.format(traceback.format_exc())) 27 | return configdict 28 | -------------------------------------------------------------------------------- /.idea/snrksMonitor.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 14 | -------------------------------------------------------------------------------- /SnrksMonitor/new_ios_push.py: -------------------------------------------------------------------------------- 1 | """ 2 | author:hefeng 3 | function:push message to ios users with the help of BARK 4 | """ 5 | import requests 6 | import yaml 7 | from SnrksMonitor.log import Logger 8 | 9 | log = Logger().log() 10 | 11 | 12 | class PushToIos: 13 | def __init__(self): 14 | self.push_url = "https://api.day.app/" 15 | self.push_list = [ 16 | { 17 | "key": '123', 18 | "name": "east" 19 | } 20 | ] 21 | 22 | def push(self, message): 23 | for member in self.push_list: 24 | msg = f"{self.push_url}{member['key']}/{message}" 25 | requests.get(msg) 26 | log.info(f"推送成功--{member['name']}/{msg}") 27 | 28 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import sqlite3 4 | 5 | def check_start_selldate(): 6 | url = 'https://api.nike.com/snkrs/content/v1/?country=JP&language=ja&offset=0&orderBy=published' 7 | r = json.loads(requests.get(url).text) 8 | for item in r["threads"]: 9 | try: 10 | 
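# Not every SNKRS thread carries a 'startSellDate' under 'product', so the
# except branch below falls back to printing just the style code.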
print('{},{}'.format(item['name'],item['product']['startSellDate'])) 11 | except: 12 | print(item["product"]["style"]) 13 | 14 | 15 | def dbRead(): 16 | db = sqlite3.connect('./SnrksDataBase.db') 17 | cusor = db.cursor() 18 | sql = """select shoeStyleCode, shoePublishTime from shoes""" 19 | datas = cusor.execute(sql) 20 | return datas 21 | 22 | 23 | def checl(news): 24 | datas = dbRead() 25 | shoeCode = [] 26 | shoePublishTime = [] 27 | for data in datas: 28 | shoeCode.append(data[0]) 29 | shoePublishTime.append(data[1]) 30 | for new in news: 31 | pass 32 | 33 | 34 | if __name__ == "__main__": 35 | check_start_selldate() -------------------------------------------------------------------------------- /IPPoolForSnrks/runippool.py: -------------------------------------------------------------------------------- 1 | """ 2 | 运行ip池 3 | """ 4 | import time 5 | import traceback 6 | from IPPoolForSnrks.spiders import proxyspider 7 | from IPPoolForSnrks.validate import validate 8 | from IPPoolForSnrks.CheckFromDb import CheckFromDb 9 | from log import Logger 10 | 11 | log = Logger().log() 12 | check = CheckFromDb() 13 | 14 | def run_add_pool(): 15 | """ 16 | 运行增加IP POOL 17 | :return: 18 | """ 19 | spider = proxyspider() 20 | spideData = spider.spiderFromQuick() + spider.spiderFromXici() 21 | newIPS = check.if_update(spideData) 22 | available_ip, unavailable_ip = validate ().validate (ips=newIPS ['data']) 23 | check.inserte_into_db(list=available_ip+unavailable_ip) 24 | 25 | """这边逻辑需要修改,需要把所有的IP都记录一遍,并做标记是否有效,因此需要在数据库中增加一个标记字段""" 26 | 27 | def run_check_pool(): 28 | """ 29 | 运行检查IP POOL 30 | :return: 31 | """ 32 | sql = """SELECT * FROM ips where 'availible' in (1,2,3)""" 33 | ip_list = check.read_from_db(sql=sql) 34 | available_ip, unavailable_ip = validate().validate(ips=ip_list) 35 | check.delete_from_db(unavailable_ip) 36 | 37 | if __name__ == '__main__': 38 | start = True 39 | while start: 40 | try: 41 | log.info('运行增加ip......') 42 | run_add_pool() 43 | log.info('进入休眠30s') 44 | time.sleep(10) 45 | log.info('运行检查数据库ip....') 46 | run_check_pool() 47 | log.info('进入休眠20s') 48 | time.sleep(10) 49 | except Exception as e: 50 | log.info('error:{}'.format(traceback.format_exc())) 51 | -------------------------------------------------------------------------------- /.idea/dataSources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | sqlite.xerial 6 | true 7 | org.sqlite.JDBC 8 | jdbc:sqlite:D:\python_study\snrksMonitor\SnrksMonitor\SnrksDataBase.db 9 | 10 | 11 | 12 | 13 | 14 | sqlite.xerial 15 | true 16 | org.sqlite.JDBC 17 | jdbc:sqlite:D:\python_study\snrksMonitor\SnrksDataBase.db 18 | 19 | 20 | 21 | 22 | 23 | file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.25.1/license.txt 24 | 25 | 26 | file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.25.1/sqlite-jdbc-3.25.1.jar 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /SnrksMonitor/wechatnotice.py: -------------------------------------------------------------------------------- 1 | """ 2 | east 3 | """ 4 | import itchat 5 | import os 6 | from SnrksMonitor.log import Logger 7 | 8 | log = Logger().log() 9 | 10 | 11 | class wechat(): 12 | def __init__(self): 13 | pass 14 | 15 | def login(self): 16 | itchat.auto_login(hotReload=True) 17 | 18 | def getFriends(self): 19 | friends = itchat.get_friends() 20 | log.debug(friends) 21 | return friends 22 | 23 | def sendMessage(self, msg, user): 24 | # 在群聊中发送推送并且删除图片 25 | if 
type(msg) == list: 26 | for item in msg: 27 | log.info('开始向群聊中发送消息') 28 | if item['shoePublishTime'] is None: 29 | message = '国家:[{}] [{}] '.format (item ['shoeCountry'], item ['shoeName']) 30 | else: 31 | message ="***************************\n[{}]\n国区:[{}]\n发售:[{}]\n货号:[{}]\n价格:[{}]\n抽签:[{}] \n库存:[{}]\n***************************".format(item['shoeName'], 32 | item['shoeCountry'], 33 | item['shoePublishTime'], 34 | item['shoeStyleCode'], 35 | item['shoePrice'], 36 | item['shoeSelectMethod'], 37 | item['shoeSize']) 38 | itchat.send_msg(msg=message, toUserName=user) 39 | itchat.send_image(fileDir=item['shoeImage'], toUserName=user) 40 | log.info('推送完成') 41 | # try: # 删除图片 42 | # log.info('delete image:%s' % item['shoeImageUrl']) 43 | # os.remove(path=item['shoeImageUrl']) 44 | # except IOError: 45 | # log.error('delete failed') 46 | elif type(msg) == str: 47 | log.info('请传入list') 48 | elif type(msg) == dict: 49 | log.info ('请传入list') 50 | else: 51 | itchat.send_msg(msg=msg, toUserName=user) 52 | log.info('message has been send, waiting for next time to start') 53 | 54 | def getChatRoomId(self, nickname): 55 | # 获取群聊的username 56 | groupContent = itchat.get_chatrooms() 57 | # log.debug(groupContent) 58 | chatroomid = '' 59 | for item in groupContent: 60 | if item['NickName'] == nickname: 61 | chatroomid = item['UserName'] 62 | log.info('get chat room “%s” id successfully的ID:%s' % (nickname, chatroomid)) 63 | return chatroomid 64 | 65 | def init(self,groupname): 66 | self.login() 67 | groupid = self.getChatRoomId(nickname=groupname) 68 | return groupid 69 | 70 | -------------------------------------------------------------------------------- /SnrksMonitor/run.py: -------------------------------------------------------------------------------- 1 | """ 2 | 运行脚本 3 | """ 4 | 5 | import SnrksMonitor.wechatnotice as notice 6 | import time 7 | import yaml 8 | from SnrksMonitor.log import Logger 9 | from SnrksMonitor.appspider import AppSpiders 10 | from SnrksMonitor.db import db as database 11 | 12 | log = Logger().log() 13 | 14 | 15 | class Utils: 16 | @staticmethod 17 | def readyaml(): 18 | # read config from yaml document 19 | file = './config.yaml' 20 | try: 21 | f = open(file, 'r', encoding='UTF-8') 22 | global configdata 23 | configdata = yaml.load(f) 24 | except IOError: 25 | # logging.log('open config failed') 26 | print('open config failed') 27 | return configdata 28 | 29 | 30 | def run(): 31 | log.info('East SnrksMonitor is starting') 32 | # 从配置中获取超时,爬虫间隔,微信群组名字 33 | u = Utils().readyaml() 34 | # timeout = u ['maxtimeout'] 35 | sleeptime = u['monitortime'] 36 | chatroomnickname = u['chatroomnickname'] 37 | 38 | # 登录微信 并返回群组id 39 | # push = notice.wechat () 40 | # chatroomid = push.init (groupname=chatroomnickname) 41 | num = 1 42 | 43 | # 实例化爬虫类 44 | shoesdata = AppSpiders() 45 | db = database() 46 | while True: 47 | log.info('第{}次开始'.format(num)) 48 | NewData = shoesdata.getNewShoesData() # 获取到最新的数据 49 | result = shoesdata.updateCheck(data=NewData) # 获取到是否有更新和更新数据 50 | log.info('第{}次是否有更新:{}'.format(num, result['isUpdate'])) 51 | # 如果有更新则对更新表进行操作,并发送推送 52 | if result['isUpdate'] is True: 53 | # 初始化鞋子,删除更新表 54 | shoesdata.initDB() # 初始化 55 | # 对更新表进行操作 56 | updateData = result['data'] 57 | shoesdata.insertToDb(data=updateData) 58 | # 给微信群发送推送 59 | updateShoesCodeList = [] 60 | for updatedata in updateData: 61 | updateShoesCodeList.append(updatedata['shoeStyleCode']) 62 | log.info('第{}次更新的货号:{}'.format(num, updateShoesCodeList)) 63 | # push.sendMessage(user=chatroomid, msg=updateData) 64 | 
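# The WeChat push above is commented out; the bark push added on 2019-5-21
# (SnrksMonitor/new_ios_push.py, used by run_spider.py) is the alternative.
# A minimal, hypothetical call, assuming PushToIos is imported from
# SnrksMonitor.new_ios_push and your bark token is set in PushToIos.push_list:
#   PushToIos().push(message='SNKRS update: {}'.format(updateShoesCodeList))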
# 把有更新的数据插入鞋子表 65 | db.updateShoesTable(data=updateData) 66 | else: 67 | # 是否需要操作? 68 | log.info('第{}次没有更新,进入暂停'.format(num)) 69 | log.info('第{}次结束'.format(num)) 70 | num += 1 71 | time.sleep(sleeptime) # 暂停时间 72 | -------------------------------------------------------------------------------- /.idea/codeStyles/Project.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 11 | 12 | 13 | 14 | 15 | 21 | 22 | 26 | 27 | 28 | 29 | 35 | 36 | 37 | 38 | 39 | 45 | 46 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /IPPoolForSnrks/spiders.py: -------------------------------------------------------------------------------- 1 | """ 2 | 从国内几个免费的代理网站上爬取免费代理 3 | """ 4 | import random 5 | import requests 6 | from lxml import etree 7 | from utils import utils 8 | 9 | 10 | class proxyspider: 11 | def __init__(self): 12 | self.config = utils().readconfig() 13 | self.headers = { 14 | 'User-Agent': random.choice(self.config['User_Agents']) 15 | } 16 | self.page = 3 17 | 18 | def spiderFromXici(self, url='https://www.xicidaili.com/nn/'): 19 | IPPool = [] 20 | for i in range(self.page): 21 | # a = config() 22 | Url = url + str(i) 23 | r = requests.get(url=Url, headers=self.headers) 24 | selector = etree.HTML(r.text) 25 | tr = selector.xpath('//tr') # 选取页面中的所有tr标签 26 | for t in range(len(tr)): 27 | if t >= 1: 28 | ippool = { 29 | 'ip': '', 30 | 'port': '', 31 | 'http': '' 32 | } 33 | ippool['ip'] = tr[t].xpath('./td[2]/text()')[0] 34 | ippool['port'] = tr[t].xpath('./td[3]/text()')[0] 35 | temp = tr[t].xpath('./td[6]/text()')[0] 36 | if temp == 'HTTP': 37 | ippool['http'] = 'http' 38 | elif temp == 'HTTPS': 39 | ippool['http'] = 'https' 40 | IPPool.append(ippool) 41 | return IPPool 42 | 43 | def spiderFromQuick(self): 44 | IPPool = [] 45 | for i in range(self.page): 46 | url = 'https://www.kuaidaili.com/free/inha/{}/'.format(str(i + 1)) 47 | r = requests.get(url=url, headers=self.headers) 48 | selector = etree.HTML(r.text) 49 | tr = selector.xpath('//tr') # 选取页面中的所有tr标签 50 | for t in range(len(tr)): 51 | if t >= 1: 52 | ippool = { 53 | 'ip': '', 54 | 'port': '', 55 | 'http': '' 56 | } 57 | ippool['ip'] = tr[t].xpath('./td[@data-title="IP"]/text()')[0] 58 | ippool['port'] = tr[t].xpath('./td[@data-title="PORT"]/text()')[0] 59 | temp = tr[t].xpath('./td[@data-title="类型"]/text()')[0] 60 | if temp == 'HTTP': 61 | ippool['http'] = 'http' 62 | elif temp == 'HTTPS': 63 | ippool['http'] = 'https' 64 | IPPool.append(ippool) 65 | return IPPool 66 | 67 | 68 | if __name__ == '__main__': 69 | for ip in proxyspider().spiderFromQuick(): 70 | print(ip) 71 | -------------------------------------------------------------------------------- /log.py: -------------------------------------------------------------------------------- 1 | """ 2 | east 3 | """ 4 | import yaml 5 | import logging 6 | import datetime 7 | 8 | 9 | class Logger: 10 | """自定义封装logging模块""" 11 | 12 | def __init__(self, default_level=logging.INFO): 13 | self.logger = logging.getLogger('__name__') 14 | # 初始化一个logger 15 | self.default_level = default_level 16 | logger_main_level, logger_file_level, logger_console_level = self.config() 17 | self.logger.setLevel(logger_main_level) 18 | fomatter = logging.Formatter( 19 | '[%(asctime)s] %(filename)s line:%(lineno)d [%(levelname)s]%(message)s') 20 | # 初始化输出到日志文件的handle 21 | file_name = './log/{}log.txt'.format(datetime.datetime.now().strftime('%Y-%m-%d')) 22 | file_log = logging.FileHandler(filename=file_name, 
encoding='utf-8') 23 | file_log.setLevel(logger_file_level) 24 | file_log.setFormatter(fomatter) 25 | # 初始化增加输出到控制台的handle 26 | console_log = logging.StreamHandler() 27 | console_log.setLevel(logger_console_level) 28 | console_log.setFormatter(fomatter) 29 | 30 | if self.logger.hasHandlers() is False: 31 | self.logger.addHandler(file_log) 32 | self.logger.addHandler(console_log) 33 | # self.logger.removeHandler(file_log) 34 | # self.logger.removeHandler(console_log) 35 | file_log.close() 36 | console_log.close() 37 | 38 | def config(self): 39 | """ 40 | :return: 返回配置中读取的level 41 | """ 42 | try: 43 | with open('./config.yaml', 'r', encoding='utf-8') as f: 44 | global config_data 45 | config_data = yaml.load(f) 46 | except IOError: 47 | self.logger.error('open config file failed') 48 | case1 = config_data['logConfig']['testLogLevel']['mainLogLevel'] 49 | case2 = config_data['logConfig']['testLogLevel']['fileLogLevel'] 50 | case3 = config_data['logConfig']['testLogLevel']['consoleLogLevel'] 51 | logger_main_level = self.switch(case=case1) 52 | logger_file_level = self.switch(case=case2) 53 | logger_console_level = self.switch(case=case3) 54 | return logger_main_level, logger_file_level, logger_console_level 55 | 56 | def switch(self, case): 57 | """ 58 | :param case: 传入需要做判断的level 59 | :return: 返回最终的level 60 | """ 61 | if case == 'DEBUG': 62 | result = logging.DEBUG 63 | elif case == 'INFO': 64 | result = logging.DEBUG 65 | elif case == 'ERROR': 66 | result = logging.ERROR 67 | elif case == 'CRITICAL': 68 | result = logging.CRITICAL 69 | else: 70 | result = self.logger.setLevel(self.default_level) 71 | return result 72 | 73 | def log(self): 74 | return self.logger 75 | -------------------------------------------------------------------------------- /SnrksMonitor/log.py: -------------------------------------------------------------------------------- 1 | """ 2 | east 3 | """ 4 | import yaml 5 | import logging 6 | import datetime 7 | 8 | 9 | class Logger: 10 | """自定义封装logging模块""" 11 | 12 | def __init__(self, default_level=logging.INFO): 13 | self.logger = logging.getLogger('__name__') 14 | # 初始化一个logger 15 | self.default_level = default_level 16 | logger_main_level, logger_file_level, logger_console_level = self.config() 17 | self.logger.setLevel(logger_main_level) 18 | fomatter = logging.Formatter( 19 | '[%(asctime)s] %(filename)s line:%(lineno)d [%(levelname)s]%(message)s') 20 | # 初始化输出到日志文件的handle 21 | file_name = './log/{}log.txt'.format(datetime.datetime.now().strftime('%Y-%m-%d')) 22 | file_log = logging.FileHandler(filename=file_name, encoding='utf-8') 23 | file_log.setLevel(logger_file_level) 24 | file_log.setFormatter(fomatter) 25 | # 初始化增加输出到控制台的handle 26 | console_log = logging.StreamHandler() 27 | console_log.setLevel(logger_console_level) 28 | console_log.setFormatter(fomatter) 29 | 30 | if self.logger.hasHandlers() is False: 31 | self.logger.addHandler(file_log) 32 | self.logger.addHandler(console_log) 33 | # self.logger.removeHandler(file_log) 34 | # self.logger.removeHandler(console_log) 35 | file_log.close() 36 | console_log.close() 37 | 38 | def config(self): 39 | """ 40 | :return: 返回配置中读取的level 41 | """ 42 | try: 43 | with open('./config.yaml', 'r', encoding='utf-8') as f: 44 | global config_data 45 | config_data = yaml.load(f, Loader=yaml.FullLoader) 46 | except IOError: 47 | self.logger.error('open config file failed') 48 | case1 = config_data['logConfig']['testLogLevel']['mainLogLevel'] 49 | case2 = config_data['logConfig']['testLogLevel']['fileLogLevel'] 50 | case3 = 
config_data['logConfig']['testLogLevel']['consoleLogLevel'] 51 | logger_main_level = self.switch(case=case1) 52 | logger_file_level = self.switch(case=case2) 53 | logger_console_level = self.switch(case=case3) 54 | return logger_main_level, logger_file_level, logger_console_level 55 | 56 | def switch(self, case): 57 | """ 58 | :param case: 传入需要做判断的level 59 | :return: 返回最终的level 60 | """ 61 | if case == 'DEBUG': 62 | result = logging.DEBUG 63 | elif case == 'INFO': 64 | result = logging.DEBUG 65 | elif case == 'ERROR': 66 | result = logging.ERROR 67 | elif case == 'CRITICAL': 68 | result = logging.CRITICAL 69 | else: 70 | result = self.logger.setLevel(self.default_level) 71 | return result 72 | 73 | def log(self): 74 | return self.logger 75 | -------------------------------------------------------------------------------- /SnrksMonitor/run_spider.py: -------------------------------------------------------------------------------- 1 | """ 2 | author: hefeng 3 | date: 2019.5.20 4 | function: 运行多线程爬虫 5 | """ 6 | from apscheduler.schedulers.background import BlockingScheduler 7 | import threading 8 | from SnrksMonitor.log import Logger 9 | from SnrksMonitor.appspider import AppSpiders 10 | from SnrksMonitor.db import db as database 11 | from SnrksMonitor.new_ios_push import PushToIos 12 | 13 | log = Logger().log() 14 | scheduler = BlockingScheduler() 15 | 16 | 17 | class RunSpider: 18 | def __init__(self): 19 | self.message = "1" 20 | self.spider = AppSpiders() 21 | self.db = database() 22 | self.data = [] 23 | self.district = self.spider.readyaml()['country'] 24 | self.Push = PushToIos() 25 | 26 | def get_data(self, district): 27 | """ 28 | 获取最新的数据 29 | :return:最新的数据 30 | """ 31 | log.info("开始爬取最新的数据") 32 | origin_data = self.spider.spiderDate(district) 33 | # print(origin_data) 34 | data = self.spider.updateCheck(origin_data) 35 | flag = data['isUpdate'] 36 | if flag is True: 37 | self.data.append(data) 38 | # print(data) 39 | else: 40 | log.info("本次没有更新") 41 | 42 | def insert_db(self): 43 | """ 44 | 插入数据库 45 | :return: 46 | """ 47 | log.info("重新数据库初始化...") 48 | self.spider.initDB() 49 | for item in self.data: 50 | self.db.updateShoesTable(data=item["data"]) 51 | 52 | def push(self): 53 | """ 54 | 推送 55 | :return: 56 | """ 57 | log.info("推送中...") 58 | if len(self.data) == 0: 59 | self.Push.push('test empty') 60 | else: 61 | for item in self.data: 62 | for shoe_data in item["data"]: 63 | msg_1 = f"[{shoe_data['shoeCountry']}] [{shoe_data['shoeSelectMethod']}] 时间:[{shoe_data['shoePublishTime']}]" 64 | msg_2 = f"{shoe_data['shoeName']} {shoe_data['shoeStyleCode']}" 65 | url_key = f"?url={shoe_data['shoeImageUrl']}" 66 | self.Push.push(message=msg_1+msg_2+url_key) 67 | 68 | def init_data(self): 69 | """ 70 | 初始化self中的data数据 71 | :return: 72 | """ 73 | self.data = [] 74 | 75 | 76 | def run_spider(): 77 | run = RunSpider() 78 | spider_thread_pool = [] 79 | for d in run.district: 80 | t = threading.Thread(target=run.get_data, args=([d])) 81 | spider_thread_pool.append(t) 82 | for t in spider_thread_pool: 83 | t.start() 84 | for t_j in spider_thread_pool: 85 | t_j.join() 86 | if len(run.data) > 0: 87 | run.insert_db() 88 | run.push() 89 | run.init_data() 90 | 91 | 92 | if __name__ == "__main__": 93 | print('start') 94 | run_spider() 95 | scheduler.add_job(run_spider, "interval", seconds=120, max_instances=5) 96 | scheduler.start() 97 | -------------------------------------------------------------------------------- /IPPoolForSnrks/validate.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | 用于验证ip代理是否可用 3 | """ 4 | from log import Logger 5 | import json 6 | import requests 7 | import datetime 8 | import traceback 9 | from SnrksMonitor.db import db 10 | 11 | log = Logger().log() 12 | 13 | 14 | class validate: 15 | def __init__(self): 16 | self.checkurl = 'http://httpbin.org/get' 17 | self.db = db() 18 | 19 | def validate(self, ips): 20 | """ 21 | 验证是什么代理,统计代理数量,插入数据库 22 | :param ips: 代理ip列表 23 | :return: None 24 | """ 25 | available_ips = [] 26 | unavailable_ips = [] 27 | for ip in ips: 28 | 29 | log.info(f'开始验证代理{ip["http"]}:{ip["ip"]}:{ip["port"]}') 30 | ip_port = f'{ip["ip"]}:{ip["port"]}' 31 | proxy = {ip['http']: ip_port} 32 | time = datetime.datetime.now().now().strftime('%Y-%m-%d %H:%M:%S') 33 | try: 34 | r = requests.get(url=self.checkurl, proxies=proxy, timeout=1) 35 | if r.status_code == 200: 36 | result = json.loads(r.text) 37 | ip_proxy = ip['ip'] 38 | ip_v = result['origin'] 39 | headers = result['headers'] 40 | proxy_connection = headers.get('Proxy-Connection', None) 41 | # 判断是否为普通匿名,透明,或者高级匿名,记录普通匿名和高级匿名到数据库 42 | if proxy_connection: 43 | log.info(f"{time} success: {ip['http']}://{ip['ip']}:{ip['port']} ----普通匿名") 44 | ip['availible'] = 2 # 可用 45 | available_ips.append(ip) 46 | elif ',' in ip_v: 47 | log.info(f"{time} success: {ip['http']}://{ip['ip']}:{ip['port']} ----透明") 48 | ip['availible'] = 1 # 可用 49 | unavailable_ips.append(ip) 50 | else: 51 | log.info(f"{time} success: {ip['http']}://{ip['ip']}:{ip['port']} ----高级匿名") 52 | ip['availible'] = 3 # 可用 53 | available_ips.append(ip) 54 | 55 | else: 56 | log.info(f"{time} failed: {ip['http']}://{ip['ip']}:{ip['port']} ----无效代理") 57 | ip['availible'] = 0 # 可用 58 | unavailable_ips.append(ip) 59 | except Exception as e: 60 | log.info('error:' + repr(e)) 61 | log.info(f"{time} error: {ip['http']}://{ip['ip']}:{ip['port']} ----无效代理") 62 | ip['availible'] = 0 # 可用 63 | unavailable_ips.append(ip) 64 | 65 | return available_ips, unavailable_ips 66 | 67 | def test_validate(self): 68 | ip_list = [{'ip': '36.26.220.69', 'port': '9999', 'http': 'https'}, 69 | {'ip': '116.209.54.75', 'port': '9999', 'http': 'https'}, 70 | {'ip': '116.209.53.242', 'port': '9999', 'http': 'https'}] 71 | self.validate(ip_list) 72 | 73 | 74 | -------------------------------------------------------------------------------- /IPPoolForSnrks/CheckFromDb.py: -------------------------------------------------------------------------------- 1 | """ 2 | 验证数据库中的IP是否可用,不可用则删除 3 | """ 4 | import traceback 5 | from log import Logger 6 | from SnrksMonitor.db import db 7 | 8 | 9 | log = Logger().log() 10 | 11 | 12 | class CheckFromDb: 13 | def __init__(self): 14 | self.db = db() 15 | 16 | def read_from_db(self,sql=None): 17 | """ 18 | 从数据库中读取数据 19 | :return: 20 | """ 21 | FetchSql = """SELECT * From 'ips'""" 22 | if sql == None: 23 | fetchSql = FetchSql 24 | else: 25 | fetchSql = sql 26 | db_data = self.db.fetchData(sql=fetchSql,c=None) 27 | ip_list = [] 28 | for data in db_data: 29 | ip_dict = { 30 | 'id': data[0], 31 | 'ip': data[2], 32 | 'http': data[1], 33 | 'port': data[3], 34 | 'availible': data[4] 35 | } 36 | ip_list.append(ip_dict) 37 | return ip_list 38 | 39 | def delete_from_db(self,list): 40 | """ 41 | 从数据库中删除数据 42 | :return: 43 | """ 44 | id_list = [] 45 | for ip in list: 46 | id_list.append(ip['id']) 47 | sql = """update ips set 'availible' = {} where 'id' = {} """.format (ip['availible'],ip['id']) 48 | self.db.updateTable(sql=sql,path=None) 49 | ids = 
tuple(id_list) 50 | self.db.deleteFromIpTable(ids=ids) 51 | 52 | def test_delete_from_db(self): 53 | ip_list = [{'id':1,'ip': '36.26.220.69', 'port': '9999', 'http': 'https'}, 54 | {'id':2,'ip': '116.209.54.75', 'port': '9999', 'http': 'https'}, 55 | {'id':3,'ip': '116.209.53.242', 'port': '9999', 'http': 'https'}] 56 | self.delete_from_db(list=ip_list) 57 | 58 | def inserte_into_db(self,list): 59 | """ 60 | 将ip插入数据库 61 | :param list: 62 | :return: 63 | """ 64 | for ip in list: 65 | data = [(None, ip['http'], ip['ip'], ip['port'], ip['availible'])] 66 | log.info ('开始插入数据库') 67 | try: 68 | self.db.insertIntoIpTable (data=data) 69 | except Exception as e: 70 | log.info ('{}'.format (traceback.format_exc ())) 71 | def test_inserte_into_db(self): 72 | ip_list = [{'id': 1, 'ip': '36.26.220.69', 'port': '9999', 'http': 'https'}, 73 | {'id': 2, 'ip': '116.209.54.75', 'port': '9999', 'http': 'https'}, 74 | {'id': 3, 'ip': '116.209.53.242', 'port': '9999', 'http': 'https'}] 75 | self.inserte_into_db(list=ip_list) 76 | 77 | def if_update(self,list): 78 | """ 79 | 判断跟数据库中对比是否更新 80 | :param list: 81 | :return: 82 | """ 83 | oldData = self.read_from_db() 84 | newData = list 85 | oldIPs= [] 86 | newIPs = [] 87 | isUpdate = False 88 | for oldip in oldData: 89 | oldIPs.append(oldip['ip']) 90 | 91 | for newip in newData: 92 | if newip['ip'] not in oldIPs: 93 | newIPs.append(newip) 94 | isUpdate = True 95 | updata_dict = { 96 | 'isupdate' : isUpdate, 97 | 'data': newIPs 98 | } 99 | return updata_dict 100 | 101 | def test_if_update(self): 102 | ip_list = [{'id': 1, 'ip': '36.26.220.691', 'port': '9999', 'http': 'https'}, 103 | {'id': 2, 'ip': '116.209.54.752', 'port': '9999', 'http': 'https'}, 104 | {'id': 3, 'ip': '116.209.53.242', 'port': '9999', 'http': 'https'}] 105 | a = self.if_update(list=ip_list) 106 | print(a) 107 | 108 | if __name__ == '__main__': 109 | # CheckFromDb().test_delete_from_db() 110 | CheckFromDb().test_if_update() 111 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | url: 'https://www.nike.com/cn/launch/?s=upcoming' 2 | 3 | User_Agents: [ 4 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 5 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", 6 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 7 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", 8 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", 9 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", 10 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", 11 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", 12 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", 13 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", 14 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; 
rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", 15 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", 16 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", 17 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", 18 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", 19 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", 20 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11", 21 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER", 22 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)", 23 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)", 24 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER", 25 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)", 26 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)", 27 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)", 28 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)", 29 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)", 30 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)", 31 | "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1", 32 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1", 33 | "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5", 34 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre", 35 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0", 36 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11", 37 | "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10", 38 | "SNKRS/3.9.0 (iPhone; iOS 12.1; Scale/2.00)" 39 | ] 40 | 41 | chatroomnickname: "snrks冲刺群" 42 | 43 | monitortime: 60 44 | 45 | maxtimeout: 30 46 | logConfig: 47 | testLogLevel: 48 | mainLogLevel: INFO 49 | fileLogLevel: INFO 50 | consoleLogLevel: INFO 51 | productLogLevel: 52 | mainLogLevel: INFO 53 | fileLogLevel: INFO 54 | consoleLogLevel: ERROR 55 | 56 | db: 57 | db_path: ../SnrksDataBase.db 58 | table_name: shoes 59 | -------------------------------------------------------------------------------- /IPPoolForSnrks/config.yaml: 
-------------------------------------------------------------------------------- 1 | url: 'https://www.nike.com/cn/launch/?s=upcoming' 2 | 3 | User_Agents: [ 4 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 5 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", 6 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 7 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", 8 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", 9 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", 10 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", 11 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", 12 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", 13 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", 14 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", 15 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", 16 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", 17 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", 18 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", 19 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", 20 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11", 21 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER", 22 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)", 23 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)", 24 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER", 25 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)", 26 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)", 27 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)", 28 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)", 29 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)", 30 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET 
CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)", 31 | "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1", 32 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1", 33 | "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5", 34 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre", 35 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0", 36 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11", 37 | "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10", 38 | "SNKRS/3.9.0 (iPhone; iOS 12.1; Scale/2.00)" 39 | ] 40 | 41 | chatroomnickname: "好好学习天天向上" 42 | 43 | monitortime: 60 44 | 45 | maxtimeout: 30 46 | logConfig: 47 | testLogLevel: 48 | mainLogLevel: INFO 49 | fileLogLevel: INFO 50 | consoleLogLevel: INFO 51 | productLogLevel: 52 | mainLogLevel: INFO 53 | fileLogLevel: INFO 54 | consoleLogLevel: ERROR 55 | 56 | db: 57 | db_path: ../SnrksDataBase.db 58 | table_name: shoes 59 | -------------------------------------------------------------------------------- /SnrksMonitor/config.yaml: -------------------------------------------------------------------------------- 1 | url: 'https://www.nike.com/cn/launch/?s=upcoming' 2 | 3 | User_Agents: [ 4 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 5 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", 6 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 7 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", 8 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", 9 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", 10 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", 11 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", 12 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", 13 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", 14 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", 15 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", 16 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", 17 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", 18 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", 19 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", 20 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like 
Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11", 21 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER", 22 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)", 23 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)", 24 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER", 25 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)", 26 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)", 27 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)", 28 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)", 29 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)", 30 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)", 31 | "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1", 32 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1", 33 | "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5", 34 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre", 35 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0", 36 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11", 37 | "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10", 38 | "SNKRS/3.9.0 (iPhone; iOS 12.1; Scale/2.00)" 39 | ] 40 | 41 | chatroomnickname: "snrks冲刺群" 42 | 43 | monitortime: 60 44 | 45 | maxtimeout: 30 46 | logConfig: 47 | testLogLevel: 48 | mainLogLevel: INFO 49 | fileLogLevel: INFO 50 | consoleLogLevel: INFO 51 | productLogLevel: 52 | mainLogLevel: INFO 53 | fileLogLevel: INFO 54 | consoleLogLevel: ERROR 55 | 56 | db: 57 | db_path: ../SnrksDataBase.db 58 | table_name: shoes 59 | 60 | country: ["cn","us","jp"] -------------------------------------------------------------------------------- /SnrksMonitor/webspider.py: -------------------------------------------------------------------------------- 1 | """ 2 | east 3 | """ 4 | 5 | import yaml 6 | import requests 7 | import re 8 | from lxml import html 9 | from SnrksMonitor.log import Logger 10 | 11 | # create a static dict to save history data 12 | 13 | log = Logger().log() 14 | 15 | 16 | class WebSpider: 17 | 18 | def __init__(self): 19 | self.datadict = [] 20 | self.history = [] 21 | 22 | @staticmethod 23 | def readyaml(): 24 | # read config from yaml document 25 | file = './config.yaml' 26 | try: 27 | f = open(file) 28 | global configdata 29 | configdata = yaml.load(f) 30 | except IOError: 31 | # print('open config failed') 
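# Note: 'configdata' is only assigned when the open above succeeds; if the
# config has never been loaded, the return below raises NameError, otherwise
# the previously loaded configuration is reused.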
32 | log.error('open config failed') 33 | return configdata 34 | 35 | def download_imgage(self, url, fileurl): 36 | # log.debug('start download image:%s' % url) 37 | try: 38 | r = requests.get(url=url) 39 | with open(fileurl, 'wb') as f: 40 | f.write(r.content) 41 | f.close() 42 | except Exception: 43 | log.error('failed to download picture') 44 | with open('./img/go.jpg', 'wb') as fa: 45 | content = fa.read() 46 | with open(fileurl, 'wb') as fb: 47 | fb.write(content) 48 | fb.close() 49 | fa.close() 50 | # print('图片保存地址为:%s' % fileurl) 51 | # log.info('the image save in:%s' % fileurl) 52 | 53 | def spider(self, url, useragent, timeout): 54 | # 爬取snrks网站内容 55 | # config = self.readyaml() 56 | # url = config['url'] 57 | # useragent = random.choice(config['User_Agents']) 58 | header = { 59 | 'User_Agents': useragent 60 | } 61 | # logging.log('start spiders') 62 | # print('开始请求nike网站') 63 | log.info('start connect to nike') 64 | r = requests.get(url=url, headers=header, timeout=timeout) 65 | etree = html.etree 66 | s = etree.HTML(r.text) 67 | # 以下为对nike网站的分析 68 | log.info("start analysis nike'website") 69 | shoes_div = s.xpath('//figure[@class="d-md-h ncss-col-sm-12 va-sm-t pb0-sm prl0-sm"]') 70 | fileindex = 1 # 计数 71 | log.info("get shoes' data") 72 | for shoes in shoes_div: 73 | # shoes_name = shoes.xpath('.//h3[@class="ncss-brand u-uppercase mb-1-sm fs16-sm"]/text()')[1] # 鞋名 74 | shoes_link = shoes.xpath('.//a[@class="card-link d-sm-b"]/@href') # 鞋子详情连接 75 | shoes_name = self.get_shoes_name(sc=shoes_link[0]) 76 | shoes_price = self.get_shoes_price(sc=shoes_link[0], header=header, timeout=timeout) # 价格 77 | shoes_img = shoes.xpath('.//img/@src') # 图片 78 | shoes_sale_num = self.get_sale_num(sc=shoes_img[0]) # 货号 79 | fileurl = './img/shoes%s.jpg' % fileindex 80 | # self.download_imgage(url=shoes_img[0], fileurl=fileurl) # 下载图片 81 | shoes_time = shoes.xpath('.//h6//div/text()') # 时间 82 | shoes_method = self.get_shoes_method(s=shoes_time[0]) # 抽签方式 83 | shoes_dict = {} 84 | shoes_dict.update({ 85 | 'name': shoes_name, 86 | 'img_url': shoes_img[0], 87 | 'img': fileurl, 88 | 'time': shoes_time, 89 | 'country': 'cn', 90 | 'sale_num': shoes_sale_num, 91 | 'price': shoes_price, 92 | 'method': shoes_method 93 | }) 94 | self.datadict.append(shoes_dict) 95 | fileindex += 1 96 | log.info('get [{}] shoes'.format(shoes_sale_num)) 97 | 98 | def data_analysis(self): 99 | """ 100 | 分析是否有更新 101 | :return: 返回更新数据 102 | """ 103 | log.info('start checking whether updated or not') 104 | update = [] 105 | if len(self.history) == 0: 106 | for shoes in self.datadict: 107 | self.history.append(shoes) 108 | update = self.history 109 | elif len(self.history) > 0: 110 | for shoes in self.datadict: 111 | if shoes in self.history: 112 | pass 113 | elif shoes not in self.history: 114 | update.append(shoes) 115 | self.history = self.datadict 116 | self.datadict = [] 117 | 118 | log.info('the number of updated:%s' % len(update)) 119 | return update 120 | 121 | def get_sale_num(self, sc): 122 | """ 123 | :param sc: 124 | :return: 获取货号 125 | """ 126 | pattern = re.compile('Com/.+_A') 127 | a = pattern.findall(sc) 128 | b = a[0][4:-2] 129 | return b 130 | 131 | def get_shoes_name(self, sc): 132 | """ 133 | :param sc:链接url 134 | :return: 详细鞋名 135 | """ 136 | pattern = re.compile('t/.+/') 137 | a = pattern.findall(sc) 138 | b = a[0][2:-1].replace('-', ' ') 139 | return b 140 | 141 | def get_shoes_price(self, sc, header, timeout): 142 | """ 143 | 获取价格 144 | :param sc:连接地址 145 | :return: 返回价格 146 | """ 147 | url = 
'https://www.nike.com' + sc 148 | price = '' 149 | try: 150 | r = requests.get(url=url, headers=header, timeout=timeout) 151 | except Exception: 152 | log.info('connect to product detail failed') 153 | price = '暂无' 154 | etree = html.etree 155 | s = etree.HTML(r.text) 156 | price = s.xpath('//div[@class="ncss-brand pb6-sm fs14-sm fs16-md"]/text()') 157 | return price 158 | 159 | def test_get_shoes_price(self): 160 | url = 'https://www.nike.com/cn/launch/t/air-jordan-6-retro-nrg-black-dark-concord/' 161 | header = { 162 | 'User_Agents': "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10" 163 | } 164 | WebSpider().get_shoes_price(sc=url, header=header, timeout=30) 165 | 166 | def get_shoes_method(self, s): 167 | """ 168 | :param s:发售时间 169 | :return: 抽签方式 170 | """ 171 | method = '' 172 | if '发售' in s: 173 | method = '小抽签' 174 | else: 175 | method = '大抽签' 176 | return method 177 | -------------------------------------------------------------------------------- /SnrksMonitor/db.py: -------------------------------------------------------------------------------- 1 | """ 2 | create to db 3 | """ 4 | import sqlite3 5 | import yaml 6 | from SnrksMonitor.log import Logger 7 | 8 | log = Logger().log() 9 | 10 | 11 | class db: 12 | def __init__(self): 13 | file = './config.yaml' 14 | try: 15 | f = open(file, 'r', encoding='UTF-8') 16 | global configdata 17 | configdata = yaml.load(f, Loader=yaml.FullLoader) 18 | except IOError: 19 | # logging.log('open config failed') 20 | log.info('open config failed') 21 | 22 | self.databasePath = configdata['db']['db_path'] 23 | self.table_name = configdata['db']['table_name'] 24 | 25 | def getConn(self, path=None): 26 | """ 27 | 获取数据库连接 28 | :return: 返回数据库连接对象 29 | """ 30 | if path is not None: 31 | conn = sqlite3.connect(path) 32 | return conn 33 | else: 34 | conn = sqlite3.connect(self.databasePath) 35 | return conn 36 | 37 | def getCursor(self, conn): 38 | """ 39 | 获取数据库连接游标 40 | :return: 返回数据库连接游标 41 | """ 42 | if conn is not None: 43 | return conn.cursor() 44 | else: 45 | return self.getConn(path=None).cursor() 46 | 47 | def createTable(self, path, sql): 48 | """ 49 | 创建数据库 50 | :return: 51 | """ 52 | if sql is not None and sql != '': 53 | conn = self.getConn() 54 | cu = self.getCursor(conn) 55 | cu.execute(sql) 56 | conn.commit() 57 | log.info('数据库创建成功') 58 | cu.close() 59 | conn.close() 60 | else: 61 | log.info('sql不正确') 62 | 63 | def dropTable(self, table, path): 64 | """ 65 | 删表 66 | :return: 67 | """ 68 | conn = self.getConn(path) 69 | cu = self.getCursor(conn) 70 | dropSql = """DROP TABLE '{}' """.format(table) 71 | cu.execute(dropSql) 72 | conn.commit() 73 | log.info('数据库表{}删除成功'.format(table)) 74 | cu.close() 75 | conn.close() 76 | 77 | def insertData(self, sql, d, path=None): 78 | """ 79 | 插入数据 80 | :param sql: 插入的sql语句 81 | :param d: 插入的数据 82 | :return: 83 | """ 84 | if sql is not None and sql != ' ': 85 | if d is not None: 86 | conn = self.getConn(path) 87 | cu = self.getCursor(conn) 88 | for data in d: 89 | cu.execute(sql, data) 90 | conn.commit() 91 | cu.close() 92 | conn.close() 93 | log.info('数据库数据插入成功') 94 | else: 95 | log.info('没有数据') 96 | else: 97 | log.info('没有sql') 98 | 99 | def fetchData(self, sql, c): 100 | """ 101 | 查询数据 102 | :param sql: 103 | :return: 104 | """ 105 | if sql is not None and sql != ' ': 106 | conn = self.getConn(c) 107 | cu = self.getCursor(conn) 108 | value = cu.execute(sql).fetchall() 109 | cu.close() 110 | conn.close() 111 | return value 112 | else: 113 | 
log.info('sql为空') 114 | return 'failed' 115 | 116 | def deleteData(self, sql, Path=None): 117 | """ 118 | 删除数据 119 | :param c: 120 | :param sql: 121 | :param d: 122 | :return: 123 | """ 124 | if sql is not None and sql != ' ': 125 | conn = self.getConn(Path) 126 | cu = self.getCursor(conn) 127 | cu.execute(sql) 128 | conn.commit() 129 | cu.close() 130 | conn.close() 131 | log.info('数据库中数据删除成功') 132 | else: 133 | log.info('sql为空') 134 | 135 | def init_ippool(self, path=None): 136 | """ 137 | 初始化IP池表 138 | :return: 139 | """ 140 | createIpTableSql = """CREATE TABLE 'ips'( 141 | 'id' INTEGER PRIMARY KEY AUTOINCREMENT, 142 | 'http' varchar (10), 143 | 'ip' varchar (30), 144 | 'port' varchar (10), 145 | 'availible' int(2) 146 | )""" 147 | self.createTable(sql=createIpTableSql, path=None) 148 | 149 | def insertIntoIpTable(self, data, path=None): 150 | """ 151 | 将数据插入IP池的数据库 152 | :param data: 153 | :param path: 154 | :return: 155 | """ 156 | inserSql = """INSERT INTO 'ips' values (?,?,?,?,?)""" 157 | self.insertData(d=data, path=path, sql=inserSql) 158 | 159 | def updateTable(self, sql, path): 160 | """ 161 | 更新数据 162 | :return: 163 | """ 164 | if sql is not None and sql != ' ': 165 | conn = self.getConn(Path) 166 | cu = self.getCursor(conn) 167 | cu.execute(sql) 168 | conn.commit() 169 | cu.close() 170 | conn.close() 171 | log.info('数据库中数据更新成功') 172 | else: 173 | log.info('sql为空') 174 | 175 | def deleteFromIpTable(self, ids, path=None): 176 | """ 177 | 从ips表中删除数据 178 | :param ids: 179 | :param path: 180 | :return: 181 | """ 182 | deleteSql = """DELETE FROM 'ips' where id in {}""".format(ids) 183 | self.deleteData(sql=deleteSql) 184 | 185 | def init_shoes(self): 186 | """初始化鞋子表""" 187 | createTableSql = """CREATE TABLE 'shoes'( 188 | 'id' INTEGER PRIMARY KEY AUTOINCREMENT, 189 | 'shoename' varchar (30), 190 | 'shoeColor' varchar (30), 191 | 'shoeImageUrl' varchar (100), 192 | 'shoeImage' varchar(100), 193 | 'shoeStyleCode' varchar (50), 194 | 'shoeSelectMethod' varchar (20), 195 | 'shoePrice' varchar (10), 196 | 'shoeSize' varchar (100), 197 | 'shoePublishTime' varchar (100), 198 | 'shoeCountry' varchar(10) 199 | )""" 200 | self.createTable(path=None, sql=createTableSql) 201 | 202 | def updateShoesTable(self, data): 203 | """ 204 | 对鞋子表进行更新 205 | :param data: 206 | :return: 207 | """ 208 | log.info('更新的鞋子数据插入中') 209 | insertSql = """INSERT INTO shoes values (?,?,?,?,?,?,?,?,?,?,?)""" 210 | insertData = [] 211 | # 把传进来的字典数据 转成插入数据库的数据tulble 212 | for item in data: 213 | dataturple = ( 214 | item['id'], 215 | item['shoeName'], 216 | item['shoeColor'], 217 | item['shoeImageUrl'], 218 | item['shoeImage'], 219 | item['shoeStyleCode'], 220 | item['shoeSelectMethod'], 221 | item['shoePrice'], 222 | item['shoeSize'], 223 | item['shoePublishTime'], 224 | item['shoeCountry'] 225 | ) 226 | insertData.append(dataturple) 227 | self.insertData(sql=insertSql, d=insertData, path=None) 228 | log.info('鞋子的最新数据插入成功') 229 | 230 | 231 | if __name__ == '__main__': 232 | db = db() 233 | # db.dropTable(table='shoes', path=None) 234 | db.init_shoes() 235 | # db.dropTable(table='shoes') 236 | # db.dropTable(table='update') 237 | # db.init_shoes() 238 | # createTableSql = """CREATE TABLE 'update'( 239 | # 'id' INTEGER PRIMARY KEY AUTOINCREMENT, 240 | # 'shoename' varchar (30), 241 | # 'shoeColor' varchar (30), 242 | # 'shoeImageUrl' varchar (100), 243 | # 'shoeImage' varchar(100), 244 | # 'shoeStyleCode' varchar (50), 245 | # 'shoeSelectMethod' varchar (20), 246 | # 'shoePrice' varchar (10), 247 | # 'shoeSize' varchar 
(100), 248 | # 'shoePublishTime' varchar (100), 249 | # 'shoeCountry' varchar(10) 250 | # )""" 251 | # db.createTable(c=None, sql= createTableSql) 252 | # db.init() 253 | # insertSql = """INSERT INTO shoes values (?,?,?,?,?,?,?,?,?)""" 254 | # insertData = [ 255 | # ( 256 | # 1, 'shoeName', '1asd/asd/asd', 'https://23123123', 'abc-123123', 257 | # 'leo', 258 | # '1299', 259 | # '1,2,3,4,5,6,7', 260 | # '2019-2-19 9:00' 261 | # ) 262 | # ] 263 | # db.insertData(sql=insertSql, d=insertData) 264 | # 265 | # fetchSql = """SELECT * FROM shoes""" 266 | # data = db.fetchData(sql=fetchSql, c=None) 267 | # log.info(data) 268 | -------------------------------------------------------------------------------- /SnrksMonitor/appspider.py: -------------------------------------------------------------------------------- 1 | """ 2 | @auther:EAST 3 | crawl data from app 4 | 获取全部鞋子 5 | https://api.nike.com/snkrs/content/v1/?&country=CN&language=zh-Hans&offset=0&orderBy=lastUpdated 6 | 获取entry 7 | https://api.nike.com/launch/entries/v2 8 | Authorization: 9 | 获取特定id鞋子 10 | https://api.nike.com/launch/launch_views/v2?filter=productId("productid") 11 | """ 12 | import json 13 | import random 14 | import yaml 15 | import time 16 | from SnrksMonitor.log import Logger 17 | from SnrksMonitor.db import db 18 | import requests 19 | import requests.adapters 20 | import traceback 21 | 22 | log = Logger().log() 23 | 24 | 25 | class AppSpiders: 26 | def __init__(self): 27 | self.url = { 28 | 'cn': 'https://api.nike.com/snkrs/content/v1/?&country=CN&language=zh-Hans&offset=0&orderBy=published', 29 | 'de': 'https://api.nike.com/snkrs/content/v1/?country=DE&language=de&offset=0&orderBy=published', 30 | 'us': 'https://api.nike.com/snkrs/content/v1/?country=US&language=en&offset=0&orderBy=published', 31 | 'jp': 'https://api.nike.com/snkrs/content/v1/?country=JP&language=ja&offset=0&orderBy=published' 32 | } 33 | 34 | self.entry = 'https://api.nike.com/launch/entries/v2' 35 | useragent = random.choice(self.readyaml()['User_Agents']) 36 | # auth = self.readyaml()['auth'] 37 | self.headers = { 38 | 'User_Agents': useragent 39 | # 'Authorization': auth 40 | } 41 | self.db = db() 42 | self.country = ['cn', 'us', 'jp'] 43 | requests.adapters.DEFAULT_RETRIES = 10 44 | 45 | def readyaml(self): 46 | # read config from yaml document 47 | file = './config.yaml' 48 | try: 49 | f = open(file, 'r', encoding='UTF-8') 50 | global configdata 51 | configdata = yaml.load(f, Loader=yaml.FullLoader) 52 | except IOError: 53 | # logging.log('open config failed') 54 | print('open config failed') 55 | return configdata 56 | 57 | def spiderDate(self, country): 58 | """ 59 | 通过snrks的首页接口获取到首页最新放送的鞋子数据 60 | 名字+颜色 图片 货号 发售方式 价格 库存码数 发售时间 61 | :return: 返回出来一个数组,包含前50条的鞋子数据 62 | """ 63 | header = { 64 | 'User-Agent': random.choice(self.readyaml()['User_Agents']) 65 | } 66 | proxy = { 67 | } 68 | log.info('最新的数据获取中...') 69 | url = self.url[country] 70 | global shoes 71 | try: 72 | responce = requests.get(url, headers=header) 73 | responceJson = json.loads(responce.text) 74 | shoes = responceJson['threads'] 75 | except Exception as e: 76 | ex = traceback.format_exc() 77 | isSuccess = False 78 | failedNum = 1 79 | while isSuccess == False: 80 | log.info('获取{}接口失败,正在重试第{}次......'.format(country, failedNum)) 81 | log.debug('以下为详细错误:{}'.format(ex)) 82 | responce = requests.get(url, headers=self.headers) 83 | responceJson = json.loads(responce.text) 84 | if 'threads' in responceJson.keys(): 85 | shoes = responceJson['threads'] 86 | log.info('重试成功,正在恢复') 87 | 
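# A 'threads' key means the SNKRS feed responded normally, so stop retrying;
# after 60 failed attempts the elif below gives up with an empty list.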
                    isSuccess = True
                elif failedNum == 60:
                    shoes = []
                    break
                else:
                    failedNum += 1
        shoesData = []
        for shoe in shoes:
            # 从接口中获取到一双鞋子的数据 包括pass
            product = shoe['product']
            shoeStyle = product['style']
            if shoeStyle == '999999':
                shoeDict = {
                    'id': None,
                    'shoeName': shoe['name'],
                    'shoeImageUrl': shoe['imageUrl'],
                    'shoeImage': None,
                    'shoeColor': '',
                    'shoeStyleCode': '',
                    'shoeSelectMethod': '',
                    'shoePrice': '',
                    'shoeSize': '',
                    'shoePublishTime': '',
                    'shoeCountry': country,
                    'shoeUpdateTime': ''
                }
            else:
                if shoe['name'] == '':
                    shoe_name = shoe['subtitle']
                else:
                    shoe_name = shoe['name']
                shoeSize = ''
                for sku in product['skus']:
                    shoeSize = '{}|{}'.format(shoeSize, sku['localizedSize'])
                try:
                    selector = product['selectionEngine']
                except KeyError:
                    selector = None
                t = product['startSellDate'][:19].replace('T', ' ')
                shoeTime = self.changeTime(t=t, c=country)
                shoeDict = {
                    'id': None,
                    'shoeName': shoe_name,
                    'shoeColor': product['colorDescription'],
                    'shoeImageUrl': product['imageUrl'],
                    'shoeImage': None,
                    'shoeStyleCode': "{}-{}".format(product['style'], product['colorCode']),
                    'shoeSelectMethod': selector,
                    'shoePrice': product['price']['msrp'],
                    'shoeSize': shoeSize,
                    'shoePublishTime': shoeTime,
                    'shoeCountry': country,
                    'shoeUpdateTime': shoe['lastUpdatedTime']
                }
            shoesData.append(shoeDict)
        log.info('最新的数据获取完成')
        return shoesData

    def getNewShoesData(self):
        """
        整合各个国区获取到的新数据
        :return:
        """
        allCountryShoesData = []
        for country in self.country:
            data = self.spiderDate(country=country)
            allCountryShoesData = data + allCountryShoesData
        return allCountryShoesData

    def changeTime(self, t, c):
        """
        返回根据不同区转换后的时间
        :param t: 时间
        :param c: 国家
        :return:
        """
        timeArray = time.strptime(t, "%Y-%m-%d %H:%M:%S")
        timestamp = int(time.mktime(timeArray))
        global resulttime
        if c == 'cn' or c == 'us':
            timestamp_cn = timestamp + 28800
            timeArray_cn = time.localtime(timestamp_cn)
            resulttime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray_cn)
        elif c == 'jp':
            timestamp_jp = timestamp + 32400
            timeArray_jp = time.localtime(timestamp_jp)
            resulttime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray_jp)
        elif c == 'de':
            timestamp_de = timestamp + 21600
            timeArray_de = time.localtime(timestamp_de)
            resulttime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray_de)
        return resulttime

    def updateCheck(self, data):
        """
        用来检查是否有数据更新
        :param data: 传入需要进行对比的数据
        :return: 返回一个更新的数组和是否更新,数组中存的是鞋子的货号
        """
        log.info('数据更新确认中...')
        fetchSql = """SELECT shoeStyleCode,shoename,shoeCountry FROM shoes"""
        OldData = self.db.fetchData(sql=fetchSql, c=None)
        if len(OldData) == 0:
            self.db.updateShoesTable(data=data)
            message = {
                'isUpdate': False,
                'data': 'no data'
            }
        else:
            CodeData_cn, NameData_cn = self.getCountryData(country='cn')
            CodeData_us, NameData_us = self.getCountryData(country='us')
            CodeData_de, NameData_de = self.getCountryData(country='de')
            CodeData_jp, NameData_jp = self.getCountryData(country='jp')
            isUpdate = False
            updateData = []
            # 获取到的新数据按照国区分别进行更新检查
            for newdata in data:
                if newdata['shoeCountry'] == 'cn':
                    if newdata['shoeStyleCode'] not in CodeData_cn or newdata['shoeName'] not in NameData_cn:
                        updateData.append(newdata)
                        # 把更新的鞋子的图片下载到本地并把url改为本地url
                        newdata['shoeImage'] = self.download_imgage(url=newdata['shoeImageUrl'],
                                                                    filename=newdata['shoeStyleCode'])
                        newdata['id'] = None
                        isUpdate = True
                    else:
                        # 判断鞋子的last更新时间是否比存在数据库中的更新时间大,以下三国一样的
                        pass
                elif newdata['shoeCountry'] == 'us':
                    if newdata['shoeStyleCode'] not in CodeData_us or newdata['shoeName'] not in NameData_us:
                        updateData.append(newdata)
                        # 把更新的鞋子的图片下载到本地并把url改为本地url
                        newdata['shoeImage'] = self.download_imgage(url=newdata['shoeImageUrl'],
                                                                    filename=newdata['shoeStyleCode'])
                        newdata['id'] = None
                        isUpdate = True
                elif newdata['shoeCountry'] == 'de':
                    if newdata['shoeStyleCode'] not in CodeData_de or newdata['shoeName'] not in NameData_de:
                        updateData.append(newdata)
                        # 把更新的鞋子的图片下载到本地并把url改为本地url
                        newdata['shoeImage'] = self.download_imgage(url=newdata['shoeImageUrl'],
                                                                    filename=newdata['shoeStyleCode'])
                        newdata['id'] = None
                        isUpdate = True
                elif newdata['shoeCountry'] == 'jp':
                    if newdata['shoeStyleCode'] not in CodeData_jp or newdata['shoeName'] not in NameData_jp:
                        updateData.append(newdata)
                        # 把更新的鞋子的图片下载到本地并把url改为本地url
                        newdata['shoeImage'] = self.download_imgage(url=newdata['shoeImageUrl'],
                                                                    filename=newdata['shoeStyleCode'])
                        newdata['id'] = None
                        isUpdate = True
            message = {
                'isUpdate': isUpdate,
                'data': updateData
            }
        log.info('数据更新确认完成')
        return message

    def getCountryData(self, country):
        """
        用于获取数据库中特定国家的数据
        :param country:
        :return:
        """
        fetchsql = """SELECT shoeStyleCode,shoename FROM shoes where shoeCountry ='{}' """.format(country)
        countryData = self.db.fetchData(sql=fetchsql, c=None)
        CodeData = []
        NameData = []
        for data in countryData:
            CodeData.append(data[0])
            NameData.append(data[1])
        return CodeData, NameData

    def insertToDb(self, data):
        log.info('向更新表中插入数据中...')
        insertSql = """INSERT INTO "update" values (?,?,?,?,?,?,?,?,?,?,?)"""
        insertData = []
        for item in data:
            datatuple = (
                item['id'],
                item['shoeName'],
                item['shoeColor'],
                item['shoeImageUrl'],
                item['shoeImage'],
                item['shoeStyleCode'],
                item['shoeSelectMethod'],
                item['shoePrice'],
                item['shoeSize'],
                item['shoePublishTime'],
                item['shoeCountry']
            )
            insertData.append(datatuple)
        self.db.insertData(sql=insertSql, d=insertData, path=None)
        log.info('向更新表中插入数据结束')

    def initDB(self):
        deleteSql = """DELETE FROM "update" where id < 100000"""
        self.db.deleteData(sql=deleteSql)
        log.info('初始化更新表完成...')

    def download_imgage(self, url, filename):
        """
        用于下载图片,并返回图片url
        :param url: 图片的网络地址
        :param filename: 需要存放在本地的图片名字
        :return: 返回本地的图片地址
        """
        log.debug('start download image:%s' % filename)
        fileurl = './img/{}.jpg'.format(filename)
        try:
            r = requests.get(url=url)
            with open(fileurl, 'wb') as f:
                f.write(r.content)
        except Exception:
            log.error('failed to download picture')
            # 下载失败时用本地占位图 go.jpg 代替
            with open('./img/go.jpg', 'rb') as fa:
                content = fa.read()
                with open(fileurl, 'wb') as fb:
                    fb.write(content)
        return fileurl


if __name__ == '__main__':
    shoesdata = AppSpiders()  # 实例化鞋子爬虫的类
    shoesdata.initDB()  # 初始化更新表
    NewData = shoesdata.getNewShoesData()  # 获取各国区的最新数据
    result = shoesdata.updateCheck(data=NewData)
    print(result)
    if result['isUpdate'] is True:
        updateData = result['data']
        shoesdata.insertToDb(data=updateData)
--------------------------------------------------------------------------------
/.idea/dbnavigator.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
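
For reference, a minimal sketch (not a file from the repository) of how the pieces dumped above fit together for a single monitoring pass. It only uses methods shown in this dump (db.init_shoes, AppSpiders.getNewShoesData, updateCheck, insertToDb, initDB) and assumes the repository root is on PYTHONPATH so that SnrksMonitor imports as a package (as appspider.py itself does) and that the working directory is SnrksMonitor/, so ./config.yaml, ./img/ and the SQLite file resolve; the helper name monitor_once is hypothetical.

# Hypothetical driver for one monitoring pass, built from the methods above.
from SnrksMonitor.appspider import AppSpiders
from SnrksMonitor.db import db


def monitor_once(spider):
    spider.initDB()                             # clear the "update" table
    new_data = spider.getNewShoesData()         # fetch the cn / us / jp feeds
    result = spider.updateCheck(data=new_data)  # diff against the "shoes" table
    if result['isUpdate']:
        spider.insertToDb(data=result['data'])  # stash newly released shoes
    return result


if __name__ == '__main__':
    database = db()
    # database.init_shoes()   # only needed once, to create the "shoes" table
    print(monitor_once(AppSpiders()))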