├── SnrksBot
│   └── demo.py
├── SnrksMonitor
│   ├── img
│   │   └── go.jpg
│   ├── SnrksDataBase.db
│   ├── main.py
│   ├── new_ios_push.py
│   ├── wechatnotice.py
│   ├── run.py
│   ├── log.py
│   ├── run_spider.py
│   ├── config.yaml
│   ├── webspider.py
│   ├── db.py
│   └── appspider.py
├── dependency.txt
├── SnrksDataBase.db
├── chromedriver.exe
├── .idea
│   ├── encodings.xml
│   ├── sqldialects.xml
│   ├── vcs.xml
│   ├── dictionaries
│   │   ├── EAST.xml
│   │   └── xiaodongyan.xml
│   ├── modules.xml
│   ├── misc.xml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── snrksMonitor.iml
│   ├── dataSources.xml
│   ├── codeStyles
│   │   └── Project.xml
│   └── dbnavigator.xml
├── README.md
├── utils.py
├── demo.py
├── IPPoolForSnrks
│   ├── runippool.py
│   ├── spiders.py
│   ├── validate.py
│   ├── CheckFromDb.py
│   └── config.yaml
├── log.py
└── config.yaml
/SnrksBot/demo.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/SnrksMonitor/img/go.jpg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dependency.txt:
--------------------------------------------------------------------------------
1 | requests==2.21.0
2 | PyYAML
3 | lxml==4.3.2
4 | itchat==1.3.10
5 | APScheduler
--------------------------------------------------------------------------------
/SnrksDataBase.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastcn/snrksMonitor/HEAD/SnrksDataBase.db
--------------------------------------------------------------------------------
/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastcn/snrksMonitor/HEAD/chromedriver.exe
--------------------------------------------------------------------------------
/SnrksMonitor/SnrksDataBase.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastcn/snrksMonitor/HEAD/SnrksMonitor/SnrksDataBase.db
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/SnrksMonitor/main.py:
--------------------------------------------------------------------------------
1 | """
2 | east
3 | """
4 | from SnrksMonitor.run import run
5 |
6 |
7 | if __name__ == '__main__':
8 |     """
9 |     Start the whole script.
10 |     """
11 | run()
12 |
--------------------------------------------------------------------------------
/.idea/sqldialects.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/dictionaries/EAST.xml:
--------------------------------------------------------------------------------
1 | <component name="ProjectDictionaryState">
2 |   <dictionary name="EAST">
3 |     <words>
4 |       <w>configdata</w>
5 |       <w>groupid</w>
6 |       <w>readyaml</w>
7 |       <w>useragent</w>
8 |     </words>
9 |   </dictionary>
10 | </component>
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # snrksMonitor
2 | Main modules: monitoring SNKRS for new releases (stock-level tracking will be added later), building an IP pool for the Nike API, and a purchase bot that may be added in the future.
3 | 
4 | The monitoring flow is:
5 | crawl the four country regions -> save to the database -> crawl again -> compare -> report the result -> sleep
6 | 
7 | The database is SQLite, so it is uploaded along with the code.
8 | 
9 | # Update 2019-5-21
10 | Added another push channel, bark. It is currently iOS-only.
11 | Download the Bark app from the App Store and get your device's token.
12 | Add your token to PushToIos.push_list in SnrksMonitor/new_ios_push.py.
13 |
--------------------------------------------------------------------------------
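The bark channel described in the README above can be smoke-tested on its own before wiring it into the monitor. A minimal sketch, assuming you already have a device token from the Bark app (the token below is a placeholder):

    import urllib.parse
    import requests

    token = "YOUR_BARK_TOKEN"                              # placeholder: the token shown in the Bark app
    text = urllib.parse.quote("SNKRS monitor test push")   # bark reads the message from the URL path
    resp = requests.get(f"https://api.day.app/{token}/{text}", timeout=10)
    print(resp.status_code, resp.text)

If the device receives the notification, the same token can be added to PushToIos.push_list in SnrksMonitor/new_ios_push.py.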
/.idea/dictionaries/xiaodongyan.xml:
--------------------------------------------------------------------------------
1 | <component name="ProjectDictionaryState">
2 |   <dictionary name="xiaodongyan">
3 |     <words>
4 |       <w>chatroomid</w>
5 |       <w>chatroomnickname</w>
6 |       <w>fileindex</w>
7 |       <w>fileurl</w>
8 |       <w>imgage</w>
9 |       <w>loggerlevel</w>
10 |       <w>loglevel</w>
11 |       <w>maxtimeout</w>
12 |       <w>monitortime</w>
13 |       <w>sleeptime</w>
14 |       <w>snrks</w>
15 |       <w>yaml</w>
16 |     </words>
17 |   </dictionary>
18 | </component>
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility helpers
3 | """
4 | import yaml
5 | import traceback
6 | 
7 | config_url = '../config.yaml'
8 | 
9 | 
10 | class utils:
11 |     def __init__(self):
12 |         pass
13 | 
14 |     def readconfig(self):
15 |         """
16 |         Read the configuration file.
17 |         :return: the configuration dict ({} on failure)
18 |         """
19 |         try:
20 |             with open(config_url, 'r', encoding='UTF-8') as f:
21 |                 configdict = yaml.load(f, Loader=yaml.FullLoader)
22 |         except IOError:
23 |             configdict = {}
24 |             print('open config failed\n {}'.format(traceback.format_exc()))
25 |         return configdict
28 |
--------------------------------------------------------------------------------
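A minimal usage sketch for the class above (note that config_url is relative, so the working directory has to sit one level below the repo root, as it does for the IPPoolForSnrks scripts; the print is illustrative only):

    from utils import utils

    cfg = utils().readconfig()          # returns {} if ../config.yaml cannot be opened
    if cfg:
        print(len(cfg['User_Agents']), 'user agents loaded')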
/.idea/snrksMonitor.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/SnrksMonitor/new_ios_push.py:
--------------------------------------------------------------------------------
1 | """
2 | author:hefeng
3 | function:push message to ios users with the help of BARK
4 | """
5 | import requests
6 | import yaml
7 | from SnrksMonitor.log import Logger
8 |
9 | log = Logger().log()
10 |
11 |
12 | class PushToIos:
13 | def __init__(self):
14 | self.push_url = "https://api.day.app/"
15 | self.push_list = [
16 | {
17 | "key": '123',
18 | "name": "east"
19 | }
20 | ]
21 |
22 | def push(self, message):
23 | for member in self.push_list:
24 | msg = f"{self.push_url}{member['key']}/{message}"
25 | requests.get(msg)
26 |             log.info(f"push sent -- {member['name']}/{msg}")
27 |
28 |
--------------------------------------------------------------------------------
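A usage sketch for PushToIos above. The message ends up in the URL path, so URL-encoding free-form text is a sensible precaution (an assumption on my part; the project passes shoe fields through unencoded):

    from urllib.parse import quote
    from SnrksMonitor.new_ios_push import PushToIos

    pusher = PushToIos()                                  # push_list must hold your own bark token
    pusher.push(quote("AJ1 High OG 2019-05-21 09:00"))    # quoted so spaces and colons survive the URL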
/demo.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import sqlite3
4 |
5 | def check_start_selldate():
6 | url = 'https://api.nike.com/snkrs/content/v1/?country=JP&language=ja&offset=0&orderBy=published'
7 | r = json.loads(requests.get(url).text)
8 | for item in r["threads"]:
9 |         try:
10 |             print('{}, {}'.format(item['name'], item['product']['startSellDate']))
11 |         except KeyError:
12 |             print(item["product"]["style"])
13 |
14 |
15 | def dbRead():
16 | db = sqlite3.connect('./SnrksDataBase.db')
17 |     cursor = db.cursor()
18 |     sql = """select shoeStyleCode, shoePublishTime from shoes"""
19 |     datas = cursor.execute(sql)
20 | return datas
21 |
22 |
23 | def check(news):
24 | datas = dbRead()
25 | shoeCode = []
26 | shoePublishTime = []
27 | for data in datas:
28 | shoeCode.append(data[0])
29 | shoePublishTime.append(data[1])
30 | for new in news:
31 | pass
32 |
33 |
34 | if __name__ == "__main__":
35 | check_start_selldate()
--------------------------------------------------------------------------------
/IPPoolForSnrks/runippool.py:
--------------------------------------------------------------------------------
1 | """
2 | Run the IP pool
3 | """
4 | import time
5 | import traceback
6 | from IPPoolForSnrks.spiders import proxyspider
7 | from IPPoolForSnrks.validate import validate
8 | from IPPoolForSnrks.CheckFromDb import CheckFromDb
9 | from log import Logger
10 |
11 | log = Logger().log()
12 | check = CheckFromDb()
13 |
14 | def run_add_pool():
15 |     """
16 |     Crawl new proxies and add them to the IP pool.
17 |     :return:
18 |     """
19 |     spider = proxyspider()
20 |     spider_data = spider.spiderFromQuick() + spider.spiderFromXici()
21 |     newIPS = check.if_update(spider_data)
22 |     available_ip, unavailable_ip = validate().validate(ips=newIPS['data'])
23 |     check.inserte_into_db(list=available_ip + unavailable_ip)
24 | 
25 | """This logic needs rework: every IP should be recorded and flagged as valid or invalid, which requires an extra flag column in the database."""
26 |
27 | def run_check_pool():
28 |     """
29 |     Re-validate the proxies already stored in the IP pool.
30 |     :return:
31 |     """
32 |     sql = """SELECT * FROM ips WHERE availible IN (1, 2, 3)"""
33 |     ip_list = check.read_from_db(sql=sql)
34 |     available_ip, unavailable_ip = validate().validate(ips=ip_list)
35 |     check.delete_from_db(unavailable_ip)
36 |
37 | if __name__ == '__main__':
38 | start = True
39 | while start:
40 |         try:
41 |             log.info('adding new IPs......')
42 |             run_add_pool()
43 |             log.info('sleeping 10s')
44 |             time.sleep(10)
45 |             log.info('re-checking the IPs already in the database....')
46 |             run_check_pool()
47 |             log.info('sleeping 10s')
48 |             time.sleep(10)
49 |         except Exception:
50 |             log.info('error:{}'.format(traceback.format_exc()))
51 |
--------------------------------------------------------------------------------
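The rework noted above (keep every IP and flag its validity instead of deleting rows) could reuse the existing availible column; a hedged sketch, not code from the project:

    def mark_ip(db, ip_id, availible):
        # availible: 0 = dead, 1 = transparent, 2 = anonymous, 3 = elite (same codes as validate.py)
        sql = "UPDATE ips SET availible = {} WHERE id = {}".format(availible, ip_id)
        db.updateTable(sql=sql, path=None)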
/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 | sqlite.xerial
2 | true
3 | org.sqlite.JDBC
4 | jdbc:sqlite:D:\python_study\snrksMonitor\SnrksMonitor\SnrksDataBase.db
5 | sqlite.xerial
6 | true
7 | org.sqlite.JDBC
8 | jdbc:sqlite:D:\python_study\snrksMonitor\SnrksDataBase.db
9 | file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.25.1/license.txt
10 | file://$APPLICATION_CONFIG_DIR$/jdbc-drivers/Xerial SQLiteJDBC/3.25.1/sqlite-jdbc-3.25.1.jar
--------------------------------------------------------------------------------
/SnrksMonitor/wechatnotice.py:
--------------------------------------------------------------------------------
1 | """
2 | east
3 | """
4 | import itchat
5 | import os
6 | from SnrksMonitor.log import Logger
7 |
8 | log = Logger().log()
9 |
10 |
11 | class wechat():
12 | def __init__(self):
13 | pass
14 |
15 | def login(self):
16 | itchat.auto_login(hotReload=True)
17 |
18 | def getFriends(self):
19 | friends = itchat.get_friends()
20 | log.debug(friends)
21 | return friends
22 |
23 |     def sendMessage(self, msg, user):
24 |         # send the push to the group chat (and optionally delete the image afterwards)
25 |         if type(msg) == list:
26 |             for item in msg:
27 |                 log.info('start sending message to the group chat')
28 |                 if item['shoePublishTime'] is None:
29 |                     message = '国家:[{}] [{}] '.format(item['shoeCountry'], item['shoeName'])
30 | else:
31 | message ="***************************\n[{}]\n国区:[{}]\n发售:[{}]\n货号:[{}]\n价格:[{}]\n抽签:[{}] \n库存:[{}]\n***************************".format(item['shoeName'],
32 | item['shoeCountry'],
33 | item['shoePublishTime'],
34 | item['shoeStyleCode'],
35 | item['shoePrice'],
36 | item['shoeSelectMethod'],
37 | item['shoeSize'])
38 | itchat.send_msg(msg=message, toUserName=user)
39 | itchat.send_image(fileDir=item['shoeImage'], toUserName=user)
40 |                 log.info('push finished')
41 | # try: # 删除图片
42 | # log.info('delete image:%s' % item['shoeImageUrl'])
43 | # os.remove(path=item['shoeImageUrl'])
44 | # except IOError:
45 | # log.error('delete failed')
46 |         elif type(msg) == str:
47 |             log.info('expected a list, got a str')
48 |         elif type(msg) == dict:
49 |             log.info('expected a list, got a dict')
50 | else:
51 | itchat.send_msg(msg=msg, toUserName=user)
52 | log.info('message has been send, waiting for next time to start')
53 |
54 |     def getChatRoomId(self, nickname):
55 |         # look up the group chat's UserName by its nickname
56 |         groupContent = itchat.get_chatrooms()
57 |         # log.debug(groupContent)
58 |         chatroomid = ''
59 |         for item in groupContent:
60 |             if item['NickName'] == nickname:
61 |                 chatroomid = item['UserName']
62 |         log.info('got chat room "%s" id: %s' % (nickname, chatroomid))
63 |         return chatroomid
64 |
65 | def init(self,groupname):
66 | self.login()
67 | groupid = self.getChatRoomId(nickname=groupname)
68 | return groupid
69 |
70 |
--------------------------------------------------------------------------------
/SnrksMonitor/run.py:
--------------------------------------------------------------------------------
1 | """
2 | Main run loop
3 | """
4 | 
5 | import SnrksMonitor.wechatnotice as notice
6 | import time
7 | import yaml
8 | from SnrksMonitor.log import Logger
9 | from SnrksMonitor.appspider import AppSpiders
10 | from SnrksMonitor.db import db as database
11 |
12 | log = Logger().log()
13 |
14 |
15 | class Utils:
16 |     @staticmethod
17 |     def readyaml():
18 |         # read config from yaml document
19 |         file = './config.yaml'
20 |         try:
21 |             with open(file, 'r', encoding='UTF-8') as f:
22 |                 global configdata
23 |                 configdata = yaml.load(f, Loader=yaml.FullLoader)
24 |         except IOError:
25 |             # logging.log('open config failed')
26 |             print('open config failed')
27 |         return configdata
28 |
29 |
30 | def run():
31 |     log.info('East SnrksMonitor is starting')
32 |     # read the timeout, crawl interval and WeChat group name from the config
33 |     u = Utils().readyaml()
34 |     # timeout = u['maxtimeout']
35 |     sleeptime = u['monitortime']
36 |     chatroomnickname = u['chatroomnickname']
37 | 
38 |     # log in to WeChat and get the group id
39 |     # push = notice.wechat()
40 |     # chatroomid = push.init(groupname=chatroomnickname)
41 |     num = 1
42 | 
43 |     # instantiate the spider
44 |     shoesdata = AppSpiders()
45 |     db = database()
46 |     while True:
47 |         log.info('round {} started'.format(num))
48 |         NewData = shoesdata.getNewShoesData()  # fetch the latest data
49 |         result = shoesdata.updateCheck(data=NewData)  # check whether anything changed
50 |         log.info('round {} has update: {}'.format(num, result['isUpdate']))
51 |         # if there is an update, refresh the update table and send the push
52 |         if result['isUpdate'] is True:
53 |             # re-initialise the shoes table and drop the update table
54 |             shoesdata.initDB()
55 |             # refresh the update table
56 |             updateData = result['data']
57 |             shoesdata.insertToDb(data=updateData)
58 |             # push to the WeChat group
59 |             updateShoesCodeList = []
60 |             for updatedata in updateData:
61 |                 updateShoesCodeList.append(updatedata['shoeStyleCode'])
62 |             log.info('round {} new style codes: {}'.format(num, updateShoesCodeList))
63 |             # push.sendMessage(user=chatroomid, msg=updateData)
64 |             # insert the updated shoes into the shoes table
65 |             db.updateShoesTable(data=updateData)
66 |         else:
67 |             # anything to do here?
68 |             log.info('round {}: no update, going to sleep'.format(num))
69 |         log.info('round {} finished'.format(num))
70 |         num += 1
71 |         time.sleep(sleeptime)  # sleep interval
72 |
--------------------------------------------------------------------------------
/.idea/codeStyles/Project.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/IPPoolForSnrks/spiders.py:
--------------------------------------------------------------------------------
1 | """
2 | Crawl free proxies from several Chinese free-proxy sites
3 | """
4 | import random
5 | import requests
6 | from lxml import etree
7 | from utils import utils
8 |
9 |
10 | class proxyspider:
11 | def __init__(self):
12 | self.config = utils().readconfig()
13 | self.headers = {
14 | 'User-Agent': random.choice(self.config['User_Agents'])
15 | }
16 | self.page = 3
17 |
18 | def spiderFromXici(self, url='https://www.xicidaili.com/nn/'):
19 | IPPool = []
20 | for i in range(self.page):
21 | # a = config()
22 | Url = url + str(i)
23 | r = requests.get(url=Url, headers=self.headers)
24 | selector = etree.HTML(r.text)
25 |             tr = selector.xpath('//tr')  # select every tr element on the page
26 | for t in range(len(tr)):
27 | if t >= 1:
28 | ippool = {
29 | 'ip': '',
30 | 'port': '',
31 | 'http': ''
32 | }
33 | ippool['ip'] = tr[t].xpath('./td[2]/text()')[0]
34 | ippool['port'] = tr[t].xpath('./td[3]/text()')[0]
35 | temp = tr[t].xpath('./td[6]/text()')[0]
36 | if temp == 'HTTP':
37 | ippool['http'] = 'http'
38 | elif temp == 'HTTPS':
39 | ippool['http'] = 'https'
40 | IPPool.append(ippool)
41 | return IPPool
42 |
43 | def spiderFromQuick(self):
44 | IPPool = []
45 | for i in range(self.page):
46 | url = 'https://www.kuaidaili.com/free/inha/{}/'.format(str(i + 1))
47 | r = requests.get(url=url, headers=self.headers)
48 | selector = etree.HTML(r.text)
49 |             tr = selector.xpath('//tr')  # select every tr element on the page
50 | for t in range(len(tr)):
51 | if t >= 1:
52 | ippool = {
53 | 'ip': '',
54 | 'port': '',
55 | 'http': ''
56 | }
57 | ippool['ip'] = tr[t].xpath('./td[@data-title="IP"]/text()')[0]
58 | ippool['port'] = tr[t].xpath('./td[@data-title="PORT"]/text()')[0]
59 | temp = tr[t].xpath('./td[@data-title="类型"]/text()')[0]
60 | if temp == 'HTTP':
61 | ippool['http'] = 'http'
62 | elif temp == 'HTTPS':
63 | ippool['http'] = 'https'
64 | IPPool.append(ippool)
65 | return IPPool
66 |
67 |
68 | if __name__ == '__main__':
69 | for ip in proxyspider().spiderFromQuick():
70 | print(ip)
71 |
--------------------------------------------------------------------------------
/log.py:
--------------------------------------------------------------------------------
1 | """
2 | east
3 | """
4 | import yaml
5 | import logging
6 | import datetime
7 |
8 |
9 | class Logger:
10 |     """Thin wrapper around the logging module"""
11 | 
12 |     def __init__(self, default_level=logging.INFO):
13 |         self.logger = logging.getLogger(__name__)
14 |         # initialise a logger
15 |         self.default_level = default_level
16 |         logger_main_level, logger_file_level, logger_console_level = self.config()
17 |         self.logger.setLevel(logger_main_level)
18 |         fomatter = logging.Formatter(
19 |             '[%(asctime)s] %(filename)s line:%(lineno)d [%(levelname)s]%(message)s')
20 |         # handler that writes to the daily log file
21 |         file_name = './log/{}log.txt'.format(datetime.datetime.now().strftime('%Y-%m-%d'))
22 |         file_log = logging.FileHandler(filename=file_name, encoding='utf-8')
23 |         file_log.setLevel(logger_file_level)
24 |         file_log.setFormatter(fomatter)
25 |         # handler that writes to the console
26 | console_log = logging.StreamHandler()
27 | console_log.setLevel(logger_console_level)
28 | console_log.setFormatter(fomatter)
29 |
30 | if self.logger.hasHandlers() is False:
31 | self.logger.addHandler(file_log)
32 | self.logger.addHandler(console_log)
33 | # self.logger.removeHandler(file_log)
34 | # self.logger.removeHandler(console_log)
35 | file_log.close()
36 | console_log.close()
37 |
38 |     def config(self):
39 |         """
40 |         :return: the log levels read from the config file
41 |         """
42 |         try:
43 |             with open('./config.yaml', 'r', encoding='utf-8') as f:
44 |                 global config_data
45 |                 config_data = yaml.load(f, Loader=yaml.FullLoader)
46 | except IOError:
47 | self.logger.error('open config file failed')
48 | case1 = config_data['logConfig']['testLogLevel']['mainLogLevel']
49 | case2 = config_data['logConfig']['testLogLevel']['fileLogLevel']
50 | case3 = config_data['logConfig']['testLogLevel']['consoleLogLevel']
51 | logger_main_level = self.switch(case=case1)
52 | logger_file_level = self.switch(case=case2)
53 | logger_console_level = self.switch(case=case3)
54 | return logger_main_level, logger_file_level, logger_console_level
55 |
56 |     def switch(self, case):
57 |         """
58 |         :param case: the level name to map
59 |         :return: the corresponding logging level
60 |         """
61 |         if case == 'DEBUG':
62 |             result = logging.DEBUG
63 |         elif case == 'INFO':
64 |             result = logging.INFO
65 |         elif case == 'ERROR':
66 |             result = logging.ERROR
67 |         elif case == 'CRITICAL':
68 |             result = logging.CRITICAL
69 |         else:
70 |             result = self.default_level
71 |         return result
72 |
73 | def log(self):
74 | return self.logger
75 |
--------------------------------------------------------------------------------
/SnrksMonitor/log.py:
--------------------------------------------------------------------------------
1 | """
2 | east
3 | """
4 | import yaml
5 | import logging
6 | import datetime
7 |
8 |
9 | class Logger:
10 |     """Thin wrapper around the logging module"""
11 | 
12 |     def __init__(self, default_level=logging.INFO):
13 |         self.logger = logging.getLogger(__name__)
14 |         # initialise a logger
15 |         self.default_level = default_level
16 |         logger_main_level, logger_file_level, logger_console_level = self.config()
17 |         self.logger.setLevel(logger_main_level)
18 |         fomatter = logging.Formatter(
19 |             '[%(asctime)s] %(filename)s line:%(lineno)d [%(levelname)s]%(message)s')
20 |         # handler that writes to the daily log file
21 |         file_name = './log/{}log.txt'.format(datetime.datetime.now().strftime('%Y-%m-%d'))
22 |         file_log = logging.FileHandler(filename=file_name, encoding='utf-8')
23 |         file_log.setLevel(logger_file_level)
24 |         file_log.setFormatter(fomatter)
25 |         # handler that writes to the console
26 | console_log = logging.StreamHandler()
27 | console_log.setLevel(logger_console_level)
28 | console_log.setFormatter(fomatter)
29 |
30 | if self.logger.hasHandlers() is False:
31 | self.logger.addHandler(file_log)
32 | self.logger.addHandler(console_log)
33 | # self.logger.removeHandler(file_log)
34 | # self.logger.removeHandler(console_log)
35 | file_log.close()
36 | console_log.close()
37 |
38 |     def config(self):
39 |         """
40 |         :return: the log levels read from the config file
41 |         """
42 | try:
43 | with open('./config.yaml', 'r', encoding='utf-8') as f:
44 | global config_data
45 | config_data = yaml.load(f, Loader=yaml.FullLoader)
46 | except IOError:
47 | self.logger.error('open config file failed')
48 | case1 = config_data['logConfig']['testLogLevel']['mainLogLevel']
49 | case2 = config_data['logConfig']['testLogLevel']['fileLogLevel']
50 | case3 = config_data['logConfig']['testLogLevel']['consoleLogLevel']
51 | logger_main_level = self.switch(case=case1)
52 | logger_file_level = self.switch(case=case2)
53 | logger_console_level = self.switch(case=case3)
54 | return logger_main_level, logger_file_level, logger_console_level
55 |
56 |     def switch(self, case):
57 |         """
58 |         :param case: the level name to map
59 |         :return: the corresponding logging level
60 |         """
61 |         if case == 'DEBUG':
62 |             result = logging.DEBUG
63 |         elif case == 'INFO':
64 |             result = logging.INFO
65 |         elif case == 'ERROR':
66 |             result = logging.ERROR
67 |         elif case == 'CRITICAL':
68 |             result = logging.CRITICAL
69 |         else:
70 |             result = self.default_level
71 |         return result
72 |
73 | def log(self):
74 | return self.logger
75 |
--------------------------------------------------------------------------------
/SnrksMonitor/run_spider.py:
--------------------------------------------------------------------------------
1 | """
2 | author: hefeng
3 | date: 2019.5.20
4 | function: run the multi-threaded spider
5 | """
6 | from apscheduler.schedulers.blocking import BlockingScheduler
7 | import threading
8 | from SnrksMonitor.log import Logger
9 | from SnrksMonitor.appspider import AppSpiders
10 | from SnrksMonitor.db import db as database
11 | from SnrksMonitor.new_ios_push import PushToIos
12 |
13 | log = Logger().log()
14 | scheduler = BlockingScheduler()
15 |
16 |
17 | class RunSpider:
18 | def __init__(self):
19 | self.message = "1"
20 | self.spider = AppSpiders()
21 | self.db = database()
22 | self.data = []
23 | self.district = self.spider.readyaml()['country']
24 | self.Push = PushToIos()
25 |
26 |     def get_data(self, district):
27 |         """
28 |         Fetch the latest data for one region.
29 |         :return: the latest data
30 |         """
31 |         log.info("fetching the latest data")
32 |         origin_data = self.spider.spiderDate(district)
33 |         # print(origin_data)
34 |         data = self.spider.updateCheck(origin_data)
35 |         flag = data['isUpdate']
36 |         if flag is True:
37 |             self.data.append(data)
38 |             # print(data)
39 |         else:
40 |             log.info("no update this round")
41 |
42 |     def insert_db(self):
43 |         """
44 |         Write the updated data to the database.
45 |         :return:
46 |         """
47 |         log.info("re-initialising the database...")
48 |         self.spider.initDB()
49 |         for item in self.data:
50 |             self.db.updateShoesTable(data=item["data"])
51 |
52 |     def push(self):
53 |         """
54 |         Send the push notifications.
55 |         :return:
56 |         """
57 |         log.info("pushing...")
58 | if len(self.data) == 0:
59 | self.Push.push('test empty')
60 | else:
61 | for item in self.data:
62 | for shoe_data in item["data"]:
63 | msg_1 = f"[{shoe_data['shoeCountry']}] [{shoe_data['shoeSelectMethod']}] 时间:[{shoe_data['shoePublishTime']}]"
64 | msg_2 = f"{shoe_data['shoeName']} {shoe_data['shoeStyleCode']}"
65 | url_key = f"?url={shoe_data['shoeImageUrl']}"
66 | self.Push.push(message=msg_1+msg_2+url_key)
67 |
68 |     def init_data(self):
69 |         """
70 |         Reset the data collected by this instance.
71 |         :return:
72 |         """
73 |         self.data = []
74 |
75 |
76 | def run_spider():
77 | run = RunSpider()
78 | spider_thread_pool = []
79 | for d in run.district:
80 |         t = threading.Thread(target=run.get_data, args=(d,))
81 | spider_thread_pool.append(t)
82 | for t in spider_thread_pool:
83 | t.start()
84 | for t_j in spider_thread_pool:
85 | t_j.join()
86 | if len(run.data) > 0:
87 | run.insert_db()
88 | run.push()
89 | run.init_data()
90 |
91 |
92 | if __name__ == "__main__":
93 | print('start')
94 | run_spider()
95 | scheduler.add_job(run_spider, "interval", seconds=120, max_instances=5)
96 | scheduler.start()
97 |
--------------------------------------------------------------------------------
/IPPoolForSnrks/validate.py:
--------------------------------------------------------------------------------
1 | """
2 | Check whether proxy IPs are usable
3 | """
4 | from log import Logger
5 | import json
6 | import requests
7 | import datetime
8 | import traceback
9 | from SnrksMonitor.db import db
10 |
11 | log = Logger().log()
12 |
13 |
14 | class validate:
15 | def __init__(self):
16 | self.checkurl = 'http://httpbin.org/get'
17 | self.db = db()
18 |
19 |     def validate(self, ips):
20 |         """
21 |         Classify each proxy (transparent / anonymous / elite) and split the list into usable and unusable proxies.
22 |         :param ips: list of proxy dicts
23 |         :return: (available_ips, unavailable_ips)
24 |         """
25 |         available_ips = []
26 |         unavailable_ips = []
27 |         for ip in ips:
28 | 
29 |             log.info(f'validating proxy {ip["http"]}:{ip["ip"]}:{ip["port"]}')
30 |             ip_port = f'{ip["ip"]}:{ip["port"]}'
31 |             proxy = {ip['http']: ip_port}
32 |             time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
33 | try:
34 | r = requests.get(url=self.checkurl, proxies=proxy, timeout=1)
35 | if r.status_code == 200:
36 | result = json.loads(r.text)
37 | ip_proxy = ip['ip']
38 | ip_v = result['origin']
39 | headers = result['headers']
40 | proxy_connection = headers.get('Proxy-Connection', None)
41 |                     # classify as transparent, anonymous or elite; keep anonymous and elite proxies
42 |                     if proxy_connection:
43 |                         log.info(f"{time} success: {ip['http']}://{ip['ip']}:{ip['port']} ----anonymous")
44 |                         ip['availible'] = 2  # usable
45 |                         available_ips.append(ip)
46 |                     elif ',' in ip_v:
47 |                         log.info(f"{time} success: {ip['http']}://{ip['ip']}:{ip['port']} ----transparent")
48 |                         ip['availible'] = 1  # transparent, kept in the unavailable list
49 |                         unavailable_ips.append(ip)
50 |                     else:
51 |                         log.info(f"{time} success: {ip['http']}://{ip['ip']}:{ip['port']} ----elite")
52 |                         ip['availible'] = 3  # usable
53 |                         available_ips.append(ip)
54 | 
55 |                 else:
56 |                     log.info(f"{time} failed: {ip['http']}://{ip['ip']}:{ip['port']} ----dead proxy")
57 |                     ip['availible'] = 0  # unusable
58 |                     unavailable_ips.append(ip)
59 |             except Exception as e:
60 |                 log.info('error:' + repr(e))
61 |                 log.info(f"{time} error: {ip['http']}://{ip['ip']}:{ip['port']} ----dead proxy")
62 |                 ip['availible'] = 0  # unusable
63 |                 unavailable_ips.append(ip)
64 |
65 | return available_ips, unavailable_ips
66 |
67 | def test_validate(self):
68 | ip_list = [{'ip': '36.26.220.69', 'port': '9999', 'http': 'https'},
69 | {'ip': '116.209.54.75', 'port': '9999', 'http': 'https'},
70 | {'ip': '116.209.53.242', 'port': '9999', 'http': 'https'}]
71 | self.validate(ip_list)
72 |
73 |
74 |
--------------------------------------------------------------------------------
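For context, the classification above keys off httpbin's response: a Proxy-Connection header marks an ordinary anonymous proxy, a comma in origin (client IP plus proxy IP) marks a transparent one, and everything else is treated as elite. A tiny illustration with made-up values:

    sample = {"origin": "1.2.3.4, 5.6.7.8", "headers": {}}   # placeholder response body
    is_transparent = "," in sample["origin"]                  # True: the proxy leaks the client address
    is_anonymous = sample["headers"].get("Proxy-Connection") is not None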
/IPPoolForSnrks/CheckFromDb.py:
--------------------------------------------------------------------------------
1 | """
2 | Check the IPs stored in the database and drop the ones that are no longer usable
3 | """
4 | import traceback
5 | from log import Logger
6 | from SnrksMonitor.db import db
7 |
8 |
9 | log = Logger().log()
10 |
11 |
12 | class CheckFromDb:
13 | def __init__(self):
14 | self.db = db()
15 |
16 |     def read_from_db(self, sql=None):
17 |         """
18 |         Read proxy rows from the database.
19 |         :return:
20 |         """
21 |         FetchSql = """SELECT * FROM ips"""
22 |         if sql is None:
23 |             fetchSql = FetchSql
24 |         else:
25 |             fetchSql = sql
26 |         db_data = self.db.fetchData(sql=fetchSql, c=None)
27 | ip_list = []
28 | for data in db_data:
29 | ip_dict = {
30 | 'id': data[0],
31 | 'ip': data[2],
32 | 'http': data[1],
33 | 'port': data[3],
34 | 'availible': data[4]
35 | }
36 | ip_list.append(ip_dict)
37 | return ip_list
38 |
39 |     def delete_from_db(self, list):
40 |         """
41 |         Flag and delete unusable proxies in the database.
42 |         :return:
43 |         """
44 |         id_list = []
45 |         for ip in list:
46 |             id_list.append(ip['id'])
47 |             sql = """UPDATE ips SET availible = {} WHERE id = {}""".format(ip['availible'], ip['id'])
48 |             self.db.updateTable(sql=sql, path=None)
49 | ids = tuple(id_list)
50 | self.db.deleteFromIpTable(ids=ids)
51 |
52 | def test_delete_from_db(self):
53 | ip_list = [{'id':1,'ip': '36.26.220.69', 'port': '9999', 'http': 'https'},
54 | {'id':2,'ip': '116.209.54.75', 'port': '9999', 'http': 'https'},
55 | {'id':3,'ip': '116.209.53.242', 'port': '9999', 'http': 'https'}]
56 | self.delete_from_db(list=ip_list)
57 |
58 |     def inserte_into_db(self, list):
59 |         """
60 |         Insert proxies into the database.
61 |         :param list:
62 |         :return:
63 |         """
64 |         for ip in list:
65 |             data = [(None, ip['http'], ip['ip'], ip['port'], ip['availible'])]
66 |             log.info('inserting proxy into the database')
67 |             try:
68 |                 self.db.insertIntoIpTable(data=data)
69 |             except Exception:
70 |                 log.info('{}'.format(traceback.format_exc()))
71 | def test_inserte_into_db(self):
72 | ip_list = [{'id': 1, 'ip': '36.26.220.69', 'port': '9999', 'http': 'https'},
73 | {'id': 2, 'ip': '116.209.54.75', 'port': '9999', 'http': 'https'},
74 | {'id': 3, 'ip': '116.209.53.242', 'port': '9999', 'http': 'https'}]
75 | self.inserte_into_db(list=ip_list)
76 |
77 |     def if_update(self, list):
78 |         """
79 |         Compare the crawled proxies against the database and keep only the new ones.
80 |         :param list:
81 |         :return:
82 |         """
83 | oldData = self.read_from_db()
84 | newData = list
85 | oldIPs= []
86 | newIPs = []
87 | isUpdate = False
88 | for oldip in oldData:
89 | oldIPs.append(oldip['ip'])
90 |
91 | for newip in newData:
92 | if newip['ip'] not in oldIPs:
93 | newIPs.append(newip)
94 | isUpdate = True
95 | updata_dict = {
96 | 'isupdate' : isUpdate,
97 | 'data': newIPs
98 | }
99 | return updata_dict
100 |
101 | def test_if_update(self):
102 | ip_list = [{'id': 1, 'ip': '36.26.220.691', 'port': '9999', 'http': 'https'},
103 | {'id': 2, 'ip': '116.209.54.752', 'port': '9999', 'http': 'https'},
104 | {'id': 3, 'ip': '116.209.53.242', 'port': '9999', 'http': 'https'}]
105 | a = self.if_update(list=ip_list)
106 | print(a)
107 |
108 | if __name__ == '__main__':
109 | # CheckFromDb().test_delete_from_db()
110 | CheckFromDb().test_if_update()
111 |
--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
1 | url: 'https://www.nike.com/cn/launch/?s=upcoming'
2 |
3 | User_Agents: [
4 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
5 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
6 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
7 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
8 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
9 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
10 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
11 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
12 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
13 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
14 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
15 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
16 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
17 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
18 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
19 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
20 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
21 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
22 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
23 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
24 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
25 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
26 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
27 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
28 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
29 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
30 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
31 | "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
32 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
33 | "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
34 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
35 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
36 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
37 | "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
38 | "SNKRS/3.9.0 (iPhone; iOS 12.1; Scale/2.00)"
39 | ]
40 |
41 | chatroomnickname: "snrks冲刺群"
42 |
43 | monitortime: 60
44 |
45 | maxtimeout: 30
46 | logConfig:
47 | testLogLevel:
48 | mainLogLevel: INFO
49 | fileLogLevel: INFO
50 | consoleLogLevel: INFO
51 | productLogLevel:
52 | mainLogLevel: INFO
53 | fileLogLevel: INFO
54 | consoleLogLevel: ERROR
55 |
56 | db:
57 | db_path: ../SnrksDataBase.db
58 | table_name: shoes
59 |
--------------------------------------------------------------------------------
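For reference, a minimal sketch of how this file is consumed by the code above it in the tree (safe_load is used here as a precaution; the project itself calls yaml.load):

    import random
    import yaml

    with open('./config.yaml', 'r', encoding='utf-8') as f:
        cfg = yaml.safe_load(f)

    headers = {'User-Agent': random.choice(cfg['User_Agents'])}
    sleep_seconds = cfg['monitortime']   # 60 in this config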
/IPPoolForSnrks/config.yaml:
--------------------------------------------------------------------------------
1 | url: 'https://www.nike.com/cn/launch/?s=upcoming'
2 |
3 | User_Agents: [
4 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
5 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
6 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
7 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
8 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
9 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
10 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
11 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
12 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
13 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
14 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
15 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
16 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
17 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
18 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
19 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
20 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
21 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
22 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
23 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
24 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
25 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
26 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
27 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
28 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
29 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
30 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
31 | "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
32 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
33 | "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
34 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
35 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
36 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
37 | "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
38 | "SNKRS/3.9.0 (iPhone; iOS 12.1; Scale/2.00)"
39 | ]
40 |
41 | chatroomnickname: "好好学习天天向上"
42 |
43 | monitortime: 60
44 |
45 | maxtimeout: 30
46 | logConfig:
47 | testLogLevel:
48 | mainLogLevel: INFO
49 | fileLogLevel: INFO
50 | consoleLogLevel: INFO
51 | productLogLevel:
52 | mainLogLevel: INFO
53 | fileLogLevel: INFO
54 | consoleLogLevel: ERROR
55 |
56 | db:
57 | db_path: ../SnrksDataBase.db
58 | table_name: shoes
59 |
--------------------------------------------------------------------------------
/SnrksMonitor/config.yaml:
--------------------------------------------------------------------------------
1 | url: 'https://www.nike.com/cn/launch/?s=upcoming'
2 |
3 | User_Agents: [
4 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
5 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
6 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
7 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
8 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
9 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
10 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
11 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
12 | "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
13 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
14 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
15 | "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
16 | "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
17 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
18 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
19 | "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
20 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
21 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
22 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
23 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
24 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 LBBROWSER",
25 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
26 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
27 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
28 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; 360SE)",
29 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
30 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)",
31 | "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
32 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1",
33 | "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
34 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0b13pre) Gecko/20110307 Firefox/4.0b13pre",
35 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0",
36 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
37 | "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
38 | "SNKRS/3.9.0 (iPhone; iOS 12.1; Scale/2.00)"
39 | ]
40 |
41 | chatroomnickname: "snrks冲刺群"
42 |
43 | monitortime: 60
44 |
45 | maxtimeout: 30
46 | logConfig:
47 | testLogLevel:
48 | mainLogLevel: INFO
49 | fileLogLevel: INFO
50 | consoleLogLevel: INFO
51 | productLogLevel:
52 | mainLogLevel: INFO
53 | fileLogLevel: INFO
54 | consoleLogLevel: ERROR
55 |
56 | db:
57 | db_path: ../SnrksDataBase.db
58 | table_name: shoes
59 |
60 | country: ["cn","us","jp"]
--------------------------------------------------------------------------------
/SnrksMonitor/webspider.py:
--------------------------------------------------------------------------------
1 | """
2 | east
3 | """
4 |
5 | import yaml
6 | import requests
7 | import re
8 | from lxml import html
9 | from SnrksMonitor.log import Logger
10 |
11 | # create a static dict to save history data
12 |
13 | log = Logger().log()
14 |
15 |
16 | class WebSpider:
17 |
18 | def __init__(self):
19 | self.datadict = []
20 | self.history = []
21 |
22 | @staticmethod
23 | def readyaml():
24 | # read config from yaml document
25 | file = './config.yaml'
26 |         try:
27 |             with open(file, 'r', encoding='UTF-8') as f:
28 |                 global configdata
29 |                 configdata = yaml.load(f, Loader=yaml.FullLoader)
30 | except IOError:
31 | # print('open config failed')
32 | log.error('open config failed')
33 | return configdata
34 |
35 | def download_imgage(self, url, fileurl):
36 | # log.debug('start download image:%s' % url)
37 | try:
38 | r = requests.get(url=url)
39 | with open(fileurl, 'wb') as f:
40 | f.write(r.content)
41 | f.close()
42 |         except Exception:
43 |             log.error('failed to download picture')
44 |             # fall back to the bundled placeholder image
45 |             with open('./img/go.jpg', 'rb') as fa:
46 |                 content = fa.read()
47 |             with open(fileurl, 'wb') as fb:
48 |                 fb.write(content)
49 | 
50 | # print('图片保存地址为:%s' % fileurl)
51 | # log.info('the image save in:%s' % fileurl)
52 |
53 |     def spider(self, url, useragent, timeout):
54 |         # crawl the SNKRS launch page
55 |         # config = self.readyaml()
56 |         # url = config['url']
57 |         # useragent = random.choice(config['User_Agents'])
58 |         header = {
59 |             'User-Agent': useragent
60 |         }
61 | # logging.log('start spiders')
62 | # print('开始请求nike网站')
63 | log.info('start connect to nike')
64 | r = requests.get(url=url, headers=header, timeout=timeout)
65 | etree = html.etree
66 | s = etree.HTML(r.text)
67 |         # parse the nike page
68 |         log.info("start analysis nike'website")
69 |         shoes_div = s.xpath('//figure[@class="d-md-h ncss-col-sm-12 va-sm-t pb0-sm prl0-sm"]')
70 |         fileindex = 1  # counter
71 |         log.info("get shoes' data")
72 |         for shoes in shoes_div:
73 |             # shoes_name = shoes.xpath('.//h3[@class="ncss-brand u-uppercase mb-1-sm fs16-sm"]/text()')[1]  # shoe name
74 |             shoes_link = shoes.xpath('.//a[@class="card-link d-sm-b"]/@href')  # link to the product page
75 |             shoes_name = self.get_shoes_name(sc=shoes_link[0])
76 |             shoes_price = self.get_shoes_price(sc=shoes_link[0], header=header, timeout=timeout)  # price
77 |             shoes_img = shoes.xpath('.//img/@src')  # image
78 |             shoes_sale_num = self.get_sale_num(sc=shoes_img[0])  # style code
79 |             fileurl = './img/shoes%s.jpg' % fileindex
80 |             # self.download_imgage(url=shoes_img[0], fileurl=fileurl)  # download the image
81 |             shoes_time = shoes.xpath('.//h6//div/text()')  # release time
82 |             shoes_method = self.get_shoes_method(s=shoes_time[0])  # draw type
83 | shoes_dict = {}
84 | shoes_dict.update({
85 | 'name': shoes_name,
86 | 'img_url': shoes_img[0],
87 | 'img': fileurl,
88 | 'time': shoes_time,
89 | 'country': 'cn',
90 | 'sale_num': shoes_sale_num,
91 | 'price': shoes_price,
92 | 'method': shoes_method
93 | })
94 | self.datadict.append(shoes_dict)
95 | fileindex += 1
96 | log.info('get [{}] shoes'.format(shoes_sale_num))
97 |
98 |     def data_analysis(self):
99 |         """
100 |         Check whether anything has been updated.
101 |         :return: the updated entries
102 |         """
103 | log.info('start checking whether updated or not')
104 | update = []
105 | if len(self.history) == 0:
106 | for shoes in self.datadict:
107 | self.history.append(shoes)
108 | update = self.history
109 | elif len(self.history) > 0:
110 | for shoes in self.datadict:
111 | if shoes in self.history:
112 | pass
113 | elif shoes not in self.history:
114 | update.append(shoes)
115 | self.history = self.datadict
116 | self.datadict = []
117 |
118 | log.info('the number of updated:%s' % len(update))
119 | return update
120 |
121 |     def get_sale_num(self, sc):
122 |         """
123 |         :param sc: image url
124 |         :return: the style code
125 |         """
126 | pattern = re.compile('Com/.+_A')
127 | a = pattern.findall(sc)
128 | b = a[0][4:-2]
129 | return b
130 |
131 |     def get_shoes_name(self, sc):
132 |         """
133 |         :param sc: product page url
134 |         :return: the full shoe name
135 |         """
136 | pattern = re.compile('t/.+/')
137 | a = pattern.findall(sc)
138 | b = a[0][2:-1].replace('-', ' ')
139 | return b
140 |
141 |     def get_shoes_price(self, sc, header, timeout):
142 |         """
143 |         Fetch the price from the product page.
144 |         :param sc: product page path
145 |         :return: the price
146 |         """
147 |         url = 'https://www.nike.com' + sc
148 |         price = ''
149 |         try:
150 |             r = requests.get(url=url, headers=header, timeout=timeout)
151 |         except Exception:
152 |             log.info('connect to product detail failed')
153 |             return '暂无'
154 |         etree = html.etree
155 |         s = etree.HTML(r.text)
156 |         price = s.xpath('//div[@class="ncss-brand pb6-sm fs14-sm fs16-md"]/text()')
157 |         return price
158 |
159 | def test_get_shoes_price(self):
160 | url = 'https://www.nike.com/cn/launch/t/air-jordan-6-retro-nrg-black-dark-concord/'
161 | header = {
162 | 'User_Agents': "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10"
163 | }
164 | WebSpider().get_shoes_price(sc=url, header=header, timeout=30)
165 |
166 |     def get_shoes_method(self, s):
167 |         """
168 |         :param s: release-time text
169 |         :return: the draw type
170 |         """
171 | method = ''
172 | if '发售' in s:
173 | method = '小抽签'
174 | else:
175 | method = '大抽签'
176 | return method
177 |
--------------------------------------------------------------------------------
/SnrksMonitor/db.py:
--------------------------------------------------------------------------------
1 | """
2 | Database access layer (SQLite)
3 | """
4 | import sqlite3
5 | import yaml
6 | from SnrksMonitor.log import Logger
7 |
8 | log = Logger().log()
9 |
10 |
11 | class db:
12 | def __init__(self):
13 | file = './config.yaml'
14 |         try:
15 |             with open(file, 'r', encoding='UTF-8') as f:
16 |                 global configdata
17 |                 configdata = yaml.load(f, Loader=yaml.FullLoader)
18 | except IOError:
19 | # logging.log('open config failed')
20 | log.info('open config failed')
21 |
22 | self.databasePath = configdata['db']['db_path']
23 | self.table_name = configdata['db']['table_name']
24 |
25 | def getConn(self, path=None):
26 |         """
27 |         Get a database connection.
28 |         :return: the sqlite3 connection object
29 |         """
30 | if path is not None:
31 | conn = sqlite3.connect(path)
32 | return conn
33 | else:
34 | conn = sqlite3.connect(self.databasePath)
35 | return conn
36 |
37 | def getCursor(self, conn):
38 |         """
39 |         Get a cursor for the connection.
40 |         :return: the cursor
41 |         """
42 | if conn is not None:
43 | return conn.cursor()
44 | else:
45 | return self.getConn(path=None).cursor()
46 |
47 |     def createTable(self, path, sql):
48 |         """
49 |         Create a table.
50 |         :return:
51 |         """
52 |         if sql is not None and sql != '':
53 |             conn = self.getConn()
54 |             cu = self.getCursor(conn)
55 |             cu.execute(sql)
56 |             conn.commit()
57 |             log.info('table created successfully')
58 |             cu.close()
59 |             conn.close()
60 |         else:
61 |             log.info('invalid sql')
62 |
63 |     def dropTable(self, table, path):
64 |         """
65 |         Drop a table.
66 |         :return:
67 |         """
68 |         conn = self.getConn(path)
69 |         cu = self.getCursor(conn)
70 |         dropSql = """DROP TABLE '{}' """.format(table)
71 |         cu.execute(dropSql)
72 |         conn.commit()
73 |         log.info('table {} dropped successfully'.format(table))
74 |         cu.close()
75 |         conn.close()
76 |
77 |     def insertData(self, sql, d, path=None):
78 |         """
79 |         Insert rows.
80 |         :param sql: the INSERT statement
81 |         :param d: the rows to insert
82 |         :return:
83 |         """
84 |         if sql is not None and sql != ' ':
85 |             if d is not None:
86 |                 conn = self.getConn(path)
87 |                 cu = self.getCursor(conn)
88 |                 for data in d:
89 |                     cu.execute(sql, data)
90 |                     conn.commit()
91 |                 cu.close()
92 |                 conn.close()
93 |                 log.info('rows inserted successfully')
94 |             else:
95 |                 log.info('no data')
96 |         else:
97 |             log.info('no sql')
98 |
99 |     def fetchData(self, sql, c):
100 |         """
101 |         Query rows.
102 |         :param sql:
103 |         :return:
104 |         """
105 |         if sql is not None and sql != ' ':
106 |             conn = self.getConn(c)
107 |             cu = self.getCursor(conn)
108 |             value = cu.execute(sql).fetchall()
109 |             cu.close()
110 |             conn.close()
111 |             return value
112 |         else:
113 |             log.info('empty sql')
114 |             return 'failed'
115 |
116 |     def deleteData(self, sql, Path=None):
117 |         """
118 |         Delete rows.
119 |         :param sql: the DELETE statement
120 |         :param Path: optional database path
121 |         :return:
122 |         """
123 |         if sql is not None and sql != ' ':
124 |             conn = self.getConn(Path)
125 |             cu = self.getCursor(conn)
126 |             cu.execute(sql)
127 |             conn.commit()
128 |             cu.close()
129 |             conn.close()
130 |             log.info('rows deleted successfully')
131 |         else:
132 |             log.info('empty sql')
134 |
135 | def init_ippool(self, path=None):
136 |         """
137 |         Create the IP-pool table.
138 |         :return:
139 |         """
140 | createIpTableSql = """CREATE TABLE 'ips'(
141 | 'id' INTEGER PRIMARY KEY AUTOINCREMENT,
142 | 'http' varchar (10),
143 | 'ip' varchar (30),
144 | 'port' varchar (10),
145 | 'availible' int(2)
146 | )"""
147 | self.createTable(sql=createIpTableSql, path=None)
148 |
149 | def insertIntoIpTable(self, data, path=None):
150 |         """
151 |         Insert rows into the IP-pool table.
152 |         :param data:
153 |         :param path:
154 |         :return:
155 |         """
156 | inserSql = """INSERT INTO 'ips' values (?,?,?,?,?)"""
157 | self.insertData(d=data, path=path, sql=inserSql)
158 |
159 |     def updateTable(self, sql, path):
160 |         """
161 |         Update rows.
162 |         :return:
163 |         """
164 |         if sql is not None and sql != ' ':
165 |             conn = self.getConn(path)
166 |             cu = self.getCursor(conn)
167 |             cu.execute(sql)
168 |             conn.commit()
169 |             cu.close()
170 |             conn.close()
171 |             log.info('rows updated successfully')
172 |         else:
173 |             log.info('empty sql')
174 |
175 | def deleteFromIpTable(self, ids, path=None):
176 |         """
177 |         Delete rows from the ips table.
178 |         :param ids:
179 |         :param path:
180 |         :return:
181 |         """
182 | deleteSql = """DELETE FROM 'ips' where id in {}""".format(ids)
183 | self.deleteData(sql=deleteSql)
184 |
185 | def init_shoes(self):
186 |         """Create the shoes table"""
187 | createTableSql = """CREATE TABLE 'shoes'(
188 | 'id' INTEGER PRIMARY KEY AUTOINCREMENT,
189 | 'shoename' varchar (30),
190 | 'shoeColor' varchar (30),
191 | 'shoeImageUrl' varchar (100),
192 | 'shoeImage' varchar(100),
193 | 'shoeStyleCode' varchar (50),
194 | 'shoeSelectMethod' varchar (20),
195 | 'shoePrice' varchar (10),
196 | 'shoeSize' varchar (100),
197 | 'shoePublishTime' varchar (100),
198 | 'shoeCountry' varchar(10)
199 | )"""
200 | self.createTable(path=None, sql=createTableSql)
201 |
202 |     def updateShoesTable(self, data):
203 |         """
204 |         Insert the updated shoes into the shoes table.
205 |         :param data:
206 |         :return:
207 |         """
208 |         log.info('inserting updated shoe data')
209 |         insertSql = """INSERT INTO shoes values (?,?,?,?,?,?,?,?,?,?,?)"""
210 |         insertData = []
211 |         # convert the incoming dicts into tuples for the INSERT
212 | for item in data:
213 | dataturple = (
214 | item['id'],
215 | item['shoeName'],
216 | item['shoeColor'],
217 | item['shoeImageUrl'],
218 | item['shoeImage'],
219 | item['shoeStyleCode'],
220 | item['shoeSelectMethod'],
221 | item['shoePrice'],
222 | item['shoeSize'],
223 | item['shoePublishTime'],
224 | item['shoeCountry']
225 | )
226 | insertData.append(dataturple)
227 | self.insertData(sql=insertSql, d=insertData, path=None)
228 |         log.info('latest shoe data inserted successfully')
229 |
230 |
231 | if __name__ == '__main__':
232 | db = db()
233 | # db.dropTable(table='shoes', path=None)
234 | db.init_shoes()
235 | # db.dropTable(table='shoes')
236 | # db.dropTable(table='update')
237 | # db.init_shoes()
238 | # createTableSql = """CREATE TABLE 'update'(
239 | # 'id' INTEGER PRIMARY KEY AUTOINCREMENT,
240 | # 'shoename' varchar (30),
241 | # 'shoeColor' varchar (30),
242 | # 'shoeImageUrl' varchar (100),
243 | # 'shoeImage' varchar(100),
244 | # 'shoeStyleCode' varchar (50),
245 | # 'shoeSelectMethod' varchar (20),
246 | # 'shoePrice' varchar (10),
247 | # 'shoeSize' varchar (100),
248 | # 'shoePublishTime' varchar (100),
249 | # 'shoeCountry' varchar(10)
250 | # )"""
251 | # db.createTable(c=None, sql= createTableSql)
252 | # db.init()
253 | # insertSql = """INSERT INTO shoes values (?,?,?,?,?,?,?,?,?)"""
254 | # insertData = [
255 | # (
256 | # 1, 'shoeName', '1asd/asd/asd', 'https://23123123', 'abc-123123',
257 | # 'leo',
258 | # '1299',
259 | # '1,2,3,4,5,6,7',
260 | # '2019-2-19 9:00'
261 | # )
262 | # ]
263 | # db.insertData(sql=insertSql, d=insertData)
264 | #
265 | # fetchSql = """SELECT * FROM shoes"""
266 | # data = db.fetchData(sql=fetchSql, c=None)
267 | # log.info(data)
268 |
--------------------------------------------------------------------------------
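A minimal usage sketch for the helpers above. Assumptions (not shown in this file excerpt): the db class falls back to its default SQLite file when path is None, and insertData runs the statement over a list of row tuples, as updateShoesTable's call implies; the five values per row mirror the ips columns id/http/ip/port/availible.

    from SnrksMonitor.db import db

    store = db()
    store.init_ippool()                      # one-time: create the 'ips' table
    # each row matches the five ips columns: id (autoincrement), http, ip, port, availible
    store.insertIntoIpTable(data=[(None, 'http', '127.0.0.1', '8080', 1)])
    store.deleteFromIpTable(ids='(1, 2)')    # ids is spliced verbatim into "... where id in {}"
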
/SnrksMonitor/appspider.py:
--------------------------------------------------------------------------------
1 | """
2 | @auther:EAST
3 | crawl data from app
4 | 获取全部鞋子
5 | https://api.nike.com/snkrs/content/v1/?&country=CN&language=zh-Hans&offset=0&orderBy=lastUpdated
6 | 获取entry
7 | https://api.nike.com/launch/entries/v2
8 | Authorization:
9 | 获取特定id鞋子
10 | https://api.nike.com/launch/launch_views/v2?filter=productId("productid")
11 | """
12 | import json
13 | import random
14 | import yaml
15 | import time
16 | from SnrksMonitor.log import Logger
17 | from SnrksMonitor.db import db
18 | import requests
19 | import requests.adapters
20 | import traceback
21 |
22 | log = Logger().log()
23 |
24 |
25 | class AppSpiders:
26 | def __init__(self):
27 | self.url = {
28 | 'cn': 'https://api.nike.com/snkrs/content/v1/?&country=CN&language=zh-Hans&offset=0&orderBy=published',
29 | 'de': 'https://api.nike.com/snkrs/content/v1/?country=DE&language=de&offset=0&orderBy=published',
30 | 'us': 'https://api.nike.com/snkrs/content/v1/?country=US&language=en&offset=0&orderBy=published',
31 | 'jp': 'https://api.nike.com/snkrs/content/v1/?country=JP&language=ja&offset=0&orderBy=published'
32 | }
33 |
34 | self.entry = 'https://api.nike.com/launch/entries/v2'
35 | useragent = random.choice(self.readyaml()['User_Agents'])
36 | # auth = self.readyaml()['auth']
37 | self.headers = {
38 | 'User-Agent': useragent
39 | # 'Authorization': auth
40 | }
41 | self.db = db()
42 | self.country = ['cn', 'us', 'jp']
43 | requests.adapters.DEFAULT_RETRIES = 10
44 |
45 | def readyaml(self):
46 | # read config from the yaml document
47 | file = './config.yaml'
48 | configdata = {}
49 | try:
50 | with open(file, 'r', encoding='UTF-8') as f:
51 | configdata = yaml.load(f, Loader=yaml.FullLoader)
52 | except IOError:
53 | # fall back to an empty config if the file cannot be opened
54 | log.error('open config failed')
55 | return configdata
56 |
57 | def spiderDate(self, country):
58 | """
59 | 通过snrks的首页接口获取到首页最新放送的鞋子数据
60 | 名字+颜色 图片 货号 发售方式 价格 库存码数 发售时间
61 | :return: 返回出来一个数组,包含前50条的鞋子数据
62 | """
63 | header = {
64 | 'User-Agent': random.choice(self.readyaml()['User_Agents'])
65 | }
66 | proxy = {
67 | }
68 | log.info('Fetching the latest data...')
69 | url = self.url[country]
70 | global shoes
71 | try:
72 | responce = requests.get(url, headers=header)
73 | responceJson = json.loads(responce.text)
74 | shoes = responceJson['threads']
75 | except Exception:
76 | ex = traceback.format_exc()
77 | isSuccess = False
78 | failedNum = 1
79 | while not isSuccess:
80 | log.info('Failed to fetch the {} endpoint, retry #{}...'.format(country, failedNum))
81 | log.debug('Full error details: {}'.format(ex))
82 | responce = requests.get(url, headers=self.headers)
83 | responceJson = json.loads(responce.text)
84 | if 'threads' in responceJson:
85 | shoes = responceJson['threads']
86 | log.info('Retry succeeded, resuming')
87 | isSuccess = True
88 | elif failedNum == 60:
89 | shoes = []
90 | break
91 | else:
92 | failedNum += 1
93 | shoesData = []
94 | for shoe in shoes:
95 | """ 从接口中获取到一双鞋子的数据 包括pass """
96 | product = shoe['product']
97 | shoeStyle = product['style']
98 | if shoeStyle == '999999':
99 | shoeDict = {
100 | 'id': None,
101 | 'shoeName': shoe['name'],
102 | 'shoeImageUrl': shoe['imageUrl'],
103 | 'shoeImage': None,
104 | 'shoeColor': '',
105 | 'shoeStyleCode': '',
106 | 'shoeSelectMethod': '',
107 | 'shoePrice': '',
108 | 'shoeSize': '',
109 | 'shoePublishTime': '',
110 | 'shoeCountry': country,
111 | 'shoeUpdateTime': ''
112 | }
113 | else:
114 | if shoe['name'] == '':
115 | shoe_name = shoe['subtitle']
116 | else:
117 | shoe_name = shoe['name']
118 | shoeSize = ''
119 | for sku in product['skus']:
120 | shoeSize = '{}|{}'.format(shoeSize, sku['localizedSize'])
121 | try:
122 | selector = product['selectionEngine']
123 | except KeyError:
124 | selector = None
125 | t = product['startSellDate'][:19].replace('T', ' ')
126 | shoeTime = self.changeTime(t=t, c=country)
127 | shoeDict = {
128 | 'id': None,
129 | 'shoeName': shoe_name,
130 | 'shoeColor': product['colorDescription'],
131 | 'shoeImageUrl': product['imageUrl'],
132 | 'shoeImage': None,
133 | 'shoeStyleCode': "{}-{}".format(product['style'], product['colorCode']),
134 | 'shoeSelectMethod': selector,
135 | 'shoePrice': product['price']['msrp'],
136 | 'shoeSize': shoeSize,
137 | 'shoePublishTime': shoeTime,
138 | 'shoeCountry': country,
139 | 'shoeUpdateTime': shoe['lastUpdatedTime']
140 | }
141 | shoesData.append(shoeDict)
142 | log.info('Finished fetching the latest data')
143 | return shoesData
144 |
145 | def getNewShoesData(self):
146 | """
147 | 整合四个区的获取到的新数据
148 | :return:
149 | """
150 | allCountrtyShoesData = []
151 | for country in self.country:
152 | data = self.spiderDate(country=country)
153 | allCountrtyShoesData = data + allCountrtyShoesData
154 | return allCountrtyShoesData
155 |
156 | def changeTime(self, t, c):
157 | """
158 | 返回根据不同区转换后的时间
159 | :param t: 时间
160 | :param c: 国家
161 | :return:
162 | """
163 | timeArray = time.strptime(t, "%Y-%m-%d %H:%M:%S")
164 | timestamp = int(time.mktime(timeArray))
165 | global resulttime
166 | if c == 'cn' or c == 'us':
167 | timestamp_cn = timestamp + 28800
168 | timeArray_cn = time.localtime(timestamp_cn)
169 | resulttime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray_cn)
170 | elif c == 'jp':
171 | timestamp_jp = timestamp + 32400
172 | timeArray_jp = time.localtime(timestamp_jp)
173 | resulttime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray_jp)
174 | elif c == 'de':
175 | timestamp_cn = timestamp + 21600
176 | timeArray_cn = time.localtime(timestamp_cn)
177 | resulttime = time.strftime("%Y-%m-%d %H:%M:%S", timeArray_cn)
178 | return resulttime
179 |
180 | def updateCheck(self, data):
181 | """
182 | 用来检查是否有数据更新
183 | :param data: 传入需要进行对比的数据
184 | :return: 返回一个更新的数组和是否更新,数组中存的是鞋子的货号
185 | """
186 | log.info('数据更新确认中...')
187 | fetchSql = """SELECT shoeStyleCode,shoename,shoeCountry FROM shoes"""
188 | OldData = self.db.fetchData(sql=fetchSql, c=None)
189 | if len(OldData) == 0:
190 | self.db.updateShoesTable(data=data)
191 | message = {
192 | 'isUpdate': False,
193 | 'data': 'no data'
194 | }
195 | else:
196 | CodeData_cn, NameData_cn = self.getCountryData(country='cn')
197 | CodeData_us, NameData_us = self.getCountryData(country='us')
198 | CodeData_de, NameData_de = self.getCountryData(country='de')
199 | CodeData_jp, NameData_jp = self.getCountryData(country='jp')
200 | isUpdate = False
201 | updateData = []
202 | # Check the freshly fetched data for updates, region by region
203 | for newdata in data:
204 | if newdata['shoeCountry'] == 'cn':
205 | if newdata['shoeStyleCode'] not in CodeData_cn or newdata['shoeName'] not in NameData_cn:
206 | updateData.append(newdata)
207 | # Download the updated shoe's image locally and switch the url to the local path
208 | newdata['shoeImage'] = self.download_imgage(url=newdata['shoeImageUrl'],
209 | filename=newdata['shoeStyleCode'])
210 | newdata['id'] = None
211 | isUpdate = True
212 | else:
213 | # Compare the shoe's last-updated time against the one stored in the database; the three regions below work the same way
214 | pass
215 | elif newdata['shoeCountry'] == 'us':
216 | if newdata['shoeStyleCode'] not in CodeData_us or newdata['shoeName'] not in NameData_us:
217 | updateData.append(newdata)
218 | # Download the updated shoe's image locally and switch the url to the local path
219 | newdata['shoeImage'] = self.download_imgage(url=newdata['shoeImageUrl'],
220 | filename=newdata['shoeStyleCode'])
221 | newdata['id'] = None
222 | isUpdate = True
223 | elif newdata['shoeCountry'] == 'de':
224 | if newdata['shoeStyleCode'] not in CodeData_de or newdata['shoeName'] not in NameData_de:
225 | updateData.append(newdata)
226 | # Download the updated shoe's image locally and switch the url to the local path
227 | newdata['shoeImage'] = self.download_imgage(url=newdata['shoeImageUrl'],
228 | filename=newdata['shoeStyleCode'])
229 | newdata['id'] = None
230 | isUpdate = True
231 | elif newdata['shoeCountry'] == 'jp':
232 | if newdata['shoeStyleCode'] not in CodeData_jp or newdata['shoeName'] not in NameData_jp:
233 | updateData.append(newdata)
234 | # Download the updated shoe's image locally and switch the url to the local path
235 | newdata['shoeImage'] = self.download_imgage(url=newdata['shoeImageUrl'],
236 | filename=newdata['shoeStyleCode'])
237 | newdata['id'] = None
238 | isUpdate = True
239 | message = {
240 | 'isUpdate': isUpdate,
241 | 'data': updateData
242 | }
243 | log.info('Finished checking for data updates')
244 | return message
245 |
246 | def getCountryData(self, country):
247 | """
248 | 用于获取数据库中特定国家的数据
249 | :param country:
250 | :return:
251 | """
252 | fetchsql = """SELECT shoeStyleCode,shoename FROM shoes where shoeCountry ='{}' """.format(country)
253 | countryData = self.db.fetchData(sql=fetchsql, c=None)
254 | CodeData = []
255 | NameData = []
256 | for data in countryData:
257 | CodeData.append(data[0])
258 | NameData.append(data[1])
259 | return CodeData, NameData
260 |
261 | def insertToDb(self, data):
262 | log.info('Inserting data into the update table...')
263 | insertSql = """INSERT INTO "update" values (?,?,?,?,?,?,?,?,?,?,?)"""
264 | insertData = []
265 | for item in data:
266 | dataturple = (
267 | item['id'],
268 | item['shoeName'],
269 | item['shoeColor'],
270 | item['shoeImageUrl'],
271 | item['shoeImage'],
272 | item['shoeStyleCode'],
273 | item['shoeSelectMethod'],
274 | item['shoePrice'],
275 | item['shoeSize'],
276 | item['shoePublishTime'],
277 | item['shoeCountry']
278 | )
279 | insertData.append(dataturple)
280 | self.db.insertData(sql=insertSql, d=insertData, path=None)
281 | log.info('Finished inserting data into the update table')
282 |
283 | def initDB(self):
284 | deleteSql = """DELETE FROM "update" where id < 100000"""
285 | self.db.deleteData(sql=deleteSql)
286 | log.info('Finished initializing the update table...')
287 |
288 | def download_imgage(self, url, filename):
289 | """
290 | 用于下载图片,并返回图片url
291 | :param url: 图片的网络地址
292 | :param filename: 需要存放在本地的图片名字
293 | :return: 返回本地的图片地址
294 | """
295 | log.debug('start download image:%s' % filename)
296 | fileurl = './img/{}.jpg'.format(filename)
297 | try:
298 |
299 | r = requests.get(url=url)
300 | with open(fileurl, 'wb') as f:
301 | f.write(r.content)
302 | f.close()
303 | except Exception:
304 | log.error('failed to download picture')
305 | with open('./img/go.jpg', 'wb') as fa:
306 | content = fa.read()
307 | with open(fileurl, 'wb') as fb:
308 | fb.write(content)
309 | fb.close()
310 | fa.close()
311 | return fileurl
312 |
313 |
314 | if __name__ == '__main__':
315 | shoesdata = AppSpiders() # instantiate the shoe spider class
316 | shoesdata.initDB() # initialize the update table
317 | NewData = shoesdata.getNewShoesData() # fetch the latest data for every configured region
318 | result = shoesdata.updateCheck(data=NewData)
319 | print(result)
320 | if result['isUpdate'] is True:
321 | updateData = result['data']
322 | shoesdata.insertToDb(data=updateData)
323 |
--------------------------------------------------------------------------------
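For reference, a sketch of one full monitoring pass built from the methods above. Assumptions not shown here: the shoes and update tables already exist (e.g. created via db.init_shoes()), and the script runs from the SnrksMonitor working directory so ./config.yaml and ./img/ resolve.

    from SnrksMonitor.appspider import AppSpiders

    spider = AppSpiders()
    spider.initDB()                           # clear the previous contents of the "update" table
    latest = spider.getNewShoesData()         # pull the cn/us/jp feeds
    result = spider.updateCheck(data=latest)  # diff against the shoes table, download new images
    if result['isUpdate']:
        spider.insertToDb(data=result['data'])   # stage the new releases for notification
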
/.idea/dbnavigator.xml:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------