├── chromedriver.exe
├── rule.json
├── RequestsHeader.py
├── config.ini
├── README.md
├── log.py
├── read_DB.py
├── config.py
├── WebpageShot.py
├── Notification.py
└── webmonitor.py

/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0x24bin/WebMonitor/HEAD/chromedriver.exe
--------------------------------------------------------------------------------
/rule.json:
--------------------------------------------------------------------------------
{
    "single": {
        "https://www.baidu.com/": {},
        "https://www.qq.com/": {},
        "https://www.ifeng.com/": {}
    },
    "TimeInterval": 3
}
--------------------------------------------------------------------------------
/RequestsHeader.py:
--------------------------------------------------------------------------------
user_agents = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36"

req_headers = {
    'User-Agent': user_agents
}
--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
[default]
TimeInterval : 3600
FailTimeInterval : 60
retriesnum : 1
recorddir : D:\webmonitor
dbfile : D:\webmonitor\webmonitor.db


[mail]
host : smtp.qq.com
port : 465
from : 123@qq.com
password : password
to : aqdwd@xxxx.com,test@awqw.com
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# WebMonitor
Website monitoring


Main functions:
Periodically poll every site listed in rule.json
Save the HTML of each monitored homepage and a screenshot of the rendered page
Compare the page content against the stored history and mail the comparison result to the configured mailbox

What each script does:

config.ini        configuration file
config.py         reads the configuration file
log.py            logging
Notification.py   mail notification
read_DB.py        SQLite read/write helpers
RequestsHeader.py request headers
rule.json         list of homepages to monitor
webmonitor.py     main entry point
WebpageShot.py    webpage screenshots
--------------------------------------------------------------------------------
/log.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
log
~~~

Implements color logger

:author: Feei
:homepage: https://github.com/wufeifei/cobra
:license: MIT, see LICENSE for more details.
:copyright: Copyright (c) 2018 Feei. All rights reserved
"""
import os
import colorlog
import logging
from logging import handlers

log_path = 'logs'
if os.path.isdir(log_path) is not True:
    os.mkdir(log_path, 0o755)
logfile = os.path.join(log_path, 'webmonitor.log')

handler = colorlog.StreamHandler()
formatter = colorlog.ColoredFormatter(
    '%(log_color)s%(asctime)s [%(name)s] [%(levelname)s] %(message)s%(reset)s',
    datefmt=None,
    reset=True,
    log_colors={
        'DEBUG': 'cyan',
        'INFO': 'green',
        'WARNING': 'yellow',
        'ERROR': 'red',
        'CRITICAL': 'red,bg_white',
    },
    secondary_log_colors={},
    style='%'
)
handler.setFormatter(formatter)

file_handler = handlers.RotatingFileHandler(logfile, maxBytes=(1048576 * 5), backupCount=7)
file_handler.setFormatter(formatter)

logger = colorlog.getLogger('WebpageMonitor')
logger.addHandler(handler)
logger.addHandler(file_handler)
logger.setLevel(logging.INFO)
--------------------------------------------------------------------------------
/read_DB.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-

import sqlite3


# Create the result table
def create_table(db_name, table_name):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    cursor.execute("create table %s (id INTEGER PRIMARY KEY AUTOINCREMENT, url text not null, pagemd5 text not null, sourcefile text not null, picname text not null, date timestamp not null default (datetime('now','localtime')))" % table_name)
    cursor.close()
    conn.close()


# Return the most recent page MD5 recorded for a URL, or None
def queryUrlMd5(db_name, table_name, url, size=100):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    cursor.execute("select pagemd5 from %s where url = '%s' order by date desc limit 0,1" % (table_name, url))
    result_all = cursor.fetchmany(size)
    cursor.close()
    conn.close()
    if result_all:
        return result_all[0][0]
    return None


# Write results to the database.
# Writes are comparatively slow, so the whole batch of updates is handed to write_db at once.
def write_db(db_name, table_name, result_list):
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()
    new_list = []
    for result in result_list:
        url, pagemd5, sourfile, imgname = result
        if pagemd5 != 'null':
            sql = "insert into %s (url, pagemd5, sourcefile, picname) values ('%s', '%s', '%s', '%s')" % (table_name, url, pagemd5, sourfile, imgname)
            try:
                cursor.execute(sql)
                new_list.append(result)
                conn.commit()
                print("wrote " + sql + " successfully!")
            except Exception:
                print("{0} already exists!".format(result))
    cursor.close()
    conn.close()
    return True


if __name__ == '__main__':
    #create_table('test', 'test')
    aa = queryUrlMd5('aaa.db', 'result', 'http://www.suningestate.com/index.aspxa')
    if aa:
        print(aa)
--------------------------------------------------------------------------------
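read_DB.py interpolates the URL straight into its SQL strings, which breaks on URLs containing quotes and is injectable. A hedged sketch of a parameterized variant of queryUrlMd5 (query_url_md5_safe is a hypothetical helper, not part of the repo; the table name still has to come from trusted code, since SQLite placeholders cannot bind identifiers):

import sqlite3

def query_url_md5_safe(db_name, table_name, url):
    # the URL is bound as a placeholder; table_name must come from trusted code
    conn = sqlite3.connect(db_name)
    try:
        cursor = conn.execute(
            "select pagemd5 from %s where url = ? order by date desc limit 1" % table_name,
            (url,))
        row = cursor.fetchone()
        return row[0] if row else None
    finally:
        conn.close()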
/config.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
config
~~~~~~

Implements configuration
"""

import sys
import traceback
import configparser
import os
import json
from log import logger


#conf_name = sys.argv[1] if len(sys.argv) == 2 else 'config.ini'
# parent directory (unused)
#project_directory = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
# current directory
project_directory = os.path.abspath(os.path.join(os.path.dirname(__file__)))
config_path = os.path.join(project_directory, 'config.ini')
rules_path = os.path.join(project_directory, 'rule.json')


def get(level1=None, level2=None):
    """
    Get config value
    :param level1: section name in config.ini
    :param level2: option name inside that section
    :return: string
    """
    if level1 is None and level2 is None:
        return
    config = configparser.ConfigParser()
    config.read(config_path)
    value = None
    try:
        value = config.get(level1, level2)
    except Exception as e:
        print(level1, level2)
        traceback.print_exc()
        print("config.ini file configure failed.\nError: {0}".format(e))
    return value


# Rules Structure Design
#
# 'rule keywords': {
#     'mode': ''       // RuleMode: normal-match(default)/only-match/full-match/mail
#     'extension': ''  // search extension: (default)/txt/md/java/python/etc...
# }
#
try:
    with open(rules_path) as f:
        rules_dict = json.load(f)
except Exception:
    logger.critical('please configure rule.json!')
    logger.critical(traceback.format_exc())
    sys.exit(1)


class Rule(object):
    def __init__(self, types=None, url=None, mode=None):
        self.types = types
        self.url = url
        self.mode = mode


# Build Rule objects for every URL whose rule type matches rule_type
def get_rules(rule_type='singlepage'):
    rules_objects = []
    for types, rule_list in rules_dict.items():
        if types in rule_type:
            types = types.upper()
            for url, rule_attr in rule_list.items():
                if 'mode' in rule_attr:
                    mode = rule_attr['mode']
                else:
                    mode = None
                r = Rule(types, url, mode)
                rules_objects.append(r)
    return rules_objects


if __name__ == '__main__':
    #print(get('mail', 'host'))
    rules = get_rules()
    if len(rules) == 0:
        print('no rules loaded')
    for idx, rule_object in enumerate(rules):
        print(idx, rule_object.url)
--------------------------------------------------------------------------------
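A minimal usage sketch for config.py, assuming config.ini and rule.json sit next to it as the paths above require:

from config import get, get_rules

interval = int(get('default', 'TimeInterval'))   # '3600' from the sample config.ini
for rule in get_rules('single'):                 # 'single' is the rule type key in rule.json
    print(rule.types, rule.url, rule.mode)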
/WebpageShot.py:
--------------------------------------------------------------------------------
from selenium import webdriver
import time
import os.path
import multiprocessing as mp


def readtxt():
    '''Read urls.txt and return a list of (picture name, url) tuples;
    each line holds the picture name, an ASCII comma, then the URL.'''
    with open('urls.txt', 'r') as f:
        lines = f.readlines()
    urls = []
    for line in lines:
        try:
            thelist = line.strip().split(",")
            if len(thelist) == 2 and thelist[0] and thelist[1]:
                urls.append((thelist[0], thelist[1]))
        except Exception:
            pass
    return urls


def get_dir():
    '''Create the screenshot directory if it does not exist yet'''
    filename = "./pics"
    if not os.path.isdir(filename):
        os.makedirs(filename)
    return filename


def webshot(args):
    filename, link = args
    options = webdriver.ChromeOptions()
    chromed = './chromedriver.exe'
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    # chrome_options= is deprecated; pass options= instead
    driver = webdriver.Chrome(chromed, options=options)
    driver.maximize_window()
    # JS snippet that returns the rendered page height
    js_height = "return document.body.clientHeight"
    try:
        driver.get(link)
        k = 1
        height = driver.execute_script(js_height)
        # scroll down 500px at a time so lazy-loaded content renders before the shot
        while True:
            if k * 500 < height:
                js_move = "window.scrollTo(0,{})".format(k * 500)
                print(js_move)
                driver.execute_script(js_move)
                time.sleep(1)
                height = driver.execute_script(js_height)
                k += 1
            else:
                break
        scroll_width = driver.execute_script('return document.body.parentNode.scrollWidth')
        scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
        driver.set_window_size(scroll_width, scroll_height)
        driver.get_screenshot_as_file(filename)
        print("Process {} got one pic!".format(os.getpid()))
        time.sleep(0.1)
    except Exception as e:
        print(filename + ".png", e)
    finally:
        # quit the browser so headless chrome processes do not pile up
        driver.quit()


if __name__ == '__main__':
    t = time.time()
    get_dir()
    urls = readtxt()
    pool = mp.Pool()
    pool.map_async(func=webshot, iterable=urls)
    pool.close()
    pool.join()
    print("Done, elapsed: {:.2f}s".format(float(time.time() - t)))
--------------------------------------------------------------------------------
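webdriver.Chrome(path, options=...) is the Selenium 3 calling convention; if the installed selenium is 4.x, the driver path moves to a Service object. A sketch under that assumption:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
# './chromedriver.exe' matches the driver shipped in this repo
driver = webdriver.Chrome(service=Service('./chromedriver.exe'), options=options)
driver.get('https://www.baidu.com/')
driver.get_screenshot_as_file('baidu.png')
driver.quit()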
/Notification.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-

from email.header import Header
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
import smtplib
from config import get
from log import logger
import traceback
import os


from_addr = get('mail', 'from')
password = get('mail', 'password')
to_addr = get('mail', 'to')
smtp_server = get('mail', 'host').strip()
smtp_port = get('mail', 'port').strip()


class Notification(object):
    def __init__(self, subject, to=None):
        """
        Initialize notification class
        :param subject: mail subject line
        :param to: comma-separated recipient list; falls back to config.ini
        """
        self.subject = subject
        self.mail = get('mail', 'from')
        if to is None:
            self.to = get('mail', 'to')
        else:
            self.to = to

    # Convert a byte count to megabytes; returns 0 on bad input
    def formatSize(self, bytes):
        try:
            bytes = float(bytes)
            kb = bytes / 1024
        except Exception:
            print("invalid byte count")
            return 0
        M = kb / 1024
        return M

    # Return a file's size in megabytes (0 if the path is not a file)
    def getFileSize(self, path):
        try:
            if os.path.isfile(path):
                size = os.path.getsize(path)
            else:
                size = 0
            return self.formatSize(size)
        except Exception as err:
            print(err)
            return 0

    def sendmail(self, html=None, attchfile=None):
        """
        Send notification by mail
        :param html: mail body (HTML)
        :param attchfile: optional attachment path
        :return: True on success, False on failure
        """
        msg = MIMEMultipart()
        msg['Subject'] = self.subject
        msg['From'] = '{0} <{1}>'.format(self.mail, get('mail', 'from'))
        # multiple recipients are supported via a comma-separated list
        msg['To'] = self.to

        text = MIMEText(html, 'html', 'utf-8')
        msg.attach(text)
        host = get('mail', 'host').strip()
        port = get('mail', 'port').strip()
        if attchfile:
            # only attach files smaller than 40 MB
            if self.getFileSize(attchfile) < 40:
                zipApart = MIMEApplication(open(attchfile, 'rb').read())
                zipApart.add_header('Content-Disposition', 'attachment', filename=attchfile)
                msg.attach(zipApart)

        try:
            if port == '465':
                # implicit SSL; STARTTLS must not be issued on this connection
                s = smtplib.SMTP_SSL(host, int(port))
            else:
                s = smtplib.SMTP(host, int(port))
                s.ehlo()
                s.starttls()
                s.ehlo()
            s.set_debuglevel(1)
            s.login(self.mail, get('mail', 'password'))
            s.sendmail(self.mail, self.to.split(','), msg.as_string())
            s.quit()
            return True
        except smtplib.SMTPException:
            logger.critical('Send mail failed')
            traceback.print_exc()
            return False


if __name__ == '__main__':
    aa = Notification('aaa')
    aa.sendmail(html='aaaaa', attchfile='a.py')
--------------------------------------------------------------------------------
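A hypothetical smoke test for Notification (the addresses are placeholders; config.ini's [mail] section must hold working SMTP credentials):

from Notification import Notification

n = Notification('WebMonitor test', to='ops1@example.com,ops2@example.com')
ok = n.sendmail(html='<h3>hello from WebMonitor</h3>')   # recipients are split on ','
print('sent' if ok else 'failed')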
/webmonitor.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-

import requests
from RequestsHeader import req_headers
from read_DB import *
from Notification import Notification
import random
from log import logger
import hashlib
from config import *
import multiprocessing as mp
from WebpageShot import webshot
import time
import os
import zipfile


class WebpageMonitor(object):
    def __init__(self):
        self.rules = get_rules()
        self.recorddir = get('default', 'recorddir').strip()
        self.fail_time_interval_num = int(get('default', 'FailTimeInterval').strip())
        self.timeinterval = int(get('default', 'TimeInterval'))
        self.retriesnum = int(get('default', 'retriesnum').strip())
        self.dbfile = get('default', 'dbfile')
        self.table_name = 'result'
        # create the result table on first run; ignore "table already exists"
        try:
            create_table('%s' % self.dbfile, 'result')
        except Exception:
            pass

    def md5_ncrypt(self, text):
        m = hashlib.md5()
        m.update(text.encode(encoding='utf-8'))
        str_md5 = m.hexdigest()
        return str_md5

    def md5sum(self, filename, blocksize=65536):
        hash = hashlib.md5()
        # must be opened in binary mode, otherwise two runs can hash differently
        with open(filename, "rb") as f:
            for block in iter(lambda: f.read(blocksize), b""):
                hash.update(block)
        return hash.hexdigest()

    def getNowtime(self):
        return time.strftime('%Y%m%d%H%M%S', time.localtime(time.time()))

    def createdir(self):
        dirname = self.recorddir + os.path.sep + self.getNowtime()
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        return dirname

    def url2name(self, url):
        # turn a URL into a filesystem-safe name: strip the scheme,
        # drop a trailing slash, replace the remaining slashes
        tmpurl = url
        if url.startswith('http'):
            tmpurl = url.replace('http://', '')
            tmpurl = tmpurl.replace('https://', '')
        if tmpurl.endswith('/'):
            tmpurl = tmpurl[:-1]
        tmpurl = tmpurl.replace('/', '-')
        return tmpurl

    def start(self, rule_types):
        rules = get_rules(rule_types)
        if len(rules) == 0:
            logger.critical('get rules failed, rule types not found!')
            exit(1)
        self.rulenum = len(rules)
        logger.info('rules length: {rl}'.format(rl=len(rules)))
        self.dirname = self.createdir()
        result_list = []
        webshotargs = []
        for idx, rule_object in enumerate(rules):
            logger.info('>>>>>>>>>>>>> {n} > {r} >>>>>>'.format(n=rule_object.types, r=rule_object.url))
            urlname = self.url2name(rule_object.url) + '-' + self.getNowtime()
            content = self.openWebPage(rule_object.url)

            #html = '<h3>Webpage monitor report: {rule_regex} Count: {count} Datetime: {datetime}</h3>'.format(
            #    rule_regex=self.rule_object.keyword, datetime=time.strftime("%Y-%m-%d %H:%M:%S"),
            #    count=len(self.content))

            if content:
                sourcefile = self.dirname + os.path.sep + urlname + '.txt'
                with open(sourcefile, 'w', encoding='utf-8') as f:
                    f.write(content)
                sourcemd5 = self.md5sum(sourcefile)
                picname = self.dirname + os.path.sep + urlname + '.png'
                webshotargs.append((picname, rule_object.url))
                tmp = (rule_object.url, sourcemd5, sourcefile, picname)
                result_list.append(tmp)
            else:
                tmp = (rule_object.url, 'null', 'null', 'null')
                result_list.append(tmp)
        webshotmp(webshotargs)
        self.checkdiff(result_list)

    def genratepagelist(self, pagelist):
        html = ''
        for record in pagelist:
            html += '<li>URL: {url} MD5: {md5} source file: {source} screenshot: {img}</li>'.format(
                url=record[0], md5=record[1], source=record[2], img=record[3])
        return html
    def checkdiff(self, result_list):
        # bucket each record: normal / new / changed / error
        pagenew = []
        pagenormal = []
        pageerror = []
        pagechanged = []
        for record in result_list:
            url, pagemd5, sourfile, imgname = record
            if pagemd5 == 'null':
                # the page failed to open
                pageerror.append(record)
            else:
                oldmd5 = queryUrlMd5(self.dbfile, self.table_name, url)
                if oldmd5:
                    if oldmd5 == pagemd5:
                        pagenormal.append(record)
                    else:
                        pagechanged.append(record)
                else:
                    pagenew.append(record)
        write_db('%s' % self.dbfile, self.table_name, result_list)
        reporttime = time.strftime("%Y-%m-%d %H:%M:%S")
        subject = "Website tamper-monitoring report--{ss}".format(ss=reporttime)

        html = '<html><body><h2>Monitored sites: {count} Report time: {datetime}</h2>'.format(
            count=self.rulenum, datetime=reporttime)
        # all pages normal
        print(self.rulenum, '-----------', len(pagenormal))
        if self.rulenum == len(pagenormal):
            html += '<h3>All monitored pages are running normally!</h3>'
            html += self.genratepagelist(pagenormal)
            Notification(subject).sendmail(html=html)
        else:
            if len(pageerror):
                html += '<h3>{count} sites failed to load, listed below</h3>'.format(count=len(pageerror))
                html += self.genratepagelist(pageerror)
            if len(pagechanged):
                html += '<h3>{count} pages changed, please verify manually, listed below</h3>'.format(count=len(pagechanged))
                html += self.genratepagelist(pagechanged)
            if len(pagenew):
                html += '<h3>{count} sites newly monitored, listed below</h3>'.format(count=len(pagenew))
                html += self.genratepagelist(pagenew)
            if len(pagenormal):
                html += '<h3>{count} sites running normally, listed below</h3>'.format(count=len(pagenormal))
                html += self.genratepagelist(pagenormal)
            zipfilename = 'website-tamper-monitoring-log-' + reporttime.replace(':', '').replace('-', '') + '.zip'
            if self.zipDir(self.dirname, zipfilename):
                Notification(subject).sendmail(html=html, attchfile=zipfilename)
                os.remove(zipfilename)

    def zipDir(self, dirpath, outFullName):
        """
        Zip up a directory
        :param dirpath: directory to compress
        :param outFullName: output path ending in .zip
        :return: True on success, False on failure
        """
        try:
            zip = zipfile.ZipFile(outFullName, "w")
            for path, dirnames, filenames in os.walk(dirpath):
                # strip the root prefix so only paths below dirpath end up in the archive
                fpath = path.replace(dirpath, '')
                for filename in filenames:
                    zip.write(os.path.join(path, filename), os.path.join(fpath, filename))
            zip.close()
            return True
        except Exception:
            logger.error("generate zip file error {dir} {name}".format(dir=dirpath, name=outFullName))
            return False

    def main(self):
        # poll forever: run one cycle, then sleep for the configured interval
        while 1:
            t = time.time()
            self.start('single')
            logger.info("cycle finished, elapsed: {:.2f}s".format(float(time.time() - t)))
            time.sleep(self.timeinterval)

    # Fetch a URL, retrying up to retriesnum times with a randomized back-off
    def openWebPage(self, url):
        tag = 0
        while tag < int(self.retriesnum):
            try:
                if tag != 0:
                    time.sleep(self.fail_time_interval_num + random.randint(1, 5))
                page = requests.get(url, headers=req_headers, allow_redirects=True)
                return page.text
            except Exception:
                print("network request failed!")
                logger.error('open {url} fail, fail num is {tag}'.format(url=url, tag=tag + 1))
                tag += 1
        return None


# Take the screenshots in parallel, one worker per (picture, url) pair
def webshotmp(args):
    t = time.time()
    pool = mp.Pool()
    logger.info('webshot tasks: {n}'.format(n=len(args)))
    for aa in args:
        logger.info('webshot {filename} {url}'.format(filename=aa[0], url=aa[1]))
        pool.apply_async(webshot, args=(aa,))
    pool.close()
    pool.join()
    print("screenshots finished, elapsed: {:.2f}s".format(float(time.time() - t)))


if __name__ == '__main__':
    newVisit = WebpageMonitor()
    newVisit.main()
--------------------------------------------------------------------------------
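Putting it together, a minimal sketch of one polling cycle instead of the endless main() loop, assuming Python 3 with requests, selenium, and colorlog installed, chromedriver.exe beside the scripts, and config.ini / rule.json filled in as shown above:

from webmonitor import WebpageMonitor

monitor = WebpageMonitor()   # creates the result table in the configured dbfile
monitor.start('single')      # fetch, screenshot, diff against history, mail the report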