├── src ├── requirements.txt ├── plugins │ ├── __init__.py │ ├── geoip.py │ ├── url.py │ ├── ip.py │ ├── plugin.py │ ├── asset.py │ ├── nmap.py │ └── wappalyzer.py ├── rules │ ├── http_asset_types.json │ ├── tcp_device_types.json │ ├── http_device_types.json │ ├── vendors.json │ └── tcp_asset_types.json ├── config │ └── plugin.yml └── main.py ├── docker-compose.yml ├── Dockerfile ├── .gitignore ├── PLUGIN_DEVELOP.md ├── README.md └── LICENSE /src/requirements.txt: -------------------------------------------------------------------------------- 1 | elasticsearch==7.1.0 2 | pyaml 3 | cacheout 4 | geoip2 -------------------------------------------------------------------------------- /src/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix:9200 10 | - ELASTICSEARCH_INDEX=logstash-passets 11 | - THREADS=5 12 | - BATCH_SIZE=20 13 | - CACHE_SIZE=1024 14 | - CACHE_TTL=120 15 | - MODE=1 16 | - DEBUG=1 17 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.io/ubuntu:18.04 2 | 3 | LABEL maintainer="tanjelly@gmail.com" version="1.0.0" 4 | 5 | USER root 6 | 7 | ENV TZ="CST-8" ELASTICSEARCH_URL="localhost:9200" ELASTICSEARCH_INDEX="logstash-passets" THREADS=5 BATCH_SIZE=20 CACHE_SIZE=1024 CACHE_TTL=120 MODE=1 DEBUG=1 8 | 9 | COPY src/ /opt/filter/ 10 | 11 | WORKDIR /opt/filter/ 12 | 13 | RUN apt-get update && \ 14 | apt-get install -y python3 python3-pip && \ 15 | pip3 install -r requirements.txt && \ 16 | apt-get clean all && \ 17 | apt-get autoclean && \ 18 | apt-get autoremove 19 | 20 | ENTRYPOINT ["sh", "-c", "python3 /opt/filter/main.py -H $ELASTICSEARCH_URL -i $ELASTICSEARCH_INDEX -t $THREADS -b $BATCH_SIZE -c $CACHE_SIZE -T $CACHE_TTL -m $MODE -d $DEBUG"] 
-------------------------------------------------------------------------------- /src/rules/http_asset_types.json: -------------------------------------------------------------------------------- 1 | { 2 | "Network Device":[ 3 | 31,37,1005 4 | ], 5 | "Security Device":[ 6 | 1006 7 | ], 8 | "Storage Device":[ 9 | 48 10 | ], 11 | "IoT":[ 12 | 39,1008 13 | ], 14 | "Printer":[ 15 | 40 16 | ], 17 | "Control System":[ 18 | 45 19 | ], 20 | "OS":[ 21 | 5 22 | ], 23 | "Mail":[ 24 | 30 25 | ], 26 | "Database":[ 27 | 34 28 | ], 29 | "Web Server":[ 30 | 22,64 31 | ], 32 | "Media Server":[ 33 | 38 34 | ], 35 | "Application Middleware":[ 36 | 1009 37 | ], 38 | "Office Software":[ 39 | 50,53,58,1007 40 | ], 41 | "Digital Currency":[ 42 | 56 43 | ], 44 | "Container":[ 45 | 60 46 | ], 47 | "Cloud Platform":[ 48 | 9,61,62,63 49 | ], 50 | "Load Balancer":[ 51 | 65 52 | ], 53 | "Securities System":[ 54 | 1001,1002 55 | ], 56 | "Knowledge Base System":[ 57 | 2,4,8,11,49 58 | ], 59 | "Payment System":[ 60 | 41,43 61 | ] 62 | } -------------------------------------------------------------------------------- /src/rules/tcp_device_types.json: -------------------------------------------------------------------------------- 1 | { 2 | "load balancer":["load balancer"], 3 | "remote management":[], 4 | "security-misc":["gateway", "security"], 5 | "printer":["printer"], 6 | "storage-misc":["storage"], 7 | "media device":["media", "video", "dvr"], 8 | "router":["router"], 9 | "webcam":["camera", "webcam"], 10 | "terminal server":["kvm"], 11 | "printer server":["print server"], 12 | "power-device":["ups"], 13 | "firewall":["firewall"], 14 | "pda":["pda"], 15 | "pbx":["pbx"], 16 | "game console":["game"], 17 | "phone":["phone"], 18 | "voip":["voip", " sip ", "ip phone"], 19 | "wap":["wap"], 20 | "switch":["switch"], 21 | "terminal":["terminal"], 22 | "power-misc":["power"], 23 | "telecom-misc":["telecom"], 24 | "proxy server":["proxy"], 25 | "hub":[" hub"], 26 | "bridge":["bridge"], 27 | "broadand 
router":["dsl", "adsl", "modem", "broadand"], 28 | "vpn":["vpn", "openvpn"], 29 | "wireless router":["wireless", "wifi", "wi-fi", "wlan"], 30 | "specializied":[] 31 | } -------------------------------------------------------------------------------- /src/config/plugin.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # 根节点:插件名称,与plugins目录下的文件名对应 3 | # 二级节点: 4 | # - enable: 是否启用插件 5 | # - index: 插件的处理顺序,0以上的整数,数值越小 6 | 7 | ip: 8 | enable: false 9 | index: 1 10 | inner_ips: 11 | - 10.0.0.0-10.255.255.255 12 | - 172.16.0.0-172.31.255.255 13 | - 192.168.0.0-192.168.255.255 14 | - 169.254.0.0-169.254.255.255 15 | - 127.0.0.1-127.0.0.255 16 | 17 | geoip: 18 | enable: false 19 | index: 2 20 | 21 | url: 22 | enable: false 23 | index: 3 24 | 25 | wappalyzer: 26 | enable: true 27 | index: 4 28 | 29 | nmap: 30 | enable: true 31 | index: 5 32 | ignore_rules: 33 | - ^OK$ 34 | - ^\+OK\r\n$ 35 | ssl_portmap: 36 | - 443:https 37 | - 465:smtps 38 | - 993:imaps 39 | - 995:pop3s 40 | - 22:ssh 41 | - 21:ftps 42 | 43 | https: 44 | enable: true 45 | include_chain: false 46 | index: 6 47 | 48 | asset: 49 | enable: true 50 | index: 7 51 | ignore_vendors: 52 | - asp 53 | - iis 54 | - windows 55 | - java 56 | - getmdl 57 | - getbootstrap 58 | -------------------------------------------------------------------------------- /src/rules/http_device_types.json: -------------------------------------------------------------------------------- 1 | { 2 | "load balancer":["load balancer", "负载均衡", "big-ip", "cdn"], 3 | "remote management":[], 4 | "security-misc":["gateway", "网关", "security", "安全", "堡垒机"], 5 | "printer":["printer", "打印机"], 6 | "storage-misc":["storage", "云存储"], 7 | "media device":["media", "流媒体", "视频"], 8 | "router":["router", "路由器"], 9 | "webcam":["camera", "webcam", "摄像机"], 10 | "terminal server":["kvm"], 11 | "printer server":["print server", "打印服务器"], 12 | "power-device":["ups"], 13 | "firewall":["firewall", "防火墙"], 14 | 
"pda":["pda"], 15 | "pbx":["pbx"], 16 | "game console":["game"], 17 | "phone":["phone"], 18 | "voip":["voip", " sip ", "ip phone", "IP语音", "电话"], 19 | "wap":["wap"], 20 | "switch":["switch", "交换机"], 21 | "terminal":["终端"], 22 | "power-misc":["power"], 23 | "telecom-misc":[], 24 | "proxy server":["proxy", "代理"], 25 | "hub":[" hub", "集线器"], 26 | "bridge":["bridge"], 27 | "broadand router":["dsl ", "modem", "broadand", "宽带"], 28 | "vpn":["vpn", "接入"], 29 | "wireless router":["wireless", "wifi", "wi-fi", "wlan", "无线路由"], 30 | "specializied":[] 31 | } -------------------------------------------------------------------------------- /src/rules/vendors.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Asus", 3 | "Dell", 4 | "IBM", 5 | "Cisco", 6 | "3COM", 7 | "Fortinet", 8 | "Huawei", 9 | "H3C", 10 | "Linksys", 11 | "Adobe", 12 | "AirLink", 13 | "Google", 14 | "Microsoft", 15 | "Alcatel", 16 | "Alt-N", 17 | "Aastra", 18 | "APC", 19 | "AVG", 20 | "AVM", 21 | "AWS", 22 | "AT&T", 23 | "ActionTec", 24 | "ACTi", 25 | "Adtran", 26 | "Allied", 27 | "Amino", 28 | "Amazon", 29 | "Apache", 30 | "Apple", 31 | "ArGoSoft", 32 | "Atlassian", 33 | "Avaya", 34 | "Avtech", 35 | "Axis", 36 | "Axway", 37 | "HP", 38 | "BMC", 39 | "Barracuda", 40 | "Belkin", 41 | "D-Link", 42 | "360", 43 | "BenQ", 44 | "BayStack", 45 | "Samsung", 46 | "Xerox", 47 | "Xen", 48 | "ZTE", 49 | "Intel", 50 | "Juniper", 51 | "TP-Link", 52 | "Brocade", 53 | "Netgear", 54 | "SMC", 55 | "Trendnet", 56 | "Trend", 57 | "Sony", 58 | "Hikvision", 59 | "Huacam", 60 | "Aviosys", 61 | "Panasonic", 62 | "Zmodo", 63 | "Sanyo", 64 | "AirLive", 65 | "ZyXEL", 66 | "NetComm", 67 | "Xfinity", 68 | "CJ Hellovision", 69 | "EnGenius", 70 | "Technicolor", 71 | "Hotbox", 72 | "Arcadyan", 73 | "MikroTik", 74 | "Westell", 75 | "Verizon" 76 | ] -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | src/wappalyzer/node_modules/ 131 | -------------------------------------------------------------------------------- /src/plugins/geoip.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix登录", 86 | "code": 200, 87 | "url": "http://111.206.63.16/hello.jsp?zone=public&service=80&protocol=tcp#main", 88 | "tag": "sensor-ens160" 89 | } 90 | msg_update = {} 91 | for i in sorted(plugins.keys()): 92 | (pluginName, plugin) = plugins[i] 93 | if pluginName == 'url': 94 | print('[!] Plugin {} processing ...'.format(pluginName)) 95 | ret = plugin.execute(msg) 96 | print(ret) 97 | if ret: 98 | msg_update = dict(msg_update, **ret) 99 | 100 | msg = dict(msg, **ret) 101 | print('[!] Plugin {} completed.'.format(pluginName)) 102 | 103 | print(msg_update) -------------------------------------------------------------------------------- /PLUGIN_DEVELOP.md: -------------------------------------------------------------------------------- 1 | # Passets 被动资产识别框架数据清洗模块插件开发说明 2 | 3 | ### 插件工作原理 4 | 5 | ``` 6 | 原始数据 7 | [ElasticSearch] ---→ [passets-filter] 8 | ↑ ↓ 9 | | Plugin 1 10 | │ ↓ 11 | │ Plugin 2 12 | │ ↓ 13 | │ ... ... 
14 | │ | 15 | ╰----------------------╯ 16 | 处理后产生的新数据 17 | ``` 18 | 19 | ### 插件配置文件说明 20 | 21 | 插件按照配置文件中定义的顺序来进行数据处理,通过配置文件,使用者可以仅开启部分必须的插件,以提交处理效率。 22 | 插件配置文件为 config/plugin.yml,配置文件的结构如下: 23 | ``` 24 | xxxx: # 插件名,同时也是插件文件名 25 | enable: true # 插件开关:true - 启用,false - 停用 26 | index: 1 # 插件的执行顺序,使用0以上的整数,数据越小越优先 27 | xxxxx: # 当前插件的自定义参数,在初始化的时候传入插件 28 | ``` 29 | 30 | ip 插件的配置实例: 31 | 32 | ``` 33 | ip: # 插件名称 34 | enable: true # 启用该插件 35 | index: 1 # 插件处理顺序为 1 36 | inner_ips: # 内部IP地址范围定义 37 | - 10.0.0.0-10.255.255.255 38 | - 172.16.0.0-172.31.255.255 39 | - 192.168.0.0-192.168.255.255 40 | - 169.254.0.0-169.254.255.255 41 | - 127.0.0.1-127.0.0.255 42 | ``` 43 | 44 | ### 文件说明 45 | 46 | 插件必须放置于应用路径下的 `plugins` 目录下,该目录下的 `__init__.py` 和 `plugin.py` 必须保留,并且不建议用户修改。 47 | ``` 48 | src # 代码目录 49 | plugins # 插件存放目录 50 | __init__.py # 模块初始化脚本 51 | plugin.py # 数据清洗插件基类,所有插件均需继承此类 52 | ``` 53 | 54 | ### 插件的代码结构 55 | 56 | ``` 57 | from plugin import Plugin 58 | 59 | class FilterPlugin(Plugin): 60 | 61 | def __init__(self, rootdir, debug=False): 62 | """ 63 | 构造函数 64 | :param rootdir: 应用根目录 65 | :param debug: 调试开关 66 | """ 67 | super().__init__(rootdir, debug) 68 | 69 | # 此处编写本插件的初始化代码 70 | # 注:如果插件没有额外的初始化操作,可以无需实现 __init__() 方法。 71 | ... ... 72 | 73 | def set_config(self, config): 74 | """ 75 | 配置初始化函数 76 | :param config: 插件配置 77 | """ 78 | super().set_config(config) 79 | 80 | # 此处编写本插件的配置初始化代码 81 | ... ... 82 | 83 | def execute(self, msg): 84 | """ 85 | 插件入口函数,根据插件的功能对 msg 进行处理 86 | :param msg: 需要处理的消息(字典类型) 87 | """ 88 | # 此处编写本插件的业务处理代码 89 | ... ... 90 | 91 | # 返回插件产生的新数据字典(不含原数据),没有产生数据则返回 None 92 | return new_msg 93 | 94 | ``` 95 | 96 | 插件执行过程中,可以调用 `self.log(msg, level)` 来输出必要的信息,消息分为以下三类: 97 | 98 | | 消息标识 | 输出前缀 | 说明 99 | |----------|------------|--------------------------------| 100 | | INFO | [!] 
| 普通信息 101 | | ERROR | [-] | 错误信息 102 | | DEBUG | [D] | 调试信息,只有开启调试后才会输出 103 | 104 | 105 | 106 | ### 插件测试 107 | 108 | 开发者可以在插件脚本的 __main__ 代码块来编写插件的测试代码,实例如下: 109 | 110 | ``` 111 | if __name__ == '__main__': 112 | 113 | # 应用根目录(通常为plugins目录的上层目录) 114 | rootdir = '/opt/filter/' 115 | 116 | # 是否开启调试模式 117 | debug = True 118 | 119 | # 初始化插件 120 | plugin = FilterPlugin(rootdir, debug) 121 | 122 | # 测试输入数据 123 | msg = { 124 | 'pro': 'TCP', 125 | 'ip': '192.168.1.121', 126 | 'port': 80, 127 | 'data': 'AAAAAAAAAAAAAAAAAAAA' 128 | } 129 | 130 | # 执行插件 131 | new_msg = plugin.execute(msg) 132 | 133 | # 判断插件返回结果 134 | if new_msg: 135 | print(u'插件返回了数据!') 136 | else: 137 | print(u'插件没有返回数据!') 138 | ``` 139 | 140 | 然后,直接在 IDE(集成开发工具)或者是命令上下直接运行该插件脚本: 141 | 142 | 在命令行下执行插件脚本的方法: 143 | ``` 144 | $ cd plugins 145 | $ python3 xxxx.py 146 | ``` -------------------------------------------------------------------------------- /src/plugins/ip.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix= _[0] and ip_num <= _[1]: 95 | inner = True 96 | break 97 | 98 | info['inner'] = inner 99 | return info 100 | 101 | if __name__ == '__main__': 102 | plugins = Plugin.loadPlugins(os.path.join(os.path.dirname(__file__), ".."), True) 103 | msg = { 104 | #"ip": "202.106.0.20", 105 | "ip": "192.168.1.20", 106 | "port": 80, 107 | "pro": "TCP", 108 | "host": "111.206.63.16:80", 109 | # Example: 554 SMTP synchronization error\r\n 110 | #"data": "35353420534d54502073796e6368726f6e697a6174696f6e206572726f720d0a", 111 | # Example: >INFO:OpenVPN Management Interface Version 1.0.1 -- type 'help' for more info\r\n> 112 | #"data": "3e494e464f3a4f70656e56504e204d616e6167656d656e7420496e746572666163652056657273696f6e20312e302e31202d2d2074797065202768656c702720666f72206d6f726520696e666f0d0a3e", 113 | # Example: get_info: plugins\nRPRT 0\nasfdsafasfsafas 114 | "data": 
"6765745f696e666f3a20706c7567696e730a5250525420300a617366647361666173667361666173", 115 | "tag": "sensor-ens160" 116 | } 117 | msg_update = {} 118 | for i in sorted(plugins.keys()): 119 | (pluginName, plugin) = plugins[i] 120 | if pluginName == 'ip': 121 | ctime = time.time() 122 | ret = plugin.execute(msg) 123 | etime = time.time() 124 | print('Eclipse time: {}'.format(etime-ctime)) 125 | print(ret) 126 | break -------------------------------------------------------------------------------- /src/rules/tcp_asset_types.json: -------------------------------------------------------------------------------- 1 | { 2 | "Network Device": [ 3 | "router", 4 | "switch", 5 | "telecom", 6 | "hub", 7 | "bridge", 8 | "gateway", 9 | "modem", 10 | "wireless", 11 | "wifi", 12 | "wlan", 13 | "wi-fi", 14 | "network", 15 | "vpn", 16 | "openvpn", 17 | "dsl", 18 | "adsl", 19 | "gsm", 20 | "telnet", 21 | "winbox" 22 | ], 23 | "Security Device": [ 24 | "security-misc", 25 | "firewall", 26 | "access", 27 | "secure", 28 | "anti-virus", 29 | "anti-spam", 30 | "nessus" 31 | ], 32 | "Storage Device": [ 33 | "storage-misc", 34 | "raid", 35 | "storage" 36 | ], 37 | "IoT Device": [ 38 | "webcam", 39 | "pda", 40 | "camera", 41 | "microcontroller" 42 | ], 43 | "Printer": [ 44 | "printer" 45 | ], 46 | "Control System": [ 47 | "power-device", 48 | "power-misc", 49 | "modbus" 50 | ], 51 | "OS": [], 52 | "Mail Server": [ 53 | "smtp", 54 | "smtpd", 55 | "smtps", 56 | "imap", 57 | "imapd", 58 | "imaps", 59 | "pop3", 60 | "pop3d", 61 | "pop3s", 62 | "lmtp", 63 | "lmtpd", 64 | "webmail" 65 | ], 66 | "Database": [ 67 | "rdbms", 68 | "mysql", 69 | "oracle", 70 | "ms-sql-m", 71 | "ms-sql-s", 72 | "db2", 73 | "mongodb", 74 | "influxdb", 75 | "couchdb", 76 | "rethinkdb", 77 | "arangodb", 78 | "monetdb", 79 | "rethinkdb", 80 | "hbase", 81 | "redis", 82 | "memcached", 83 | "database" 84 | ], 85 | "Web Server": [ 86 | "http", 87 | "https", 88 | "httpd", 89 | "web", 90 | "iis", 91 | "nginx", 92 | "tengine" 93 | ], 
94 | "FTP Server": [ 95 | "ftp", 96 | "serv-u", 97 | "filezilla" 98 | ], 99 | "DNS Server": [ 100 | "dns", 101 | "bind", 102 | "nameserver" 103 | ], 104 | "Media Server": [ 105 | "pbx", 106 | "voip", 107 | "radio", 108 | "video", 109 | "dvr", 110 | "media", 111 | "sip" 112 | ], 113 | "Font Server": [ 114 | "font" 115 | ], 116 | "Time Server": [ 117 | "time" 118 | ], 119 | "SSH Server": [ 120 | "ssh", 121 | "sshd" 122 | ], 123 | "Remote Admin": [ 124 | "vnc", 125 | "vnc-http", 126 | "ms-wbt-server", 127 | "ms-wbt-server-proxy", 128 | "radmin", 129 | "radmind", 130 | "x11", 131 | "webmin" 132 | ], 133 | "Application Middleware": [ 134 | "weblogic", 135 | "websphere", 136 | "tomcat", 137 | "jboss", 138 | "jetty" 139 | ], 140 | "Office Software": [], 141 | "Digital Currency": [ 142 | "currency", 143 | "bitcoin" 144 | ], 145 | "Container": [ 146 | "container", 147 | "vmware", 148 | "esxi", 149 | "vmware-aam", 150 | "vmware-auth", 151 | "vmware-print", 152 | "docker" 153 | ], 154 | "Cloud Platform": [ 155 | "cloud", 156 | "zoomkeeper", 157 | "webcache" 158 | ], 159 | "Load Balancer": [ 160 | "load balancer", 161 | "wap" 162 | ], 163 | "Proxy Server": [ 164 | "socks4", 165 | "socks5", 166 | "myproxy", 167 | "xtunnels" 168 | ], 169 | "Securities System": [ 170 | "securities", 171 | "zqyh", 172 | "zqzx", 173 | "zqhq", 174 | "zqkzhq", 175 | "zqjy", 176 | "zqsj" 177 | ], 178 | "Knowledge Base System": [ 179 | "bbs", 180 | "wiki", 181 | "message", 182 | "guestbook" 183 | ], 184 | "Payment System": [ 185 | "payment" 186 | ], 187 | "Terminal Server": [ 188 | "nagios", 189 | "zabbix", 190 | "citrix-ica", 191 | "citrix-ima" 192 | ], 193 | "Darknet": [ 194 | "tor", 195 | "tor-control", 196 | "tor-info", 197 | "tor-orport", 198 | "tor-socks" 199 | ], 200 | "Terminal": [ 201 | "phone", 202 | "game", 203 | "pc", 204 | "desktop" 205 | ], 206 | "Data Analysis": [ 207 | "spark", 208 | "splunk" 209 | ] 210 | } 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Passets 被动资产识别框架数据清洗模块 2 | 3 | ## 简介 4 | 5 | 本模块主要用于对收集的被动资产原始数据进行二次加工,Elasticsearch 中经过清洗的合法数据(至少包含ip和port字段)会添加 state 字段。state=0表示正在清洗,state=1表示已完成清洗。所有的清洗操作都采用插件的方式进行,目前已支持以下插件。 6 | 7 | ### Wappalyzer 插件 8 | 9 | 基于数据中的 URL、HTTP 响应头、HTTP响应正文来识别站点指纹信息。 10 | 11 | 指纹库及识别引擎基于 [Wappalyzer](https://github.com/AliasIO/Wappalyzer/) 修改。 12 | 13 | 配置参数: 14 | 15 | | 插件参数 | 用途说明 16 | |------------|------------------------------------| 17 | | enable | 插件开关,true-启用,false-不启用 18 | | index | 在所有插件中的处理顺序,值越小越优先处理 19 | 20 | 相关配置文件(指纹规则): 21 | 22 | ``` 23 | # rules/apps.json 24 | ``` 25 | 26 | ### NMAP 插件 27 | 28 | 基于数据中的 TCP 响应报文来识别目标服务的指纹信息。 29 | 30 | 指纹库基于 [NMAP](https://github.com/nmap/nmap/) 项目中的 `nmap-service-probes` 指纹库。 31 | 32 | 配置参数: 33 | 34 | | 插件参数 | 用途说明 35 | |------------|------------------------------------| 36 | | enable | 插件开关,true-启用,false-不启用 37 | | index | 在所有插件中的处理顺序,值越小越优先处理 38 | | ignore_rules | 忽略的 TCP 指纹规则列表,用 `nmap-service-probes` 中的 `m` 参数值表示,必须完全匹配。 39 | | ssl_portmap | 指定 ssl 协议数据的端口对应关系列表,例如:`443:https` 表示检测到 ssl 服务时,如果端口为 443 则认定其为 https 服务,过滤后的用 https 覆盖 ssl。 40 | 41 | 相关配置文件(指纹规则): 42 | 43 | ``` 44 | # rules/nmap-service-probes 45 | ``` 46 | 47 | ### Assets 分类插件 48 | 49 | 基于指纹识别的结果对目标进行资产分类。 50 | 51 | 配置参数: 52 | 53 | | 插件参数 | 用途说明 54 | |------------|------------------------------------| 55 | | enable | 插件开关,true-启用,false-不启用 56 | | index | 在所有插件中的处理顺序,值越小越优先处理,此插件必须在 Wapplayzer、Nmap 插件的后面执行。 57 | | ignore_vendors | 忽略的厂商名称(小写),用于排除一些不想要的厂商名称。 58 | 59 | 相关配置文件: 60 | 61 | #### 资产类型配置文件 62 | 63 | ``` 64 | # rules/http_asset_types.json 65 | { 66 | "<资产类型名>":[ 67 | <分类编号>, ... 68 | ] 69 | } 70 | ``` 71 | 用于识别 `pro` 为 `HTTP` 类的流量资产类型。 72 | 73 | 工作原理:根据指纹分类来确定资产类型。 74 | 75 | ``` 76 | # rules/tcp_asset_types.json 77 | { 78 | "<资产类型名>":[ 79 | "<关键词>", ... 
80 | ] 81 | } 82 | ``` 83 | 用于识别 `pro` 为 `TCP` 类的流量资产类型。 84 | 85 | 工作原理:根据指纹设备类型、指纹名称、指纹描述、服务名中的单词来确定设备类型。关键词不区分大小写。 86 | 87 | #### 设备类型配置文件 88 | 89 | ``` 90 | # rules/http_device_types.json 91 | { 92 | "<设备类型名>":[ 93 | "<关键词>", ... 94 | ] 95 | } 96 | ``` 97 | 用于识别 `pro` 为 `HTTP` 类的流量设备类型。 98 | 99 | 工作原理:根据指纹名称中的关键词来确定设备类型。 100 | 101 | ``` 102 | # rules/tcp_device_types.json 103 | { 104 | "<设备类型名>":[ 105 | "<关键词>", ... 106 | ] 107 | } 108 | ``` 109 | 110 | 用于识别 `pro` 为 `TCP` 类的流量设备类型。 111 | 112 | 工作原理:根据指纹设备类型以及指纹名称、指纹描述中的单词来确定设备类型。关键词不区分大小写。 113 | 114 | #### 设备厂商配置文件 115 | 116 | ``` 117 | # rules/vendors.json 118 | [ 119 | "<关键词>", ... 120 | ] 121 | ``` 122 | 123 | 厂商信息获取有两种方式: 124 | 125 | - 对于 `pro` 为 `HTTP` 的流量,可以通过指纹的 `website` 属性提取域名关键词获得,也可以根据指纹名称中的关键词获得; 126 | - 对于 `pro` 为 `TCP` 的流量,仅通过指纹中的 `name`、`info` 和 `device`来获取。关键词区分大小写。 127 | 128 | ## 运行环境 129 | 130 | - Python 3.x 131 | - Nodejs 8.x 及以上 132 | 133 | ## 文件说明 134 | 135 | ``` 136 | Dockerfile # 容器环境配置文件 137 | docker-compose.yml # 容器启动配置文件 138 | src # 核心代码文件 139 | config/plugin.yml # 数据清洗插件配置文件 140 | plugins # 数据清洗插件存放路径 141 | plugin.py # 数据清洗插件基类,所有插件均需继承此类 142 | ... ... 143 | rules # 插件相关配置文件存放路径 144 | ... ... 
145 | main.py # 主程序 146 | requirements.txt # 程序依赖库清单 147 | ``` 148 | 149 | [最新Web应用指纹库下载](https://github.com/AliasIO/Wappalyzer/raw/master/src/apps.json) 150 | 151 | [最新端口服务指纹库下载](https://github.com/nmap/nmap/raw/master/nmap-service-probes) 152 | 153 | ## 清洗程序执行说明 154 | 155 | 清洗程序是一个基于 Python3 开发的脚本应用程序。 156 | 157 | 命令行参数如下: 158 | ``` 159 | 用法: python3 main.py [OPTIONS] arg 160 | 161 | OPTIONS: 162 | --version 输出版本信息 163 | -h, --help 显示命令行帮助信息 164 | -H HOST, --host=HOST 设置 Elasticsearch 服务器地址/地址:端口 165 | -i INDEX, --index=INDEX 设置 ES 索引名,默认为logstash-passets 166 | -r RANGE, --range=RANGE 设置 ES 搜索的时间偏移量,单位为分钟,默认 15 分钟 167 | -t THREADS, --threads=THREADS 设置并发线程数量,默认为 10 个线程 168 | -b BATCH_SIZE --batch-size=BATCH_SIZE 每线程单批处理的数据数量,默认为 20 条。 169 | -c CACHE_SIZE --cache-size=CACHE_SIZE 设置处理缓存的大小 170 | -T CACHE_TTL --cache-ttl=CACHE_TTL 设置处理缓存的过期时间,单位为秒,默认 120 秒 171 | -m MODE --mode=MODE 设置工作模式,默认为 1(主),可选值有 0(从)。 172 | -d DEBUG, --debug=DEBUG 调试信息开关,0-关闭,1-开启 173 | ``` 174 | 175 | **使用示例:** 176 | 177 | ``` 178 | # 并发10个线程处理 192.168.1.2:9200 中 logstash-passets* 索引下的数据,执行过程输出调试信息 179 | 180 | # 主节点模式 181 | python3 main.py -H 192.168.1.2:9200 -i logstash-passets -r 5 -t 10 -m 1 -d 1 182 | 183 | # 从节点模式 184 | python3 main.py -H 192.168.1.2:9200 -i logstash-passets -r 5 -t 10 -m 0 -d 1 185 | ``` 186 | 187 | 在设备性能允许的情况下尽量选用单节点多线程模式,综合对比来看单节点比多节点性能上更优(节点数*线程数)。多节点部署时只能、并且必须有一个主节点。 188 | 189 | ## 清洗程序配置说明 190 | 191 | 配置文件路径为 `config/plugin.yml`。 192 | 193 | **配置示例:** 194 | ``` 195 | wappalyzer: 196 | enable: true 197 | index: 1 198 | 199 | nmap: 200 | enable: true 201 | index: 2 202 | ignore_rules: # 不处理的规则列表(列表中的规则将不会处理) 203 | - ^OK$ 204 | ssl_portmap: # ssl 协议端口映射表 205 | - 443:https 206 | 207 | asset: 208 | enable: true 209 | index: 3 210 | ignore_vendors: # 要忽略的厂商名称(小写) 211 | - asp 212 | ``` 213 | 214 | 215 | ## 容器化部署说明 216 | 217 | ### 容器构建 218 | 219 | 配置文件: 220 | [Dockerfile](./Dockerfile) 221 | 222 | [docker-compose.yml](./docker-compose.yml) 223 | 224 | ``` 225 | # 使用 
docker 命令构建 226 | docker build -t dsolab/passets-filter: . 227 | 228 | # 使用 docker-compose 命令构建 229 | docker-compose build 230 | ``` 231 | 232 | ### 容器启动 233 | 234 | > 使用 docker 命令启动: 235 | 236 | ``` 237 | # 基本命令: 238 | docker run -it dsolab/passets-filter: 239 | 240 | # 使用新的配置文件、指纹规则启动: 241 | docker run -it passets-filter: -v $(PWD)/src/config/plugin.yml:/opt/filter/config/plugin.yml -v $(PWD)/src/rules/apps.json:/opt/filter/rules/apps.json -v $(PWD)/src/rules/nmap-service-probes:/opt/filter/rules/nmap-service-probes -e ELASTICSEARCH_URL=:9200 242 | # 注:其它参数均使用默认设置 243 | ``` 244 | 245 | > 使用 docker-compose 启动: 246 | 247 | ``` 248 | docker-compose up -d 249 | ``` 250 | 251 | ## 自定义数据清洗插件 252 | 253 | 详见 [插件开发说明](PLUGIN_DEVELOP.md) 。 -------------------------------------------------------------------------------- /src/plugins/plugin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix self._debug: return 146 | 147 | if self._logger: 148 | if level == LogLevel.ERROR: 149 | self._logger.error(str(msg)) 150 | elif level == LogLevel.WARN: 151 | self._logger.warn(str(msg)) 152 | elif level == LogLevel.INFO: 153 | self._logger.info(str(msg)) 154 | else: 155 | self._logger.debug(str(msg)) 156 | else: 157 | timeStr = datetime.now().strftime('%H:%M:%S.%f') 158 | if level == LogLevel.ERROR: 159 | print('[E][{}] {}'.format(timeStr, str(msg))) 160 | elif level == LogLevel.WARN: 161 | print('[W][{}] {}'.format(timeStr, str(msg))) 162 | elif level == LogLevel.INFO: 163 | print('[I][{}] {}'.format(timeStr, str(msg))) 164 | else: 165 | print('[D][{}] {}'.format(timeStr, str(msg))) 166 | 167 | def set_config(self, config): 168 | """ 169 | 设置插件配置,配置为字典形式,例如:{ "参数名": 参数值 } 170 | :param config: 参数字典 171 | """ 172 | self._config = config 173 | 174 | def execute(self, msg, workdir, debug=False): 175 | """ 176 | 插件入口函数,根据插件的功能对 msg 进行处理 177 | :param msg: 需要处理的消息 178 | :param 
workdir: 应用主目录路径 179 | :param debug: 是否开启调试模式 180 | :return: 返回需要更新的消息字典(不含原始消息) 181 | """ 182 | print('Please implement the execute() function for plugin {}.'.format(self.__class__.__name__)) 183 | return None 184 | -------------------------------------------------------------------------------- /src/plugins/asset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix 1: 290 | vendor = parts[-2].upper() 291 | else: 292 | if len(parts[-1]) < 4: 293 | vendor = parts[-1].upper() 294 | else: 295 | vendor = parts[-1].capitalize() 296 | 297 | if vendor.lower() not in self.ignore_vendors: 298 | return vendor 299 | 300 | return '' 301 | except: 302 | return '' 303 | 304 | def analyzeTcp(self, apps): 305 | """ 306 | 分析HTTP指纹获取资产类型 307 | :param apps: 指纹列表 308 | :return: 资产相关信息,例如:{ 'asset_type': ["Network Device"], 'vendor': ["Huawei"], 'device': ["Router"], 'service': ["telnet"], 'info': ["Huawei AR5102"] } 309 | """ 310 | info = { 'asset_type': [], 'vendor': [], 'device': [], 'service': [], 'info': [] } 311 | for i in range(len(apps)): 312 | app = apps[i] 313 | devices = self.parseTcpDeviceType(app['name'], app['info'], app['device']) 314 | if devices: 315 | for _ in devices: 316 | if _ not in info['device']: 317 | info['device'].append(_) 318 | 319 | asset_types = self.parseTcpAssetType(app['name'], app['info'], ' '.join(devices), app['service']) 320 | if asset_types: 321 | for _ in asset_types: 322 | if _ not in info['asset_type']: 323 | info['asset_type'].append(_) 324 | 325 | vendor = self.parseTcpVendor(app['name'], app['info']) 326 | if vendor and vendor not in info['vendor']: 327 | info['vendor'].append(vendor) 328 | 329 | if app['service'] and app['service'] not in info['service']: 330 | info['service'].append(app['service']) 331 | 332 | # 设备信息不存在则用 os 属性代替 333 | if app['info']: 334 | if app['info'] not in info['info']: 335 | 
info['info'].append(app['info']) 336 | else: 337 | if app['os'] and app['os'] not in info['info']: 338 | info['info'].append(app['os']) 339 | 340 | del(apps[i]['device'], apps[i]['service'], apps[i]['info']) 341 | 342 | info['apps'] = apps 343 | 344 | return info 345 | 346 | def analyzeHttp(self, apps): 347 | """ 348 | 分析HTTP指纹获取资产类型 349 | :param apps: 指纹列表 350 | :return: 资产相关信息,例如:{ 'asset_type': ["Web Server"], 'vendor': ["Apache"], 'device': [], 'service': ["http"], 'info': ["Apache tomcat 9.0.28"] } 351 | """ 352 | info = { 'asset_type': [], 'vendor': [], 'device': [], 'service': ['http'], 'info': [] } 353 | for i in range(len(apps)): 354 | app = apps[i] 355 | # 识别设备产品型号/版本(用3-5层的指纹名称填充,使用 lastLayer 来控制只取一个指纹的父级) 356 | # print("Level: {}, appName: {}, implies: {}".format(self._apps[appName]['layer'], appName, self._apps[appName]['implies'])) 357 | if app['layer'] in [2, 3, 4, 5]: 358 | name = app['name'] 359 | if app['version']: name += "/" + app['version'] 360 | if info not in info['info']: 361 | info['info'].append(name) 362 | 363 | # 识别厂商 364 | vendor = self.parseHttpVendor(app['website']) 365 | if vendor and len(vendor) > 2 and vendor not in info['vendor']: 366 | info['vendor'].append(vendor) 367 | 368 | # 识别资产类型 369 | asset_types = self.parseHttpAssetType(app['categories']) 370 | if asset_types: 371 | for _ in asset_types: 372 | if _ not in info['asset_type']: 373 | info['asset_type'].append(_) 374 | 375 | # 识别设备类型 376 | devices = self.parseHttpDeviceType(app['name']) 377 | if devices: 378 | for _ in devices: 379 | if _ not in info['device']: 380 | info['device'].append(_) 381 | 382 | # 删除 Wappalyzer 插件传递过来的中间属性 383 | del(apps[i]['layer'], apps[i]['website']) 384 | 385 | info['apps'] = apps 386 | 387 | return info 388 | 389 | def execute(self, msg): 390 | """ 391 | 插件入口函数,根据插件的功能对 msg 进行处理 392 | :param msg: 需要处理的消息 393 | :return: 返回需要更新的消息字典(不含原始消息) 394 | """ 395 | if 'pro' not in msg or msg['pro'].upper() not in ['TCP', 'HTTP']: 396 | self.log('Not 
if __name__ == '__main__':
    # Standalone smoke test: load the plugin chain and run it against a
    # canned TCP (MySQL/MariaDB banner) message.
    plugins = Plugin.loadPlugins(os.path.join(os.path.dirname(__file__), ".."), True)
    print(plugins)

    msg = {
        "tag": "eno2",
        "@version": "1",
        "ip_str": "47.92.139.186",
        "inner": False,
        "ip": "47.92.139.186",
        "data": "590000000a352e352e352d31302e312e32342d4d6172696144420042bd00007b7b7661603e536700fff72102003fa015000000000000000000002e4b6f6e5c615258452d4f29006d7973716c5f6e61746976655f70617373776f726400",
        "host": "47.92.139.186:3306",
        "geoip": {
            "location": {
                "lon": 120.1619,
                "lat": 30.294
            },
            "city_name": "杭州",
            "country_name": "中国"
        },
        "pro": "TCP",
        "port": 3306,
        "state": 1,
        "apps": [
            {
                "os": "",
                "confidence": 100,
                "name": "MySQL",
                "version": "5.5.5-10.1.24-MariaDB"
            }
        ]
    }

    msg_update = {}
    # Run plugins in their configured order, folding each plugin's output
    # back into the message so later plugins see earlier results.
    for i in sorted(plugins.keys()):
        (pluginName, plugin) = plugins[i]
        print('[!] Plugin {} processing ...'.format(pluginName))
        ctime = time.time()
        ret = plugin.execute(msg)
        if ret:
            msg.update(ret)
        etime = time.time()
        # Fixed message wording: was "Eclipse time" / "process completd".
        print('Elapsed time: {}'.format(etime - ctime))
        print(json.dumps(ret, indent=2))
        print('[!] Plugin {} process completed.'.format(pluginName))
def get_scroll(es):
    """
    Fetch the shared search scroll id previously stored on ES.
    :param es: Elasticsearch client object
    :return: the stored scroll id string, or None when absent or on error
    """
    try:
        doc = es.get(index='.passets-filter', id="SearchPosition", _source=True)
        # Only trust the document when ES reports it as found and the
        # scroll id field is actually present.
        if doc.get('found') and 'scroll_id' in doc['_source']:
            return doc['_source']['scroll_id']
    except:
        traceback.print_exc()
    return None
def batch_update(es, docs, max_retry=3):
    """
    Submit a batch of document operations via the bulk API.
    :param es: Elasticsearch client object
    :param docs: list of bulk actions (update operations)
    :param max_retry: remaining retries on connection timeout
    :return: list of document ids that failed (version conflicts etc.)
    """
    ret = []
    try:
        output(docs, LogLevel.DEBUG)
        resp = bulk(es, docs)
        output(resp, LogLevel.DEBUG)
    except BulkIndexError as e:
        # Collect the ids of the failed update actions so the caller can
        # skip them (typically version conflicts between worker threads).
        for _ in e.errors:
            if 'update' in _ and '_id' in _['update']:
                ret.append(_['update']['_id'])

        output(e.args[0], LogLevel.DEBUG)
    except ConnectionTimeout as ce:
        # Retry up to max_retry times after a short pause.
        if max_retry > 0:
            time.sleep(0.1)
            return batch_update(es, docs, max_retry - 1)
        else:
            output(ce, LogLevel.ERROR)
    except:
        # BUGFIX: traceback.print_exc() returns None (and writes to stderr),
        # so the old code logged the string "None". format_exc() returns the
        # traceback text so it goes through the normal log channel.
        output(traceback.format_exc(), LogLevel.ERROR)

    return ret
threadLock.acquire() 342 | processCount += len(data) 343 | threadLock.release() 344 | 345 | actions = [] 346 | while True: 347 | if not data: break 348 | item = data.pop() 349 | # 冲突或已处理的直接跳过 350 | if item['_id'] in conflict_list: continue 351 | 352 | msg = item['_source'] 353 | # 通过 Cache 降低插件的处理频率 354 | cache_key = '{}:{}'.format(msg['ip'], msg['port']) 355 | if msg['pro'] == 'HTTP': 356 | cache_key = msg['url'] 357 | 358 | cacheMsg = cache.get(cache_key) 359 | if cacheMsg: 360 | output('Thread {}: Use cached result, key={}'.format(threadId, cache_key), LogLevel.DEBUG) 361 | actions.append({ 362 | '_type': item['_type'], 363 | '_op_type': 'update', 364 | '_index': item['_index'], 365 | '_id': item['_id'], 366 | 'doc': cacheMsg 367 | }) 368 | continue 369 | 370 | msg_update = {} 371 | # 按插件顺序对数据进行处理(插件顺序在配置文件中定义) 372 | stime = time.time() 373 | for i in sorted(plugins.keys()): 374 | (pluginName, plugin) = plugins[i] 375 | output('Thread {}: Plugin {} processing ...'.format(threadId, pluginName), LogLevel.DEBUG) 376 | 377 | try: 378 | ret = plugin.execute(msg) 379 | if ret: 380 | msg_update = dict(msg_update, **ret) 381 | msg = dict(msg, **ret) 382 | except: 383 | output(traceback.format_exc(), LogLevel.ERROR) 384 | 385 | output('Thread {}: Plugin {} completed.'.format(threadId, pluginName), LogLevel.DEBUG) 386 | 387 | output("Elapsed time: {}".format(time.time() - stime), LogLevel.DEBUG) 388 | # 更新数据 389 | msg_update['state'] = MsgState.COMPLETED 390 | cache.set(cache_key, msg_update) 391 | 392 | actions.append({ 393 | '_type': item['_type'], 394 | '_op_type': 'update', 395 | '_index': item['_index'], 396 | '_id': item['_id'], 397 | 'doc': msg_update 398 | }) 399 | 400 | # 提交到 ES 401 | if len(actions) > 0: 402 | output('Thread {}: Batch update {} document.'.format(threadId, len(actions)), LogLevel.INFO) 403 | output('Thread {}: {}'.format(threadId, json.dumps(actions)), LogLevel.DEBUG) 404 | batch_update(es, actions) 405 | actions = [] 406 | 407 | except: 408 | 
def usage():
    """
    Parse and validate command line options.
    :return: validated optparse options object (exits via parser.error on
             invalid input)
    """
    parser = optparse.OptionParser(usage="python3 %prog [OPTIONS] ARG", version='%prog 1.0.1')
    parser.add_option('-H', '--hosts', action='store', dest='hosts', type='string', help='Elasticsearch server address:port list, like localhost:9200,...')
    parser.add_option('-i', '--index', action='store', dest='index', type='string', default='logstash-passets', help='Elasticsearch index name')
    parser.add_option('-r', '--range', action='store', dest='range', type='int', default=15, help='Elasticsearch search time range, unit is minute, default is 15 minutes.')
    parser.add_option('-t', '--threads', action='store', dest='threads', type='int', default=5, help='Number of concurrent threads, default is 5')
    parser.add_option('-b', '--batch-size', action='store', dest='batch_size', type='int', default=20, help='The data item number of each batch per thread, default is 20.')
    parser.add_option('-c', '--cache-size', action='store', dest='cache_size', type='int', default=1024, help='Process cache size, default is 1024.')
    parser.add_option('-T', '--cache-ttl', action='store', dest='cache_ttl', type='int', default=120, help='Process cache time to live(TTL), default is 120 seconds.')
    parser.add_option('-m', '--mode', action='store', dest='mode', type='int', default=1, help='Work mode: 1-master, 0-slave, default is 1.')
    parser.add_option('-d', '--debug', action='store', dest='debug', type='int', default=2, help='Print debug info, 1-error, 2-warning, 3-info, 4-debug, default is 2.')

    options, args = parser.parse_args()
    # Program root directory, used later to locate plugins and rule files.
    options.rootdir = os.path.split(os.path.abspath(sys.argv[0]))[0]
    if not options.hosts:
        parser.error('Please specify elasticsearch address by entering the -H/--host parameter.')

    # Error texts fixed to match the actual defaults/limits enforced below.
    if options.threads < 1 or options.threads > 50:
        parser.error('Please specify valid thread count, the valid range is 1-50. Default is 5.')

    if options.batch_size < 5 or options.batch_size > 200:
        parser.error('Please specify valid batch count, the valid range is 5-200. Default is 20.')

    if options.cache_size < 1 or options.cache_size > 4096:
        parser.error('Please specify valid cache size, the valid range is 1-4096. Default is 1024.')

    if options.cache_ttl < 1 or options.cache_ttl > 24 * 60 * 60:
        parser.error('Please specify valid cache ttl, the valid range is 1 second to 1 day. Default is 120(2 minutes).')

    if options.range <= 0 or options.range > 24 * 60:
        parser.error('Please specify valid time, format is [number], like: 15, max is 1440(1 day).')

    if options.mode not in [0, 1]:
        parser.error('Please specify valid mode: 1-master, 0-slave.')

    if options.debug < 0: options.debug = 2

    # BUGFIX: the old in-place "del while indexing" loop shifted the list
    # under the fixed range(len(...)) and raised IndexError on trailing or
    # consecutive empty entries (e.g. "host1,,"). Filter instead.
    options.hosts = [h for h in options.hosts.split(',') if h]

    if not options.hosts:
        parser.error('Please specify elasticsearch address by entering the -H/--host parameter.')

    return options
_[:9] == 'sslports ': 166 | ports += _[9:].strip() + ',' 167 | 168 | if not (_[:6] == 'match ' or _[:10] == 'softmatch '): 169 | continue 170 | 171 | rule = { 172 | 'm': None, 'mf': 0, 's': None, 'p': None, 'v': None, 'i': None, 'o': None, 173 | 'd': None, 'h': None, 'cpe': '', 'r': None, 'ports': ports.strip(',') 174 | } 175 | 176 | line = _[_.find('match ') + 6:].strip() 177 | 178 | pos = line.find(' ') 179 | if pos == -1: 180 | continue 181 | 182 | rule['s'] = line[:pos] 183 | line = line[pos + 1:].strip() 184 | regex_type = re.compile(r'([mpviodh]|cpe:)([/\|=%@])') 185 | while True: 186 | m = regex_type.search(line) 187 | if not m: 188 | break 189 | 190 | key = m.group(1).replace(':', '') 191 | # 属性的边界符号是根据内容变的,通常为/,但内容中如果有/则使用|,暂时未发现其它符号 192 | end_pos = line.find(m.group(2), len(m.group(0))) 193 | val = None 194 | if end_pos > 0: 195 | val = line[len(m.group(0)): end_pos] 196 | line = line[end_pos+1:] 197 | else: 198 | val = line[len(m.group(0)): ] 199 | line = '' 200 | 201 | if key == 'cpe': # CPE可能出现多次 202 | if rule['cpe']: 203 | rule['cpe'] += '\n' + val 204 | else: 205 | rule[key] = val 206 | 207 | if line.find(' ') > 0: 208 | flags = line[: line.find(' ')] 209 | # 识别匹配表达式的模式 210 | if key == 'm': 211 | for flag in flags: 212 | if flag in regex_flags: 213 | rule['mf'] |= regex_flags[flag] 214 | else: 215 | print('[E] Find a unrecognized flag. 
def parsePorts(self, ports):
    """
    Expand a nmap-style port specification ("80,443,8000-8100") into a
    de-duplicated list of individual port numbers, preserving first-seen
    order.
    :param ports: comma-separated ports / port-ranges string
    :return: list of int port numbers
    """
    # Dict keys double as an insertion-ordered set for de-duplication.
    expanded = {}
    for token in ports.split(','):
        try:
            bounds = token.split('-')
            low = int(bounds[0])
            high = int(bounds[-1])
        except:
            # Unparseable token: skip it, same as the rule file allows.
            continue
        for port in range(low, high + 1):
            expanded[port] = None

    return list(expanded.keys())
rule['o'] else rule['o'], 285 | 'device': '' if not rule['d'] else rule['d'], 286 | 'service': rule['s'], 287 | # 端口不匹配的可信度下降为50 288 | 'confidence': 100 if len(rule['ports']) == 0 or port in rule['ports'] else 50 289 | } 290 | 291 | if m.lastindex: 292 | for i in range(m.lastindex + 1): 293 | skey = '${}'.format(i) 294 | for k in app: 295 | if not app[k] or k in ['confidence']: continue 296 | 297 | if skey in app[k]: 298 | app[k] = app[k].replace(skey, str(m.group(i), 'utf-8', 'ignore')) 299 | 300 | available = False 301 | if app['os']: 302 | # 太长或者是存在不可见字符的,说明获取的数据不对 303 | if len(app['os']) > 30 or self.name_regex.search(app['os']): continue 304 | tmpOS = app['os'].lower() 305 | for _ in self.os_white_list: 306 | if tmpOS.find(_) == 0 or _.find(tmpOS) == 0: 307 | available = True 308 | break 309 | else: 310 | available = True 311 | 312 | if available: 313 | # SSL 协议映射处理 314 | if app['service'] == 'ssl' and port in self.ssl_portmap: 315 | app['service'] = self.ssl_portmap[port] 316 | result.append(app) 317 | break 318 | except Exception as e: 319 | self.log(e, LogLevel.ERROR) 320 | self.log(traceback.format_exc(), LogLevel.ERROR) 321 | self.log('[!] 
def execute(self, msg):
    """
    Plugin entry point: fingerprint a raw TCP response.
    :param msg: message to process (must carry 'pro' == 'TCP', hex 'data'
                and 'port')
    :return: dict of fields to merge into the message, or None when the
             message is not applicable
    """
    if 'pro' not in msg or msg['pro'] != 'TCP':
        self.log('Not tcp message.', LogLevel.DEBUG)
        return

    if 'data' not in msg or not msg['data']:
        self.log('data field not found.')
        return

    # analyze() already scores each match (100, or 50 on port mismatch).
    # BUGFIX: the old post-processing loop popped a 'ports' key that
    # analyze() never puts into the app dicts, so it unconditionally reset
    # every confidence to 100 and defeated that scoring. Trust analyze().
    apps = self.analyze(bytes.fromhex(msg['data']), msg['port'])

    return {'apps': apps}
"3e494e464f3a4f70656e56504e204d616e6167656d656e7420496e746572666163652056657273696f6e20312e302e31202d2d2074797065202768656c702720666f72206d6f726520696e666f0d0a3e", 394 | 395 | # # Example: get_info: plugins\nRPRT 0\nasfdsafasfsafas 396 | # #"data": "6765745f696e666f3a20706c7567696e730a5250525420300a617366647361666173667361666173", 397 | 398 | # #"data": '16030300d0010000cc03035df0c691b795581015d570c868b701ed1784528e488e9aeec4b37dad521e2de4202332000016299b175b8f0ad21daeb83a03eb5d47b57bb60ecfbd10bcd67a101d0026c02cc02bc030c02fc024c023c028c027c00ac009c014c013009d009c003d003c0035002f000a0100005d00000019001700001461637469766974792e77696e646f77732e636f6d000500050100000000000a00080006001d00170018000b00020100000d001400120401050102010403050302030202060106030023000000170000ff01000100', 399 | # #"data": "004a56978183000100000000000013616c6572746d616e616765722d6d61696e2d3115616c6572746d616e616765722d6f706572617465640a6d6f6e69746f72696e67037376630000ff0001", 400 | 401 | # # Example: SMTP 402 | # #"data": '32323020736d74702e71712e636f6d2045736d7470205151204d61696c205365727665720d0a', 403 | 404 | # # Example: RDP 405 | # "data": "030000130ed000001234000209080002000000", 406 | 407 | # # Example:HTTPS 408 | # #"data": 
"1603030ce50200005b03035f6d463e6b8d09d43230d15d3e64ab61fb9e54317099b2c53c9dafd30e509297206abe5bc2265b6d09710c81877859d85a1218e5a27e5805fa0d9d47b2dbfe9f69009c000013000000000010000b000908687474702f312e310b000c7e000c7b0008313082082d30820715a0030201020210644a68f011861931192823728fbe1545300d06092a864886f70d01010b05003062311c301a060355040313134170706c65204953542043412032202d2047313120301e060355040b131743657274696669636174696f6e20417574686f7269747931133011060355040a130a4170706c6520496e632e310b3009060355040613025553301e170d3139303331353233313732395a170d3231303431333233313732395a30773117301506035504030c0e2a2e6c732e6170706c652e636f6d31253023060355040b0c1c6d616e6167656d656e743a69646d732e67726f75702e35373634383631133011060355040a0c0a4170706c6520496e632e3113301106035504080c0a43616c69666f726e6961310b300906035504061302555330820122300d06092a864886f70d01010105000382010f003082010a0282010100cf9390dba34c1b7fb02fb550891bd89849747501fecbb8c6df45ead2ccf00341e11d43a5b6d78054493bb92095efbd2f19df07e18ae81f8cda4c7b996722ff99eb68a3e7ce9d967ccae05128040498b93493a717ce2e367a647750ec5523194005a6f6d1c98c8e28181021b3d5d1971741158e13d8d658272de9ddf2c211e8e2fbfce6e7a116270301d492bff6dcc26157ff562dd596a1a3b4a385d63cfaa1988dcea8365ff006e9bbf2bb9fbc9de954ca41ec6ac4706a1c8ea3962b97930a7cad1e63da24ce2e871999ed2f7ab354b603dfd09dc1edf11226d79caa6a509b0fce9004ea346f5351cb0967b7a5c079bf4299ea3b954709359303a90aa028f51f0203010001a38204c8308204c4300c0603551d130101ff04023000301f0603551d23041830168014d87a94447c907090169edd179c01440386d62a29307e06082b0601050507010104723070303406082b060105050730028628687474703a2f2f63657274732e6170706c652e636f6d2f6170706c6569737463613267312e646572303806082b06010505073001862c687474703a2f2f6f6373702e6170706c652e636f6d2f6f63737030332d6170706c656973746361326731323030190603551d1104123010820e2a2e6c732e6170706c652e636f6d3081ff0603551d200481f73081f43081f1060a2a864886f76364050b043081e23081a406082b060105050702023081970c819452656c69616e6365206f6e207468697320636572746966696361746520627920616e792
0706172747920617373756d657320616363657074616e6365206f6620616e79206170706c696361626c65207465726d7320616e6420636f6e646974696f6e73206f662075736520616e642f6f722063657274696669636174696f6e2070726163746963652073746174656d656e74732e303906082b06010505070201162d687474703a2f2f7777772e6170706c652e636f6d2f6365727469666963617465617574686f726974792f727061301d0603551d250416301406082b0601050507030206082b0601050507030130370603551d1f0430302e302ca02aa0288626687474703a2f2f63726c2e6170706c652e636f6d2f6170706c6569737463613267312e63726c301d0603551d0e041604143fc6bb3b828a044930a9813a6824cc0d7388e597300e0603551d0f0101ff0404030205a03082026d060a2b06010401d6790204020482025d048202590257007600bbd9dfbc1f8a71b593942397aa927b473857950aab52e81a909664368e1ed1850000016983ae8f950000040300473045022100baa8d2a6d8f3b68959c063775735c8cffd1450afe792c79efb6225258f41de10022076f6fbf8f9bea11ace1c596f5c39f35804e036329e4fb831298f8901927f668a007500a4b90990b4", 409 | # #'data': ' 1603030046020000420303380629e477d8f0a0b8682d25118e807ccaacc11122040dcbb88433b1af19363c00c02f00001aff01000100000b000403000102002300000010000500030268321603030b2e0b000b2a000b270006cf308206cb308205b3a003020102020c1806f2a7bc6475852c9c7ff7300d06092a864886f70d01010b05003050310b300906035504061302424531193017060355040a1310476c6f62616c5369676e206e762d7361312630240603550403131d476c6f62616c5369676e20525341204f562053534c2043412032303138301e170d3139313031313037343133395a170d3231313132383233353935395a30818a310b300906035504061302434e310f300d06035504080c06e58c97e4baac310f300d06035504070c06e58c97e4baac31123010060355040b0c09e8bf90e7bbb4e983a8312d302b060355040a0c24e58c97e4baace59fbae8b083e7bd91e7bb9ce882a1e4bbbde69c89e99990e585ace58fb83116301406035504030c0d2a2e74696e6779756e2e636f6d30820122300d06092a864886f70d01010105000382010f003082010a0282010100d1789b0d322616ca1bfc6467b2ecbec70dcf8b14a79d217561cb0a28cd544304383dd4a99c62edc0ff9ef5b732b62c823f0032a3c9b82a6c16a9e12a92af3748c4842605af6fc6cdacff6ac92a67a7ee1af4f678fba50f25ea24c82fc96b89f0d7353b062210a883b73cf3832
93f2d051fa959559738b8f1c2bedf43af872247855ba2d29b8b40898303299aca9b11fd1a954864b948449f8f30fd2eb32add07e9fbfc98ed16eca9f149966cab6dde7eca5758eb1fd4ab20ccf700283c48cff3235ffbbbaa234bbd8eab6f9ececa10f3cbbf83af0e811301010d70fe3773d7a94f3fb9f0d0a406c6bd352913284117e4f79cd5a8ff7001d9b0379a2c5234a7630203010001a382036830820364300e0603551d0f0101ff0404030205a030818e06082b06010505070101048181307f304406082b060105050730028638687474703a2f2f7365637572652e676c6f62616c7369676e2e636f6d2f6361636572742f67737273616f7673736c6361323031382e637274303706082b06010505073001862b687474703a2f2f6f6373702e676c6f62616c7369676e2e636f6d2f67737273616f7673736c63613230313830560603551d20044f304d304106092b06010401a03201143034303206082b06010505070201162668747470733a2f2f7777772e676c6f62616c7369676e2e636f6d2f7265706f7369746f72792f3008060667810c01020230090603551d1304023000303f0603551d1f043830363034a032a030862e687474703a2f2f63726c2e676c6f62616c7369676e2e636f6d2f67737273616f7673736c6361323031382e63726c30390603551d1104323030820d2a2e74696e6779756e2e636f6d82122a2e6e6574776f726b62656e63682e636f6d820b74696e6779756e2e636f6d301d0603551d250416301406082b0601050507030106082b06010505070302301f0603551d23041830168014f8ef7ff2cd7867a8de6f8f248d88f1870302b3eb301d0603551d0e04160414f2686266f52e4ed5a8649335ac838c68634264e730820181060a2b06010401d679020402048201710482016d016b007700a4b90990b418581487bb13a2cc67700a3c359804f91bdfb8e377cd0ec80ddc100000016db9c420910000040300483046022100d73c696260ec47b79ae61affe7a0f96817702dcfe603ea1a5810f08ff909a6f4022100c96b98b57f5c92a97a867b62ab4b26d78bd7ff40ae1403422927266f707cab4a0077006f5376ac31f03119d89900a45115ff77151c11d902c10029068db2089a37d9130000016db9c420e30000040300483046022100e169be6917dc0b2ef2cb4b386efea03e0c0346277308aa18bfd0be3cadd2df91022100a3e000d1e9f375441f154e03bda80740f2cafb7239d7929f39aa5579', 410 | 411 | # "inner": False, 412 | # "tag": "sensor-ens160" 413 | # } 414 | msg_update = {} 415 | for i in sorted(plugins.keys()): 416 | (pluginName, plugin) = plugins[i] 417 | if 
def unchunk_body(self, body):
    """
    Reassemble a chunked HTTP response body.
    :param body: chunked HTTP response body text
    :return: the de-chunked body; input is returned as-is when it does not
             look chunked
    """
    data = ""
    pos = body.find('\r\n')
    while pos > 0:
        try:
            size = int(body[:pos], 16)
            if size > 0:
                # BUGFIX: append each decoded chunk. The old assignment
                # ("data = ...") kept only the LAST chunk of multi-chunk
                # bodies.
                data += body[pos+2:pos+2+size]
                body = body[pos+2+size+2:]
            else:
                # Terminating zero-size chunk: skip it and stop decoding.
                body = body[pos+2+size+2:]
                break
        except:
            # Malformed chunk header: stop and keep whatever remains.
            break

        pos = body.find('\r\n')

    # Append any remaining (non-chunked) tail.
    data += body
    return data
_['product'] 141 | 142 | if result[_['name']]['confidence'] < 100: 143 | result[_['name']]['confidence'] = result[_['name']]['confidence'] + _['confidence'] 144 | if result[_['name']]['confidence'] > 100: 145 | result[_['name']]['confidence'] = 100 146 | 147 | confidenceRegex = re.compile(r'^confidence:([\d\.]+)$') 148 | # 填充关联指纹和分类 149 | appNames = list(result.keys()) 150 | while len(appNames) > 0: 151 | appName = appNames.pop() 152 | # 合并产品属性到应用属性中 153 | if 'product' in result[appName]: 154 | if result[appName]['product']: 155 | result[appName]['name'] += result[appName]['product'] 156 | 157 | del(result[appName]['product']) 158 | 159 | result[appName]['categories'] = self.analyzeCategory(self._apps[appName]['cats']) 160 | # 如果当前层为操作系统层,则将 os 设置为指纹名 161 | result[appName]['os'] = appName if self._apps[appName]['layer'] == 4 else '' 162 | 163 | if not self._apps[appName]['implies']: continue 164 | 165 | for parentName in self._apps[appName]['implies']: 166 | confidence = 0 167 | pos = parentName.find(r'\;') 168 | if pos > 0: 169 | rightName = parentName[pos+2:] 170 | parentName = parentName[:pos] 171 | m = confidenceRegex.match(rightName) 172 | if m: 173 | tmp_c = float(m.group(1)) 174 | if tmp_c > 1: 175 | confidence = abs(int(tmp_c)) 176 | else: 177 | confidence = abs(int(tmp_c * 100)) 178 | 179 | if parentName in self._apps and parentName not in result: 180 | result[parentName] = { 181 | 'os': parentName, 182 | 'name': parentName, 183 | 'confidence': 100 if confidence > 100 else confidence, 184 | 'version': '', 185 | 'categories': self.analyzeCategory(self._apps[parentName]['cats']), 186 | 'layer': self._apps[parentName]['layer'], 187 | 'website': self._apps[parentName]['website'] 188 | } 189 | appNames.append(parentName) 190 | 191 | return list(result.values()) 192 | 193 | def analyzeCategory(self, cat_ids): 194 | """ 195 | 根据分类ID列表提取指纹分类列表 196 | :param cat_ids: 分类ID列表 197 | """ 198 | categories = [] 199 | for cat_id in cat_ids: 200 | cat_id = str(cat_id) 201 | 
if cat_id in self._categories: 202 | categories.append({ 203 | 'id': int(cat_id), 204 | 'name': self._categories[cat_id]['name'] 205 | }) 206 | 207 | return categories 208 | 209 | def analyzeHtml(self, body): 210 | """ 211 | 分析页面中的指纹信息 212 | :param body: 页面源码 213 | :return: 指纹列表 214 | """ 215 | if not body: return [] 216 | 217 | result = [] 218 | for _ in self._rules['html']: 219 | match = _['regex'].search(body) 220 | if match: 221 | result.append(self.makeDetected(match, _)) 222 | 223 | return result 224 | 225 | def analyzeJs(self, js): 226 | """ 227 | 分析页面加载的JS变量中的指纹信息 228 | :param js: js 变量字典 229 | :return: 指纹列表 230 | """ 231 | if not js: return [] 232 | 233 | result = [] 234 | for _ in self._rules['js']: 235 | if _['keyword'] not in js: continue 236 | 237 | if not _['regex']: 238 | result.append(self.makeDetected(None, _)) 239 | else: 240 | match = _['regex'].search(js[_['keyword']]) 241 | if match: 242 | result.append(self.makeDetected(match, _)) 243 | 244 | return result 245 | 246 | def analyzeMetas(self, metas): 247 | """ 248 | 分析页面中元数据标签中的指纹信息 249 | :param cookies: Cookie 字典 250 | :return: 指纹列表 251 | """ 252 | if not metas: return [] 253 | 254 | result = [] 255 | for _ in self._rules['meta']: 256 | if _['keyword'] not in metas: continue 257 | 258 | if not _['regex']: 259 | result.append(self.makeDetected(None, _)) 260 | else: 261 | for item in metas[_['keyword']]: 262 | match = _['regex'].search(item) 263 | if match: 264 | result.append(self.makeDetected(match, _)) 265 | 266 | return result 267 | 268 | def analyzeScripts(self, scripts): 269 | """ 270 | 分析引用脚本路径中的指纹信息 271 | :param scripts: scripts 列表 272 | :return: 指纹列表 273 | """ 274 | if not scripts: return [] 275 | 276 | result = [] 277 | for _ in self._rules['script']: 278 | for item in scripts: 279 | match = _['regex'].search(item) 280 | if match: 281 | result.append(self.makeDetected(match, _)) 282 | 283 | return result 284 | 285 | def analyzeCookies(self, cookies): 286 | """ 287 | 分析URL指纹信息 288 | 
:param cookies: Cookie 字典 289 | :return: 指纹列表 290 | """ 291 | if not cookies: return [] 292 | 293 | result = [] 294 | for _ in self._rules['cookies']: 295 | if _['keyword'] not in cookies: continue 296 | 297 | if not _['regex']: 298 | result.append(self.makeDetected(None, _)) 299 | else: 300 | match = _['regex'].search(cookies[_['keyword']]) 301 | if match: 302 | result.append(self.makeDetected(match, _)) 303 | 304 | return result 305 | 306 | def analyzeHeaders(self, headers): 307 | """ 308 | 分析URL指纹信息 309 | :param headers: HTTP头 310 | :return: 指纹列表 311 | """ 312 | if not headers: return [] 313 | 314 | result = [] 315 | for _ in self._rules['headers']: 316 | if _['keyword'] not in headers: continue 317 | 318 | if not _['regex']: 319 | result.append(self.makeDetected(None, _)) 320 | else: 321 | for headValue in headers[_['keyword']]: 322 | match = _['regex'].search(headValue) 323 | if match: 324 | result.append(self.makeDetected(match, _)) 325 | break 326 | 327 | return result 328 | 329 | def analyzeUrl(self, url): 330 | """ 331 | 分析URL指纹信息 332 | :param url: URL 333 | :return: 指纹列表 334 | """ 335 | if not url: return [] 336 | 337 | result = [] 338 | for _ in self._rules['url']: 339 | match = _['regex'].search(url) 340 | if match: 341 | result.append(self.makeDetected(match, _)) 342 | return result 343 | 344 | def makeDetected(self, match, rule): 345 | """ 346 | 根据匹配结果生成一条应用信息 347 | :param match: 正则匹配结果 348 | :param rule: 匹配规则 349 | :return: {name,confidence,version,product} 350 | """ 351 | result = { 352 | "name": rule['name'], 353 | "confidence": rule['confidence'], 354 | "version": '' if 'version' not in rule else rule['version'], 355 | "product": '' if 'product' not in rule else rule['product'] 356 | } 357 | 358 | if match: 359 | if match.lastindex: 360 | for k in ['version', 'product']: 361 | if rule[k]: 362 | for i in range(1, match.lastindex + 1): 363 | result[k] = result[k].replace(r'\{}'.format(i), match.group(i)) 364 | 365 | for k in ['version', 'product']: 
    def loadRules(self, rule_file):
        """
        Load the Wappalyzer rule library from a JSON file.

        Populates self._categories (id -> category info), self._apps
        (app name -> {website, cats, implies, layer}) and self._rules
        (rule type -> list of parsed match rules).

        :param rule_file: path of the rule file (UTF-8 JSON)
        :return: True on success, False on failure (errors are logged)
        """
        fp = None
        try:
            fp = open(rule_file, encoding='utf-8')
            rules = json.loads(fp.read())
            # basic sanity checks on the top-level document structure
            if not rules or 'apps' not in rules or 'categories' not in rules:
                raise Exception('Wappalyzer rule file is null or format error.')
            if not isinstance(rules['apps'], dict) or len(rules['apps']) == 0:
                raise Exception('Wappalyzer rules is null or format error.')

            self._categories = rules['categories']
            # NOTE(review): 'cookies' is declared here, but the loop below has
            # no branch for t == 'cookies', so cookie rules are never loaded
            # and analyzeCookies always iterates an empty list -- confirm
            # whether that is intentional.
            self._rules = {
                'cookies':[],
                'headers':[],
                'script': [],
                'html': [],
                'url': [],
                'js': [],
                'meta': []
            }
            for appName in rules['apps']:
                # normalize 'layer' to an int in 1..5, defaulting to 1
                if 'layer' not in rules['apps'][appName]:
                    rules['apps'][appName]['layer'] = 1
                else:
                    try:
                        rules['apps'][appName]['layer'] = int(rules['apps'][appName]['layer'])
                        if rules['apps'][appName]['layer'] not in [1, 2, 3, 4, 5]:
                            rules['apps'][appName]['layer'] = 1
                    except:
                        # NOTE(review): on conversion failure 'layer' keeps its
                        # original non-int value -- confirm this is intended.
                        pass

                # Skip pure NMAP fingerprints: layer-1 apps that define no
                # HTTP-detectable rule type at all.
                if 'cookies' not in rules['apps'][appName] and 'headers' not in rules['apps'][appName] and \
                    'js' not in rules['apps'][appName] and 'script' not in rules['apps'][appName] and \
                    'html' not in rules['apps'][appName] and 'url' not in rules['apps'][appName] and \
                    'meta' not in rules['apps'][appName] and rules['apps'][appName]['layer'] == 1:
                    continue

                website = '' if 'website' not in rules['apps'][appName] else rules['apps'][appName]['website']
                cats = [] if 'cats' not in rules['apps'][appName] else rules['apps'][appName]['cats']
                implies = [] if 'implies' not in rules['apps'][appName] else rules['apps'][appName]['implies']

                self._apps[appName] = {
                    'website': website,
                    'cats': cats,
                    'implies': implies,
                    'layer': rules['apps'][appName]['layer']
                }
                # 'implies' may be a single string in the source file
                if not isinstance(self._apps[appName]['implies'], list):
                    self._apps[appName]['implies'] = [ self._apps[appName]['implies'] ]

                for t in rules['apps'][appName]:
                    # metadata keys carry no match rules
                    if t in ['icon', 'implies', 'website', 'cats', 'layer']: continue

                    if t == 'headers':
                        # keyword = header name (lowercased), values = patterns
                        for k in rules['apps'][appName][t]:
                            if not isinstance(rules['apps'][appName][t][k], list):
                                rules['apps'][appName][t][k] = [ rules['apps'][appName][t][k] ]

                            for headerValue in rules['apps'][appName][t][k]:
                                rule = self.parseRule(headerValue)
                                if rule:
                                    rule['name'] = appName
                                    rule['keyword'] = k.lower()
                                    self._rules[t].append(rule)

                    elif t in ['js', 'meta']:
                        # keyword = JS variable / meta tag name (lowercased)
                        for k in rules['apps'][appName][t]:
                            if not isinstance(rules['apps'][appName][t][k], list):
                                rules['apps'][appName][t][k] = [ rules['apps'][appName][t][k] ]

                            for v in rules['apps'][appName][t][k]:
                                rule = self.parseRule(v)
                                if rule:
                                    rule['name'] = appName
                                    rule['keyword'] = k.lower()
                                    self._rules[t].append(rule)

                    elif t in ['html', 'script', 'url']:
                        # pattern-only rule types: no keyword to key on
                        if not isinstance(rules['apps'][appName][t], list):
                            rules['apps'][appName][t] = [ str(rules['apps'][appName][t]) ]

                        for item in rules['apps'][appName][t]:
                            rule = self.parseRule(item)
                            if rule:
                                rule['name'] = appName
                                rule['keyword'] = ''
                                self._rules[t].append(rule)

            return True
        except Exception as e:
            self.log(str(e), LogLevel.ERROR)
            self.log(traceback.format_exc(), LogLevel.ERROR)
            return False
        finally:
            if fp: fp.close()
return { 'regex': '', 'version': '', 'confidence': 100 } 482 | 483 | try: 484 | parts = rule.split(r'\;') 485 | result = { 486 | 'regex': re.compile(parts[0], re.I), 487 | 'version': '', 488 | 'product': '', 489 | 'confidence': 100 490 | } 491 | for item in parts[1:]: 492 | pos = item.find(':') 493 | if pos == -1: continue 494 | 495 | if item[:pos] == 'version': 496 | result['version'] = item[pos+1:] 497 | elif item[:pos] == 'confidence': 498 | confidence = float(item[pos+1:]) 499 | if confidence <= 1: 500 | confidence *= 100 501 | result['confidence'] = abs(int(confidence)) 502 | elif item[:pos] == 'product': 503 | result['product'] = item[pos+1:] 504 | return result 505 | except Exception as e: 506 | self.log(str(e), LogLevel.ERROR) 507 | self.log("Rule:" + rule, LogLevel.ERROR) 508 | self.log(traceback.format_exc(), LogLevel.ERROR) 509 | return None 510 | 511 | def parseStatus(self, rawHeaders): 512 | """ 513 | 识别原始HTTP头中的请求状态 514 | :param rawHeaders: 原始头信息 515 | :return: HTTP响应状态码 516 | """ 517 | if rawHeaders: 518 | match = self._statusRegex.search(rawHeaders) 519 | if match: 520 | return int(match.group(1)) 521 | return None 522 | 523 | def parseHeaders(self, rawHeaders): 524 | """ 525 | 将原始HTTP头解析为字典格式 526 | :param rawHeaders: 原始头信息 527 | :return: 请求头字典 528 | """ 529 | if not rawHeaders: return {} 530 | 531 | lines = rawHeaders.split('\r\n') 532 | if len(lines) > 0 and lines[0][:5] == 'HTTP/': del(lines[0]) # 删除 HTTP/x.x 这一行 533 | 534 | result = {} 535 | for i in range(0, len(lines)): 536 | 537 | pos = lines[i].find(':') 538 | if pos == -1: continue 539 | 540 | header_name = lines[i][:pos].strip().lower() 541 | header_value = lines[i][pos+1:].strip() 542 | if header_name not in result: result[header_name] = [] 543 | 544 | result[header_name].append(header_value) 545 | 546 | return result 547 | 548 | def parseCookies(self, headers): 549 | """ 550 | 获取HTTP响应头中的Cookie列表 551 | :param headers: HTTP头字典对象 552 | """ 553 | if 'set-cookie' not in headers: return {} 
554 | 555 | cookies = {} 556 | for item in headers['set-cookie']: 557 | parts = item.split(';') 558 | for _ in parts: 559 | pos = _.find('=') 560 | if pos == -1: continue 561 | 562 | name = _[:pos] 563 | if name not in ['domain', 'path']: 564 | cookies[name] = _[pos+1:].strip() 565 | continue 566 | 567 | return cookies 568 | 569 | def parseScripts(self, html): 570 | """ 571 | 获取页面中的脚本列表 572 | :param html: 页面源代码 573 | """ 574 | return self._scriptRegex.findall(html) 575 | 576 | def parseLinks(self, html): 577 | """ 578 | 获取页面中的链接列表 579 | :param html: 页面源代码 580 | """ 581 | return self._linkRegex.findall(html) 582 | 583 | def parseMetas(self, html): 584 | """ 585 | 获取页面中的元数据 586 | :param html: 页面源代码 587 | """ 588 | metas1 = self._metaRegex1.findall(html) 589 | metas2 = self._metaRegex2.findall(html) 590 | 591 | result = {} 592 | for _ in metas1 + metas2: 593 | if _[0] not in result: 594 | result[_[0]] = [ _[1] ] 595 | else: 596 | result[_[0]].append(_[1]) 597 | 598 | return result 599 | 600 | def parseJs(self, html): 601 | """ 602 | 获取页面执行过程中的JS变量(未实现) 603 | :param html: 页面源代码 604 | """ 605 | return {} 606 | 607 | 608 | class FilterPlugin(Plugin): 609 | _wappalyzer = None 610 | """ 611 | Web 指纹识别插件 612 | src: url, header, body 613 | dst: 614 | - apps: 应用指纹,格式:[{name,version,confidence},...] 
615 | - title: 网页标题 616 | 617 | """ 618 | 619 | def __init__(self, rootdir, debug = False, logger=None): 620 | """ 621 | 构造函数 622 | :param rootdir: 应用根目录 623 | :param debug: 调式开关 624 | """ 625 | super().__init__(rootdir, debug, logger) 626 | 627 | # 初始化指纹相关路径 628 | 629 | self._wappalyzer = Wappalyzer(os.path.join(rootdir, 'rules', 'apps.json'), logger=logger) 630 | 631 | def analyze(self, url, headers, body): 632 | """ 633 | 分析获取指纹 634 | :param url: 请求URL 635 | :param headers: 响应头 636 | :param body: 响应正文 637 | """ 638 | return self._wappalyzer.analyze(url, headers, body) 639 | 640 | def generate_header(self, msg): 641 | """ 642 | 根据消息生成一个HTTP头信息 643 | :param msg: 原始消息 JSON 644 | :return: 生成的原始响应头 645 | """ 646 | header = '' 647 | if 'server' in msg and msg['server']: 648 | header += '\r\nServer: {}'.format(msg['server']) 649 | if 'type' in msg and msg['type']: 650 | header += '\r\nContent-Type: {}'.format(msg['type']) 651 | 652 | if header: 653 | header = 'HTTP/1.1 {}{}'.format( 654 | '0' if 'code' not in msg or not msg['code'] else msg['code'], 655 | header 656 | ) 657 | return header 658 | 659 | def execute(self, msg): 660 | """ 661 | 插件入口函数,根据插件的功能对 msg 进行处理 662 | :param msg: 需要处理的消息 663 | :param mode: 识别方法:1-使用内置Python引擎,2-使用Node版本Wappalyzer引擎 664 | :return: 返回需要更新的消息字典(不含原始消息) 665 | """ 666 | if 'pro' not in msg or msg['pro'] != 'HTTP': 667 | self.log('Not http message.', LogLevel.DEBUG) 668 | return 669 | 670 | info = {} 671 | # 更新HTTP头 672 | if 'header' not in msg or not msg['header']: 673 | new_header = self.generate_header(msg) 674 | if new_header: 675 | msg['header'] = new_header 676 | info['header'] = new_header 677 | else: 678 | msg['header'] = 'HTTP/1.1 000 Unkown' 679 | 680 | if 'body' not in msg or not msg['body']: 681 | msg['body'] = '' 682 | 683 | # 指纹识别 684 | apps = self.analyze(msg['url'], msg['header'], msg['body']) 685 | info['apps'] = apps 686 | 687 | # 标题提取 688 | if 'type' in msg and msg['type'].find('text/html') != -1: 689 | m = 
re.search(r'([^<]*?)', msg['body'], re.I) 690 | if m: 691 | info['title'] = html.unescape(m.group(1)) 692 | 693 | return info 694 | 695 | if __name__ == '__main__': 696 | import time 697 | 698 | #''' 699 | plugins = Plugin.loadPlugins(os.path.join(os.path.dirname(__file__), ".."), True) 700 | print(plugins) 701 | msg = { 702 | "pro": "HTTP", 703 | "inner": True, 704 | "site": "http://192.168.199.170", 705 | "ip_str": "192.168.199.170", 706 | "type": "text/html; charset=UTF-8", 707 | "ip": "192.168.199.170", 708 | "method": "GET", 709 | "url": "http://192.168.199.170/", 710 | "server": "Apache/2.4.6 (CentOS)", 711 | "header": "Date: Mon, 23 Nov 2020 09:55:57 GMT\r\nServer: Apache/2.4.6 (CentOS)\r\nLast-Modified: Thu, 16 Oct 2014 13:20:58 GMT\r\nETag: \"1321-5058a1e728280\"\r\nAccept-Ranges: bytes\r\nContent-Length: 4897\r\nContent-Type: text/html; charset=UTF-8", 712 | "@timestamp": "2020-11-23T09:55:48.018Z", 713 | "@version": "1", 714 | "tag": "eno2", 715 | "geoip": {}, 716 | "tags": [ 717 | "_geoip_lookup_failure" 718 | ], 719 | "host": "192.168.199.170:80", 720 | "body": "\n\n\t\tApache HTTP Server Test Page powered by CentOS\n\t\t\n\n \n \n \n\n