├── src ├── requirements.txt ├── plugins │ ├── __init__.py │ ├── geoip.py │ ├── url.py │ ├── ip.py │ ├── plugin.py │ ├── asset.py │ ├── nmap.py │ └── wappalyzer.py ├── rules │ ├── http_asset_types.json │ ├── tcp_device_types.json │ ├── http_device_types.json │ ├── vendors.json │ └── tcp_asset_types.json ├── config │ └── plugin.yml └── main.py ├── docker-compose.yml ├── Dockerfile ├── .gitignore ├── PLUGIN_DEVELOP.md ├── README.md └── LICENSE /src/requirements.txt: -------------------------------------------------------------------------------- 1 | elasticsearch==7.1.0 2 | pyaml 3 | cacheout 4 | geoip2 -------------------------------------------------------------------------------- /src/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix:9200 10 | - ELASTICSEARCH_INDEX=logstash-passets 11 | - THREADS=5 12 | - BATCH_SIZE=20 13 | - CACHE_SIZE=1024 14 | - CACHE_TTL=120 15 | - MODE=1 16 | - DEBUG=1 17 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.io/ubuntu:18.04 2 | 3 | LABEL maintainer="tanjelly@gmail.com" version="1.0.0" 4 | 5 | USER root 6 | 7 | ENV TZ="CST-8" ELASTICSEARCH_URL="localhost:9200" ELASTICSEARCH_INDEX="logstash-passets" THREADS=5 BATCH_SIZE=20 CACHE_SIZE=1024 CACHE_TTL=120 MODE=1 DEBUG=1 8 | 9 | COPY src/ /opt/filter/ 10 | 11 | WORKDIR /opt/filter/ 12 | 13 | RUN apt-get update && \ 14 | apt-get install -y python3 python3-pip && \ 15 | pip3 install -r requirements.txt && \ 16 | apt-get clean all && \ 17 | apt-get autoclean && \ 18 | apt-get autoremove 19 | 20 | ENTRYPOINT ["sh", "-c", "python3 /opt/filter/main.py -H $ELASTICSEARCH_URL -i $ELASTICSEARCH_INDEX -t $THREADS -b $BATCH_SIZE -c $CACHE_SIZE -T $CACHE_TTL -m $MODE -d $DEBUG"] 
-------------------------------------------------------------------------------- /src/rules/http_asset_types.json: -------------------------------------------------------------------------------- 1 | { 2 | "Network Device":[ 3 | 31,37,1005 4 | ], 5 | "Security Device":[ 6 | 1006 7 | ], 8 | "Storage Device":[ 9 | 48 10 | ], 11 | "IoT":[ 12 | 39,1008 13 | ], 14 | "Printer":[ 15 | 40 16 | ], 17 | "Control System":[ 18 | 45 19 | ], 20 | "OS":[ 21 | 5 22 | ], 23 | "Mail":[ 24 | 30 25 | ], 26 | "Database":[ 27 | 34 28 | ], 29 | "Web Server":[ 30 | 22,64 31 | ], 32 | "Media Server":[ 33 | 38 34 | ], 35 | "Application Middleware":[ 36 | 1009 37 | ], 38 | "Office Software":[ 39 | 50,53,58,1007 40 | ], 41 | "Digital Currency":[ 42 | 56 43 | ], 44 | "Container":[ 45 | 60 46 | ], 47 | "Cloud Platform":[ 48 | 9,61,62,63 49 | ], 50 | "Load Balancer":[ 51 | 65 52 | ], 53 | "Securities System":[ 54 | 1001,1002 55 | ], 56 | "Knowledge Base System":[ 57 | 2,4,8,11,49 58 | ], 59 | "Payment System":[ 60 | 41,43 61 | ] 62 | } -------------------------------------------------------------------------------- /src/rules/tcp_device_types.json: -------------------------------------------------------------------------------- 1 | { 2 | "load balancer":["load balancer"], 3 | "remote management":[], 4 | "security-misc":["gateway", "security"], 5 | "printer":["printer"], 6 | "storage-misc":["storage"], 7 | "media device":["media", "video", "dvr"], 8 | "router":["router"], 9 | "webcam":["camera", "webcam"], 10 | "terminal server":["kvm"], 11 | "printer server":["print server"], 12 | "power-device":["ups"], 13 | "firewall":["firewall"], 14 | "pda":["pda"], 15 | "pbx":["pbx"], 16 | "game console":["game"], 17 | "phone":["phone"], 18 | "voip":["voip", " sip ", "ip phone"], 19 | "wap":["wap"], 20 | "switch":["switch"], 21 | "terminal":["terminal"], 22 | "power-misc":["power"], 23 | "telecom-misc":["telecom"], 24 | "proxy server":["proxy"], 25 | "hub":[" hub"], 26 | "bridge":["bridge"], 27 | "broadand 
router":["dsl", "adsl", "modem", "broadand"], 28 | "vpn":["vpn", "openvpn"], 29 | "wireless router":["wireless", "wifi", "wi-fi", "wlan"], 30 | "specializied":[] 31 | } -------------------------------------------------------------------------------- /src/config/plugin.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # 根节点:插件名称,与plugins目录下的文件名对应 3 | # 二级节点: 4 | # - enable: 是否启用插件 5 | # - index: 插件的处理顺序,0以上的整数,数值越小 6 | 7 | ip: 8 | enable: false 9 | index: 1 10 | inner_ips: 11 | - 10.0.0.0-10.255.255.255 12 | - 172.16.0.0-172.31.255.255 13 | - 192.168.0.0-192.168.255.255 14 | - 169.254.0.0-169.254.255.255 15 | - 127.0.0.1-127.0.0.255 16 | 17 | geoip: 18 | enable: false 19 | index: 2 20 | 21 | url: 22 | enable: false 23 | index: 3 24 | 25 | wappalyzer: 26 | enable: true 27 | index: 4 28 | 29 | nmap: 30 | enable: true 31 | index: 5 32 | ignore_rules: 33 | - ^OK$ 34 | - ^\+OK\r\n$ 35 | ssl_portmap: 36 | - 443:https 37 | - 465:smtps 38 | - 993:imaps 39 | - 995:pop3s 40 | - 22:ssh 41 | - 21:ftps 42 | 43 | https: 44 | enable: true 45 | include_chain: false 46 | index: 6 47 | 48 | asset: 49 | enable: true 50 | index: 7 51 | ignore_vendors: 52 | - asp 53 | - iis 54 | - windows 55 | - java 56 | - getmdl 57 | - getbootstrap 58 | -------------------------------------------------------------------------------- /src/rules/http_device_types.json: -------------------------------------------------------------------------------- 1 | { 2 | "load balancer":["load balancer", "负载均衡", "big-ip", "cdn"], 3 | "remote management":[], 4 | "security-misc":["gateway", "网关", "security", "安全", "堡垒机"], 5 | "printer":["printer", "打印机"], 6 | "storage-misc":["storage", "云存储"], 7 | "media device":["media", "流媒体", "视频"], 8 | "router":["router", "路由器"], 9 | "webcam":["camera", "webcam", "摄像机"], 10 | "terminal server":["kvm"], 11 | "printer server":["print server", "打印服务器"], 12 | "power-device":["ups"], 13 | "firewall":["firewall", "防火墙"], 14 | 
"pda":["pda"], 15 | "pbx":["pbx"], 16 | "game console":["game"], 17 | "phone":["phone"], 18 | "voip":["voip", " sip ", "ip phone", "IP语音", "电话"], 19 | "wap":["wap"], 20 | "switch":["switch", "交换机"], 21 | "terminal":["终端"], 22 | "power-misc":["power"], 23 | "telecom-misc":[], 24 | "proxy server":["proxy", "代理"], 25 | "hub":[" hub", "集线器"], 26 | "bridge":["bridge"], 27 | "broadand router":["dsl ", "modem", "broadand", "宽带"], 28 | "vpn":["vpn", "接入"], 29 | "wireless router":["wireless", "wifi", "wi-fi", "wlan", "无线路由"], 30 | "specializied":[] 31 | } -------------------------------------------------------------------------------- /src/rules/vendors.json: -------------------------------------------------------------------------------- 1 | [ 2 | "Asus", 3 | "Dell", 4 | "IBM", 5 | "Cisco", 6 | "3COM", 7 | "Fortinet", 8 | "Huawei", 9 | "H3C", 10 | "Linksys", 11 | "Adobe", 12 | "AirLink", 13 | "Google", 14 | "Microsoft", 15 | "Alcatel", 16 | "Alt-N", 17 | "Aastra", 18 | "APC", 19 | "AVG", 20 | "AVM", 21 | "AWS", 22 | "AT&T", 23 | "ActionTec", 24 | "ACTi", 25 | "Adtran", 26 | "Allied", 27 | "Amino", 28 | "Amazon", 29 | "Apache", 30 | "Apple", 31 | "ArGoSoft", 32 | "Atlassian", 33 | "Avaya", 34 | "Avtech", 35 | "Axis", 36 | "Axway", 37 | "HP", 38 | "BMC", 39 | "Barracuda", 40 | "Belkin", 41 | "D-Link", 42 | "360", 43 | "BenQ", 44 | "BayStack", 45 | "Samsung", 46 | "Xerox", 47 | "Xen", 48 | "ZTE", 49 | "Intel", 50 | "Juniper", 51 | "TP-Link", 52 | "Brocade", 53 | "Netgear", 54 | "SMC", 55 | "Trendnet", 56 | "Trend", 57 | "Sony", 58 | "Hikvision", 59 | "Huacam", 60 | "Aviosys", 61 | "Panasonic", 62 | "Zmodo", 63 | "Sanyo", 64 | "AirLive", 65 | "ZyXEL", 66 | "NetComm", 67 | "Xfinity", 68 | "CJ Hellovision", 69 | "EnGenius", 70 | "Technicolor", 71 | "Hotbox", 72 | "Arcadyan", 73 | "MikroTik", 74 | "Westell", 75 | "Verizon" 76 | ] -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | src/wappalyzer/node_modules/ 131 | -------------------------------------------------------------------------------- /src/plugins/geoip.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix登录", 86 | "code": 200, 87 | "url": "http://111.206.63.16/hello.jsp?zone=public&service=80&protocol=tcp#main", 88 | "tag": "sensor-ens160" 89 | } 90 | msg_update = {} 91 | for i in sorted(plugins.keys()): 92 | (pluginName, plugin) = plugins[i] 93 | if pluginName == 'url': 94 | print('[!] Plugin {} processing ...'.format(pluginName)) 95 | ret = plugin.execute(msg) 96 | print(ret) 97 | if ret: 98 | msg_update = dict(msg_update, **ret) 99 | 100 | msg = dict(msg, **ret) 101 | print('[!] Plugin {} completed.'.format(pluginName)) 102 | 103 | print(msg_update) -------------------------------------------------------------------------------- /PLUGIN_DEVELOP.md: -------------------------------------------------------------------------------- 1 | # Passets 被动资产识别框架数据清洗模块插件开发说明 2 | 3 | ### 插件工作原理 4 | 5 | ``` 6 | 原始数据 7 | [ElasticSearch] ---→ [passets-filter] 8 | ↑ ↓ 9 | | Plugin 1 10 | │ ↓ 11 | │ Plugin 2 12 | │ ↓ 13 | │ ... ... 
14 | │ | 15 | ╰----------------------╯ 16 | 处理后产生的新数据 17 | ``` 18 | 19 | ### 插件配置文件说明 20 | 21 | 插件按照配置文件中定义的顺序来进行数据处理,通过配置文件,使用者可以仅开启部分必须的插件,以提交处理效率。 22 | 插件配置文件为 config/plugin.yml,配置文件的结构如下: 23 | ``` 24 | xxxx: # 插件名,同时也是插件文件名 25 | enable: true # 插件开关:true - 启用,false - 停用 26 | index: 1 # 插件的执行顺序,使用0以上的整数,数据越小越优先 27 | xxxxx: # 当前插件的自定义参数,在初始化的时候传入插件 28 | ``` 29 | 30 | ip 插件的配置实例: 31 | 32 | ``` 33 | ip: # 插件名称 34 | enable: true # 启用该插件 35 | index: 1 # 插件处理顺序为 1 36 | inner_ips: # 内部IP地址范围定义 37 | - 10.0.0.0-10.255.255.255 38 | - 172.16.0.0-172.31.255.255 39 | - 192.168.0.0-192.168.255.255 40 | - 169.254.0.0-169.254.255.255 41 | - 127.0.0.1-127.0.0.255 42 | ``` 43 | 44 | ### 文件说明 45 | 46 | 插件必须放置于应用路径下的 `plugins` 目录下,该目录下的 `__init__.py` 和 `plugin.py` 必须保留,并且不建议用户修改。 47 | ``` 48 | src # 代码目录 49 | plugins # 插件存放目录 50 | __init__.py # 模块初始化脚本 51 | plugin.py # 数据清洗插件基类,所有插件均需继承此类 52 | ``` 53 | 54 | ### 插件的代码结构 55 | 56 | ``` 57 | from plugin import Plugin 58 | 59 | class FilterPlugin(Plugin): 60 | 61 | def __init__(self, rootdir, debug=False): 62 | """ 63 | 构造函数 64 | :param rootdir: 应用根目录 65 | :param debug: 调试开关 66 | """ 67 | super().__init__(rootdir, debug) 68 | 69 | # 此处编写本插件的初始化代码 70 | # 注:如果插件没有额外的初始化操作,可以无需实现 __init__() 方法。 71 | ... ... 72 | 73 | def set_config(self, config): 74 | """ 75 | 配置初始化函数 76 | :param config: 插件配置 77 | """ 78 | super().set_config(config) 79 | 80 | # 此处编写本插件的配置初始化代码 81 | ... ... 82 | 83 | def execute(self, msg): 84 | """ 85 | 插件入口函数,根据插件的功能对 msg 进行处理 86 | :param msg: 需要处理的消息(字典类型) 87 | """ 88 | # 此处编写本插件的业务处理代码 89 | ... ... 90 | 91 | # 返回插件产生的新数据字典(不含原数据),没有产生数据则返回 None 92 | return new_msg 93 | 94 | ``` 95 | 96 | 插件执行过程中,可以调用 `self.log(msg, level)` 来输出必要的信息,消息分为以下三类: 97 | 98 | | 消息标识 | 输出前缀 | 说明 99 | |----------|------------|--------------------------------| 100 | | INFO | [!] 
| 普通信息 101 | | ERROR | [-] | 错误信息 102 | | DEBUG | [D] | 调试信息,只有开启调试后才会输出 103 | 104 | 105 | 106 | ### 插件测试 107 | 108 | 开发者可以在插件脚本的 __main__ 代码块来编写插件的测试代码,实例如下: 109 | 110 | ``` 111 | if __name__ == '__main__': 112 | 113 | # 应用根目录(通常为plugins目录的上层目录) 114 | rootdir = '/opt/filter/' 115 | 116 | # 是否开启调试模式 117 | debug = True 118 | 119 | # 初始化插件 120 | plugin = FilterPlugin(rootdir, debug) 121 | 122 | # 测试输入数据 123 | msg = { 124 | 'pro': 'TCP', 125 | 'ip': '192.168.1.121', 126 | 'port': 80, 127 | 'data': 'AAAAAAAAAAAAAAAAAAAA' 128 | } 129 | 130 | # 执行插件 131 | new_msg = plugin.execute(msg) 132 | 133 | # 判断插件返回结果 134 | if new_msg: 135 | print(u'插件返回了数据!') 136 | else: 137 | print(u'插件没有返回数据!') 138 | ``` 139 | 140 | 然后,直接在 IDE(集成开发工具)或者是命令上下直接运行该插件脚本: 141 | 142 | 在命令行下执行插件脚本的方法: 143 | ``` 144 | $ cd plugins 145 | $ python3 xxxx.py 146 | ``` -------------------------------------------------------------------------------- /src/plugins/ip.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix= _[0] and ip_num <= _[1]: 95 | inner = True 96 | break 97 | 98 | info['inner'] = inner 99 | return info 100 | 101 | if __name__ == '__main__': 102 | plugins = Plugin.loadPlugins(os.path.join(os.path.dirname(__file__), ".."), True) 103 | msg = { 104 | #"ip": "202.106.0.20", 105 | "ip": "192.168.1.20", 106 | "port": 80, 107 | "pro": "TCP", 108 | "host": "111.206.63.16:80", 109 | # Example: 554 SMTP synchronization error\r\n 110 | #"data": "35353420534d54502073796e6368726f6e697a6174696f6e206572726f720d0a", 111 | # Example: >INFO:OpenVPN Management Interface Version 1.0.1 -- type 'help' for more info\r\n> 112 | #"data": "3e494e464f3a4f70656e56504e204d616e6167656d656e7420496e746572666163652056657273696f6e20312e302e31202d2d2074797065202768656c702720666f72206d6f726520696e666f0d0a3e", 113 | # Example: get_info: plugins\nRPRT 0\nasfdsafasfsafas 114 | "data": 
"6765745f696e666f3a20706c7567696e730a5250525420300a617366647361666173667361666173", 115 | "tag": "sensor-ens160" 116 | } 117 | msg_update = {} 118 | for i in sorted(plugins.keys()): 119 | (pluginName, plugin) = plugins[i] 120 | if pluginName == 'ip': 121 | ctime = time.time() 122 | ret = plugin.execute(msg) 123 | etime = time.time() 124 | print('Eclipse time: {}'.format(etime-ctime)) 125 | print(ret) 126 | break -------------------------------------------------------------------------------- /src/rules/tcp_asset_types.json: -------------------------------------------------------------------------------- 1 | { 2 | "Network Device": [ 3 | "router", 4 | "switch", 5 | "telecom", 6 | "hub", 7 | "bridge", 8 | "gateway", 9 | "modem", 10 | "wireless", 11 | "wifi", 12 | "wlan", 13 | "wi-fi", 14 | "network", 15 | "vpn", 16 | "openvpn", 17 | "dsl", 18 | "adsl", 19 | "gsm", 20 | "telnet", 21 | "winbox" 22 | ], 23 | "Security Device": [ 24 | "security-misc", 25 | "firewall", 26 | "access", 27 | "secure", 28 | "anti-virus", 29 | "anti-spam", 30 | "nessus" 31 | ], 32 | "Storage Device": [ 33 | "storage-misc", 34 | "raid", 35 | "storage" 36 | ], 37 | "IoT Device": [ 38 | "webcam", 39 | "pda", 40 | "camera", 41 | "microcontroller" 42 | ], 43 | "Printer": [ 44 | "printer" 45 | ], 46 | "Control System": [ 47 | "power-device", 48 | "power-misc", 49 | "modbus" 50 | ], 51 | "OS": [], 52 | "Mail Server": [ 53 | "smtp", 54 | "smtpd", 55 | "smtps", 56 | "imap", 57 | "imapd", 58 | "imaps", 59 | "pop3", 60 | "pop3d", 61 | "pop3s", 62 | "lmtp", 63 | "lmtpd", 64 | "webmail" 65 | ], 66 | "Database": [ 67 | "rdbms", 68 | "mysql", 69 | "oracle", 70 | "ms-sql-m", 71 | "ms-sql-s", 72 | "db2", 73 | "mongodb", 74 | "influxdb", 75 | "couchdb", 76 | "rethinkdb", 77 | "arangodb", 78 | "monetdb", 79 | "rethinkdb", 80 | "hbase", 81 | "redis", 82 | "memcached", 83 | "database" 84 | ], 85 | "Web Server": [ 86 | "http", 87 | "https", 88 | "httpd", 89 | "web", 90 | "iis", 91 | "nginx", 92 | "tengine" 93 | ], 
94 | "FTP Server": [ 95 | "ftp", 96 | "serv-u", 97 | "filezilla" 98 | ], 99 | "DNS Server": [ 100 | "dns", 101 | "bind", 102 | "nameserver" 103 | ], 104 | "Media Server": [ 105 | "pbx", 106 | "voip", 107 | "radio", 108 | "video", 109 | "dvr", 110 | "media", 111 | "sip" 112 | ], 113 | "Font Server": [ 114 | "font" 115 | ], 116 | "Time Server": [ 117 | "time" 118 | ], 119 | "SSH Server": [ 120 | "ssh", 121 | "sshd" 122 | ], 123 | "Remote Admin": [ 124 | "vnc", 125 | "vnc-http", 126 | "ms-wbt-server", 127 | "ms-wbt-server-proxy", 128 | "radmin", 129 | "radmind", 130 | "x11", 131 | "webmin" 132 | ], 133 | "Application Middleware": [ 134 | "weblogic", 135 | "websphere", 136 | "tomcat", 137 | "jboss", 138 | "jetty" 139 | ], 140 | "Office Software": [], 141 | "Digital Currency": [ 142 | "currency", 143 | "bitcoin" 144 | ], 145 | "Container": [ 146 | "container", 147 | "vmware", 148 | "esxi", 149 | "vmware-aam", 150 | "vmware-auth", 151 | "vmware-print", 152 | "docker" 153 | ], 154 | "Cloud Platform": [ 155 | "cloud", 156 | "zoomkeeper", 157 | "webcache" 158 | ], 159 | "Load Balancer": [ 160 | "load balancer", 161 | "wap" 162 | ], 163 | "Proxy Server": [ 164 | "socks4", 165 | "socks5", 166 | "myproxy", 167 | "xtunnels" 168 | ], 169 | "Securities System": [ 170 | "securities", 171 | "zqyh", 172 | "zqzx", 173 | "zqhq", 174 | "zqkzhq", 175 | "zqjy", 176 | "zqsj" 177 | ], 178 | "Knowledge Base System": [ 179 | "bbs", 180 | "wiki", 181 | "message", 182 | "guestbook" 183 | ], 184 | "Payment System": [ 185 | "payment" 186 | ], 187 | "Terminal Server": [ 188 | "nagios", 189 | "zabbix", 190 | "citrix-ica", 191 | "citrix-ima" 192 | ], 193 | "Darknet": [ 194 | "tor", 195 | "tor-control", 196 | "tor-info", 197 | "tor-orport", 198 | "tor-socks" 199 | ], 200 | "Terminal": [ 201 | "phone", 202 | "game", 203 | "pc", 204 | "desktop" 205 | ], 206 | "Data Analysis": [ 207 | "spark", 208 | "splunk" 209 | ] 210 | } 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Passets 被动资产识别框架数据清洗模块 2 | 3 | ## 简介 4 | 5 | 本模块主要用于对收集的被动资产原始数据进行二次加工,Elasticsearch 中经过清洗的合法数据(至少包含ip和port字段)会添加 state 字段。state=0表示正在清洗,state=1表示已完成清洗。所有的清洗操作都采用插件的方式进行,目前已支持以下插件。 6 | 7 | ### Wappalyzer 插件 8 | 9 | 基于数据中的 URL、HTTP 响应头、HTTP响应正文来识别站点指纹信息。 10 | 11 | 指纹库及识别引擎基于 [Wappalyzer](https://github.com/AliasIO/Wappalyzer/) 修改。 12 | 13 | 配置参数: 14 | 15 | | 插件参数 | 用途说明 16 | |------------|------------------------------------| 17 | | enable | 插件开关,true-启用,false-不启用 18 | | index | 在所有插件中的处理顺序,值越小越优先处理 19 | 20 | 相关配置文件(指纹规则): 21 | 22 | ``` 23 | # rules/apps.json 24 | ``` 25 | 26 | ### NMAP 插件 27 | 28 | 基于数据中的 TCP 响应报文来识别目标服务的指纹信息。 29 | 30 | 指纹库基于 [NMAP](https://github.com/nmap/nmap/) 项目中的 `nmap-service-probes` 指纹库。 31 | 32 | 配置参数: 33 | 34 | | 插件参数 | 用途说明 35 | |------------|------------------------------------| 36 | | enable | 插件开关,true-启用,false-不启用 37 | | index | 在所有插件中的处理顺序,值越小越优先处理 38 | | ignore_rules | 忽略的 TCP 指纹规则列表,用 `nmap-service-probes` 中的 `m` 参数值表示,必须完全匹配。 39 | | ssl_portmap | 指定 ssl 协议数据的端口对应关系列表,例如:`443:https` 表示检测到 ssl 服务时,如果端口为 443 则认定其为 https 服务,过滤后的用 https 覆盖 ssl。 40 | 41 | 相关配置文件(指纹规则): 42 | 43 | ``` 44 | # rules/nmap-service-probes 45 | ``` 46 | 47 | ### Assets 分类插件 48 | 49 | 基于指纹识别的结果对目标进行资产分类。 50 | 51 | 配置参数: 52 | 53 | | 插件参数 | 用途说明 54 | |------------|------------------------------------| 55 | | enable | 插件开关,true-启用,false-不启用 56 | | index | 在所有插件中的处理顺序,值越小越优先处理,此插件必须在 Wapplayzer、Nmap 插件的后面执行。 57 | | ignore_vendors | 忽略的厂商名称(小写),用于排除一些不想要的厂商名称。 58 | 59 | 相关配置文件: 60 | 61 | #### 资产类型配置文件 62 | 63 | ``` 64 | # rules/http_asset_types.json 65 | { 66 | "<资产类型名>":[ 67 | <分类编号>, ... 68 | ] 69 | } 70 | ``` 71 | 用于识别 `pro` 为 `HTTP` 类的流量资产类型。 72 | 73 | 工作原理:根据指纹分类来确定资产类型。 74 | 75 | ``` 76 | # rules/tcp_asset_types.json 77 | { 78 | "<资产类型名>":[ 79 | "<关键词>", ... 
80 | ] 81 | } 82 | ``` 83 | 用于识别 `pro` 为 `TCP` 类的流量资产类型。 84 | 85 | 工作原理:根据指纹设备类型、指纹名称、指纹描述、服务名中的单词来确定设备类型。关键词不区分大小写。 86 | 87 | #### 设备类型配置文件 88 | 89 | ``` 90 | # rules/http_device_types.json 91 | { 92 | "<设备类型名>":[ 93 | "<关键词>", ... 94 | ] 95 | } 96 | ``` 97 | 用于识别 `pro` 为 `HTTP` 类的流量设备类型。 98 | 99 | 工作原理:根据指纹名称中的关键词来确定设备类型。 100 | 101 | ``` 102 | # rules/tcp_device_types.json 103 | { 104 | "<设备类型名>":[ 105 | "<关键词>", ... 106 | ] 107 | } 108 | ``` 109 | 110 | 用于识别 `pro` 为 `TCP` 类的流量设备类型。 111 | 112 | 工作原理:根据指纹设备类型以及指纹名称、指纹描述中的单词来确定设备类型。关键词不区分大小写。 113 | 114 | #### 设备厂商配置文件 115 | 116 | ``` 117 | # rules/vendors.json 118 | [ 119 | "<关键词>", ... 120 | ] 121 | ``` 122 | 123 | 厂商信息获取有两种方式: 124 | 125 | - 对于 `pro` 为 `HTTP` 的流量,可以通过指纹的 `website` 属性提取域名关键词获得,也可以根据指纹名称中的关键词获得; 126 | - 对于 `pro` 为 `TCP` 的流量,仅通过指纹中的 `name`、`info` 和 `device`来获取。关键词区分大小写。 127 | 128 | ## 运行环境 129 | 130 | - Python 3.x 131 | - Nodejs 8.x 及以上 132 | 133 | ## 文件说明 134 | 135 | ``` 136 | Dockerfile # 容器环境配置文件 137 | docker-compose.yml # 容器启动配置文件 138 | src # 核心代码文件 139 | config/plugin.yml # 数据清洗插件配置文件 140 | plugins # 数据清洗插件存放路径 141 | plugin.py # 数据清洗插件基类,所有插件均需继承此类 142 | ... ... 143 | rules # 插件相关配置文件存放路径 144 | ... ... 
145 | main.py # 主程序 146 | requirements.txt # 程序依赖库清单 147 | ``` 148 | 149 | [最新Web应用指纹库下载](https://github.com/AliasIO/Wappalyzer/raw/master/src/apps.json) 150 | 151 | [最新端口服务指纹库下载](https://github.com/nmap/nmap/raw/master/nmap-service-probes) 152 | 153 | ## 清洗程序执行说明 154 | 155 | 清洗程序是一个基于 Python3 开发的脚本应用程序。 156 | 157 | 命令行参数如下: 158 | ``` 159 | 用法: python3 main.py [OPTIONS] arg 160 | 161 | OPTIONS: 162 | --version 输出版本信息 163 | -h, --help 显示命令行帮助信息 164 | -H HOST, --host=HOST 设置 Elasticsearch 服务器地址/地址:端口 165 | -i INDEX, --index=INDEX 设置 ES 索引名,默认为logstash-passets 166 | -r RANGE, --range=RANGE 设置 ES 搜索的时间偏移量,单位为分钟,默认 15 分钟 167 | -t THREADS, --threads=THREADS 设置并发线程数量,默认为 10 个线程 168 | -b BATCH_SIZE --batch-size=BATCH_SIZE 每线程单批处理的数据数量,默认为 20 条。 169 | -c CACHE_SIZE --cache-size=CACHE_SIZE 设置处理缓存的大小 170 | -T CACHE_TTL --cache-ttl=CACHE_TTL 设置处理缓存的过期时间,单位为秒,默认 120 秒 171 | -m MODE --mode=MODE 设置工作模式,默认为 1(主),可选值有 0(从)。 172 | -d DEBUG, --debug=DEBUG 调试信息开关,0-关闭,1-开启 173 | ``` 174 | 175 | **使用示例:** 176 | 177 | ``` 178 | # 并发10个线程处理 192.168.1.2:9200 中 logstash-passets* 索引下的数据,执行过程输出调试信息 179 | 180 | # 主节点模式 181 | python3 main.py -H 192.168.1.2:9200 -i logstash-passets -r 5 -t 10 -m 1 -d 1 182 | 183 | # 从节点模式 184 | python3 main.py -H 192.168.1.2:9200 -i logstash-passets -r 5 -t 10 -m 0 -d 1 185 | ``` 186 | 187 | 在设备性能允许的情况下尽量选用单节点多线程模式,综合对比来看单节点比多节点性能上更优(节点数*线程数)。多节点部署时只能、并且必须有一个主节点。 188 | 189 | ## 清洗程序配置说明 190 | 191 | 配置文件路径为 `config/plugin.yml`。 192 | 193 | **配置示例:** 194 | ``` 195 | wappalyzer: 196 | enable: true 197 | index: 1 198 | 199 | nmap: 200 | enable: true 201 | index: 2 202 | ignore_rules: # 不处理的规则列表(列表中的规则将不会处理) 203 | - ^OK$ 204 | ssl_portmap: # ssl 协议端口映射表 205 | - 443:https 206 | 207 | asset: 208 | enable: true 209 | index: 3 210 | ignore_vendors: # 要忽略的厂商名称(小写) 211 | - asp 212 | ``` 213 | 214 | 215 | ## 容器化部署说明 216 | 217 | ### 容器构建 218 | 219 | 配置文件: 220 | [Dockerfile](./Dockerfile) 221 | 222 | [docker-compose.yml](./docker-compose.yml) 223 | 224 | ``` 225 | # 使用 
docker 命令构建 226 | docker build -t dsolab/passets-filter: . 227 | 228 | # 使用 docker-compose 命令构建 229 | docker-compose build 230 | ``` 231 | 232 | ### 容器启动 233 | 234 | > 使用 docker 命令启动: 235 | 236 | ``` 237 | # 基本命令: 238 | docker run -it dsolab/passets-filter: 239 | 240 | # 使用新的配置文件、指纹规则启动: 241 | docker run -it passets-filter: -v $(PWD)/src/config/plugin.yml:/opt/filter/config/plugin.yml -v $(PWD)/src/rules/apps.json:/opt/filter/rules/apps.json -v $(PWD)/src/rules/nmap-service-probes:/opt/filter/rules/nmap-service-probes -e ELASTICSEARCH_URL=:9200 242 | # 注:其它参数均使用默认设置 243 | ``` 244 | 245 | > 使用 docker-compose 启动: 246 | 247 | ``` 248 | docker-compose up -d 249 | ``` 250 | 251 | ## 自定义数据清洗插件 252 | 253 | 详见 [插件开发说明](PLUGIN_DEVELOP.md) 。 -------------------------------------------------------------------------------- /src/plugins/plugin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix self._debug: return 146 | 147 | if self._logger: 148 | if level == LogLevel.ERROR: 149 | self._logger.error(str(msg)) 150 | elif level == LogLevel.WARN: 151 | self._logger.warn(str(msg)) 152 | elif level == LogLevel.INFO: 153 | self._logger.info(str(msg)) 154 | else: 155 | self._logger.debug(str(msg)) 156 | else: 157 | timeStr = datetime.now().strftime('%H:%M:%S.%f') 158 | if level == LogLevel.ERROR: 159 | print('[E][{}] {}'.format(timeStr, str(msg))) 160 | elif level == LogLevel.WARN: 161 | print('[W][{}] {}'.format(timeStr, str(msg))) 162 | elif level == LogLevel.INFO: 163 | print('[I][{}] {}'.format(timeStr, str(msg))) 164 | else: 165 | print('[D][{}] {}'.format(timeStr, str(msg))) 166 | 167 | def set_config(self, config): 168 | """ 169 | 设置插件配置,配置为字典形式,例如:{ "参数名": 参数值 } 170 | :param config: 参数字典 171 | """ 172 | self._config = config 173 | 174 | def execute(self, msg, workdir, debug=False): 175 | """ 176 | 插件入口函数,根据插件的功能对 msg 进行处理 177 | :param msg: 需要处理的消息 178 | :param 
workdir: 应用主目录路径 179 | :param debug: 是否开启调试模式 180 | :return: 返回需要更新的消息字典(不含原始消息) 181 | """ 182 | print('Please implement the execute() function for plugin {}.'.format(self.__class__.__name__)) 183 | return None 184 | -------------------------------------------------------------------------------- /src/plugins/asset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | Author: Bugfix 1: 290 | vendor = parts[-2].upper() 291 | else: 292 | if len(parts[-1]) < 4: 293 | vendor = parts[-1].upper() 294 | else: 295 | vendor = parts[-1].capitalize() 296 | 297 | if vendor.lower() not in self.ignore_vendors: 298 | return vendor 299 | 300 | return '' 301 | except: 302 | return '' 303 | 304 | def analyzeTcp(self, apps): 305 | """ 306 | 分析HTTP指纹获取资产类型 307 | :param apps: 指纹列表 308 | :return: 资产相关信息,例如:{ 'asset_type': ["Network Device"], 'vendor': ["Huawei"], 'device': ["Router"], 'service': ["telnet"], 'info': ["Huawei AR5102"] } 309 | """ 310 | info = { 'asset_type': [], 'vendor': [], 'device': [], 'service': [], 'info': [] } 311 | for i in range(len(apps)): 312 | app = apps[i] 313 | devices = self.parseTcpDeviceType(app['name'], app['info'], app['device']) 314 | if devices: 315 | for _ in devices: 316 | if _ not in info['device']: 317 | info['device'].append(_) 318 | 319 | asset_types = self.parseTcpAssetType(app['name'], app['info'], ' '.join(devices), app['service']) 320 | if asset_types: 321 | for _ in asset_types: 322 | if _ not in info['asset_type']: 323 | info['asset_type'].append(_) 324 | 325 | vendor = self.parseTcpVendor(app['name'], app['info']) 326 | if vendor and vendor not in info['vendor']: 327 | info['vendor'].append(vendor) 328 | 329 | if app['service'] and app['service'] not in info['service']: 330 | info['service'].append(app['service']) 331 | 332 | # 设备信息不存在则用 os 属性代替 333 | if app['info']: 334 | if app['info'] not in info['info']: 335 | 
info['info'].append(app['info']) 336 | else: 337 | if app['os'] and app['os'] not in info['info']: 338 | info['info'].append(app['os']) 339 | 340 | del(apps[i]['device'], apps[i]['service'], apps[i]['info']) 341 | 342 | info['apps'] = apps 343 | 344 | return info 345 | 346 | def analyzeHttp(self, apps): 347 | """ 348 | 分析HTTP指纹获取资产类型 349 | :param apps: 指纹列表 350 | :return: 资产相关信息,例如:{ 'asset_type': ["Web Server"], 'vendor': ["Apache"], 'device': [], 'service': ["http"], 'info': ["Apache tomcat 9.0.28"] } 351 | """ 352 | info = { 'asset_type': [], 'vendor': [], 'device': [], 'service': ['http'], 'info': [] } 353 | for i in range(len(apps)): 354 | app = apps[i] 355 | # 识别设备产品型号/版本(用3-5层的指纹名称填充,使用 lastLayer 来控制只取一个指纹的父级) 356 | # print("Level: {}, appName: {}, implies: {}".format(self._apps[appName]['layer'], appName, self._apps[appName]['implies'])) 357 | if app['layer'] in [2, 3, 4, 5]: 358 | name = app['name'] 359 | if app['version']: name += "/" + app['version'] 360 | if info not in info['info']: 361 | info['info'].append(name) 362 | 363 | # 识别厂商 364 | vendor = self.parseHttpVendor(app['website']) 365 | if vendor and len(vendor) > 2 and vendor not in info['vendor']: 366 | info['vendor'].append(vendor) 367 | 368 | # 识别资产类型 369 | asset_types = self.parseHttpAssetType(app['categories']) 370 | if asset_types: 371 | for _ in asset_types: 372 | if _ not in info['asset_type']: 373 | info['asset_type'].append(_) 374 | 375 | # 识别设备类型 376 | devices = self.parseHttpDeviceType(app['name']) 377 | if devices: 378 | for _ in devices: 379 | if _ not in info['device']: 380 | info['device'].append(_) 381 | 382 | # 删除 Wappalyzer 插件传递过来的中间属性 383 | del(apps[i]['layer'], apps[i]['website']) 384 | 385 | info['apps'] = apps 386 | 387 | return info 388 | 389 | def execute(self, msg): 390 | """ 391 | 插件入口函数,根据插件的功能对 msg 进行处理 392 | :param msg: 需要处理的消息 393 | :return: 返回需要更新的消息字典(不含原始消息) 394 | """ 395 | if 'pro' not in msg or msg['pro'].upper() not in ['TCP', 'HTTP']: 396 | self.log('Not 
if __name__ == '__main__':
    # Standalone smoke test: load the plugin chain and run it against a
    # canned TCP (MySQL/MariaDB banner) message.
    plugins = Plugin.loadPlugins(os.path.join(os.path.dirname(__file__), ".."), True)
    print(plugins)

    msg = {
        "tag": "eno2",
        "@version": "1",
        "ip_str": "47.92.139.186",
        "inner": False,
        "ip": "47.92.139.186",
        "data": "590000000a352e352e352d31302e312e32342d4d6172696144420042bd00007b7b7661603e536700fff72102003fa015000000000000000000002e4b6f6e5c615258452d4f29006d7973716c5f6e61746976655f70617373776f726400",
        "host": "47.92.139.186:3306",
        "geoip": {
            "location": {
                "lon": 120.1619,
                "lat": 30.294
            },
            "city_name": "杭州",
            "country_name": "中国"
        },
        "pro": "TCP",
        "port": 3306,
        "state": 1,
        "apps": [
            {
                "os": "",
                "confidence": 100,
                "name": "MySQL",
                "version": "5.5.5-10.1.24-MariaDB"
            }
        ]
    }

    msg_update = {}
    # Run plugins in their configured order, folding each plugin's output
    # back into the message so later plugins see earlier results.
    for i in sorted(plugins.keys()):
        (pluginName, plugin) = plugins[i]
        print('[!] Plugin {} processing ...'.format(pluginName))
        ctime = time.time()
        ret = plugin.execute(msg)
        if ret:
            msg.update(ret)
        etime = time.time()
        # Fixed message wording: was "Eclipse time" / "process completd".
        print('Elapsed time: {}'.format(etime - ctime))
        print(json.dumps(ret, indent=2))
        print('[!] Plugin {} process completed.'.format(pluginName))
def get_scroll(es):
    """
    Fetch the shared search scroll id previously stored on ES.
    :param es: Elasticsearch client object
    :return: the stored scroll id string, or None when absent or on error
    """
    try:
        doc = es.get(index='.passets-filter', id="SearchPosition", _source=True)
        # Only trust the document when ES reports it as found and the
        # scroll id field is actually present.
        if doc.get('found') and 'scroll_id' in doc['_source']:
            return doc['_source']['scroll_id']
    except:
        traceback.print_exc()
    return None
def batch_update(es, docs, max_retry=3):
    """
    Submit a batch of document operations via the bulk API.
    :param es: Elasticsearch client object
    :param docs: list of bulk actions (update operations)
    :param max_retry: remaining retries on connection timeout
    :return: list of document ids that failed (version conflicts etc.)
    """
    ret = []
    try:
        output(docs, LogLevel.DEBUG)
        resp = bulk(es, docs)
        output(resp, LogLevel.DEBUG)
    except BulkIndexError as e:
        # Collect the ids of the failed update actions so the caller can
        # skip them (typically version conflicts between worker threads).
        for _ in e.errors:
            if 'update' in _ and '_id' in _['update']:
                ret.append(_['update']['_id'])

        output(e.args[0], LogLevel.DEBUG)
    except ConnectionTimeout as ce:
        # Retry up to max_retry times after a short pause.
        if max_retry > 0:
            time.sleep(0.1)
            return batch_update(es, docs, max_retry - 1)
        else:
            output(ce, LogLevel.ERROR)
    except:
        # BUGFIX: traceback.print_exc() returns None (and writes to stderr),
        # so the old code logged the string "None". format_exc() returns the
        # traceback text so it goes through the normal log channel.
        output(traceback.format_exc(), LogLevel.ERROR)

    return ret
threadLock.acquire() 342 | processCount += len(data) 343 | threadLock.release() 344 | 345 | actions = [] 346 | while True: 347 | if not data: break 348 | item = data.pop() 349 | # 冲突或已处理的直接跳过 350 | if item['_id'] in conflict_list: continue 351 | 352 | msg = item['_source'] 353 | # 通过 Cache 降低插件的处理频率 354 | cache_key = '{}:{}'.format(msg['ip'], msg['port']) 355 | if msg['pro'] == 'HTTP': 356 | cache_key = msg['url'] 357 | 358 | cacheMsg = cache.get(cache_key) 359 | if cacheMsg: 360 | output('Thread {}: Use cached result, key={}'.format(threadId, cache_key), LogLevel.DEBUG) 361 | actions.append({ 362 | '_type': item['_type'], 363 | '_op_type': 'update', 364 | '_index': item['_index'], 365 | '_id': item['_id'], 366 | 'doc': cacheMsg 367 | }) 368 | continue 369 | 370 | msg_update = {} 371 | # 按插件顺序对数据进行处理(插件顺序在配置文件中定义) 372 | stime = time.time() 373 | for i in sorted(plugins.keys()): 374 | (pluginName, plugin) = plugins[i] 375 | output('Thread {}: Plugin {} processing ...'.format(threadId, pluginName), LogLevel.DEBUG) 376 | 377 | try: 378 | ret = plugin.execute(msg) 379 | if ret: 380 | msg_update = dict(msg_update, **ret) 381 | msg = dict(msg, **ret) 382 | except: 383 | output(traceback.format_exc(), LogLevel.ERROR) 384 | 385 | output('Thread {}: Plugin {} completed.'.format(threadId, pluginName), LogLevel.DEBUG) 386 | 387 | output("Elapsed time: {}".format(time.time() - stime), LogLevel.DEBUG) 388 | # 更新数据 389 | msg_update['state'] = MsgState.COMPLETED 390 | cache.set(cache_key, msg_update) 391 | 392 | actions.append({ 393 | '_type': item['_type'], 394 | '_op_type': 'update', 395 | '_index': item['_index'], 396 | '_id': item['_id'], 397 | 'doc': msg_update 398 | }) 399 | 400 | # 提交到 ES 401 | if len(actions) > 0: 402 | output('Thread {}: Batch update {} document.'.format(threadId, len(actions)), LogLevel.INFO) 403 | output('Thread {}: {}'.format(threadId, json.dumps(actions)), LogLevel.DEBUG) 404 | batch_update(es, actions) 405 | actions = [] 406 | 407 | except: 408 | 
def usage():
    """
    Parse and validate command line options.
    :return: validated optparse options object (exits via parser.error on
             invalid input)
    """
    parser = optparse.OptionParser(usage="python3 %prog [OPTIONS] ARG", version='%prog 1.0.1')
    parser.add_option('-H', '--hosts', action='store', dest='hosts', type='string', help='Elasticsearch server address:port list, like localhost:9200,...')
    parser.add_option('-i', '--index', action='store', dest='index', type='string', default='logstash-passets', help='Elasticsearch index name')
    parser.add_option('-r', '--range', action='store', dest='range', type='int', default=15, help='Elasticsearch search time range, unit is minute, default is 15 minutes.')
    parser.add_option('-t', '--threads', action='store', dest='threads', type='int', default=5, help='Number of concurrent threads, default is 5')
    parser.add_option('-b', '--batch-size', action='store', dest='batch_size', type='int', default=20, help='The data item number of each batch per thread, default is 20.')
    parser.add_option('-c', '--cache-size', action='store', dest='cache_size', type='int', default=1024, help='Process cache size, default is 1024.')
    parser.add_option('-T', '--cache-ttl', action='store', dest='cache_ttl', type='int', default=120, help='Process cache time to live(TTL), default is 120 seconds.')
    parser.add_option('-m', '--mode', action='store', dest='mode', type='int', default=1, help='Work mode: 1-master, 0-slave, default is 1.')
    parser.add_option('-d', '--debug', action='store', dest='debug', type='int', default=2, help='Print debug info, 1-error, 2-warning, 3-info, 4-debug, default is 2.')

    options, args = parser.parse_args()
    # Program root directory, used later to locate plugins and rule files.
    options.rootdir = os.path.split(os.path.abspath(sys.argv[0]))[0]
    if not options.hosts:
        parser.error('Please specify elasticsearch address by entering the -H/--host parameter.')

    # Error texts fixed to match the actual defaults/limits enforced below.
    if options.threads < 1 or options.threads > 50:
        parser.error('Please specify valid thread count, the valid range is 1-50. Default is 5.')

    if options.batch_size < 5 or options.batch_size > 200:
        parser.error('Please specify valid batch count, the valid range is 5-200. Default is 20.')

    if options.cache_size < 1 or options.cache_size > 4096:
        parser.error('Please specify valid cache size, the valid range is 1-4096. Default is 1024.')

    if options.cache_ttl < 1 or options.cache_ttl > 24 * 60 * 60:
        parser.error('Please specify valid cache ttl, the valid range is 1 second to 1 day. Default is 120(2 minutes).')

    if options.range <= 0 or options.range > 24 * 60:
        parser.error('Please specify valid time, format is [number], like: 15, max is 1440(1 day).')

    if options.mode not in [0, 1]:
        parser.error('Please specify valid mode: 1-master, 0-slave.')

    if options.debug < 0: options.debug = 2

    # BUGFIX: the old in-place "del while indexing" loop shifted the list
    # under the fixed range(len(...)) and raised IndexError on trailing or
    # consecutive empty entries (e.g. "host1,,"). Filter instead.
    options.hosts = [h for h in options.hosts.split(',') if h]

    if not options.hosts:
        parser.error('Please specify elasticsearch address by entering the -H/--host parameter.')

    return options
_[:9] == 'sslports ': 166 | ports += _[9:].strip() + ',' 167 | 168 | if not (_[:6] == 'match ' or _[:10] == 'softmatch '): 169 | continue 170 | 171 | rule = { 172 | 'm': None, 'mf': 0, 's': None, 'p': None, 'v': None, 'i': None, 'o': None, 173 | 'd': None, 'h': None, 'cpe': '', 'r': None, 'ports': ports.strip(',') 174 | } 175 | 176 | line = _[_.find('match ') + 6:].strip() 177 | 178 | pos = line.find(' ') 179 | if pos == -1: 180 | continue 181 | 182 | rule['s'] = line[:pos] 183 | line = line[pos + 1:].strip() 184 | regex_type = re.compile(r'([mpviodh]|cpe:)([/\|=%@])') 185 | while True: 186 | m = regex_type.search(line) 187 | if not m: 188 | break 189 | 190 | key = m.group(1).replace(':', '') 191 | # 属性的边界符号是根据内容变的,通常为/,但内容中如果有/则使用|,暂时未发现其它符号 192 | end_pos = line.find(m.group(2), len(m.group(0))) 193 | val = None 194 | if end_pos > 0: 195 | val = line[len(m.group(0)): end_pos] 196 | line = line[end_pos+1:] 197 | else: 198 | val = line[len(m.group(0)): ] 199 | line = '' 200 | 201 | if key == 'cpe': # CPE可能出现多次 202 | if rule['cpe']: 203 | rule['cpe'] += '\n' + val 204 | else: 205 | rule[key] = val 206 | 207 | if line.find(' ') > 0: 208 | flags = line[: line.find(' ')] 209 | # 识别匹配表达式的模式 210 | if key == 'm': 211 | for flag in flags: 212 | if flag in regex_flags: 213 | rule['mf'] |= regex_flags[flag] 214 | else: 215 | print('[E] Find a unrecognized flag. 
def parsePorts(self, ports):
    """
    Expand a nmap-style port specification ("80,443,8000-8100") into a
    de-duplicated list of individual port numbers, preserving first-seen
    order.
    :param ports: comma-separated ports / port-ranges string
    :return: list of int port numbers
    """
    # Dict keys double as an insertion-ordered set for de-duplication.
    expanded = {}
    for token in ports.split(','):
        try:
            bounds = token.split('-')
            low = int(bounds[0])
            high = int(bounds[-1])
        except:
            # Unparseable token: skip it, same as the rule file allows.
            continue
        for port in range(low, high + 1):
            expanded[port] = None

    return list(expanded.keys())
rule['o'] else rule['o'], 285 | 'device': '' if not rule['d'] else rule['d'], 286 | 'service': rule['s'], 287 | # 端口不匹配的可信度下降为50 288 | 'confidence': 100 if len(rule['ports']) == 0 or port in rule['ports'] else 50 289 | } 290 | 291 | if m.lastindex: 292 | for i in range(m.lastindex + 1): 293 | skey = '${}'.format(i) 294 | for k in app: 295 | if not app[k] or k in ['confidence']: continue 296 | 297 | if skey in app[k]: 298 | app[k] = app[k].replace(skey, str(m.group(i), 'utf-8', 'ignore')) 299 | 300 | available = False 301 | if app['os']: 302 | # 太长或者是存在不可见字符的,说明获取的数据不对 303 | if len(app['os']) > 30 or self.name_regex.search(app['os']): continue 304 | tmpOS = app['os'].lower() 305 | for _ in self.os_white_list: 306 | if tmpOS.find(_) == 0 or _.find(tmpOS) == 0: 307 | available = True 308 | break 309 | else: 310 | available = True 311 | 312 | if available: 313 | # SSL 协议映射处理 314 | if app['service'] == 'ssl' and port in self.ssl_portmap: 315 | app['service'] = self.ssl_portmap[port] 316 | result.append(app) 317 | break 318 | except Exception as e: 319 | self.log(e, LogLevel.ERROR) 320 | self.log(traceback.format_exc(), LogLevel.ERROR) 321 | self.log('[!] 
def execute(self, msg):
    """
    Plugin entry point: fingerprint a raw TCP response.
    :param msg: message to process (must carry 'pro' == 'TCP', hex 'data'
                and 'port')
    :return: dict of fields to merge into the message, or None when the
             message is not applicable
    """
    if 'pro' not in msg or msg['pro'] != 'TCP':
        self.log('Not tcp message.', LogLevel.DEBUG)
        return

    if 'data' not in msg or not msg['data']:
        self.log('data field not found.')
        return

    # analyze() already scores each match (100, or 50 on port mismatch).
    # BUGFIX: the old post-processing loop popped a 'ports' key that
    # analyze() never puts into the app dicts, so it unconditionally reset
    # every confidence to 100 and defeated that scoring. Trust analyze().
    apps = self.analyze(bytes.fromhex(msg['data']), msg['port'])

    return {'apps': apps}
"3e494e464f3a4f70656e56504e204d616e6167656d656e7420496e746572666163652056657273696f6e20312e302e31202d2d2074797065202768656c702720666f72206d6f726520696e666f0d0a3e", 394 | 395 | # # Example: get_info: plugins\nRPRT 0\nasfdsafasfsafas 396 | # #"data": "6765745f696e666f3a20706c7567696e730a5250525420300a617366647361666173667361666173", 397 | 398 | # #"data": '16030300d0010000cc03035df0c691b795581015d570c868b701ed1784528e488e9aeec4b37dad521e2de4202332000016299b175b8f0ad21daeb83a03eb5d47b57bb60ecfbd10bcd67a101d0026c02cc02bc030c02fc024c023c028c027c00ac009c014c013009d009c003d003c0035002f000a0100005d00000019001700001461637469766974792e77696e646f77732e636f6d000500050100000000000a00080006001d00170018000b00020100000d001400120401050102010403050302030202060106030023000000170000ff01000100', 399 | # #"data": "004a56978183000100000000000013616c6572746d616e616765722d6d61696e2d3115616c6572746d616e616765722d6f706572617465640a6d6f6e69746f72696e67037376630000ff0001", 400 | 401 | # # Example: SMTP 402 | # #"data": '32323020736d74702e71712e636f6d2045736d7470205151204d61696c205365727665720d0a', 403 | 404 | # # Example: RDP 405 | # "data": "030000130ed000001234000209080002000000", 406 | 407 | # # Example:HTTPS 408 | # #"data": 
"1603030ce50200005b03035f6d463e6b8d09d43230d15d3e64ab61fb9e54317099b2c53c9dafd30e509297206abe5bc2265b6d09710c81877859d85a1218e5a27e5805fa0d9d47b2dbfe9f69009c000013000000000010000b000908687474702f312e310b000c7e000c7b0008313082082d30820715a0030201020210644a68f011861931192823728fbe1545300d06092a864886f70d01010b05003062311c301a060355040313134170706c65204953542043412032202d2047313120301e060355040b131743657274696669636174696f6e20417574686f7269747931133011060355040a130a4170706c6520496e632e310b3009060355040613025553301e170d3139303331353233313732395a170d3231303431333233313732395a30773117301506035504030c0e2a2e6c732e6170706c652e636f6d31253023060355040b0c1c6d616e6167656d656e743a69646d732e67726f75702e35373634383631133011060355040a0c0a4170706c6520496e632e3113301106035504080c0a43616c69666f726e6961310b300906035504061302555330820122300d06092a864886f70d01010105000382010f003082010a0282010100cf9390dba34c1b7fb02fb550891bd89849747501fecbb8c6df45ead2ccf00341e11d43a5b6d78054493bb92095efbd2f19df07e18ae81f8cda4c7b996722ff99eb68a3e7ce9d967ccae05128040498b93493a717ce2e367a647750ec5523194005a6f6d1c98c8e28181021b3d5d1971741158e13d8d658272de9ddf2c211e8e2fbfce6e7a116270301d492bff6dcc26157ff562dd596a1a3b4a385d63cfaa1988dcea8365ff006e9bbf2bb9fbc9de954ca41ec6ac4706a1c8ea3962b97930a7cad1e63da24ce2e871999ed2f7ab354b603dfd09dc1edf11226d79caa6a509b0fce9004ea346f5351cb0967b7a5c079bf4299ea3b954709359303a90aa028f51f0203010001a38204c8308204c4300c0603551d130101ff04023000301f0603551d23041830168014d87a94447c907090169edd179c01440386d62a29307e06082b0601050507010104723070303406082b060105050730028628687474703a2f2f63657274732e6170706c652e636f6d2f6170706c6569737463613267312e646572303806082b06010505073001862c687474703a2f2f6f6373702e6170706c652e636f6d2f6f63737030332d6170706c656973746361326731323030190603551d1104123010820e2a2e6c732e6170706c652e636f6d3081ff0603551d200481f73081f43081f1060a2a864886f76364050b043081e23081a406082b060105050702023081970c819452656c69616e6365206f6e207468697320636572746966696361746520627920616e792
0706172747920617373756d657320616363657074616e6365206f6620616e79206170706c696361626c65207465726d7320616e6420636f6e646974696f6e73206f662075736520616e642f6f722063657274696669636174696f6e2070726163746963652073746174656d656e74732e303906082b06010505070201162d687474703a2f2f7777772e6170706c652e636f6d2f6365727469666963617465617574686f726974792f727061301d0603551d250416301406082b0601050507030206082b0601050507030130370603551d1f0430302e302ca02aa0288626687474703a2f2f63726c2e6170706c652e636f6d2f6170706c6569737463613267312e63726c301d0603551d0e041604143fc6bb3b828a044930a9813a6824cc0d7388e597300e0603551d0f0101ff0404030205a03082026d060a2b06010401d6790204020482025d048202590257007600bbd9dfbc1f8a71b593942397aa927b473857950aab52e81a909664368e1ed1850000016983ae8f950000040300473045022100baa8d2a6d8f3b68959c063775735c8cffd1450afe792c79efb6225258f41de10022076f6fbf8f9bea11ace1c596f5c39f35804e036329e4fb831298f8901927f668a007500a4b90990b4", 409 | # #'data': ' 1603030046020000420303380629e477d8f0a0b8682d25118e807ccaacc11122040dcbb88433b1af19363c00c02f00001aff01000100000b000403000102002300000010000500030268321603030b2e0b000b2a000b270006cf308206cb308205b3a003020102020c1806f2a7bc6475852c9c7ff7300d06092a864886f70d01010b05003050310b300906035504061302424531193017060355040a1310476c6f62616c5369676e206e762d7361312630240603550403131d476c6f62616c5369676e20525341204f562053534c2043412032303138301e170d3139313031313037343133395a170d3231313132383233353935395a30818a310b300906035504061302434e310f300d06035504080c06e58c97e4baac310f300d06035504070c06e58c97e4baac31123010060355040b0c09e8bf90e7bbb4e983a8312d302b060355040a0c24e58c97e4baace59fbae8b083e7bd91e7bb9ce882a1e4bbbde69c89e99990e585ace58fb83116301406035504030c0d2a2e74696e6779756e2e636f6d30820122300d06092a864886f70d01010105000382010f003082010a0282010100d1789b0d322616ca1bfc6467b2ecbec70dcf8b14a79d217561cb0a28cd544304383dd4a99c62edc0ff9ef5b732b62c823f0032a3c9b82a6c16a9e12a92af3748c4842605af6fc6cdacff6ac92a67a7ee1af4f678fba50f25ea24c82fc96b89f0d7353b062210a883b73cf3832
93f2d051fa959559738b8f1c2bedf43af872247855ba2d29b8b40898303299aca9b11fd1a954864b948449f8f30fd2eb32add07e9fbfc98ed16eca9f149966cab6dde7eca5758eb1fd4ab20ccf700283c48cff3235ffbbbaa234bbd8eab6f9ececa10f3cbbf83af0e811301010d70fe3773d7a94f3fb9f0d0a406c6bd352913284117e4f79cd5a8ff7001d9b0379a2c5234a7630203010001a382036830820364300e0603551d0f0101ff0404030205a030818e06082b06010505070101048181307f304406082b060105050730028638687474703a2f2f7365637572652e676c6f62616c7369676e2e636f6d2f6361636572742f67737273616f7673736c6361323031382e637274303706082b06010505073001862b687474703a2f2f6f6373702e676c6f62616c7369676e2e636f6d2f67737273616f7673736c63613230313830560603551d20044f304d304106092b06010401a03201143034303206082b06010505070201162668747470733a2f2f7777772e676c6f62616c7369676e2e636f6d2f7265706f7369746f72792f3008060667810c01020230090603551d1304023000303f0603551d1f043830363034a032a030862e687474703a2f2f63726c2e676c6f62616c7369676e2e636f6d2f67737273616f7673736c6361323031382e63726c30390603551d1104323030820d2a2e74696e6779756e2e636f6d82122a2e6e6574776f726b62656e63682e636f6d820b74696e6779756e2e636f6d301d0603551d250416301406082b0601050507030106082b06010505070302301f0603551d23041830168014f8ef7ff2cd7867a8de6f8f248d88f1870302b3eb301d0603551d0e04160414f2686266f52e4ed5a8649335ac838c68634264e730820181060a2b06010401d679020402048201710482016d016b007700a4b90990b418581487bb13a2cc67700a3c359804f91bdfb8e377cd0ec80ddc100000016db9c420910000040300483046022100d73c696260ec47b79ae61affe7a0f96817702dcfe603ea1a5810f08ff909a6f4022100c96b98b57f5c92a97a867b62ab4b26d78bd7ff40ae1403422927266f707cab4a0077006f5376ac31f03119d89900a45115ff77151c11d902c10029068db2089a37d9130000016db9c420e30000040300483046022100e169be6917dc0b2ef2cb4b386efea03e0c0346277308aa18bfd0be3cadd2df91022100a3e000d1e9f375441f154e03bda80740f2cafb7239d7929f39aa5579', 410 | 411 | # "inner": False, 412 | # "tag": "sensor-ens160" 413 | # } 414 | msg_update = {} 415 | for i in sorted(plugins.keys()): 416 | (pluginName, plugin) = plugins[i] 417 | if 
def unchunk_body(self, body):
    """
    Reassemble a chunked HTTP response body.
    :param body: chunked HTTP response body text
    :return: the de-chunked body; input is returned as-is when it does not
             look chunked
    """
    data = ""
    pos = body.find('\r\n')
    while pos > 0:
        try:
            size = int(body[:pos], 16)
            if size > 0:
                # BUGFIX: append each decoded chunk. The old assignment
                # ("data = ...") kept only the LAST chunk of multi-chunk
                # bodies.
                data += body[pos+2:pos+2+size]
                body = body[pos+2+size+2:]
            else:
                # Terminating zero-size chunk: skip it and stop decoding.
                body = body[pos+2+size+2:]
                break
        except:
            # Malformed chunk header: stop and keep whatever remains.
            break

        pos = body.find('\r\n')

    # Append any remaining (non-chunked) tail.
    data += body
    return data
_['product'] 141 | 142 | if result[_['name']]['confidence'] < 100: 143 | result[_['name']]['confidence'] = result[_['name']]['confidence'] + _['confidence'] 144 | if result[_['name']]['confidence'] > 100: 145 | result[_['name']]['confidence'] = 100 146 | 147 | confidenceRegex = re.compile(r'^confidence:([\d\.]+)$') 148 | # 填充关联指纹和分类 149 | appNames = list(result.keys()) 150 | while len(appNames) > 0: 151 | appName = appNames.pop() 152 | # 合并产品属性到应用属性中 153 | if 'product' in result[appName]: 154 | if result[appName]['product']: 155 | result[appName]['name'] += result[appName]['product'] 156 | 157 | del(result[appName]['product']) 158 | 159 | result[appName]['categories'] = self.analyzeCategory(self._apps[appName]['cats']) 160 | # 如果当前层为操作系统层,则将 os 设置为指纹名 161 | result[appName]['os'] = appName if self._apps[appName]['layer'] == 4 else '' 162 | 163 | if not self._apps[appName]['implies']: continue 164 | 165 | for parentName in self._apps[appName]['implies']: 166 | confidence = 0 167 | pos = parentName.find(r'\;') 168 | if pos > 0: 169 | rightName = parentName[pos+2:] 170 | parentName = parentName[:pos] 171 | m = confidenceRegex.match(rightName) 172 | if m: 173 | tmp_c = float(m.group(1)) 174 | if tmp_c > 1: 175 | confidence = abs(int(tmp_c)) 176 | else: 177 | confidence = abs(int(tmp_c * 100)) 178 | 179 | if parentName in self._apps and parentName not in result: 180 | result[parentName] = { 181 | 'os': parentName, 182 | 'name': parentName, 183 | 'confidence': 100 if confidence > 100 else confidence, 184 | 'version': '', 185 | 'categories': self.analyzeCategory(self._apps[parentName]['cats']), 186 | 'layer': self._apps[parentName]['layer'], 187 | 'website': self._apps[parentName]['website'] 188 | } 189 | appNames.append(parentName) 190 | 191 | return list(result.values()) 192 | 193 | def analyzeCategory(self, cat_ids): 194 | """ 195 | 根据分类ID列表提取指纹分类列表 196 | :param cat_ids: 分类ID列表 197 | """ 198 | categories = [] 199 | for cat_id in cat_ids: 200 | cat_id = str(cat_id) 201 | 
if cat_id in self._categories: 202 | categories.append({ 203 | 'id': int(cat_id), 204 | 'name': self._categories[cat_id]['name'] 205 | }) 206 | 207 | return categories 208 | 209 | def analyzeHtml(self, body): 210 | """ 211 | 分析页面中的指纹信息 212 | :param body: 页面源码 213 | :return: 指纹列表 214 | """ 215 | if not body: return [] 216 | 217 | result = [] 218 | for _ in self._rules['html']: 219 | match = _['regex'].search(body) 220 | if match: 221 | result.append(self.makeDetected(match, _)) 222 | 223 | return result 224 | 225 | def analyzeJs(self, js): 226 | """ 227 | 分析页面加载的JS变量中的指纹信息 228 | :param js: js 变量字典 229 | :return: 指纹列表 230 | """ 231 | if not js: return [] 232 | 233 | result = [] 234 | for _ in self._rules['js']: 235 | if _['keyword'] not in js: continue 236 | 237 | if not _['regex']: 238 | result.append(self.makeDetected(None, _)) 239 | else: 240 | match = _['regex'].search(js[_['keyword']]) 241 | if match: 242 | result.append(self.makeDetected(match, _)) 243 | 244 | return result 245 | 246 | def analyzeMetas(self, metas): 247 | """ 248 | 分析页面中元数据标签中的指纹信息 249 | :param cookies: Cookie 字典 250 | :return: 指纹列表 251 | """ 252 | if not metas: return [] 253 | 254 | result = [] 255 | for _ in self._rules['meta']: 256 | if _['keyword'] not in metas: continue 257 | 258 | if not _['regex']: 259 | result.append(self.makeDetected(None, _)) 260 | else: 261 | for item in metas[_['keyword']]: 262 | match = _['regex'].search(item) 263 | if match: 264 | result.append(self.makeDetected(match, _)) 265 | 266 | return result 267 | 268 | def analyzeScripts(self, scripts): 269 | """ 270 | 分析引用脚本路径中的指纹信息 271 | :param scripts: scripts 列表 272 | :return: 指纹列表 273 | """ 274 | if not scripts: return [] 275 | 276 | result = [] 277 | for _ in self._rules['script']: 278 | for item in scripts: 279 | match = _['regex'].search(item) 280 | if match: 281 | result.append(self.makeDetected(match, _)) 282 | 283 | return result 284 | 285 | def analyzeCookies(self, cookies): 286 | """ 287 | 分析URL指纹信息 288 | 
:param cookies: Cookie 字典 289 | :return: 指纹列表 290 | """ 291 | if not cookies: return [] 292 | 293 | result = [] 294 | for _ in self._rules['cookies']: 295 | if _['keyword'] not in cookies: continue 296 | 297 | if not _['regex']: 298 | result.append(self.makeDetected(None, _)) 299 | else: 300 | match = _['regex'].search(cookies[_['keyword']]) 301 | if match: 302 | result.append(self.makeDetected(match, _)) 303 | 304 | return result 305 | 306 | def analyzeHeaders(self, headers): 307 | """ 308 | 分析URL指纹信息 309 | :param headers: HTTP头 310 | :return: 指纹列表 311 | """ 312 | if not headers: return [] 313 | 314 | result = [] 315 | for _ in self._rules['headers']: 316 | if _['keyword'] not in headers: continue 317 | 318 | if not _['regex']: 319 | result.append(self.makeDetected(None, _)) 320 | else: 321 | for headValue in headers[_['keyword']]: 322 | match = _['regex'].search(headValue) 323 | if match: 324 | result.append(self.makeDetected(match, _)) 325 | break 326 | 327 | return result 328 | 329 | def analyzeUrl(self, url): 330 | """ 331 | 分析URL指纹信息 332 | :param url: URL 333 | :return: 指纹列表 334 | """ 335 | if not url: return [] 336 | 337 | result = [] 338 | for _ in self._rules['url']: 339 | match = _['regex'].search(url) 340 | if match: 341 | result.append(self.makeDetected(match, _)) 342 | return result 343 | 344 | def makeDetected(self, match, rule): 345 | """ 346 | 根据匹配结果生成一条应用信息 347 | :param match: 正则匹配结果 348 | :param rule: 匹配规则 349 | :return: {name,confidence,version,product} 350 | """ 351 | result = { 352 | "name": rule['name'], 353 | "confidence": rule['confidence'], 354 | "version": '' if 'version' not in rule else rule['version'], 355 | "product": '' if 'product' not in rule else rule['product'] 356 | } 357 | 358 | if match: 359 | if match.lastindex: 360 | for k in ['version', 'product']: 361 | if rule[k]: 362 | for i in range(1, match.lastindex + 1): 363 | result[k] = result[k].replace(r'\{}'.format(i), match.group(i)) 364 | 365 | for k in ['version', 'product']: 
    def loadRules(self, rule_file):
        """
        Load the Wappalyzer rule library from a JSON file.

        Populates self._categories (id -> category info), self._apps
        (app name -> {website, cats, implies, layer}) and self._rules
        (rule type -> list of parsed match rules).

        :param rule_file: path of the rule file (UTF-8 JSON)
        :return: True on success, False on failure (errors are logged)
        """
        fp = None
        try:
            fp = open(rule_file, encoding='utf-8')
            rules = json.loads(fp.read())
            # basic sanity checks on the top-level document structure
            if not rules or 'apps' not in rules or 'categories' not in rules:
                raise Exception('Wappalyzer rule file is null or format error.')
            if not isinstance(rules['apps'], dict) or len(rules['apps']) == 0:
                raise Exception('Wappalyzer rules is null or format error.')

            self._categories = rules['categories']
            # NOTE(review): 'cookies' is declared here, but the loop below has
            # no branch for t == 'cookies', so cookie rules are never loaded
            # and analyzeCookies always iterates an empty list -- confirm
            # whether that is intentional.
            self._rules = {
                'cookies':[],
                'headers':[],
                'script': [],
                'html': [],
                'url': [],
                'js': [],
                'meta': []
            }
            for appName in rules['apps']:
                # normalize 'layer' to an int in 1..5, defaulting to 1
                if 'layer' not in rules['apps'][appName]:
                    rules['apps'][appName]['layer'] = 1
                else:
                    try:
                        rules['apps'][appName]['layer'] = int(rules['apps'][appName]['layer'])
                        if rules['apps'][appName]['layer'] not in [1, 2, 3, 4, 5]:
                            rules['apps'][appName]['layer'] = 1
                    except:
                        # NOTE(review): on conversion failure 'layer' keeps its
                        # original non-int value -- confirm this is intended.
                        pass

                # Skip pure NMAP fingerprints: layer-1 apps that define no
                # HTTP-detectable rule type at all.
                if 'cookies' not in rules['apps'][appName] and 'headers' not in rules['apps'][appName] and \
                    'js' not in rules['apps'][appName] and 'script' not in rules['apps'][appName] and \
                    'html' not in rules['apps'][appName] and 'url' not in rules['apps'][appName] and \
                    'meta' not in rules['apps'][appName] and rules['apps'][appName]['layer'] == 1:
                    continue

                website = '' if 'website' not in rules['apps'][appName] else rules['apps'][appName]['website']
                cats = [] if 'cats' not in rules['apps'][appName] else rules['apps'][appName]['cats']
                implies = [] if 'implies' not in rules['apps'][appName] else rules['apps'][appName]['implies']

                self._apps[appName] = {
                    'website': website,
                    'cats': cats,
                    'implies': implies,
                    'layer': rules['apps'][appName]['layer']
                }
                # 'implies' may be a single string in the source file
                if not isinstance(self._apps[appName]['implies'], list):
                    self._apps[appName]['implies'] = [ self._apps[appName]['implies'] ]

                for t in rules['apps'][appName]:
                    # metadata keys carry no match rules
                    if t in ['icon', 'implies', 'website', 'cats', 'layer']: continue

                    if t == 'headers':
                        # keyword = header name (lowercased), values = patterns
                        for k in rules['apps'][appName][t]:
                            if not isinstance(rules['apps'][appName][t][k], list):
                                rules['apps'][appName][t][k] = [ rules['apps'][appName][t][k] ]

                            for headerValue in rules['apps'][appName][t][k]:
                                rule = self.parseRule(headerValue)
                                if rule:
                                    rule['name'] = appName
                                    rule['keyword'] = k.lower()
                                    self._rules[t].append(rule)

                    elif t in ['js', 'meta']:
                        # keyword = JS variable / meta tag name (lowercased)
                        for k in rules['apps'][appName][t]:
                            if not isinstance(rules['apps'][appName][t][k], list):
                                rules['apps'][appName][t][k] = [ rules['apps'][appName][t][k] ]

                            for v in rules['apps'][appName][t][k]:
                                rule = self.parseRule(v)
                                if rule:
                                    rule['name'] = appName
                                    rule['keyword'] = k.lower()
                                    self._rules[t].append(rule)

                    elif t in ['html', 'script', 'url']:
                        # pattern-only rule types: no keyword to key on
                        if not isinstance(rules['apps'][appName][t], list):
                            rules['apps'][appName][t] = [ str(rules['apps'][appName][t]) ]

                        for item in rules['apps'][appName][t]:
                            rule = self.parseRule(item)
                            if rule:
                                rule['name'] = appName
                                rule['keyword'] = ''
                                self._rules[t].append(rule)

            return True
        except Exception as e:
            self.log(str(e), LogLevel.ERROR)
            self.log(traceback.format_exc(), LogLevel.ERROR)
            return False
        finally:
            if fp: fp.close()
return { 'regex': '', 'version': '', 'confidence': 100 } 482 | 483 | try: 484 | parts = rule.split(r'\;') 485 | result = { 486 | 'regex': re.compile(parts[0], re.I), 487 | 'version': '', 488 | 'product': '', 489 | 'confidence': 100 490 | } 491 | for item in parts[1:]: 492 | pos = item.find(':') 493 | if pos == -1: continue 494 | 495 | if item[:pos] == 'version': 496 | result['version'] = item[pos+1:] 497 | elif item[:pos] == 'confidence': 498 | confidence = float(item[pos+1:]) 499 | if confidence <= 1: 500 | confidence *= 100 501 | result['confidence'] = abs(int(confidence)) 502 | elif item[:pos] == 'product': 503 | result['product'] = item[pos+1:] 504 | return result 505 | except Exception as e: 506 | self.log(str(e), LogLevel.ERROR) 507 | self.log("Rule:" + rule, LogLevel.ERROR) 508 | self.log(traceback.format_exc(), LogLevel.ERROR) 509 | return None 510 | 511 | def parseStatus(self, rawHeaders): 512 | """ 513 | 识别原始HTTP头中的请求状态 514 | :param rawHeaders: 原始头信息 515 | :return: HTTP响应状态码 516 | """ 517 | if rawHeaders: 518 | match = self._statusRegex.search(rawHeaders) 519 | if match: 520 | return int(match.group(1)) 521 | return None 522 | 523 | def parseHeaders(self, rawHeaders): 524 | """ 525 | 将原始HTTP头解析为字典格式 526 | :param rawHeaders: 原始头信息 527 | :return: 请求头字典 528 | """ 529 | if not rawHeaders: return {} 530 | 531 | lines = rawHeaders.split('\r\n') 532 | if len(lines) > 0 and lines[0][:5] == 'HTTP/': del(lines[0]) # 删除 HTTP/x.x 这一行 533 | 534 | result = {} 535 | for i in range(0, len(lines)): 536 | 537 | pos = lines[i].find(':') 538 | if pos == -1: continue 539 | 540 | header_name = lines[i][:pos].strip().lower() 541 | header_value = lines[i][pos+1:].strip() 542 | if header_name not in result: result[header_name] = [] 543 | 544 | result[header_name].append(header_value) 545 | 546 | return result 547 | 548 | def parseCookies(self, headers): 549 | """ 550 | 获取HTTP响应头中的Cookie列表 551 | :param headers: HTTP头字典对象 552 | """ 553 | if 'set-cookie' not in headers: return {} 
554 | 555 | cookies = {} 556 | for item in headers['set-cookie']: 557 | parts = item.split(';') 558 | for _ in parts: 559 | pos = _.find('=') 560 | if pos == -1: continue 561 | 562 | name = _[:pos] 563 | if name not in ['domain', 'path']: 564 | cookies[name] = _[pos+1:].strip() 565 | continue 566 | 567 | return cookies 568 | 569 | def parseScripts(self, html): 570 | """ 571 | 获取页面中的脚本列表 572 | :param html: 页面源代码 573 | """ 574 | return self._scriptRegex.findall(html) 575 | 576 | def parseLinks(self, html): 577 | """ 578 | 获取页面中的链接列表 579 | :param html: 页面源代码 580 | """ 581 | return self._linkRegex.findall(html) 582 | 583 | def parseMetas(self, html): 584 | """ 585 | 获取页面中的元数据 586 | :param html: 页面源代码 587 | """ 588 | metas1 = self._metaRegex1.findall(html) 589 | metas2 = self._metaRegex2.findall(html) 590 | 591 | result = {} 592 | for _ in metas1 + metas2: 593 | if _[0] not in result: 594 | result[_[0]] = [ _[1] ] 595 | else: 596 | result[_[0]].append(_[1]) 597 | 598 | return result 599 | 600 | def parseJs(self, html): 601 | """ 602 | 获取页面执行过程中的JS变量(未实现) 603 | :param html: 页面源代码 604 | """ 605 | return {} 606 | 607 | 608 | class FilterPlugin(Plugin): 609 | _wappalyzer = None 610 | """ 611 | Web 指纹识别插件 612 | src: url, header, body 613 | dst: 614 | - apps: 应用指纹,格式:[{name,version,confidence},...] 
615 | - title: 网页标题 616 | 617 | """ 618 | 619 | def __init__(self, rootdir, debug = False, logger=None): 620 | """ 621 | 构造函数 622 | :param rootdir: 应用根目录 623 | :param debug: 调式开关 624 | """ 625 | super().__init__(rootdir, debug, logger) 626 | 627 | # 初始化指纹相关路径 628 | 629 | self._wappalyzer = Wappalyzer(os.path.join(rootdir, 'rules', 'apps.json'), logger=logger) 630 | 631 | def analyze(self, url, headers, body): 632 | """ 633 | 分析获取指纹 634 | :param url: 请求URL 635 | :param headers: 响应头 636 | :param body: 响应正文 637 | """ 638 | return self._wappalyzer.analyze(url, headers, body) 639 | 640 | def generate_header(self, msg): 641 | """ 642 | 根据消息生成一个HTTP头信息 643 | :param msg: 原始消息 JSON 644 | :return: 生成的原始响应头 645 | """ 646 | header = '' 647 | if 'server' in msg and msg['server']: 648 | header += '\r\nServer: {}'.format(msg['server']) 649 | if 'type' in msg and msg['type']: 650 | header += '\r\nContent-Type: {}'.format(msg['type']) 651 | 652 | if header: 653 | header = 'HTTP/1.1 {}{}'.format( 654 | '0' if 'code' not in msg or not msg['code'] else msg['code'], 655 | header 656 | ) 657 | return header 658 | 659 | def execute(self, msg): 660 | """ 661 | 插件入口函数,根据插件的功能对 msg 进行处理 662 | :param msg: 需要处理的消息 663 | :param mode: 识别方法:1-使用内置Python引擎,2-使用Node版本Wappalyzer引擎 664 | :return: 返回需要更新的消息字典(不含原始消息) 665 | """ 666 | if 'pro' not in msg or msg['pro'] != 'HTTP': 667 | self.log('Not http message.', LogLevel.DEBUG) 668 | return 669 | 670 | info = {} 671 | # 更新HTTP头 672 | if 'header' not in msg or not msg['header']: 673 | new_header = self.generate_header(msg) 674 | if new_header: 675 | msg['header'] = new_header 676 | info['header'] = new_header 677 | else: 678 | msg['header'] = 'HTTP/1.1 000 Unkown' 679 | 680 | if 'body' not in msg or not msg['body']: 681 | msg['body'] = '' 682 | 683 | # 指纹识别 684 | apps = self.analyze(msg['url'], msg['header'], msg['body']) 685 | info['apps'] = apps 686 | 687 | # 标题提取 688 | if 'type' in msg and msg['type'].find('text/html') != -1: 689 | m = 
re.search(r'([^<]*?)', msg['body'], re.I) 690 | if m: 691 | info['title'] = html.unescape(m.group(1)) 692 | 693 | return info 694 | 695 | if __name__ == '__main__': 696 | import time 697 | 698 | #''' 699 | plugins = Plugin.loadPlugins(os.path.join(os.path.dirname(__file__), ".."), True) 700 | print(plugins) 701 | msg = { 702 | "pro": "HTTP", 703 | "inner": True, 704 | "site": "http://192.168.199.170", 705 | "ip_str": "192.168.199.170", 706 | "type": "text/html; charset=UTF-8", 707 | "ip": "192.168.199.170", 708 | "method": "GET", 709 | "url": "http://192.168.199.170/", 710 | "server": "Apache/2.4.6 (CentOS)", 711 | "header": "Date: Mon, 23 Nov 2020 09:55:57 GMT\r\nServer: Apache/2.4.6 (CentOS)\r\nLast-Modified: Thu, 16 Oct 2014 13:20:58 GMT\r\nETag: \"1321-5058a1e728280\"\r\nAccept-Ranges: bytes\r\nContent-Length: 4897\r\nContent-Type: text/html; charset=UTF-8", 712 | "@timestamp": "2020-11-23T09:55:48.018Z", 713 | "@version": "1", 714 | "tag": "eno2", 715 | "geoip": {}, 716 | "tags": [ 717 | "_geoip_lookup_failure" 718 | ], 719 | "host": "192.168.199.170:80", 720 | "body": "\n\n\t\tApache HTTP Server Test Page powered by CentOS\n\t\t\n\n \n \n \n\n