├── nxtool ├── __init__.py ├── whitelists_generators │ ├── __init__.py │ ├── google_analytics.py │ ├── cookies.py │ ├── site_wide_id.py │ ├── zone_wide.py │ ├── url_wide.py │ ├── zone_var_wide.py │ ├── array_like_variables_names.py │ ├── zone_var_wide_url.py │ └── images_1002.py ├── printers.py ├── typing.py └── log_providers │ ├── __init__.py │ ├── flat_file.py │ └── elastic.py ├── tests ├── __init__.py ├── test_typing.py ├── data │ ├── exlog.txt │ ├── images_1002.txt │ ├── cookies.txt │ └── logs.txt ├── test_elastic.py └── test_flat_files.py ├── config.cfg ├── requirements-v1.txt ├── requirements.txt ├── requirements-v2.txt ├── requirements-v5.txt ├── .travis.yml ├── nxtool.py └── README.md /nxtool/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config.cfg: -------------------------------------------------------------------------------- 1 | [elastic] 2 | host = 127.0.0.1:9200 3 | use_ssl = False 4 | index = nxapi 5 | version = 2 -------------------------------------------------------------------------------- /requirements-v1.txt: -------------------------------------------------------------------------------- 1 | elasticsearch-dsl<2.0.0 2 | -e git+http://github.com/nbs-system/nxapi-ng/#egg=nxapi 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | elasticsearch-dsl<6.0.0 2 | -e git+http://github.com/nbs-system/nxapi-ng/#egg=nxapi 3 | -------------------------------------------------------------------------------- /requirements-v2.txt: -------------------------------------------------------------------------------- 1 | elasticsearch-dsl>=2.0.0,<3.0.0 2 | -e git+http://github.com/nbs-system/nxapi-ng/#egg=nxapi 3 | -------------------------------------------------------------------------------- /requirements-v5.txt: -------------------------------------------------------------------------------- 1 | elasticsearch-dsl>=5.0.0,<6.0.0 2 | -e git+http://github.com/nbs-system/nxapi-ng/#egg=nxapi 3 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/__init__.py: -------------------------------------------------------------------------------- 1 | def modify_search(func): 2 | def wrapper(provider, wl): 3 | if not hasattr(provider, 'search'): 4 | return func(provider, wl) 5 | search = provider.search 6 | ret = func(provider, wl) 7 | provider.search = search 8 | return ret 9 | return wrapper 10 | -------------------------------------------------------------------------------- /tests/test_typing.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from nxtool import typing 3 | from nxtool.log_providers import flat_file 4 | 5 | 6 | class TestTyping(unittest.TestCase): 7 | def test_typing(self): 8 | parser = flat_file.FlatFile('./tests/data/exlog.txt') 9 | self.assertEqual([i for i in typing.typification(parser)], [['^\\d+$', 'integer', 'ARGS', 'a']]) 10 | 11 | parser.get_results = lambda: [{'zone': "BODY", 'var_name': "pif"}, ] 12 | self.assertFalse([i for i in typing.typification(parser)])
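A note on the `modify_search` decorator defined above: it snapshots `provider.search` before running a generator module and restores it afterwards, so each whitelist generator can add its own filters without leaking them into the next one. A minimal sketch of the contract it guarantees (the `DummyProvider` class is illustrative, not part of the code base):

from nxtool.whitelists_generators import modify_search

class DummyProvider(object):
    search = 'pristine state'

@modify_search
def generate_whitelist(provider, whitelists):
    provider.search = 'transient filters'  # the generator mutates the search state
    return []

provider = DummyProvider()
generate_whitelist(provider, [])
assert provider.search == 'pristine state'  # the decorator restored the snapshot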
-------------------------------------------------------------------------------- /nxtool/whitelists_generators/google_analytics.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from . import modify_search 4 | 5 | 6 | @modify_search 7 | def generate_whitelist(provider, whitelists): 8 | """ 9 | Generate whitelists for Google Analytics cookies 10 | :param provider: 11 | :param list of dict whitelists: 12 | :return list of dict: 13 | """ 14 | logging.info('Generating \033[1mGoogle analytics\033[0m rules') 15 | 16 | provider.add_filters({'zone': 'ARGS', 'var_name': ['__utma', '__utmb', '__utmc', '__utmt', '__utmv', '__utmz']}) 17 | ids = provider.get_relevant_ids(['uri', 'ip']) 18 | 19 | ret = list() 20 | if ids: 21 | ret.append({'mz': ['$ARGS_VAR_X:__utm[abctvz]'], 'wl': ids, 'msg': 'Google analytics'}) 22 | return ret 23 | -------------------------------------------------------------------------------- /tests/data/exlog.txt: -------------------------------------------------------------------------------- 1 | 2013/05/30 20:47:05 [debug] 10804#0:*1 NAXSI_EXLOG: ip=127.0.0.1&server=127.0.0.1&uri=/&id=1302&zone=ARGS&var_name=a&content=1234 2 | 2013/05/30 20:47:05 [debug] 10804#0:*1 NAXSI_EXLOG: ip=127.0.0.1&server=127.0.0.1&uri=/&id=1302&zone=ARGS&var_name=&content=1234 3 | 2013/05/30 20:47:05 [debug] 10804#0:*1 NAXSI_EXLOG: ip=127.0.0.1&server=127.0.0.1&uri=/&id=1302&zone=ARGS|NAME&var_name=abc&content=1234 4 | garbage line for teh lulz 5 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=127.0.0.1&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" -------------------------------------------------------------------------------- /nxtool/printers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions to display data on stdout 3 | """ 4 | 5 | 6 | def print_statistics(results): 7 | for key, value in results.items(): 8 | print('# \033[100m%s\033[0m #' % key.upper()) 9 | print('\n'.join('\033[32m%s\033[0m: \033[1m%s\033[0m' % (i, j) for i, j in value.items()) + '\n') 10 | 11 | 12 | def print_generic(results): 13 | if hasattr(results, 'hits'): 14 | it = results.hits 15 | else: 16 | it = results 17 | for item in it: 18 | print('\n'.join('%s: %s' % (k, item[k]) for k in item) + '\n') 19 | 20 | 21 | def print_typed_rules(results): 22 | """ 23 | 24 | :param list of str results: 25 | :return: 26 | """ 27 | print('\n\033[1mGenerated types:\033[0m') 28 | for rule in results: 29 | print('BasicRule negative "rx:%s" "msg:%s" "mz:%s:%s" "s:BLOCK";' % tuple(rule)) 30 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/cookies.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from nxapi.rules import get_description_core 4 | from . import modify_search 5 | 6 | 7 | @modify_search 8 | def generate_whitelist(provider, whitelists): 9 | """ 10 | Generate whitelists for exceptions that are happening in the cookies. 
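Since browsers send cookies on every request, a legitimate cookie exception should show up on many different urls; that is why the code below only requires the hits to be spread across `uri`, and targets the `$HEADERS_VAR:cookie` matchzone.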
11 | 12 | :param provider: 13 | :param list whitelists: 14 | :return list of dict: 15 | """ 16 | logging.info('Generating \033[1mcookies\033[0m rules') 17 | 18 | provider.add_filters({'zone': 'HEADERS', 'var_name': 'cookie'}) 19 | 20 | # We don't require the hits to be spread across users, because the cookie might be only available 21 | # to admins for example. But since the browser sends it on every request, it should be 22 | # spread amongst urls. 23 | data = provider.get_relevant_ids(['uri']) 24 | 25 | ret = list() 26 | for _id in data: 27 | ret.append( 28 | { 29 | 'mz': ['$HEADERS_VAR:cookie'], 30 | 'wl': [_id], 31 | 'msg': 'Cookies that matches a %s' % get_description_core(_id) 32 | } 33 | ) 34 | return ret 35 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/site_wide_id.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from nxapi.rules import get_description_core 4 | from . import modify_search 5 | 6 | 7 | @modify_search 8 | def generate_whitelist(provider, whitelists): 9 | """ 10 | 11 | :param log_provider.elastic provider: 12 | :param list whitelists: 13 | :return: 14 | """ 15 | logging.info('Generating \033[1msite\033[0m rules') 16 | zones = provider.get_top('zone') 17 | 18 | res = dict() 19 | for zone in zones.keys(): 20 | logging.debug('Generating \033[1murl_wide_id\033[0m rules for \033[1m%s\033[0m', zone) 21 | search = provider.export_search() 22 | provider.add_filters({'zone': zone}) 23 | data = provider.get_relevant_ids(['zone'], 75) 24 | provider.import_search(search) 25 | 26 | if data: 27 | res[zone] = data 28 | 29 | ret = list() 30 | for zone, _id in res.items(): 31 | ret.append({ 32 | 'mz': [zone], 33 | 'wl': _id, 34 | 'msg': 'Site-wide id+zone if it matches %s' % ', or a '.join(map(get_description_core, _id))} 35 | ) 36 | return ret 37 | 38 | 39 | -------------------------------------------------------------------------------- /tests/data/images_1002.txt: -------------------------------------------------------------------------------- 1 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=URL&id0=1002&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/300x300_pouet.net.jpg HTTP/1.1", host: "X.X.X.X" 2 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=URL&id0=1002&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/script/300x300_pouet.net.jpg HTTP/1.1", host: "X.X.X.X" 3 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=URL&id0=1002&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/300x300_pouet.net.jpg HTTP/1.1", host: "X.X.X.X" 4 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=URL&id0=1002&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET 
/phpMyAdmin-2.8.2/script/300x300_pouet.net.jpg HTTP/1.1", host: "X.X.X.X" -------------------------------------------------------------------------------- /nxtool/whitelists_generators/zone_wide.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import collections 3 | 4 | from nxapi.rules import get_description_core 5 | from . import modify_search 6 | 7 | 8 | @modify_search 9 | def generate_whitelist(provider, whitelists): 10 | """ 11 | 12 | :param log_provider.elastic provider: 13 | :param list of dict whitelists: 14 | :return: 15 | """ 16 | logging.info('Generating \033[1mzone\033[0m rules') 17 | zones = provider.get_top('zone') 18 | 19 | res = collections.defaultdict(set) 20 | for zone in zones.keys(): 21 | logging.debug('Searching for id in the zone \033[1m%s\033[0m', zone) 22 | provider.add_filters({'zone': zone}) 23 | data = provider.get_top('id') 24 | 25 | for id_name, nb in data.items(): 26 | if not id_name: 27 | continue 28 | elif nb < 1000: 29 | logging.debug('Discarding the argument \033[32m%s\033[0m (%d occurrences)', id_name, nb) 30 | continue 31 | search = provider.export_search() 32 | provider.add_filters({'id': id_name}) 33 | if int(id_name) in provider.get_relevant_ids(['ip']): 34 | res[zone].add(id_name) 35 | provider.import_search(search) 36 | 37 | ret = list() 38 | for zone, wid in res.items(): 39 | ret.append( 40 | { 41 | 'mz': ['%s' % (zone,)], 42 | 'wl': wid, 43 | 'msg': 'zone-wide ID whitelist if it matches a %s' % ', or a '.join(map(get_description_core, wid))} 44 | ) 45 | return ret 46 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: trusty 3 | sudo: false 4 | 5 | cache: pip 6 | 7 | python: 8 | - "2.7" 9 | - "3.3" 10 | - "3.4" 11 | - "3.5" 12 | - "3.5-dev" 13 | - "nightly" 14 | 15 | addons: 16 | apt: 17 | packages: 18 | - openjdk-8-jre 19 | 20 | env: 21 | matrix: 22 | - ES_VERSION=5.4.1 23 | DOWNLOAD_URL=https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz 24 | - ES_VERSION=5.3.3 25 | DOWNLOAD_URL=https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz 26 | - ES_VERSION=2.4.5 27 | DOWNLOAD_URL=https://download.elastic.co/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/${ES_VERSION}/elasticsearch-${ES_VERSION}.tar.gz 28 | - ES_VERSION=1.7.6 29 | DOWNLOAD_URL=https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz 30 | 31 | install: 32 | - mkdir /tmp/elasticsearch 33 | - wget -O - ${DOWNLOAD_URL} | tar xz --directory=/tmp/elasticsearch --strip-components=1 34 | - /tmp/elasticsearch/bin/elasticsearch -d 35 | - sleep 10 36 | - pip install git+https://github.com/elastic/elasticsearch-py.git#egg=elasticsearch 37 | 38 | script: 39 | - 'case "${ES_VERSION}" in 40 | 5.4.1|5.3.3) 41 | pip install -r requirements-v5.txt || travis_terminate 1 42 | ;; 43 | 2.4.5) 44 | pip install -r requirements-v2.txt || travis_terminate 1 45 | ;; 46 | 1.7.6) 47 | pip install -r requirements-v1.txt || travis_terminate 1 48 | ;; 49 | *) 50 | travis_terminate 1 51 | ;; 52 | esac' 53 | - pip install coveralls nose2 nose2-cov 54 | - coverage run --source=nxtool -m nose2.__main__ -v 55 | 56 | after_success: 57 | - coveralls 58 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/url_wide.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import collections 3 | 4 | from nxapi.rules import get_description_core 5 | from . import modify_search 6 | 7 | 8 | @modify_search 9 | def generate_whitelist(provider, whitelists): 10 | """ 11 | 12 | :param provider: 13 | :param list of dict whitelists: 14 | :return list of dict: 15 | """ 16 | logging.info('Generating \033[1murl\033[0m rules') 17 | uris = provider.get_top('uri') 18 | 19 | res = collections.defaultdict(set) 20 | for uri in uris.keys(): 21 | logging.debug('Searching for id in the uri \033[1m%s\033[0m', uri) 22 | provider.add_filters({'uri': uri}) 23 | 24 | search = provider.export_search() 25 | provider.add_filters({'zone': 'URL'}) 26 | data = provider.get_top('id') 27 | provider.import_search(search) 28 | 29 | for id_name, nb in data.items(): 30 | if not id_name: 31 | continue 32 | elif nb < 1000: 33 | logging.debug('Discarding the argument \033[32m%s\033[0m (%d occurrences)', id_name, nb) 34 | continue 35 | else: 36 | logging.debug('\033[1mKeeping\033[0m the id \033[32m%s\033[0m (%d occurrences)', id_name, nb) 37 | 38 | search = provider.export_search() 39 | provider.add_filters({'id': id_name}) 40 | if int(id_name) in provider.get_relevant_ids(['ip']): 41 | res[uri].add(id_name) 42 | provider.import_search(search) 43 | 44 | ret = [] 45 | for uri, ids in res.items(): 46 | descriptions = ', or a '.join(map(get_description_core, ids)) 47 | ret.append({'mz': ['$URL:%s' % (uri,)], 'wl': ids, 48 | 'msg': 'url-wide whitelist if it matches a %s' % descriptions}) 49 | return ret -------------------------------------------------------------------------------- /nxtool/whitelists_generators/zone_var_wide.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import collections 3 | 4 | from nxapi.rules import get_description_core 5 | from . 
import modify_search 6 | 7 | 8 | @modify_search 9 | def generate_whitelist(provider, whitelists): 10 | """ 11 | 12 | :param log_provider.elastic provider: 13 | :return: 14 | """ 15 | logging.info('Generating \033[1mvar + zone\033[0m rules') 16 | res = collections.defaultdict(dict) 17 | 18 | for zone in ['ARGS', 'BODY', 'ARGS|NAME', 'BODY|NAME']: 19 | logging.debug('Searching for arguments in the zone \033[1m%s\033[0m', zone) 20 | provider.add_filters({'zone': zone}) 21 | data = provider.get_top('var_name') 22 | 23 | for var_name, nb in data.items(): 24 | if not var_name: 25 | continue 26 | elif nb < 1000: 27 | logging.debug('Discarding the argument \033[32m%s\033[0m (%d occurrences)', var_name, nb) 28 | continue 29 | search = provider.export_search() 30 | provider.add_filters({'var_name': var_name}) 31 | res[zone][var_name] = provider.get_relevant_ids(['ip']) # every peer should have triggered the exception 32 | provider.import_search(search) 33 | 34 | ret = list() 35 | for zone, content in res.items(): 36 | for variable, ids in content.items(): 37 | if not ids: # We don't care about empty sets 38 | continue 39 | descriptions = ', or a '.join(map(get_description_core, ids)) 40 | if zone.endswith('|NAME'): 41 | mz = '%s:%s|%s' % (zone.split('|')[0], variable, 'NAME') 42 | else: 43 | mz = '%s:%s' % (zone, variable) 44 | ret.append({'mz': [mz], 'wl': ids, 'msg': 'Variable zone-wide if it matches a %s' % descriptions}) 45 | return ret 46 | -------------------------------------------------------------------------------- /nxtool/typing.py: -------------------------------------------------------------------------------- 1 | import re 2 | import collections 3 | import logging 4 | 5 | # Each regexp is (almost) a subset of the next one 6 | REGEXPS = [ 7 | [r'^$', 'empty'], 8 | [r'^[01]$', 'boolean'], 9 | [r'^\d+$', 'integer'], 10 | [r'^#[0-9a-f]+$', 'colour'], # '#' + hex 11 | [r'^[0-9a-f]+$', 'hexadecimal'], 12 | [r'^[0-9a-z]+$', 'alphanum'], 13 | [r'^[./]?([0-9a-z]/)+[\w?+-=&/ ]+$', 'relative url'], 14 | [r'^https?://([0-9a-z-.]+\.)+[\w?+-=&/ ]+$', 'absolute url'], # like http://example.com?hello=1&id=3 15 | [r'^\w+$', 'alphanumdash'], 16 | [r'^[0-9a-z?&=+_-]+$', 'url parameter'], 17 | [r'^[\w\[\] ,&=+-]+$', 'array'], 18 | [r'^[' + r'\s\w' + r'!$%^&*()\[\]:;@~#?/.,' + r']+$', 'plaintext'], 19 | [r'', 'none'], # untypable parameters 20 | ] 21 | 22 | 23 | def typification(source): 24 | # rules = {zone1: {var1:0, var2:0}, zone2: {var6:0, ...}, ...} 25 | rules = collections.defaultdict(lambda: collections.defaultdict(int)) 26 | 27 | # Compile regexp for speed 28 | regexps = [re.compile(reg, re.IGNORECASE) for reg, _ in REGEXPS] 29 | 30 | for line in source.get_results(): 31 | line = {i: line[i] for i in line} # because `Results` objects are weird, we prefer to manipulate `dict` instead 32 | 33 | # naxsi inverts the var_name and the content 34 | # when a rule matches on var_name 35 | if line.get('zone', 'zone0').endswith('|NAME'): 36 | continue 37 | zone = line.get('zone', 'zone0') 38 | 39 | var_name = line.get('var_name', '') 40 | if not var_name: # No types for empty variable names 41 | continue 42 | 43 | try: 44 | content = line['content'] 45 | except KeyError as e: 46 | logging.error('%s has no "content" (line %s): %s', var_name, line, e) 47 | continue 48 | 49 | # Bump regexps until one matches 50 | # Since every regexp is a subset of the next one, 51 | # this works great.
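# Illustrative walk-through (not extra logic): for a content of '1234',
# '^$' fails, '^[01]$' fails, and '^\d+$' matches, so the index for this
# (zone, var_name) pair settles on 'integer', unless a later value for the
# same variable pushes it further up the list.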
52 | while not regexps[rules[zone][var_name]].match(content): 53 | rules[zone][var_name] += 1 54 | 55 | for zone, zone_data in rules.items(): 56 | for var_name, index in zone_data.items(): 57 | if index < len(REGEXPS) - 1: # Don't return untyped things 58 | yield [REGEXPS[index][0], REGEXPS[index][1], zone, var_name] 59 | -------------------------------------------------------------------------------- /nxtool/log_providers/__init__.py: -------------------------------------------------------------------------------- 1 | class LogProvider(object): 2 | def __init__(self, auto_commit_limit=400): 3 | self.nlist = list() 4 | self.auto_commit = auto_commit_limit 5 | self.total_objs = 0 6 | self.total_commits = 0 7 | 8 | def add_filters(self, filters, regexp=False, negative=False): 9 | """ 10 | :param dict filters: What fields/values do we want to filter on? 11 | :param bool regexp: Treat `filters` as regexp ? 12 | :param bool negative: Shall the match be negative ? 13 | """ 14 | raise NotImplementedError 15 | 16 | def get_results(self): 17 | raise NotImplementedError 18 | 19 | def get_statistics(self): 20 | """ Get some events statistics 21 | 22 | :return a dict of dict of int: 23 | """ 24 | ret = dict() 25 | for field in ['uri', 'server', 'ip', 'zone']: 26 | ret[field] = self.get_top(field) 27 | return ret 28 | 29 | def get_top(self, field, size=250): 30 | """ Get the top values on a given `field`. 31 | 32 | :param str field: On what field we want to filter 33 | :param int size: On how much data do we want to process 34 | :return dict of str: {field: nb_occurrences, ..} 35 | """ 36 | raise NotImplementedError 37 | 38 | def get_relevant_ids(self, fields, percentage=10.0, minimum_occurrences=250): 39 | """ This function is supposed to return the ids that are spread over/present on the `fields`. 40 | 41 | :param str fields: 42 | :param float percentage: 43 | :param int minimum_occurrences: 44 | :return set: 45 | """ 46 | raise NotImplementedError 47 | 48 | def insert(self, obj): 49 | """ This function adds the objects in `obj` to the instance of LogProvider. 50 | :param obj: objects to add to this instance 51 | :return bool: Success ? 52 | """ 53 | self.nlist.extend(obj) 54 | if self.auto_commit > 0 and len(self.nlist) > self.auto_commit: 55 | return self.commit() 56 | return True 57 | 58 | def commit(self): 59 | """ This function commits pending objects in the LogProvider instance 60 | 61 | """ 62 | raise NotImplementedError 63 | 64 | def stop(self): 65 | self.commit() 66 | 67 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/array_like_variables_names.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from . import modify_search 4 | 5 | 6 | def __check_and_strip_brackets(string): 7 | """ Return `False` if the `string` variable doesn't have the same number of opening and closing brackets; 8 | otherwise, return the variable without the trailing brackets: `pouet[1[2]3]` -> `pouet` 9 | 10 | :param str string: 11 | :return bool|str: 12 | """ 13 | if sum(1 if char == '[' else -1 if char == ']' else 0 for char in string) != 0: 14 | return False 15 | try: 16 | return string[:string.index('[')] 17 | except ValueError: # no '[' nor ']' in the `string` 18 | return False 19 | 20 | 21 | @modify_search 22 | def generate_whitelist(provider, whitelists): 23 | """ 24 | Generate whitelists for variables that look like an array, eg. `pouet[1]`, `pouet[1[2]3]`, or `pouet[1][2]`.
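A stripped name like `pouet` ends up in two whitelists for the ids 1310 and 1311, with the regexp matchzones `$ARGS_VAR_X:^pouet\[.+\]$` and `$BODY_VAR_X:^pouet\[.+\]$` (see the code below).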
25 | 26 | :param provider: The data provider 27 | :param list whitelists: Already generated rules, acting as a whitelist 28 | :return list of dict: The generated whitelists 29 | """ 30 | ids = [1310, 1311] # [ and ] 31 | 32 | logging.info('Generating \033[1marray-like variable name\033[0m rules') 33 | 34 | provider.add_filters({'zone': ['ARGS|NAME', 'BODY|NAME'], 'id': ids}) 35 | variables = provider.get_top('var_name') 36 | 37 | ret = list() # we can't use a `set` for `ret`, because we're using `dict` with it, and they're unhashable. 38 | stripped_names = set() # so we don't add duplicate rules 39 | for var_name, nb in variables.items(): 40 | if nb < 1000: 41 | logging.debug('Discarding the variable \033[32m%s\033[0m (%d occurrences)', var_name, nb) 42 | continue 43 | 44 | stripped_name = __check_and_strip_brackets(var_name) 45 | if not stripped_name: 46 | logging.debug('The variable \033[32m%s\033[0m does not have an expected form', var_name) 47 | continue 48 | 49 | if stripped_name not in stripped_names: 50 | stripped_names.add(stripped_name) 51 | ret.append({ 52 | 'mz': ['$BODY_VAR_X:^%s\[.+\]$' % stripped_name], 53 | 'wl': ids, 'msg': 'Array-like variable name'}) 54 | ret.append({ 55 | 'mz': ['$ARGS_VAR_X:^%s\[.+\]$' % stripped_name], 56 | 'wl': ids, 'msg': 'Array-like variable name'}) 57 | return ret 58 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/zone_var_wide_url.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import collections 3 | 4 | from nxapi.rules import get_description_core 5 | from . import modify_search 6 | 7 | 8 | @modify_search 9 | def generate_whitelist(provider, whitelists): 10 | """ 11 | Generate rules for a specific variable, in a specific zone, on a specific url. 
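The resulting matchzone pins the url, the zone, and the variable at once, e.g. `$URL:/login|$BODY_VAR:password` (illustrative url and variable name).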
12 | :param log_provider.elastic provider: 13 | :return: 14 | """ 15 | logging.info('Generating \033[1mvar + zone + url\033[0m rules') 16 | res = collections.defaultdict(lambda: collections.defaultdict(dict)) 17 | 18 | for uri in provider.get_top('uri').keys(): 19 | _search = provider.export_search() 20 | provider.add_filters({'uri': uri}) 21 | 22 | for zone in ['ARGS', 'BODY', 'ARGS|NAME', 'BODY|NAME']: 23 | logging.debug('Searching for arguments in the zone \033[1m%s\033[0m on the url \033[1m%s\033[0m', zone, uri) 24 | 25 | provider.add_filters({'zone': zone}) 26 | data = provider.get_top('var_name') 27 | 28 | for var_name, nb in data.items(): 29 | if not var_name: 30 | continue 31 | elif nb < 1000: 32 | logging.debug('Discarding the argument \033[32m%s\033[0m (%d occurrences)', var_name, nb) 33 | continue 34 | search = provider.export_search() 35 | provider.add_filters({'var_name': var_name}) 36 | res[uri][zone][var_name] = provider.get_relevant_ids(['ip']) 37 | provider.import_search(search) 38 | 39 | provider.import_search(_search) 40 | 41 | ret = list() 42 | for uri, content in res.items(): 43 | for zone, _content in content.items(): 44 | if not _content: # We don't care about empty sets 45 | continue 46 | for var_name, __ids in _content.items(): 47 | if not __ids: 48 | continue 49 | descriptions = ', or a '.join(map(get_description_core, __ids)) 50 | if zone.endswith('|NAME'): 51 | mz = '%s:%s|%s' % (zone.split('|')[0], var_name, 'NAME') 52 | else: 53 | mz = '$%s_VAR:%s' % (zone, var_name) 54 | ret.append( 55 | {'mz': ['$URL:%s|%s' % (uri, mz)], 'wl': __ids, 56 | 'msg': 'Variable zone-wide on a specific url if it matches a %s' % descriptions} 57 | ) 58 | return ret 59 | -------------------------------------------------------------------------------- /tests/data/cookies.txt: -------------------------------------------------------------------------------- 1 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 2 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 3 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 4 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 5 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: 
ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 6 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 7 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 8 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" -------------------------------------------------------------------------------- /nxtool/whitelists_generators/images_1002.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import collections 5 | 6 | try: 7 | from itertools import zip_longest as izip_longest 8 | except ImportError: # python 2 9 | from itertools import izip_longest 10 | 11 | from . import modify_search 12 | 13 | 14 | def __guess_prefixes(strings): 15 | """ Get the list of the most common prefixes for `strings`. 16 | Careful, this function is a bit fucked up, with stupid complexity, 17 | but since our dataset is small, who cares? 
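For example (with the component-joining fix below), `__guess_prefixes([['wp-content', 'a.jpg'], ['wp-content', 'b.jpg']])` returns `[('/wp-content', 2)]`: the two paths share their first component, and nothing more.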
18 | 19 | :param list of list of str strings: [['wp-content', '10'], ['pouet', 'pif']] 20 | :return list of tuple: [(prefix1, nb_of_strings_sharing_it), ...] 21 | """ 22 | if len(strings) == 1: 23 | return [('/' + os.path.join(*strings[0]), 1), ] 24 | 25 | threshold = len(strings) 26 | prefix, prefixes = [], [] 27 | for chars in izip_longest(*strings, fillvalue=''): 28 | char, count = collections.Counter(chars).most_common(1)[0] 29 | if count == 1: 30 | break 31 | elif count < threshold: 32 | if prefix: 33 | prefixes.append(('/' + '/'.join(prefix), threshold)) # the "chars" are path components, so glue them back with '/' 34 | threshold = count 35 | prefix.append(char) 36 | if prefix: 37 | prefixes.append(('/' + '/'.join(prefix), threshold)) 38 | return prefixes 39 | 40 | 41 | @modify_search 42 | def generate_whitelist(provider, whitelists): 43 | logging.info('Generating \033[1mImage 1002\033[0m rules') 44 | 45 | provider.add_filters({'zone': 'URL', 'id': '1002'}) 46 | 47 | uris = provider.get_top('uri') 48 | if not uris: 49 | return [] 50 | 51 | # Filter already whitelisted things 52 | already_whitelisted_uri = set() 53 | for r in whitelists: 54 | if 1002 in r['wl']: 55 | if 'mz' not in r: 56 | already_whitelisted_uri = already_whitelisted_uri.union('/') 57 | break 58 | elif 'URL' in r['mz']: 59 | already_whitelisted_uri = already_whitelisted_uri.union(r['mz']) 60 | 61 | res = dict() 62 | for uri, nb in uris.items(): 63 | if not any(uri.startswith(i) for i in already_whitelisted_uri): 64 | res[uri] = nb 65 | 66 | if not res: 67 | return [] 68 | 69 | prefixes = __guess_prefixes([a.split('/')[1:] for a in res.keys()]) 70 | 71 | # We weight each common prefix with the number of times 72 | # the urls below it have triggered an exception. 73 | best_path = collections.defaultdict(int) 74 | for pre, nb_pre in prefixes: 75 | for uri, nb in res.items(): 76 | if uri.startswith(pre): 77 | best_path[pre] += int(nb) * nb_pre 78 | 79 | rules = [] 80 | for url, nb in best_path.items(): 81 | logging.info('The url \033[32m%s\033[0m triggered %d exceptions for the rule 1002, whitelisting it.', url, nb) 82 | rules.append({'wl': [1002], 'mz': ['$URL_X:^%s|URL' % url], 'msg': 'Images size (0x)'}) 83 | return rules 84 | -------------------------------------------------------------------------------- /tests/test_elastic.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import time 3 | 4 | from nxtool.log_providers import elastic 5 | from nxtool.log_providers import flat_file 6 | 7 | class TestElastic(unittest.TestCase): 8 | maxDiff = None 9 | def test_export(self): 10 | parser = elastic.Elastic() 11 | search = parser.export_search() 12 | parser.import_search(parser.export_search()) 13 | 14 | self.assertEqual(search, parser.export_search()) 15 | 16 | def test_add_filters(self): 17 | parser = elastic.Elastic() 18 | parser.add_filters({'pif': 'paf'}) 19 | self.assertEqual(parser.get_filters(), {'query': {'multi_match': {'query': 'paf', 'fields': ['pif']}}, 'size': 10000}) 20 | 21 | parser = elastic.Elastic() 22 | parser.add_filters({'pif': ['paf', 'pouf']}) 23 | self.assertEqual(parser.get_filters(), 24 | {'query': {'bool': {'must': [{'bool': {'should': 25 | [ 26 | {'multi_match': {'fields': ['pif'], 'query': 'paf'}}, 27 | {'multi_match': {'fields': ['pif'], 'query': 'pouf'}} 28 | ] 29 | }}]}}, 'size': 10000}) 30 | 31 | parser = elastic.Elastic() 32 | parser.add_filters({'pif': []}) 33 | self.assertEqual(parser.get_filters(), {'query': {'match_all': {}}, 'size': 10000}) 34 | 35 | parser = elastic.Elastic() 36 |
parser.add_filters({'pif': []}, negative=True) 37 | self.assertEqual(parser.get_filters(), {'query': {'match_all': {}}, 'size': 10000}) 38 | 39 | parser = elastic.Elastic() 40 | parser.add_filters({'pif': set()}, negative=True) 41 | self.assertEqual(parser.get_filters(), {'query': {'match_all': {}}, 'size': 10000}) 42 | 43 | 44 | parser = elastic.Elastic() 45 | parser.add_filters({'pif': [1,]}, negative=True) 46 | self.assertEqual(parser.get_filters(), {'query': {'bool': {'must_not': [ 47 | {'multi_match': {'fields': ['pif'], 'query': 1}}]}}, 'size': 10000}) 48 | 49 | parser = elastic.Elastic() 50 | parser.add_filters({'pif': 'paf'}, negative=True) 51 | self.assertEqual(parser.get_filters(), {'query': {'bool': 52 | {'must_not': 53 | [ 54 | {'multi_match': {'fields': ['pif'], 'query': 'paf'}} 55 | ]}}, 56 | 'size': 10000}) 57 | 58 | def test_reset_filters(self): 59 | parser = elastic.Elastic() 60 | search = parser.get_filters() 61 | parser.add_filters({'pif': 'paf'}) 62 | parser.reset_filters() 63 | self.assertEqual(parser.get_filters(), search) 64 | 65 | def test_get_results(self): 66 | parser = elastic.Elastic() 67 | parser.search.scan = lambda: None 68 | parser.add_filters({'pif': 'paf'}) 69 | filters = parser.get_filters() 70 | parser.get_results() 71 | self.assertEqual(parser.get_filters(), filters) 72 | 73 | 74 | class TestElasticImport(unittest.TestCase): 75 | 76 | 77 | def test_elastic_import(self): 78 | dest = elastic.Elastic() 79 | source = flat_file.FlatFile('./tests/data/exlog.txt') 80 | for log in source.logs: 81 | dest.insert([log]) 82 | dest.stop() 83 | dest.initialize_search() 84 | dest.minimum_occurrences = 0 85 | dest.percentage = 0 86 | time.sleep(5) 87 | self.assertEqual(dest.get_relevant_ids(['id']), {u'1302', u'42000227'}) 88 | self.assertEqual(dest.get_top('id'), {1302: 3, 42000227: 1}) 89 | self.assertEqual(dest.get_top('uri'), {u'/': 3, u'/phpMyAdmin-2.8.2/scripts/setup.php': 1}) 90 | dest.client.indices.delete(index=dest.index, ignore=[400, 404]) 91 | 92 | 93 | -------------------------------------------------------------------------------- /nxtool.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import logging 4 | 5 | from nxapi import whitelist as nxapi_whitelist 6 | 7 | from nxtool import printers, typing 8 | from nxtool.log_providers import flat_file 9 | from nxtool.whitelists_generators import cookies, images_1002, google_analytics, zone_var_wide, site_wide_id, zone_wide 10 | from nxtool.whitelists_generators import url_wide, array_like_variables_names, zone_var_wide_url 11 | 12 | import urllib3 13 | urllib3.disable_warnings() 14 | 15 | WL_MODULES = [google_analytics, images_1002, array_like_variables_names, cookies, zone_var_wide_url, url_wide, 16 | zone_var_wide, zone_wide, site_wide_id] 17 | 18 | try: 19 | from nxtool.log_providers import elastic 20 | elastic_imported = True 21 | except ImportError: 22 | print('Unable to correctly import the elastic material.
Did you forget to install elasticsearch-dsl?') 23 | elastic_imported = False 24 | 25 | 26 | def __whitelist_from_rules(source, rules): 27 | """ 28 | :param source: 29 | :param list of dict rules: 30 | """ 31 | for rule in rules: 32 | whitelist = {'id': rule.get('wl', '*')} 33 | for matchzone in rule.get('mz', '*')[0].split('|'): 34 | try: 35 | zone, value = matchzone.split(':') 36 | except ValueError: # no ':', it's a simple matchzone 37 | whitelist['zone'] = matchzone 38 | continue 39 | 40 | if zone == '$URL': 41 | whitelist['url'] = value 42 | elif zone.startswith('$') and zone.endswith('_VAR'): # stuff like `$ARGS_VAR:variable_name` 43 | whitelist['zone'] = zone[1:-4] 44 | whitelist['var_name'] = value 45 | 46 | source.add_filters(whitelist, regexp=False) 47 | 48 | 49 | def __filter(source, filters, regexp=False, hostname=''): 50 | _filter = {} 51 | 52 | if filters: 53 | for param in filters.split(','): 54 | try: 55 | key, value = param.split('=') 56 | except ValueError: 57 | print('Parameters should be of the form key=value,key2=value2,...') 58 | return 59 | _filter[key] = value 60 | 61 | if hostname: 62 | _filter['server'] = hostname 63 | 64 | source.add_filters(_filter, regexp) 65 | 66 | 67 | def __create_argparser(): 68 | """ Create a simple arguments parser. """ 69 | parser = argparse.ArgumentParser(description='Sweet tool to help you manage your naxsi logs.') 70 | 71 | parser.add_argument('hostname', action='store', nargs='?') 72 | parser.add_argument('-v', '--verbose', action='store_true') 73 | 74 | log_sources = parser.add_argument_group('Log sources') 75 | log_sources.add_argument('--elastic-source', action='store_true') 76 | log_sources.add_argument('--flat-file', type=str) 77 | log_sources.add_argument('--stdin', action='store_true') 78 | 79 | log_destinations = parser.add_argument_group('Log destinations') 80 | log_destinations.add_argument('--elastic-dest', action='store_true') 81 | 82 | actions = parser.add_argument_group('Actions') 83 | actions.add_argument('--typing', action='store_true') 84 | actions.add_argument('--whitelist', action='store_true') 85 | actions.add_argument('--slack', action='store_true') 86 | actions.add_argument('--filter', action='store') 87 | actions.add_argument('--filter-regexp', action='store') 88 | actions.add_argument('--stats', action='store_true') 89 | 90 | return parser.parse_args() 91 | 92 | 93 | def main(): 94 | args = __create_argparser() 95 | 96 | logging.getLogger("elasticsearch").setLevel(logging.ERROR) 97 | logging.getLogger("urllib3").setLevel(logging.ERROR) 98 | if args.verbose: 99 | logging.basicConfig(level=logging.DEBUG, format='%(message)s') 100 | else: 101 | logging.basicConfig(level=logging.INFO, format='[+] %(message)s') 102 | 103 | if args.elastic_source is True: 104 | if elastic_imported is False: 105 | print('You asked for an elastic source, but you do not have the required dependencies.') 106 | return 107 | source = elastic.Elastic() 108 | elif args.flat_file: 109 | source = flat_file.FlatFile(args.flat_file) 110 | elif args.stdin is True: 111 | source = flat_file.FlatFile() 112 | else: 113 | print('Please give me a valid source (or try to relaunch me with `-h` if you are lost).') 114 | return 1 115 | 116 | if args.slack and not args.whitelist: 117 | print("You asked for loosened constraints on whitelist generation, but you didn't ask for whitelist generation.") 118 | return 119 | 120 | # Filtering can be used for any operation 121 | __filter(source, args.filter, regexp=False, hostname=args.hostname) 122 | if args.filter_regexp:
123 | __filter(source, args.filter_regexp, regexp=True, hostname=args.hostname) 124 | 125 | if args.elastic_dest: 126 | destination = elastic.Elastic() 127 | for log in source.logs: 128 | destination.insert([log]) 129 | destination.stop() 130 | elif args.stats: 131 | printers.print_statistics(source.get_statistics()) 132 | elif args.whitelist: 133 | whitelist = list() 134 | for module in WL_MODULES: 135 | if args.slack: 136 | source.minimum_occurrences = 0 137 | rules = module.generate_whitelist(source, whitelist) 138 | whitelist.extend(rules) 139 | __whitelist_from_rules(source, rules) 140 | if whitelist: 141 | print('\n\033[1mGenerated whitelists:\033[0m') 142 | print('\t' + ';\n\t'.join(map(nxapi_whitelist.dict_to_str, whitelist)) + ';') 143 | else: 144 | print('\n\033[1mnxtool was not able to generate any meaningful whitelist\033[0m') 145 | elif args.typing: 146 | printers.print_typed_rules(typing.typification(source)) 147 | else: 148 | printers.print_generic(source.get_results()) # print_generic prints by itself; wrapping it in print() would output a spurious `None` 149 | 150 | 151 | if __name__ == '__main__': 152 | sys.exit(main()) 153 | -------------------------------------------------------------------------------- /tests/data/logs.txt: -------------------------------------------------------------------------------- 1 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 2 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 3 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 4 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 5 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 6 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 7 | 2013/11/10 07:36:19
[error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 8 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 9 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 10 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 11 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 12 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 13 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 14 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 15 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 16 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: 
ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 17 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" -------------------------------------------------------------------------------- /nxtool/log_providers/flat_file.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import fileinput 3 | import mimetypes 4 | import zipfile 5 | import tarfile 6 | import re 7 | import logging 8 | 9 | from nxapi.nxlog import parse_nxlog 10 | 11 | from nxtool.log_providers import LogProvider 12 | 13 | 14 | class FlatFile(LogProvider): 15 | def __init__(self, fname=None): 16 | super(FlatFile, self).__init__() 17 | 18 | self.minimum_occurrences = 250 19 | self.percentage = 10 20 | self.logs = list() 21 | self.filters = collections.defaultdict(list) 22 | self.negative_filters = collections.defaultdict(list) 23 | self.filters_regexp = collections.defaultdict(list) 24 | self.negative_filters_regexp = collections.defaultdict(list) 25 | self.fname = fname 26 | 27 | try: 28 | ftype = mimetypes.guess_type(self.fname)[0] # `guess_all_extensions` maps a mimetype to extensions; `guess_type` is the lookup we want here 29 | except (AttributeError, TypeError): # `self.fname` is None 30 | self.__transform_logs(fileinput.input("-")) 31 | else: 32 | if ftype == 'application/zip': # zip file! 33 | with zipfile.ZipFile(self.fname) as f: 34 | for name in f.namelist(): 35 | self.__transform_logs(f.read(name).decode('utf-8').splitlines()) 36 | elif ftype in ('application/tar', 'application/x-tar'): # tar file! 37 | with tarfile.open(self.fname) as f: 38 | for member in f.getmembers(): # tarfile has no `namelist()` nor `read()`: use `getmembers()` and `extractfile()` 39 | self.__transform_logs(line.decode('utf-8') for line in f.extractfile(member) or []) 40 | else: # no (guessable) archive mimetype: treat it as a flat file 41 | self.__transform_logs(fileinput.input(self.fname)) 42 | 43 | def export_search(self): 44 | return self.filters, self.negative_filters, self.filters_regexp, self.negative_filters_regexp 45 | 46 | def import_search(self, search): 47 | self.filters, self.negative_filters, self.filters_regexp, self.negative_filters_regexp = search 48 | 49 | def __transform_logs(self, it): 50 | for line in it: 51 | error, log = parse_nxlog(line) 52 | if error: 53 | logging.error('%s while parsing %s', error, line) 54 | if log: 55 | self.logs.extend(log) 56 | 57 | def get_top(self, field, size=250): 58 | ret = dict() 59 | values = (log[field] for log in self.__get_filtered_logs()) 60 | for key, value in collections.Counter(values).most_common(size): # honour the `size` parameter instead of a hardcoded 10 61 | ret[key] = value 62 | return ret 63 | 64 | def __get_filtered_logs(self): 65 | """ 66 | yield the loglines according to the filtering policy defined in `self.filters`: 67 | a log is yielded at most once, when a positive filter matches it and no negative filter excludes it 68 | """ 69 | if not any((self.filters, self.filters_regexp, self.negative_filters_regexp, self.negative_filters)): 70 | for log in self.logs: # we don't filter, give everything! 71 | yield log 72 | else: 73 | for log in self.logs: 74 | for key, value in log.items(): 75 | if key in self.filters: # are we filtering on this `key`?
76 | if value in self.filters[key] and value != '*': # is the current `value` in the filtering list? 77 | if value not in self.negative_filters[key]: # ... and not explicitly excluded? 78 | yield log 79 | break 80 | if key in self.filters_regexp: # are we filtering on this `key` with regexps? 81 | if any(re.match(regexp, value) for regexp in self.filters_regexp[key]): # the filters are lists of patterns 82 | if not any(re.match(regexp, value) for regexp in self.negative_filters_regexp[key]): 83 | yield log 84 | break 85 | 86 | def get_results(self): 87 | return self.__get_filtered_logs() 88 | 89 | def add_filters(self, filters, regexp=False, negative=False): 90 | for key, value in filters.items(): 91 | if negative is True: 92 | target = self.negative_filters_regexp if regexp is True else self.negative_filters 93 | else: 94 | target = self.filters_regexp if regexp is True else self.filters 95 | if isinstance(value, list): 96 | target[key].extend(value) 97 | else: 98 | target[key].append(value) 99 | 100 | def get_relevant_ids(self, fields, percentage=0, minimum_occurrences=0): 101 | """ 102 | We want to keep alerts that are spread over a vast number of different `fields` 103 | 104 | To measure the spreading, we're using this metric: https://en.wikipedia.org/wiki/Coefficient_of_variation 105 | :param list of str fields: 106 | :return: 107 | """ 108 | minimum_occurrences = minimum_occurrences or self.minimum_occurrences 109 | percentage = percentage or self.percentage 110 | 111 | id_blacklist = set() 112 | ret = set() 113 | for field in fields: 114 | stats = collections.defaultdict(int) 115 | size = 0 116 | for logline in self.get_results(): 117 | if logline['id'] not in id_blacklist: 118 | stats[logline['id']] += 1 119 | size += 1 120 | 121 | for k, v in stats.items(): 122 | if size < minimum_occurrences: 123 | logging.debug('The field %s has not enough occurrences (%d): non-significant', field, size) 124 | continue 125 | if 100 * v < size * percentage: 126 | logging.debug('The id %s is present in less than %s%% (%d) of %s: non-significant.', k, percentage, v, field) 127 | id_blacklist.add(k) 128 | else: 129 | ret.add(k) 130 | 131 | return list(map(int, ret)) 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Coverage Status](https://coveralls.io/repos/github/nbs-system/nxtool-ng/badge.svg?branch=master)](https://coveralls.io/github/nbs-system/nxtool-ng?branch=master) 2 | [![Code Health](https://landscape.io/github/nbs-system/nxtool-ng/master/landscape.svg?style=flat)](https://landscape.io/github/nbs-system/nxtool-ng/master) 3 | [![Code Climate](https://codeclimate.com/github/nbs-system/nxtool-ng/badges/gpa.svg)](https://codeclimate.com/github/nbs-system/nxtool-ng) 4 | [![Build Status](https://travis-ci.org/nbs-system/nxtool-ng.svg?branch=master)](https://travis-ci.org/nbs-system/nxtool-ng) 5 | 6 | ``` 7 | __ __ 8 | .-----.--.--.| |_.-----.-----.| |____.-----.-----.
9 | | |_ _|| _| _ | _ || |____| | _ | 10 | |__|__|__.__||____|_____|_____||__| |__|__|___ | 11 | |_____| 12 | 13 | -- Because life is too short to transform naxsi logs into rules by hand. 14 | ``` 15 | 16 | nxtool-ng is a tool to magically transform your [naxsi]( http://naxsi.org ) logs into useful rules. 17 | It can get its data from your elastic instance, or you can feed it flat files, 18 | and it will magically show you some statistics, generate relevant whitelists, 19 | provide type-based rules, … 20 | 21 | It works with *modules* that generate whitelists without overlapping each other. 22 | 23 | Proudly powered by [Python]( https://python.org ) (2 and 3 by the way), 24 | using (optionally) [elasticsearch-dsl]( https://elasticsearch-dsl.readthedocs.org/en/latest/ ), 25 | written with love and tears by the great people of [NBS-System]( https://nbs-system.com ), 26 | nxtool-ng is released under the [GPL]( https://gnu.org/licenses/gpl.html ). 27 | 28 | # Installation 29 | 30 | Nxtool-ng depends on [nxapi](https://github.com/nbs-system/nxapi) for naxsi-related magic, 31 | and optionally on [elasticsearch-dsl]( https://github.com/elastic/elasticsearch-dsl-py ) 32 | if you want to generate rules from an Elastic instance. You can install them with: 33 | 34 | ### Elasticsearch 5.x 35 | `pip install -r ./requirements-v5.txt` 36 | 37 | ### Elasticsearch 2.x 38 | `pip install -r ./requirements-v2.txt` 39 | 40 | ### Elasticsearch 1.x 41 | 42 | `pip install -r ./requirements-v1.txt` 43 | 44 | # Usage 45 | 46 | ```bash 47 | $ python nxtool.py -h 48 | usage: nxtool.py [-h] [-v] [--elastic-source] [--flat-file FLAT_FILE] [--stdin] 49 | [--elastic-dest] [--typing] [--whitelist] [--slack] [--filter FILTER] 50 | [--filter-regexp FILTER_REGEXP] [--stats] [hostname] 51 | 52 | Sweet tool to help you manage your naxsi logs.
53 | 54 | positional arguments: 55 | hostname 56 | 57 | optional arguments: 58 | -h, --help show this help message and exit 59 | -v, --verbose 60 | 61 | Log sources: 62 | --elastic-source 63 | --flat-file FLAT_FILE 64 | --stdin 65 | 66 | Log destinations: 67 | --elastic-dest 68 | 69 | Actions: 70 | --typing 71 | --whitelist 72 | --slack 73 | --filter FILTER 74 | --filter-regexp FILTER_REGEXP 75 | --stats 76 | ``` 77 | 78 | First you can populate an elasticsearch instance with: 79 | ```bash 80 | $ python nxtool.py --elastic-dest --flat-file example.com.log 81 | ``` 82 | 83 | For example, if you want some stats about `example.com` using your elasticsearch instance: 84 | 85 | ```bash 86 | $ python nxtool.py --elastic-source --stats example.com 87 | 2.39.218.24: 14 88 | 14.76.8.132: 18 89 | 13.24.13.122: 8 90 | 157.5.39.176: 13 91 | 19.187.104.23: 8 92 | 80.24.150.43: 21 93 | 50.2.176.10: 198 94 | 79.14.72.145: 44 95 | 14.26.23.213: 80 96 | 86.242.8.36: 58 97 | 98 | # URI # 99 | /cache.php: 12 100 | /11.php: 12 101 | /call-for-paper-contact/: 82 102 | /: 22 103 | /xmlrpc.php: 22 104 | /en/production/type.asp: 41 105 | /contact/: 21 106 | /wp-json/oembed/1.0/embed: 38 107 | /en/production/formation.asp: 68 108 | /totallylegit/: 14 109 | 110 | # ZONE # 111 | BODY: 276 112 | ARGS|NAME: 24 113 | URL: 22 114 | ARGS: 146 115 | HEADERS: 54 116 | BODY|NAME: 10 117 | FILE_EXT: 4 118 | 119 | # SERVER # 120 | example.com: 536 121 | ``` 122 | 123 | To generate some whitelists for `example.com`, using your elasticsearch instance: 124 | 125 | ```bash 126 | $ python nxtool.py --elastic-source --whitelist example.com 127 | [+] Generating Google analytics rules 128 | [+] Generating Image 1002 rules 129 | [+] Generating cookies rules 130 | [+] Generating var + zone rules 131 | [+] Generating site rules 132 | [+] Generating zone rules 133 | [+] Generating url rules 134 | 135 | Generated whitelists: 136 | BasicRule wl:1310,1311 "mz:$HEADERS_VAR:cookie" "msg:Cookies"; 137 | ``` 138 | 139 | You can add the `--verbose` flag if you want more information about what's going on. 140 | If you're using *flat files*, you can pass, well, flat files, but also *archives* 141 | like `.zip` or `.tar.gz`. 142 | 143 | You can add the `--slack` flag if you want to loosen constraints on whitelist generation. 144 | It can be useful when you have only a small amount of logs.
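For example (hypothetical hostname), to loosen the thresholds while generating whitelists:

```bash
$ python nxtool.py --elastic-source --whitelist --slack example.com
```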
142 | 
143 | You can also use nxtool-ng to query your elasticsearch instance, for example
144 | to search for accesses to `/admin` that triggered the rule `1010` in the `HEADERS` zone:
145 | 
146 | ```bash
147 | $ python nxtool.py --elastic-source --filter 'uri=/admin,zone=HEADERS,id=1010'
148 | 
149 | zone: HEADERS
150 | ip: 133.144.211.172
151 | whitelisted: false
152 | uri: /admin
153 | comments: import:2016-08-30 09:44:17.938620
154 | server: example.com
155 | content: 
156 | var_name: cookie
157 | country: 
158 | date: 2016-08-30T09:45:13+0200
159 | id: 1010
160 | 
161 | zone: HEADERS
162 | ip: 15.125.251.122
163 | whitelisted: false
164 | uri: /admin
165 | comments: import:2016-08-30 11:00:03.523580
166 | server: example.com
167 | content: 
168 | var_name: cookie
169 | country: 
170 | date: 2016-08-30T11:06:36+0200
171 | id: 1010
172 | 
173 | ```
174 | 
175 | It's also possible to *type* your parameters, to tighten the security of
176 | your application a bit:
177 | 
178 | ```
179 | $ python nxtool.py --elastic-source --typing --verbose example.com
180 | 
181 | Generated types:
182 | 
183 | BasicRule negative "rx:^$" "msg:empty" "mz:FILE_EXT:user_avatar" "s:BLOCK";
184 | BasicRule negative "rx:^$" "msg:empty" "mz:FILE_EXT:society_logo" "s:BLOCK";
185 | BasicRule negative "rx:^https?://([0-9a-z-.]+\.)+[\w?+-=&/ ]+$" "msg:url" "mz:ARGS:url" "s:BLOCK";
186 | ```
187 | 
188 | # Note on the structure of ElasticSearch entries
189 | 
190 | Each core rule violation is logged in a NAXSI_FMT entry, and each violation is reported once in the ElasticSearch instance.
191 | The following types are enforced on the elasticsearch entries:
192 | 
193 | ```
194 | ip = Ip
195 | coords = GeoPoint
196 | learning = Boolean
197 | total_processed = Integer
198 | total_blocked = Integer
199 | blocked = Boolean
200 | cscore0 = Keyword
201 | score0 = Integer
202 | zone = Keyword
203 | id = Integer
204 | var_name = Keyword
205 | date = Date
206 | whitelisted = Boolean
207 | uri = Text
208 | server = Text
209 | comments = Text
210 | vers = Text
211 | ```
212 | The first term is the key used in NAXSI_FMT, and the second one is the ElasticSearch type. Text is used as a
213 | backward-compatible version of Keyword. We may drop support for old elasticsearch versions in the near future and
214 | replace Text with Keyword.
215 | 
216 | It is noteworthy that one request might violate multiple core rules and lead to multiple entries in ElasticSearch.
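To make that structure concrete, here is a minimal sketch, not nxtool-ng's actual parsing code, of how the key/value pairs of a NAXSI_FMT entry map onto the fields above (the sample line is made up):

```python
# Minimal sketch, not nxtool-ng's actual parser: split the key/value pairs
# of a NAXSI_FMT entry. The sample line below is made up for illustration.
try:  # python2
    from urlparse import parse_qsl
except ImportError:  # python3
    from urllib.parse import parse_qsl

line = ('ip=127.0.0.1&server=example.com&uri=/admin&learning=0&vers=0.52'
        '&total_processed=472&total_blocked=204&block=0'
        '&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie')

entry = dict(parse_qsl(line, keep_blank_values=True))
# Each (zone0, id0, var_name0) triple is one core rule violation; a request
# with several violations (zone1/id1/..., etc.) yields several entries.
print(entry['zone0'], entry['id0'], entry['var_name0'])
```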
217 | 
--------------------------------------------------------------------------------
/nxtool/log_providers/elastic.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import unicode_literals
3 | import logging
4 | import operator
5 | import collections
6 | import datetime
7 | 
8 | try:  # Fuck you guido for removing reduce
9 |     # noinspection PyUnresolvedReferences
10 |     from functools import reduce
11 | except ImportError:
12 |     pass
13 | 
14 | try:
15 |     from ConfigParser import SafeConfigParser as ConfigParser
16 | except ImportError:  # python3
17 |     from configparser import ConfigParser
18 | 
19 | from elasticsearch import TransportError
20 | from elasticsearch_dsl import Search, Q
21 | from elasticsearch_dsl import DocType, Date, Boolean, Integer, Ip, GeoPoint
22 | from elasticsearch_dsl import Index, VERSION
23 | from elasticsearch_dsl.connections import connections
24 | 
25 | try:
26 |     from elasticsearch_dsl import Text, Keyword
27 | except ImportError:  # old version of elasticsearch-dsl
28 |     from elasticsearch_dsl import String
29 |     Text = String
30 |     Keyword = String
31 | 
32 | 
33 | from nxtool.log_providers import LogProvider
34 | 
35 | class Event(DocType):
36 |     ip = Ip()
37 |     coords = GeoPoint()
38 |     learning = Boolean()
39 |     total_processed = Integer()
40 |     total_blocked = Integer()
41 |     blocked = Boolean()
42 |     cscore0 = Keyword()
43 |     score0 = Integer()
44 |     zone = Keyword()
45 |     id = Integer()
46 |     var_name = Keyword()
47 |     date = Date()
48 |     whitelisted = Boolean()
49 |     uri = Text(fields={'raw': Keyword(index='not_analyzed')})
50 |     server = Text(fields={'raw': Keyword(index='not_analyzed')})
51 |     comments = Text(fields={'raw': Keyword(index='not_analyzed')})
52 |     vers = Text(fields={'raw': Keyword(index='not_analyzed')})
53 | 
54 | 
55 |     class Meta:
56 |         doc_type = 'events'
57 |         ## TODO: stop hardcoding the 'events' doc_type used when save() is called,
58 |         ## see elasticsearch_dsl issue 689
59 | 
60 | 
61 | 
62 | class Elastic(LogProvider):
63 |     def __init__(self, config_file='config.cfg'):
64 |         super(Elastic, self).__init__()
65 | 
66 |         self.percentage = 10.0
67 |         self.minimum_occurrences = 250
68 | 
69 |         # The ConfigParser documentation points out that there's no way to force default config
70 |         # options outside the "DEFAULT" section.
71 |         config = ConfigParser()
72 |         config.read(config_file)
73 |         if not config.has_section('elastic'):
74 |             config.add_section('elastic')
75 | 
76 |         for option, value in {'use_ssl': 'True', 'host': '127.0.0.1', 'version': '2', 'index': 'nxapi', 'doc_type': 'events'}.items():
77 |             if not config.has_option('elastic', option):
78 |                 config.set('elastic', option, value)
79 | 
80 |         self.version = config.getint('elastic', 'version')
81 |         self.index = config.get('elastic', 'index')
82 |         use_ssl = config.getboolean('elastic', 'use_ssl')
83 |         host = config.get('elastic', 'host')
84 |         self.doc_type = config.get('elastic', 'doc_type')
85 |         self.client = connections.create_connection(hosts=[host], use_ssl=use_ssl, index=self.index, version=self.version, doc_type=self.doc_type, timeout=30, retry_on_timeout=True)
86 | 
87 |         Event.init(index=self.index)
88 |         index = Index(self.index, using=self.client)
89 |         index.doc_type(Event)
90 |         self.initialize_search()
91 | 
92 |     def initialize_search(self):
93 |         self.search = Search(using=self.client, index=self.index).extra(size=10000)
94 | 
95 |     def export_search(self):
96 |         return self.search
97 | 
98 |     def import_search(self, search):
99 |         self.search = search
100 | 
101 |     def get_filters(self):
102 |         return self.search.to_dict()
103 | 
104 |     def add_filters(self, filters, regexp=False, negative=False):
105 |         """
106 |         Add `filters` to the query.
107 |         `filters` is a dict of the form {'field': value, 'field2': value2}; you can also use a list of values
108 |         instead of a `str`: they will be combined with a logical _or_ (and not an _and_).
109 |         :param dict filters:
110 |         :param bool regexp: currently unused
111 |         :param bool negative:
112 |         :return:
113 |         """
114 |         # We need to use multi_match, since we get the field names dynamically.
115 |         for key, value in filters.items():
116 |             if isinstance(value, set):
117 |                 value = list(value)
118 | 
119 |             # There is no need to process empty values.
120 |             if not value:
121 |                 continue
122 | 
123 |             if isinstance(value, list):
124 |                 if negative:
125 |                     self.search = self.search.query(Q('bool', must_not=[
126 |                         reduce(operator.or_, [Q('multi_match', query=v, fields=[key]) for v in value])])
127 |                     )
128 |                 else:
129 |                     self.search = self.search.query(Q('bool', must=[
130 |                         reduce(operator.or_, [Q('multi_match', query=v, fields=[key]) for v in value])])
131 |                     )
132 |             else:
133 |                 if negative:
134 |                     self.search = self.search.query(~Q("multi_match", query=value, fields=[key]))
135 |                 else:
136 |                     self.search = self.search.query(Q("multi_match", query=value, fields=[key]))
137 | 
138 |     def get_top(self, field, size=250):
139 |         """
140 |         Get the top values for the given `field`
141 |         :param str field: the field to filter on
142 |         :param int size: how many top values to return
143 |         :return dict of int: a structure of the form {value: number_of_hits, value2: number_of_hits2}
144 |         """
145 |         search = self.search
146 |         ret = dict()
147 | 
148 |         if field in ['uri', 'vers', 'comments', 'server']:
149 |             field = ''.join((field, '.raw'))
150 | 
151 |         if VERSION < (5, 0, 0):
152 |             self.search = self.search.params(search_type='count', default_operator='AND')
153 |         else:
154 |             self.search = self.search.params(search_type='query_then_fetch')
155 |         # This is documented at https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.Elasticsearch.search
156 |         # search_type='count' has been deprecated in ES 2.0
157 |         self.search.aggs.bucket('TEST', 'terms', field=field, size=size)
158 |         for hit in self.search.execute(ignore_cache=True).aggregations['TEST']['buckets']:
159 |             ret[hit['key']] = hit['doc_count']
160 |         self.search = search
161 |         return ret
162 | 
163 |     def get_relevant_ids(self, fields, percentage=0, minimum_occurrences=0):
164 |         """ Return the ids that are spread across the different values of `fields`.
165 | 
166 |         :param list of str fields:
167 |         :param float percentage:
168 |         :param float minimum_occurrences:
169 |         :return set of int:
170 |         """
171 |         minimum_occurrences = minimum_occurrences or self.minimum_occurrences
172 |         percentage = percentage or self.percentage
173 | 
174 |         ret = set()
175 |         search = self.search
176 |         ids = set(i['id'] for i in self.search.execute())  # get all possible ids
177 |         self.search = search
178 | 
179 |         for _id in ids:
180 |             search = self.search
181 | 
182 |             self.add_filters({'id': _id})
183 | 
184 |             # Count how many different values of each field there are for a given `id`
185 |             data = collections.defaultdict(set)
186 |             fields_counter = collections.defaultdict(int)
187 |             for res in self.search.execute():
188 |                 for field in fields:
189 |                     if res[field] not in data[field]:
190 |                         fields_counter[field] += 1.0
191 |                     data[field].add(res[field])
192 | 
193 |             # Discard ids that occur too rarely, or that are present on less than `percentage`% of the different values of each field
194 |             for field, content in data.items():
195 |                 if len(content) < minimum_occurrences:
196 |                     logging.debug('Discarding id \033[32m%s\033[0m only present %d times.', _id, len(content))
197 |                     break
198 |                 _percentage = len(content) / fields_counter[field] * 100.0
199 |                 if _percentage > percentage:
200 |                     continue
201 |                 logging.debug('Discarding id \033[32m%s\033[0m present in %d%% of different values of the \033[32m%s\033[0m field', _id, _percentage, field)
202 |                 break
203 |             else:
204 |                 ret.add(_id)
205 |             self.search = search
206 | 
207 |         return ret
208 | 
209 |     def reset_filters(self):
210 |         self.search = Search(using=self.client, index=self.index).extra(size=10000)
211 | 
212 |     def get_results(self):
213 |         """
214 |         Return a `Result` object obtained from the execution of the search `self.search`.
215 |         :return Result: The `Result` object obtained from the execution of the search `self.search`.
216 |         """
217 |         search = self.search
218 |         result = self.search.scan()
219 |         self.search = search
220 |         return result
221 | 
222 |     def commit(self):
223 |         """Process the list of dicts and push them to the DB."""
224 |         self.total_objs += len(self.nlist)
225 |         count = 0
226 | 
227 |         def gen_events(events):
228 |             # Yield the (action, document) pairs expected by the bulk API,
229 |             # using the configured index and doc_type instead of hardcoded ones.
230 |             for d in events:
231 |                 yield {'index': {'_index': self.index, '_type': self.doc_type}}
232 |                 yield d.to_dict()
233 | 
234 | 
235 |         events = list()
236 |         for entry in self.nlist:
237 |             event = Event(_index=self.index)
238 |             for key, value in entry.items():
239 |                 setattr(event, key, value)
240 | 
241 |             event.whitelisted = False
242 |             event.comments = "import on " + str(datetime.datetime.now())
243 |             events.append(event)
244 |             count += 1
245 | 
246 |         try:
247 |             ret = self.client.bulk(gen_events(events))
248 |             ## TODO: parse `ret` and selectively save() only the events that failed
249 |         except TransportError as e:
250 |             logging.warning("We encountered an error (%s); saving the events one by one.", e)
251 |             for event in events:
252 |                 event.save(using=self.client)
253 |                 ## TODO: find a way to change the hardcoded 'events' ES doc_type,
254 |                 ## see elasticsearch_dsl issue 689
255 | 
256 |         self.total_commits += count
257 |         logging.debug("Written %d events", self.total_commits)
258 |         del self.nlist[:]
259 | 
260 | 
--------------------------------------------------------------------------------
/tests/test_flat_files.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | from nxtool.log_providers import flat_file
4 | from nxtool.whitelists_generators import cookies, images_1002, zone_wide, zone_var_wide, url_wide, site_wide_id
5 | from nxtool.whitelists_generators import google_analytics, zone_var_wide_url, array_like_variables_names
6 | 
7 | 
8 | class TestFlatFiles(unittest.TestCase):
9 |     def test_get_relevant_ids(self):
10 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
11 |         self.assertEqual(parser.get_relevant_ids(['zone', ], 1, 1), [42000227])
12 |         self.assertEqual(parser.get_relevant_ids(['zone', ], 1, 100), [])
13 | 
14 | 
15 | class TestParseLog(unittest.TestCase):
16 |     maxDiff = None
17 |     def test_show_stats(self):
18 |         parser = flat_file.FlatFile('tests/data/logs.txt')
19 |         parser.get_statistics()
20 | 
21 |     def test_generate_whitelist_cookies(self):
22 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
23 |         parser.get_relevant_ids = lambda x: [42000227]
24 |         self.assertEqual(cookies.generate_whitelist(parser, []), [{'wl': [42000227], 'mz': ['$HEADERS_VAR:cookie'],
25 |                                                                    'msg': 'Cookies that matches a id 42000227'}])
26 | 
27 |         self.assertEqual(cookies.generate_whitelist(parser, [{'id': 1234}]), [{'wl': [42000227], 'mz': ['$HEADERS_VAR:cookie'],
28 |                                                                                'msg': 'Cookies that matches a id 42000227'}])
29 |     def test_generate_whitelist_images(self):
30 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
31 |         self.assertEqual(
32 |             images_1002.generate_whitelist(parser, []),
33 |             [{'mz': ['$URL_X:^/phpMyAdmin-2.8.2/scripts/setup.php|URL'], 'wl': [1002], 'msg': 'Images size (0x)'}]
34 |         )
35 |         self.assertEqual(images_1002.generate_whitelist(parser, [{'wl': [1002]}]), [])
36 | 
37 |     def test_generate_whitelist_zone_wide(self):
38 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
39 |         parser.get_top = lambda x: {1337: 2048, '': 1337} if x == 'id' else {'ARGS': 2048}
40 |         parser.get_relevant_ids = lambda x: [1337]
41 |         self.assertEqual(zone_wide.generate_whitelist(parser, []),
42 |                          [{'msg': 'zone-wide ID whitelist if it matches a id 1337', 'mz': ['ARGS'], 'wl': {1337}}])
43 | 
44 |         parser.get_relevant_ids = lambda x: []
45 |         self.assertEqual(zone_wide.generate_whitelist(parser, [{'id': 1337}]), [])
46 | 
47 |         parser.get_top = lambda x: {1337: 2} if x == 'id' else {'ARGS': 2}
48 |         self.assertEqual(zone_wide.generate_whitelist(parser, []), [])
49 | 
50 |     def test_generate_whitelist_zone_var_wide(self):
51 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
52 |         parser.get_relevant_ids = lambda x: [1337]
53 |         parser.get_top = lambda x: {'test_var_name': 2048, '': 123, 'super-test': 12}
54 |         expected = [
55 |             {'msg': 'Variable zone-wide if it matches a id 1337', 'mz': ['BODY:test_var_name'], 'wl': [1337]},
56 |             {'msg': 'Variable zone-wide if it matches a id 1337', 'mz': ['ARGS:test_var_name|NAME'], 'wl': [1337]},
57 |             {'msg': 'Variable zone-wide if it matches a id 1337', 'mz': ['ARGS:test_var_name'], 'wl': [1337]},
58 |             {'msg': 'Variable zone-wide if it matches a id 1337', 'mz': ['BODY:test_var_name|NAME'], 'wl': [1337]}
59 |         ]
60 |         try:
61 |             self.assertCountEqual(zone_var_wide.generate_whitelist(parser, []), expected)
62 |         except AttributeError:  # Python2/3 fuckery
63 |             self.assertItemsEqual(zone_var_wide.generate_whitelist(parser, []), expected)
64 | 
65 |         parser.get_top = lambda x: {'test_var_name': 0}
66 |         try:
67 |             self.assertCountEqual(zone_var_wide.generate_whitelist(parser, [{'id': 1, 'mz': 'BODY'}]), [])
68 |         except AttributeError:  # Python2/3 fuckery
69 |             self.assertItemsEqual(zone_var_wide.generate_whitelist(parser, [{'id': 1, 'mz': 'BODY'}]), [])
70 | 
71 | 
72 |     def test_generate_whitelist_url_wide(self):
73 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
74 |         parser.get_relevant_ids = lambda x: [1337]
75 |         parser.get_top = lambda x: {'1337': 2048, '123': 2}
76 |         expected = [{'msg': 'url-wide whitelist if it matches a id 1337', 'mz': ['$URL:1337'], 'wl': {'1337'}},
77 |                     {'msg': 'url-wide whitelist if it matches a id 1337', 'mz': ['$URL:123'], 'wl': {'1337'}}]
78 |         self.assertTrue(all(i in url_wide.generate_whitelist(parser, []) for i in expected))
79 | 
80 |         parser.get_relevant_ids = lambda x: []
81 |         parser.get_top = lambda x: {}
82 |         self.assertEqual(url_wide.generate_whitelist(parser, [{'id': 1337}]), [])
83 | 
84 |     def test_generate_whitelist_site_wide_id(self):
85 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
86 |         parser.get_relevant_ids = lambda x, y: [1337]
87 |         parser.get_top = lambda x: {'1337': 2048}
88 |         self.assertEqual(site_wide_id.generate_whitelist(parser, []),
89 |                          [{'msg': 'Site-wide id+zone if it matches id 1337', 'mz': ['1337'], 'wl': [1337]}])
90 |         self.assertEqual(site_wide_id.generate_whitelist(parser, [{'id': 1234}]),
91 |                          [{'msg': 'Site-wide id+zone if it matches id 1337', 'mz': ['1337'], 'wl': [1337]}])
92 | 
93 |     def test_generate_whitelist_google_analytics(self):
94 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
95 |         parser.get_relevant_ids = lambda x: [1337]
96 |         parser.get_top = lambda x: {'1337': 2048}
97 |         self.assertEqual(google_analytics.generate_whitelist(parser, []),
98 |                          [{'msg': 'Google analytics', 'mz': ['$ARGS_VAR_X:__utm[abctvz]'], 'wl': [1337]}])
99 |         self.assertEqual(google_analytics.generate_whitelist(parser, [{'id': 1234}]),
100 |                          [{'msg': 'Google analytics', 'mz': ['$ARGS_VAR_X:__utm[abctvz]'], 'wl': [1337]}])
101 |         self.assertEqual(google_analytics.generate_whitelist(parser, [{'wl': 1002}]),
102 |                          [{'msg': 'Google analytics', 'mz': ['$ARGS_VAR_X:__utm[abctvz]'], 'wl': [1337]}])
103 | 
104 |     def test_generate_whitelist_zone_var_wide_url(self):
105 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
106 |         parser.get_relevant_ids = lambda x: [1337]
107 |         parser.get_top = lambda x: {'1337': 2048, '': 123, 'test': 1}
108 |         expected = [
109 |             {
110 |                 'msg': 'Variable zone-wide on a specific url if it matches a id 1337',
111 |                 'mz': ['$URL:1337|$BODY_VAR:1337'],
112 |                 'wl': [1337]
113 |             },
114 |             {
115 |                 'msg': 'Variable zone-wide on a specific url if it matches a id 1337',
116 |                 'mz': ['$URL:1337|ARGS:1337|NAME'],
117 |                 'wl': [1337]
118 |             },
119 |             {
120 |                 'msg': 'Variable zone-wide on a specific url if it matches a id 1337',
121 |                 'mz': ['$URL:1337|$ARGS_VAR:1337'], 'wl': [1337]
122 |             },
123 |             {
124 |                 'msg': 'Variable zone-wide on a specific url if it matches a id 1337',
125 |                 'mz': ['$URL:1337|BODY:1337|NAME'],
126 |                 'wl': [1337]
127 |             }]
128 |         self.assertTrue(all(i in zone_var_wide_url.generate_whitelist(parser, [{'id': 123}]) for i in expected))
129 | 
130 |     def test_generate_whitelist_array_like_variables_names(self):
131 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
132 |         parser.get_relevant_ids = lambda x: [1337]
133 |         parser.get_top = lambda x: {'1337': 2048}
134 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [])
135 | 
136 |         parser.get_relevant_ids = lambda x: [1310]
137 |         parser.get_top = lambda x: {'test[1234]': 2048}
138 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [
139 |             {'msg': 'Array-like variable name', 'mz': ['$BODY_VAR_X:^test\\[.+\\]$'], 'wl': [1310, 1311]},
140 |             {'msg': 'Array-like variable name', 'mz': ['$ARGS_VAR_X:^test\\[.+\\]$'], 'wl': [1310, 1311]}]
141 |         )
142 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, [{'id': 3, 'mz': 'BODY'}]), [
143 |             {'msg': 'Array-like variable name', 'mz': ['$BODY_VAR_X:^test\\[.+\\]$'], 'wl': [1310, 1311]},
144 |             {'msg': 'Array-like variable name', 'mz': ['$ARGS_VAR_X:^test\\[.+\\]$'], 'wl': [1310, 1311]}]
145 |         )
146 |         parser.get_top = lambda x: {'test[1234]]': 2048}
147 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [])
148 | 
149 |         parser.get_top = lambda x: {'test[[1234]': 2048}
150 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [])
151 | 
152 |         parser.get_top = lambda x: {'test[1234]': 1}
153 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [])
154 | 
155 | class TestFiltering(unittest.TestCase):
156 |     def test_filter_str(self):
157 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
158 |         self.assertEqual([i for i in parser.get_results()][0], {'block': '0',
159 |             'cscore0': '$UWA', 'id': '42000227', 'ip': 'X.X.X.X', 'learning': '0', 'score0': '8',
160 |             'server': 'Y.Y.Y.Y', 'total_blocked': '204', 'total_processed': '472',
161 |             'uri': '/phpMyAdmin-2.8.2/scripts/setup.php', 'var_name': 'cookie', 'vers': '0.52', 'zone': 'HEADERS',
162 |             'coords': None, 'date': '20131110T07:36:19'}
163 |         )
164 | 
165 |     def test_filter_list(self):
166 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
167 |         parser.add_filters({'ip': ['X.X.X.X', 'A.A.A.A']})
168 |         self.assertEqual([i for i in parser.get_results()], [{'block': '0',
169 |             'cscore0': '$UWA', 'id': '42000227', 'ip': 'X.X.X.X', 'learning': '0', 'score0': '8',
170 |             'server': 'Y.Y.Y.Y', 'total_blocked': '204', 'total_processed': '472',
171 |             'uri': '/phpMyAdmin-2.8.2/scripts/setup.php', 'var_name': 'cookie', 'vers': '0.52', 'zone': 'HEADERS',
172 |             'coords': None, 'date': '20131110T07:36:19'}])
173 | 
174 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
175 |         parser.add_filters({'ip': ['A.A.A.A']})
176 |         self.assertEqual([i for i in parser.get_results()], [])
177 | 
178 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
179 |         parser.add_filters({'ip': ['X.X.X.X']}, negative=True)
180 |         self.assertEqual([i for i in parser.get_results()], [])
181 | 
--------------------------------------------------------------------------------