├── nxtool ├── __init__.py ├── whitelists_generators │ ├── __init__.py │ ├── google_analytics.py │ ├── cookies.py │ ├── site_wide_id.py │ ├── zone_wide.py │ ├── url_wide.py │ ├── zone_var_wide.py │ ├── array_like_variables_names.py │ ├── zone_var_wide_url.py │ └── images_1002.py ├── printers.py ├── typing.py └── log_providers │ ├── __init__.py │ ├── flat_file.py │ └── elastic.py ├── tests ├── __init__.py ├── test_typing.py ├── data │ ├── exlog.txt │ ├── images_1002.txt │ ├── cookies.txt │ └── logs.txt ├── test_elastic.py └── test_flat_files.py ├── config.cfg ├── requirements-v1.txt ├── requirements.txt ├── requirements-v2.txt ├── requirements-v5.txt ├── .travis.yml ├── nxtool.py └── README.md /nxtool/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config.cfg: -------------------------------------------------------------------------------- 1 | [elastic] 2 | host = 127.0.0.1:9200 3 | use_ssl = False 4 | index = nxapi 5 | version = 2 -------------------------------------------------------------------------------- /requirements-v1.txt: -------------------------------------------------------------------------------- 1 | elasticsearch-dsl<2.0.0 2 | -e git+http://github.com/nbs-system/nxapi-ng/#egg=nxapi 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | elasticsearch-dsl<6.0.0 2 | -e git+http://github.com/nbs-system/nxapi-ng/#egg=nxapi 3 | -------------------------------------------------------------------------------- /requirements-v2.txt: -------------------------------------------------------------------------------- 1 | elasticsearch-dsl>=2.0.0,<3.0.0 2 | -e git+http://github.com/nbs-system/nxapi-ng/#egg=nxapi 3 | -------------------------------------------------------------------------------- /requirements-v5.txt: -------------------------------------------------------------------------------- 1 | elasticsearch-dsl>=5.0.0,<6.0.0 2 | -e git+http://github.com/nbs-system/nxapi-ng/#egg=nxapi 3 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/__init__.py: -------------------------------------------------------------------------------- 1 | def modify_search(func): 2 | def wrapper(provider, wl): 3 | if not hasattr(provider, 'search'): 4 | return func(provider, wl) 5 | search = provider.search 6 | ret = func(provider, wl) 7 | provider.search = search 8 | return ret 9 | return wrapper 10 | -------------------------------------------------------------------------------- /tests/test_typing.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from nxtool import typing 3 | from nxtool.log_providers import flat_file 4 | 5 | 6 | class TestTyping(unittest.TestCase): 7 | def test_typing(self): 8 | parser = flat_file.FlatFile('./tests/data/exlog.txt') 9 | self.assertEqual([i for i in typing.typification(parser)], [['^\\d+$', 'integer', 'ARGS', 'a']]) 10 | 11 | parser.get_results = lambda: [{'zone': "BODY", 'var_name': "pif"}, ] 12 | self.assertFalse([i for i in typing.typification(parser)])
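A note on the `modify_search` decorator defined above: it snapshots `provider.search` before running a generator module and restores it afterwards, so each whitelist generator can add its own filters without leaking them into the next one. A minimal sketch of the contract it guarantees (the `DummyProvider` class is illustrative, not part of the code base):

from nxtool.whitelists_generators import modify_search

class DummyProvider(object):
    search = 'pristine state'

@modify_search
def generate_whitelist(provider, whitelists):
    provider.search = 'transient filters'  # the generator mutates the search state
    return []

provider = DummyProvider()
generate_whitelist(provider, [])
assert provider.search == 'pristine state'  # the decorator restored the snapshot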
-------------------------------------------------------------------------------- /nxtool/whitelists_generators/google_analytics.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from . import modify_search 4 | 5 | 6 | @modify_search 7 | def generate_whitelist(provider, whitelists): 8 | """ 9 | Generate whitelists for Google Analytics cookies 10 | :param provider: 11 | :param list of dict whitelists: 12 | :return list of dict: 13 | """ 14 | logging.info('Generating \033[1mGoogle analytics\033[0m rules') 15 | 16 | provider.add_filters({'zone': 'ARGS', 'var_name': ['__utma', '__utmb', '__utmc', '__utmt', '__utmv', '__utmz']}) 17 | ids = provider.get_relevant_ids(['uri', 'ip']) 18 | 19 | ret = list() 20 | if ids: 21 | ret.append({'mz': ['$ARGS_VAR_X:__utm[abctvz]'], 'wl': ids, 'msg': 'Google analytics'}) 22 | return ret 23 | -------------------------------------------------------------------------------- /tests/data/exlog.txt: -------------------------------------------------------------------------------- 1 | 2013/05/30 20:47:05 [debug] 10804#0:*1 NAXSI_EXLOG: ip=127.0.0.1&server=127.0.0.1&uri=/&id=1302&zone=ARGS&var_name=a&content=1234 2 | 2013/05/30 20:47:05 [debug] 10804#0:*1 NAXSI_EXLOG: ip=127.0.0.1&server=127.0.0.1&uri=/&id=1302&zone=ARGS&var_name=&content=1234 3 | 2013/05/30 20:47:05 [debug] 10804#0:*1 NAXSI_EXLOG: ip=127.0.0.1&server=127.0.0.1&uri=/&id=1302&zone=ARGS|NAME&var_name=abc&content=1234 4 | garbage line for teh lulz 5 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=127.0.0.1&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" -------------------------------------------------------------------------------- /nxtool/printers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions to display data on stdout 3 | """ 4 | 5 | 6 | def print_statistics(results): 7 | for key, value in results.items(): 8 | print('# \033[100m%s\033[0m #' % key.upper()) 9 | print('\n'.join('\033[32m%s\033[0m: \033[1m%s\033[0m' % (i, j) for i, j in value.items()) + '\n') 10 | 11 | 12 | def print_generic(results): 13 | if hasattr(results, 'hits'): 14 | it = results.hits 15 | else: 16 | it = results 17 | for item in it: 18 | print('\n'.join('%s: %s' % (k, item[k]) for k in item) + '\n') 19 | 20 | 21 | def print_typed_rules(results): 22 | """ 23 | 24 | :param list of str results: 25 | :return: 26 | """ 27 | print('\n\033[1mGenerated types:\033[0m') 28 | for rule in results: 29 | print('BasicRule negative "rx:%s" "msg:%s" "mz:%s:%s" "s:BLOCK";' % tuple(rule)) 30 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/cookies.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from nxapi.rules import get_description_core 4 | from . import modify_search 5 | 6 | 7 | @modify_search 8 | def generate_whitelist(provider, whitelists): 9 | """ 10 | Generate whitelists for exceptions that are happening in the cookies. 
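Since browsers send cookies on every request, a legitimate cookie exception should show up on many different urls; that is why the code below only requires the hits to be spread across `uri`, and targets the `$HEADERS_VAR:cookie` matchzone.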
11 | 12 | :param provider: 13 | :param list whitelists: 14 | :return list of dict: 15 | """ 16 | logging.info('Generating \033[1mcookies\033[0m rules') 17 | 18 | provider.add_filters({'zone': 'HEADERS', 'var_name': 'cookie'}) 19 | 20 | # We don't require the hits to be spread across users, because the cookie might be only available 21 | # to admins for example. But since the browser sends it on every request, it should be 22 | # spread amongst urls. 23 | data = provider.get_relevant_ids(['uri']) 24 | 25 | ret = list() 26 | for _id in data: 27 | ret.append( 28 | { 29 | 'mz': ['$HEADERS_VAR:cookie'], 30 | 'wl': [_id], 31 | 'msg': 'Cookies that matches a %s' % get_description_core(_id) 32 | } 33 | ) 34 | return ret 35 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/site_wide_id.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from nxapi.rules import get_description_core 4 | from . import modify_search 5 | 6 | 7 | @modify_search 8 | def generate_whitelist(provider, whitelists): 9 | """ 10 | 11 | :param log_provider.elastic provider: 12 | :param list whitelists: 13 | :return: 14 | """ 15 | logging.info('Generating \033[1msite\033[0m rules') 16 | zones = provider.get_top('zone') 17 | 18 | res = dict() 19 | for zone in zones.keys(): 20 | logging.debug('Generating \033[1murl_wide_id\033[0m rules for \033[1m%s\033[0m', zone) 21 | search = provider.export_search() 22 | provider.add_filters({'zone': zone}) 23 | data = provider.get_relevant_ids(['zone'], 75) 24 | provider.import_search(search) 25 | 26 | if data: 27 | res[zone] = data 28 | 29 | ret = list() 30 | for zone, _id in res.items(): 31 | ret.append({ 32 | 'mz': [zone], 33 | 'wl': _id, 34 | 'msg': 'Site-wide id+zone if it matches %s' % ', or a '.join(map(get_description_core, _id))} 35 | ) 36 | return ret 37 | 38 | 39 | -------------------------------------------------------------------------------- /tests/data/images_1002.txt: -------------------------------------------------------------------------------- 1 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=URL&id0=1002&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/300x300_pouet.net.jpg HTTP/1.1", host: "X.X.X.X" 2 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=URL&id0=1002&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/script/300x300_pouet.net.jpg HTTP/1.1", host: "X.X.X.X" 3 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=URL&id0=1002&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/300x300_pouet.net.jpg HTTP/1.1", host: "X.X.X.X" 4 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=URL&id0=1002&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET 
/phpMyAdmin-2.8.2/script/300x300_pouet.net.jpg HTTP/1.1", host: "X.X.X.X" -------------------------------------------------------------------------------- /nxtool/whitelists_generators/zone_wide.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import collections 3 | 4 | from nxapi.rules import get_description_core 5 | from . import modify_search 6 | 7 | 8 | @modify_search 9 | def generate_whitelist(provider, whitelists): 10 | """ 11 | 12 | :param log_provider.elastic provider: 13 | :param list of dict whitelists: 14 | :return: 15 | """ 16 | logging.info('Generating \033[1mzone\033[0m rules') 17 | zones = provider.get_top('zone') 18 | 19 | res = collections.defaultdict(set) 20 | for zone in zones.keys(): 21 | logging.debug('Searching for id in the zone \033[1m%s\033[0m', zone) 22 | provider.add_filters({'zone': zone}) 23 | data = provider.get_top('id') 24 | 25 | for id_name, nb in data.items(): 26 | if not id_name: 27 | continue 28 | elif nb < 1000: 29 | logging.debug('Discarding the argument \033[32m%s\033[0m (%d occurrences)', id_name, nb) 30 | continue 31 | search = provider.export_search() 32 | provider.add_filters({'id': id_name}) 33 | if int(id_name) in provider.get_relevant_ids(['ip']): 34 | res[zone].add(id_name) 35 | provider.import_search(search) 36 | 37 | ret = list() 38 | for zone, wid in res.items(): 39 | ret.append( 40 | { 41 | 'mz': ['%s' % (zone,)], 42 | 'wl': wid, 43 | 'msg': 'zone-wide ID whitelist if it matches a %s' % ', or a '.join(map(get_description_core, wid))} 44 | ) 45 | return ret 46 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: trusty 3 | sudo: false 4 | 5 | cache: pip 6 | 7 | python: 8 | - "2.7" 9 | - "3.3" 10 | - "3.4" 11 | - "3.5" 12 | - "3.5-dev" 13 | - "nightly" 14 | 15 | addons: 16 | apt: 17 | packages: 18 | - openjdk-8-jre 19 | 20 | env: 21 | matrix: 22 | - ES_VERSION=5.4.1 23 | DOWNLOAD_URL=https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz 24 | - ES_VERSION=5.3.3 25 | DOWNLOAD_URL=https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz 26 | - ES_VERSION=2.4.5 27 | DOWNLOAD_URL=https://download.elastic.co/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/${ES_VERSION}/elasticsearch-${ES_VERSION}.tar.gz 28 | - ES_VERSION=1.7.6 29 | DOWNLOAD_URL=https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz 30 | 31 | install: 32 | - mkdir /tmp/elasticsearch 33 | - wget -O - ${DOWNLOAD_URL} | tar xz --directory=/tmp/elasticsearch --strip-components=1 34 | - /tmp/elasticsearch/bin/elasticsearch -d 35 | - sleep 10 36 | - pip install git+https://github.com/elastic/elasticsearch-py.git#egg=elasticsearch 37 | 38 | script: 39 | - 'case "${ES_VERSION}" in 40 | 5.4.1|5.3.3) 41 | pip install -r requirements-v5.txt || travis_terminate 1 42 | ;; 43 | 2.4.5) 44 | pip install -r requirements-v2.txt || travis_terminate 1 45 | ;; 46 | 1.7.6) 47 | pip install -r requirements-v1.txt || travis_terminate 1 48 | ;; 49 | *) 50 | travis_terminate 1 51 | ;; 52 | esac' 53 | - pip install coveralls nose2 nose2-cov 54 | - coverage run --source=nxtool -m nose2.__main__ -v 55 | 56 | after_success: 57 | - coveralls 58 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/url_wide.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import collections 3 | 4 | from nxapi.rules import get_description_core 5 | from . import modify_search 6 | 7 | 8 | @modify_search 9 | def generate_whitelist(provider, whitelists): 10 | """ 11 | 12 | :param provider: 13 | :param list of dict whitelists: 14 | :return list of dict: 15 | """ 16 | logging.info('Generating \033[1murl\033[0m rules') 17 | uris = provider.get_top('uri') 18 | 19 | res = collections.defaultdict(set) 20 | for uri in uris.keys(): 21 | logging.debug('Searching for id in the uri \033[1m%s\033[0m', uri) 22 | provider.add_filters({'uri': uri}) 23 | 24 | search = provider.export_search() 25 | provider.add_filters({'zone': 'URL'}) 26 | data = provider.get_top('id') 27 | provider.import_search(search) 28 | 29 | for id_name, nb in data.items(): 30 | if not id_name: 31 | continue 32 | elif nb < 1000: 33 | logging.debug('Discarding the argument \033[32m%s\033[0m (%d occurrences)', id_name, nb) 34 | continue 35 | else: 36 | logging.debug('\033[1mKeeping\033[0m the id \033[32m%s\033[0m (%d occurrences)', id_name, nb) 37 | 38 | search = provider.export_search() 39 | provider.add_filters({'id': id_name}) 40 | if int(id_name) in provider.get_relevant_ids(['ip']): 41 | res[uri].add(id_name) 42 | provider.import_search(search) 43 | 44 | ret = [] 45 | for uri, ids in res.items(): 46 | descriptions = ', or a '.join(map(get_description_core, ids)) 47 | ret.append({'mz': ['$URL:%s' % (uri,)], 'wl': ids, 48 | 'msg': 'url-wide whitelist if it matches a %s' % descriptions}) 49 | return ret -------------------------------------------------------------------------------- /nxtool/whitelists_generators/zone_var_wide.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import collections 3 | 4 | from nxapi.rules import get_description_core 5 | from . 
import modify_search 6 | 7 | 8 | @modify_search 9 | def generate_whitelist(provider, whitelists): 10 | """ 11 | 12 | :param log_provider.elastic provider: 13 | :return: 14 | """ 15 | logging.info('Generating \033[1mvar + zone\033[0m rules') 16 | res = collections.defaultdict(dict) 17 | 18 | for zone in ['ARGS', 'BODY', 'ARGS|NAME', 'BODY|NAME']: 19 | logging.debug('Searching for arguments in the zone \033[1m%s\033[0m', zone) 20 | provider.add_filters({'zone': zone}) 21 | data = provider.get_top('var_name') 22 | 23 | for var_name, nb in data.items(): 24 | if not var_name: 25 | continue 26 | elif nb < 1000: 27 | logging.debug('Discarding the argument \033[32m%s\033[0m (%d occurrences)', var_name, nb) 28 | continue 29 | search = provider.export_search() 30 | provider.add_filters({'var_name': var_name}) 31 | res[zone][var_name] = provider.get_relevant_ids(['ip']) # every peer should have triggered the exception 32 | provider.import_search(search) 33 | 34 | ret = list() 35 | for zone, content in res.items(): 36 | for variable, ids in content.items(): 37 | if not ids: # We don't care about empty sets 38 | continue 39 | descriptions = ', or a '.join(map(get_description_core, ids)) 40 | if zone.endswith('|NAME'): 41 | mz = '%s:%s|%s' % (zone.split('|')[0], variable, 'NAME') 42 | else: 43 | mz = '%s:%s' % (zone, variable) 44 | ret.append({'mz': [mz], 'wl': ids, 'msg': 'Variable zone-wide if it matches a %s' % descriptions}) 45 | return ret 46 | -------------------------------------------------------------------------------- /nxtool/typing.py: -------------------------------------------------------------------------------- 1 | import re 2 | import collections 3 | import logging 4 | 5 | # Each regexp is (almost) a subset of the next one 6 | REGEXPS = [ 7 | [r'^$', 'empty'], 8 | [r'^[01]$', 'boolean'], 9 | [r'^\d+$', 'integer'], 10 | [r'^#[0-9a-f]+$', 'colour'], # '#' + hex 11 | [r'^[0-9a-f]+$', 'hexadecimal'], 12 | [r'^[0-9a-z]+$', 'alphanum'], 13 | [r'^[./]?([0-9a-z]/)+[\w?+-=&/ ]+$', 'relative url'], 14 | [r'^https?://([0-9a-z-.]+\.)+[\w?+-=&/ ]+$', 'absolute url'], # like http://example.com?hello=1&id=3 15 | [r'^\w+$', 'alphanumdash'], 16 | [r'^[0-9a-z?&=+_-]+$', 'url parameter'], 17 | [r'^[\w\[\] ,&=+-]+$', 'array'], 18 | [r'^[' + r'\s\w' + r'!$%^&*()\[\]:;@~#?/.,' + r']+$', 'plaintext'], 19 | [r'', 'none'], # untypable parameters 20 | ] 21 | 22 | 23 | def typification(source): 24 | # rules = {zone1: {var1:0, var2:0}, zone2: {var6:0, ...}, ...} 25 | rules = collections.defaultdict(lambda: collections.defaultdict(int)) 26 | 27 | # Compile regexp for speed 28 | regexps = [re.compile(reg, re.IGNORECASE) for reg, _ in REGEXPS] 29 | 30 | for line in source.get_results(): 31 | line = {i: line[i] for i in line} # because `Results` objects are weird, we prefer to manipulate `dict` instead 32 | 33 | # naxsi inverts the var_name and the content 34 | # when a rule matches on var_name 35 | if line.get('zone', 'zone0').endswith('|NAME'): 36 | continue 37 | zone = line.get('zone', 'zone0') 38 | 39 | var_name = line.get('var_name', '') 40 | if not var_name: # No types for empty variable names 41 | continue 42 | 43 | try: 44 | content = line['content'] 45 | except KeyError as e: 46 | logging.error('%s has no "content" (line %s): %s', var_name, line, e) 47 | continue 48 | 49 | # Bump regexps until one matches 50 | # Since every regexp is a subset of the next one, 51 | # this works great.
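# Illustrative walk-through (not extra logic): for a content of '1234',
# '^$' fails, '^[01]$' fails, and '^\d+$' matches, so the index for this
# (zone, var_name) pair settles on 'integer', unless a later value for the
# same variable pushes it further up the list.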
52 | while not regexps[rules[zone][var_name]].match(content): 53 | rules[zone][var_name] += 1 54 | 55 | for zone, zone_data in rules.items(): 56 | for var_name, index in zone_data.items(): 57 | if index < len(REGEXPS) - 1: # Don't return untyped things 58 | yield [REGEXPS[index][0], REGEXPS[index][1], zone, var_name] 59 | -------------------------------------------------------------------------------- /nxtool/log_providers/__init__.py: -------------------------------------------------------------------------------- 1 | class LogProvider(object): 2 | def __init__(self, auto_commit_limit=400): 3 | self.nlist = list() 4 | self.auto_commit = auto_commit_limit 5 | self.total_objs = 0 6 | self.total_commits = 0 7 | 8 | def add_filters(self, filters, regexp=False, negative=False): 9 | """ 10 | :param dict filters: What fields/values do we want to filter on? 11 | :param bool regexp: Treat `filters` as regexp ? 12 | :param bool negative: Shall the match be negative ? 13 | """ 14 | raise NotImplementedError 15 | 16 | def get_results(self): 17 | raise NotImplementedError 18 | 19 | def get_statistics(self): 20 | """ Get some events statistics 21 | 22 | :return a dict of dict of int: 23 | """ 24 | ret = dict() 25 | for field in ['uri', 'server', 'ip', 'zone']: 26 | ret[field] = self.get_top(field) 27 | return ret 28 | 29 | def get_top(self, field, size=250): 30 | """ Get the top values on a given `field`. 31 | 32 | :param str field: On what field we want to filter 33 | :param int size: On how much data do we want to process 34 | :return dict of str: {field: nb_occurrences, ..} 35 | """ 36 | raise NotImplementedError 37 | 38 | def get_relevant_ids(self, fields, percentage=10.0, minimum_occurrences=250): 39 | """ This function is supposed to return the ids that are spread over/present on the `fields`. 40 | 41 | :param str fields: 42 | :param float percentage: 43 | :param int minimum_occurrences: 44 | :return set: 45 | """ 46 | raise NotImplementedError 47 | 48 | def insert(self, obj): 49 | """ This function adds the objects in `obj` to the instance of LogProvider. 50 | :param obj: objects to add to this instance 51 | :return bool: Success ? 52 | """ 53 | self.nlist.extend(obj) 54 | if self.auto_commit > 0 and len(self.nlist) > self.auto_commit: 55 | return self.commit() 56 | return True 57 | 58 | def commit(self): 59 | """ This function commits pending objects in the LogProvider instance 60 | 61 | """ 62 | raise NotImplementedError 63 | 64 | def stop(self): 65 | self.commit() 66 | 67 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/array_like_variables_names.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from . import modify_search 4 | 5 | 6 | def __check_and_strip_brackets(string): 7 | """ Return `False` if the `string` variable doesn't have the same number of opening and closing brackets; 8 | otherwise, return the variable without the trailing brackets: `pouet[1[2]3]` -> `pouet` 9 | 10 | :param str string: 11 | :return bool|str: 12 | """ 13 | if sum(1 if char == '[' else -1 if char == ']' else 0 for char in string) != 0: 14 | return False 15 | try: 16 | return string[:string.index('[')] 17 | except ValueError: # no '[' nor ']' in the `string` 18 | return False 19 | 20 | 21 | @modify_search 22 | def generate_whitelist(provider, whitelists): 23 | """ 24 | Generate whitelists for variables that look like an array, eg. `pouet[1]`, `pouet[1[2]3]`, or `pouet[1][2]`.
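A stripped name like `pouet` ends up in two whitelists for the ids 1310 and 1311, with the regexp matchzones `$ARGS_VAR_X:^pouet\[.+\]$` and `$BODY_VAR_X:^pouet\[.+\]$` (see the code below).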
25 | 26 | :param provider: The data provider 27 | :param list whitelists: Already generated rules, acting as a whitelist 28 | :return list of dict: The generated whitelists 29 | """ 30 | ids = [1310, 1311] # [ and ] 31 | 32 | logging.info('Generating \033[1marray-like variable name\033[0m rules') 33 | 34 | provider.add_filters({'zone': ['ARGS|NAME', 'BODY|NAME'], 'id': ids}) 35 | variables = provider.get_top('var_name') 36 | 37 | ret = list() # we can't use a `set` for `ret`, because we're using `dict` with it, and they're unhashable. 38 | stripped_names = set() # so we don't add duplicate rules 39 | for var_name, nb in variables.items(): 40 | if nb < 1000: 41 | logging.debug('Discarding the variable \033[32m%s\033[0m (%d occurrences)', var_name, nb) 42 | continue 43 | 44 | stripped_name = __check_and_strip_brackets(var_name) 45 | if not stripped_name: 46 | logging.debug('The variable \033[32m%s\033[0m does not have an expected form', var_name) 47 | continue 48 | 49 | if stripped_name not in stripped_names: 50 | stripped_names.add(stripped_name) 51 | ret.append({ 52 | 'mz': ['$BODY_VAR_X:^%s\[.+\]$' % stripped_name], 53 | 'wl': ids, 'msg': 'Array-like variable name'}) 54 | ret.append({ 55 | 'mz': ['$ARGS_VAR_X:^%s\[.+\]$' % stripped_name], 56 | 'wl': ids, 'msg': 'Array-like variable name'}) 57 | return ret 58 | -------------------------------------------------------------------------------- /nxtool/whitelists_generators/zone_var_wide_url.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import collections 3 | 4 | from nxapi.rules import get_description_core 5 | from . import modify_search 6 | 7 | 8 | @modify_search 9 | def generate_whitelist(provider, whitelists): 10 | """ 11 | Generate rules for a specific variable, in a specific zone, on a specific url. 
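The resulting matchzone pins the url, the zone, and the variable at once, e.g. `$URL:/login|$BODY_VAR:password` (illustrative url and variable name).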
12 | :param log_provider.elastic provider: 13 | :return: 14 | """ 15 | logging.info('Generating \033[1mvar + zone + url\033[0m rules') 16 | res = collections.defaultdict(lambda: collections.defaultdict(dict)) 17 | 18 | for uri in provider.get_top('uri').keys(): 19 | _search = provider.export_search() 20 | provider.add_filters({'uri': uri}) 21 | 22 | for zone in ['ARGS', 'BODY', 'ARGS|NAME', 'BODY|NAME']: 23 | logging.debug('Searching for arguments in the zone \033[1m%s\033[0m on the url \033[1m%s\033[0m', zone, uri) 24 | 25 | provider.add_filters({'zone': zone}) 26 | data = provider.get_top('var_name') 27 | 28 | for var_name, nb in data.items(): 29 | if not var_name: 30 | continue 31 | elif nb < 1000: 32 | logging.debug('Discarding the argument \033[32m%s\033[0m (%d occurrences)', var_name, nb) 33 | continue 34 | search = provider.export_search() 35 | provider.add_filters({'var_name': var_name}) 36 | res[uri][zone][var_name] = provider.get_relevant_ids(['ip']) 37 | provider.import_search(search) 38 | 39 | provider.import_search(_search) 40 | 41 | ret = list() 42 | for uri, content in res.items(): 43 | for zone, _content in content.items(): 44 | if not _content: # We don't care about empty sets 45 | continue 46 | for var_name, __ids in _content.items(): 47 | if not __ids: 48 | continue 49 | descriptions = ', or a '.join(map(get_description_core, __ids)) 50 | if zone.endswith('|NAME'): 51 | mz = '%s:%s|%s' % (zone.split('|')[0], var_name, 'NAME') 52 | else: 53 | mz = '$%s_VAR:%s' % (zone, var_name) 54 | ret.append( 55 | {'mz': ['$URL:%s|%s' % (uri, mz)], 'wl': __ids, 56 | 'msg': 'Variable zone-wide on a specific url if it matches a %s' % descriptions} 57 | ) 58 | return ret 59 | -------------------------------------------------------------------------------- /tests/data/cookies.txt: -------------------------------------------------------------------------------- 1 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 2 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 3 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 4 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 5 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: 
ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 6 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 7 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 8 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" -------------------------------------------------------------------------------- /nxtool/whitelists_generators/images_1002.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import collections 5 | 6 | try: 7 | from itertools import zip_longest as izip_longest 8 | except ImportError: # python 2 9 | from itertools import izip_longest 10 | 11 | from . import modify_search 12 | 13 | 14 | def __guess_prefixes(strings): 15 | """ Get the list of the most common prefixes for `strings`. 16 | Careful, this function is a bit fucked up, with stupid complexity, 17 | but since our dataset is small, who cares? 
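For example (with the component-joining fix below), `__guess_prefixes([['wp-content', 'a.jpg'], ['wp-content', 'b.jpg']])` returns `[('/wp-content', 2)]`: the two paths share their first component, and nothing more.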
18 | 19 | :param list of list of str strings: [['wp-content', '10'], ['pouet', 'pif']] 20 | :return list of tuple: [(prefix1, nb_of_strings_sharing_it), ...] 21 | """ 22 | if len(strings) == 1: 23 | return [('/' + os.path.join(*strings[0]), 1), ] 24 | 25 | threshold = len(strings) 26 | prefix, prefixes = [], [] 27 | for chars in izip_longest(*strings, fillvalue=''): 28 | char, count = collections.Counter(chars).most_common(1)[0] 29 | if count == 1: 30 | break 31 | elif count < threshold: 32 | if prefix: 33 | prefixes.append(('/' + '/'.join(prefix), threshold)) # the "chars" are path components, so glue them back with '/' 34 | threshold = count 35 | prefix.append(char) 36 | if prefix: 37 | prefixes.append(('/' + '/'.join(prefix), threshold)) 38 | return prefixes 39 | 40 | 41 | @modify_search 42 | def generate_whitelist(provider, whitelists): 43 | logging.info('Generating \033[1mImage 1002\033[0m rules') 44 | 45 | provider.add_filters({'zone': 'URL', 'id': '1002'}) 46 | 47 | uris = provider.get_top('uri') 48 | if not uris: 49 | return [] 50 | 51 | # Filter already whitelisted things 52 | already_whitelisted_uri = set() 53 | for r in whitelists: 54 | if 1002 in r['wl']: 55 | if 'mz' not in r: 56 | already_whitelisted_uri = already_whitelisted_uri.union('/') 57 | break 58 | elif 'URL' in r['mz']: 59 | already_whitelisted_uri = already_whitelisted_uri.union(r['mz']) 60 | 61 | res = dict() 62 | for uri, nb in uris.items(): 63 | if not any(uri.startswith(i) for i in already_whitelisted_uri): 64 | res[uri] = nb 65 | 66 | if not res: 67 | return [] 68 | 69 | prefixes = __guess_prefixes([a.split('/')[1:] for a in res.keys()]) 70 | 71 | # We weight each common prefix with the number of times 72 | # the urls below it have triggered an exception. 73 | best_path = collections.defaultdict(int) 74 | for pre, nb_pre in prefixes: 75 | for uri, nb in res.items(): 76 | if uri.startswith(pre): 77 | best_path[pre] += int(nb) * nb_pre 78 | 79 | rules = [] 80 | for url, nb in best_path.items(): 81 | logging.info('The url \033[32m%s\033[0m triggered %d exceptions for the rule 1002, whitelisting it.', url, nb) 82 | rules.append({'wl': [1002], 'mz': ['$URL_X:^%s|URL' % url], 'msg': 'Images size (0x)'}) 83 | return rules 84 | -------------------------------------------------------------------------------- /tests/test_elastic.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import time 3 | 4 | from nxtool.log_providers import elastic 5 | from nxtool.log_providers import flat_file 6 | 7 | class TestElastic(unittest.TestCase): 8 | maxDiff = None 9 | def test_export(self): 10 | parser = elastic.Elastic() 11 | search = parser.export_search() 12 | parser.import_search(parser.export_search()) 13 | 14 | self.assertEqual(search, parser.export_search()) 15 | 16 | def test_add_filters(self): 17 | parser = elastic.Elastic() 18 | parser.add_filters({'pif': 'paf'}) 19 | self.assertEqual(parser.get_filters(), {'query': {'multi_match': {'query': 'paf', 'fields': ['pif']}}, 'size': 10000}) 20 | 21 | parser = elastic.Elastic() 22 | parser.add_filters({'pif': ['paf', 'pouf']}) 23 | self.assertEqual(parser.get_filters(), 24 | {'query': {'bool': {'must': [{'bool': {'should': 25 | [ 26 | {'multi_match': {'fields': ['pif'], 'query': 'paf'}}, 27 | {'multi_match': {'fields': ['pif'], 'query': 'pouf'}} 28 | ] 29 | }}]}}, 'size': 10000}) 30 | 31 | parser = elastic.Elastic() 32 | parser.add_filters({'pif': []}) 33 | self.assertEqual(parser.get_filters(), {'query': {'match_all': {}}, 'size': 10000}) 34 | 35 | parser = elastic.Elastic() 36 |
parser.add_filters({'pif': []}, negative=True) 37 | self.assertEqual(parser.get_filters(), {'query': {'match_all': {}}, 'size': 10000}) 38 | 39 | parser = elastic.Elastic() 40 | parser.add_filters({'pif': set()}, negative=True) 41 | self.assertEqual(parser.get_filters(), {'query': {'match_all': {}}, 'size': 10000}) 42 | 43 | 44 | parser = elastic.Elastic() 45 | parser.add_filters({'pif': [1,]}, negative=True) 46 | self.assertEqual(parser.get_filters(), {'query': {'bool': {'must_not': [ 47 | {'multi_match': {'fields': ['pif'], 'query': 1}}]}}, 'size': 10000}) 48 | 49 | parser = elastic.Elastic() 50 | parser.add_filters({'pif': 'paf'}, negative=True) 51 | self.assertEqual(parser.get_filters(), {'query': {'bool': 52 | {'must_not': 53 | [ 54 | {'multi_match': {'fields': ['pif'], 'query': 'paf'}} 55 | ]}}, 56 | 'size': 10000}) 57 | 58 | def test_reset_filters(self): 59 | parser = elastic.Elastic() 60 | search = parser.get_filters() 61 | parser.add_filters({'pif': 'paf'}) 62 | parser.reset_filters() 63 | self.assertEqual(parser.get_filters(), search) 64 | 65 | def test_get_results(self): 66 | parser = elastic.Elastic() 67 | parser.search.scan = lambda: None 68 | parser.add_filters({'pif': 'paf'}) 69 | filters = parser.get_filters() 70 | parser.get_results() 71 | self.assertEqual(parser.get_filters(), filters) 72 | 73 | 74 | class TestElasticImport(unittest.TestCase): 75 | 76 | 77 | def test_elastic_import(self): 78 | dest = elastic.Elastic() 79 | source = flat_file.FlatFile('./tests/data/exlog.txt') 80 | for log in source.logs: 81 | dest.insert([log]) 82 | dest.stop() 83 | dest.initialize_search() 84 | dest.minimum_occurrences = 0 85 | dest.percentage = 0 86 | time.sleep(5) 87 | self.assertEqual(dest.get_relevant_ids(['id']), {u'1302', u'42000227'}) 88 | self.assertEqual(dest.get_top('id'), {1302: 3, 42000227: 1}) 89 | self.assertEqual(dest.get_top('uri'), {u'/': 3, u'/phpMyAdmin-2.8.2/scripts/setup.php': 1}) 90 | dest.client.indices.delete(index=dest.index, ignore=[400, 404]) 91 | 92 | 93 | -------------------------------------------------------------------------------- /nxtool.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import logging 4 | 5 | from nxapi import whitelist as nxapi_whitelist 6 | 7 | from nxtool import printers, typing 8 | from nxtool.log_providers import flat_file 9 | from nxtool.whitelists_generators import cookies, images_1002, google_analytics, zone_var_wide, site_wide_id, zone_wide 10 | from nxtool.whitelists_generators import url_wide, array_like_variables_names, zone_var_wide_url 11 | 12 | import urllib3 13 | urllib3.disable_warnings() 14 | 15 | WL_MODULES = [google_analytics, images_1002, array_like_variables_names, cookies, zone_var_wide_url, url_wide, 16 | zone_var_wide, zone_wide, site_wide_id] 17 | 18 | try: 19 | from nxtool.log_providers import elastic 20 | elastic_imported = True 21 | except ImportError: 22 | print('Unable to correctly import the elastic material.
Did you forget to install elasticsearch-dsl?') 23 | elastic_imported = False 24 | 25 | 26 | def __whitelist_from_rules(source, rules): 27 | """ 28 | :param source: 29 | :param list of dict rules: 30 | """ 31 | for rule in rules: 32 | whitelist = {'id': rule.get('wl', '*')} 33 | for matchzone in rule.get('mz', '*')[0].split('|'): 34 | try: 35 | zone, value = matchzone.split(':') 36 | except ValueError: # no ':', it's a simple matchzone 37 | whitelist['zone'] = matchzone 38 | continue 39 | 40 | if zone == '$URL': 41 | whitelist['url'] = value 42 | elif zone.startswith('$') and zone.endswith('_VAR'): # stuff like `$ARGS_VAR:variable_name` 43 | whitelist['zone'] = zone[1:-4] 44 | whitelist['var_name'] = value 45 | 46 | source.add_filters(whitelist, regexp=False) 47 | 48 | 49 | def __filter(source, filters, regexp=False, hostname=''): 50 | _filter = {} 51 | 52 | if filters: 53 | for param in filters.split(','): 54 | try: 55 | key, value = param.split('=') 56 | except ValueError: 57 | print('Parameters should be of the form key=value,key2=value2,...') 58 | return 59 | _filter[key] = value 60 | 61 | if hostname: 62 | _filter['server'] = hostname 63 | 64 | source.add_filters(_filter, regexp) 65 | 66 | 67 | def __create_argparser(): 68 | """ Create a simple arguments parser. """ 69 | parser = argparse.ArgumentParser(description='Sweet tool to help you manage your naxsi logs.') 70 | 71 | parser.add_argument('hostname', action='store', nargs='?') 72 | parser.add_argument('-v', '--verbose', action='store_true') 73 | 74 | log_sources = parser.add_argument_group('Log sources') 75 | log_sources.add_argument('--elastic-source', action='store_true') 76 | log_sources.add_argument('--flat-file', type=str) 77 | log_sources.add_argument('--stdin', action='store_true') 78 | 79 | log_destinations = parser.add_argument_group('Log destinations') 80 | log_destinations.add_argument('--elastic-dest', action='store_true') 81 | 82 | actions = parser.add_argument_group('Actions') 83 | actions.add_argument('--typing', action='store_true') 84 | actions.add_argument('--whitelist', action='store_true') 85 | actions.add_argument('--slack', action='store_true') 86 | actions.add_argument('--filter', action='store') 87 | actions.add_argument('--filter-regexp', action='store') 88 | actions.add_argument('--stats', action='store_true') 89 | 90 | return parser.parse_args() 91 | 92 | 93 | def main(): 94 | args = __create_argparser() 95 | 96 | logging.getLogger("elasticsearch").setLevel(logging.ERROR) 97 | logging.getLogger("urllib3").setLevel(logging.ERROR) 98 | if args.verbose: 99 | logging.basicConfig(level=logging.DEBUG, format='%(message)s') 100 | else: 101 | logging.basicConfig(level=logging.INFO, format='[+] %(message)s') 102 | 103 | if args.elastic_source is True: 104 | if elastic_imported is False: 105 | print('You asked for an elastic source, but you do not have the required dependencies.') 106 | return 107 | source = elastic.Elastic() 108 | elif args.flat_file: 109 | source = flat_file.FlatFile(args.flat_file) 110 | elif args.stdin is True: 111 | source = flat_file.FlatFile() 112 | else: 113 | print('Please give me a valid source (or try to relaunch me with `-h` if you are lost).') 114 | return 1 115 | 116 | if args.slack and not args.whitelist: 117 | print("You asked for loosened constraints on whitelist generation, but you didn't ask for whitelist generation.") 118 | return 119 | 120 | # Filtering can be used for any operation 121 | __filter(source, args.filter, regexp=False, hostname=args.hostname) 122 | if args.filter_regexp:
123 | __filter(source, args.filter_regexp, regexp=True, hostname=args.hostname) 124 | 125 | if args.elastic_dest: 126 | destination = elastic.Elastic() 127 | for log in source.logs: 128 | destination.insert([log]) 129 | destination.stop() 130 | elif args.stats: 131 | printers.print_statistics(source.get_statistics()) 132 | elif args.whitelist: 133 | whitelist = list() 134 | for module in WL_MODULES: 135 | if args.slack: 136 | source.minimum_occurrences = 0 137 | rules = module.generate_whitelist(source, whitelist) 138 | whitelist.extend(rules) 139 | __whitelist_from_rules(source, rules) 140 | if whitelist: 141 | print('\n\033[1mGenerated whitelists:\033[0m') 142 | print('\t' + ';\n\t'.join(map(nxapi_whitelist.dict_to_str, whitelist)) + ';') 143 | else: 144 | print('\n\033[1mnxtool was not able to generate any meaningful whitelist\033[0m') 145 | elif args.typing: 146 | printers.print_typed_rules(typing.typification(source)) 147 | else: 148 | printers.print_generic(source.get_results()) # print_generic prints by itself; wrapping it in print() would output a spurious `None` 149 | 150 | 151 | if __name__ == '__main__': 152 | sys.exit(main()) 153 | -------------------------------------------------------------------------------- /tests/data/logs.txt: -------------------------------------------------------------------------------- 1 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 2 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 3 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 4 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 5 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 6 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 7 | 2013/11/10 07:36:19
[error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=Y.Y.Y.Y&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 8 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 9 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 10 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 11 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 12 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 13 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 14 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 15 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 16 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: 
ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" 17 | 2013/11/10 07:36:19 [error] 8278#0: *5932 NAXSI_FMT: ip=X.X.X.X&server=1.2.3.4&uri=/phpMyAdmin-2.8.2/scripts/setup.php&learning=0&vers=0.52&total_processed=472&total_blocked=204&block=0&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=user-agent, client: X.X.X.X, server: blog.memze.ro, request: "GET /phpMyAdmin-2.8.2/scripts/setup.php HTTP/1.1", host: "X.X.X.X" -------------------------------------------------------------------------------- /nxtool/log_providers/flat_file.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import fileinput 3 | import mimetypes 4 | import zipfile 5 | import tarfile 6 | import re 7 | import logging 8 | 9 | from nxapi.nxlog import parse_nxlog 10 | 11 | from nxtool.log_providers import LogProvider 12 | 13 | 14 | class FlatFile(LogProvider): 15 | def __init__(self, fname=None): 16 | super(FlatFile, self).__init__() 17 | 18 | self.minimum_occurrences = 250 19 | self.percentage = 10 20 | self.logs = list() 21 | self.filters = collections.defaultdict(list) 22 | self.negative_filters = collections.defaultdict(list) 23 | self.filters_regexp = collections.defaultdict(list) 24 | self.negative_filters_regexp = collections.defaultdict(list) 25 | self.fname = fname 26 | 27 | try: 28 | ftype = mimetypes.guess_type(self.fname)[0] # `guess_all_extensions` maps a mimetype to extensions; `guess_type` is the lookup we want here 29 | except (AttributeError, TypeError): # `self.fname` is None 30 | self.__transform_logs(fileinput.input("-")) 31 | else: 32 | if ftype == 'application/zip': # zip file! 33 | with zipfile.ZipFile(self.fname) as f: 34 | for name in f.namelist(): 35 | self.__transform_logs(f.read(name).decode('utf-8').splitlines()) 36 | elif ftype in ('application/tar', 'application/x-tar'): # tar file! 37 | with tarfile.open(self.fname) as f: 38 | for member in f.getmembers(): # tarfile has no `namelist()` nor `read()`: use `getmembers()` and `extractfile()` 39 | self.__transform_logs(line.decode('utf-8') for line in f.extractfile(member) or []) 40 | else: # no (guessable) archive mimetype: treat it as a flat file 41 | self.__transform_logs(fileinput.input(self.fname)) 42 | 43 | def export_search(self): 44 | return self.filters, self.negative_filters, self.filters_regexp, self.negative_filters_regexp 45 | 46 | def import_search(self, search): 47 | self.filters, self.negative_filters, self.filters_regexp, self.negative_filters_regexp = search 48 | 49 | def __transform_logs(self, it): 50 | for line in it: 51 | error, log = parse_nxlog(line) 52 | if error: 53 | logging.error('%s while parsing %s', error, line) 54 | if log: 55 | self.logs.extend(log) 56 | 57 | def get_top(self, field, size=250): 58 | ret = dict() 59 | values = (log[field] for log in self.__get_filtered_logs()) 60 | for key, value in collections.Counter(values).most_common(size): # honour the `size` parameter instead of a hardcoded 10 61 | ret[key] = value 62 | return ret 63 | 64 | def __get_filtered_logs(self): 65 | """ 66 | yield the loglines according to the filtering policy defined in `self.filters`: 67 | a log is yielded at most once, when a positive filter matches it and no negative filter excludes it 68 | """ 69 | if not any((self.filters, self.filters_regexp, self.negative_filters_regexp, self.negative_filters)): 70 | for log in self.logs: # we don't filter, give everything! 71 | yield log 72 | else: 73 | for log in self.logs: 74 | for key, value in log.items(): 75 | if key in self.filters: # are we filtering on this `key`?
76 | if value in self.filters[key] and value != '*': # is the current `value` in the filtering list? 77 | if value not in self.negative_filters[key]: # ... and not explicitly excluded? 78 | yield log 79 | break 80 | if key in self.filters_regexp: # are we filtering on this `key` with regexps? 81 | if any(re.match(regexp, value) for regexp in self.filters_regexp[key]): # the filters are lists of patterns 82 | if not any(re.match(regexp, value) for regexp in self.negative_filters_regexp[key]): 83 | yield log 84 | break 85 | 86 | def get_results(self): 87 | return self.__get_filtered_logs() 88 | 89 | def add_filters(self, filters, regexp=False, negative=False): 90 | for key, value in filters.items(): 91 | if negative is True: 92 | target = self.negative_filters_regexp if regexp is True else self.negative_filters 93 | else: 94 | target = self.filters_regexp if regexp is True else self.filters 95 | if isinstance(value, list): 96 | target[key].extend(value) 97 | else: 98 | target[key].append(value) 99 | 100 | def get_relevant_ids(self, fields, percentage=0, minimum_occurrences=0): 101 | """ 102 | We want to keep alerts that are spread over a vast number of different `fields` 103 | 104 | To measure the spreading, we're using this metric: https://en.wikipedia.org/wiki/Coefficient_of_variation 105 | :param list of str fields: 106 | :return: 107 | """ 108 | minimum_occurrences = minimum_occurrences or self.minimum_occurrences 109 | percentage = percentage or self.percentage 110 | 111 | id_blacklist = set() 112 | ret = set() 113 | for field in fields: 114 | stats = collections.defaultdict(int) 115 | size = 0 116 | for logline in self.get_results(): 117 | if logline['id'] not in id_blacklist: 118 | stats[logline['id']] += 1 119 | size += 1 120 | 121 | for k, v in stats.items(): 122 | if size < minimum_occurrences: 123 | logging.debug('The field %s has not enough occurrences (%d): non-significant', field, size) 124 | continue 125 | if 100 * v < size * percentage: 126 | logging.debug('The id %s is present in less than %s%% (%d) of %s: non-significant.', k, percentage, v, field) 127 | id_blacklist.add(k) 128 | else: 129 | ret.add(k) 130 | 131 | return list(map(int, ret)) 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Coverage Status](https://coveralls.io/repos/github/nbs-system/nxtool-ng/badge.svg?branch=master)](https://coveralls.io/github/nbs-system/nxtool-ng?branch=master) 2 | [![Code Health](https://landscape.io/github/nbs-system/nxtool-ng/master/landscape.svg?style=flat)](https://landscape.io/github/nbs-system/nxtool-ng/master) 3 | [![Code Climate](https://codeclimate.com/github/nbs-system/nxtool-ng/badges/gpa.svg)](https://codeclimate.com/github/nbs-system/nxtool-ng) 4 | [![Build Status](https://travis-ci.org/nbs-system/nxtool-ng.svg?branch=master)](https://travis-ci.org/nbs-system/nxtool-ng) 5 | 6 | ``` 7 | __ __ 8 | .-----.--.--.| |_.-----.-----.| |____.-----.-----.
9 | | |_ _|| _| _ | _ || |____| | _ | 10 | |__|__|__.__||____|_____|_____||__| |__|__|___ | 11 | |_____| 12 | 13 | -- Because life is too short to transform naxsi logs into rules by hand. 14 | ``` 15 | 16 | nxtool-ng is a tool to magically transform your [naxsi]( http://naxsi.org ) logs into useful rules. 17 | It can get its data from your elastic instance, or you can feed it flat files, 18 | and it will magically show you some statistics, generate relevant whitelists, 19 | provide type-based rules, … 20 | 21 | It works with *modules* that generate whitelists without overlapping each other. 22 | 23 | Proudly powered by [Python]( https://python.org ) (2 and 3 by the way), 24 | using (optionally) [elasticsearch-dsl]( https://elasticsearch-dsl.readthedocs.org/en/latest/ ), 25 | written with love and tears by the great people of [NBS-System]( https://nbs-system.com ), 26 | nxtool-ng is released under the [GPL]( https://gnu.org/licenses/gpl.html ). 27 | 28 | # Installation 29 | 30 | Nxtool-ng depends on [nxapi](https://github.com/nbs-system/nxapi) for naxsi-related magic, 31 | and optionally on [elasticsearch-dsl]( https://github.com/elastic/elasticsearch-dsl-py ) 32 | if you want to generate rules from an Elastic instance. You can install them with: 33 | 34 | ### Elasticsearch 5.x 35 | `pip install -r ./requirements-v5.txt` 36 | 37 | ### Elasticsearch 2.x 38 | `pip install -r ./requirements-v2.txt` 39 | 40 | ### Elasticsearch 1.x 41 | 42 | `pip install -r ./requirements-v1.txt` 43 | 44 | # Usage 45 | 46 | ```bash 47 | $ python nxtool.py -h 48 | usage: nxtool.py [-h] [-v] [--elastic-source] [--flat-file FLAT_FILE] [--stdin] 49 | [--elastic-dest] [--typing] [--whitelist] [--slack] [--filter FILTER] 50 | [--filter-regexp FILTER_REGEXP] [--stats] [hostname] 51 | 52 | Sweet tool to help you manage your naxsi logs.
53 | 54 | positional arguments: 55 | hostname 56 | 57 | optional arguments: 58 | -h, --help show this help message and exit 59 | -v, --verbose 60 | 61 | Log sources: 62 | --elastic-source 63 | --flat-file FLAT_FILE 64 | --stdin 65 | 66 | Log destinations: 67 | --elastic-dest 68 | 69 | Actions: 70 | --typing 71 | --whitelist 72 | --slack 73 | --filter FILTER 74 | --filter-regexp FILTER_REGEXP 75 | --stats 76 | ``` 77 | 78 | First you can populate an elasticsearch instance with: 79 | ```bash 80 | $ python nxtool.py --elastic-dest --flat-file example.com.log 81 | ``` 82 | 83 | For example, if you want some stats about `example.com` using your elasticsearch instance: 84 | 85 | ```bash 86 | $ python nxtool.py --elastic-source --stats example.com 87 | 2.39.218.24: 14 88 | 14.76.8.132: 18 89 | 13.24.13.122: 8 90 | 157.5.39.176: 13 91 | 19.187.104.23: 8 92 | 80.24.150.43: 21 93 | 50.2.176.10: 198 94 | 79.14.72.145: 44 95 | 14.26.23.213: 80 96 | 86.242.8.36: 58 97 | 98 | # URI # 99 | /cache.php: 12 100 | /11.php: 12 101 | /call-for-paper-contact/: 82 102 | /: 22 103 | /xmlrpc.php: 22 104 | /en/production/type.asp: 41 105 | /contact/: 21 106 | /wp-json/oembed/1.0/embed: 38 107 | /en/production/formation.asp: 68 108 | /totallylegit/: 14 109 | 110 | # ZONE # 111 | BODY: 276 112 | ARGS|NAME: 24 113 | URL: 22 114 | ARGS: 146 115 | HEADERS: 54 116 | BODY|NAME: 10 117 | FILE_EXT: 4 118 | 119 | # SERVER # 120 | example.com: 536 121 | ``` 122 | 123 | To generate some whitelists for `example.com`, using your elasticsearch instance: 124 | 125 | ```bash 126 | $ python nxtool.py --elastic-source --whitelist example.com 127 | [+] Generating Google analytics rules 128 | [+] Generating Image 1002 rules 129 | [+] Generating cookies rules 130 | [+] Generating var + zone rules 131 | [+] Generating site rules 132 | [+] Generating zone rules 133 | [+] Generating url rules 134 | 135 | Generated whitelists: 136 | BasicRule wl:1310,1311 "mz:$HEADERS_VAR:cookie" "msg:Cookies"; 137 | ``` 138 | 139 | You can add the `--verbose` flag if you want more information about what's going on. 140 | If you're using *flat files*, you can pass, well, flat files, but also *archives* 141 | like `.zip` or `.tar.gz`. 142 | 143 | You can add the `--slack` flag if you want to loosen constraints on whitelist generation. 144 | It can be useful when you have only a small amount of logs.
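For example (hypothetical hostname), to loosen the thresholds while generating whitelists:

```bash
$ python nxtool.py --elastic-source --whitelist --slack example.com
```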
142 | 
143 | You can also use nxtool-ng to query your elasticsearch instance, for example
144 | to search for accesses to `/admin` that triggered the rule `1010` in the `HEADERS` zone:
145 | 
146 | ```bash
147 | $ python nxtool.py --elastic-source --filter 'uri=/admin,zone=HEADERS,id=1010'
148 | 
149 | zone: HEADERS
150 | ip: 133.144.211.172
151 | whitelisted: false
152 | uri: /admin
153 | comments: import:2016-08-30 09:44:17.938620
154 | server: example.com
155 | content: 
156 | var_name: cookie
157 | country: 
158 | date: 2016-08-30T09:45:13+0200
159 | id: 1010
160 | 
161 | zone: HEADERS
162 | ip: 15.125.251.122
163 | whitelisted: false
164 | uri: /admin
165 | comments: import:2016-08-30 11:00:03.523580
166 | server: example.com
167 | content: 
168 | var_name: cookie
169 | country: 
170 | date: 2016-08-30T11:06:36+0200
171 | id: 1010
172 | 
173 | ```
174 | 
175 | It's also possible to *type* your parameters, to tighten the security of
176 | your application a bit:
177 | 
178 | ```
179 | $ python nxtool.py --elastic-source --typing --verbose example.com
180 | 
181 | Generated types:
182 | 
183 | BasicRule negative "rx:^$" "msg:empty" "mz:FILE_EXT:user_avatar" "s:BLOCK";
184 | BasicRule negative "rx:^$" "msg:empty" "mz:FILE_EXT:society_logo" "s:BLOCK";
185 | BasicRule negative "rx:^https?://([0-9a-z-.]+\.)+[\w?+-=&/ ]+$" "msg:url" "mz:ARGS:url" "s:BLOCK";
186 | ```
187 | 
188 | # Note on the structure of ElasticSearch entries
189 | 
190 | Each core rule violation is logged in a NAXSI_FMT entry, and each violation is reported once in the ElasticSearch instance.
191 | The following types are enforced on the elasticsearch entries:
192 | 
193 | ```
194 | ip = Ip
195 | coords = GeoPoint
196 | learning = Boolean
197 | total_processed = Integer
198 | total_blocked = Integer
199 | blocked = Boolean
200 | cscore0 = Keyword
201 | score0 = Integer
202 | zone = Keyword
203 | id = Integer
204 | var_name = Keyword
205 | date = Date
206 | whitelisted = Boolean
207 | uri = Text
208 | server = Text
209 | comments = Text
210 | vers = Text
211 | ```
212 | The first term is the key used in NAXSI_FMT, and the second one is the ElasticSearch type. Text is used as a
213 | backward-compatible version of Keyword. We may drop support for old elasticsearch versions in the near future and
214 | replace Text with Keyword.
215 | 
216 | It is noteworthy that one request might violate multiple core rules and lead to multiple entries in ElasticSearch.
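To make that structure concrete, here is a minimal sketch, not nxtool-ng's actual parsing code, of how the key/value pairs of a NAXSI_FMT entry map onto the fields above (the sample line is made up):

```python
# Minimal sketch, not nxtool-ng's actual parser: split the key/value pairs
# of a NAXSI_FMT entry. The sample line below is made up for illustration.
try:  # python2
    from urlparse import parse_qsl
except ImportError:  # python3
    from urllib.parse import parse_qsl

line = ('ip=127.0.0.1&server=example.com&uri=/admin&learning=0&vers=0.52'
        '&total_processed=472&total_blocked=204&block=0'
        '&cscore0=$UWA&score0=8&zone0=HEADERS&id0=42000227&var_name0=cookie')

entry = dict(parse_qsl(line, keep_blank_values=True))
# Each (zone0, id0, var_name0) triple is one core rule violation; a request
# with several violations (zone1/id1/..., etc.) yields several entries.
print(entry['zone0'], entry['id0'], entry['var_name0'])
```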
217 | 
--------------------------------------------------------------------------------
/nxtool/log_providers/elastic.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import unicode_literals
3 | import logging
4 | import operator
5 | import collections
6 | import datetime
7 | 
8 | try:  # Fuck you guido for removing reduce
9 |     # noinspection PyUnresolvedReferences
10 |     from functools import reduce
11 | except ImportError:
12 |     pass
13 | 
14 | try:
15 |     from ConfigParser import SafeConfigParser as ConfigParser
16 | except ImportError:  # python3
17 |     from configparser import ConfigParser
18 | 
19 | from elasticsearch import TransportError
20 | from elasticsearch_dsl import Search, Q
21 | from elasticsearch_dsl import DocType, Date, Boolean, Integer, Ip, GeoPoint
22 | from elasticsearch_dsl import Index, VERSION
23 | from elasticsearch_dsl.connections import connections
24 | 
25 | try:
26 |     from elasticsearch_dsl import Text, Keyword
27 | except ImportError:  # old version of elasticsearch-dsl
28 |     from elasticsearch_dsl import String
29 |     Text = String
30 |     Keyword = String
31 | 
32 | 
33 | from nxtool.log_providers import LogProvider
34 | 
35 | class Event(DocType):
36 |     ip = Ip()
37 |     coords = GeoPoint()
38 |     learning = Boolean()
39 |     total_processed = Integer()
40 |     total_blocked = Integer()
41 |     blocked = Boolean()
42 |     cscore0 = Keyword()
43 |     score0 = Integer()
44 |     zone = Keyword()
45 |     id = Integer()
46 |     var_name = Keyword()
47 |     date = Date()
48 |     whitelisted = Boolean()
49 |     uri = Text(fields={'raw': Keyword(index='not_analyzed')})
50 |     server = Text(fields={'raw': Keyword(index='not_analyzed')})
51 |     comments = Text(fields={'raw': Keyword(index='not_analyzed')})
52 |     vers = Text(fields={'raw': Keyword(index='not_analyzed')})
53 | 
54 | 
55 |     class Meta:
56 |         doc_type = 'events'
57 |         ## TODO: stop hardcoding the 'events' doc_type used when save() is called,
58 |         ## see elasticsearch_dsl issue 689
59 | 
60 | 
61 | 
62 | class Elastic(LogProvider):
63 |     def __init__(self, config_file='config.cfg'):
64 |         super(Elastic, self).__init__()
65 | 
66 |         self.percentage = 10.0
67 |         self.minimum_occurrences = 250
68 | 
69 |         # The ConfigParser documentation points out that there's no way to force default config
70 |         # options outside the "DEFAULT" section.
71 |         config = ConfigParser()
72 |         config.read(config_file)
73 |         if not config.has_section('elastic'):
74 |             config.add_section('elastic')
75 | 
76 |         for option, value in {'use_ssl': 'True', 'host': '127.0.0.1', 'version': '2', 'index': 'nxapi', 'doc_type': 'events'}.items():
77 |             if not config.has_option('elastic', option):
78 |                 config.set('elastic', option, value)
79 | 
80 |         self.version = config.getint('elastic', 'version')
81 |         self.index = config.get('elastic', 'index')
82 |         use_ssl = config.getboolean('elastic', 'use_ssl')
83 |         host = config.get('elastic', 'host')
84 |         self.doc_type = config.get('elastic', 'doc_type')
85 |         self.client = connections.create_connection(hosts=[host], use_ssl=use_ssl, index=self.index, version=self.version, doc_type=self.doc_type, timeout=30, retry_on_timeout=True)
86 | 
87 |         Event.init(index=self.index)
88 |         index = Index(self.index, using=self.client)
89 |         index.doc_type(Event)
90 |         self.initialize_search()
91 | 
92 |     def initialize_search(self):
93 |         self.search = Search(using=self.client, index=self.index).extra(size=10000)
94 | 
95 |     def export_search(self):
96 |         return self.search
97 | 
98 |     def import_search(self, search):
99 |         self.search = search
100 | 
101 |     def get_filters(self):
102 |         return self.search.to_dict()
103 | 
104 |     def add_filters(self, filters, regexp=False, negative=False):
105 |         """
106 |         Add `filters` to the query.
107 |         `filters` is a dict of the form {'field': value, 'field2': value2}; you can also use a list of values
108 |         instead of a `str`: they will be combined with a logical _or_ (and not an _and_).
109 |         :param dict filters:
110 |         :param bool regexp: currently unused
111 |         :param bool negative:
112 |         :return:
113 |         """
114 |         # We need to use multi_match, since we get the field names dynamically.
115 |         for key, value in filters.items():
116 |             if isinstance(value, set):
117 |                 value = list(value)
118 | 
119 |             # There is no need to process empty values.
120 |             if not value:
121 |                 continue
122 | 
123 |             if isinstance(value, list):
124 |                 if negative:
125 |                     self.search = self.search.query(Q('bool', must_not=[
126 |                         reduce(operator.or_, [Q('multi_match', query=v, fields=[key]) for v in value])])
127 |                     )
128 |                 else:
129 |                     self.search = self.search.query(Q('bool', must=[
130 |                         reduce(operator.or_, [Q('multi_match', query=v, fields=[key]) for v in value])])
131 |                     )
132 |             else:
133 |                 if negative:
134 |                     self.search = self.search.query(~Q("multi_match", query=value, fields=[key]))
135 |                 else:
136 |                     self.search = self.search.query(Q("multi_match", query=value, fields=[key]))
137 | 
138 |     def get_top(self, field, size=250):
139 |         """
140 |         Get the top values for the given `field`
141 |         :param str field: the field to filter on
142 |         :param int size: how many top values to return
143 |         :return dict of int: a structure of the form {value: number_of_hits, value2: number_of_hits2}
144 |         """
145 |         search = self.search
146 |         ret = dict()
147 | 
148 |         if field in ['uri', 'vers', 'comments', 'server']:
149 |             field = ''.join((field, '.raw'))
150 | 
151 |         if VERSION < (5, 0, 0):
152 |             self.search = self.search.params(search_type='count', default_operator='AND')
153 |         else:
154 |             self.search = self.search.params(search_type='query_then_fetch')
155 |         # This is documented at https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.Elasticsearch.search
156 |         # search_type='count' has been deprecated in ES 2.0
157 |         self.search.aggs.bucket('TEST', 'terms', field=field, size=size)
158 |         for hit in self.search.execute(ignore_cache=True).aggregations['TEST']['buckets']:
159 |             ret[hit['key']] = hit['doc_count']
160 |         self.search = search
161 |         return ret
162 | 
163 |     def get_relevant_ids(self, fields, percentage=0, minimum_occurrences=0):
164 |         """ Return the ids that are spread across the different values of `fields`.
165 | 
166 |         :param list of str fields:
167 |         :param float percentage:
168 |         :param float minimum_occurrences:
169 |         :return set of int:
170 |         """
171 |         minimum_occurrences = minimum_occurrences or self.minimum_occurrences
172 |         percentage = percentage or self.percentage
173 | 
174 |         ret = set()
175 |         search = self.search
176 |         ids = set(i['id'] for i in self.search.execute())  # get all possible ids
177 |         self.search = search
178 | 
179 |         for _id in ids:
180 |             search = self.search
181 | 
182 |             self.add_filters({'id': _id})
183 | 
184 |             # Count how many different values of each field there are for a given `id`
185 |             data = collections.defaultdict(set)
186 |             fields_counter = collections.defaultdict(int)
187 |             for res in self.search.execute():
188 |                 for field in fields:
189 |                     if res[field] not in data[field]:
190 |                         fields_counter[field] += 1.0
191 |                     data[field].add(res[field])
192 | 
193 |             # Discard ids that occur too rarely, or that are present on less than `percentage`% of the different values of each field
194 |             for field, content in data.items():
195 |                 if len(content) < minimum_occurrences:
196 |                     logging.debug('Discarding id \033[32m%s\033[0m only present %d times.', _id, len(content))
197 |                     break
198 |                 _percentage = len(content) / fields_counter[field] * 100.0
199 |                 if _percentage > percentage:
200 |                     continue
201 |                 logging.debug('Discarding id \033[32m%s\033[0m present in %d%% of different values of the \033[32m%s\033[0m field', _id, _percentage, field)
202 |                 break
203 |             else:
204 |                 ret.add(_id)
205 |             self.search = search
206 | 
207 |         return ret
208 | 
209 |     def reset_filters(self):
210 |         self.search = Search(using=self.client, index=self.index).extra(size=10000)
211 | 
212 |     def get_results(self):
213 |         """
214 |         Return a `Result` object obtained from the execution of the search `self.search`.
215 |         :return Result: The `Result` object obtained from the execution of the search `self.search`.
216 |         """
217 |         search = self.search
218 |         result = self.search.scan()
219 |         self.search = search
220 |         return result
221 | 
222 |     def commit(self):
223 |         """Process the list of dicts and push them to the DB."""
224 |         self.total_objs += len(self.nlist)
225 |         count = 0
226 | 
227 |         def gen_events(events):
228 |             # Yield the (action, document) pairs expected by the bulk API,
229 |             # using the configured index and doc_type instead of hardcoded ones.
230 |             for d in events:
231 |                 yield {'index': {'_index': self.index, '_type': self.doc_type}}
232 |                 yield d.to_dict()
233 | 
234 | 
235 |         events = list()
236 |         for entry in self.nlist:
237 |             event = Event(_index=self.index)
238 |             for key, value in entry.items():
239 |                 setattr(event, key, value)
240 | 
241 |             event.whitelisted = False
242 |             event.comments = "import on " + str(datetime.datetime.now())
243 |             events.append(event)
244 |             count += 1
245 | 
246 |         try:
247 |             ret = self.client.bulk(gen_events(events))
248 |             ## TODO: parse `ret` and selectively save() only the events that failed
249 |         except TransportError as e:
250 |             logging.warning("We encountered an error (%s); saving the events one by one.", e)
251 |             for event in events:
252 |                 event.save(using=self.client)
253 |                 ## TODO: find a way to change the hardcoded 'events' ES doc_type,
254 |                 ## see elasticsearch_dsl issue 689
255 | 
256 |         self.total_commits += count
257 |         logging.debug("Written %d events", self.total_commits)
258 |         del self.nlist[:]
259 | 
260 | 
--------------------------------------------------------------------------------
/tests/test_flat_files.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | from nxtool.log_providers import flat_file
4 | from nxtool.whitelists_generators import cookies, images_1002, zone_wide, zone_var_wide, url_wide, site_wide_id
5 | from nxtool.whitelists_generators import google_analytics, zone_var_wide_url, array_like_variables_names
6 | 
7 | 
8 | class TestFlatFiles(unittest.TestCase):
9 |     def test_get_relevant_ids(self):
10 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
11 |         self.assertEqual(parser.get_relevant_ids(['zone', ], 1, 1), [42000227])
12 |         self.assertEqual(parser.get_relevant_ids(['zone', ], 1, 100), [])
13 | 
14 | 
15 | class TestParseLog(unittest.TestCase):
16 |     maxDiff = None
17 |     def test_show_stats(self):
18 |         parser = flat_file.FlatFile('tests/data/logs.txt')
19 |         parser.get_statistics()
20 | 
21 |     def test_generate_whitelist_cookies(self):
22 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
23 |         parser.get_relevant_ids = lambda x: [42000227]
24 |         self.assertEqual(cookies.generate_whitelist(parser, []), [{'wl': [42000227], 'mz': ['$HEADERS_VAR:cookie'],
25 |                                                                    'msg': 'Cookies that matches a id 42000227'}])
26 | 
27 |         self.assertEqual(cookies.generate_whitelist(parser, [{'id': 1234}]), [{'wl': [42000227], 'mz': ['$HEADERS_VAR:cookie'],
28 |                                                                                'msg': 'Cookies that matches a id 42000227'}])
29 |     def test_generate_whitelist_images(self):
30 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
31 |         self.assertEqual(
32 |             images_1002.generate_whitelist(parser, []),
33 |             [{'mz': ['$URL_X:^/phpMyAdmin-2.8.2/scripts/setup.php|URL'], 'wl': [1002], 'msg': 'Images size (0x)'}]
34 |         )
35 |         self.assertEqual(images_1002.generate_whitelist(parser, [{'wl': [1002]}]), [])
36 | 
37 |     def test_generate_whitelist_zone_wide(self):
38 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
39 |         parser.get_top = lambda x: {1337: 2048, '': 1337} if x == 'id' else {'ARGS': 2048}
40 |         parser.get_relevant_ids = lambda x: [1337]
41 |         self.assertEqual(zone_wide.generate_whitelist(parser, []),
42 |                          [{'msg': 'zone-wide ID whitelist if it matches a id 1337', 'mz': ['ARGS'], 'wl': {1337}}])
43 | 
44 |         parser.get_relevant_ids = lambda x: []
45 |         self.assertEqual(zone_wide.generate_whitelist(parser, [{'id': 1337}]), [])
46 | 
47 |         parser.get_top = lambda x: {1337: 2} if x == 'id' else {'ARGS': 2}
48 |         self.assertEqual(zone_wide.generate_whitelist(parser, []), [])
49 | 
50 |     def test_generate_whitelist_zone_var_wide(self):
51 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
52 |         parser.get_relevant_ids = lambda x: [1337]
53 |         parser.get_top = lambda x: {'test_var_name': 2048, '': 123, 'super-test': 12}
54 |         expected = [
55 |             {'msg': 'Variable zone-wide if it matches a id 1337', 'mz': ['BODY:test_var_name'], 'wl': [1337]},
56 |             {'msg': 'Variable zone-wide if it matches a id 1337', 'mz': ['ARGS:test_var_name|NAME'], 'wl': [1337]},
57 |             {'msg': 'Variable zone-wide if it matches a id 1337', 'mz': ['ARGS:test_var_name'], 'wl': [1337]},
58 |             {'msg': 'Variable zone-wide if it matches a id 1337', 'mz': ['BODY:test_var_name|NAME'], 'wl': [1337]}
59 |         ]
60 |         try:
61 |             self.assertCountEqual(zone_var_wide.generate_whitelist(parser, []), expected)
62 |         except AttributeError:  # Python2/3 fuckery
63 |             self.assertItemsEqual(zone_var_wide.generate_whitelist(parser, []), expected)
64 | 
65 |         parser.get_top = lambda x: {'test_var_name': 0}
66 |         try:
67 |             self.assertCountEqual(zone_var_wide.generate_whitelist(parser, [{'id': 1, 'mz': 'BODY'}]), [])
68 |         except AttributeError:  # Python2/3 fuckery
69 |             self.assertItemsEqual(zone_var_wide.generate_whitelist(parser, [{'id': 1, 'mz': 'BODY'}]), [])
70 | 
71 | 
72 |     def test_generate_whitelist_url_wide(self):
73 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
74 |         parser.get_relevant_ids = lambda x: [1337]
75 |         parser.get_top = lambda x: {'1337': 2048, '123': 2}
76 |         expected = [{'msg': 'url-wide whitelist if it matches a id 1337', 'mz': ['$URL:1337'], 'wl': {'1337'}},
77 |                     {'msg': 'url-wide whitelist if it matches a id 1337', 'mz': ['$URL:123'], 'wl': {'1337'}}]
78 |         self.assertTrue(all(i in url_wide.generate_whitelist(parser, []) for i in expected))
79 | 
80 |         parser.get_relevant_ids = lambda x: []
81 |         parser.get_top = lambda x: {}
82 |         self.assertEqual(url_wide.generate_whitelist(parser, [{'id': 1337}]), [])
83 | 
84 |     def test_generate_whitelist_site_wide_id(self):
85 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
86 |         parser.get_relevant_ids = lambda x, y: [1337]
87 |         parser.get_top = lambda x: {'1337': 2048}
88 |         self.assertEqual(site_wide_id.generate_whitelist(parser, []),
89 |                          [{'msg': 'Site-wide id+zone if it matches id 1337', 'mz': ['1337'], 'wl': [1337]}])
90 |         self.assertEqual(site_wide_id.generate_whitelist(parser, [{'id': 1234}]),
91 |                          [{'msg': 'Site-wide id+zone if it matches id 1337', 'mz': ['1337'], 'wl': [1337]}])
92 | 
93 |     def test_generate_whitelist_google_analytics(self):
94 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
95 |         parser.get_relevant_ids = lambda x: [1337]
96 |         parser.get_top = lambda x: {'1337': 2048}
97 |         self.assertEqual(google_analytics.generate_whitelist(parser, []),
98 |                          [{'msg': 'Google analytics', 'mz': ['$ARGS_VAR_X:__utm[abctvz]'], 'wl': [1337]}])
99 |         self.assertEqual(google_analytics.generate_whitelist(parser, [{'id': 1234}]),
100 |                          [{'msg': 'Google analytics', 'mz': ['$ARGS_VAR_X:__utm[abctvz]'], 'wl': [1337]}])
101 |         self.assertEqual(google_analytics.generate_whitelist(parser, [{'wl': 1002}]),
102 |                          [{'msg': 'Google analytics', 'mz': ['$ARGS_VAR_X:__utm[abctvz]'], 'wl': [1337]}])
103 | 
104 |     def test_generate_whitelist_zone_var_wide_url(self):
105 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
106 |         parser.get_relevant_ids = lambda x: [1337]
107 |         parser.get_top = lambda x: {'1337': 2048, '': 123, 'test': 1}
108 |         expected = [
109 |             {
110 |                 'msg': 'Variable zone-wide on a specific url if it matches a id 1337',
111 |                 'mz': ['$URL:1337|$BODY_VAR:1337'],
112 |                 'wl': [1337]
113 |             },
114 |             {
115 |                 'msg': 'Variable zone-wide on a specific url if it matches a id 1337',
116 |                 'mz': ['$URL:1337|ARGS:1337|NAME'],
117 |                 'wl': [1337]
118 |             },
119 |             {
120 |                 'msg': 'Variable zone-wide on a specific url if it matches a id 1337',
121 |                 'mz': ['$URL:1337|$ARGS_VAR:1337'], 'wl': [1337]
122 |             },
123 |             {
124 |                 'msg': 'Variable zone-wide on a specific url if it matches a id 1337',
125 |                 'mz': ['$URL:1337|BODY:1337|NAME'],
126 |                 'wl': [1337]
127 |             }]
128 |         self.assertTrue(all(i in zone_var_wide_url.generate_whitelist(parser, [{'id': 123}]) for i in expected))
129 | 
130 |     def test_generate_whitelist_array_like_variables_names(self):
131 |         parser = flat_file.FlatFile('./tests/data/images_1002.txt')
132 |         parser.get_relevant_ids = lambda x: [1337]
133 |         parser.get_top = lambda x: {'1337': 2048}
134 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [])
135 | 
136 |         parser.get_relevant_ids = lambda x: [1310]
137 |         parser.get_top = lambda x: {'test[1234]': 2048}
138 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [
139 |             {'msg': 'Array-like variable name', 'mz': ['$BODY_VAR_X:^test\\[.+\\]$'], 'wl': [1310, 1311]},
140 |             {'msg': 'Array-like variable name', 'mz': ['$ARGS_VAR_X:^test\\[.+\\]$'], 'wl': [1310, 1311]}]
141 |         )
142 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, [{'id': 3, 'mz': 'BODY'}]), [
143 |             {'msg': 'Array-like variable name', 'mz': ['$BODY_VAR_X:^test\\[.+\\]$'], 'wl': [1310, 1311]},
144 |             {'msg': 'Array-like variable name', 'mz': ['$ARGS_VAR_X:^test\\[.+\\]$'], 'wl': [1310, 1311]}]
145 |         )
146 |         parser.get_top = lambda x: {'test[1234]]': 2048}
147 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [])
148 | 
149 |         parser.get_top = lambda x: {'test[[1234]': 2048}
150 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [])
151 | 
152 |         parser.get_top = lambda x: {'test[1234]': 1}
153 |         self.assertEqual(array_like_variables_names.generate_whitelist(parser, []), [])
154 | 
155 | class TestFiltering(unittest.TestCase):
156 |     def test_filter_str(self):
157 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
158 |         self.assertEqual([i for i in parser.get_results()][0], {'block': '0',
159 |             'cscore0': '$UWA', 'id': '42000227', 'ip': 'X.X.X.X', 'learning': '0', 'score0': '8',
160 |             'server': 'Y.Y.Y.Y', 'total_blocked': '204', 'total_processed': '472',
161 |             'uri': '/phpMyAdmin-2.8.2/scripts/setup.php', 'var_name': 'cookie', 'vers': '0.52', 'zone': 'HEADERS',
162 |             'coords': None, 'date': '20131110T07:36:19'}
163 |         )
164 | 
165 |     def test_filter_list(self):
166 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
167 |         parser.add_filters({'ip': ['X.X.X.X', 'A.A.A.A']})
168 |         self.assertEqual([i for i in parser.get_results()], [{'block': '0',
169 |             'cscore0': '$UWA', 'id': '42000227', 'ip': 'X.X.X.X', 'learning': '0', 'score0': '8',
170 |             'server': 'Y.Y.Y.Y', 'total_blocked': '204', 'total_processed': '472',
171 |             'uri': '/phpMyAdmin-2.8.2/scripts/setup.php', 'var_name': 'cookie', 'vers': '0.52', 'zone': 'HEADERS',
172 |             'coords': None, 'date': '20131110T07:36:19'}])
173 | 
174 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
175 |         parser.add_filters({'ip': ['A.A.A.A']})
176 |         self.assertEqual([i for i in parser.get_results()], [])
177 | 
178 |         parser = flat_file.FlatFile('./tests/data/cookies.txt')
179 |         parser.add_filters({'ip': ['X.X.X.X']}, negative=True)
180 |         self.assertEqual([i for i in parser.get_results()], [])
181 | 
--------------------------------------------------------------------------------