├── drain3 ├── py.typed ├── __init__.py ├── persistence_handler.py ├── memory_buffer_persistence.py ├── file_persistence.py ├── redis_persistence.py ├── kafka_persistence.py ├── masking.py ├── template_miner_config.py ├── simple_profiler.py ├── jaccard_drain.py ├── template_miner.py └── drain.py ├── deploy_new_ver.sh ├── .gitignore ├── tests ├── drain3_test.ini ├── test_masking.py ├── test_drain.py ├── test_jaccard_drain.py └── test_template_miner.py ├── LICENSE.txt ├── examples ├── drain3.ini ├── drain_bigfile_demo.py └── drain_stdin_demo.py ├── pyproject.toml ├── CONTRIBUTING.md ├── .github └── workflows │ └── CI.yaml ├── README.md └── poetry.lock /drain3/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deploy_new_ver.sh: -------------------------------------------------------------------------------- 1 | python3 setup.py sdist 2 | twine upload dist/* -------------------------------------------------------------------------------- /drain3/__init__.py: -------------------------------------------------------------------------------- 1 | from drain3.template_miner import TemplateMiner 2 | 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__/* 2 | MANIFEST 3 | dist/* 4 | *venv/* 5 | .idea/* 6 | .vscode/* 7 | drain3.egg-info/* 8 | snapshot.txt 9 | examples/snapshot.txt 10 | *.bin 11 | *.log 12 | *.gz -------------------------------------------------------------------------------- /drain3/persistence_handler.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | from abc import ABC, abstractmethod 4 | from typing import Optional 5 | 6 | 7 | class PersistenceHandler(ABC): 8 | 9 | @abstractmethod 10 | def save_state(self, state: bytes) -> None: 11 | pass 12 | 13 | @abstractmethod 14 | def load_state(self) -> Optional[bytes]: 15 | pass 16 | -------------------------------------------------------------------------------- /drain3/memory_buffer_persistence.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | from typing import Optional 4 | 5 | from drain3.persistence_handler import PersistenceHandler 6 | 7 | 8 | class MemoryBufferPersistence(PersistenceHandler): 9 | def __init__(self) -> None: 10 | self.state: Optional[bytes] = None 11 | 12 | def save_state(self, state: bytes) -> None: 13 | self.state = state 14 | 15 | def load_state(self) -> Optional[bytes]: 16 | return self.state -------------------------------------------------------------------------------- /drain3/file_persistence.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import os 4 | import pathlib 5 | from typing import Optional 6 | 7 | from drain3.persistence_handler import PersistenceHandler 8 | 9 | 10 | class FilePersistence(PersistenceHandler): 11 | def __init__(self, file_path: str) -> None: 12 | self.file_path = file_path 13 | 14 | def save_state(self, state: bytes) -> None: 15 | pathlib.Path(self.file_path).write_bytes(state) 16 | 17 | def load_state(self) -> Optional[bytes]: 18 | if not os.path.exists(self.file_path): 19 | return None 20 | 21 | return pathlib.Path(self.file_path).read_bytes() 22 | 
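A minimal usage sketch of the persistence API above, assuming only what the bundled examples and tests show: TemplateMiner takes a persistence handler as its first constructor argument (falling back to its default configuration when no config is given), and add_log_message returns a dict containing "change_type" and "template_mined". The log line itself is illustrative.

from drain3 import TemplateMiner
from drain3.file_persistence import FilePersistence

# Keep Drain3 state in a local file so mined clusters survive process restarts.
persistence = FilePersistence("drain3_state.bin")
template_miner = TemplateMiner(persistence)

result = template_miner.add_log_message("connected to host alpha, retry 3")
print(result["change_type"], result["template_mined"])

Any other PersistenceHandler subclass (MemoryBufferPersistence, RedisPersistence, KafkaPersistence, or a custom handler implementing save_state/load_state) can be passed in the same way.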
-------------------------------------------------------------------------------- /tests/drain3_test.ini: -------------------------------------------------------------------------------- 1 | [SNAPSHOT] 2 | snapshot_interval_minutes = 10 3 | compress_state = True 4 | 5 | [MASKING] 6 | masking = [ 7 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)", "mask_with": "ID"}, 8 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"}, 9 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, 10 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, 11 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)", "mask_with": "HEX"}, 12 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "mask_with": "NUM"}, 13 | {"regex_pattern":"(?<=executed cmd )(\".+?\")", "mask_with": "CMD"} 14 | ] 15 | 16 | [DRAIN] 17 | engine = Drain 18 | sim_th = 0.4 19 | depth = 4 20 | max_children = 100 21 | max_clusters = 1024 22 | extra_delimiters = ["_"] 23 | 24 | [PROFILING] 25 | enabled = True 26 | report_sec = 30 27 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020-2022 International Business Machines 4 | and the Drain3 project contributors. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /drain3/redis_persistence.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | from typing import Optional, Union 4 | 5 | import redis 6 | 7 | from drain3.persistence_handler import PersistenceHandler 8 | 9 | 10 | class RedisPersistence(PersistenceHandler): 11 | def __init__(self, 12 | redis_host: str, 13 | redis_port: int, 14 | redis_db: int, 15 | redis_pass: Optional[str], 16 | is_ssl: bool, 17 | redis_key: Union[bytes, str, memoryview]) -> None: 18 | self.redis_host = redis_host 19 | self.redis_port = redis_port 20 | self.redis_db = redis_db 21 | self.redis_pass = redis_pass 22 | self.is_ssl = is_ssl 23 | self.redis_key = redis_key 24 | self.r = redis.Redis(host=self.redis_host, 25 | port=self.redis_port, 26 | db=self.redis_db, 27 | password=self.redis_pass, 28 | ssl=self.is_ssl) 29 | 30 | def save_state(self, state: bytes) -> None: 31 | self.r.set(self.redis_key, state) 32 | 33 | def load_state(self) -> Optional[bytes]: 34 | return self.r.get(self.redis_key) 35 | -------------------------------------------------------------------------------- /examples/drain3.ini: -------------------------------------------------------------------------------- 1 | [SNAPSHOT] 2 | snapshot_interval_minutes = 10 3 | compress_state = True 4 | 5 | [MASKING] 6 | masking = [ 7 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)", "mask_with": "ID"}, 8 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"}, 9 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, 10 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, 11 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)", "mask_with": "HEX"}, 12 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "mask_with": "NUM"}, 13 | {"regex_pattern":"(?<=executed cmd )(\".+?\")", "mask_with": "CMD"} 14 | ] 15 | mask_prefix = <: 16 | mask_suffix = :> 17 | 18 | [DRAIN] 19 | # engine is Optional parameter. Engine will be "Drain" if the engine argument is not specified. 20 | # engine has two options: 'Drain' and 'JaccardDrain'. 
21 | # engine = Drain 22 | sim_th = 0.4 23 | depth = 4 24 | max_children = 100 25 | max_clusters = 1024 26 | extra_delimiters = ["_"] 27 | 28 | [PROFILING] 29 | enabled = True 30 | report_sec = 30 31 | -------------------------------------------------------------------------------- /tests/test_masking.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import unittest 4 | 5 | from drain3.masking import MaskingInstruction, LogMasker 6 | 7 | 8 | class MaskingTest(unittest.TestCase): 9 | 10 | def test_instructions_by_mask_name(self): 11 | instructions = [] 12 | a = MaskingInstruction(r"a", "1") 13 | instructions.append(a) 14 | b = MaskingInstruction(r"b", "1") 15 | instructions.append(b) 16 | c = MaskingInstruction(r"c", "2") 17 | instructions.append(c) 18 | d = MaskingInstruction(r"d", "3") 19 | instructions.append(d) 20 | x = MaskingInstruction(r"x", "something else") 21 | instructions.append(x) 22 | y = MaskingInstruction(r"y", "something else") 23 | instructions.append(y) 24 | masker = LogMasker(instructions, "", "") 25 | self.assertCountEqual(["1", "2", "3", "something else"], masker.mask_names) 26 | self.assertCountEqual([a, b], masker.instructions_by_mask_name("1")) 27 | self.assertCountEqual([c], masker.instructions_by_mask_name("2")) 28 | self.assertCountEqual([d], masker.instructions_by_mask_name("3")) 29 | self.assertCountEqual([x, y], masker.instructions_by_mask_name("something else")) 30 | 31 | def test_mask(self): 32 | s = "D9 test 999 888 1A ccc 3" 33 | mi = MaskingInstruction(r"((?<=[^A-Za-z0-9])|^)([\-\+]?\d+)((?=[^A-Za-z0-9])|$)", "NUM") 34 | masker = LogMasker([mi], "<", ">") 35 | masked = masker.mask(s) 36 | self.assertEqual("D9 test <NUM> <NUM> 1A ccc <NUM>", masked) 37 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "drain3" 3 | version = "0.9.11" 4 | description = "Persistent & streaming log template miner" 5 | authors = ["IBM Research Haifa "] 6 | maintainers = ["Yihao Chen(Superskyyy) "] 7 | readme = "README.md" 8 | license = "MIT" 9 | keywords=['drain', 'log', 'parser', 'IBM', 'template', 'logs', 'miner'] 10 | 11 | classifiers=[ 12 | "Programming Language :: Python :: 3.7", 13 | "Programming Language :: Python :: 3.8", 14 | "Programming Language :: Python :: 3.9", 15 | "Programming Language :: Python :: 3.10", 16 | "Programming Language :: Python :: 3.11", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | 'Topic :: System :: Monitoring', 20 | "Topic :: Software Development :: Libraries", 21 | ] 22 | 23 | packages = [ 24 | { include = "drain3" }, 25 | ] 26 | 27 | exclude = ['tests', 'examples'] 28 | 29 | [tool.poetry.build] 30 | generate-setup-file = true 31 | 32 | 33 | [tool.poetry.dependencies] 34 | python = "^3.7" 35 | jsonpickle = "*" 36 | cachetools = "*" 37 | redis = { version = "*", optional = true } 38 | kafka-python = { version = "*", optional = true } 39 | 40 | [tool.poetry.extras] 41 | all=[ 42 | 'redis', 43 | 'kafka-python', 44 | ] 45 | kafka= [ 46 | 'kafka-python', 47 | ] 48 | redis=[ 49 | 'redis', 50 | ] 51 | 52 | [tool.poetry.group.dev.dependencies] 53 | mypy = "*" 54 | types-cachetools = "*" 55 | types-redis = "*" 56 | 57 | [build-system] 58 | requires = ["poetry-core"] 59 | build-backend = "poetry.core.masonry.api" 60 | 
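The MASKING sections in tests/drain3_test.ini and examples/drain3.ini above are loaded by TemplateMinerConfig into MaskingInstruction objects and applied through LogMasker (both defined in drain3/masking.py further below). A minimal sketch of that masking step in isolation, reusing the NUM pattern and the <: :> mask prefix/suffix from examples/drain3.ini; the input line is illustrative.

from drain3.masking import LogMasker, MaskingInstruction

# Mask standalone integers; each mask is wrapped in the configured prefix/suffix.
num = MaskingInstruction(r"((?<=[^A-Za-z0-9])|^)([\-\+]?\d+)((?=[^A-Za-z0-9])|$)", "NUM")
masker = LogMasker([num], "<:", ":>")
print(masker.mask("user 42 logged in from port 8080"))
# -> user <:NUM:> logged in from port <:NUM:>

TemplateMinerConfig.load performs the same construction when it parses the masking JSON list from an ini file.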
-------------------------------------------------------------------------------- /drain3/kafka_persistence.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | from typing import Any, cast, Optional 4 | 5 | import kafka # type: ignore[import] 6 | 7 | from drain3.persistence_handler import PersistenceHandler 8 | 9 | 10 | class KafkaPersistence(PersistenceHandler): 11 | 12 | def __init__(self, topic: str, snapshot_poll_timeout_sec: int = 60, **kafka_client_options: Any) -> None: 13 | self.topic = topic 14 | self.kafka_client_options = kafka_client_options 15 | self.producer = kafka.KafkaProducer(**self.kafka_client_options) 16 | self.snapshot_poll_timeout_sec = snapshot_poll_timeout_sec 17 | 18 | def save_state(self, state: bytes) -> None: 19 | self.producer.send(self.topic, value=state) 20 | 21 | def load_state(self) -> Optional[bytes]: 22 | consumer = kafka.KafkaConsumer(**self.kafka_client_options) 23 | partition = kafka.TopicPartition(self.topic, 0) 24 | consumer.assign([partition]) 25 | end_offsets = consumer.end_offsets([partition]) 26 | end_offset = list(end_offsets.values())[0] 27 | if end_offset > 0: 28 | consumer.seek(partition, end_offset - 1) 29 | snapshot_poll_timeout_ms = self.snapshot_poll_timeout_sec * 1000 30 | records = consumer.poll(snapshot_poll_timeout_ms) 31 | if not records: 32 | raise RuntimeError(f"No message received from Kafka during restore even though end_offset>0") 33 | last_msg = records[partition][0] 34 | state = cast(bytes, last_msg.value) 35 | else: 36 | state = None 37 | 38 | consumer.close() 39 | return state 40 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | All contributors must agree to the Developer Certificate of Origin Version 1.1. (DCO 1.1) by signing their commits with: 2 | 3 | ``` 4 | Signed-off-by: [NAME] <[EMAIL]> 5 | ``` 6 | 7 | This can be simply achieved with `git commit -s` when formatting your commit message. 8 | 9 | The full text of the DCO 1.1 is as follows: 10 | 11 | ``` 12 | Developer Certificate of Origin 13 | Version 1.1 14 | 15 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 16 | 660 York Street, Suite 102, 17 | San Francisco, CA 94110 USA 18 | 19 | Everyone is permitted to copy and distribute verbatim copies of this 20 | license document, but changing it is not allowed. 21 | 22 | 23 | Developer's Certificate of Origin 1.1 24 | 25 | By making a contribution to this project, I certify that: 26 | 27 | (a) The contribution was created in whole or in part by me and I 28 | have the right to submit it under the open source license 29 | indicated in the file; or 30 | 31 | (b) The contribution is based upon previous work that, to the best 32 | of my knowledge, is covered under an appropriate open source 33 | license and I have the right under that license to submit that 34 | work with modifications, whether created in whole or in part 35 | by me, under the same open source license (unless I am 36 | permitted to submit under a different license), as indicated 37 | in the file; or 38 | 39 | (c) The contribution was provided directly to me by some other 40 | person who certified (a), (b) or (c) and I have not modified 41 | it. 
42 | 43 | (d) I understand and agree that this project and the contribution 44 | are public and that a record of the contribution (including all 45 | personal information I submit with it, including my sign-off) is 46 | maintained indefinitely and may be redistributed consistent with 47 | this project or the open source license(s) involved. 48 | ``` 49 | -------------------------------------------------------------------------------- /examples/drain_bigfile_demo.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import json 4 | import logging 5 | import os 6 | import subprocess 7 | import sys 8 | import time 9 | from os.path import dirname 10 | 11 | from drain3 import TemplateMiner 12 | from drain3.template_miner_config import TemplateMinerConfig 13 | 14 | logger = logging.getLogger(__name__) 15 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') 16 | 17 | in_gz_file = "SSH.tar.gz" 18 | in_log_file = "SSH.log" 19 | if not os.path.isfile(in_log_file): 20 | logger.info(f"Downloading file {in_gz_file}") 21 | p = subprocess.Popen(f"curl https://zenodo.org/record/3227177/files/{in_gz_file} --output {in_gz_file}", shell=True) 22 | p.wait() 23 | logger.info(f"Extracting file {in_gz_file}") 24 | p = subprocess.Popen(f"tar -xvzf {in_gz_file}", shell=True) 25 | p.wait() 26 | 27 | 28 | config = TemplateMinerConfig() 29 | config.load(f"{dirname(__file__)}/drain3.ini") 30 | config.profiling_enabled = True 31 | template_miner = TemplateMiner(config=config) 32 | 33 | line_count = 0 34 | 35 | with open(in_log_file) as f: 36 | lines = f.readlines() 37 | 38 | start_time = time.time() 39 | batch_start_time = start_time 40 | batch_size = 10000 41 | 42 | for line in lines: 43 | line = line.rstrip() 44 | line = line.partition(": ")[2] 45 | result = template_miner.add_log_message(line) 46 | line_count += 1 47 | if line_count % batch_size == 0: 48 | time_took = time.time() - batch_start_time 49 | rate = batch_size / time_took 50 | logger.info(f"Processing line: {line_count}, rate {rate:.1f} lines/sec, " 51 | f"{len(template_miner.drain.clusters)} clusters so far.") 52 | batch_start_time = time.time() 53 | if result["change_type"] != "none": 54 | result_json = json.dumps(result) 55 | logger.info(f"Input ({line_count}): {line}") 56 | logger.info(f"Result: {result_json}") 57 | 58 | time_took = time.time() - start_time 59 | rate = line_count / time_took 60 | logger.info(f"--- Done processing file in {time_took:.2f} sec. 
Total of {line_count} lines, rate {rate:.1f} lines/sec, " 61 | f"{len(template_miner.drain.clusters)} clusters") 62 | 63 | sorted_clusters = sorted(template_miner.drain.clusters, key=lambda it: it.size, reverse=True) 64 | for cluster in sorted_clusters: 65 | logger.info(cluster) 66 | 67 | print("Prefix Tree:") 68 | template_miner.drain.print_tree() 69 | 70 | template_miner.profiler.report(0) 71 | -------------------------------------------------------------------------------- /.github/workflows/CI.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | schedule: 9 | - cron: '0 18 * * *' 10 | 11 | concurrency: 12 | group: CI-tests-${{ github.event.pull_request.number || github.ref }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | changes: 17 | # Check if any file related to Drain3/ CI behavior is changed 18 | # set outputs for other jobs to access for if conditions 19 | name: Check Changes 20 | runs-on: ubuntu-latest 21 | # To prevent error when there's no base branch 22 | if: github.event_name != 'schedule' 23 | timeout-minutes: 10 24 | outputs: 25 | drain3: ${{ steps.filter.outputs.drain3 }} 26 | steps: 27 | - uses: actions/checkout@v3 # required for push event 28 | - name: Check for file changes 29 | uses: getsentry/paths-filter@v2.11.1 30 | id: filter 31 | with: 32 | token: ${{ github.token }} 33 | # The following filters indicate a category along with 34 | # the files that should not be ignored by CI when modified. 35 | filters: | 36 | drain3: 37 | - '.github/**/*.yaml' 38 | - '**/*.py' 39 | - '**/Dockerfile*' 40 | - '**/Makefile' 41 | - 'tests/**' 42 | - '**/*.bat' 43 | - '**/*.sh' 44 | - '**/*.ps1' 45 | - '**/pyproject.toml' 46 | - '**/poetry.lock' 47 | - '**/*.cfg' 48 | - '**/*.ini' 49 | list-files: json # logs matched files 50 | build: 51 | runs-on: ubuntu-latest 52 | needs: [changes] 53 | if: | 54 | ( always() && ! 
cancelled() ) && 55 | ((github.event_name == 'schedule' && github.repository == 'logpai/drain3') || needs.changes.outputs.drain3 == 'true') 56 | 57 | strategy: 58 | matrix: 59 | python-version: [ "3.8", "3.9", "3.10", "3.11" ] 60 | fail-fast: false 61 | env: 62 | PYTHON_VERSION: ${{ matrix.python-version }} 63 | 64 | steps: 65 | - name: Check out Drain3 codebase 66 | uses: actions/checkout@v3 67 | 68 | - name: Set up Python ${{ matrix.python-version }} 69 | uses: actions/setup-python@v4 70 | with: 71 | python-version: ${{ matrix.python-version }} 72 | 73 | - name: Setup Poetry 74 | run: | 75 | python -m pip install --upgrade pip 76 | python -m pip install --upgrade poetry 77 | 78 | - name: Install dependencies 79 | run: poetry install 80 | 81 | - name: Test with unittest 82 | run: poetry run python -m unittest discover --verbose --start-directory tests 83 | -------------------------------------------------------------------------------- /examples/drain_stdin_demo.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import json 4 | import logging 5 | import sys 6 | from os.path import dirname 7 | 8 | from drain3 import TemplateMiner 9 | from drain3.template_miner_config import TemplateMinerConfig 10 | 11 | # persistence_type = "NONE" 12 | # persistence_type = "REDIS" 13 | # persistence_type = "KAFKA" 14 | persistence_type = "FILE" 15 | 16 | logger = logging.getLogger(__name__) 17 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') 18 | 19 | if persistence_type == "KAFKA": 20 | from drain3.kafka_persistence import KafkaPersistence 21 | 22 | persistence = KafkaPersistence("drain3_state", bootstrap_servers="localhost:9092") 23 | 24 | elif persistence_type == "FILE": 25 | from drain3.file_persistence import FilePersistence 26 | 27 | persistence = FilePersistence("drain3_state.bin") 28 | 29 | elif persistence_type == "REDIS": 30 | from drain3.redis_persistence import RedisPersistence 31 | 32 | persistence = RedisPersistence(redis_host='', 33 | redis_port=25061, 34 | redis_db=0, 35 | redis_pass='', 36 | is_ssl=True, 37 | redis_key="drain3_state_key") 38 | else: 39 | persistence = None 40 | 41 | config = TemplateMinerConfig() 42 | config.load(f"{dirname(__file__)}/drain3.ini") 43 | config.profiling_enabled = False 44 | 45 | template_miner = TemplateMiner(persistence, config) 46 | print(f"Drain3 started with '{persistence_type}' persistence") 47 | print(f"{len(config.masking_instructions)} masking instructions are in use") 48 | print(f"Starting training mode. Reading from std-in ('q' to finish)") 49 | while True: 50 | log_line = input("> ") 51 | if log_line == 'q': 52 | break 53 | result = template_miner.add_log_message(log_line) 54 | result_json = json.dumps(result) 55 | print(result_json) 56 | template = result["template_mined"] 57 | params = template_miner.extract_parameters(template, log_line) 58 | print(f"Parameters: {str(params)}") 59 | 60 | print("Training done. Mined clusters:") 61 | for cluster in template_miner.drain.clusters: 62 | print(cluster) 63 | 64 | print(f"Starting inference mode, matching to pre-trained clusters. 
Input log lines or 'q' to finish") 65 | while True: 66 | log_line = input("> ") 67 | if log_line == 'q': 68 | break 69 | cluster = template_miner.match(log_line) 70 | if cluster is None: 71 | print(f"No match found") 72 | else: 73 | template = cluster.get_template() 74 | print(f"Matched template #{cluster.cluster_id}: {template}") 75 | print(f"Parameters: {template_miner.get_parameter_list(template, log_line)}") 76 | -------------------------------------------------------------------------------- /drain3/masking.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import abc 4 | import re 5 | from typing import cast, Collection, Dict, List 6 | 7 | 8 | class AbstractMaskingInstruction(abc.ABC): 9 | 10 | def __init__(self, mask_with: str): 11 | self.mask_with = mask_with 12 | 13 | @abc.abstractmethod 14 | def mask(self, content: str, mask_prefix: str, mask_suffix: str) -> str: 15 | """ 16 | Mask content according to this instruction and return the result. 17 | 18 | :param content: text to apply masking to 19 | :param mask_prefix: the prefix of any masks inserted 20 | :param mask_suffix: the suffix of any masks inserted 21 | """ 22 | pass 23 | 24 | 25 | class MaskingInstruction(AbstractMaskingInstruction): 26 | 27 | def __init__(self, pattern: str, mask_with: str): 28 | super().__init__(mask_with) 29 | self.regex = re.compile(pattern) 30 | 31 | @property 32 | def pattern(self) -> str: 33 | return self.regex.pattern 34 | 35 | def mask(self, content: str, mask_prefix: str, mask_suffix: str) -> str: 36 | mask = mask_prefix + self.mask_with + mask_suffix 37 | return self.regex.sub(mask, content) 38 | 39 | 40 | # Alias for `MaskingInstruction`. 41 | RegexMaskingInstruction = MaskingInstruction 42 | 43 | 44 | class LogMasker: 45 | 46 | def __init__(self, masking_instructions: Collection[AbstractMaskingInstruction], 47 | mask_prefix: str, mask_suffix: str): 48 | self.mask_prefix = mask_prefix 49 | self.mask_suffix = mask_suffix 50 | self.masking_instructions = masking_instructions 51 | mask_name_to_instructions: Dict[str, List[AbstractMaskingInstruction]] = {} 52 | for mi in self.masking_instructions: 53 | mask_name_to_instructions.setdefault(mi.mask_with, []) 54 | mask_name_to_instructions[mi.mask_with].append(mi) 55 | self.mask_name_to_instructions = mask_name_to_instructions 56 | 57 | def mask(self, content: str) -> str: 58 | for mi in self.masking_instructions: 59 | content = mi.mask(content, self.mask_prefix, self.mask_suffix) 60 | return content 61 | 62 | @property 63 | def mask_names(self) -> Collection[str]: 64 | return self.mask_name_to_instructions.keys() 65 | 66 | def instructions_by_mask_name(self, mask_name: str) -> Collection[AbstractMaskingInstruction]: 67 | return cast(Collection[AbstractMaskingInstruction], self.mask_name_to_instructions.get(mask_name, [])) 68 | 69 | # Some masking examples 70 | # --------------------- 71 | # 72 | # masking_instances = [ 73 | # MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)', "ID"), 74 | # MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})((?=[^A-Za-z0-9])|$)', "IP"), 75 | # MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)', "SEQ"), 76 | # MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)', "SEQ"), 77 | # 78 | # MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)', "HEX"), 79 | # 
MaskingInstruction(r'((?<=[^A-Za-z0-9])|^)([\-\+]?\d+)((?=[^A-Za-z0-9])|$)', "NUM"), 80 | # MaskingInstruction(r'(?<=executed cmd )(".+?")', "CMD"), 81 | # ] 82 | -------------------------------------------------------------------------------- /drain3/template_miner_config.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import ast 4 | import configparser 5 | import json 6 | import logging 7 | from typing import Collection, Optional 8 | 9 | from drain3.masking import AbstractMaskingInstruction, MaskingInstruction 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class TemplateMinerConfig: 15 | def __init__(self) -> None: 16 | self.engine = "Drain" 17 | self.profiling_enabled = False 18 | self.profiling_report_sec = 60 19 | self.snapshot_interval_minutes = 5 20 | self.snapshot_compress_state = True 21 | self.drain_extra_delimiters: Collection[str] = [] 22 | self.drain_sim_th = 0.4 23 | self.drain_depth = 4 24 | self.drain_max_children = 100 25 | self.drain_max_clusters: Optional[int] = None 26 | self.masking_instructions: Collection[AbstractMaskingInstruction] = [] 27 | self.mask_prefix = "<" 28 | self.mask_suffix = ">" 29 | self.parameter_extraction_cache_capacity = 3000 30 | self.parametrize_numeric_tokens = True 31 | 32 | def load(self, config_filename: str) -> None: 33 | parser = configparser.ConfigParser() 34 | read_files = parser.read(config_filename) 35 | if len(read_files) == 0: 36 | logger.warning(f"config file not found: {config_filename}") 37 | 38 | section_profiling = 'PROFILING' 39 | section_snapshot = 'SNAPSHOT' 40 | section_drain = 'DRAIN' 41 | section_masking = 'MASKING' 42 | 43 | self.engine = parser.get(section_drain, 'engine', fallback=self.engine) 44 | 45 | self.profiling_enabled = parser.getboolean(section_profiling, 'enabled', 46 | fallback=self.profiling_enabled) 47 | self.profiling_report_sec = parser.getint(section_profiling, 'report_sec', 48 | fallback=self.profiling_report_sec) 49 | 50 | self.snapshot_interval_minutes = parser.getint(section_snapshot, 'snapshot_interval_minutes', 51 | fallback=self.snapshot_interval_minutes) 52 | self.snapshot_compress_state = parser.getboolean(section_snapshot, 'compress_state', 53 | fallback=self.snapshot_compress_state) 54 | 55 | drain_extra_delimiters_str = parser.get(section_drain, 'extra_delimiters', 56 | fallback=str(self.drain_extra_delimiters)) 57 | self.drain_extra_delimiters = ast.literal_eval(drain_extra_delimiters_str) 58 | 59 | self.drain_sim_th = parser.getfloat(section_drain, 'sim_th', 60 | fallback=self.drain_sim_th) 61 | self.drain_depth = parser.getint(section_drain, 'depth', 62 | fallback=self.drain_depth) 63 | self.drain_max_children = parser.getint(section_drain, 'max_children', 64 | fallback=self.drain_max_children) 65 | self.drain_max_clusters = parser.getint(section_drain, 'max_clusters', 66 | fallback=self.drain_max_clusters) 67 | self.parametrize_numeric_tokens = parser.getboolean(section_drain, 'parametrize_numeric_tokens', 68 | fallback=self.parametrize_numeric_tokens) 69 | 70 | masking_instructions_str = parser.get(section_masking, 'masking', 71 | fallback=str(self.masking_instructions)) 72 | self.mask_prefix = parser.get(section_masking, 'mask_prefix', fallback=self.mask_prefix) 73 | self.mask_suffix = parser.get(section_masking, 'mask_suffix', fallback=self.mask_suffix) 74 | self.parameter_extraction_cache_capacity = parser.getint(section_masking, 'parameter_extraction_cache_capacity', 75 | 
fallback=self.parameter_extraction_cache_capacity) 76 | 77 | masking_instructions = [] 78 | masking_list = json.loads(masking_instructions_str) 79 | for mi in masking_list: 80 | instruction = MaskingInstruction(mi['regex_pattern'], mi['mask_with']) 81 | masking_instructions.append(instruction) 82 | self.masking_instructions = masking_instructions 83 | -------------------------------------------------------------------------------- /drain3/simple_profiler.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: Apache-2.0 2 | # Based on https://github.com/davidohana/SimpleProfiler/blob/main/python/simple_profiler.py 3 | 4 | import os 5 | import time 6 | 7 | from abc import ABC, abstractmethod 8 | from typing import Any, Callable, MutableMapping, Union 9 | 10 | 11 | class Profiler(ABC): 12 | 13 | @abstractmethod 14 | def start_section(self, section_name: str) -> None: 15 | pass 16 | 17 | @abstractmethod 18 | def end_section(self, section_name: str = "") -> None: 19 | pass 20 | 21 | @abstractmethod 22 | def report(self, period_sec: int = 30) -> None: 23 | pass 24 | 25 | 26 | class NullProfiler(Profiler): 27 | """A no-op profiler. Use it instead of SimpleProfiler in case you want to disable profiling.""" 28 | 29 | def start_section(self, section_name: str) -> None: 30 | pass 31 | 32 | def end_section(self, section_name: str = "") -> None: 33 | pass 34 | 35 | def report(self, period_sec: int = 30) -> None: 36 | pass 37 | 38 | 39 | class SimpleProfiler(Profiler): 40 | def __init__(self, 41 | reset_after_sample_count: int = 0, 42 | enclosing_section_name: str = "total", 43 | printer: Callable[[str], Any] = print, 44 | report_sec: int = 30): 45 | self.printer = printer 46 | self.enclosing_section_name = enclosing_section_name 47 | self.reset_after_sample_count = reset_after_sample_count 48 | self.report_sec = report_sec 49 | 50 | self.section_to_stats: MutableMapping[str, ProfiledSectionStats] = {} 51 | self.last_report_timestamp_sec = time.time() 52 | self.last_started_section_name = "" 53 | 54 | def start_section(self, section_name: str) -> None: 55 | """Start measuring a section""" 56 | 57 | if not section_name: 58 | raise ValueError("Section name is empty") 59 | self.last_started_section_name = section_name 60 | 61 | section = self.section_to_stats.get(section_name, None) 62 | if section is None: 63 | section = ProfiledSectionStats(section_name) 64 | self.section_to_stats[section_name] = section 65 | 66 | if section.start_time_sec != 0: 67 | raise ValueError(f"Section {section_name} is already started") 68 | 69 | section.start_time_sec = time.time() 70 | 71 | def end_section(self, name: str = "") -> None: 72 | """End measuring a section. 
Leave section name empty to end the last started section.""" 73 | 74 | now = time.time() 75 | 76 | section_name = name 77 | if not name: 78 | section_name = self.last_started_section_name 79 | 80 | if not section_name: 81 | raise ValueError("Neither section name is specified nor a section is started") 82 | 83 | if section_name not in self.section_to_stats: 84 | raise ValueError(f"Section {section_name} does not exist") 85 | section = self.section_to_stats[section_name] 86 | 87 | if section.start_time_sec == 0: 88 | raise ValueError(f"Section {section_name} was not started") 89 | 90 | took_sec = now - section.start_time_sec 91 | if 0 < self.reset_after_sample_count == section.sample_count: 92 | section.sample_count_batch = 0 93 | section.total_time_sec_batch = 0 94 | 95 | section.sample_count += 1 96 | section.total_time_sec += took_sec 97 | section.sample_count_batch += 1 98 | section.total_time_sec_batch += took_sec 99 | section.start_time_sec = 0 100 | 101 | def report(self, period_sec: int = 30) -> None: 102 | """Print results using [printer] function. By default prints to stdout.""" 103 | if time.time() - self.last_report_timestamp_sec < period_sec: 104 | return 105 | 106 | enclosing_time_sec: Union[int, float] = 0 107 | if self.enclosing_section_name: 108 | if self.enclosing_section_name in self.section_to_stats: 109 | enclosing_time_sec = self.section_to_stats[self.enclosing_section_name].total_time_sec 110 | 111 | include_batch_rates = self.reset_after_sample_count > 0 112 | 113 | sections = self.section_to_stats.values() 114 | sorted_sections = sorted(sections, key=lambda it: it.total_time_sec, reverse=True) 115 | lines = map(lambda it: it.to_string(enclosing_time_sec, include_batch_rates), sorted_sections) 116 | text = os.linesep.join(lines) 117 | self.printer(text) 118 | 119 | self.last_report_timestamp_sec = time.time() 120 | 121 | 122 | class ProfiledSectionStats: 123 | def __init__(self, section_name: str, start_time_sec: Union[int, float] = 0, sample_count: int = 0, 124 | total_time_sec: Union[int, float] = 0, sample_count_batch: int = 0, 125 | total_time_sec_batch: Union[int, float] = 0) -> None: 126 | self.section_name = section_name 127 | self.start_time_sec = start_time_sec 128 | self.sample_count = sample_count 129 | self.total_time_sec = total_time_sec 130 | self.sample_count_batch = sample_count_batch 131 | self.total_time_sec_batch = total_time_sec_batch 132 | 133 | def to_string(self, enclosing_time_sec: Union[int, float], include_batch_rates: bool) -> str: 134 | took_sec_text = f"{self.total_time_sec:>8.2f} s" 135 | if enclosing_time_sec > 0: 136 | took_sec_text += f" ({100 * self.total_time_sec / enclosing_time_sec:>6.2f}%)" 137 | 138 | ms_per_k_samples = f"{1000000 * self.total_time_sec / self.sample_count: 7.2f}" 139 | 140 | if self.total_time_sec > 0: 141 | samples_per_sec = f"{self.sample_count / self.total_time_sec: 15,.2f}" 142 | else: 143 | samples_per_sec = "N/A" 144 | 145 | if include_batch_rates: 146 | ms_per_k_samples += f" ({1000000 * self.total_time_sec_batch / self.sample_count_batch: 7.2f})" 147 | if self.total_time_sec_batch > 0: 148 | samples_per_sec += f" ({self.sample_count_batch / self.total_time_sec_batch: 15,.2f})" 149 | else: 150 | samples_per_sec += " (N/A)" 151 | 152 | return f"{self.section_name: <15}: took {took_sec_text}, " \ 153 | f"{self.sample_count: >10,} samples, " \ 154 | f"{ms_per_k_samples} ms / 1000 samples, " \ 155 | f"{samples_per_sec} hz" 156 | -------------------------------------------------------------------------------- 
/drain3/jaccard_drain.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | # This file implements the Drain algorithm for log parsing. 3 | # Based on https://github.com/logpai/logparser/blob/master/logparser/Drain/Drain.py by LogPAI team 4 | 5 | from typing import Optional, Sequence, Tuple 6 | 7 | from drain3.drain import DrainBase, LogCluster, Node 8 | 9 | 10 | class JaccardDrain(DrainBase): 11 | """ 12 | add a new matching pattern to the log cluster. 13 | Cancels log message length as first token. 14 | Drain that uses Jaccard similarity to match log messages. 15 | """ 16 | 17 | def tree_search(self, 18 | root_node: Node, 19 | tokens: Sequence[str], 20 | sim_th: float, 21 | include_params: bool) -> Optional[LogCluster]: 22 | # at first level, children are grouped by token (The first word in tokens) 23 | token_count = len(tokens) 24 | # cur_node = root_node.key_to_child_node.get(str(token_count)) 25 | 26 | if not tokens: 27 | token_first = "" 28 | cur_node = root_node.key_to_child_node.get(token_first) 29 | else: 30 | token_first = tokens[0] 31 | cur_node = root_node.key_to_child_node.get(token_first) 32 | 33 | # no template with same token count yet 34 | if cur_node is None: 35 | return None 36 | 37 | # handle case of empty log string - return the single cluster in that group 38 | if token_count == 0: 39 | return self.id_to_cluster.get(cur_node.cluster_ids[0]) 40 | 41 | # find the leaf node for this log - a path of nodes matching the first N tokens (N=tree depth) 42 | cur_node_depth = 1 # first level is 1 43 | 44 | for token in tokens[1:]: 45 | # at max depth 46 | if cur_node_depth >= self.max_node_depth: 47 | break 48 | 49 | # this is last token 50 | # It starts with the second word, so the sentence length -1 51 | if cur_node_depth == token_count - 1: 52 | break 53 | 54 | key_to_child_node = cur_node.key_to_child_node 55 | cur_node = key_to_child_node.get(token) 56 | 57 | if cur_node is None: # no exact next token exist, try wildcard node 58 | cur_node = key_to_child_node.get(self.param_str) 59 | if cur_node is None: # no wildcard node exist 60 | return None 61 | 62 | cur_node_depth += 1 63 | 64 | # get best match among all clusters with same prefix, or None if no match is above sim_th 65 | cluster = self.fast_match(cur_node.cluster_ids, tokens, sim_th, include_params) 66 | 67 | return cluster 68 | 69 | def add_seq_to_prefix_tree(self, root_node: Node, cluster: LogCluster) -> None: 70 | token_count = len(cluster.log_template_tokens) 71 | # Determine if the string is empty 72 | if not cluster.log_template_tokens: 73 | token_first = "" 74 | else: 75 | token_first = cluster.log_template_tokens[0] 76 | if token_first not in root_node.key_to_child_node: 77 | first_layer_node = Node() 78 | root_node.key_to_child_node[token_first] = first_layer_node 79 | else: 80 | first_layer_node = root_node.key_to_child_node[token_first] 81 | 82 | cur_node = first_layer_node 83 | 84 | # handle case of empty log string 85 | if token_count == 0: 86 | cur_node.cluster_ids = [cluster.cluster_id] 87 | return 88 | 89 | # test_add_shorter_than_depth_message : only one word add into current node 90 | if token_count == 1: 91 | # clean up stale clusters before adding a new one. 
92 | new_cluster_ids = [] 93 | for cluster_id in cur_node.cluster_ids: 94 | if cluster_id in self.id_to_cluster: 95 | new_cluster_ids.append(cluster_id) 96 | new_cluster_ids.append(cluster.cluster_id) 97 | cur_node.cluster_ids = new_cluster_ids 98 | 99 | current_depth = 1 100 | for token in cluster.log_template_tokens[1:]: 101 | # if at max depth or this is last token in template - add current log cluster to the leaf node 102 | # It starts with the second word, so the sentence length -1 103 | if current_depth >= self.max_node_depth or current_depth >= token_count - 1: 104 | # clean up stale clusters before adding a new one. 105 | new_cluster_ids = [] 106 | for cluster_id in cur_node.cluster_ids: 107 | if cluster_id in self.id_to_cluster: 108 | new_cluster_ids.append(cluster_id) 109 | new_cluster_ids.append(cluster.cluster_id) 110 | cur_node.cluster_ids = new_cluster_ids 111 | break 112 | 113 | # if token not matched in this layer of existing tree. 114 | if token not in cur_node.key_to_child_node: 115 | if self.parametrize_numeric_tokens and self.has_numbers(token): 116 | if self.param_str not in cur_node.key_to_child_node: 117 | new_node = Node() 118 | cur_node.key_to_child_node[self.param_str] = new_node 119 | cur_node = new_node 120 | else: 121 | cur_node = cur_node.key_to_child_node[self.param_str] 122 | 123 | else: 124 | if self.param_str in cur_node.key_to_child_node: 125 | if len(cur_node.key_to_child_node) < self.max_children: 126 | new_node = Node() 127 | cur_node.key_to_child_node[token] = new_node 128 | cur_node = new_node 129 | else: 130 | cur_node = cur_node.key_to_child_node[self.param_str] 131 | else: 132 | if len(cur_node.key_to_child_node) + 1 < self.max_children: 133 | new_node = Node() 134 | cur_node.key_to_child_node[token] = new_node 135 | cur_node = new_node 136 | elif len(cur_node.key_to_child_node) + 1 == self.max_children: 137 | new_node = Node() 138 | cur_node.key_to_child_node[self.param_str] = new_node 139 | cur_node = new_node 140 | else: 141 | cur_node = cur_node.key_to_child_node[self.param_str] 142 | 143 | # if the token is matched 144 | else: 145 | cur_node = cur_node.key_to_child_node[token] 146 | 147 | current_depth += 1 148 | 149 | # seq1 is a template, seq2 is the log to match 150 | def get_seq_distance(self, seq1: Sequence[str], seq2: Sequence[str], include_params: bool) -> Tuple[float, int]: 151 | # Jaccard index, It is used to measure the similarity of two sets. 152 | # The closer its value is to 1, the more common members the two sets have, and the higher the similarity. 
153 | 154 | # sequences are empty - full match 155 | if len(seq1) == 0: 156 | return 1.0, 0 157 | 158 | param_count = 0 159 | 160 | for token1 in seq1: 161 | if token1 == self.param_str: 162 | param_count += 1 163 | 164 | # If the token and the data have the same length, and there are param_str in the token 165 | if len(seq1) == len(seq2) and param_count > 0: 166 | # seq2 removes the param_str position 167 | seq2 = [x for i, x in enumerate(seq2) if seq1[i] != self.param_str] 168 | 169 | # If there are param_str, they are removed from the coefficient calculation 170 | if include_params: 171 | seq1 = [x for x in seq1 if x != self.param_str] 172 | 173 | # Calculate the Jaccard coefficient 174 | ret_val = len(set(seq1) & set(seq2)) / len(set(seq1) | set(seq2)) 175 | 176 | # Jaccard coefficient calculated under the same conditions has a low simSep value 177 | # So gain is applied to the calculated value (The test case test_add_log_message_sim_75) 178 | ret_val = ret_val * 1.3 if ret_val * 1.3 < 1 else 1 179 | 180 | return ret_val, param_count 181 | 182 | # seq1:tonkens->list seq2:template->tuple 183 | def create_template(self, seq1: Sequence[str], seq2: Sequence[str]) -> Sequence[str]: 184 | 185 | inter_set = set(seq1) & set(seq2) 186 | 187 | # test_max_clusters_lru_multiple_leaf_nodes 188 | # Update param_str at different positions with the same length 189 | if len(seq1) == len(seq2): 190 | ret_val = list(seq2) 191 | for i, (token1, token2) in enumerate(zip(seq1, seq2)): 192 | if token1 != token2: 193 | ret_val[i] = self.param_str 194 | # param_str is updated at the new position with different length 195 | else: 196 | # Take the template with long length 197 | ret_val = list(seq1) if len(seq1) > len(seq2) else list(seq2) 198 | for i, token in enumerate(ret_val): 199 | if token not in inter_set: 200 | ret_val[i] = self.param_str 201 | 202 | return ret_val 203 | 204 | def match(self, content: str, full_search_strategy: str = "never") -> Optional[LogCluster]: 205 | 206 | assert full_search_strategy in ["always", "never", "fallback"] 207 | 208 | # Because the template length and data are not equal in length, Jaccard distance required_sim_th != 1 209 | required_sim_th = 0.8 210 | content_tokens = self.get_content_as_tokens(content) 211 | 212 | def full_search() -> Optional[LogCluster]: 213 | all_ids = self.get_clusters_ids_for_seq_len(content_tokens[0]) 214 | cluster = self.fast_match(all_ids, content_tokens, required_sim_th, include_params=True) 215 | return cluster 216 | 217 | if full_search_strategy == "always": 218 | return full_search() 219 | 220 | match_cluster = self.tree_search(self.root_node, content_tokens, required_sim_th, include_params=True) 221 | if match_cluster is not None: 222 | return match_cluster 223 | 224 | if full_search_strategy == "never": 225 | return None 226 | 227 | return full_search() 228 | 229 | -------------------------------------------------------------------------------- /tests/test_drain.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import unittest 4 | 5 | from drain3.drain import Drain, LogCluster 6 | 7 | 8 | class DrainTest(unittest.TestCase): 9 | 10 | def test_add_shorter_than_depth_message(self): 11 | model = Drain(depth=4) 12 | res = model.add_log_message("hello") 13 | print(res[1]) 14 | print(res[0]) 15 | self.assertEqual(res[1], "cluster_created") 16 | 17 | res = model.add_log_message("hello") 18 | print(res[1]) 19 | print(res[0]) 20 | self.assertEqual(res[1], "none") 21 | 22 
| res = model.add_log_message("otherword") 23 | print(res[1]) 24 | print(res[0]) 25 | self.assertEqual(res[1], "cluster_created") 26 | 27 | self.assertEqual(2, len(model.id_to_cluster)) 28 | 29 | def test_add_log_message(self): 30 | model = Drain() 31 | entries = str.splitlines( 32 | """ 33 | Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] 34 | Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth] 35 | Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 36 | Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2 37 | Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2 38 | Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth] 39 | """ 40 | ) 41 | expected = str.splitlines( 42 | """ 43 | Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] 44 | Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth] 45 | Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 46 | Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 47 | Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 48 | Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth] 49 | """ 50 | ) 51 | actual = [] 52 | 53 | for entry in entries: 54 | cluster, change_type = model.add_log_message(entry) 55 | actual.append(cluster.get_template()) 56 | 57 | self.assertListEqual(list(map(str.strip, expected)), actual) 58 | self.assertEqual(8, model.get_total_cluster_size()) 59 | 60 | def test_add_log_message_sim_75(self): 61 | """When `sim_th` is set to 75% then only certain log entries match. 62 | 63 | In this test similarity threshold is set to 75% which makes the model 64 | less aggressive in grouping entries into clusters. In particular, it 65 | only finds clusters for "Failed password" entries. 
66 | """ 67 | model = Drain( 68 | depth=4, 69 | sim_th=0.75, 70 | max_children=100, 71 | ) 72 | entries = str.splitlines( 73 | """ 74 | Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] 75 | Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth] 76 | Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 77 | Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2 78 | Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2 79 | Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth] 80 | """ 81 | ) 82 | expected = str.splitlines( 83 | """ 84 | Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] 85 | Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth] 86 | Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 87 | Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 88 | Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 89 | Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth] 90 | """ 91 | ) 92 | actual = [] 93 | 94 | for entry in entries: 95 | cluster, change_type = model.add_log_message(entry) 96 | actual.append(cluster.get_template()) 97 | 98 | self.assertListEqual(list(map(str.strip, expected)), actual) 99 | self.assertEqual(8, model.get_total_cluster_size()) 100 | 101 | def test_max_clusters(self): 102 | """Verify model respects the max_clusters option. 103 | 104 | Key difference between this and other tests is that with `max_clusters` 105 | set to 1 model is capable of keeping track of a single cluster at a 106 | time. Consequently, when log stream switched form the A format to the B 107 | and back model doesn't recognize it and returnes a new template with no 108 | slots. 109 | """ 110 | model = Drain(max_clusters=1) 111 | entries = str.splitlines( 112 | """ 113 | A format 1 114 | A format 2 115 | B format 1 116 | B format 2 117 | A format 3 118 | """ 119 | ) 120 | expected = str.splitlines( 121 | """ 122 | A format 1 123 | A format <*> 124 | B format 1 125 | B format <*> 126 | A format 3 127 | """ 128 | ) 129 | actual = [] 130 | 131 | for entry in entries: 132 | cluster, change_type = model.add_log_message(entry) 133 | actual.append(cluster.get_template()) 134 | 135 | self.assertListEqual(list(map(str.strip, expected)), actual) 136 | self.assertEqual(1, model.get_total_cluster_size()) 137 | 138 | def test_max_clusters_lru_multiple_leaf_nodes(self): 139 | """When all templates end up in different nodes and the max number of 140 | clusters is reached, then clusters are removed according to the lru 141 | policy. 
142 | """ 143 | model = Drain(max_clusters=2, depth=4, param_str="*") # sim_th=0.75 144 | entries = [ 145 | "A A A", 146 | "A A B", 147 | "B A A", 148 | "B A B", 149 | "C A A", 150 | "C A B", 151 | "B A A", 152 | "A A A", 153 | ] 154 | expected = [ 155 | # lru: [] 156 | "A A A", 157 | # lru: ["A A A"] 158 | "A A *", 159 | # lru: ["A A *"] 160 | "B A A", 161 | # lru: ["B A A", "A A *"] 162 | "B A *", 163 | # lru: ["B A *", "A A *"] 164 | "C A A", 165 | # lru: ["C A A", "B A *"] 166 | "C A *", 167 | # lru: ["C A *", "B A *"] 168 | "B A *", 169 | # Message "B A A" was normalized because the template "B A *" is 170 | # still present in the cache. 171 | # lru: ["B A *", "C A *"] 172 | "A A A", 173 | # Message "A A A" was not normalized because the template "C A A" 174 | # pushed out the template "A A *" from the cache. 175 | # lru: ["A A A", "C A *"] 176 | ] 177 | actual = [] 178 | 179 | for entry in entries: 180 | cluster, _ = model.add_log_message(entry) 181 | actual.append(cluster.get_template()) 182 | 183 | self.assertListEqual(list(map(str.strip, expected)), actual) 184 | self.assertEqual(4, model.get_total_cluster_size()) 185 | 186 | def test_max_clusters_lru_single_leaf_node(self): 187 | """When all templates end up in the same leaf node and the max number of 188 | clusters is reached, then clusters are removed according to the lru 189 | policy. 190 | """ 191 | model = Drain(max_clusters=2, depth=4, param_str="*") 192 | entries = [ 193 | "A A A", 194 | "A A B", 195 | "A B A", 196 | "A B B", 197 | "A C A", 198 | "A C B", 199 | "A B A", 200 | "A A A", 201 | ] 202 | expected = [ 203 | # lru: [] 204 | "A A A", 205 | # lru: ["A A A"] 206 | "A A *", 207 | # lru: ["A A *"] 208 | "A B A", 209 | # lru: ["B A A", "A A *"] 210 | "A B *", 211 | # lru: ["B A *", "A A *"] 212 | "A C A", 213 | # lru: ["C A A", "B A *"] 214 | "A C *", 215 | # lru: ["C A *", "B A *"] 216 | "A B *", 217 | # Message "B A A" was normalized because the template "B A *" is 218 | # still present in the cache. 219 | # lru: ["B A *", "C A *"] 220 | "A A A", 221 | # Message "A A A" was not normalized because the template "C A A" 222 | # pushed out the template "A A *" from the cache. 
223 | # lru: ["A A A", "C A *"] 224 | ] 225 | actual = [] 226 | 227 | for entry in entries: 228 | cluster, _ = model.add_log_message(entry) 229 | actual.append(cluster.get_template()) 230 | 231 | self.assertListEqual(list(map(str.strip, expected)), actual) 232 | # self.assertEqual(5, model.get_total_cluster_size()) 233 | 234 | def test_match_only(self): 235 | model = Drain() 236 | res = model.add_log_message("aa aa aa") 237 | print(res[0]) 238 | 239 | res = model.add_log_message("aa aa bb") 240 | print(res[0]) 241 | 242 | res = model.add_log_message("aa aa cc") 243 | print(res[0]) 244 | 245 | res = model.add_log_message("xx yy zz") 246 | print(res[0]) 247 | 248 | c: LogCluster = model.match("aa aa tt") 249 | self.assertEqual(1, c.cluster_id) 250 | 251 | c: LogCluster = model.match("xx yy zz") 252 | self.assertEqual(2, c.cluster_id) 253 | 254 | c: LogCluster = model.match("xx yy rr") 255 | self.assertIsNone(c) 256 | 257 | c: LogCluster = model.match("nothing") 258 | self.assertIsNone(c) 259 | 260 | def test_create_template(self): 261 | model = Drain(param_str="*") 262 | 263 | seq1 = ["aa", "bb", "dd"] 264 | seq2 = ["aa", "bb", "cc"] 265 | 266 | # test for proper functionality 267 | template = model.create_template(seq1, seq2) 268 | self.assertListEqual(["aa", "bb", "*"], template) 269 | 270 | template = model.create_template(seq1, seq1) 271 | self.assertListEqual(seq1, template) 272 | 273 | # Test for equal lengths input vectors 274 | self.assertRaises(AssertionError, model.create_template, seq1, ["aa"]) -------------------------------------------------------------------------------- /tests/test_jaccard_drain.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import unittest 4 | 5 | from drain3.drain import LogCluster 6 | from drain3.jaccard_drain import JaccardDrain 7 | 8 | 9 | class DrainTest(unittest.TestCase): 10 | 11 | def test_add_shorter_than_depth_message(self): 12 | model = JaccardDrain(depth=4) 13 | res = model.add_log_message("hello") 14 | print(res[1]) 15 | print(res[0]) 16 | self.assertEqual(res[1], "cluster_created") 17 | 18 | res = model.add_log_message("hello") 19 | print(res[1]) 20 | print(res[0]) 21 | self.assertEqual(res[1], "none") 22 | 23 | res = model.add_log_message("otherword") 24 | print(res[1]) 25 | print(res[0]) 26 | self.assertEqual(res[1], "cluster_created") 27 | 28 | self.assertEqual(2, len(model.id_to_cluster)) 29 | 30 | def test_add_log_message(self): 31 | model = JaccardDrain() 32 | entries = str.splitlines( 33 | """ 34 | Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] 35 | Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth] 36 | Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 37 | Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2 38 | Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2 39 | Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth] 40 | """ 41 | ) 42 | expected = str.splitlines( 43 | """ 44 | Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] 45 | Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth] 46 | Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 47 | Dec 10 <*> LabSZ <*> Failed 
password for invalid user <*> from 0.0.0.0 port <*> ssh2 48 | Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 49 | Dec 10 <*> LabSZ <*> input_userauth_request: invalid user <*> [preauth] 50 | """ 51 | ) 52 | actual = [] 53 | 54 | for entry in entries: 55 | cluster, change_type = model.add_log_message(entry) 56 | actual.append(cluster.get_template()) 57 | 58 | self.assertListEqual(list(map(str.strip, expected)), actual) 59 | self.assertEqual(8, model.get_total_cluster_size()) 60 | 61 | def test_add_log_message_sim_75(self): 62 | """When `sim_th` is set to 75% then only certain log entries match. 63 | 64 | In this test similarity threshold is set to 75% which makes the model 65 | less aggressive in grouping entries into clusters. In particular, it 66 | only finds clusters for "Failed password" entries. 67 | """ 68 | model = JaccardDrain( 69 | depth=4, 70 | sim_th=0.75, 71 | max_children=100, 72 | ) 73 | entries = str.splitlines( 74 | """ 75 | Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] 76 | Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth] 77 | Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 78 | Dec 10 09:12:35 LabSZ sshd[24492]: Failed password for invalid user pi from 0.0.0.0 port 49289 ssh2 79 | Dec 10 09:12:44 LabSZ sshd[24501]: Failed password for invalid user ftpuser from 0.0.0.0 port 60836 ssh2 80 | Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth] 81 | """ 82 | ) 83 | expected = str.splitlines( 84 | """ 85 | Dec 10 07:07:38 LabSZ sshd[24206]: input_userauth_request: invalid user test9 [preauth] 86 | Dec 10 07:08:28 LabSZ sshd[24208]: input_userauth_request: invalid user webmaster [preauth] 87 | Dec 10 09:12:32 LabSZ sshd[24490]: Failed password for invalid user ftpuser from 0.0.0.0 port 62891 ssh2 88 | Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 89 | Dec 10 <*> LabSZ <*> Failed password for invalid user <*> from 0.0.0.0 port <*> ssh2 90 | Dec 10 07:28:03 LabSZ sshd[24245]: input_userauth_request: invalid user pgadmin [preauth] 91 | """ 92 | ) 93 | actual = [] 94 | 95 | for entry in entries: 96 | cluster, change_type = model.add_log_message(entry) 97 | actual.append(cluster.get_template()) 98 | 99 | self.assertListEqual(list(map(str.strip, expected)), actual) 100 | self.assertEqual(8, model.get_total_cluster_size()) 101 | 102 | def test_max_clusters(self): 103 | """Verify model respects the max_clusters option. 104 | 105 | Key difference between this and other tests is that with `max_clusters` 106 | set to 1 model is capable of keeping track of a single cluster at a 107 | time. Consequently, when log stream switched form the A format to the B 108 | and back model doesn't recognize it and returnes a new template with no 109 | slots. 
110 | """ 111 | model = JaccardDrain(max_clusters=1) 112 | entries = str.splitlines( 113 | """ 114 | A format 1 115 | A format 2 116 | B format 1 117 | B format 2 118 | A format 3 119 | """ 120 | ) 121 | expected = str.splitlines( 122 | """ 123 | A format 1 124 | A format <*> 125 | B format 1 126 | B format <*> 127 | A format 3 128 | """ 129 | ) 130 | actual = [] 131 | 132 | for entry in entries: 133 | cluster, change_type = model.add_log_message(entry) 134 | actual.append(cluster.get_template()) 135 | 136 | self.assertListEqual(list(map(str.strip, expected)), actual) 137 | self.assertEqual(1, model.get_total_cluster_size()) 138 | 139 | def test_max_clusters_lru_multiple_leaf_nodes(self): 140 | """When all templates end up in different nodes and the max number of 141 | clusters is reached, then clusters are removed according to the lru 142 | policy. 143 | """ 144 | model = JaccardDrain(max_clusters=2, depth=4, param_str="*") # sim_th=0.75 145 | entries = [ 146 | "A A A", 147 | "A A B", 148 | "B A A", 149 | "B A B", 150 | "C A A", 151 | "C A B", 152 | "B A A", 153 | "A A A", 154 | ] 155 | expected = [ 156 | # lru: [] 157 | "A A A", 158 | # lru: ["A A A"] 159 | "A A *", 160 | # lru: ["A A *"] 161 | "B A A", 162 | # lru: ["B A A", "A A *"] 163 | "B A *", 164 | # lru: ["B A *", "A A *"] 165 | "C A A", 166 | # lru: ["C A A", "B A *"] 167 | "C A *", 168 | # lru: ["C A *", "B A *"] 169 | "B A *", 170 | # Message "B A A" was normalized because the template "B A *" is 171 | # still present in the cache. 172 | # lru: ["B A *", "C A *"] 173 | "A A A", 174 | # Message "A A A" was not normalized because the template "C A A" 175 | # pushed out the template "A A *" from the cache. 176 | # lru: ["A A A", "C A *"] 177 | ] 178 | actual = [] 179 | 180 | for entry in entries: 181 | cluster, _ = model.add_log_message(entry) 182 | actual.append(cluster.get_template()) 183 | print(cluster.get_template()) 184 | 185 | self.assertListEqual(list(map(str.strip, expected)), actual) 186 | self.assertEqual(4, model.get_total_cluster_size()) 187 | 188 | def test_max_clusters_lru_single_leaf_node(self): 189 | """When all templates end up in the same leaf node and the max number of 190 | clusters is reached, then clusters are removed according to the lru 191 | policy. 192 | """ 193 | model = JaccardDrain(max_clusters=2, depth=4, param_str="*") 194 | entries = [ 195 | "A A A", 196 | "A A B", 197 | "A B A", 198 | "A B B", 199 | "A C A", 200 | "A C B", 201 | "A B A", 202 | "A A A", 203 | ] 204 | expected = [ 205 | # lru: [] 206 | "A A A", 207 | # lru: ["A A A"] 208 | "A A *", 209 | # lru: ["A A *"] 210 | "A B A", 211 | # lru: ["B A A", "A A *"] 212 | "A B *", 213 | # lru: ["B A *", "A A *"] 214 | "A C A", 215 | # lru: ["C A A", "B A *"] 216 | "A C *", 217 | # lru: ["C A *", "B A *"] 218 | "A B *", 219 | # Message "B A A" was normalized because the template "B A *" is 220 | # still present in the cache. 221 | # lru: ["B A *", "C A *"] 222 | "A A A", 223 | # Message "A A A" was not normalized because the template "C A A" 224 | # pushed out the template "A A *" from the cache. 
225 | # lru: ["A A A", "C A *"] 226 | ] 227 | actual = [] 228 | 229 | for entry in entries: 230 | cluster, _ = model.add_log_message(entry) 231 | actual.append(cluster.get_template()) 232 | 233 | self.assertListEqual(list(map(str.strip, expected)), actual) 234 | # self.assertEqual(5, model.get_total_cluster_size()) 235 | 236 | def test_match_only(self): 237 | model = JaccardDrain() 238 | res = model.add_log_message("aa aa aa") 239 | print(res[0]) 240 | 241 | res = model.add_log_message("aa aa bb") 242 | print(res[0]) 243 | 244 | res = model.add_log_message("aa aa cc") 245 | print(res[0]) 246 | 247 | res = model.add_log_message("xx yy zz") 248 | print(res[0]) 249 | 250 | c: LogCluster = model.match("aa aa tt") 251 | self.assertEqual(1, c.cluster_id) 252 | 253 | c: LogCluster = model.match("xx yy zz") 254 | self.assertEqual(2, c.cluster_id) 255 | 256 | c: LogCluster = model.match("xx yy rr") 257 | self.assertIsNone(c) 258 | 259 | c: LogCluster = model.match("nothing") 260 | self.assertIsNone(c) 261 | 262 | def test_match_token_with_different_length(self): 263 | model = JaccardDrain() 264 | res = model.add_log_message("check pass; user unknown") 265 | print(res[0]) 266 | 267 | res = model.add_log_message("check pass; user Lisa") 268 | print(res[0]) 269 | 270 | res = model.add_log_message("check pass; user li Sa") 271 | print(res[0]) 272 | 273 | res = model.add_log_message("session opened for user cyrus by (uid=0)") 274 | print(res[0]) 275 | 276 | res = model.add_log_message("session closed for user cyrus") 277 | print(res[0]) 278 | 279 | c: LogCluster = model.match("check pass; user boris") 280 | self.assertEqual(1, c.cluster_id) 281 | 282 | c: LogCluster = model.match("session opened for user cyrus by (uid=1)") 283 | self.assertEqual(2, c.cluster_id) 284 | 285 | c: LogCluster = model.match("nothing") 286 | self.assertIsNone(c) 287 | 288 | 289 | if __name__ == "__main__": 290 | pass 291 | -------------------------------------------------------------------------------- /tests/test_template_miner.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import io 4 | import logging 5 | import sys 6 | import unittest 7 | from os.path import dirname 8 | 9 | from drain3 import TemplateMiner 10 | from drain3.masking import MaskingInstruction 11 | from drain3.memory_buffer_persistence import MemoryBufferPersistence 12 | from drain3.template_miner_config import TemplateMinerConfig 13 | 14 | 15 | class TemplateMinerTest(unittest.TestCase): 16 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') 17 | 18 | def test_load_config(self): 19 | config = TemplateMinerConfig() 20 | config.load(f"{dirname(__file__)}/drain3_test.ini") 21 | self.assertEqual(1024, config.drain_max_clusters) 22 | self.assertListEqual(["_"], config.drain_extra_delimiters) 23 | self.assertEqual(7, len(config.masking_instructions)) 24 | 25 | def test_save_load_snapshot_unlimited_clusters(self): 26 | self.save_load_snapshot(None) 27 | 28 | def test_save_load_snapshot_limited_clusters(self): 29 | self.save_load_snapshot(10) 30 | 31 | def save_load_snapshot(self, max_clusters): 32 | persistence = MemoryBufferPersistence() 33 | 34 | config = TemplateMinerConfig() 35 | config.drain_max_clusters = max_clusters 36 | template_miner1 = TemplateMiner(persistence, config) 37 | print(template_miner1.add_log_message("hello")) 38 | print(template_miner1.add_log_message("hello ABC")) 39 | print(template_miner1.add_log_message("hello BCD")) 40 | 
print(template_miner1.add_log_message("hello XYZ")) 41 | print(template_miner1.add_log_message("goodbye XYZ")) 42 | 43 | template_miner2 = TemplateMiner(persistence, config) 44 | 45 | self.assertListEqual(list(template_miner1.drain.id_to_cluster.keys()), 46 | list(template_miner2.drain.id_to_cluster.keys())) 47 | 48 | self.assertListEqual(list(template_miner1.drain.root_node.key_to_child_node.keys()), 49 | list(template_miner2.drain.root_node.key_to_child_node.keys())) 50 | 51 | def get_tree_lines(template_miner): 52 | sio = io.StringIO() 53 | template_miner.drain.print_tree(sio) 54 | sio.seek(0) 55 | return sio.readlines() 56 | 57 | self.assertListEqual(get_tree_lines(template_miner1), 58 | get_tree_lines(template_miner2)) 59 | 60 | print(template_miner2.add_log_message("hello yyy")) 61 | print(template_miner2.add_log_message("goodbye ABC")) 62 | 63 | def test_extract_parameters(self): 64 | config = TemplateMinerConfig() 65 | mi = MaskingInstruction("((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "NUM") 66 | config.masking_instructions.append(mi) 67 | mi = MaskingInstruction(r"multiple words", "WORDS") 68 | config.masking_instructions.append(mi) 69 | config.mask_prefix = "[:" 70 | config.mask_suffix = ":]" 71 | template_miner = TemplateMiner(None, config) 72 | 73 | def add_and_test(msg, expected_params, exact_matching=False): 74 | print(f"msg: {msg}") 75 | res = template_miner.add_log_message(msg) 76 | print(f"result: {res}") 77 | extracted_parameters = template_miner.extract_parameters( 78 | res["template_mined"], msg, exact_matching=exact_matching) 79 | self.assertIsNotNone(extracted_parameters) 80 | params = [parameter.value for parameter in extracted_parameters] 81 | print(f"params: {params}") 82 | self.assertListEqual(params, expected_params) 83 | 84 | add_and_test("hello", []) 85 | add_and_test("hello ABC", []) 86 | add_and_test("hello BCD", ["BCD"]) 87 | add_and_test("hello BCD", ["BCD"]) 88 | add_and_test("hello\tBCD", ["BCD"]) 89 | add_and_test("request took 123 ms", ["123"]) 90 | add_and_test("file saved [test.xml]", []) 91 | add_and_test("new order received: [:xyz:]", []) 92 | add_and_test("order type: new, order priority:3", ["3"]) 93 | add_and_test("order type: changed, order priority:5", ["changed,", "5"]) 94 | add_and_test("sometimes one needs multiple words", ["multiple words"], True) 95 | add_and_test("sometimes one needs not", ["not"], True) 96 | add_and_test("sometimes one needs multiple words", ["multiple words"], True) 97 | 98 | def test_extract_parameters_direct(self): 99 | config = TemplateMinerConfig() 100 | mi = MaskingInstruction(r"hdfs://[\w.:@-]*((/[\w.~%+-]+)+/?)?", "hdfs_uri") 101 | config.masking_instructions.append(mi) 102 | mi = MaskingInstruction(r"(?P[\"'`]).*?(?P=quote)", "quoted_string") 103 | config.masking_instructions.append(mi) 104 | mi = MaskingInstruction(r"((?P[*_])\2{0,2}).*?\1", "markdown_emph") 105 | config.masking_instructions.append(mi) 106 | mi = MaskingInstruction(r"multiple \*word\* pattern", "*words*") 107 | config.masking_instructions.append(mi) 108 | mi = MaskingInstruction(r"some \S+ \S+ pattern", "*words*") 109 | config.masking_instructions.append(mi) 110 | mi = MaskingInstruction(r"(\d{1,3}\.){3}\d{1,3}", "ip") 111 | config.masking_instructions.append(mi) 112 | mi = MaskingInstruction(r"(?P\d+)\.\d+", "float") 113 | config.masking_instructions.append(mi) 114 | mi = MaskingInstruction(r"0[xX][a-fA-F0-9]+", "integer") 115 | config.masking_instructions.append(mi) 116 | mi = MaskingInstruction(r"(?P\d+)", "integer") 117 | 
config.masking_instructions.append(mi) 118 | mi = MaskingInstruction(r"HelloWorld", "*") 119 | config.masking_instructions.append(mi) 120 | mi = MaskingInstruction(r"MaskPrefix", "<") 121 | config.masking_instructions.append(mi) 122 | template_miner = TemplateMiner(None, config) 123 | 124 | test_vectors = [ 125 | ( 126 | ":+", 127 | "hdfs://msra-sa-41:9000/pageinput2.txt:671088640+134217728", 128 | ["hdfs://msra-sa-41:9000/pageinput2.txt", "671088640", "134217728"], 129 | ["hdfs_uri", "integer", "integer"] 130 | ), 131 | ( 132 | "Hello ", 133 | "Hello 'World'", 134 | ["'World'"], 135 | ["quoted_string"] 136 | ), 137 | ( 138 | "", 139 | """'This "should"'`do no breakin'`""", 140 | ["""'This "should"'""", "`do no breakin'`"], 141 | ["quoted_string", "quoted_string"] 142 | ), 143 | ( 144 | "This is !.", 145 | "This is ___very___ *important*!.", 146 | ["___very___", "*important*"], 147 | ["markdown_emph", "markdown_emph"] 148 | ), 149 | ( 150 | ".<*>", 151 | "0.15.Test", 152 | ["0.15", "Test"], 153 | ["float", "*"] 154 | ), 155 | ( 156 | ":", 157 | "192.0.0.1:5000", 158 | ["192.0.0.1", "5000"], 159 | ["ip", "integer"] 160 | ), 161 | ( 162 | "::", 163 | "192.0.0.1:5000:123", 164 | ["192.0.0.1", "5000", "123"], 165 | ["ip", "integer", "integer"] 166 | ), 167 | ( 168 | ".<*>.", 169 | "0.15.Test.0.2", 170 | ["0.15", "Test", "0.2"], 171 | ["float", "*", "float"] 172 | ), 173 | ( 174 | " ", 175 | "0.15 10.16", 176 | ["0.15", "10.16"], 177 | ["float", "float"] 178 | ), 179 | ( 180 | "<*words*>@", 181 | "some other cool pattern@0xe1f", 182 | ["some other cool pattern", "0xe1f"], 183 | ["*words*", "integer"] 184 | ), 185 | ( 186 | "Another test with <*words*> that includes and <*> ", 187 | "Another test with some other 0Xadded pattern that includes 500xc0ffee and 0X4 times 5", 188 | ["some other 0Xadded pattern", "50", "0xc0ffee", "0X4", "times", "5"], 189 | ["*words*", "integer", "integer", "integer", "*", "integer"] 190 | ), 191 | ( 192 | "some <*words*> <*words*>", 193 | "some multiple *word* pattern some confusing *word* pattern", 194 | ["multiple *word* pattern", "some confusing *word* pattern"], 195 | ["*words*", "*words*"] 196 | ), 197 | ( 198 | "<*words*> <*>", 199 | "multiple *word* pattern <*words*>", 200 | ["multiple *word* pattern", "<*words*>"], 201 | ["*words*", "*"] 202 | ), 203 | ( 204 | "<*> <*>", 205 | "HelloWorld Test", 206 | ["HelloWorld", "Test"], 207 | ["*", "*"] 208 | ), 209 | ( 210 | "<*> <*>", 211 | "HelloWorld ", 212 | ["HelloWorld", ""], 213 | ["*", "*"] 214 | ), 215 | ( 216 | "<*>", 217 | "HelloWorld1", 218 | ["HelloWorld", "1"], 219 | ["*", "integer"] 220 | ), 221 | ( 222 | "<*> works <*>", 223 | "This works as-expected", 224 | ["This", "as-expected"], 225 | ["*", "*"] 226 | ), 227 | ( 228 | ">", 229 | "", 230 | ["8"], 231 | ["integer"] 232 | ), 233 | ( 234 | " >>", 235 | ">", 236 | ["8", "0.5"], 237 | ["integer", "float"] 238 | ), 239 | ( 240 | "<*> >>", 241 | "New: >", 242 | ["New:", "8", "0.5"], 243 | ["*", "integer", "float"] 244 | ), 245 | ( 246 | "<<>", 247 | "MaskPrefix", 248 | ["MaskPrefix"], 249 | ["<"] 250 | ), 251 | ( 252 | "<<<>>", 253 | "", 254 | ["MaskPrefix"], 255 | ["<"] 256 | ), 257 | ( 258 | "There are no parameters here.", 259 | "There are no parameters here.", 260 | [], 261 | [] 262 | ), 263 | ( 264 | " ", 265 | "0.15 10.16 3.19", 266 | None, 267 | None 268 | ), 269 | ( 270 | " ", 271 | "0.15 10.16 test 3.19", 272 | None, 273 | None 274 | ), 275 | ( 276 | " >>", 277 | ">", 278 | None, 279 | None 280 | ), 281 | ( 282 | "<<>", 283 | "<<>", 284 | None, 285 
| None 286 | ), 287 | ( 288 | "<*words*> <*words*>", 289 | "0.15 0.15", 290 | None, 291 | None 292 | ), 293 | ] 294 | 295 | for template, content, expected_parameters, expected_mask_names in test_vectors: 296 | with self.subTest(template=template, content=content, expected_parameters=expected_parameters): 297 | extracted_parameters = template_miner.extract_parameters(template, content, exact_matching=True) 298 | if expected_parameters is None: 299 | self.assertIsNone(extracted_parameters) 300 | else: 301 | self.assertIsNotNone(extracted_parameters) 302 | self.assertListEqual([parameter.value for parameter in extracted_parameters], 303 | expected_parameters) 304 | self.assertListEqual([parameter.mask_name for parameter in extracted_parameters], 305 | expected_mask_names) 306 | 307 | def test_match_only(self): 308 | config = TemplateMinerConfig() 309 | config.drain_extra_delimiters = ["_"] 310 | mi = MaskingInstruction("((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "NUM") 311 | config.masking_instructions.append(mi) 312 | tm = TemplateMiner(None, config) 313 | 314 | res = tm.add_log_message("aa aa aa") 315 | print(res) 316 | 317 | res = tm.add_log_message("aa aa bb") 318 | print(res) 319 | 320 | res = tm.add_log_message("xx yy zz") 321 | print(res) 322 | 323 | res = tm.add_log_message("rrr qqq 123") 324 | print(res) 325 | 326 | c = tm.match("aa aa tt") 327 | self.assertEqual(1, c.cluster_id) 328 | 329 | c = tm.match("aa aa 12") 330 | self.assertEqual(1, c.cluster_id) 331 | 332 | c = tm.match("xx yy zz") 333 | self.assertEqual(2, c.cluster_id) 334 | 335 | c = tm.match("xx yy rr") 336 | self.assertIsNone(c) 337 | 338 | c = tm.match("nothing") 339 | self.assertIsNone(c) 340 | 341 | c = tm.match("rrr qqq 456 ") 342 | self.assertEqual(3, c.cluster_id) 343 | 344 | c = tm.match("rrr qqq 555.2") 345 | self.assertIsNone(c) 346 | 347 | c = tm.match("rrr qqq num") 348 | self.assertIsNone(c) 349 | 350 | def test_match_strategies(self): 351 | miner = TemplateMiner() 352 | print(miner.add_log_message("training4Model start")) 353 | print(miner.add_log_message("loadModel start")) 354 | print(miner.add_log_message("loadModel stop")) 355 | print(miner.add_log_message("this is a test")) 356 | miner.drain.print_tree() 357 | self.assertIsNotNone(miner.match("loadModel start", full_search_strategy="fallback")) 358 | self.assertIsNotNone(miner.match("loadModel start", full_search_strategy="always")) 359 | self.assertIsNone(miner.match("loadModel start", full_search_strategy="never")) 360 | print(miner.add_log_message("loadModel start")) 361 | self.assertIsNotNone(miner.match("loadModel start", full_search_strategy="fallback")) 362 | self.assertIsNotNone(miner.match("loadModel start", full_search_strategy="always")) 363 | self.assertIsNotNone(miner.match("loadModel start", full_search_strategy="never")) 364 | 365 | config = TemplateMinerConfig() 366 | config.parametrize_numeric_tokens = False 367 | miner = TemplateMiner(config=config) 368 | print(miner.add_log_message("training4Model start")) 369 | print(miner.add_log_message("loadModel start")) 370 | print(miner.add_log_message("loadModel stop")) 371 | print(miner.add_log_message("this is a test")) 372 | self.assertIsNotNone(miner.match("loadModel start", full_search_strategy="fallback")) 373 | self.assertIsNotNone(miner.match("loadModel start", full_search_strategy="always")) 374 | self.assertIsNotNone(miner.match("loadModel start", full_search_strategy="never")) 375 | 376 | self.assertIsNone(miner.match("", full_search_strategy="never")) 377 | 
self.assertIsNone(miner.match("", full_search_strategy="always")) 378 | self.assertIsNone(miner.match("", full_search_strategy="fallback")) 379 | 380 | print(miner.add_log_message("")) 381 | self.assertIsNotNone(miner.match("", full_search_strategy="never")) 382 | self.assertIsNotNone(miner.match("", full_search_strategy="always")) 383 | self.assertIsNotNone(miner.match("", full_search_strategy="fallback")) 384 | -------------------------------------------------------------------------------- /drain3/template_miner.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | 3 | import base64 4 | import logging 5 | import re 6 | import time 7 | import zlib 8 | from typing import Optional, Mapping, MutableMapping, NamedTuple, Sequence, Tuple, Union 9 | 10 | import jsonpickle # type: ignore[import] 11 | from cachetools import LRUCache, cachedmethod 12 | 13 | from drain3.drain import Drain, DrainBase, LogCluster 14 | from drain3.masking import LogMasker 15 | from drain3.persistence_handler import PersistenceHandler 16 | from drain3.simple_profiler import SimpleProfiler, NullProfiler, Profiler 17 | from drain3.template_miner_config import TemplateMinerConfig 18 | 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | config_filename = 'drain3.ini' 23 | 24 | ExtractedParameter = NamedTuple("ExtractedParameter", [("value", str), ("mask_name", str)]) 25 | 26 | 27 | class TemplateMiner: 28 | 29 | def __init__(self, 30 | persistence_handler: Optional[PersistenceHandler] = None, 31 | config: Optional[TemplateMinerConfig] = None): 32 | """ 33 | Wrapper for Drain with persistence and masking support 34 | :param persistence_handler: The type of persistence to use. When None, no persistence is applied. 35 | :param config: Configuration object. When none, configuration is loaded from default .ini file (if exist) 36 | """ 37 | logger.info("Starting Drain3 template miner") 38 | 39 | if config is None: 40 | logger.info(f"Loading configuration from {config_filename}") 41 | config = TemplateMinerConfig() 42 | config.load(config_filename) 43 | 44 | self.config = config 45 | 46 | self.profiler: Profiler = NullProfiler() 47 | 48 | if self.config.profiling_enabled: 49 | self.profiler = SimpleProfiler() 50 | 51 | self.persistence_handler = persistence_handler 52 | 53 | param_str = f"{self.config.mask_prefix}*{self.config.mask_suffix}" 54 | 55 | # Follow the configuration in the configuration file to instantiate Drain 56 | # target_obj will be "Drain" if the engine argument is not specified. 
57 | target_obj = self.config.engine 58 | if target_obj not in ["Drain", "JaccardDrain"]: 59 | raise ValueError(f"Invalid matched_pattern: {target_obj}, must be either 'Drain' or 'JaccardDrain'") 60 | 61 | self.drain: DrainBase = globals()[target_obj]( 62 | sim_th=self.config.drain_sim_th, 63 | depth=self.config.drain_depth, 64 | max_children=self.config.drain_max_children, 65 | max_clusters=self.config.drain_max_clusters, 66 | extra_delimiters=self.config.drain_extra_delimiters, 67 | profiler=self.profiler, 68 | param_str=param_str, 69 | parametrize_numeric_tokens=self.config.parametrize_numeric_tokens 70 | ) 71 | 72 | self.masker = LogMasker(self.config.masking_instructions, self.config.mask_prefix, self.config.mask_suffix) 73 | self.parameter_extraction_cache: MutableMapping[Tuple[str, bool], str] = \ 74 | LRUCache(self.config.parameter_extraction_cache_capacity) 75 | self.last_save_time = time.time() 76 | 77 | if persistence_handler is not None: 78 | self.load_state() 79 | 80 | def load_state(self) -> None: 81 | logger.info("Checking for saved state") 82 | 83 | assert self.persistence_handler is not None 84 | 85 | state = self.persistence_handler.load_state() 86 | if state is None: 87 | logger.info("Saved state not found") 88 | return 89 | 90 | if self.config.snapshot_compress_state: 91 | state = zlib.decompress(base64.b64decode(state)) 92 | 93 | loaded_drain: Drain = jsonpickle.loads(state, keys=True) 94 | 95 | # json-pickle encoded keys as string by default, so we have to convert those back to int 96 | # this is only relevant for backwards compatibility when loading a snapshot of drain <= v0.9.1 97 | # which did not use json-pickle's keys=true 98 | if len(loaded_drain.id_to_cluster) > 0 and isinstance(next(iter(loaded_drain.id_to_cluster.keys())), str): 99 | loaded_drain.id_to_cluster = {int(k): v for k, v in list(loaded_drain.id_to_cluster.items())} 100 | if self.config.drain_max_clusters: 101 | cache: MutableMapping[int, Optional[LogCluster]] = LRUCache(maxsize=self.config.drain_max_clusters) 102 | cache.update(loaded_drain.id_to_cluster) 103 | loaded_drain.id_to_cluster = cache 104 | 105 | self.drain.id_to_cluster = loaded_drain.id_to_cluster 106 | self.drain.clusters_counter = loaded_drain.clusters_counter 107 | self.drain.root_node = loaded_drain.root_node 108 | 109 | logger.info(f"Restored {len(loaded_drain.clusters)} clusters " 110 | f"built from {loaded_drain.get_total_cluster_size()} messages") 111 | 112 | def save_state(self, snapshot_reason: str) -> None: 113 | assert self.persistence_handler is not None 114 | 115 | state = jsonpickle.dumps(self.drain, keys=True).encode('utf-8') 116 | if self.config.snapshot_compress_state: 117 | state = base64.b64encode(zlib.compress(state)) 118 | 119 | logger.info(f"Saving state of {len(self.drain.clusters)} clusters " 120 | f"with {self.drain.get_total_cluster_size()} messages, {len(state)} bytes, " 121 | f"reason: {snapshot_reason}") 122 | self.persistence_handler.save_state(state) 123 | 124 | def get_snapshot_reason(self, change_type: str, cluster_id: int) -> Optional[str]: 125 | if change_type != "none": 126 | return f"{change_type} ({cluster_id})" 127 | 128 | diff_time_sec = time.time() - self.last_save_time 129 | if diff_time_sec >= self.config.snapshot_interval_minutes * 60: 130 | return "periodic" 131 | 132 | return None 133 | 134 | def add_log_message(self, log_message: str) -> Mapping[str, Union[str, int]]: 135 | self.profiler.start_section("total") 136 | 137 | self.profiler.start_section("mask") 138 | masked_content = 
self.masker.mask(log_message) 139 | self.profiler.end_section() 140 | 141 | self.profiler.start_section("drain") 142 | cluster, change_type = self.drain.add_log_message(masked_content) 143 | self.profiler.end_section("drain") 144 | result: Mapping[str, Union[str, int]] = { 145 | "change_type": change_type, 146 | "cluster_id": cluster.cluster_id, 147 | "cluster_size": cluster.size, 148 | "template_mined": cluster.get_template(), 149 | "cluster_count": len(self.drain.clusters) 150 | } 151 | 152 | if self.persistence_handler is not None: 153 | self.profiler.start_section("save_state") 154 | snapshot_reason = self.get_snapshot_reason(change_type, cluster.cluster_id) 155 | if snapshot_reason: 156 | self.save_state(snapshot_reason) 157 | self.last_save_time = time.time() 158 | self.profiler.end_section() 159 | 160 | self.profiler.end_section("total") 161 | self.profiler.report(self.config.profiling_report_sec) 162 | return result 163 | 164 | def match(self, log_message: str, full_search_strategy: str = "never") -> Optional[LogCluster]: 165 | """ 166 | Mask log message and match against an already existing cluster. 167 | Match shall be perfect (sim_th=1.0). 168 | New cluster will not be created as a result of this call, nor any cluster modifications. 169 | 170 | :param log_message: log message to match 171 | :param full_search_strategy: when to perform full cluster search. 172 | (1) "never" is the fastest, will always perform a tree search [O(log(n)] but might produce 173 | false negatives (wrong mismatches) on some edge cases; 174 | (2) "fallback" will perform a linear search [O(n)] among all clusters with the same token count, but only in 175 | case tree search found no match. 176 | It should not have false negatives, however tree-search may find a non-optimal match with 177 | more wildcard parameters than necessary; 178 | (3) "always" is the slowest. It will select the best match among all known clusters, by always evaluating 179 | all clusters with the same token count, and selecting the cluster with perfect all token match and least 180 | count of wildcard matches. 181 | :return: Matched cluster or None if no match found. 182 | """ 183 | 184 | masked_content = self.masker.mask(log_message) 185 | matched_cluster = self.drain.match(masked_content, full_search_strategy) 186 | return matched_cluster 187 | 188 | def get_parameter_list(self, log_template: str, log_message: str) -> Sequence[str]: 189 | """ 190 | Extract parameters from a log message according to a provided template that was generated 191 | by calling `add_log_message()`. 192 | 193 | This function is deprecated. Please use extract_parameters instead. 194 | 195 | :param log_template: log template corresponding to the log message 196 | :param log_message: log message to extract parameters from 197 | :return: An ordered list of parameter values present in the log message. 198 | """ 199 | 200 | extracted_parameters = self.extract_parameters(log_template, log_message, exact_matching=False) 201 | if not extracted_parameters: 202 | return [] 203 | return [parameter.value for parameter in extracted_parameters] 204 | 205 | def extract_parameters(self, 206 | log_template: str, 207 | log_message: str, 208 | exact_matching: bool = True) -> Optional[Sequence[ExtractedParameter]]: 209 | """ 210 | Extract parameters from a log message according to a provided template that was generated 211 | by calling `add_log_message()`. 
212 | 213 | For most accurate results, it is recommended that 214 | - Each `MaskingInstruction` has a unique `mask_with` value, 215 | - No `MaskingInstruction` has a `mask_with` value of `*`, 216 | - The regex-patterns of `MaskingInstruction` do not use unnamed back-references; 217 | instead use back-references to named groups e.g. `(?P=some-name)`. 218 | 219 | :param log_template: log template corresponding to the log message 220 | :param log_message: log message to extract parameters from 221 | :param exact_matching: whether to apply the correct masking-patterns to match parameters, or try to approximate; 222 | disabling exact_matching may be faster but may lead to situations in which parameters 223 | are wrongly identified. 224 | :return: A ordered list of ExtractedParameter for the log message 225 | or None if log_message does not correspond to log_template. 226 | """ 227 | 228 | for delimiter in self.config.drain_extra_delimiters: 229 | log_message = re.sub(delimiter, " ", log_message) 230 | 231 | template_regex, param_group_name_to_mask_name = self._get_template_parameter_extraction_regex( 232 | log_template, exact_matching) 233 | 234 | # Parameters are represented by specific named groups inside template_regex. 235 | parameter_match = re.match(template_regex, log_message) 236 | 237 | # log template does not match template 238 | if not parameter_match: 239 | return None 240 | 241 | # create list of extracted parameters 242 | extracted_parameters = [] 243 | for group_name, parameter in parameter_match.groupdict().items(): 244 | if group_name in param_group_name_to_mask_name: 245 | mask_name = param_group_name_to_mask_name[group_name] 246 | extracted_parameter = ExtractedParameter(parameter, mask_name) 247 | extracted_parameters.append(extracted_parameter) 248 | 249 | return extracted_parameters 250 | 251 | @cachedmethod(lambda self: self.parameter_extraction_cache) 252 | def _get_template_parameter_extraction_regex(self, 253 | log_template: str, 254 | exact_matching: bool) -> Tuple[str, Mapping[str, str]]: 255 | param_group_name_to_mask_name = {} 256 | param_name_counter = [0] 257 | 258 | def get_next_param_name() -> str: 259 | param_group_name = f"p_{str(param_name_counter[0])}" 260 | param_name_counter[0] += 1 261 | return param_group_name 262 | 263 | # Create a named group with the respective patterns for the given mask-name. 264 | def create_capture_regex(_mask_name: str) -> str: 265 | allowed_patterns = [] 266 | if exact_matching: 267 | # get all possible regex patterns from masking instructions that match this mask name 268 | masking_instructions = self.masker.instructions_by_mask_name(_mask_name) 269 | for mi in masking_instructions: 270 | # MaskingInstruction may already contain named groups. 271 | # We replace group names in those named groups, to avoid conflicts due to duplicate names. 272 | if hasattr(mi, 'regex') and hasattr(mi, 'pattern'): 273 | mi_groups = mi.regex.groupindex.keys() 274 | pattern: str = mi.pattern 275 | else: 276 | # non regex masking instructions - support only non-exact matching 277 | mi_groups = [] 278 | pattern = ".+?" 
279 | 280 | for group_name in mi_groups: 281 | param_group_name = get_next_param_name() 282 | 283 | def replace_captured_param_name(param_pattern: str) -> str: 284 | _search_str = param_pattern.format(group_name) 285 | _replace_str = param_pattern.format(param_group_name) 286 | return pattern.replace(_search_str, _replace_str) 287 | 288 | pattern = replace_captured_param_name("(?P={}") 289 | pattern = replace_captured_param_name("(?P<{}>") 290 | 291 | # support unnamed back-references in masks (simple cases only) 292 | pattern = re.sub(r"\\(?!0)\d{1,2}", r"(?:.+?)", pattern) 293 | allowed_patterns.append(pattern) 294 | 295 | if not exact_matching or _mask_name == "*": 296 | allowed_patterns.append(r".+?") 297 | 298 | # Give each capture group a unique name to avoid conflicts. 299 | param_group_name = get_next_param_name() 300 | param_group_name_to_mask_name[param_group_name] = _mask_name 301 | joined_patterns = "|".join(allowed_patterns) 302 | capture_regex = f"(?P<{param_group_name}>{joined_patterns})" 303 | return capture_regex 304 | 305 | # For every mask in the template, replace it with a named group of all 306 | # possible masking-patterns it could represent (in order). 307 | mask_names = set(self.masker.mask_names) 308 | 309 | # the Drain catch-all mask 310 | mask_names.add("*") 311 | 312 | escaped_prefix = re.escape(self.masker.mask_prefix) 313 | escaped_suffix = re.escape(self.masker.mask_suffix) 314 | template_regex = re.escape(log_template) 315 | 316 | # replace each mask name with a proper regex that captures it 317 | for mask_name in mask_names: 318 | search_str = escaped_prefix + re.escape(mask_name) + escaped_suffix 319 | while True: 320 | rep_str = create_capture_regex(mask_name) 321 | # Replace one-by-one to get a new param group name for each replacement. 322 | template_regex_new = template_regex.replace(search_str, rep_str, 1) 323 | # Break when all replaces for this mask are done. 324 | if template_regex_new == template_regex: 325 | break 326 | template_regex = template_regex_new 327 | 328 | # match also messages with multiple spaces or other whitespace chars between tokens 329 | template_regex = re.sub(r"\\ ", r"\\s+", template_regex) 330 | template_regex = f"^{template_regex}$" 331 | return template_regex, param_group_name_to_mask_name 332 | -------------------------------------------------------------------------------- /drain3/drain.py: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | # This file implements the Drain algorithm for log parsing. 
3 | # Based on https://github.com/logpai/logparser/blob/master/logparser/Drain/Drain.py by LogPAI team 4 | 5 | from abc import ABC, abstractmethod 6 | from typing import cast, Collection, IO, Iterable, MutableMapping, MutableSequence, Optional, Sequence, Tuple, \ 7 | TYPE_CHECKING, TypeVar, Union 8 | 9 | from cachetools import LRUCache, Cache 10 | 11 | from drain3.simple_profiler import Profiler, NullProfiler 12 | 13 | 14 | class LogCluster: 15 | __slots__ = ["log_template_tokens", "cluster_id", "size"] 16 | 17 | def __init__(self, log_template_tokens: Iterable[str], cluster_id: int) -> None: 18 | self.log_template_tokens = tuple(log_template_tokens) 19 | self.cluster_id = cluster_id 20 | self.size = 1 21 | 22 | def get_template(self) -> str: 23 | return ' '.join(self.log_template_tokens) 24 | 25 | def __str__(self) -> str: 26 | return f"ID={str(self.cluster_id).ljust(5)} : size={str(self.size).ljust(10)}: {self.get_template()}" 27 | 28 | 29 | _T = TypeVar("_T") 30 | if TYPE_CHECKING: 31 | class _LRUCache(LRUCache[int, Optional[LogCluster]]): 32 | # see https://github.com/python/mypy/issues/4148 for this hack 33 | ... 34 | else: 35 | _LRUCache = LRUCache 36 | 37 | class LogClusterCache(_LRUCache): 38 | """ 39 | Least Recently Used (LRU) cache which allows callers to conditionally skip 40 | cache eviction algorithm when accessing elements. 41 | """ 42 | 43 | def __missing__(self, key: int) -> None: 44 | return None 45 | 46 | def get(self, key: int, _: Union[Optional[LogCluster], _T] = None) -> Optional[LogCluster]: 47 | """ 48 | Returns the value of the item with the specified key without updating 49 | the cache eviction algorithm. 50 | """ 51 | return Cache.__getitem__(self, key) 52 | 53 | 54 | class Node: 55 | __slots__ = ["key_to_child_node", "cluster_ids"] 56 | 57 | def __init__(self) -> None: 58 | self.key_to_child_node: MutableMapping[str, Node] = {} 59 | self.cluster_ids: Sequence[int] = [] 60 | 61 | 62 | class DrainBase(ABC): 63 | def __init__(self, 64 | depth: int = 4, 65 | sim_th: float = 0.4, 66 | max_children: int = 100, 67 | max_clusters: Optional[int] = None, 68 | extra_delimiters: Sequence[str] = (), 69 | profiler: Profiler = NullProfiler(), 70 | param_str: str = "<*>", 71 | parametrize_numeric_tokens: bool = True) -> None: 72 | """ 73 | Create a new Drain instance. 74 | 75 | :param depth: max depth levels of log clusters. Minimum is 3. 76 | For example, for depth==4, Root is considered depth level 1. 77 | Token count is considered depth level 2. 78 | First log token is considered depth level 3. 79 | Log clusters below first token node are considered depth level 4. 80 | :param sim_th: similarity threshold - if percentage of similar tokens for a log message is below this 81 | number, a new log cluster will be created. 82 | :param max_children: max number of children of an internal node 83 | :param max_clusters: max number of tracked clusters (unlimited by default). 84 | When this number is reached, model starts replacing old clusters 85 | with a new ones according to the LRU policy. 86 | :param extra_delimiters: delimiters to apply when splitting log message into words (in addition to whitespace). 87 | :param parametrize_numeric_tokens: whether to treat tokens that contains at least one digit 88 | as template parameters. 
89 | """ 90 | if depth < 3: 91 | raise ValueError("depth argument must be at least 3") 92 | 93 | self.log_cluster_depth = depth 94 | self.max_node_depth = depth - 2 # max depth of a prefix tree node, starting from zero 95 | self.sim_th = sim_th 96 | self.max_children = max_children 97 | self.root_node = Node() 98 | self.profiler = profiler 99 | self.extra_delimiters = extra_delimiters 100 | self.max_clusters = max_clusters 101 | self.param_str = param_str 102 | self.parametrize_numeric_tokens = parametrize_numeric_tokens 103 | 104 | self.id_to_cluster: MutableMapping[int, Optional[LogCluster]] = \ 105 | {} if max_clusters is None else LogClusterCache(maxsize=max_clusters) 106 | self.clusters_counter = 0 107 | 108 | @property 109 | def clusters(self) -> Collection[LogCluster]: 110 | return cast(Collection[LogCluster], self.id_to_cluster.values()) 111 | 112 | @staticmethod 113 | def has_numbers(s: Iterable[str]) -> bool: 114 | return any(char.isdigit() for char in s) 115 | 116 | def fast_match(self, 117 | cluster_ids: Collection[int], 118 | tokens: Sequence[str], 119 | sim_th: float, 120 | include_params: bool) -> Optional[LogCluster]: 121 | """ 122 | Find the best match for a log message (represented as tokens) versus a list of clusters 123 | :param cluster_ids: List of clusters to match against (represented by their IDs) 124 | :param tokens: the log message, separated to tokens. 125 | :param sim_th: minimum required similarity threshold (None will be returned in no clusters reached it) 126 | :param include_params: consider tokens matched to wildcard parameters in similarity threshold. 127 | :return: Best match cluster or None 128 | """ 129 | match_cluster = None 130 | 131 | max_sim: Union[int, float] = -1 132 | max_param_count = -1 133 | max_cluster = None 134 | 135 | for cluster_id in cluster_ids: 136 | # Try to retrieve cluster from cache with bypassing eviction 137 | # algorithm as we are only testing candidates for a match. 
138 | cluster = self.id_to_cluster.get(cluster_id) 139 | if cluster is None: 140 | continue 141 | cur_sim, param_count = self.get_seq_distance(cluster.log_template_tokens, tokens, include_params) 142 | if cur_sim > max_sim or (cur_sim == max_sim and param_count > max_param_count): 143 | max_sim = cur_sim 144 | max_param_count = param_count 145 | max_cluster = cluster 146 | 147 | if max_sim >= sim_th: 148 | match_cluster = max_cluster 149 | 150 | return match_cluster 151 | 152 | def print_tree(self, file: Optional[IO[str]] = None, max_clusters: int = 5) -> None: 153 | self.print_node("root", self.root_node, 0, file, max_clusters) 154 | 155 | def print_node(self, token: str, node: Node, depth: int, file: Optional[IO[str]], max_clusters: int) -> None: 156 | out_str = '\t' * depth 157 | 158 | if depth == 0: 159 | out_str += f'<{token}>' 160 | elif depth == 1: 161 | if token.isdigit(): 162 | out_str += f'' 163 | else: 164 | out_str += f'<{token}>' 165 | else: 166 | out_str += f'"{token}"' 167 | 168 | if len(node.cluster_ids) > 0: 169 | out_str += f" (cluster_count={len(node.cluster_ids)})" 170 | 171 | print(out_str, file=file) 172 | 173 | for token, child in node.key_to_child_node.items(): 174 | self.print_node(token, child, depth + 1, file, max_clusters) 175 | 176 | for cid in node.cluster_ids[:max_clusters]: 177 | cluster = self.id_to_cluster[cid] 178 | out_str = '\t' * (depth + 1) + str(cluster) 179 | print(out_str, file=file) 180 | 181 | def get_content_as_tokens(self, content: str) -> Sequence[str]: 182 | content = content.strip() 183 | for delimiter in self.extra_delimiters: 184 | content = content.replace(delimiter, " ") 185 | content_tokens = content.split() 186 | return content_tokens 187 | 188 | def add_log_message(self, content: str) -> Tuple[LogCluster, str]: 189 | content_tokens = self.get_content_as_tokens(content) 190 | 191 | if self.profiler: 192 | self.profiler.start_section("tree_search") 193 | match_cluster = self.tree_search(self.root_node, content_tokens, self.sim_th, False) 194 | if self.profiler: 195 | self.profiler.end_section() 196 | 197 | # Match no existing log cluster 198 | if match_cluster is None: 199 | if self.profiler: 200 | self.profiler.start_section("create_cluster") 201 | self.clusters_counter += 1 202 | cluster_id = self.clusters_counter 203 | match_cluster = LogCluster(content_tokens, cluster_id) 204 | self.id_to_cluster[cluster_id] = match_cluster 205 | self.add_seq_to_prefix_tree(self.root_node, match_cluster) 206 | update_type = "cluster_created" 207 | 208 | # Add the new log message to the existing cluster 209 | else: 210 | if self.profiler: 211 | self.profiler.start_section("cluster_exist") 212 | new_template_tokens = self.create_template(content_tokens, match_cluster.log_template_tokens) 213 | if tuple(new_template_tokens) == match_cluster.log_template_tokens: 214 | update_type = "none" 215 | else: 216 | match_cluster.log_template_tokens = tuple(new_template_tokens) 217 | update_type = "cluster_template_changed" 218 | match_cluster.size += 1 219 | # Touch cluster to update its state in the cache. 
220 | # noinspection PyStatementEffect 221 | self.id_to_cluster[match_cluster.cluster_id] 222 | 223 | if self.profiler: 224 | self.profiler.end_section() 225 | 226 | return match_cluster, update_type 227 | 228 | def get_total_cluster_size(self) -> int: 229 | size = 0 230 | for c in self.id_to_cluster.values(): 231 | size += cast(LogCluster, c).size 232 | return size 233 | 234 | def get_clusters_ids_for_seq_len(self, seq_fir: Union[int, str]) -> Collection[int]: 235 | """ 236 | seq_fir: int/str - the first token of the sequence 237 | Return all clusters with the specified count of tokens 238 | """ 239 | 240 | def append_clusters_recursive(node: Node, id_list_to_fill: MutableSequence[int]) -> None: 241 | id_list_to_fill.extend(node.cluster_ids) 242 | for child_node in node.key_to_child_node.values(): 243 | append_clusters_recursive(child_node, id_list_to_fill) 244 | 245 | cur_node = self.root_node.key_to_child_node.get(str(seq_fir)) 246 | 247 | # no template with same token count 248 | if cur_node is None: 249 | return [] 250 | 251 | target: MutableSequence[int] = [] 252 | append_clusters_recursive(cur_node, target) 253 | return target 254 | 255 | @abstractmethod 256 | def tree_search(self, 257 | root_node: Node, 258 | tokens: Sequence[str], 259 | sim_th: float, 260 | include_params: bool) -> Optional[LogCluster]: 261 | ... 262 | 263 | @abstractmethod 264 | def add_seq_to_prefix_tree(self, root_node: Node, cluster: LogCluster) -> None: 265 | ... 266 | 267 | @abstractmethod 268 | def get_seq_distance(self, seq1: Sequence[str], seq2: Sequence[str], include_params: bool) -> Tuple[float, int]: 269 | ... 270 | 271 | @abstractmethod 272 | def create_template(self, seq1: Sequence[str], seq2: Sequence[str]) -> Sequence[str]: 273 | ... 274 | 275 | @abstractmethod 276 | def match(self, content: str, full_search_strategy: str = "never") -> Optional[LogCluster]: 277 | ... 
278 | 279 | 280 | class Drain(DrainBase): 281 | 282 | def tree_search(self, 283 | root_node: Node, 284 | tokens: Sequence[str], 285 | sim_th: float, 286 | include_params: bool) -> Optional[LogCluster]: 287 | 288 | # at first level, children are grouped by token (word) count 289 | token_count = len(tokens) 290 | cur_node = root_node.key_to_child_node.get(str(token_count)) 291 | 292 | # no template with same token count yet 293 | if cur_node is None: 294 | return None 295 | 296 | # handle case of empty log string - return the single cluster in that group 297 | if token_count == 0: 298 | return self.id_to_cluster.get(cur_node.cluster_ids[0]) 299 | 300 | # find the leaf node for this log - a path of nodes matching the first N tokens (N=tree depth) 301 | cur_node_depth = 1 302 | for token in tokens: 303 | # at max depth 304 | if cur_node_depth >= self.max_node_depth: 305 | break 306 | 307 | # this is last token 308 | if cur_node_depth == token_count: 309 | break 310 | 311 | key_to_child_node = cur_node.key_to_child_node 312 | cur_node = key_to_child_node.get(token) 313 | if cur_node is None: # no exact next token exist, try wildcard node 314 | cur_node = key_to_child_node.get(self.param_str) 315 | if cur_node is None: # no wildcard node exist 316 | return None 317 | 318 | cur_node_depth += 1 319 | 320 | # get best match among all clusters with same prefix, or None if no match is above sim_th 321 | cluster = self.fast_match(cur_node.cluster_ids, tokens, sim_th, include_params) 322 | return cluster 323 | 324 | def add_seq_to_prefix_tree(self, root_node: Node, cluster: LogCluster) -> None: 325 | token_count = len(cluster.log_template_tokens) 326 | token_count_str = str(token_count) 327 | if token_count_str not in root_node.key_to_child_node: 328 | first_layer_node = Node() 329 | root_node.key_to_child_node[token_count_str] = first_layer_node 330 | else: 331 | first_layer_node = root_node.key_to_child_node[token_count_str] 332 | 333 | cur_node = first_layer_node 334 | 335 | # handle case of empty log string 336 | if token_count == 0: 337 | cur_node.cluster_ids = [cluster.cluster_id] 338 | return 339 | 340 | current_depth = 1 341 | for token in cluster.log_template_tokens: 342 | 343 | # if at max depth or this is last token in template - add current log cluster to the leaf node 344 | if current_depth >= self.max_node_depth or current_depth >= token_count: 345 | # clean up stale clusters before adding a new one. 346 | new_cluster_ids = [] 347 | for cluster_id in cur_node.cluster_ids: 348 | if cluster_id in self.id_to_cluster: 349 | new_cluster_ids.append(cluster_id) 350 | new_cluster_ids.append(cluster.cluster_id) 351 | cur_node.cluster_ids = new_cluster_ids 352 | break 353 | 354 | # if token not matched in this layer of existing tree. 
355 | if token not in cur_node.key_to_child_node: 356 | if self.parametrize_numeric_tokens and self.has_numbers(token): 357 | if self.param_str not in cur_node.key_to_child_node: 358 | new_node = Node() 359 | cur_node.key_to_child_node[self.param_str] = new_node 360 | cur_node = new_node 361 | else: 362 | cur_node = cur_node.key_to_child_node[self.param_str] 363 | 364 | else: 365 | if self.param_str in cur_node.key_to_child_node: 366 | if len(cur_node.key_to_child_node) < self.max_children: 367 | new_node = Node() 368 | cur_node.key_to_child_node[token] = new_node 369 | cur_node = new_node 370 | else: 371 | cur_node = cur_node.key_to_child_node[self.param_str] 372 | else: 373 | if len(cur_node.key_to_child_node) + 1 < self.max_children: 374 | new_node = Node() 375 | cur_node.key_to_child_node[token] = new_node 376 | cur_node = new_node 377 | elif len(cur_node.key_to_child_node) + 1 == self.max_children: 378 | new_node = Node() 379 | cur_node.key_to_child_node[self.param_str] = new_node 380 | cur_node = new_node 381 | else: 382 | cur_node = cur_node.key_to_child_node[self.param_str] 383 | 384 | # if the token is matched 385 | else: 386 | cur_node = cur_node.key_to_child_node[token] 387 | 388 | current_depth += 1 389 | 390 | # seq1 is a template, seq2 is the log to match 391 | def get_seq_distance(self, seq1: Sequence[str], seq2: Sequence[str], include_params: bool) -> Tuple[float, int]: 392 | assert len(seq1) == len(seq2) 393 | 394 | # sequences are empty - full match 395 | if len(seq1) == 0: 396 | return 1.0, 0 397 | 398 | sim_tokens = 0 399 | param_count = 0 400 | 401 | for token1, token2 in zip(seq1, seq2): 402 | if token1 == self.param_str: 403 | param_count += 1 404 | continue 405 | if token1 == token2: 406 | sim_tokens += 1 407 | 408 | if include_params: 409 | sim_tokens += param_count 410 | 411 | ret_val = float(sim_tokens) / len(seq1) 412 | 413 | return ret_val, param_count 414 | 415 | def create_template(self, seq1: Sequence[str], seq2: Sequence[str]) -> Sequence[str]: 416 | """ 417 | Loop through two sequences and create a template sequence that 418 | replaces unmatched tokens with the parameter string. 419 | 420 | :param seq1: first sequence 421 | :param seq2: second sequence 422 | :return: template sequence with param_str in place of unmatched tokens 423 | """ 424 | assert len(seq1) == len(seq2) 425 | return [token2 if token1 == token2 else self.param_str for token1, token2 in zip(seq1, seq2)] 426 | 427 | def match(self, content: str, full_search_strategy: str = "never") -> Optional[LogCluster]: 428 | """ 429 | Match log message against an already existing cluster. 430 | Match shall be perfect (sim_th=1.0). 431 | New cluster will not be created as a result of this call, nor any cluster modifications. 432 | 433 | :param content: log message to match 434 | :param full_search_strategy: when to perform full cluster search. 435 | (1) "never" is the fastest, will always perform a tree search [O(log(n)] but might produce 436 | false negatives (wrong mismatches) on some edge cases; 437 | (2) "fallback" will perform a linear search [O(n)] among all clusters with the same token count, but only in 438 | case tree search found no match. 439 | It should not have false negatives, however tree-search may find a non-optimal match with 440 | more wildcard parameters than necessary; 441 | (3) "always" is the slowest. 
It will select the best match among all known clusters, by always evaluating 442 | all clusters with the same token count, and selecting the cluster with perfect all token match and least 443 | count of wildcard matches. 444 | :return: Matched cluster or None if no match found. 445 | """ 446 | 447 | assert full_search_strategy in ["always", "never", "fallback"] 448 | 449 | required_sim_th = 1.0 450 | content_tokens = self.get_content_as_tokens(content) 451 | 452 | # consider for future improvement: 453 | # It is possible to implement a recursive tree_search (first try exact token match and fallback to 454 | # wildcard match). This will be both accurate and more efficient than the linear full search 455 | # also fast match can be optimized when exact match is required by early 456 | # quitting on less than exact cluster matches. 457 | def full_search() -> Optional[LogCluster]: 458 | all_ids = self.get_clusters_ids_for_seq_len(len(content_tokens)) 459 | cluster = self.fast_match(all_ids, content_tokens, required_sim_th, include_params=True) 460 | return cluster 461 | 462 | if full_search_strategy == "always": 463 | return full_search() 464 | 465 | match_cluster = self.tree_search(self.root_node, content_tokens, required_sim_th, include_params=True) 466 | if match_cluster is not None: 467 | return match_cluster 468 | 469 | if full_search_strategy == "never": 470 | return None 471 | 472 | return full_search() 473 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Drain3 2 | 3 | ## Important Update 4 | 5 | Drain3 was moved to the `logpai` GitHub organization (which is also the home for the original Drain implementation). We always welcome more contributors and maintainers to join us and push the project forward. We welcome more contributions and variants of implementations if you find practical enhancements to the algorithm in production scenarios. 6 | 7 | ## Introduction 8 | 9 | Drain3 is an online log template miner that can extract templates (clusters) from a stream of log messages in a timely 10 | manner. It employs a parse tree with fixed depth to guide the log group search process, which effectively avoids 11 | constructing a very deep and unbalanced tree. 12 | 13 | Drain3 continuously learns on-the-fly and extracts log templates from raw log entries. 14 | 15 | #### Example: 16 | 17 | For the input: 18 | 19 | ``` 20 | connected to 10.0.0.1 21 | connected to 192.168.0.1 22 | Hex number 0xDEADBEAF 23 | user davidoh logged in 24 | user eranr logged in 25 | ``` 26 | 27 | Drain3 extracts the following templates: 28 | 29 | ``` 30 | ID=1 : size=2 : connected to <:IP:> 31 | ID=2 : size=1 : Hex number <:HEX:> 32 | ID=3 : size=2 : user <:*:> logged in 33 | ``` 34 | 35 | Full sample program output: 36 | 37 | ``` 38 | Starting Drain3 template miner 39 | Checking for saved state 40 | Saved state not found 41 | Drain3 started with 'FILE' persistence 42 | Starting training mode. 
Reading from std-in ('q' to finish) 43 | > connected to 10.0.0.1 44 | Saving state of 1 clusters with 1 messages, 528 bytes, reason: cluster_created (1) 45 | {"change_type": "cluster_created", "cluster_id": 1, "cluster_size": 1, "template_mined": "connected to <:IP:>", "cluster_count": 1} 46 | Parameters: [ExtractedParameter(value='10.0.0.1', mask_name='IP')] 47 | > connected to 192.168.0.1 48 | {"change_type": "none", "cluster_id": 1, "cluster_size": 2, "template_mined": "connected to <:IP:>", "cluster_count": 1} 49 | Parameters: [ExtractedParameter(value='192.168.0.1', mask_name='IP')] 50 | > Hex number 0xDEADBEAF 51 | Saving state of 2 clusters with 3 messages, 584 bytes, reason: cluster_created (2) 52 | {"change_type": "cluster_created", "cluster_id": 2, "cluster_size": 1, "template_mined": "Hex number <:HEX:>", "cluster_count": 2} 53 | Parameters: [ExtractedParameter(value='0xDEADBEAF', mask_name='HEX')] 54 | > user davidoh logged in 55 | Saving state of 3 clusters with 4 messages, 648 bytes, reason: cluster_created (3) 56 | {"change_type": "cluster_created", "cluster_id": 3, "cluster_size": 1, "template_mined": "user davidoh logged in", "cluster_count": 3} 57 | Parameters: [] 58 | > user eranr logged in 59 | Saving state of 3 clusters with 5 messages, 644 bytes, reason: cluster_template_changed (3) 60 | {"change_type": "cluster_template_changed", "cluster_id": 3, "cluster_size": 2, "template_mined": "user <:*:> logged in", "cluster_count": 3} 61 | Parameters: [ExtractedParameter(value='eranr', mask_name='*')] 62 | > q 63 | Training done. Mined clusters: 64 | ID=1 : size=2 : connected to <:IP:> 65 | ID=2 : size=1 : Hex number <:HEX:> 66 | ID=3 : size=2 : user <:*:> logged in 67 | ``` 68 | 69 | This project is an upgrade of the original [Drain](https://github.com/logpai/logparser/blob/master/logparser/Drain) 70 | project by LogPAI from Python 2.7 to Python 3.6 or later with additional features and bug-fixes. 71 | 72 | Read more information about Drain from the following paper: 73 | 74 | - Pinjia He, Jieming Zhu, Zibin Zheng, and Michael R. 75 | Lyu. [Drain: An Online Log Parsing Approach with Fixed Depth Tree](http://jiemingzhu.github.io/pub/pjhe_icws2017.pdf), 76 | Proceedings of the 24th International Conference on Web Services (ICWS), 2017. 77 | 78 | A Drain3 use case is presented in this blog 79 | post: [Use open source Drain3 log-template mining project to monitor for network outages](https://developer.ibm.com/blogs/how-mining-log-templates-can-help-ai-ops-in-cloud-scale-data-centers) 80 | . 81 | 82 | #### New features 83 | 84 | - [**Persistence**](#persistence). Save and load Drain state into an [Apache Kafka](https://kafka.apache.org) 85 | topic, [Redis](https://redis.io/) or a file. 86 | - **Streaming**. Support feeding Drain with messages one-be-one. 87 | - [**Masking**](#masking). Replace some message parts (e.g numbers, IPs, emails) with wildcards. This improves the 88 | accuracy of template mining. 89 | - [**Packaging**](#installation). As a pip package. 90 | - [**Configuration**](#configuration). Support for configuring Drain3 using an `.ini` file or a configuration object. 91 | - [**Memory efficiency**](#memory-efficiency). Decrease the memory footprint of internal data structures and introduce 92 | cache to control max memory consumed (thanks to @StanislawSwierc) 93 | - [**Inference mode**](#training-vs-inference-modes). 
In case you want to separate training and inference phases, Drain3
provides a function for *fast* matching against already-learned clusters (templates) only, without the usage of
regular expressions.
- [**Parameter extraction**](#parameter-extraction). Accurate extraction of the variable parts from a log message as an
ordered list, based on its mined template and the defined masking instructions (thanks to @Impelon).

#### Expected Input and Output

Although Drain3 can ingest a full raw log message, template mining accuracy can be improved if you feed it with
only the unstructured free-text portion of log messages, by first removing structured parts like timestamp, hostname,
severity, etc.

The output is a dictionary with the following fields:

- `change_type` - indicates whether a new template was identified, an existing template was changed, or the message was
added to an existing cluster.
- `cluster_id` - Sequential ID of the cluster that the log belongs to.
- `cluster_size` - The size (message count) of the cluster that the log belongs to.
- `cluster_count` - Number of clusters seen so far.
- `template_mined` - The latest template of the above `cluster_id`.

## Configuration

Drain3 is configured using [configparser](https://docs.python.org/3.4/library/configparser.html). By default, the config
filename is `drain3.ini` in the working directory. Drain3 can also be configured by passing
a [TemplateMinerConfig](drain3/template_miner_config.py) object to the [TemplateMiner](drain3/template_miner.py)
constructor.

Primary configuration parameters:

- `[DRAIN]/sim_th` - similarity threshold. If the percentage of similar tokens for a log message is below this number, a new
log cluster will be created (default 0.4).
- `[DRAIN]/depth` - max depth levels of log clusters. Minimum is 3 (default 4).
- `[DRAIN]/max_children` - max number of children of an internal node (default 100).
- `[DRAIN]/max_clusters` - max number of tracked clusters (unlimited by default). When this number is reached, the model
starts replacing old clusters with new ones according to the LRU cache eviction policy.
- `[DRAIN]/extra_delimiters` - delimiters to apply when splitting a log message into words, in addition to whitespace
(default none). Format is a Python list, e.g. `['_', ':']`.
- `[MASKING]/masking` - parameter masking instructions, in JSON format (default "").
- `[MASKING]/mask_prefix` & `[MASKING]/mask_suffix` - the wrapping of identified parameters in templates. By default, it
is `<` and `>` respectively.
- `[SNAPSHOT]/snapshot_interval_minutes` - time interval for new snapshots (default 1).
- `[SNAPSHOT]/compress_state` - whether to compress the state before saving it. This can be useful when using Kafka
persistence.

## Masking

This feature allows masking specific variable parts of the log message with keywords before the message is passed to
Drain. Well-defined masking can improve template mining accuracy.

Template parameters that do not match any custom mask in the preliminary masking phase are replaced with `<*>` by the
Drain core.

Use a list of regular expressions in the configuration file with the format `{'regex_pattern', 'mask_with'}` to set
custom masking.
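Masking can also be configured programmatically by appending `MaskingInstruction` objects to a `TemplateMinerConfig`
before constructing the `TemplateMiner`. A minimal sketch, assuming the default mask prefix/suffix (`<` / `>`); the regex
patterns mirror the `.ini` example below and are illustrative only:

```python
from drain3 import TemplateMiner
from drain3.masking import MaskingInstruction
from drain3.template_miner_config import TemplateMinerConfig

config = TemplateMinerConfig()
# Mask IPv4 addresses and integers before template mining.
config.masking_instructions.append(MaskingInstruction(
    r"((?<=[^A-Za-z0-9])|^)(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})((?=[^A-Za-z0-9])|$)", "IP"))
config.masking_instructions.append(MaskingInstruction(
    r"((?<=[^A-Za-z0-9])|^)([\-\+]?\d+)((?=[^A-Za-z0-9])|$)", "NUM"))

template_miner = TemplateMiner(config=config)
result = template_miner.add_log_message("connected to 10.0.0.1 port 62891")
# With the default mask prefix/suffix this should yield something like:
#   connected to <IP> port <NUM>
print(result["template_mined"])
```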
138 | ## Masking 139 | 140 | This feature allows masking specific variable parts of a log message with keywords before it is passed to Drain. A 141 | well-defined masking can improve template mining accuracy. 142 | 143 | Template parameters that do not match any custom mask in the preliminary masking phase are replaced with `<*>` by the Drain 144 | core. 145 | 146 | To set custom masking, use a list of regular expressions in the configuration file, each entry in the 147 | format `{"regex_pattern": ..., "mask_with": ...}`. 148 | 149 | For example, the following masking instructions in `drain3.ini` will mask IP addresses and integers: 150 | 151 | ``` 152 | [MASKING] 153 | masking = [ 154 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"}, 155 | {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "mask_with": "NUM"} 156 | ] 157 | 158 | ``` 159 | 160 | ## Persistence 161 | 162 | The persistence feature saves and loads a snapshot of Drain3 state in a (compressed) json format. This feature adds 163 | restart resiliency to Drain, allowing it to continue where it left off and keep its learned knowledge across restarts. 164 | 165 | Drain3 state includes the search tree and all the clusters that were identified up until snapshot time. 166 | 167 | The snapshot also persists the number of log messages matched by each cluster, and its `cluster_id`. 168 | 169 | An example of a snapshot: 170 | 171 | ```json 172 | { 173 | "clusters": [ 174 | { 175 | "cluster_id": 1, 176 | "log_template_tokens": [ 177 | "aa", 178 | "aa", 179 | "<*>" 180 | ], 181 | "py/object": "drain3_core.LogCluster", 182 | "size": 2 183 | }, 184 | { 185 | "cluster_id": 2, 186 | "log_template_tokens": [ 187 | "My", 188 | "IP", 189 | "is", 190 | "<IP>" 191 | ], 192 | "py/object": "drain3_core.LogCluster", 193 | "size": 1 194 | } 195 | ] 196 | } 197 | ``` 198 | 199 | This example snapshot persists two clusters with the templates: 200 | 201 | `["aa", "aa", "<*>"]` - occurs twice 202 | 203 | `["My", "IP", "is", "<IP>"]` - occurs once 204 | 205 | Snapshots are created on the following events: 206 | 207 | - `cluster_created` - whenever a new template is identified 208 | - `cluster_template_changed` - whenever the template of a cluster is updated 209 | - `periodic` - n minutes after the last snapshot. This is intended to save cluster sizes even if no new template 210 | was identified. 211 | 212 | Drain3 currently supports the following persistence modes: 213 | 214 | - **Kafka** - The snapshot is saved in a dedicated topic used only for snapshots - the last message in this topic is the 215 | last snapshot that will be loaded after restart. For Kafka persistence you need to provide `topic_name`. You may 216 | also provide other `kwargs` that are supported by `kafka.KafkaConsumer` and `kafka.Producer`, e.g. `bootstrap_servers` 217 | to change the Kafka endpoint (default is `localhost:9092`). 218 | 219 | - **Redis** - The snapshot is saved to a key in a Redis database (contributed by @matabares). 220 | 221 | - **File** - The snapshot is saved to a file. 222 | 223 | - **Memory** - The snapshot is saved to an in-memory object. 224 | 225 | - **None** - No persistence. 226 | 227 | Drain3 persistence can easily be extended to another medium / database by inheriting 228 | the [PersistenceHandler](drain3/persistence_handler.py) class. A usage sketch with the file-based handler is shown after the next section. 229 | 230 | ## Training vs. Inference modes 231 | 232 | In some use cases, it is required to separate the training and inference phases. 233 | 234 | In the training phase you should call `template_miner.add_log_message(log_line)`. This matches the log line against an 235 | existing cluster (if similarity is above the threshold) or creates a new cluster. It may also change the template of an 236 | existing cluster. 237 | 238 | In inference mode you should call `template_miner.match(log_line)`. This matches the log line against previously learned 239 | clusters only. No new clusters are created and the templates of existing clusters are not changed. The match to an existing cluster 240 | has to be perfect, otherwise `None` is returned. You can use the persistence option to load previously trained clusters 241 | before inference. 242 |
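For example, a minimal sketch combining file-based persistence with separate training and inference steps. The snapshot file name is arbitrary; Kafka, Redis and in-memory handlers are passed to `TemplateMiner` in the same way. The cluster object returned by `match()` is assumed to expose `cluster_id` and `get_template()`; check [drain.py](drain3/drain.py) if these names differ.

```python
from drain3 import TemplateMiner
from drain3.file_persistence import FilePersistence

# snapshots are written to this file and reloaded from it on the next start
persistence = FilePersistence("drain3_state.bin")
template_miner = TemplateMiner(persistence)

# training phase: clusters may be created or their templates updated
for line in ["user davidoh logged in", "user eranr logged in"]:
    template_miner.add_log_message(line)

# inference phase: match against the learned clusters only, nothing is created or changed
cluster = template_miner.match("user root logged in")
if cluster is None:
    print("no matching cluster")
else:
    print(cluster.cluster_id, cluster.get_template())
```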
243 | ## Memory efficiency 244 | 245 | This feature limits the max memory used by the model. It is particularly important for large and possibly unbounded log 246 | streams. This feature is controlled by the `max_clusters` parameter, which sets the max number of clusters/templates 247 | tracked by the model. When the limit is reached, new templates start to replace the old ones according to the Least 248 | Recently Used (LRU) eviction policy. This makes the model adapt quickly to the most recent templates in the log stream. 249 | 250 | ## Parameter Extraction 251 | 252 | Drain3 supports retrieving an ordered list of variables in a log message, after its template was mined. Each parameter 253 | is accompanied by the name of the mask that was matched, or `*` for the catch-all mask. 254 | 255 | Parameter extraction is performed by generating a regular expression that matches the template and then applying it on 256 | the log message. When `exact_matching` is enabled (the default), the generated regex includes the regular expressions 257 | defined in the relevant masking instructions. If there are multiple masking instructions with the same name, either match 258 | can satisfy the regex. It is possible to disable exact matching so that every variable is matched against a 259 | non-whitespace character sequence. This may improve performance at the expense of accuracy. 260 | 261 | Parameter extraction regexes generated per template are cached by default, to improve performance. You can control the cache 262 | size with the `MASKING/parameter_extraction_cache_capacity` configuration parameter. 263 | 264 | Sample usage: 265 | 266 | ```python 267 | result = template_miner.add_log_message(log_line) 268 | params = template_miner.extract_parameters( 269 | result["template_mined"], log_line, exact_matching=True) 270 | ``` 271 | 272 | For the input `"user johndoe logged in 11 minutes ago"`, the template would be: 273 | 274 | ``` 275 | "user <:*:> logged in <:NUM:> minutes ago" 276 | ``` 277 | 278 | ... and the extracted parameters: 279 | 280 | ``` 281 | [ 282 | ExtractedParameter(value='johndoe', mask_name='*'), 283 | ExtractedParameter(value='11', mask_name='NUM') 284 | ] 285 | ``` 286 | 287 | ## Installation 288 | 289 | Drain3 is available from [PyPI](https://pypi.org/project/drain3). To install, use `pip`: 290 | 291 | ``` 292 | pip3 install drain3 293 | ``` 294 | 295 | Note: if you decide to use Kafka or Redis persistence, you should install the relevant client library explicitly, since it 296 | is declared as an extra (optional) dependency, by either: 297 | 298 | ``` 299 | pip3 install kafka-python 300 | ``` 301 | 302 | -- or -- 303 | 304 | ``` 305 | pip3 install redis 306 | ``` 307 | 308 | ## Examples 309 | 310 | In order to run the examples directly from the repository, you need to install the dependencies. You can do that using 311 | *pipenv* by executing the following command (assuming pipenv is already installed): 312 | 313 | ```shell 314 | python3 -m pipenv sync 315 | ``` 316 | 317 | #### Example 1 - `drain_stdin_demo` 318 | 319 | Run [examples/drain_stdin_demo.py](examples/drain_stdin_demo.py) from the root folder of the repository by: 320 | 321 | ``` 322 | python3 -m pipenv run python -m examples.drain_stdin_demo 323 | ``` 324 | 325 | This example runs Drain3 on input from stdin and persists its state to either Kafka, a file, or no persistence at all.
326 | 327 | Change the `persistence_type` variable in the example to change the persistence mode. 328 | 329 | Enter several log lines using the command line. Press `q` to end the online learn-and-match mode. 330 | 331 | Next, the demo switches to match (inference) only mode, in which no new clusters are trained and input is matched against 332 | previously trained clusters only. Press `q` again to finish execution. 333 | 334 | #### Example 2 - `drain_bigfile_demo` 335 | 336 | Run [examples/drain_bigfile_demo](examples/drain_bigfile_demo.py) from the root folder of the repository by: 337 | 338 | ``` 339 | python3 -m pipenv run python -m examples.drain_bigfile_demo 340 | ``` 341 | 342 | This example downloads a real-world log file (of an SSH server) and processes all lines, then prints the resulting clusters, 343 | the prefix tree and performance statistics. 344 | 345 | #### Sample config file 346 | 347 | An example `drain3.ini` file with masking instructions can be found in the [examples](examples) folder as well. 348 | 349 | ## Contributing 350 | 351 | Our project welcomes external contributions. Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for further details. 352 | 353 | ## Change Log 354 | 355 | ##### v0.9.11 356 | 357 | * Fixed possible DivideByZero error when the profiler is enabled - [Issue #65](https://github.com/IBM/Drain3/issues/65). 358 | 359 | ##### v0.9.10 360 | 361 | * Fixed compatibility issue with Python 3.10 caused by removal of `KeysView`. 362 | 363 | ##### v0.9.9 364 | 365 | * Added support for accurate log message parameter extraction in a new function - `extract_parameters()`. The 366 | function `get_parameter_list()` is deprecated (thanks to *@Impelon*). 367 | * Refactored `AbstractMaskingInstruction` as a base class for `RegexMaskingInstruction`, allowing the introduction of other 368 | types of masking mechanisms. 369 | 370 | ##### v0.9.8 371 | 372 | * Added a `full_search_strategy` option in `TemplateMiner.match()` and `Drain.match()`. See more info at 373 | Issue [#48](https://github.com/IBM/Drain3/issues/48). 374 | * Added an option to disable parameterization of tokens that contain digits in 375 | configuration: `TemplateMinerConfig.parametrize_numeric_tokens` 376 | * Loading a Drain snapshot now only restores the cluster state and not configuration parameters. This improves backwards 377 | compatibility when introducing new Drain configuration parameters. 378 | 379 | ##### v0.9.7 380 | 381 | * Fixed bug in original Drain: log clusters were created multiple times for log messages with fewer tokens 382 | than `max_node_depth`. 383 | * Changed the `depth` property to a more descriptive name, `max_node_depth`, as Drain always subtracts 2 from the `depth` 384 | argument value. Also added a `log_cluster_depth` property to reflect the original value of the depth argument (Breaking Change). 385 | * Restricted the `depth` param to a minimum sensible value of 3. 386 | * Added log cluster count to nodes in `Drain.print_tree()` 387 | * Added optional log cluster details to `Drain.print_tree()` 388 | 389 | ##### v0.9.6 390 | 391 | * Fix issue https://github.com/IBM/Drain3/issues/38: Unnecessary update of LRU cache in case `max_clusters` is used 392 | (thanks *@StanislawSwierc*). 393 | 394 | ##### v0.9.5 395 | 396 | * Added: `TemplateMiner.match()` function for fast matching against existing clusters only.
397 | 398 | ##### v0.9.4 399 | 400 | * Added: `TemplateMiner.get_parameter_list()` function to extract template parameters for a raw log message (thanks to 401 | *@cwyalpha*) 402 | * Added option to customize the mask wrapper - instead of the default wrappers (`<*>` etc.), you can select any wrapper prefix 403 | or suffix by overriding `TemplateMinerConfig.mask_prefix` and `TemplateMinerConfig.mask_suffix` 404 | * Fixed: config `.ini` file is always read from the same folder as the source file in demos and tests (thanks *@RobinMaas95*) 405 | 406 | ##### v0.9.3 407 | 408 | * Fixed: comparison of type int with type str in function `add_seq_to_prefix_tree` #28 (bug introduced in v0.9.1) 409 | 410 | ##### v0.9.2 411 | 412 | * Updated jsonpickle version 413 | * Keys of the `id_to_cluster` dict are now persisted by jsonpickle as `int` instead of `str` to avoid key type conversion on 414 | snapshot load, which caused some issues. 415 | * Added cachetools dependency to `setup.py`. 416 | 417 | ##### v0.9.1 418 | 419 | * Added option to configure `TemplateMiner` using a configuration object (without an `.ini` file). 420 | * Support for `print_tree()` to a file/stream. 421 | * Added `MemoryBufferPersistence` 422 | * Added unit tests for state save/load. 423 | * Bug fix: missing type-conversion in state loading, introduced in v0.9.0 424 | * Refactor: Drain prefix tree keys are now of type `str` also for the 1st level 425 | (was `int` before), for type consistency. 426 | 427 | ##### v0.9.0 428 | 429 | * Decreased the memory footprint of the main data structures. 430 | * Added `max_clusters` option to limit the number of tracked clusters. 431 | * Changed cluster identifier type from str to int 432 | * Added more unit tests and CI 433 | 434 | ##### v0.8.6 435 | 436 | * Added `extra_delimiters` configuration option to Drain 437 | 438 | ##### v0.8.5 439 | 440 | * Profiler improvements 441 | 442 | ##### v0.8.4 443 | 444 | * Masking speed improvement 445 | 446 | ##### v0.8.3 447 | 448 | * Fix: profiler state after load from snapshot 449 | 450 | ##### v0.8.2 451 | 452 | * Fixed snapshot backward compatibility to v0.7.9 453 | 454 | ##### v0.8.1 455 | 456 | * Bugfix in profiling configuration read 457 | 458 | ##### v0.8.0 459 | 460 | * Added time profiling support (disabled by default) 461 | * Added cluster ID to snapshot reason log (credit: @boernd) 462 | * Minor readability and documentation improvements in Drain 463 | 464 | ##### v0.7.9 465 | 466 | * Fix: `KafkaPersistence` now also accepts `bootstrap_servers` as kwargs. 467 | 468 | ##### v0.7.8 469 | 470 | * Using the `kafka-python` package instead of `kafka` (newer). 471 | * Added support for specifying additional configuration as `kwargs` in the Kafka persistence handler. 472 | 473 | ##### v0.7.7 474 | 475 | * Corrected default Drain config values. 476 | 477 | ##### v0.7.6 478 | 479 | * Improvement in config file handling (Note: new sections were added instead of the `DEFAULT` section) 480 | 481 | ##### v0.7.5 482 | 483 | * Made Kafka and Redis optional requirements 484 | 485 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
2 | 3 | [[package]] 4 | name = "async-timeout" 5 | version = "4.0.2" 6 | description = "Timeout context manager for asyncio programs" 7 | optional = true 8 | python-versions = ">=3.6" 9 | files = [ 10 | {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, 11 | {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, 12 | ] 13 | 14 | [package.dependencies] 15 | typing-extensions = {version = ">=3.6.5", markers = "python_version < \"3.8\""} 16 | 17 | [[package]] 18 | name = "cachetools" 19 | version = "5.3.1" 20 | description = "Extensible memoizing collections and decorators" 21 | optional = false 22 | python-versions = ">=3.7" 23 | files = [ 24 | {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, 25 | {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, 26 | ] 27 | 28 | [[package]] 29 | name = "cffi" 30 | version = "1.15.1" 31 | description = "Foreign Function Interface for Python calling C code." 32 | optional = false 33 | python-versions = "*" 34 | files = [ 35 | {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, 36 | {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, 37 | {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, 38 | {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, 39 | {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, 40 | {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, 41 | {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, 42 | {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, 43 | {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, 44 | {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, 45 | {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, 46 | {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, 47 | {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, 48 | {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, 49 | {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, 50 | {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, 51 | {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, 52 | {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, 53 | {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, 54 | {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, 55 | {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, 56 | {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, 57 | {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, 58 | {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, 59 | {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, 60 | {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, 61 | {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, 62 | {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, 63 | {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, 64 | {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, 65 | {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, 66 | {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, 67 | {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, 68 | {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, 69 | {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, 70 | {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, 71 | {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, 72 | {file = 
"cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, 73 | {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, 74 | {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, 75 | {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, 76 | {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, 77 | {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, 78 | {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, 79 | {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, 80 | {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, 81 | {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, 82 | {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, 83 | {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, 84 | {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, 85 | {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, 86 | {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, 87 | {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, 88 | {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, 89 | {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, 90 | {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, 91 | {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, 92 | {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, 93 | {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, 94 | {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, 95 | {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, 96 | {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, 97 | {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, 98 | {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, 99 | ] 100 | 101 | [package.dependencies] 102 | pycparser = "*" 103 | 104 | [[package]] 105 | name = "cryptography" 106 | version = "41.0.3" 107 | description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 108 | optional = false 109 | python-versions = ">=3.7" 110 | files = [ 111 | {file = "cryptography-41.0.3-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507"}, 112 | {file = "cryptography-41.0.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922"}, 113 | {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81"}, 114 | {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd"}, 115 | {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47"}, 116 | {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116"}, 117 | {file = "cryptography-41.0.3-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c"}, 118 | {file = "cryptography-41.0.3-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae"}, 119 | {file = "cryptography-41.0.3-cp37-abi3-win32.whl", hash = "sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306"}, 120 | {file = "cryptography-41.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574"}, 121 | {file = "cryptography-41.0.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087"}, 122 | {file = "cryptography-41.0.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858"}, 123 | {file = "cryptography-41.0.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906"}, 124 | {file = "cryptography-41.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e"}, 125 | {file = "cryptography-41.0.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd"}, 126 | {file = "cryptography-41.0.3-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash 
= "sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207"}, 127 | {file = "cryptography-41.0.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84"}, 128 | {file = "cryptography-41.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7"}, 129 | {file = "cryptography-41.0.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d"}, 130 | {file = "cryptography-41.0.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de"}, 131 | {file = "cryptography-41.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1"}, 132 | {file = "cryptography-41.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4"}, 133 | {file = "cryptography-41.0.3.tar.gz", hash = "sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34"}, 134 | ] 135 | 136 | [package.dependencies] 137 | cffi = ">=1.12" 138 | 139 | [package.extras] 140 | docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] 141 | docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] 142 | nox = ["nox"] 143 | pep8test = ["black", "check-sdist", "mypy", "ruff"] 144 | sdist = ["build"] 145 | ssh = ["bcrypt (>=3.1.5)"] 146 | test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] 147 | test-randomorder = ["pytest-randomly"] 148 | 149 | [[package]] 150 | name = "importlib-metadata" 151 | version = "6.7.0" 152 | description = "Read metadata from Python packages" 153 | optional = false 154 | python-versions = ">=3.7" 155 | files = [ 156 | {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, 157 | {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, 158 | ] 159 | 160 | [package.dependencies] 161 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} 162 | zipp = ">=0.5" 163 | 164 | [package.extras] 165 | docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] 166 | perf = ["ipython"] 167 | testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] 168 | 169 | [[package]] 170 | name = "jsonpickle" 171 | version = "3.0.1" 172 | description = "Python library for serializing any arbitrary object graph into JSON" 173 | optional = false 174 | python-versions = ">=3.7" 175 | files = [ 176 | {file = "jsonpickle-3.0.1-py2.py3-none-any.whl", hash = "sha256:130d8b293ea0add3845de311aaba55e6d706d0bb17bc123bd2c8baf8a39ac77c"}, 177 | {file = "jsonpickle-3.0.1.tar.gz", hash = "sha256:032538804795e73b94ead410800ac387fdb6de98f8882ac957fcd247e3a85200"}, 178 | ] 179 | 180 | [package.dependencies] 181 | importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} 182 | 183 | [package.extras] 184 | docs = ["jaraco.packaging (>=3.2)", "rst.linker (>=1.9)", "sphinx"] 185 | testing = ["ecdsa", "feedparser", "gmpy2", "numpy", 
"pandas", "pymongo", "pytest (>=3.5,!=3.7.3)", "pytest-black-multipy", "pytest-checkdocs (>=1.2.3)", "pytest-cov", "pytest-flake8 (>=1.1.1)", "scikit-learn", "sqlalchemy"] 186 | testing-libs = ["simplejson", "ujson"] 187 | 188 | [[package]] 189 | name = "kafka-python" 190 | version = "2.0.2" 191 | description = "Pure Python client for Apache Kafka" 192 | optional = true 193 | python-versions = "*" 194 | files = [ 195 | {file = "kafka-python-2.0.2.tar.gz", hash = "sha256:04dfe7fea2b63726cd6f3e79a2d86e709d608d74406638c5da33a01d45a9d7e3"}, 196 | {file = "kafka_python-2.0.2-py2.py3-none-any.whl", hash = "sha256:2d92418c7cb1c298fa6c7f0fb3519b520d0d7526ac6cb7ae2a4fc65a51a94b6e"}, 197 | ] 198 | 199 | [package.extras] 200 | crc32c = ["crc32c"] 201 | 202 | [[package]] 203 | name = "mypy" 204 | version = "1.4.1" 205 | description = "Optional static typing for Python" 206 | optional = false 207 | python-versions = ">=3.7" 208 | files = [ 209 | {file = "mypy-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:566e72b0cd6598503e48ea610e0052d1b8168e60a46e0bfd34b3acf2d57f96a8"}, 210 | {file = "mypy-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ca637024ca67ab24a7fd6f65d280572c3794665eaf5edcc7e90a866544076878"}, 211 | {file = "mypy-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dde1d180cd84f0624c5dcaaa89c89775550a675aff96b5848de78fb11adabcd"}, 212 | {file = "mypy-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c4d8e89aa7de683e2056a581ce63c46a0c41e31bd2b6d34144e2c80f5ea53dc"}, 213 | {file = "mypy-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:bfdca17c36ae01a21274a3c387a63aa1aafe72bff976522886869ef131b937f1"}, 214 | {file = "mypy-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7549fbf655e5825d787bbc9ecf6028731973f78088fbca3a1f4145c39ef09462"}, 215 | {file = "mypy-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:98324ec3ecf12296e6422939e54763faedbfcc502ea4a4c38502082711867258"}, 216 | {file = "mypy-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:141dedfdbfe8a04142881ff30ce6e6653c9685b354876b12e4fe6c78598b45e2"}, 217 | {file = "mypy-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8207b7105829eca6f3d774f64a904190bb2231de91b8b186d21ffd98005f14a7"}, 218 | {file = "mypy-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:16f0db5b641ba159eff72cff08edc3875f2b62b2fa2bc24f68c1e7a4e8232d01"}, 219 | {file = "mypy-1.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:470c969bb3f9a9efcedbadcd19a74ffb34a25f8e6b0e02dae7c0e71f8372f97b"}, 220 | {file = "mypy-1.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5952d2d18b79f7dc25e62e014fe5a23eb1a3d2bc66318df8988a01b1a037c5b"}, 221 | {file = "mypy-1.4.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:190b6bab0302cec4e9e6767d3eb66085aef2a1cc98fe04936d8a42ed2ba77bb7"}, 222 | {file = "mypy-1.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9d40652cc4fe33871ad3338581dca3297ff5f2213d0df345bcfbde5162abf0c9"}, 223 | {file = "mypy-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:01fd2e9f85622d981fd9063bfaef1aed6e336eaacca00892cd2d82801ab7c042"}, 224 | {file = "mypy-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2460a58faeea905aeb1b9b36f5065f2dc9a9c6e4c992a6499a2360c6c74ceca3"}, 225 | {file = "mypy-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2746d69a8196698146a3dbe29104f9eb6a2a4d8a27878d92169a6c0b74435b6"}, 226 | {file = "mypy-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:ae704dcfaa180ff7c4cfbad23e74321a2b774f92ca77fd94ce1049175a21c97f"}, 227 | {file = "mypy-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:43d24f6437925ce50139a310a64b2ab048cb2d3694c84c71c3f2a1626d8101dc"}, 228 | {file = "mypy-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c482e1246726616088532b5e964e39765b6d1520791348e6c9dc3af25b233828"}, 229 | {file = "mypy-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43b592511672017f5b1a483527fd2684347fdffc041c9ef53428c8dc530f79a3"}, 230 | {file = "mypy-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34a9239d5b3502c17f07fd7c0b2ae6b7dd7d7f6af35fbb5072c6208e76295816"}, 231 | {file = "mypy-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5703097c4936bbb9e9bce41478c8d08edd2865e177dc4c52be759f81ee4dd26c"}, 232 | {file = "mypy-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e02d700ec8d9b1859790c0475df4e4092c7bf3272a4fd2c9f33d87fac4427b8f"}, 233 | {file = "mypy-1.4.1-py3-none-any.whl", hash = "sha256:45d32cec14e7b97af848bddd97d85ea4f0db4d5a149ed9676caa4eb2f7402bb4"}, 234 | {file = "mypy-1.4.1.tar.gz", hash = "sha256:9bbcd9ab8ea1f2e1c8031c21445b511442cc45c89951e49bbf852cbb70755b1b"}, 235 | ] 236 | 237 | [package.dependencies] 238 | mypy-extensions = ">=1.0.0" 239 | tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} 240 | typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} 241 | typing-extensions = ">=4.1.0" 242 | 243 | [package.extras] 244 | dmypy = ["psutil (>=4.0)"] 245 | install-types = ["pip"] 246 | python2 = ["typed-ast (>=1.4.0,<2)"] 247 | reports = ["lxml"] 248 | 249 | [[package]] 250 | name = "mypy-extensions" 251 | version = "1.0.0" 252 | description = "Type system extensions for programs checked with the mypy type checker." 
253 | optional = false 254 | python-versions = ">=3.5" 255 | files = [ 256 | {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, 257 | {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, 258 | ] 259 | 260 | [[package]] 261 | name = "pycparser" 262 | version = "2.21" 263 | description = "C parser in Python" 264 | optional = false 265 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 266 | files = [ 267 | {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, 268 | {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, 269 | ] 270 | 271 | [[package]] 272 | name = "redis" 273 | version = "4.6.0" 274 | description = "Python client for Redis database and key-value store" 275 | optional = true 276 | python-versions = ">=3.7" 277 | files = [ 278 | {file = "redis-4.6.0-py3-none-any.whl", hash = "sha256:e2b03db868160ee4591de3cb90d40ebb50a90dd302138775937f6a42b7ed183c"}, 279 | {file = "redis-4.6.0.tar.gz", hash = "sha256:585dc516b9eb042a619ef0a39c3d7d55fe81bdb4df09a52c9cdde0d07bf1aa7d"}, 280 | ] 281 | 282 | [package.dependencies] 283 | async-timeout = {version = ">=4.0.2", markers = "python_full_version <= \"3.11.2\""} 284 | importlib-metadata = {version = ">=1.0", markers = "python_version < \"3.8\""} 285 | typing-extensions = {version = "*", markers = "python_version < \"3.8\""} 286 | 287 | [package.extras] 288 | hiredis = ["hiredis (>=1.0.0)"] 289 | ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] 290 | 291 | [[package]] 292 | name = "tomli" 293 | version = "2.0.1" 294 | description = "A lil' TOML parser" 295 | optional = false 296 | python-versions = ">=3.7" 297 | files = [ 298 | {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, 299 | {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, 300 | ] 301 | 302 | [[package]] 303 | name = "typed-ast" 304 | version = "1.5.5" 305 | description = "a fork of Python 2 and 3 ast modules with type comment support" 306 | optional = false 307 | python-versions = ">=3.6" 308 | files = [ 309 | {file = "typed_ast-1.5.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4bc1efe0ce3ffb74784e06460f01a223ac1f6ab31c6bc0376a21184bf5aabe3b"}, 310 | {file = "typed_ast-1.5.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5f7a8c46a8b333f71abd61d7ab9255440d4a588f34a21f126bbfc95f6049e686"}, 311 | {file = "typed_ast-1.5.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:597fc66b4162f959ee6a96b978c0435bd63791e31e4f410622d19f1686d5e769"}, 312 | {file = "typed_ast-1.5.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d41b7a686ce653e06c2609075d397ebd5b969d821b9797d029fccd71fdec8e04"}, 313 | {file = "typed_ast-1.5.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5fe83a9a44c4ce67c796a1b466c270c1272e176603d5e06f6afbc101a572859d"}, 314 | {file = "typed_ast-1.5.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d5c0c112a74c0e5db2c75882a0adf3133adedcdbfd8cf7c9d6ed77365ab90a1d"}, 315 | {file = "typed_ast-1.5.5-cp310-cp310-win_amd64.whl", hash = "sha256:e1a976ed4cc2d71bb073e1b2a250892a6e968ff02aa14c1f40eba4f365ffec02"}, 316 | {file = 
"typed_ast-1.5.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c631da9710271cb67b08bd3f3813b7af7f4c69c319b75475436fcab8c3d21bee"}, 317 | {file = "typed_ast-1.5.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b445c2abfecab89a932b20bd8261488d574591173d07827c1eda32c457358b18"}, 318 | {file = "typed_ast-1.5.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc95ffaaab2be3b25eb938779e43f513e0e538a84dd14a5d844b8f2932593d88"}, 319 | {file = "typed_ast-1.5.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61443214d9b4c660dcf4b5307f15c12cb30bdfe9588ce6158f4a005baeb167b2"}, 320 | {file = "typed_ast-1.5.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6eb936d107e4d474940469e8ec5b380c9b329b5f08b78282d46baeebd3692dc9"}, 321 | {file = "typed_ast-1.5.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e48bf27022897577d8479eaed64701ecaf0467182448bd95759883300ca818c8"}, 322 | {file = "typed_ast-1.5.5-cp311-cp311-win_amd64.whl", hash = "sha256:83509f9324011c9a39faaef0922c6f720f9623afe3fe220b6d0b15638247206b"}, 323 | {file = "typed_ast-1.5.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:44f214394fc1af23ca6d4e9e744804d890045d1643dd7e8229951e0ef39429b5"}, 324 | {file = "typed_ast-1.5.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:118c1ce46ce58fda78503eae14b7664163aa735b620b64b5b725453696f2a35c"}, 325 | {file = "typed_ast-1.5.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be4919b808efa61101456e87f2d4c75b228f4e52618621c77f1ddcaae15904fa"}, 326 | {file = "typed_ast-1.5.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:fc2b8c4e1bc5cd96c1a823a885e6b158f8451cf6f5530e1829390b4d27d0807f"}, 327 | {file = "typed_ast-1.5.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:16f7313e0a08c7de57f2998c85e2a69a642e97cb32f87eb65fbfe88381a5e44d"}, 328 | {file = "typed_ast-1.5.5-cp36-cp36m-win_amd64.whl", hash = "sha256:2b946ef8c04f77230489f75b4b5a4a6f24c078be4aed241cfabe9cbf4156e7e5"}, 329 | {file = "typed_ast-1.5.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2188bc33d85951ea4ddad55d2b35598b2709d122c11c75cffd529fbc9965508e"}, 330 | {file = "typed_ast-1.5.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0635900d16ae133cab3b26c607586131269f88266954eb04ec31535c9a12ef1e"}, 331 | {file = "typed_ast-1.5.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57bfc3cf35a0f2fdf0a88a3044aafaec1d2f24d8ae8cd87c4f58d615fb5b6311"}, 332 | {file = "typed_ast-1.5.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:fe58ef6a764de7b4b36edfc8592641f56e69b7163bba9f9c8089838ee596bfb2"}, 333 | {file = "typed_ast-1.5.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d09d930c2d1d621f717bb217bf1fe2584616febb5138d9b3e8cdd26506c3f6d4"}, 334 | {file = "typed_ast-1.5.5-cp37-cp37m-win_amd64.whl", hash = "sha256:d40c10326893ecab8a80a53039164a224984339b2c32a6baf55ecbd5b1df6431"}, 335 | {file = "typed_ast-1.5.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fd946abf3c31fb50eee07451a6aedbfff912fcd13cf357363f5b4e834cc5e71a"}, 336 | {file = "typed_ast-1.5.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ed4a1a42df8a3dfb6b40c3d2de109e935949f2f66b19703eafade03173f8f437"}, 337 | {file = "typed_ast-1.5.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:045f9930a1550d9352464e5149710d56a2aed23a2ffe78946478f7b5416f1ede"}, 338 | {file = "typed_ast-1.5.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:381eed9c95484ceef5ced626355fdc0765ab51d8553fec08661dce654a935db4"}, 339 | {file = "typed_ast-1.5.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bfd39a41c0ef6f31684daff53befddae608f9daf6957140228a08e51f312d7e6"}, 340 | {file = "typed_ast-1.5.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8c524eb3024edcc04e288db9541fe1f438f82d281e591c548903d5b77ad1ddd4"}, 341 | {file = "typed_ast-1.5.5-cp38-cp38-win_amd64.whl", hash = "sha256:7f58fabdde8dcbe764cef5e1a7fcb440f2463c1bbbec1cf2a86ca7bc1f95184b"}, 342 | {file = "typed_ast-1.5.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:042eb665ff6bf020dd2243307d11ed626306b82812aba21836096d229fdc6a10"}, 343 | {file = "typed_ast-1.5.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:622e4a006472b05cf6ef7f9f2636edc51bda670b7bbffa18d26b255269d3d814"}, 344 | {file = "typed_ast-1.5.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1efebbbf4604ad1283e963e8915daa240cb4bf5067053cf2f0baadc4d4fb51b8"}, 345 | {file = "typed_ast-1.5.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0aefdd66f1784c58f65b502b6cf8b121544680456d1cebbd300c2c813899274"}, 346 | {file = "typed_ast-1.5.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:48074261a842acf825af1968cd912f6f21357316080ebaca5f19abbb11690c8a"}, 347 | {file = "typed_ast-1.5.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:429ae404f69dc94b9361bb62291885894b7c6fb4640d561179548c849f8492ba"}, 348 | {file = "typed_ast-1.5.5-cp39-cp39-win_amd64.whl", hash = "sha256:335f22ccb244da2b5c296e6f96b06ee9bed46526db0de38d2f0e5a6597b81155"}, 349 | {file = "typed_ast-1.5.5.tar.gz", hash = "sha256:94282f7a354f36ef5dbce0ef3467ebf6a258e370ab33d5b40c249fa996e590dd"}, 350 | ] 351 | 352 | [[package]] 353 | name = "types-cachetools" 354 | version = "5.3.0.6" 355 | description = "Typing stubs for cachetools" 356 | optional = false 357 | python-versions = "*" 358 | files = [ 359 | {file = "types-cachetools-5.3.0.6.tar.gz", hash = "sha256:595f0342d246c8ba534f5a762cf4c2f60ecb61e8002b8b2277fd5cf791d4e851"}, 360 | {file = "types_cachetools-5.3.0.6-py3-none-any.whl", hash = "sha256:f7f8a25bfe306f2e6bc2ad0a2f949d9e72f2d91036d509c36d3810bf728bc6e1"}, 361 | ] 362 | 363 | [[package]] 364 | name = "types-pyopenssl" 365 | version = "23.2.0.2" 366 | description = "Typing stubs for pyOpenSSL" 367 | optional = false 368 | python-versions = "*" 369 | files = [ 370 | {file = "types-pyOpenSSL-23.2.0.2.tar.gz", hash = "sha256:6a010dac9ecd42b582d7dd2cc3e9e40486b79b3b64bb2fffba1474ff96af906d"}, 371 | {file = "types_pyOpenSSL-23.2.0.2-py3-none-any.whl", hash = "sha256:19536aa3debfbe25a918cf0d898e9f5fbbe6f3594a429da7914bf331deb1b342"}, 372 | ] 373 | 374 | [package.dependencies] 375 | cryptography = ">=35.0.0" 376 | 377 | [[package]] 378 | name = "types-redis" 379 | version = "4.6.0.3" 380 | description = "Typing stubs for redis" 381 | optional = false 382 | python-versions = "*" 383 | files = [ 384 | {file = "types-redis-4.6.0.3.tar.gz", hash = "sha256:efdef37dc0c04bf5786195651fd694f8bfdd693eac09ec4af46d90f72652558f"}, 385 | {file = "types_redis-4.6.0.3-py3-none-any.whl", hash = "sha256:67c44c14369c33c2a300da2a50b5607c0fc888f7b85eeb7c73e15c78a0f05edd"}, 386 | ] 387 | 388 | [package.dependencies] 389 | cryptography = ">=35.0.0" 390 | types-pyOpenSSL = "*" 391 | 392 | [[package]] 393 | name = "typing-extensions" 394 | version = "4.7.1" 395 | description = "Backported and Experimental Type Hints for Python 3.7+" 396 | optional = false 397 | python-versions = ">=3.7" 398 | files = [ 399 
| {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, 400 | {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, 401 | ] 402 | 403 | [[package]] 404 | name = "zipp" 405 | version = "3.15.0" 406 | description = "Backport of pathlib-compatible object wrapper for zip files" 407 | optional = false 408 | python-versions = ">=3.7" 409 | files = [ 410 | {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, 411 | {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, 412 | ] 413 | 414 | [package.extras] 415 | docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] 416 | testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] 417 | 418 | [extras] 419 | all = ["kafka-python", "redis"] 420 | kafka = ["kafka-python"] 421 | redis = ["redis"] 422 | 423 | [metadata] 424 | lock-version = "2.0" 425 | python-versions = "^3.7" 426 | content-hash = "5b3714ad91781510078e19fc5e5ce57bcb57ff69edd98764b34f91c5bc509ea4" 427 | --------------------------------------------------------------------------------