├── tests
│   ├── __init__.py
│   └── test_eknowledge.py
├── .github
│   └── FUNDING.yml
├── eknowledge
│   ├── __init__.py
│   ├── prompts.py
│   ├── relations.py
│   └── main.py
├── setup.py
├── LICENSE
├── README.md
└── .gitignore
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [chigwell]
4 |
--------------------------------------------------------------------------------
/eknowledge/__init__.py:
--------------------------------------------------------------------------------
1 | from .main import execute_graph_generation, split_text_by_words, SYSTEM_PROMPT, USER_PROMPT, RELATIONS
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 |
4 | setup(
5 | name='eknowledge',
6 | version='2025.4.171239',
7 | author='Eugene Evstafev',
8 | author_email='chigwel@gmail.com',
9 | description='A Python package for executing graph generation from textual inputs.',
11 | long_description=open('README.md', encoding='utf-8').read(),
11 | long_description_content_type='text/markdown',
12 | url='https://github.com/chigwell/eknowledge',
13 | packages=find_packages(),
14 | install_requires=[
15 | 'langchain-core==0.3.51',
16 | 'langchain-ollama==0.3.0'
17 | ],
18 | classifiers=[
19 | 'License :: OSI Approved :: MIT License',
20 | 'Development Status :: 3 - Alpha',
21 | 'Intended Audience :: Developers',
22 | 'Programming Language :: Python :: 3',
23 | 'Operating System :: OS Independent',
24 | ],
25 | python_requires='>=3.6',
26 | )
--------------------------------------------------------------------------------
/eknowledge/prompts.py:
--------------------------------------------------------------------------------
1 | SYSTEM_PROMPT = """You are an ontology generation assistant. Your task is to identify and extract nodes and their relationships from the user's input text. You will receive a text input, and you need to generate a list of nodes and their relationships in the format specified below. Please ensure that the output is in the correct format and includes all relevant information.
2 | Please respond ONLY with the connections within <nodes>...</nodes> tags, where each connection is inside a <node> tag like this:
3 | <nodes>
4 | <node>
5 | <from_node>ENTITY_A</from_node>
6 | <relationship>RELATIONSHIP_TYPE</relationship>
7 | <to_node>ENTITY_B</to_node>
8 | </node>
9 | </nodes>
10 | """
11 | USER_PROMPT = """
12 | Please identify and list the relationships between nodes for the ontology based on the user's input text:
13 | ======
14 | {text}
15 | ======
16 | Possible relationships include:
17 | {relationships}
18 | """
19 |
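20 | # Illustrative example (not part of the prompts themselves): main.py fills the
21 | # USER_PROMPT placeholders via USER_PROMPT.format(text=chunk, relationships=relations),
22 | # and a well-formed model reply wraps each extracted connection like so:
23 | #
24 | # <nodes>
25 | # <node>
26 | # <from_node>Paris</from_node>
27 | # <relationship>located_in</relationship>
28 | # <to_node>France</to_node>
29 | # </node>
30 | # </nodes>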
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Eugene Evstafev
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![PyPI version](https://badge.fury.io/py/eknowledge.svg)](https://badge.fury.io/py/eknowledge)
2 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
3 | [![Downloads](https://static.pepy.tech/badge/eknowledge)](https://pepy.tech/project/eknowledge)
4 | [![LinkedIn](https://img.shields.io/badge/LinkedIn-Eugene%20Evstafev-blue)](https://www.linkedin.com/in/eugene-evstafev-716669181/)
5 |
6 | # eknowledge
7 |
8 | `eknowledge` is a Python package designed to facilitate the generation of knowledge graphs from textual inputs. It leverages language models to parse text and extract relationships between entities, organizing these relationships into a structured graph format. This tool is ideal for developers, researchers, and anyone interested in structured knowledge extraction from unstructured text.
9 |
10 | ## Installation
11 |
12 | Install `eknowledge` with pip, along with `langchain_llm7`, which provides the chat model used in the example below:
13 |
14 | ```bash
15 | pip install eknowledge langchain_llm7
16 | ```
17 |
18 | ## Usage
19 |
20 | Here's a simple example to get you started with `eknowledge`. This example demonstrates how to generate a knowledge graph from a given text input using the package.
21 |
22 | ### Example
23 |
24 | ```python
25 | from eknowledge import execute_graph_generation
26 | from langchain_llm7 import ChatLLM7
27 |
28 | # Initialize the language model
29 | MODEL = "deepseek-r1"
30 | llm = ChatLLM7(model=MODEL)
31 |
32 | # Define your input text
33 | input_text = "The quick brown fox jumps over the lazy dog."
34 |
35 | # Generate the graph
36 | graph = execute_graph_generation(
37 | text=input_text,
38 | llm=llm,
39 | chunk_size=100,
40 | verbose=True
41 | )
42 |
43 | # Output the graph
44 | print(graph)
45 | # > Splitting text into 1 chunks of size 100 words.
46 | # > Processing chunk 1/1...
47 | # > Nodes successfully processed in chunk 1/1.
48 | # > [{'from': 'quick brown fox', 'relationship': 'interacts_with', 'to': 'lazy dog'}]
49 | ```
50 |
51 | This script will output a knowledge graph based on the relationships identified in the text.
52 |
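53 | ### Using a local Ollama model
54 |
55 | `setup.py` pins `langchain-ollama`, and `execute_graph_generation` only calls the model's `invoke` method, so any LangChain-compatible chat model should work in place of `ChatLLM7`. Below is a minimal sketch, assuming a local Ollama server is running; the model name `llama3` is a placeholder for whatever model you have pulled:
56 |
57 | ```python
58 | from eknowledge import execute_graph_generation, RELATIONS
59 | from langchain_ollama import ChatOllama
60 |
61 | # Placeholder model name; substitute any model available in your Ollama install.
62 | llm = ChatOllama(model="llama3")
63 |
64 | # Optionally narrow the relation vocabulary to a domain-specific subset.
65 | relations = [r for r in RELATIONS if r in {"is_a", "part_of", "causes"}]
66 |
67 | graph = execute_graph_generation(text="Smoking causes cancer.", llm=llm, relations=relations)
68 | print(graph)
69 | ```
70 |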
71 | ## Contributing
72 |
73 | Contributions are welcome! Please open issues or submit pull requests for any bugs, features, or improvements you would like to see.
74 |
75 | ## License
76 |
77 | `eknowledge` is MIT licensed, as found in the [LICENSE](LICENSE) file.
78 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/eknowledge/relations.py:
--------------------------------------------------------------------------------
1 | RELATIONS = [
2 | 'is_a', # Subclass relationship
3 | 'part_of', # Compositional relationship
4 | 'has_part', # Reverse of part_of
5 | 'associated_with', # General association
6 | 'equivalent_to', # Equivalence relationship
7 | 'disjoint_with', # Disjoint relationship
8 | 'depends_on', # Dependency relationship
9 | 'inverse_of', # Inverse relationship
10 | 'transitive', # Transitive relationship
11 | 'symmetrical', # Symmetrical relationship
12 | 'asymmetrical', # Asymmetrical relationship
13 | 'reflexive', # Reflexive relationship
14 | 'has_property', # Entity has a specific property
15 | 'has_attribute', # Similar to has_property, more general
16 | 'connected_to', # General connection, less specific than associated_with
17 | 'used_for', # Indicates typical use or purpose
18 | 'belongs_to', # Membership relation
19 | 'contains', # Contains relationship
20 | 'produced_by', # Indicates production or creation relationship
21 | 'preceded_by', # Temporal or sequential precedence
22 | 'succeeded_by', # Temporal or sequential succession
23 | 'interacts_with', # Interaction without specific direction
24 | 'causes', # Causality relationship
25 | 'influences', # Influence, weaker than causality
26 | 'contradicts', # Contradictory relationship
27 | 'complementary_to', # Complementarity in properties or function
28 | 'alternative_to', # Provides an alternative to
29 | 'derived_from', # Indicates origin or derivation
30 | 'has_member', # Indicates membership (group to individual)
31 | 'member_of', # Individual is member of group (reverse of has_member)
32 | 'subclass_of', # Another form of is_a, commonly used in RDF/OWL
33 | 'superclass_of', # Reverse of subclass_of
34 | 'annotated_with', # Used for linking annotations or metadata
35 | 'realizes', # Realization relationship in BFO (Basic Formal Ontology)
36 | 'has_quality', # Quality possession
37 | 'located_in', # Spatial containment or location relationship
38 | 'contains_information_about', # Information content relationship
39 | 'expresses', # Expression relationship in genetics or traits
40 | 'enabled_by', # Enabling condition relationship
41 | 'occurs_in', # Temporal occurrence within a context
42 | 'during', # Temporal relationship specifying during another event
43 | 'has_function', # Functionality relationship
44 | 'has_role', # Role specification relationship
45 | 'has_participant', # Participation in an event or process
46 | 'has_agent', # Agency relationship
47 | 'has_output', # Output specification relationship
48 | 'has_input', # Input specification relationship
49 | 'measured_by', # Measurement relationship
50 | 'provides', # Provision relationship
51 | 'requires', # Requirement relationship
52 | 'temporally_related_to', # General temporal relationship
53 | 'spatially_related_to', # General spatial relationship
54 | 'has_version', # Version control relationship
55 | 'has_exception', # Exception specification
56 | 'aggregates', # Aggregation relationship
57 | 'decomposed_into', # Decomposition relationship
58 | 'reified_as', # Reification relationship
59 | 'instantiated_by', # Instantiation relationship
60 | 'has_potential', # Potentiality relationship
61 | 'has_motive', # Motivation relationship
62 | 'negatively_regulates', # Negative regulation in biological contexts
63 | 'positively_regulates', # Positive regulation in biological contexts
64 | 'has_symptom', # Symptomatic relationship in medical ontologies
65 | 'treated_by', # Treatment relationship in medical contexts
66 | 'diagnosed_by' # Diagnostic relationship in medical contexts
67 | ]
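68 |
69 | # Note: execute_graph_generation() in main.py uses this list as the default for its
70 | # `relations` parameter; callers may supply any subset or alternative vocabulary.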
--------------------------------------------------------------------------------
/eknowledge/main.py:
--------------------------------------------------------------------------------
1 | import re
2 | import time
3 | from .relations import RELATIONS
4 | from .prompts import SYSTEM_PROMPT, USER_PROMPT
5 | from langchain_core.messages import HumanMessage, SystemMessage
6 |
7 |
8 | def split_text_by_words(text: str, chunk_size: int) -> list[str]:
9 | """
10 | Splits a text into chunks, where each chunk has a maximum number of words.
11 |
12 | This function splits the text by whitespace to identify words and then
13 | groups these words into chunks, ensuring no chunk exceeds the specified
14 | word count.
15 |
16 | Args:
17 | text: The input string to split.
18 | chunk_size: The maximum number of words allowed in each chunk. Must be
19 | a positive integer.
20 |
21 | Returns:
22 | A list of strings, where each string is a chunk of the original text.
23 | Returns an empty list if the input text is empty or contains only
24 | whitespace.
25 |
26 | Raises:
27 | ValueError: If chunk_size is not a positive integer.
28 | TypeError: If the input 'text' is not a string.
29 | """
30 | # --- Input Validation ---
31 | if not isinstance(text, str):
32 | raise TypeError("Input 'text' must be a string.")
33 | if not isinstance(chunk_size, int) or chunk_size <= 0:
34 | raise ValueError("chunk_size must be a positive integer.")
35 |
36 | # --- Word Splitting ---
37 | # Use split() which handles various whitespace characters (spaces, tabs, newlines)
38 | # and ignores empty strings resulting from multiple spaces.
39 | words = text.split()
40 |
41 | # Handle case where text is empty or only whitespace
42 | if not words:
43 | return []
44 |
45 | # --- Chunking ---
46 | chunks = []
47 | # No running counters are needed here; the slice-based loop below determines
48 | # chunk boundaries directly.
49 |
50 | # Iterate through the word list using range and step
51 | for i in range(0, len(words), chunk_size):
52 | # Determine the slice of words for this chunk
53 | word_slice = words[i : i + chunk_size]
54 |
55 | # Join the words in the slice back into a string with single spaces
56 | chunk_string = " ".join(word_slice)
57 |
58 | # Add the resulting chunk string to the list
59 | chunks.append(chunk_string)
60 |
61 | return chunks
62 |
63 |
64 | def execute_graph_generation(
65 | text="",
66 | llm=None,
67 | chunk_size=100,
68 | relations=RELATIONS,
69 | max_retries=10,
70 | system_prompt=SYSTEM_PROMPT,
71 | user_prompt=USER_PROMPT,
72 | verbose=False,
73 | sleep_time=0.75,
74 | ):
75 | if llm is None:
76 | raise ValueError("LLM object must be provided.")
77 |
78 | chunks = split_text_by_words(text, chunk_size)
79 | if verbose:
80 | print(f"Splitting text into {len(chunks)} chunks of size {chunk_size} words.")
81 |
82 | graph = []
83 | total_chunks = len(chunks)
84 | for count_chunk, chunk in enumerate(chunks, 1):
85 | if verbose:
86 | print(f"Processing chunk {count_chunk}/{total_chunks}...")
87 |
88 | found_valid_node_in_chunk = False
89 | retry_count = 0
90 | messages = [
91 | SystemMessage(content=system_prompt),
92 | HumanMessage(content=user_prompt.format(text=chunk, relationships=relations))
93 | ]
94 |
95 | # Loop until a valid node is found OR retries are exhausted for this chunk
96 | while not found_valid_node_in_chunk and retry_count < max_retries:
97 | try:
98 | response = llm.invoke(messages)
99 | # Ensure response and content are usable
100 | content = response.content if response and hasattr(response, 'content') else ""
101 |
102 | if not isinstance(content, str):
103 | if verbose:
104 | print(
105 | f"LLM response content is not a string (type: {type(content)}) for chunk {count_chunk}. Retrying (attempt {retry_count + 1}/{max_retries})...")
106 | retry_count += 1
107 | continue # Retry if content isn't a string
108 |
109 | nodes_raw = re.findall(r"<node>(.*?)</node>", content, re.DOTALL)
110 |
111 | if not nodes_raw:
112 | # If the LLM responds but without any <node> tags
113 | if verbose:
114 | print(
115 | f"No <node> tags found in response for chunk {count_chunk}. Retrying (attempt {retry_count + 1}/{max_retries})...")
116 | # No need to raise error here, just retry
117 | retry_count += 1
118 | continue # Retry
119 |
120 | # --- Process found <node> tags ---
121 | processed_at_least_one_node = False
122 | for node_content in nodes_raw:
123 | from_node_match = re.search(r"<from_node>(.*?)</from_node>", node_content)
124 | relationship_match = re.search(r"<relationship>(.*?)</relationship>", node_content)
125 | to_node_match = re.search(r"<to_node>(.*?)</to_node>", node_content)
126 |
127 | if from_node_match and relationship_match and to_node_match:
128 | graph.append({
129 | "from": from_node_match.group(1).strip(),
130 | "relationship": relationship_match.group(1).strip(),
131 | "to": to_node_match.group(1).strip()
132 | })
133 | processed_at_least_one_node = True # Mark that we found a valid one
134 |
135 | # --- Decide whether to exit the while loop ---
136 | if processed_at_least_one_node:
137 | if verbose:
138 | print(f"Nodes successfully processed in chunk {count_chunk}/{total_chunks}.")
139 | found_valid_node_in_chunk = True # Exit the while loop for this chunk
140 | else:
141 | # Found <node> tags, but none had the correct inner structure
142 | if verbose:
143 | print(
144 | f"<node> tags found but no valid structure in chunk {count_chunk}. Retrying (attempt {retry_count + 1}/{max_retries})...")
145 | retry_count += 1
146 | # Loop continues (while condition checked again)
147 |
148 | except Exception as e:
149 | # Catch LLM errors or unexpected processing errors (like regex on bad types if check failed)
150 | if verbose:
151 | print(
152 | f"Error during LLM invocation or processing for chunk {count_chunk}: {e}. Retrying (attempt {retry_count + 1}/{max_retries})...")
153 | retry_count += 1
154 | time.sleep(sleep_time)
155 | continue # Retry
156 | time.sleep(sleep_time)
157 | if not found_valid_node_in_chunk and verbose:
158 | print(f"Max retries ({max_retries}) reached for chunk {count_chunk}. No valid nodes added for this chunk.")
159 |
160 | return graph
161 |
162 |
163 |
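164 | # Minimal smoke test for the pure text-splitting helper (illustrative only; the
165 | # LLM-driven path needs a live model and is exercised in tests/ instead).
166 | if __name__ == "__main__":
167 |     print(split_text_by_words("one two three four five", chunk_size=2))
168 |     # -> ['one two', 'three four', 'five']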
--------------------------------------------------------------------------------
/tests/test_eknowledge.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest.mock import MagicMock, patch, call
3 | from eknowledge import split_text_by_words, execute_graph_generation, RELATIONS, SYSTEM_PROMPT, USER_PROMPT
4 |
5 | try:
6 | from langchain_core.messages import HumanMessage, SystemMessage
7 | except ImportError:
8 | class MockMessageBase:
9 | def __init__(self, content):
10 | self.content = content
11 | HumanMessage = MockMessageBase
12 | SystemMessage = MockMessageBase
13 |
14 | class MockLLMResponse:
15 | def __init__(self, content):
16 | self.content = content
17 |
18 | class TestSplitTextByWords(unittest.TestCase):
19 |
20 | def test_basic_splitting(self):
21 | text = "This is a test sentence with eight words total."
22 | expected = ["This is a test", "sentence with eight words", "total."]
23 | self.assertEqual(split_text_by_words(text, 4), expected)
24 |
25 | def test_exact_multiple(self):
26 | text = "One two three four five six"
27 | expected = ["One two three", "four five six"]
28 | self.assertEqual(split_text_by_words(text, 3), expected)
29 |
30 | def test_less_than_chunk_size(self):
31 | text = "Short text"
32 | expected = ["Short text"]
33 | self.assertEqual(split_text_by_words(text, 5), expected)
34 |
35 | def test_chunk_size_one(self):
36 | text = "Split word by word"
37 | expected = ["Split", "word", "by", "word"]
38 | self.assertEqual(split_text_by_words(text, 1), expected)
39 |
40 | def test_empty_string(self):
41 | text = ""
42 | expected = []
43 | self.assertEqual(split_text_by_words(text, 5), expected)
44 |
45 | def test_whitespace_string(self):
46 | text = " \n \t "
47 | expected = []
48 | self.assertEqual(split_text_by_words(text, 5), expected)
49 |
50 | def test_multiple_spaces(self):
51 | text = "Word1 Word2 Word3 \n Word4"
52 | expected = ["Word1 Word2", "Word3 Word4"]
53 | self.assertEqual(split_text_by_words(text, 2), expected)
54 |
55 | def test_invalid_chunk_size_zero(self):
56 | with self.assertRaisesRegex(ValueError, "chunk_size must be a positive integer."):
57 | split_text_by_words("Some text", 0)
58 |
59 | def test_invalid_chunk_size_negative(self):
60 | with self.assertRaisesRegex(ValueError, "chunk_size must be a positive integer."):
61 | split_text_by_words("Some text", -1)
62 |
63 | def test_invalid_chunk_size_float(self):
64 | # Floats are not ints, even if positive
65 | with self.assertRaisesRegex(ValueError, "chunk_size must be a positive integer."):
66 | split_text_by_words("Some text", 5.0)
67 |
68 | def test_invalid_text_type_int(self):
69 | with self.assertRaisesRegex(TypeError, "Input 'text' must be a string."):
70 | split_text_by_words(12345, 5) # type: ignore
71 |
72 | def test_invalid_text_type_none(self):
73 | with self.assertRaisesRegex(TypeError, "Input 'text' must be a string."):
74 | split_text_by_words(None, 5) # type: ignore
75 |
76 |
77 | class TestExecuteGraphGeneration(unittest.TestCase):
78 |
79 | def setUp(self):
80 | """Set up a mock LLM for tests."""
81 | self.mock_llm = MagicMock()
82 | self.default_relations = ["REL1", "REL2"]
83 | self.default_system_prompt = "System Instructions"
84 | self.default_user_prompt = "User Instructions: {text} - {relationships}"
85 |
86 | def test_no_llm_provided(self):
87 | """Test that ValueError is raised if llm is None."""
88 | with self.assertRaisesRegex(ValueError, "LLM object must be provided."):
89 | execute_graph_generation(text="Some text", llm=None)
90 |
91 | def test_single_chunk_success(self):
92 | """Test successful graph generation from a single chunk."""
93 | text = "Node A is connected to Node B."
94 | chunk_size = 10
95 | mock_response_content = """
96 | Some preamble...
97 | <node>
98 | <from_node>Node A</from_node>
99 | <relationship>is connected to</relationship>
100 | <to_node>Node B</to_node>
101 | </node>
102 | Some postamble...
103 | """
104 | self.mock_llm.invoke.return_value = MockLLMResponse(content=mock_response_content)
105 |
106 | expected_graph = [{
107 | "from": "Node A",
108 | "relationship": "is connected to",
109 | "to": "Node B"
110 | }]
111 |
112 | result_graph = execute_graph_generation(
113 | text=text,
114 | llm=self.mock_llm,
115 | chunk_size=chunk_size,
116 | relations=self.default_relations,
117 | system_prompt=self.default_system_prompt,
118 | user_prompt=self.default_user_prompt
119 | )
120 |
121 | self.assertEqual(result_graph, expected_graph)
122 | # Check LLM was called once with correct messages
123 | self.mock_llm.invoke.assert_called_once()
124 | call_args = self.mock_llm.invoke.call_args[0][0] # Get the 'messages' argument
125 | self.assertIsInstance(call_args[0], SystemMessage)
126 | self.assertEqual(call_args[0].content, self.default_system_prompt)
127 | self.assertIsInstance(call_args[1], HumanMessage)
128 | self.assertIn(text, call_args[1].content)
129 | self.assertIn(str(self.default_relations), call_args[1].content)
130 |
131 |
132 | def test_llm_returns_no_nodes(self):
133 | """Test scenario where LLM response lacks tags, causing retries."""
134 | text = "Some text that results in no nodes."
135 | chunk_size = 10
136 | max_retries = 3
137 | mock_response_content = "The LLM responded, but found nothing relevant."
138 |
139 | # LLM always returns content without nodes
140 | self.mock_llm.invoke.return_value = MockLLMResponse(content=mock_response_content)
141 |
142 | result_graph = execute_graph_generation(
143 | text=text,
144 | llm=self.mock_llm,
145 | chunk_size=chunk_size,
146 | max_retries=max_retries,
147 | verbose=False # Keep console clean for test output
148 | )
149 |
150 | self.assertEqual(result_graph, []) # Expect empty graph
151 | # Check LLM was called max_retries times
152 | self.assertEqual(self.mock_llm.invoke.call_count, max_retries)
153 |
154 | def test_llm_returns_malformed_node(self):
155 | """Test where exists but inner tags are missing."""
156 | text = "Text leading to malformed output."
157 | chunk_size = 10
158 | max_retries = 2
159 | mock_response_content = """
160 | <node>
161 | <from_node>GoodNode1</from_node>
162 | <relationship>RELATES_TO</relationship>
163 | <to_node>AnotherNode</to_node>
164 | </node>
165 | <node>
166 | <from_node>BadNode</from_node>
167 | </node>
168 | """
169 | # Mock LLM to return malformed content (will retry as no *valid* node found initially)
170 | # Then return valid content on retry
171 | self.mock_llm.invoke.side_effect = [
172 | MockLLMResponse(content="Bad"), # First attempt fails validation
173 | MockLLMResponse(content=mock_response_content) # Second attempt succeeds
174 | ]
175 |
176 |
177 | expected_graph = [{
178 | "from": "GoodNode1",
179 | "relationship": "RELATES_TO",
180 | "to": "AnotherNode"
181 | }]
182 |
183 | result_graph = execute_graph_generation(
184 | text=text,
185 | llm=self.mock_llm,
186 | chunk_size=chunk_size,
187 | max_retries=max_retries
188 | )
189 |
190 | self.assertEqual(result_graph, expected_graph)
191 | self.assertEqual(self.mock_llm.invoke.call_count, 2) # Called twice (initial + retry)
192 |
193 |
194 | def test_llm_invocation_error_then_success(self):
195 | """Test retry mechanism when llm.invoke raises an exception initially."""
196 | text = "Text causing initial failure."
197 | chunk_size = 10
198 | max_retries = 3
199 | success_response = "<node><from_node>A</from_node><relationship>R</relationship><to_node>B</to_node></node>"
200 |
201 | # Configure mock to raise error first, then succeed
202 | self.mock_llm.invoke.side_effect = [
203 | ValueError("Simulated LLM API error"),
204 | MockLLMResponse(content=success_response)
205 | ]
206 |
207 | expected_graph = [{"from": "A", "relationship": "R", "to": "B"}]
208 |
209 | # Use patch to temporarily suppress print statements during test
210 | with patch('builtins.print') as mock_print:
211 | result_graph = execute_graph_generation(
212 | text=text,
213 | llm=self.mock_llm,
214 | chunk_size=chunk_size,
215 | max_retries=max_retries,
216 | verbose=True # Enable verbose to test print suppression
217 | )
218 |
219 | self.assertEqual(result_graph, expected_graph)
220 | self.assertEqual(self.mock_llm.invoke.call_count, 2) # Failed once, succeeded once
221 | # Check if error message was printed (if verbose=True)
222 | self.assertTrue(any("Error during LLM invocation" in str(call_args) for call_args in mock_print.call_args_list))
223 |
224 |
225 | def test_llm_invocation_error_max_retries_exceeded(self):
226 | """Test scenario where LLM consistently fails, exceeding max_retries."""
227 | text = "Text causing consistent failure."
228 | chunk_size = 10
229 | max_retries = 2
230 |
231 | # Configure mock to always raise an error
232 | self.mock_llm.invoke.side_effect = ConnectionError("Simulated persistent network issue")
233 |
234 | # Suppress print for cleaner test output
235 | with patch('builtins.print'):
236 | result_graph = execute_graph_generation(
237 | text=text,
238 | llm=self.mock_llm,
239 | chunk_size=chunk_size,
240 | max_retries=max_retries,
241 | verbose=False
242 | )
243 |
244 | self.assertEqual(result_graph, []) # Expect empty graph
245 | # Check LLM was called max_retries times
246 | self.assertEqual(self.mock_llm.invoke.call_count, max_retries)
247 |
248 |
249 | def test_empty_input_text(self):
250 | """Test behavior with empty input text."""
251 | result_graph = execute_graph_generation(text="", llm=self.mock_llm)
252 | self.assertEqual(result_graph, [])
253 | self.mock_llm.invoke.assert_not_called() # LLM should not be called if no chunks
254 |
255 | def test_verbose_output(self):
256 | """Test that messages are printed when verbose=True."""
257 | text = "Verbose test text."
258 | chunk_size = 5
259 | mock_response = "<node><from_node>V</from_node><relationship>T</relationship><to_node>T</to_node></node>"
260 | self.mock_llm.invoke.return_value = MockLLMResponse(content=mock_response)
261 |
262 | with patch('builtins.print') as mock_print:
263 | execute_graph_generation(text=text, llm=self.mock_llm, chunk_size=chunk_size, verbose=True)
264 |
265 | # Check for expected print messages
266 | mock_print.assert_any_call("Splitting text into 1 chunks of size 5 words.")
267 | mock_print.assert_any_call("Processing chunk 1/1...")
268 | mock_print.assert_any_call("Nodes successfully processed in chunk 1/1.")
269 |
270 |
271 | if __name__ == '__main__':
272 | unittest.main(argv=['first-arg-is-ignored'], exit=False)
--------------------------------------------------------------------------------