├── .github
    └── FUNDING.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── ditana-assistant
├── ditana-assistant-gui.png
├── ditana-assistant.1
├── ditana-assistant.desktop
├── ditana-assistant.png
├── ditana_assistant
    ├── __init__.py
    ├── base
    │   ├── __init__.py
    │   ├── config.py
    │   ├── model_interface.py
    │   ├── output_manager.py
    │   ├── request_manager.py
    │   ├── string_cache.py
    │   ├── terminal.py
    │   └── wolfram_alpha_short_answers.py
    ├── benchmark
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── multiple_choice_dataset.py
    │   └── statistics.py
    ├── engine
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── context.py
    │   ├── context_processes.py
    │   ├── conversation_manager.py
    │   ├── input_analyzers_ai.py
    │   ├── input_analyzers_regex.py
    │   ├── pastime.py
    │   ├── terminal_interaction.py
    │   ├── text_processors_ai.py
    │   └── text_processors_regex.py
    ├── gui
    │   ├── __init__.py
    │   ├── assistant_window.py
    │   ├── ditana-logo.png
    │   └── index.html
    └── tests
    │   ├── __init__.py
    │   ├── test_base
    │       ├── __int__.py
    │       └── test_string_cache.py
    │   └── test_engine
    │       ├── __init__.py
    │       ├── code_detection_test_cases.py
    │       ├── input_analyzers_ai_code_test.py
    │       ├── input_analyzers_ai_test.py
    │       ├── input_analyzers_regex_code_test.py
    │       └── text_processors_regex_test.py
├── evaluate-ditana-assistant
├── packaging
    └── arch
    │   ├── .SRCINFO
    │   ├── .gitignore
    │   ├── PKGBUILD
    │   └── pre-build-hook
├── pastime-sample.png
├── poetry.lock
├── pyproject.toml
└── wolfram_alpha_short_answers_api_key.png


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: acrion
2 | liberapay: acrion
3 | buy_me_a_coffee: acrion
4 | patreon: acriondev
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | pkg
 2 | *.zst
 3 | *.zst.sig
 4 | *.whl
 5 | __pycache__
 6 | .idea
 7 | tmp
 8 | test.sh
 9 | requirements.txt
10 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Ditana Assistant
 2 | 
 3 | We're thrilled that you're interested in contributing to Ditana Assistant! This document provides guidelines for contributing to the project.
 4 | 
 5 | ## How to Contribute
 6 | 
 7 | There are many ways to contribute to Ditana Assistant:
 8 | 
 9 | 1. **Reporting Bugs**: If you encounter any issues, please report them with as much detail as possible.
10 |    If you can reproduce the bug, please enable `show_debug_messages` in `config.yaml` and provide the full output.
11 |    Please note that Ditana Assistant uses the LLM API for its internal processes, which may lead to non-reproducible effects.
12 |    Additionally, Ditana Assistant employs a request cache, which further complicates reproducibility:
13 |    A previously reproducible issue may suddenly disappear due to the cache’s sophisticated strategy
14 |    for managing the dynamic lifetime of cached requests, which depends on the persistence of the responses provided by the API.
15 |    The cache files are located in the following directories:
16 |    - Linux: `~/.local/share/ditana-assistant`
17 |    - macOS: `~/Library/Application Support/ditana-assistant`
18 |    - Windows: `C:\Users\<username>\AppData\Local\ditana-assistant`
19 | 2. **Suggesting Enhancements**: Have an idea for a new feature or an improvement? Open an issue and tag it as an enhancement. We love hearing new ideas!
20 | 
21 | 3. **Writing Documentation**: Good documentation is crucial. If you see an area where our documentation could be improved or expanded, please let us know or submit a pull request with your changes.
22 | 
23 | 4. **Contributing Code**: We welcome code contributions for various aspects of the project, including:
24 |    - Bug fixes
25 |    - Performance improvements
26 |    - New features
27 |    - Documentation enhancements
28 |    
29 |    Here’s the process for contributing code:
30 |    - Fork the repository
31 |    - Create a new branch for your feature or bug fix
32 |    - Write your code, adding unit tests for new features
33 |    - Ensure all tests pass
34 |    - Submit a pull request with a clear description of your changes
35 | 
36 | 5. **Testing**: With the wide variety of scenarios an AI assistant must handle, comprehensive testing is crucial. We particularly need:
37 |    - More unit tests, especially those that uncover edge cases or potential issues
38 |    - Cross-platform testing to ensure consistency across different operating systems
39 |    - User experience testing to identify areas for improvement in the interface and interaction flow
40 | 
41 | 6. **Providing Feedback**: Your experiences and observations are invaluable. Please share:
42 |    - Feature requests: Let us know what functionalities you'd like to see added or improved.
43 |    - Usage scenarios: Descriptions of how you use Ditana Assistant can help us optimize for real-world applications.
44 | 
45 | ## Code Style
46 | 
47 | We strive to maintain a consistent and readable codebase. Please adhere to the following guidelines:
48 | 
49 | - Follow the existing code style in the project
50 | - Use meaningful variable and function names
51 | - Comment your code where necessary, especially for complex logic
52 | - Use the pylint configuration provided in the `pyproject.toml` file at the root of the project. This customized configuration gives good indications of what to do and what to avoid.
53 | - Use type hints in method and function definitions. This improves code readability and helps catch type-related errors early.
54 | 
55 | Example of a properly documented and type-hinted function:
56 | 
57 | ```python
58 | def calculate_sum(a: int, b: int) -> int:
59 |     """
60 |     Calculate the sum of two integers.
61 | 
62 |     Args:
63 |         a (int): The first integer.
64 |         b (int): The second integer.
65 | 
66 |     Returns:
67 |         int: The sum of a and b.
68 |     """
69 |     return a + b
70 | ```
71 | 
72 | ## Commit Messages
73 | 
74 | Write clear, concise commit messages. Start with a short summary (50 characters or less), followed by a blank line and a more detailed explanation if necessary.
75 | 
76 | ## Pull Requests
77 | 
78 | - Create a new branch for each feature or bug fix
79 | - Keep pull requests focused on a single change
80 | - Include a description of what your change does and why it’s needed
81 | - Update documentation if your changes require it
82 | - Ensure your code adheres to the style guidelines mentioned above
83 | 
84 | ## Questions?
85 | 
86 | If you have any questions about contributing, feel free to open an issue or reach out to the maintainers.
87 | 
88 | Thank you for your interest in improving Ditana Assistant!


--------------------------------------------------------------------------------
/ditana-assistant:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # Copyright (c) 2024, 2025 acrion innovations GmbH
 4 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
 5 | #
 6 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
 7 | #
 8 | # Ditana Assistant is offered under a commercial and under the AGPL license.
 9 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
10 | 
11 | # AGPL licensing:
12 | #
13 | # Ditana Assistant is free software: you can redistribute it and/or modify
14 | # it under the terms of the GNU Affero General Public License as published by
15 | # the Free Software Foundation, either version 3 of the License, or
16 | # (at your option) any later version.
17 | #
18 | # Ditana Assistant is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 | # GNU Affero General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Affero General Public License
24 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
25 | 
26 | """
27 | This is the main entry point for the Ditana Assistant.
28 | It sets up the necessary paths and imports for running the assistant.
29 | """
30 | 
31 | from ditana_assistant.engine import __main__
32 | 
33 | if __name__ == '__main__':
34 |     __main__.main()
35 | 


--------------------------------------------------------------------------------
/ditana-assistant-gui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana-assistant-gui.png


--------------------------------------------------------------------------------
/ditana-assistant.1:
--------------------------------------------------------------------------------
 1 | .TH DITANA-ASSISTANT 1 "September 2024" "Version 1.031" "User Commands"
 2 | .SH NAME
 3 | ditana-assistant \- AI-powered command-line and GUI assistant
 4 | .SH SYNOPSIS
 5 | .B ditana-assistant
 6 | [\fB\-h\fR]
 7 | [\fB\-e\fR]
 8 | [\fB\-u\fR]
 9 | [\fB\-n\fR]
10 | [\fI\,task\/\fR]
11 | .SH DESCRIPTION
12 | .B ditana-assistant
13 | is an AI-powered application that combines a command-line interface with optional GUI functionality. It leverages Large Language Models (LLMs) to provide intelligent assistance for various tasks, including system management, file operations, and command generation.
14 | .SH OPTIONS
15 | .TP
16 | .BR \-h ", " \-\-help
17 | Show this help message and exit
18 | .TP
19 | .BR \-e ", " \-\-english
20 | Force output in English language
21 | .TP
22 | .BR \-u ", " \-\-gui
23 | Display a graphical dialog
24 | .TP
25 | .BR \-n ", " \-\-non\-interactive
26 | Run in non-interactive mode (no further user input expected)
27 | .TP
28 | .I task
29 | The task for the assistant (optional)
30 | .SH CONFIGURATION
31 | Configuration is stored in \fI~/.config/ditana-assistant/config.yaml\fR. Available options:
32 | .TP
33 | .B model_type
34 | Set to 'gemma' (default) or 'openai'
35 | .TP
36 | .B openai_model
37 | Specify OpenAI model (e.g., 'gpt-4o-mini') when using OpenAI
38 | .TP
39 | .B show_debug_messages
40 | Set to 'true' or 'false' to enable/disable debug messages
41 | .PP
42 | When using OpenAI, set the \fBOPENAI_API_KEY\fR environment variable with your API key.
43 | .SH EXAMPLES
44 | .TP
45 | Run in command-line mode:
46 | .B ditana-assistant \(dqWhat are the largest files in the current directory?\(dq
47 | .TP
48 | Run in GUI mode:
49 | .B ditana-assistant \-u
50 | .TP
51 | Run in English, non-interactive mode:
52 | .B ditana-assistant \-e \-n \(dqSummarize the contents of log.txt\(dq
53 | .SH NOTES
54 | The assistant may suggest terminal commands, which are only executed after user confirmation. Due to the use of LLMs, there's a possibility of errors. Always review suggested commands before execution.
55 | .SH AUTHOR
56 | Written by Stefan Zipproth <s.zipproth@acrion.ch>, acrion innovations GmbH, Switzerland.
57 | .SH BUGS
58 | Report bugs to <https://github.com/acrion/ditana-assistant/issues>
59 | .SH COPYRIGHT
60 | Copyright \(co 2024 Stefan Zipproth, acrion innovations GmbH, Switzerland. License AGPLv3+: GNU AGPL version 3 or later <https://www.gnu.org/licenses/agpl-3.0.html>.
61 | This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law.
62 | .PP
63 | For additional licensing options, visit <https://acrion.ch/sales>.
64 | .SH SEE ALSO
65 | Full documentation at: <https://github.com/acrion/ditana-assistant>
66 | .PP
67 | For optimal performance with the default Gemma LLM, consider installing the \fBditana-koboldcpp\fR package from the AUR, which provides a pre-configured koboldcpp server.
68 | 


--------------------------------------------------------------------------------
/ditana-assistant.desktop:
--------------------------------------------------------------------------------
 1 | [Desktop Entry]
 2 | 
 3 | Name=Ditana Assistant
 4 | GenericName=AI Assistant
 5 | Icon=/usr/share/icons/hicolor/256x256/apps/ditana-assistant.png
 6 | Type=Application
 7 | Exec=/usr/bin/ditana-assistant --gui
 8 | StartupNotify=false
 9 | Terminal=true
10 | NoDisplay=false
11 | Categories=Office;
12 | 


--------------------------------------------------------------------------------
/ditana-assistant.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana-assistant.png


--------------------------------------------------------------------------------
/ditana_assistant/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ditana-assistant package.
 3 | 
 4 | An AI-powered assistant application that combines GUI and terminal functionality
 5 | with optional introspective contextual augmentation.
 6 | 
 7 | This package provides the core functionality for the Ditana Assistant,
 8 | including AI model integration, system interaction, and contextual enhancement.
 9 | 
10 | The version is imported from the package metadata if the package is installed,
11 | otherwise it defaults to "unknown".
12 | 
13 | Attributes:
14 |     __version__ (str): The version of the ditana-assistant package.
15 | 
16 | For detailed information about features and usage, please refer to the project's
17 | README.md file or official documentation.
18 | """
19 | 
20 | from importlib.metadata import version, PackageNotFoundError
21 | 
22 | try:
23 |     __version__ = version("ditana-assistant")
24 | except PackageNotFoundError:
25 |     # Package is not installed
26 |     __version__ = "unknown"
27 | 


--------------------------------------------------------------------------------
/ditana_assistant/base/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana_assistant/base/__init__.py


--------------------------------------------------------------------------------
/ditana_assistant/base/model_interface.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module provides an interface for interacting with different AI models
 26 | in the Ditana Assistant. It includes functions for preparing requests and
 27 | processing responses from various AI models.
 28 | """
 29 | 
 30 | from typing import List, Dict, Literal, Any
 31 | import urllib.parse
 32 | 
 33 | from ditana_assistant.base import config
 34 | from ditana_assistant.base.config import Configuration
 35 | 
 36 | 
 37 | def get_endpoint() -> str:
 38 |     """
 39 |     Get the API endpoint for the model type selected in the configuration.
 40 | 
 41 |     Returns:
 42 |         str: The OpenAI chat-completions URL, or "api/v1/generate" joined onto the configured KOBOLDCPP_BASE_URL (Gemma). No case matches other model types, so None is returned implicitly for them.
 43 |     """
 44 |     match Configuration.get()['MODEL_TYPE']:
 45 |         case config.ModelType.OPENAI:
 46 |             return "https://api.openai.com/v1/chat/completions"
 47 |         case config.ModelType.GEMMA:
 48 |             return urllib.parse.urljoin(Configuration.get()['KOBOLDCPP_BASE_URL'], "api/v1/generate")
 49 | 
 50 | 
 51 | def convert_messages_to_gemma_prompt(messages: List[Dict[Literal["role", "content"], str]]) -> str:
 52 |     """
 53 |     Convert a list of chat messages into a single Gemma turn-formatted prompt.
 54 | 
 55 |     Args:
 56 |         messages (List[Dict[Literal["role", "content"], str]]): Message dictionaries with "role" and "content" keys; the "assistant" role is written as "model".
 57 | 
 58 |     Returns:
 59 |         str: The prompt, starting with "<end_of_turn>" and ending with an open "<start_of_turn>model" turn.
 60 |     """
 61 |     prompt = "<end_of_turn>\n"
 62 |     for message in messages:
 63 |         role = message["role"]
 64 |         content = message["content"]
 65 | 
 66 |         if role == "assistant":
 67 |             role = "model"
 68 | 
 69 |         prompt += f"<start_of_turn>{role}\n{content}<end_of_turn>\n"
 70 | 
 71 |     prompt += "<start_of_turn>model\n"
 72 | 
 73 |     return prompt
 74 | 
 75 | 
 76 | def extract_assistant_answer(response_json: Dict[str, Any]) -> str:
 77 |     """
 78 |     Extract the assistant’s answer from the API response JSON of the configured model type.
 79 | 
 80 |     Args:
 81 |         response_json (Dict[str, Any]): The JSON response from the API.
 82 | 
 83 |     Returns:
 84 |         str: The first choice's message content (OpenAI), or the first result's text with surrounding whitespace stripped (Gemma).
 85 |     """
 86 |     match Configuration.get()['MODEL_TYPE']:
 87 |         case config.ModelType.OPENAI:
 88 |             return response_json['choices'][0]['message']['content']
 89 |         case config.ModelType.GEMMA:
 90 |             return response_json['results'][0]['text'].strip()
 91 | 
 92 | 
 93 | def get_request(messages: List[Dict[Literal["role", "content"], str]]) -> Dict[str, Any]:
 94 |     """
 95 |     Prepare the request payload for the model type selected in the configuration.
 96 | 
 97 |     Args:
 98 |         messages (List[Dict[Literal["role", "content"], str]]): A list of message dictionaries.
 99 | 
100 |     Returns:
101 |         Dict[str, Any]: For Gemma, fixed sampling parameters plus the messages rendered as one prompt string; for OpenAI, the configured model name, the messages as given, and fixed sampling parameters.
102 |     """
103 |     match Configuration.get()['MODEL_TYPE']:
104 |         case config.ModelType.GEMMA:
105 |             return {
106 |                 "n": 1,
107 |                 "max_context_length": 4096,
108 |                 "max_length": 768,
109 |                 "rep_pen": 1.01,
110 |                 "temperature": 0.25,
111 |                 "top_p": 0.6,
112 |                 "top_k": 100,
113 |                 "top_a": 0,
114 |                 "typical": 1,
115 |                 "tfs": 1,
116 |                 "rep_pen_range": 320,
117 |                 "rep_pen_slope": 0.7,
118 |                 "sampler_order": [6, 0, 1, 3, 4, 2, 5],
119 |                 "memory": "",
120 |                 "trim_stop": True,
121 |                 "genkey": "KCPP9905",
122 |                 "min_p": 0,
123 |                 "dynatemp_range": 0,
124 |                 "dynatemp_exponent": 1,
125 |                 "smoothing_factor": 0,
126 |                 "banned_tokens": [],
127 |                 "render_special": False,
128 |                 "presence_penalty": 0,
129 |                 "logit_bias": {},
130 |                 "prompt": convert_messages_to_gemma_prompt(messages),
131 |                 "quiet": True,
132 |                 "stop_sequence": ["<end_of_turn>\n<start_of_turn>user", "<end_of_turn>\n<start_of_turn>model"],
133 |                 "use_default_badwordsids": False,
134 |                 "bypass_eos": False
135 |             }
136 |         case config.ModelType.OPENAI:
137 |             return {
138 |                 "model": Configuration.get()['OPENAI_MODEL'],
139 |                 "messages": messages,
140 |                 "max_tokens": 768,
141 |                 "temperature": 0,
142 |                 "top_p": 1,
143 |                 "frequency_penalty": 0,
144 |                 "presence_penalty": 0
145 |             }
146 | 


--------------------------------------------------------------------------------
/ditana_assistant/base/output_manager.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
 2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
 3 | #
 4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
 5 | #
 6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
 7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
 8 | 
 9 | # AGPL licensing:
10 | #
11 | # Ditana Assistant is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU Affero General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # Ditana Assistant is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU Affero General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU Affero General Public License
22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
23 | 
24 | """
25 | This module provides an OutputManager class for managing console output in the Ditana Assistant.
26 | It includes functionality to print formatted messages, avoid duplicate outputs, reset output history,
27 | and convert newlines to spaces before processing.
28 | """
29 | 
30 | from typing import Dict
31 | 
32 | 
33 | def truncate_string(input_string: str, max_length: int = 100) -> str:
34 |     """
35 |     Truncates the input string to a maximum length and appends '...' if truncated.
36 | 
37 |     Args:
38 |         input_string (str): The string to be truncated.
39 |         max_length (int, optional): The maximum length of the output string,
40 |                                     including '...' if truncated. Defaults to 100.
41 | 
42 |     Returns:
43 |         str: The truncated string, with '...' appended if it was shortened.
44 |     """
45 |     if len(input_string) <= max_length:
46 |         return input_string
47 |     else:
48 |         return input_string[:max_length-3] + '...'
49 | 
50 | 
51 | class OutputManager:
52 |     """
53 |     A class to manage console output, avoiding duplicates and providing formatted printing.
54 |     """
55 | 
56 |     hide_messages: bool = False
57 |     left_size: int = 52
58 |     right_size: int = 100
59 |     output_history: Dict[str, bool] = {}
60 | 
61 |     @classmethod
62 |     def print_formatted(cls, prefix: str, message: str) -> None:
63 |         """
64 |         Print a formatted message unless hide_messages is set or the same output line was printed before.
65 |         Converts newlines to spaces and truncates prefix and message before the duplicate check.
66 | 
67 |         Args:
68 |             prefix (str): The prefix for the message.
69 |             message (str): The main content of the message.
70 |         """
71 |         message = message.replace('\n', ' ')
72 |         output = f"   {truncate_string(prefix, cls.left_size).rjust(cls.left_size)}: \"{truncate_string(message, cls.right_size)}\""
73 |         if not cls.hide_messages and output not in cls.output_history:
74 |             print(output)
75 |             cls.output_history[output] = True
76 | 
77 |     @classmethod
78 |     def reset_history(cls) -> None:
79 |         """
80 |         Reset the output history, allowing all messages to be printed again.
81 |         """
82 |         cls.output_history.clear()
83 | 


--------------------------------------------------------------------------------
/ditana_assistant/base/request_manager.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module provides the RequestManager class, which serves as a foundational component for managing
 26 | API requests to AI models. It includes functionality for sending requests, caching responses, and
 27 | handling interactions with external services such as Wolfram Alpha. The module ensures efficient and
 28 | reliable communication with AI services by implementing error handling and retry mechanisms.
 29 | """
 30 | 
 31 | import hashlib
 32 | import json
 33 | import os
 34 | from pathlib import Path
 35 | import queue
 36 | import re
 37 | import threading
 38 | import time
 39 | from typing import Any, Dict, Optional
 40 | 
 41 | import requests
 42 | 
 43 | from ditana_assistant.base import config, model_interface
 44 | from ditana_assistant.base.config import Configuration
 45 | from ditana_assistant.base.string_cache import StringCache
 46 | from ditana_assistant.base.wolfram_alpha_short_answers import WolframAlphaShortAnswers
 47 | 
 48 | 
 49 | class RequestManager:
 50 |     """
 51 |     RequestManager is a base class responsible for managing API requests to AI models.
 52 | 
 53 |     It handles sending requests, caching responses to improve performance, and interacting with
 54 |     external services like Wolfram Alpha for additional functionalities. The class provides
 55 |     a standardized method `send_model_request` for sending requests and processing responses,
 56 |     including error handling and retry mechanisms to ensure reliable communication with AI
 57 |     services.
 58 | 
 59 |     Attributes:
 60 |         _wolfram_alpha (WolframAlphaShortAnswers): An instance to handle Wolfram Alpha short answers.
 61 |         _force_wolfram_alpha (bool): A flag to force the use of Wolfram Alpha.
 62 |         _pastime_mode (bool): A flag to enable pastime mode.
 63 |         _impersonate (str): A string to specify impersonation.
 64 |         _code_input_event (threading.Event): An event for code input synchronization.
 65 |         _code_input_global (queue.Queue): A global queue for code input.
 66 |         _stop_thread (threading.Event): An event to signal thread termination.
 67 |     """
 68 | 
 69 |     _request_cache: Optional[StringCache] = None
 70 |     _wolfram_alpha = WolframAlphaShortAnswers()
 71 |     _ica: bool = False
 72 |     _force_wolfram_alpha: bool = False
 73 |     _pastime_mode: bool = False
 74 |     _impersonate: Optional[str] = None
 75 |     _code_input_event = threading.Event()
 76 |     _code_input_global = queue.Queue()
 77 |     _stop_thread = threading.Event()
 78 | 
 79 |     @classmethod
 80 |     def wolfram_alpha(cls) -> WolframAlphaShortAnswers:
 81 |         """
 82 |         Accessor method for the _wolfram_alpha class attribute.
 83 | 
 84 |         Returns:
 85 |             WolframAlphaShortAnswers: The instance handling Wolfram Alpha short answers.
 86 |         """
 87 |         return cls._wolfram_alpha
 88 | 
 89 |     @classmethod
 90 |     def force_wolfram_alpha(cls) -> bool:
 91 |         """
 92 |         Accessor method for the _force_wolfram_alpha class attribute.
 93 | 
 94 |         Returns:
 95 |             bool: Indicates whether to force the use of Wolfram Alpha.
 96 |         """
 97 |         return cls._force_wolfram_alpha
 98 | 
 99 |     @classmethod
100 |     def set_force_wolfram_alpha(cls, value: bool) -> None:
101 |         """
102 |         Mutator method for the _force_wolfram_alpha class attribute.
103 | 
104 |         Args:
105 |             value (bool): The new value to set for _force_wolfram_alpha.
106 | 
107 |         Returns:
108 |             None
109 |         """
110 |         cls._force_wolfram_alpha = value
111 | 
112 |     @classmethod
113 |     def ica(cls) -> bool:
114 |         """
115 |         Accessor method for the _ica class attribute.
116 | 
117 |         Returns:
118 |             bool: Indicates whether to augment the context of user requests introspectively with additional factual information.
119 |                   If True, the system will implement Introspective Contextual Augmentation by generating a contextual query based on the user’s input,
120 |                   attempt to retrieve factual information from Wolfram|Alpha or process it using an LLM,
121 |                   and append this information to the conversation history. This process enhances subsequent responses
122 |                   through introspective reasoning and contextual augmentation.
123 |         """
124 |         return cls._ica
125 | 
126 |     @classmethod
127 |     def set_ica(cls, value: bool) -> None:
128 |         """
129 |         Mutator method for the _ica class attribute.
130 | 
131 |         Args:
132 |             value (bool): The new value to set for _ica.
133 | 
134 |         Returns:
135 |             None
136 |         """
137 |         cls._ica = value
138 | 
139 |     @classmethod
140 |     def pastime_mode(cls) -> bool:
141 |         """
142 |         Accessor method for the _pastime_mode class attribute.
143 | 
144 |         Returns:
145 |             bool: Indicates whether pastime mode is enabled.
146 |         """
147 |         return cls._pastime_mode
148 | 
149 |     @classmethod
150 |     def set_pastime_mode(cls, value: bool) -> None:
151 |         """
152 |         Mutator method for the _pastime_mode class attribute.
153 | 
154 |         Args:
155 |             value (bool): The new value to set for _pastime_mode.
156 | 
157 |         Returns:
158 |             None
159 |         """
160 |         cls._pastime_mode = value
161 | 
162 |     @classmethod
163 |     def impersonate(cls) -> Optional[str]:
164 |         """
165 |         Accessor method for the _impersonate class attribute.
166 | 
167 |         Returns:
168 |             Optional[str]: The impersonation string, if any.
169 |         """
170 |         return cls._impersonate
171 | 
172 |     @classmethod
173 |     def set_impersonate(cls, value: Optional[str]) -> None:
174 |         """
175 |         Mutator method for the _impersonate class attribute.
176 | 
177 |         Args:
178 |             value (Optional[str]): The new impersonation string to set.
179 | 
180 |         Returns:
181 |             None
182 |         """
183 |         cls._impersonate = value
184 | 
185 |     @classmethod
186 |     def code_input_event(cls) -> threading.Event:
187 |         """
188 |         Accessor method for the _code_input_event class attribute.
189 | 
190 |         Returns:
191 |             threading.Event: The event used for code input synchronization.
192 |         """
193 |         return cls._code_input_event
194 | 
195 |     @classmethod
196 |     def code_input_global(cls) -> queue.Queue:
197 |         """
198 |         Accessor method for the _code_input_global class attribute.
199 | 
200 |         Returns:
201 |             queue.Queue: The global queue for code input.
202 |         """
203 |         return cls._code_input_global
204 | 
205 |     @classmethod
206 |     def stop_thread(cls) -> threading.Event:
207 |         """
208 |         Accessor method for the _stop_thread class attribute.
209 | 
210 |         Returns:
211 |             threading.Event: The event used to signal thread termination.
212 |         """
213 |         return cls._stop_thread
214 | 
215 |     @classmethod
216 |     def initialize_cache(cls, priority_cache_path: Path = None) -> None:
217 |         """
218 |         Initialize the global request cache.
219 | 
220 |         Args:
221 |             priority_cache_path (Path): The optional path for the priority cache.
222 | 
223 |         Returns:
224 |             None
225 |         """
226 |         # Although LLM responses may vary, the overall quality remains stable unless the provider
227 |         # makes significant improvements. A one-week cache is used to reduce the number of API calls.
228 |         cls._request_cache = StringCache(
229 |             base_filename="model_request_cache",
230 |             default_lifetime=Configuration.get()['MODEL_CACHE_START_LIFETIME_SEC'],
231 |             priority_cache_path=priority_cache_path,
232 |             max_size=Configuration.get()['MODEL_CACHE_SIZE']*1024*1024
233 |         )
234 | 
235 |     @classmethod
236 |     def send_model_request(cls, request: Dict[str, Any]) -> str:
237 |         """
238 |         Send a request to the AI model and handle the response.
239 | 
240 |         This function sends the prepared request to the appropriate AI model endpoint,
241 |         handles potential errors and retries, and extracts the assistant’s answer from the response.
242 | 
243 |         Args:
244 |             request (dict): The prepared request data to be sent to the AI model.
245 | 
246 |         Returns:
247 |             str: The assistant’s answer extracted from the model’s response.
248 |                 In case of an error, it returns an error message instead.
249 | 
250 |         Side effects:
251 |             - Sends HTTP requests to the AI model endpoint.
252 |             - Prints debug information if debug mode is enabled.
253 |             - Handles and retries on 'service unavailable' errors and rate limit errors.
254 |             - Extracts and processes errors from the response.
255 | 
256 |         Raises:
257 |             None: Errors are caught and returned as part of the assistant’s answer.
258 | 
259 |         Note:
260 |             This function uses a while loop to implement a retry mechanism
261 |             for 'service unavailable' errors and rate limit errors. It will keep retrying
262 |             with increasing wait times until a valid response is received or an error occurs.
263 |         """
264 |         if not cls._request_cache:
265 |             cls.initialize_cache()
266 | 
267 |         try:
268 |             # Initialize the retry delay
269 |             retry_delay = 1  # Initial retry delay in seconds
270 | 
271 |             while True:
272 |                 session = requests.Session()
273 |                 endpoint = model_interface.get_endpoint()
274 | 
275 |                 hash_input = (
276 |                     endpoint
277 |                     + json.dumps(request, sort_keys=True)
278 |                     + str(RequestManager.ica())
279 |                     + Configuration.get()['WOLFRAM_ALPHA_SHORT_ANSWERS_APP_ID']
280 |                 )
281 |                 hash_sum = hashlib.sha256(hash_input.encode()).hexdigest()
282 |                 assistant_answer = cls._request_cache.get(hash_sum)
283 |                 if assistant_answer is not None:
284 |                     return assistant_answer
285 | 
286 |                 headers = {"Content-Type": "application/json"}
287 | 
288 |                 if Configuration.get()['MODEL_TYPE'] == config.ModelType.OPENAI:
289 |                     headers["Authorization"] = f"Bearer {os.environ.get('OPENAI_API_KEY')}"
290 | 
291 |                 response = session.post(
292 |                     endpoint,
293 |                     headers=headers,
294 |                     json=request
295 |                 )
296 | 
297 |                 response_json = response.json()
298 | 
299 |                 if response_json.get('detail', {}).get('type') == 'service_unavailable':
300 |                     print(f"{endpoint} is busy, retrying in 3 seconds...")
301 |                     time.sleep(3)
302 |                 elif 'error' in response_json:
303 |                     error = response_json['error']
304 |                     if isinstance(error, dict):
305 |                         error_message = error.get('message', 'Unknown error occurred')
306 |                         error_type = error.get('type', 'unknown_error')
307 |                         error_code = error.get('code', 'unknown_code')
308 | 
309 |                         if error_code == 'rate_limit_exceeded':
310 |                             # Try to extract the wait time from the error message
311 |                             wait_time_match = re.search(r'try again in (\d+\.?\d*)s', error_message)
312 |                             if wait_time_match:
313 |                                 wait_time = float(wait_time_match.group(1))
314 |                                 print(f"Rate limit exceeded. Waiting for {wait_time} seconds before retrying.")
315 |                                 time.sleep(wait_time)
316 |                                 continue
317 |                             else:
318 |                                 print(error_message)
319 |                                 print(f"Rate limit exceeded. Retrying in {retry_delay} seconds.")
320 |                                 time.sleep(retry_delay)
321 |                                 retry_delay *= 2  # Double the retry delay for the next iteration
322 |                                 continue
323 | 
324 |                         assistant_answer = f"API Error: {error_type} - {error_code}\n{error_message}"
325 |                     else:
326 |                         assistant_answer = f"API Error: {error}"
327 |                     break
328 |                 else:
329 |                     assistant_answer = model_interface.extract_assistant_answer(response_json)
330 |                     cls._request_cache.set(hash_sum, assistant_answer)
331 |                     break
332 |         except requests.exceptions.RequestException as e:
333 |             assistant_answer = str(e)
334 | 
335 |         if Configuration.get()['SHOW_DEBUG_MESSAGES']:
336 |             print("----------------------------------------------------------------")
337 |             print("Response:")
338 |             print("----------------------------------------------------------------")
339 |             print(assistant_answer)
340 |             print("--- End of response --------------------------------------------")
341 | 
342 |         return assistant_answer
343 | 


--------------------------------------------------------------------------------
/ditana_assistant/base/string_cache.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module provides a cache implementation for key-value string pairs with automatic persistence to disk.
 26 | 
 27 | The cache supports automatic expiration of entries based on their lifetime, and provides mechanisms for
 28 | extending the lifetime of entries that are set to the same value after expiration. In this way, entries
 29 | that do not change are gradually given longer lifetimes.
 30 | """
 31 | 
 32 | import json
 33 | import os
 34 | import tempfile
 35 | from pathlib import Path
 36 | import time
 37 | from typing import Dict, Optional, Tuple
 38 | 
 39 | import platformdirs
 40 | 
 41 | 
 42 | class StringCache:
 43 |     """
 44 |     A cache class for storing key-value string pairs with automatic disk persistence,
 45 |     entry expiration, and a maximum cache size limit.
 46 | 
 47 |     The cache automatically writes to a JSON file on disk after each addition or modification of an entry.
 48 |     Entries have a lifetime and are automatically expired when accessed after their lifetime has passed.
 49 |     The cache also enforces a maximum size limit, removing entries when necessary to stay within the limit.
 50 |     """
 51 | 
 52 |     def __init__(
 53 |             self,
 54 |             base_filename: str,
 55 |             default_lifetime: float,
 56 |             max_size: int = 50*1024*1024,
 57 |             priority_cache_path: Optional[Path] = None):
 58 |         """
 59 |         Initialize the StringCache.
 60 | 
 61 |         Args:
 62 |             base_filename (str): The base name of the JSON file to store the cache.
 63 |             default_lifetime (float): The default lifetime of cache entries in seconds.
 64 |             max_size (int): The maximum size of the cache in bytes. Defaults to 20 MiB.
 65 |             priority_cache_path (Path): Use a priority cache file for read-only access to predefined responses, overriding the config file.
 66 |                 In contrast to the standard cache, existing entries will be used even if their lifetime is expired.
 67 |         """
 68 |         self.base_filename = base_filename
 69 |         self.default_lifetime = default_lifetime
 70 |         self.max_size = max_size
 71 |         self.cache: Dict[str, Tuple[str, float, float]] = {}
 72 |         self.file_path: Path = Path(platformdirs.user_data_dir("ditana-assistant", ".")) / f"{base_filename}.json"
 73 |         self.priority_cache_path: Optional[Path] = None if priority_cache_path is None else Path(priority_cache_path)
 74 |         self.priority_cache: Optional[Dict[str, Tuple[str, float, float]]] = None
 75 |         self._load_cache()
 76 |         self.current_size = self._get_current_size()
 77 | 
 78 |     def _load_cache(self) -> None:
 79 |         """Load the cache from the JSON file if it exists."""
 80 |         if self.file_path.exists():
 81 |             with self.file_path.open('r', encoding='utf-8') as f:
 82 |                 self.cache = json.load(f)
 83 | 
 84 |         if self.priority_cache_path is not None:
 85 |             with self.priority_cache_path.open('r', encoding='utf-8') as f:
 86 |                 self.priority_cache = json.load(f)
 87 | 
 88 |     def _save_cache(self) -> None:
 89 |         """Save the cache to the JSON file using a temporary file and atomic rename."""
 90 |         self.file_path.parent.mkdir(parents=True, exist_ok=True)
 91 | 
 92 |         # Create a temporary file in the same directory as the target file
 93 |         temp_fd, temp_path = tempfile.mkstemp(dir=self.file_path.parent,
 94 |                                               prefix=self.base_filename,
 95 |                                               suffix='.tmp')
 96 | 
 97 |         try:
 98 |             with os.fdopen(temp_fd, 'w', encoding='utf-8') as temp_file:
 99 |                 json.dump(self.cache, temp_file)
100 | 
101 |             # Perform an atomic rename
102 |             os.replace(temp_path, self.file_path)
103 |         except Exception as e:
104 |             # If an error occurs, make sure to remove the temporary file
105 |             os.unlink(temp_path)
106 |             raise e  # Re-raise the exception after cleanup
107 | 
108 |     @staticmethod
109 |     def _get_entry_size(key: str, value: str) -> int:
110 |         """Calculate the size of a cache entry in bytes."""
111 |         return len(key.encode('utf-8')) + len(value.encode('utf-8'))
112 | 
113 |     def _get_current_size(self) -> int:
114 |         """Calculate the current size of the cache in bytes."""
115 |         return sum(self._get_entry_size(k, v[0]) for k, v in self.cache.items())
116 | 
117 |     def set(self, key: str, value: str) -> bool:
118 |         """
119 |         Set a key-value pair in the cache.
120 | 
121 |         If the entry already exists and the value is the same, update the timestamp and extend the lifetime.
122 |         If adding the new entry would exceed the maximum cache size, older entries are removed to make space.
123 | 
124 |         Args:
125 |             key (str): The key to set.
126 |             value (str): The value to set.
127 | 
128 |         Returns:
129 |             bool: True if the entry was set successfully, False if it couldn't be set due to size constraints.
130 |         """
131 |         current_time = time.time()
132 |         new_entry_size = self._get_entry_size(key, value)
133 | 
134 |         # Check if the new entry alone exceeds the maximum cache size
135 |         if new_entry_size > self.max_size:
136 |             return False
137 | 
138 |         new_size = self.current_size
139 | 
140 |         # If the entry already exists, update it
141 |         if key in self.cache:
142 |             old_value, old_timestamp, old_lifetime = self.cache[key]
143 |             if old_value == value:
144 |                 new_lifetime = 2 * max(old_lifetime, current_time - old_timestamp)
145 |                 self.cache[key] = (value, current_time, new_lifetime)
146 |                 self._save_cache()
147 |                 return True
148 |             else:
149 |                 # Remove the old entry before adding the new one
150 |                 new_lifetime = old_lifetime / 2
151 |                 new_size -= self._get_entry_size(key, old_value)
152 |                 del self.cache[key]
153 |         else:
154 |             new_lifetime = self.default_lifetime
155 | 
156 |         # Check if adding the new entry would exceed the maximum size
157 |         while new_size + new_entry_size > self.max_size:
158 |             # Identify the entry with the most exceeded lifetime
159 |             most_exceeded_entry = None
160 |             max_exceeded_time = 0
161 |             for current_entry, (_, timestamp, lifetime) in self.cache.items():
162 |                 # We examine all entries that have less lifetime than the new entry,
163 |                 # hence we add "+ lifetime" to the exceeded time
164 |                 exceeded_time = current_time - timestamp - lifetime + new_lifetime
165 |                 if exceeded_time > max_exceeded_time:
166 |                     most_exceeded_entry = current_entry
167 |                     max_exceeded_time = exceeded_time
168 | 
169 |             if most_exceeded_entry is None:
170 |                 # No entries have exceeded their lifetime to make space, can’t add new entry
171 |                 if new_size != self.current_size:
172 |                     self._load_cache()  # restore that internal cache
173 |                 return False
174 | 
175 |             # Remove the most exceeded entry
176 |             removed_size = self._get_entry_size(most_exceeded_entry, self.cache[most_exceeded_entry][0])
177 |             del self.cache[most_exceeded_entry]
178 |             new_size -= removed_size
179 | 
180 |         # Add the new entry
181 |         self.cache[key] = (value, current_time, new_lifetime)
182 |         self.current_size = new_size + new_entry_size
183 |         self._save_cache()
184 |         return True
185 | 
186 |     def get(self, key: str) -> Optional[str]:
187 |         """
188 |         Retrieve the value associated with a key from the cache.
189 | 
190 |         This method first checks the priority cache. If the key exists in the priority cache,
191 |         its value is returned without considering its lifetime. If the key is not found in the
192 |         priority cache, the method then checks the standard cache and returns the value only
193 |         if it exists and has not expired based on its lifetime.
194 | 
195 |         Args:
196 |             key (str): The key to retrieve from the cache.
197 | 
198 |         Returns:
199 |             Optional[str]: The cached value if found and hasn't expired, otherwise None.
200 |         """
201 |         if self.priority_cache and key in self.priority_cache:
202 |             return self.priority_cache[key][0]
203 | 
204 |         if key in self.cache:
205 |             value, timestamp, lifetime = self.cache[key]
206 |             if time.time() - timestamp <= lifetime:
207 |                 return value
208 |             else:
209 |                 # We do not delete an old entry because we need to know its lifetime to calculate
210 |                 # the new lifetime in case the same key is later stored in `set`.
211 |                 pass
212 | 
213 |         return None
214 | 
215 |     def get_lifetime(self, key: str) -> Optional[float]:
216 |         """
217 |         Retrieve the remaining lifetime of the specified key in the cache.
218 | 
219 |         If the key exists in the priority cache, returns infinity as its lifetime is indefinite.
220 |         If the key exists in the standard cache, returns the remaining lifetime in seconds if it has not expired.
221 |         If the key does not exist or has expired, returns None.
222 | 
223 |         Args:
224 |             key (str): The key for which to retrieve the lifetime.
225 | 
226 |         Returns:
227 |             Optional[float]: The remaining lifetime in seconds (negative if it has expired),
228 |                              infinity if the key is in the priority cache,
229 |                              or None if the key does not exist
230 |         """
231 |         if self.priority_cache and key in self.priority_cache:
232 |             return float('inf')
233 |         if key in self.cache:
234 |             _, timestamp, lifetime = self.cache[key]
235 |             return lifetime - (time.time() - timestamp)
236 |         return None
237 | 
238 |     def __contains__(self, key: str) -> bool:
239 |         """
240 |         Check if a key exists in the cache and hasn't expired.
241 | 
242 |         Args:
243 |             key (str): The key to check.
244 | 
245 |         Returns:
246 |             bool: True if the key exists and hasn't expired, False otherwise.
247 |         """
248 |         return self.get(key) is not None
249 | 
250 |     def __len__(self) -> int:
251 |         """
252 |         Get the number of non-expired entries in the cache.
253 | 
254 |         Returns:
255 |             int: The number of non-expired entries.
256 |         """
257 |         valid_entries = sum(1 for key in self.cache if self.get(key) is not None)
258 |         return valid_entries
259 | 
260 |     def clear(self) -> None:
261 |         """
262 |         Clear all entries from the cache and delete the associated file.
263 | 
264 |         This method removes all entries from the internal cache dictionary,
265 |         resets the current size to 0, and deletes the JSON file from the disk.
266 |         It does not affect the priority cache.
267 | 
268 |         Note: After calling this method, the cache will be empty, and the
269 |         file will no longer exist on the disk. The next operation that
270 |         writes to the cache will create a new file.
271 |         """
272 |         self.cache.clear()
273 |         self.current_size = 0
274 | 
275 |         if self.file_path.exists():
276 |             self.file_path.unlink()
277 | 


--------------------------------------------------------------------------------
/ditana_assistant/base/terminal.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module provides functions for interacting with the terminal in the Ditana Assistant.
 26 | It includes utilities for running interactive commands and handling user input in the terminal.
 27 | """
 28 | 
 29 | import os
 30 | import subprocess
 31 | import sys
 32 | from typing import Tuple
 33 | 
 34 | 
 35 | def get_valid_input(question_text):
 36 |     """
 37 |     Prompt the user for a yes/no input and validate the response.
 38 | 
 39 |     Args:
 40 |         question_text (str): The question to ask the user.
 41 | 
 42 |     Returns:
 43 |         str: Either 'y' or 'n' based on the user’s validated input.
 44 |     """
 45 |     while True:
 46 |         reply = input(f"{question_text} (y/n) ").lower()
 47 |         if reply in ['y', 'n']:
 48 |             return reply
 49 |         print("Invalid input. Please enter 'y' or 'n'.")
 50 | 
 51 | 
 52 | def run_interactive_command_unix(command: str) -> Tuple[int, str]:
 53 |     """
 54 |     Run an interactive command on Unix-like systems.
 55 | 
 56 |     Args:
 57 |         command (str): The command to run.
 58 | 
 59 |     Returns:
 60 |         tuple: A tuple containing the return code and the command output.
 61 |     """
 62 |     import pty
 63 |     import select
 64 | 
 65 |     env = os.environ.copy()
 66 |     env['PAGER'] = 'cat'
 67 |     env['SYSTEMD_PAGER'] = ''
 68 |     env['LESS'] = '-F -X'
 69 |     env['COLUMNS'] = '500'
 70 |     env['LINES'] = '5000'
 71 | 
 72 |     master, slave = pty.openpty()
 73 |     output = []
 74 |     try:
 75 |         with subprocess.Popen(
 76 |                 command,
 77 |                 shell=True,
 78 |                 stdin=slave,
 79 |                 stdout=slave,
 80 |                 stderr=slave,
 81 |                 close_fds=True,
 82 |                 env=env
 83 |         ) as process:
 84 |             os.close(slave)
 85 | 
 86 |             while True:
 87 |                 rlist, _, _ = select.select([master, sys.stdin], [], [])
 88 | 
 89 |                 if master in rlist:
 90 |                     try:
 91 |                         data = os.read(master, 1024)
 92 |                         if not data:
 93 |                             break
 94 |                         sys.stdout.buffer.write(data)
 95 |                         sys.stdout.flush()
 96 |                         output.append(data)
 97 |                     except OSError:
 98 |                         break
 99 | 
100 |                 if sys.stdin in rlist:
101 |                     data = sys.stdin.buffer.read1(1024)
102 |                     if not data:
103 |                         break
104 |                     os.write(master, data)
105 | 
106 |             process.wait()
107 |             return_code = process.returncode
108 |     finally:
109 |         os.close(master)
110 | 
111 |     full_output = b''.join(output).decode('utf-8', errors='replace')
112 |     return return_code, full_output
113 | 
114 | 
def run_interactive_command_windows(command: str) -> Tuple[int, str]:
    """
    Run an interactive command on Windows systems.

    The command is started through the shell. Its output is read line by line,
    echoed to this process's stdout and collected. Standard error is merged into
    standard output, so error messages are part of the returned output (as in the
    Unix variant) and an unread stderr pipe cannot block the command.

    Args:
        command (str): The command to run.

    Returns:
        tuple: A tuple containing the return code and the command output.
    """
    output = []
    with subprocess.Popen(
        command,
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge stderr so it is captured and never left unread
        text=True,
        bufsize=1  # line-buffered in text mode
    ) as process:
        # Iterating the pipe yields lines as they arrive and stops at end of output.
        for line in process.stdout:
            sys.stdout.write(line)
            sys.stdout.flush()
            output.append(line)

        process.wait()
        return_code = process.returncode

    full_output = ''.join(output)
    return return_code, full_output
149 | 
150 | 
def run_interactive_command(command: str) -> Tuple[int, str]:
    """
    Dispatch an interactive command to the implementation for the current platform.

    Args:
        command (str): The command to run.

    Returns:
        tuple: A tuple containing the return code and the command output.
    """
    # os.name is 'nt' on Windows; everything else uses the Unix implementation.
    runner = run_interactive_command_windows if os.name == 'nt' else run_interactive_command_unix
    return runner(command)
165 | 


--------------------------------------------------------------------------------
/ditana_assistant/base/wolfram_alpha_short_answers.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | WolframAlphaShortAnswers Module
 26 | 
 27 | This module provides a class for interacting with the Wolfram|Alpha Short Answers API.
 28 | It allows for querying the API, caching responses, and handling errors.
 29 | 
 30 | The module contains the following main components:
 31 | 1. WolframAlphaShortAnswers class: Encapsulates the API interaction logic.
 32 | 2. StringCache usage: For caching API responses and errors.
 33 | 3. Configuration integration: To manage API credentials.
 34 | 
 35 | Dependencies:
 36 | - urllib.parse: For URL encoding.
 37 | - requests: For making HTTP requests to the API.
 38 | - config: For accessing configuration settings.
 39 | - string_cache: For caching mechanisms.
 40 | 
 41 | Usage:
 42 |     wa = WolframAlphaShortAnswers()
 43 |     answer, error = wa.query("What is the capital of France?")
 44 |     if error:
 45 |         print(f"An error occurred: {error}")
 46 |     else:
 47 |         print(f"The answer is: {answer}")
 48 | 
 49 | Note:
 50 |     This module requires a valid Wolfram|Alpha API application ID to be set in the
 51 |     configuration. You can obtain one from https://developer.wolframalpha.com for
 52 |     the "Short Answers API".
 53 | 
 54 | Error Handling:
 55 |     The module handles various error scenarios, including HTTP errors, request
 56 |     exceptions, and cases where the API cannot interpret the input or provide
 57 |     a short answer.
 58 | 
 59 | Caching:
 60 |     Responses and errors are cached to improve performance and reduce API calls
 61 |     for repeated queries.
 62 | """
 63 | 
 64 | import urllib.parse
 65 | from typing import Optional, Tuple
 66 | 
 67 | import requests
 68 | 
 69 | from ditana_assistant.base.config import Configuration
 70 | from ditana_assistant.base.string_cache import StringCache
 71 | 
 72 | 
 73 | class WolframAlphaShortAnswers:
 74 |     """
 75 |     A class to interact with the Wolfram|Alpha Short Answers API.
 76 | 
 77 |     This class encapsulates the functionality to make requests to the
 78 |     Wolfram|Alpha Short Answers API, handle caching of responses,
 79 |     and manage potential errors.
 80 | 
 81 |     Class Attributes:
 82 |         _answer_cache (StringCache): A dictionary to store cached API responses.
 83 |         _error_cache (StringCache): A dictionary to store API error responses.
 84 |     """
 85 | 
 86 |     # Wolfram|Alpha answers may contain real-time data. This cache duration
 87 |     # represents a balance between freshness and minimizing API requests.
 88 |     _answer_cache = StringCache(base_filename="wolfram_alpha_answer_cache", default_lifetime=Configuration.get()['WOLFRAM_ALPHA_CACHE_START_LIFETIME_SEC'], max_size=Configuration.get()['WOLFRAM_ALPHA_CACHE_SIZE']*1024*1024)
 89 | 
 90 |     # When Wolfram|Alpha declines a request, it is likely to continue rejecting the same
 91 |     #  request. A one-week cache helps avoid redundant API calls for failed requests.
 92 |     _error_cache = StringCache(base_filename="wolfram_alpha_error_cache", default_lifetime=Configuration.get()['WOLFRAM_ALPHA_ERROR_CACHE_START_LIFETIME_SEC'], max_size=Configuration.get()['WOLFRAM_ALPHA_ERROR_CACHE_SIZE']*1024*1024)
 93 | 
 94 |     @staticmethod
 95 |     def query(question: str) -> Tuple[Optional[str], Optional[str]]:
 96 |         """
 97 |         Query the Wolfram|Alpha Short Answers API.
 98 | 
 99 |         This method checks the cache for an existing answer, and if not found,
100 |         makes a request to the API. The result is cached before being returned.
101 | 
102 |         Args:
103 |             question (str): The question to ask the Wolfram|Alpha API.
104 | 
105 |         Returns:
106 |             Tuple[Optional[str], Optional[str]]: A tuple containing the API response
107 |             and an error message (if any). If successful, the error will be None.
108 |         """
109 |         app_id = Configuration.get()['WOLFRAM_ALPHA_SHORT_ANSWERS_APP_ID']
110 | 
111 |         if not app_id or app_id == "":
112 |             return None, 'API application ID is not set. You may generate one for the "Short Answers API" under https://developer.wolframalpha.com).'
113 | 
114 |         if question in WolframAlphaShortAnswers._answer_cache:
115 |             return WolframAlphaShortAnswers._answer_cache.get(question), None
116 | 
117 |         if question in WolframAlphaShortAnswers._error_cache:
118 |             return None, WolframAlphaShortAnswers._error_cache.get(question)
119 | 
120 |         encoded_question = urllib.parse.quote(question)
121 |         url = f"http://api.wolframalpha.com/v1/result?appid={app_id}&i={encoded_question}&units=metric"
122 | 
123 |         try:
124 |             response = requests.get(url, timeout=7)
125 |             response.raise_for_status()
126 |             result = response.text
127 |             WolframAlphaShortAnswers._answer_cache.set(question, result)
128 |             return result, None
129 |         except requests.exceptions.HTTPError as e:
130 |             if e.response.status_code == 501:
131 |                 error_text = "The input cannot be interpreted or no short answer is available."
132 |                 WolframAlphaShortAnswers._error_cache.set(question, error_text)
133 |             elif e.response.status_code == 400:
134 |                 error_text = "Invalid API request. Check the input parameter."
135 |             else:
136 |                 error_text = f"HTTP Error: {str(e)}"
137 |         except requests.exceptions.RequestException as e:
138 |             error_text = f"Request Error: {str(e)}"
139 | 
140 |         return None, error_text
141 | 


--------------------------------------------------------------------------------
/ditana_assistant/benchmark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana_assistant/benchmark/__init__.py


--------------------------------------------------------------------------------
/ditana_assistant/benchmark/__main__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  4 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  5 | #
  6 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  7 | #
  8 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  9 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
 10 | 
 11 | # AGPL licensing:
 12 | #
 13 | # Ditana Assistant is free software: you can redistribute it and/or modify
 14 | # it under the terms of the GNU Affero General Public License as published by
 15 | # the Free Software Foundation, either version 3 of the License, or
 16 | # (at your option) any later version.
 17 | #
 18 | # Ditana Assistant is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 21 | # GNU Affero General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Affero General Public License
 24 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 25 | 
 26 | """
 27 | Automatic Evaluation System for Ditana Assistant
 28 | 
 29 | This module provides functionality to evaluate the performance of the Ditana Assistant
 30 | on the AI2 ARC (Artificial Intelligence 2 Reasoning Challenge) dataset. It specifically
 31 | tests the impact of Introspective Contextual Augmentation (ICA) on the assistant’s
 32 | accuracy in answering multiple-choice questions.
 33 | 
 34 | The evaluation process runs two iterations:
 35 | 1. With ICA disabled
 36 | 2. With ICA enabled
 37 | 
 38 | Key Features:
 39 | - Loads and processes the ARC-Challenge dataset
 40 | - Handles both letter and numeric answer keys in the dataset
 41 | - Implements a custom answer extraction method for reliable evaluation
 42 | - Calculates and compares accuracy scores with and without ICA
 43 | 
 44 | This evaluation helps quantify the effectiveness of ICA in enhancing
 45 | the Ditana Assistant’s problem-solving capabilities across a range of
 46 | complex reasoning tasks.
 47 | """
 48 | 
 49 | # __main__.py
 50 | 
 51 | import argparse
 52 | import sys
 53 | 
 54 | from ditana_assistant.benchmark import statistics
 55 | from ditana_assistant.base.config import Configuration, ModelType
 56 | from ditana_assistant.base.request_manager import RequestManager
 57 | from ditana_assistant.engine.conversation_manager import ConversationManager
 58 | from ditana_assistant.benchmark.multiple_choice_dataset import MultipleChoiceDataset, DatasetIdentifier
 59 | 
 60 | 
 61 | def run_evaluation(dataset_identifier: DatasetIdentifier, benchmark_experimental: bool):
 62 |     dataset = MultipleChoiceDataset(dataset_identifier)
 63 |     results = []
 64 | 
 65 |     for i, sample in enumerate(dataset.iterate_questions()):
 66 |         question = sample["question"]
 67 |         labeled_choices = sample["choices"]  # List of tuples (label, choice)
 68 |         correct_answer = sample["answer"]     # Correct answer label, e.g., 'B', 'C', etc.
 69 | 
 70 |         print(f"\n--- Question {i + 1} ---")
 71 |         print(f"Question: {question}")
 72 |         print("Choices:")
 73 |         for label, choice in labeled_choices:
 74 |             print(f"{label}. {choice}")
 75 |         print(f"Correct answer: {correct_answer if correct_answer else 'None'}")
 76 | 
 77 |         RequestManager.set_ica(benchmark_experimental)
 78 |         Configuration.set(enable_experimental_features=False)
 79 | 
 80 |         prediction_without_feature = dataset.process_question(question, labeled_choices)
 81 | 
 82 |         RequestManager.set_ica(True)
 83 |         Configuration.set(enable_experimental_features=benchmark_experimental)
 84 | 
 85 |         test_feature_name = "experimental" if benchmark_experimental else "ICA"
 86 | 
 87 |         print(f"Model’s answer (without {test_feature_name} feature): {prediction_without_feature if prediction_without_feature else 'None'}")
 88 | 
 89 |         prediction_with_feature = dataset.process_question(question, labeled_choices)
 90 | 
 91 |         print(f"Model’s answer (with {test_feature_name} feature): {prediction_with_feature if prediction_with_feature else 'None'}")
 92 | 
 93 |         correct_no_ica = prediction_without_feature == correct_answer
 94 |         correct_ica = prediction_with_feature == correct_answer
 95 | 
 96 |         results.append((correct_no_ica, correct_ica))
 97 | 
 98 |         statistics.update_results(results, test_feature_name)
 99 | 
100 | 
def main():
    """
    Command-line entry point of the benchmark.

    Applies a fixed benchmark configuration, parses the command line, optionally
    installs a read-only priority cache and runs the evaluation on the requested
    dataset. Without ``-r/--run`` only the help text is printed.
    """
    Configuration.set(
        model_type=ModelType.OPENAI,
        show_debug_messages=False,
        openai_model="gpt-4o-mini",
        koboldcpp_base_url="http://localhost:5001",
        wolfram_alpha_short_answers_app_id="",
        generate_terminal_cmd=False,
        offer_cmd_execution=False,
        assume_english=True
    )

    # Derive the list of valid identifiers from the enum so the help text
    # cannot drift away from what is actually accepted.
    supported_datasets = ", ".join(f"'{identifier.value}'" for identifier in DatasetIdentifier)

    parser = argparse.ArgumentParser(description="Benchmark of Ditana Assistant")
    parser.add_argument(
        "-c", "--priority-cache",
        type=str,
        help="Use a priority cache file for read-only access to predefined responses. "
             "This allows the assistant to respond using cached data from the specified priority cache file before accessing the normal request cache."
    )
    parser.add_argument("-r", "--run", action="store_true", help="Run the benchmark.")
    parser.add_argument("-e", "--experimental", action="store_true", help="Compare ICA with an experimental version of it.")
    parser.add_argument(
        "-d", "--dataset",
        type=str,
        required=True,
        help=f"Identifier of the dataset to use. Supported values: {supported_datasets}."
    )
    args = parser.parse_args()

    if not args.run:
        parser.print_help()
        sys.exit(0)

    if args.priority_cache:
        ConversationManager.initialize_cache(priority_cache_path=args.priority_cache)

    try:
        dataset_identifier = DatasetIdentifier(args.dataset)
    except ValueError:
        # Report errors on stderr so they do not mix with benchmark output.
        print(f"Unknown dataset identifier: {args.dataset}", file=sys.stderr)
        sys.exit(1)

    print("Running evaluation...")
    run_evaluation(dataset_identifier, args.experimental)
145 | 
146 | 
# Start the benchmark only when this file is executed as the entry point,
# not when it is merely imported.
if __name__ == "__main__":
    main()
149 | 


--------------------------------------------------------------------------------
/ditana_assistant/benchmark/multiple_choice_dataset.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | Module: ditana_assistant.benchmark.multiple_choice_dataset
 26 | 
 27 | This module provides functionality to handle multiple choice datasets, including AI2 ARC and CAIS MMLU.
 28 | """
 29 | 
 30 | import re
 31 | from datasets import load_dataset, get_dataset_config_names
 32 | from typing import List, Dict, Any, Optional, Iterator, Tuple
 33 | from enum import Enum
 34 | from ditana_assistant.engine.conversation_manager import ConversationManager
 35 | 
 36 | 
class DatasetIdentifier(Enum):
    """
    Enumeration of supported dataset identifiers.

    The value of each member is the string form of the identifier, so a member
    can be looked up from that string, e.g. ``DatasetIdentifier("ai2_arc")``.
    """
    AI2_ARC = "ai2_arc"      # AI2 ARC; MultipleChoiceDataset loads its "ARC-Challenge" configuration
    CAIS_MMLU = "cais_mmlu"  # CAIS MMLU; MultipleChoiceDataset loads its "all" configuration
    # Add more identifiers here as needed
 44 | 
 45 | 
 46 | class MultipleChoiceDataset:
 47 |     """
 48 |     A class to handle multiple choice datasets such as AI2 ARC and CAIS MMLU.
 49 |     """
 50 |     def __init__(self, identifier: DatasetIdentifier):
 51 |         """
 52 |         Initializes the MultipleChoiceDataset with the specified dataset identifier.
 53 | 
 54 |         Args:
 55 |             identifier (DatasetIdentifier): The identifier of the dataset to load.
 56 |         """
 57 |         self.identifier = identifier
 58 |         self.datasets: List[Tuple[str, Any]] = self._load_datasets()
 59 |         total_samples = sum(len(ds) for _, ds in self.datasets)
 60 |         print(f"Number of datasets loaded: {len(self.datasets)}")
 61 |         print(f"Total number of samples: {total_samples}")
 62 | 
 63 |     def _load_datasets(self) -> List[Tuple[str, Any]]:
 64 |         """
 65 |         Loads the dataset(s) based on the identifier.
 66 | 
 67 |         Returns:
 68 |             List[Tuple[str, Any]]: A list of tuples containing configuration names and loaded datasets.
 69 |         """
 70 |         datasets = []
 71 |         if self.identifier == DatasetIdentifier.AI2_ARC:
 72 |             split = 'test'
 73 |             print(f"Loading dataset 'ai2_arc' with split '{split}'...")
 74 |             try:
 75 |                 dataset = load_dataset("ai2_arc", "ARC-Challenge", split=split)
 76 |                 datasets.append(("ARC-Challenge", dataset))
 77 |                 print(f"Loaded 'ai2_arc' with {len(dataset)} samples.")
 78 |             except ValueError as ve:
 79 |                 print(f"Error loading 'ai2_arc' with split '{split}': {ve}")
 80 |         elif self.identifier == DatasetIdentifier.CAIS_MMLU:
 81 |             split='test'
 82 |             config = "all"
 83 |             print(f"Loading dataset 'cais/mmlu' with configuration '{config}' and split '{split}'...")
 84 |             try:
 85 |                 ds = load_dataset("cais/mmlu", config, split=split)
 86 |                 datasets.append((config, ds))
 87 |                 print(f"Loaded 'cais/mmlu' with configuration '{config}' and {len(ds)} samples.")
 88 |             except ValueError as ve:
 89 |                 print(f"Error loading 'cais/mmlu' with configuration '{config}' and split '{split}': {ve}")
 90 |         else:
 91 |             raise ValueError(f"Unknown dataset identifier: {self.identifier}")
 92 | 
 93 |         if not datasets:
 94 |             print(f"No datasets loaded for identifier '{self.identifier}' with split '{split}'.")
 95 |         return datasets
 96 | 
 97 |     def __len__(self) -> int:
 98 |         """
 99 |         Returns the total number of samples across all loaded datasets.
100 | 
101 |         Returns:
102 |             int: Total number of samples.
103 |         """
104 |         return sum(len(ds) for _, ds in self.datasets)
105 | 
106 |     def __getitem__(self, index: int) -> Dict[str, Any]:
107 |         """
108 |         Retrieves a sample by its global index across all datasets.
109 | 
110 |         Args:
111 |             index (int): The global index of the sample.
112 | 
113 |         Returns:
114 |             Dict[str, Any]: The dataset sample.
115 |         """
116 |         if self.identifier == DatasetIdentifier.AI2_ARC:
117 |             return self.datasets[0][1][index]
118 |         elif self.identifier == DatasetIdentifier.CAIS_MMLU:
119 |             cumulative = 0
120 |             for _, ds in self.datasets:
121 |                 if index < cumulative + len(ds):
122 |                     return ds[index - cumulative]
123 |                 cumulative += len(ds)
124 |             raise IndexError("Index out of range")
125 |         else:
126 |             raise ValueError(f"Unsupported dataset identifier: {self.identifier}")
127 | 
128 |     def iterate_questions(self) -> Iterator[Dict[str, Any]]:
129 |         """
130 |         Iterates over all questions in the loaded datasets.
131 | 
132 |         Yields:
133 |             Dict[str, Any]: A dictionary containing the question, choices, and the correct answer.
134 |         """
135 |         if self.identifier == DatasetIdentifier.AI2_ARC:
136 |             for sample in self.datasets[0][1]:
137 |                 question = sample.get("question", "").strip()
138 |                 choices_dict = sample.get("choices", {})
139 |                 choices = choices_dict.get("text", [])
140 |                 if not isinstance(choices, list):
141 |                     choices = []
142 |                 labeled_choices = self._label_choices(choices)
143 |                 answer_key = sample.get("answerKey", "").strip()
144 |                 correct_answer = self._map_ai2_arc_answer(answer_key, len(choices))
145 | 
146 |                 yield {
147 |                     "question": question,
148 |                     "choices": labeled_choices,  # List of tuples (label, choice)
149 |                     "answer": correct_answer      # Correct answer label, e.g., 'B', 'C', etc.
150 |                 }
151 |         elif self.identifier == DatasetIdentifier.CAIS_MMLU:
152 |             for config_name, ds in self.datasets:
153 |                 print(f"Processing configuration '{config_name}'...")
154 |                 for sample in ds:
155 |                     question = sample.get("question", "").strip()
156 |                     choices = sample.get("choices", [])
157 |                     labeled_choices = self._label_choices(choices)
158 |                     answer_index = sample.get("answer", None)
159 |                     correct_answer = self._map_cais_mmlu_answer(answer_index, len(choices))
160 | 
161 |                     yield {
162 |                         "question": question,
163 |                         "choices": labeled_choices,  # List of tuples (label, choice)
164 |                         "answer": correct_answer      # Correct answer label, e.g., 'B', 'C', etc.
165 |                     }
166 |         else:
167 |             raise ValueError(f"Unsupported dataset identifier: {self.identifier}")
168 | 
169 |     @staticmethod
170 |     def _label_choices(choices: List[str]) -> List[Tuple[str, str]]:
171 |         """
172 |         Labels the choices starting from 'B'.
173 | 
174 |         Args:
175 |             choices (List[str]): A list of choice strings.
176 | 
177 |         Returns:
178 |             List[Tuple[str, str]]: A list of tuples containing the label and the choice text.
179 |         """
180 |         labeled_choices = []
181 |         start_label = 66  # ASCII for 'B'
182 |         for i, choice in enumerate(choices):
183 |             label = chr(start_label + i)
184 |             labeled_choices.append((label, choice))
185 |         return labeled_choices
186 | 
187 |     @staticmethod
188 |     def _map_ai2_arc_answer(answer_key: str, num_choices: int) -> Optional[str]:
189 |         """
190 |         Maps the original answerKey to the new label starting at 'B'.
191 |         For example:
192 |             If answer_key is 'A' → 'B'
193 |             If answer_key is '1' → 'B'
194 |             If answer_key is '2' → 'C', etc.
195 | 
196 |         Args:
197 |             answer_key (str): The original answer key from the dataset.
198 |             num_choices (int): The number of available choices.
199 | 
200 |         Returns:
201 |             Optional[str]: The mapped answer label, e.g., 'B', 'C', etc., or None if invalid.
202 |         """
203 |         if answer_key.isdigit():
204 |             index = int(answer_key)  # 1-based index
205 |             mapped_label = chr(65 + index)  # 'A' + index
206 |         elif re.match(r'^[A-Z]$', answer_key.upper()):
207 |             original_label = answer_key.upper()
208 |             index = ord(original_label) - 65  # 'A' -> 0, 'B' -> 1, etc.
209 |             mapped_label = chr(66 + index)    # 'B' + index
210 |         else:
211 |             return None
212 | 
213 |         if 66 <= ord(mapped_label) <= 90 and (0 <= (ord(mapped_label) - 66) < num_choices):
214 |             return mapped_label
215 |         else:
216 |             print("Internal error in _map_ai2_arc_answer: Could not interpret correct answer in dataset!")
217 |             return None
218 | 
219 |     @staticmethod
220 |     def _map_cais_mmlu_answer(answer_index: Optional[int], num_choices: int) -> Optional[str]:
221 |         """
222 |         Maps the original answer index to the new label starting at 'B'.
223 |         For example:
224 |             If answer_index is 0 → 'B'
225 |             If answer_index is 1 → 'C', etc.
226 | 
227 |         Args:
228 |             answer_index (Optional[int]): The original answer index from the dataset.
229 |             num_choices (int): The number of available choices.
230 | 
231 |         Returns:
232 |             Optional[str]: The mapped answer label, e.g., 'B', 'C', etc., or None if invalid.
233 |         """
234 |         if isinstance(answer_index, int) and 0 <= answer_index < num_choices:
235 |             return chr(66 + answer_index)  # 'B' + index
236 |         else:
237 |             print("Internal error in _map_cais_mmlu_answer: Could not interpret correct answer in dataset!")
238 |             return None
239 | 
240 |     @staticmethod
241 |     def find_first_allowed_letter(text: str, n: int) -> Optional[str]:
242 |         """
243 |         Finds the first allowed letter in the text, starting from 'B'.
244 | 
245 |         Args:
246 |             text (str): The text to search within.
247 |             n (int): The number of allowed letters starting from 'B'.
248 | 
249 |         Returns:
250 |             Optional[str]: The first allowed letter found, or None if none are found.
251 |         """
252 |         allowed_letters = set(chr(66 + i) for i in range(n))  # B, C, D, etc.
253 |         pattern = r'\b[B-Z]\b'
254 | 
255 |         matches = re.finditer(pattern, text.upper())
256 | 
257 |         for match in matches:
258 |             found_letter = match.group()
259 |             if found_letter in allowed_letters:
260 |                 return found_letter
261 | 
262 |         return None
263 | 
264 |     def process_question(self, question: str, choices: List[Tuple[str, str]]) -> Optional[str]:
265 |         """
266 |         Processes the question and choices, sends them to the ConversationManager,
267 |         and returns the validated prediction.
268 | 
269 |         Args:
270 |             question (str): The question text.
271 |             choices (List[Tuple[str, str]]): A list of tuples containing choice labels and texts.
272 | 
273 |         Returns:
274 |             Optional[str]: The validated prediction label, e.g., 'B', 'C', etc., or None if invalid.
275 |         """
276 |         prompt = f"Question: {question}\n\nChoices:\n"
277 |         for label, choice in choices:
278 |             prompt += f"{label}. {choice}\n"
279 |         prompt += "\nPlease provide the letter of the correct answer."
280 | 
281 |         prediction = ConversationManager().process_input(query=prompt, meta_call=False)[0]
282 |         valid_prediction = self.find_first_allowed_letter(prediction, len(choices))
283 | 
284 |         return valid_prediction
285 | 


--------------------------------------------------------------------------------
/ditana_assistant/benchmark/statistics.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
 2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
 3 | #
 4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
 5 | #
 6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
 7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
 8 | 
 9 | # AGPL licensing:
10 | #
11 | # Ditana Assistant is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU Affero General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # Ditana Assistant is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU Affero General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU Affero General Public License
22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
23 | 
24 | """Statistics functions used by the benchmark"""
25 | 
26 | from statsmodels.stats.contingency_tables import mcnemar
27 | 
28 | 
def calculate_significance(results):
    """
    Determine via McNemar’s test whether two procedures differ significantly.

    Args:
    results (list of tuples): Pairs (correct_first, correct_second) of truthy/falsy
                              values stating whether the first and the second
                              procedure answered the same question correctly.

    Returns:
    float or None: The p-value of McNemar’s test, or None if there are no
                   discordant pairs or the test raises a ValueError.
    """
    # contingency[i][j]: i = first procedure correct (0/1), j = second procedure correct (0/1)
    contingency = [[0, 0], [0, 0]]
    for first_correct, second_correct in results:
        row = 1 if first_correct else 0
        column = 1 if second_correct else 0
        contingency[row][column] += 1

    # Only pairs on which the two procedures disagree carry information for the test.
    discordant = contingency[0][1] + contingency[1][0]
    if discordant == 0:
        return None

    try:
        if discordant > 25:
            # Enough discordant pairs: chi-square approximation with continuity correction.
            outcome = mcnemar(contingency, exact=False, correction=True)
        else:
            # Few discordant pairs: exact binomial test.
            outcome = mcnemar(contingency, exact=True)
        return outcome.pvalue
    except ValueError:
        return None
68 | 
69 | 
def update_results(results, test_feature_name: str):
    """
    Update and print the current results of the benchmark comparison.

    This function calculates the hit rates for both procedures (without and with the
    tested feature), computes the statistical significance using McNemar’s test,
    and prints a formatted output.

    Args:
    results (list of tuples): Each tuple is (correct_without, correct_with), where
                              both entries are booleans indicating whether the
                              prediction was correct without and with the tested feature.
    test_feature_name (str): Name of the tested feature, used in the printed labels.
    """
    total = len(results)

    # Nothing to report yet; also avoids dividing by zero in the percentages below.
    if total == 0:
        print("--------------------")
        print("No results available yet.")
        print("--------------------")
        return

    hits1 = sum(1 for res in results if res[0])
    hits2 = sum(1 for res in results if res[1])
    difference = hits2 - hits1

    p_value = calculate_significance(results)
    print("--------------------")
    print(f"without {test_feature_name} feature: {hits1}/{total} ({hits1 / total:.2%})")
    print(f"with {test_feature_name} feature: {hits2}/{total} ({hits2 / total:.2%})")
    print(f"Difference : {difference}/{total} ({difference / total:.2%})")
    if p_value is not None:
        print(f"p-value    : {p_value:.4f}")
        print(f"significant: {'yes' if p_value < 0.05 else 'no'}")
    else:
        print("p-value    : Not available")
        print("significant: Unable to determine")
    print("--------------------")
99 | 


--------------------------------------------------------------------------------
/ditana_assistant/engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana_assistant/engine/__init__.py


--------------------------------------------------------------------------------
/ditana_assistant/engine/__main__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  4 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  5 | #
  6 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  7 | #
  8 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  9 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
 10 | 
 11 | # AGPL licensing:
 12 | #
 13 | # Ditana Assistant is free software: you can redistribute it and/or modify
 14 | # it under the terms of the GNU Affero General Public License as published by
 15 | # the Free Software Foundation, either version 3 of the License, or
 16 | # (at your option) any later version.
 17 | #
 18 | # Ditana Assistant is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 21 | # GNU Affero General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Affero General Public License
 24 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 25 | 
 26 | """
 27 | This is the main entry point for the Ditana Assistant application.
 28 | It handles command-line arguments, initializes the necessary components,
 29 | and starts the main conversation loop.
 30 | """
 31 | 
 32 | import sys
 33 | import threading
 34 | import time
 35 | 
 36 | import argparse
 37 | from importlib.metadata import version, PackageNotFoundError
 38 | import os
 39 | import platform
 40 | 
 41 | import platformdirs
 42 | import webview  # https://pywebview.flowrl.com/guide/
 43 | 
 44 | from ditana_assistant.base import config
 45 | from ditana_assistant.base.config import Configuration
 46 | from ditana_assistant.base.output_manager import OutputManager
 47 | 
 48 | from ditana_assistant.engine import pastime
 49 | from ditana_assistant.engine import context
 50 | from ditana_assistant.engine.conversation_manager import ConversationManager
 51 | from ditana_assistant.engine import terminal_interaction
 52 | 
 53 | from ditana_assistant.gui.assistant_window import AssistantWindow
 54 | 
 55 | 
 56 | def main():
 57 |     """
 58 |     The main function that sets up and runs the Ditana Assistant.
 59 |     """
 60 |     if Configuration.get()['MODEL_TYPE'] == config.ModelType.OPENAI and not os.environ.get('OPENAI_API_KEY'):
 61 |         print("""
 62 | Error: OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.
 63 | 
 64 | To get an API key:
 65 | 1. Visit https://platform.openai.com/account/api-keys
 66 | 2. Generate a new key
 67 | 3. Set it in your environment:
 68 |    export OPENAI_API_KEY='your-api-key-here'  # Unix/Linux
 69 |    setx OPENAI_API_KEY "your-api-key-here"    # Windows (restart terminal after)""")
 70 |         sys.exit(1)
 71 | 
 72 |     parser = argparse.ArgumentParser(description="Ditana Assistant")
 73 |     parser.add_argument("-v", "--version", action="store_true", help="Show the version of Ditana Assistant and exit.")
 74 |     parser.add_argument("-u", "--gui", action="store_true", help="Display a graphical dialog.")
 75 |     parser.add_argument("-a", "--augmentation", action="store_true", help="Enable Introspective Contextual Augmentation (ICA) for enhanced AI responses.")
 76 |     parser.add_argument("-w", "--wolfram-alpha", action="store_true", help="Force use of Wolfram|Alpha for first prompt.")
 77 |     parser.add_argument("-q", "--quiet", action="store_true", help="Run in quiet mode. No progress output, no continuation of dialog (except confirmation of command execution).")
 78 |     parser.add_argument("-p", "--pastime", action="store_true", help="Pastime mode with a human-like dialog partner.")
 79 |     parser.add_argument("-i", "--impersonate", type=str, help="In Pastime mode, optionally impersonate the person you specify (implies -p).")
 80 |     parser.add_argument("task", nargs=argparse.REMAINDER, help="The task for the assistant.")
 81 | 
 82 |     args = parser.parse_args()
 83 | 
 84 |     version_info = ""
 85 |     try:
 86 |         version_info = version('ditana-assistant')
 87 |     except PackageNotFoundError:
 88 |         version_info = "unknown"
 89 | 
 90 |     if args.version:
 91 |         print(f"Ditana Assistant version: {version_info}")
 92 |         sys.exit(0)
 93 | 
 94 |     if args.gui and args.quiet:
 95 |         print("Error: The options '-u/--gui' and '-q/--quiet' cannot be used together. "
 96 |               "In GUI mode, user input is always expected. The quiet mode is intended for terminal-based usage only.",
 97 |               file=sys.stderr)
 98 |         sys.exit(1)
 99 | 
100 |     OutputManager.hide_messages = args.quiet
101 | 
102 |     if args.wolfram_alpha:
103 |         ConversationManager.set_force_wolfram_alpha(True)
104 | 
105 |     if args.augmentation:
106 |         ConversationManager.set_ica(True)
107 | 
108 |     if args.impersonate and not args.pastime:
109 |         args.pastime = True
110 | 
111 |     if args.pastime:
112 |         ConversationManager.set_pastime_mode(True)
113 |         ConversationManager.set_impersonate(args.impersonate)
114 |         if args.impersonate:
115 |             print(f'(impersonating {args.impersonate})')
116 |         else:
117 |             print('(impersonating Ditana)')
118 |         print()
119 | 
120 |     user_input = " ".join(args.task).strip() if args.task else ""
121 | 
122 |     if user_input == "" and not args.gui and not args.pastime:
123 |         parser.print_help()
124 |         sys.exit(0)
125 | 
126 |     conversation = ConversationManager()
127 |     if not args.pastime:
128 |         conversation.append_user_message(context.generate_initial_context())
129 | 
130 |     window = AssistantWindow(args.gui, conversation)
131 | 
132 |     if not args.gui and args.pastime:
133 |         if user_input == "":
134 |             print(pastime.initial_line())
135 | 
136 |     terminal_thread_instance = threading.Thread(target=terminal_interaction.terminal_thread, args=(conversation, window, user_input, args.quiet))
137 |     terminal_thread_instance.start()
138 | 
139 |     if args.gui:
140 |         window.set_version(version_info)
141 |         if args.pastime and user_input == "":
142 |             window.set_ui_response(pastime.initial_line())
143 | 
144 |         def ui_update_thread():
145 |             while not ConversationManager.stop_thread().is_set():
146 |                 window.process_ui_updates()
147 |                 time.sleep(0.1)  # Check for updates every 100ms
148 | 
149 |         ui_update_thread = threading.Thread(target=ui_update_thread)
150 |         ui_update_thread.start()
151 | 
152 |         if user_input != "":
153 |             window.set_ui_input(user_input)
154 |             window.click_send_button()
155 | 
156 |         # Environment variable configuration to mitigate rendering issues.
157 |         # Prevents blank window occurrences on systems with NVIDIA GPUs.
158 |         #  - WEBKIT_DISABLE_COMPOSITING_MODE=1 (Windows) - Disables compositing mode
159 |         #  - WEBKIT_DISABLE_DMABUF_RENDERER=1 (Linux) - Disables DMABUF renderer
160 |         # These settings do not impact Ditana Assistant’s performance
161 |         # as it does not rely on GPU-accelerated rendering features.
162 |         # While primarily needed for Windows and Linux, setting these for all platforms
163 |         # in the same way, including macOS, allows for consistent behavior across platforms
164 |         # and simplifies cross-platform development.
165 |         os.environ['WEBKIT_DISABLE_DMABUF_RENDERER'] = '1'  # relevant for Linux
166 |         os.environ['WEBKIT_DISABLE_COMPOSITING_MODE'] = '1'  # relevant for Windows
167 | 
168 |         # Force 'edgechromium' on Windows for Ditana Assistant compatibility.
169 |         # pywebview defaults to 'mshtml' on Windows if 'edgechromium' is unavailable
170 |         # This ensures a more meaningful error (hopefully related to missing Edge Runtime)
171 |         # instead of ambiguous JavaScript errors from 'mshtml'.
172 |         webview.start(storage_path=platformdirs.user_data_dir(),
173 |                       debug=Configuration.get()['SHOW_DEBUG_MESSAGES'],
174 |                       gui='edgechromium' if platform.system() == "Windows" else None)
175 | 
176 |         ui_update_thread.join()
177 | 
178 |     terminal_thread_instance.join()
179 | 


--------------------------------------------------------------------------------
/ditana_assistant/engine/context.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module provides functions to gather and manage context information
 26 | about the system environment for the Ditana Assistant.
 27 | 
 28 | It includes functions to determine OS information, desktop environment,
 29 | user language, and other system-specific details.
 30 | """
 31 | 
 32 | from datetime import datetime
 33 | import time
 34 | import os
 35 | import locale
 36 | import platform
 37 | import shutil
 38 | from typing import Final, Optional
 39 | 
 40 | from ditana_assistant.base.config import Configuration
 41 | from ditana_assistant.engine import context_processes
 42 | from ditana_assistant.engine import text_processors_ai
 43 | 
 44 | 
 45 | def get_linux_info() -> str:
 46 |     """
 47 |     Retrieve detailed information about the Linux distribution.
 48 | 
 49 |     Returns:
 50 |         str: A string describing the Linux distribution.
 51 |     """
 52 |     try:
 53 |         with open('/etc/os-release', 'r', encoding='utf-8') as f:
 54 |             os_release = dict(line.strip().split('=', 1) for line in f if '=' in line)
 55 | 
 56 |         id_like = os_release.get('ID_LIKE', '').strip('"')
 57 |         pretty_name = os_release.get('PRETTY_NAME', '').strip('"')
 58 | 
 59 |         if id_like and id_like.lower() != 'n/a':
 60 |             return id_like + " Linux"
 61 |         else:
 62 |             return pretty_name + " Linux"
 63 |     except FileNotFoundError:
 64 |         return "Linux"
 65 | 
 66 | 
 67 | def get_os_info() -> str:
 68 |     """
 69 |     Get basic information about the operating system.
 70 | 
 71 |     Returns:
 72 |         str: The name of the operating system.
 73 |     """
 74 |     system = platform.system()
 75 |     if system == "Darwin":
 76 |         return "Mac OS X"
 77 | 
 78 |     return system
 79 | 
 80 | 
 81 | def get_extended_os_info() -> str:
 82 |     """
 83 |     Get extended information about the operating system.
 84 | 
 85 |     For Linux, this includes distribution details.
 86 | 
 87 |     Returns:
 88 |         str: Detailed description of the operating system.
 89 |     """
 90 |     system = get_os_info()
 91 |     if system == "Linux":
 92 |         return get_linux_info()
 93 | 
 94 |     return system
 95 | 
 96 | 
 97 | def get_desktop_environment() -> str:
 98 |     """
 99 |     Determine the current desktop environment.
100 | 
101 |     Returns:
102 |         str: The name of the current desktop environment, or an empty string if not available.
103 |     """
104 |     return os.environ.get('XDG_CURRENT_DESKTOP', '')
105 | 
106 | 
def get_shell() -> str:
    """
    Derive the shell to generate commands for from the environment.

    A non-empty `SHELL` variable yields "bash" (regardless of its value), otherwise
    a non-empty `PROMPT` variable yields the Windows command prompt, and if neither
    is set, PowerShell is reported.

    Returns:
        str: The name of the shell.
    """
    if os.environ.get("SHELL"):
        return "bash"

    if os.environ.get("PROMPT"):
        return 'cmd.exe (Windows Batch)'

    return "PowerShell"
120 | 
121 | 
def get_comment_identifier() -> str:
    """
    Return the token that starts a comment in the shell reported by `get_shell()`.

    Returns:
        str: 'REM' for the Windows command prompt, '#' for every other shell.
    """
    return 'REM' if get_shell() == 'cmd.exe (Windows Batch)' else '#'
133 | 
134 | 
def get_terminal() -> Optional[str]:
    """
    Read the name of the terminal from the `TERMINAL` environment variable.

    Returns:
        Optional[str]: The value of the variable, or None if it is unset or empty.
    """
    return os.getenv("TERMINAL") or None
147 | 
148 | 
def get_system_description() -> str:
    """
    Describe the current system in a single phrase.

    Returns:
        str: The extended OS description, followed by " with <desktop environment>"
             if a desktop environment is known.
    """
    parts = [get_extended_os_info()]

    desktop = get_desktop_environment()
    if desktop:
        parts.append(f"with {desktop}")

    return " ".join(parts)
165 | 
166 | 
def get_open_command() -> Optional[str]:
    """
    Select the command that opens files or URLs on the current system.

    Returns:
        Optional[str]: 'open' on macOS, 'start' on Windows, 'exo-open' when the desktop
                       environment is XFCE, 'xdg-open' when that tool is found on the
                       PATH, and None otherwise.
    """
    system = platform.system()

    if system == 'Darwin':
        return 'open'

    if system == 'Windows':
        return 'start'

    if get_desktop_environment() == 'XFCE':
        return 'exo-open'

    return 'xdg-open' if shutil.which('xdg-open') else None
184 | 
185 | 
def get_temporal_locale_identifier():
    """
    Temporarily sets the locale for LC_TIME to the system default to retrieve the locale identifier.
    The function handles locale settings that contain an underscore (e.g., 'de_CH.UTF-8') by returning
    only the part before the underscore. If no underscore is present, it returns the full identifier.
    If an error occurs during the process, 'en' is returned as a fallback.

    Temporarily changing the locale is necessary because Python does not automatically respect the
    environment variables for locale (such as LC_TIME) unless explicitly set. Therefore, we temporarily
    set it to the system’s default and then revert back to the original locale to avoid side effects.

    Returns:
        str: The locale identifier without the part after the underscore, or 'en' if an error occurs
             or no locale identifier could be determined.
    """
    try:
        # Querying without a new value returns the current setting as a string that
        # can be passed back to setlocale() unchanged.
        previous_setting = locale.setlocale(locale.LC_TIME)
        try:
            locale.setlocale(locale.LC_TIME, "")

            # Get the locale identifier (e.g., 'de_CH' on Linux or 'English_United States' on Windows)
            locale_identifier = locale.getlocale(locale.LC_TIME)[0]
        finally:
            # Restore the previous locale setting, even if the lookup failed
            locale.setlocale(locale.LC_TIME, previous_setting)
    except (locale.Error, ValueError):
        # locale.Error: the locale requested by the environment is not supported.
        # ValueError: the locale name could not be parsed by getlocale().
        return "en"

    if locale_identifier is None:
        return "en"

    return locale_identifier.split("_")[0]
216 | 
217 | 
def get_user_language() -> str:
    """
    Determine the user’s preferred language, or English, if the detection failed.
    This is based on LC_TIME, because it has a higher chance to be actually the
    user’s spoken language than identifiers such as LC_ALL, LC_MESSAGES or LC_NUMERIC.
    Also see https://docs.python.org/3.12/library/locale.html#locale.LC_TIME

    If the configuration option `ASSUME_ENGLISH` is set, no detection takes place.

    Returns:
        str: The English name of the user’s language. If the detected identifier is not
             a known language code, the identifier itself is returned.
    """

    if Configuration.get()['ASSUME_ENGLISH']:
        return "English"

    # Maps language codes to English language names.
    locale_dict: Final = {
        'aa': 'Afar',
        'af': 'Afrikaans',
        'an': 'Aragonese',
        'ar': 'Arabic',
        'ast': 'Asturian',
        'be': 'Belarusian',
        'bg': 'Bulgarian',
        'bhb': 'Bhili',
        'br': 'Breton',
        'bs': 'Bosnian',
        'ca': 'Catalan',
        'cs': 'Czech',
        'cy': 'Welsh',
        'da': 'Danish',
        'de': 'German',
        'el': 'Greek',
        'en': 'English',
        'es': 'Spanish',
        'et': 'Estonian',
        'eu': 'Basque',
        'fi': 'Finnish',
        'fo': 'Faroese',
        'fr': 'French',
        'ga': 'Irish',
        'gd': 'Scottish Gaelic',  # was 'Scots', which is a different language
        'gl': 'Galician',
        'gv': 'Manx',
        'he': 'Hebrew',
        'hr': 'Croatian',
        'hsb': 'Upper Sorbian',  # was truncated to 'Upper'
        'hu': 'Hungarian',
        'id': 'Indonesian',
        'is': 'Icelandic',
        'it': 'Italian',
        'ja': 'Japanese',
        'ka': 'Georgian',
        'kk': 'Kazakh',
        'kl': 'Greenlandic',
        'ko': 'Korean',
        'ku': 'Kurdish',
        'kw': 'Cornish',
        'lg': 'Luganda',
        'lt': 'Lithuanian',
        'lv': 'Latvian',
        'mg': 'Malagasy',
        'mi': 'Maori',
        'mk': 'Macedonian',
        'ms': 'Malay',
        'mt': 'Maltese',
        'nb': 'Norwegian',
        'nl': 'Dutch',
        'nn': 'Nynorsk',
        'oc': 'Occitan',
        'om': 'Oromo',
        'pl': 'Polish',
        'pt': 'Portuguese',
        'ro': 'Romanian',
        'ru': 'Russian',
        'sk': 'Slovak',
        'sl': 'Slovenian',
        'so': 'Somali',
        'sq': 'Albanian',
        'st': 'Sotho',
        'sv': 'Swedish',
        'tcy': 'Tulu',
        'tg': 'Tajik',
        'th': 'Thai',
        'tl': 'Tagalog',
        'tr': 'Turkish',
        'uk': 'Ukrainian',
        'uz': 'Uzbek',
        'wa': 'Walloon',
        'xh': 'Xhosa',
        'yi': 'Yiddish',
        'zh': 'Chinese',
        'zu': 'Zulu',
    }

    lang_code: Final = get_temporal_locale_identifier()

    # On Windows the identifier is not the code, but the actual name of the language,
    # so an unknown identifier is returned as it is.
    return locale_dict.get(lang_code.lower(), lang_code)
317 | 
318 | 
def get_system_timezone():
    """
    Retrieve the name of the system timezone.

    On Windows, the timezone key name is read from the registry. If that fails,
    and on all other systems, the first entry of `time.tzname` is used.

    Returns:
        str: The name of the system timezone. On Windows, this will be the Windows-specific
             timezone name (e.g., "W. Europe Standard Time"). On other systems, it will typically
             be a timezone abbreviation (e.g., "CET" for Central European Time), see
             https://www.iana.org/time-zones

    See Also:
        For more information about the historical context and complexities of Windows timezones:
        https://superuser.com/questions/1709147/history-explanation-for-time-zones-on-windows
    """
    if platform.system() != 'Windows':
        return time.tzname[0]

    try:
        import winreg
        registry_path = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, registry_path) as registry_key:
            return winreg.QueryValueEx(registry_key, "TimeZoneKeyName")[0]
    except OSError:
        # Registry lookup failed: fall back to the generic timezone name.
        return time.tzname[0]
347 | 
348 | 
def generate_initial_context() -> str:
    """
    Generate the initial context message for the AI assistant.

    The message names the system description, the terminal and current directory
    (only if a terminal is known), the current date, hour and time zone. This text is
    passed to `text_processors_ai.translate_from_defined_language` with "English" and
    the user’s language. The list of running desktop applications, if any, is appended
    afterwards and is therefore not passed through that call.

    Returns:
        str: The context message describing the user’s system.
    """
    user_language = get_user_language()
    description = get_system_description()
    terminal = get_terminal()

    initial_command = f"""I am working on {description}"""

    if terminal:
        initial_command += f" and {terminal}. The current directory is `{os.getcwd()}`"

    local_tz = get_system_timezone()

    running_desktop_applications = context_processes.get_process_info().strip()

    # Formatted outside of the f-string: a backslash inside an f-string expression
    # is a syntax error in Python versions before 3.12.
    timestamp = datetime.now().strftime("%A, %B %d, %Y at %H o'clock")
    initial_command += f". It is currently {timestamp} ({local_tz} time zone)."

    if running_desktop_applications != "":
        initial_command += " The following desktop applications are running:"

    initial_command = text_processors_ai.translate_from_defined_language("English", user_language, initial_command)

    if running_desktop_applications != "":
        initial_command += "\n\n" + running_desktop_applications

    return initial_command
380 | 
381 | 
def generate_terminal_command(command) -> str:
    """
    Build the prompt that asks the model for a terminal command solving a task.

    Args:
        command (str): The user’s command or query.

    Returns:
        str: The prompt, consisting of the shell and system description, the quoted task
             and instructions on how the command has to be formulated.
    """
    open_cmd = get_open_command()
    description = get_system_description()

    # The prompt is assembled from consecutive fragments and joined at the end.
    fragments = [f"""Please suggest a {get_shell()} command for {description}"""]

    fragments.append(f""" that is suitable for the following task:

"{command}"

Just write the command as it would appear in a terminal. When the task is formulated as a question, generate a command that is suitable for answering the question.""")

    if open_cmd:
        fragments.append(f" If the task requires opening a desktop application, use {open_cmd} in a suitable way. For example to do Internet searches, use {open_cmd} 'https://duckduckgo.com/?q=example search'.")

    fragments.append(f""" If in doubt about the meaning of the task or the characteristics of the system, make an educated guess, but prefer indirect workarounds that cover a wider range of circumstances (a web search as a last resort) to guessing that a particular tool is available. Do not include any comments or suggestions, to make sure you issue only syntactically correct {get_shell()} code that could be copied to a terminal.""")

    return "".join(fragments)
409 | 


--------------------------------------------------------------------------------
/ditana_assistant/engine/context_processes.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module provides functionality to retrieve information about running processes
 26 | and their associated windows across different operating systems (Linux, Windows, and macOS).
 27 | 
 28 | The main function, get_process_info(), determines the operating system and calls the
 29 | appropriate OS-specific function to gather process information.
 30 | """
 31 | 
 32 | import subprocess
 33 | import os
 34 | import re
 35 | import platform
 36 | 
 37 | 
 38 | def get_process_info_linux():
 39 |     """
 40 |     Retrieve information about running processes and their associated windows on Linux.
 41 | 
 42 |     This function uses the 'wmctrl' command to get window information and combines it
 43 |     with process details from the /proc filesystem.
 44 | 
 45 |     Returns:
 46 |         str: A string containing information about each window/process, with one entry per line.
 47 |              Each line includes the command line and window title.
 48 |              Returns an empty string if wmctrl is not available or returns a non-zero exit code.
 49 |     """
 50 |     try:
 51 |         result = []
 52 |         hostname = subprocess.check_output("hostname", stderr=subprocess.DEVNULL).decode().strip()
 53 |         wmctrl_output = subprocess.check_output(["wmctrl", "-l", "-p"], stderr=subprocess.DEVNULL).decode().splitlines()
 54 | 
 55 |         for line in wmctrl_output:
 56 |             _, _, pid, *title_parts = line.split(None, 3)
 57 |             title = title_parts[0] if title_parts else ""
 58 | 
 59 |             process_name = get_process_name(pid)
 60 |             cmdline = get_cmdline(pid)
 61 | 
 62 |             # Remove hostname from title
 63 |             title = re.sub(f'^{re.escape(hostname)} ', '', title)
 64 | 
 65 |             # Check if process name is in title (case-insensitive)
 66 |             if process_name.lower() not in title.lower():
 67 |                 title = f"{process_name} {title}"
 68 | 
 69 |             # Remove path components from result
 70 |             title = remove_path_components(title, cmdline)
 71 | 
 72 |             result_line = (cmdline + " " + title).strip()
 73 |             result.append(result_line)
 74 | 
 75 |         return "\n".join(result)
 76 |     except subprocess.CalledProcessError:
 77 |         # This catches both cases: when wmctrl is not available and when it returns a non-zero exit code
 78 |         return ""
 79 | 
 80 | 
 81 | def get_process_info_windows():
 82 |     """
 83 |     Retrieve information about running processes and their associated windows on Windows.
 84 | 
 85 |     This function uses the win32gui, win32process, and psutil libraries to gather
 86 |     information about visible windows and their associated processes.
 87 | 
 88 |     Returns:
 89 |         str: A string containing information about each window/process, with one entry per line.
 90 |              Each line includes the command line and window title.
 91 |     """
 92 |     import win32gui
 93 |     import win32process
 94 |     import psutil
 95 | 
 96 |     def callback(hwnd, windows):
 97 |         """
 98 |         Callback function for EnumWindows to process each window.
 99 | 
100 |         Args:
101 |             hwnd: Window handle.
102 |             windows: List to store window information.
103 | 
104 |         Returns:
105 |             bool: Always returns True to continue enumeration.
106 |         """
107 |         if win32gui.IsWindowVisible(hwnd) and win32gui.GetWindowText(hwnd):
108 |             _, pid = win32process.GetWindowThreadProcessId(hwnd)
109 |             try:
110 |                 process = psutil.Process(pid)
111 |                 exe = process.exe()
112 |                 name = process.name()
113 |                 title = win32gui.GetWindowText(hwnd)
114 | 
115 |                 # Remove path components from title
116 |                 title = remove_path_components(title, exe)
117 | 
118 |                 # Combine name and title if name is not in title
119 |                 if name.lower() not in title.lower():
120 |                     title = f"{name} {title}"
121 | 
122 |                 cmdline = " ".join(process.cmdline())
123 |                 windows.append(f"{cmdline} {title}")
124 |             except (psutil.NoSuchProcess, psutil.AccessDenied):
125 |                 pass
126 |         return True
127 | 
128 |     windows = []
129 |     win32gui.EnumWindows(callback, windows)
130 |     return "\n".join(windows)
131 | 
132 | 
def get_process_info_macos():
    """
    Placeholder for the macOS variant of the process/window lookup.

    Gathering this information is not implemented for macOS yet.

    Returns:
        str: Always an empty string.
    """
    return ""
143 | 
144 | 
def get_process_name(pid):
    """
    Get the name of a process given its PID on Linux.

    The name is read from `/proc/<pid>/comm`.

    Args:
        pid (str): The process ID.

    Returns:
        str: The name of the process, or an empty string if the information cannot be retrieved.
    """
    try:
        with open(f'/proc/{pid}/comm', 'r', encoding='utf-8') as f:
            return f.read().strip()
    except (OSError, ValueError):
        # OSError: the process does not exist (anymore) or may not be read.
        # ValueError: invalid path or content that is not valid UTF-8.
        # Unlike a bare `except`, this lets KeyboardInterrupt and SystemExit pass through.
        return ""
160 | 
161 | 
def get_cmdline(pid):
    """
    Get the command of a process given its PID on Linux.

    Reads `/proc/<pid>/cmdline` and returns only its first whitespace-separated
    token, i.e. the command without its arguments.

    Args:
        pid (str): The process ID.

    Returns:
        str: The first token of the command line of the process, or an empty string if the
             information cannot be retrieved or the command line is empty.
    """
    try:
        with open(f'/proc/{pid}/cmdline', 'r', encoding='utf-8') as f:
            # The file content uses NUL characters as separators
            arguments = f.read().replace('\x00', ' ').split()
    except (OSError, ValueError):
        # OSError: the process does not exist (anymore) or may not be read.
        # ValueError: invalid path or content that is not valid UTF-8.
        # Unlike a bare `except`, this lets KeyboardInterrupt and SystemExit pass through.
        return ""

    # An empty command line is reported as an empty string
    return arguments[0] if arguments else ""
177 | 
178 | 
def remove_path_components(text, path):
    """
    Strip every component of a path from a text.

    The path is lower-cased and split at the OS path separator. Each non-empty
    component is then removed from the text wherever it occurs as a whole word,
    ignoring case. This is used to clean up window titles.

    Args:
        text (str): The text to clean up.
        path (str): The path whose components are removed from the text.

    Returns:
        str: The text without the path components.
    """
    cleaned = text
    for part in filter(None, path.lower().split(os.sep)):
        whole_word = re.compile(rf'\b{re.escape(part)}\b', flags=re.IGNORECASE)
        cleaned = whole_word.sub('', cleaned)
    return cleaned
198 | 
199 | 
def get_process_info():
    """
    Gather process and window information for the current operating system.

    The operating system reported by `platform.system()` selects the OS-specific
    implementation that does the actual work.

    Returns:
        str: A string containing information about each window/process, with one entry per line.
             Each line includes the command line and window title.
             Returns an empty string if the operating system is not supported.
    """
    implementations = {
        "Linux": get_process_info_linux,
        "Windows": get_process_info_windows,
        "Darwin": get_process_info_macos,
    }

    implementation = implementations.get(platform.system())
    if implementation is None:
        return ""

    return implementation()
221 | 


--------------------------------------------------------------------------------
/ditana_assistant/engine/input_analyzers_ai.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
This module provides functions that analyze a given text (typically user input)
and return a `bool` indicating whether a certain attribute was found in it.
All of these functions make use of the LLM. The parallel module
`input_analyzers_regex` uses regular expressions instead.
 29 | """
 30 | 
 31 | import re
 32 | from typing import Optional, List, Dict, Literal
 33 | 
 34 | from ditana_assistant.base.output_manager import OutputManager
 35 | from ditana_assistant.base import config
 36 | from ditana_assistant.base.config import Configuration
 37 | 
 38 | from ditana_assistant.engine import input_analyzers_regex
 39 | from ditana_assistant.engine import text_processors_ai
 40 | 
 41 | 
 42 | def answers_yes(query: str, messages: Optional[List[Dict[Literal["role", "content"], str]]] = None) -> bool:
 43 |     """
 44 |     Determine if the AI’s response to a query is affirmative.
 45 | 
 46 |     Args:
 47 |         query (str): The query to process.
 48 |         messages (Optional[List[Dict[Literal["role", "content"], str]]]): The dialog so far.
 49 | 
 50 |     Returns:
 51 |         bool: True if the response is affirmative, False otherwise.
 52 |     """
 53 |     from ditana_assistant.engine.conversation_manager import ConversationManager
 54 |     assistant_answer = ConversationManager(messages).process_input(query)[0].lower()
 55 | 
 56 |     OutputManager.print_formatted(query, assistant_answer)
 57 | 
 58 |     return bool(re.search(r'\byes\b', assistant_answer))
 59 | 
 60 | 
 61 | def is_language(txt: str, lang: str) -> bool:
 62 |     """
 63 |     Determine if the given text is in the given language.
 64 | 
 65 |     Args:
 66 |         txt (str): The text to analyze.
 67 |         lang (str): The language to check, e.g. "English"
 68 | 
 69 |     Returns:
 70 |         bool: True if the text is in the specified language, False otherwise.
 71 |     """
 72 |     if Configuration.get()['ASSUME_ENGLISH'] and lang == "English":
 73 |         return True
 74 | 
 75 |     result = answers_yes(f'''Is the following text 100% in {lang}? Answer with "yes" or "no" only:
 76 | 
 77 | "{txt}"
 78 | ''')
 79 | 
 80 |     return result
 81 | 
 82 | 
 83 | def query_refers_to_a_computer(query: str, messages: Optional[List[Dict[Literal["role", "content"], str]]] = None) -> bool:
 84 |     """
 85 |     Determine if the given query can typically be solved using command line tools.
 86 | 
 87 |     Args:
 88 |         messages (Optional[List[Dict[Literal["role", "content"], str]]]): The dialog so far.
 89 |         query (str): The query to analyze.
 90 | 
 91 |     Returns:
 92 |         bool: True if the query can likely be solved with terminal commands, False otherwise.
 93 |     """
 94 |     if query == "":
 95 |         return False
 96 | 
 97 |     base_question = "Does this query involve checking, modifying, or retrieving information (e.g. system status, file content, or opening applications) from the user’s current computer system?"
 98 |     question = f'''{base_question} Answer with "yes" or "no" only:
 99 | 
100 | "{text_processors_ai.ensure_language(query, "English")}"'''
101 | 
102 |     result = answers_yes(question, messages)
103 | 
104 |     OutputManager.print_formatted("refers to a computer" if result else "does not refer to a computer", query)
105 | 
106 |     return result
107 | 
108 | 
def query_is_suitable_for_wolfram_alpha(query: str, messages: List[Dict[Literal["role", "content"], str]]) -> bool:
    """
    Decide whether the query should be forwarded to the
    [Wolfram|Alpha Short Answers API](https://products.wolframalpha.com/short-answers-api/documentation).

    The LLM is only asked if the query is non-empty, does not look like several
    sentences and (after stripping) does not span several lines.

    Args:
        query (str): The query to analyze.
        messages (List[Dict[Literal["role", "content"], str]]): The dialog so far.

    Returns:
        bool: True if the query is suitable for the Wolfram|Alpha Short Answers API.
    """
    # Cheap local checks first; each of them rules the query out without an LLM call.
    if query == "":
        return False
    if input_analyzers_regex.likely_contains_multiple_sentences(query):
        return False
    if '\n' in query.strip():
        return False

    question = f'''Does this request refer to a single calculation, quantitative measurement, statistic or real-time information about the physical world (such as weather, stock data or population) and can it be answered without knowledge of our previous messages? Answer with "yes" or "no" only:

"{query}"'''

    suitable = answers_yes(question, messages)
    if suitable:
        verdict = "suitable for Wolfram|Alpha"
    else:
        verdict = "not suitable for Wolfram|Alpha"
    OutputManager.print_formatted(verdict, query)

    return suitable
133 | 
134 | 
135 | # deprecated
def query_requires_changes_on_computer(query: str) -> bool:
    """
    Ask the LLM whether fulfilling the query would change the computer system, e.g. modify a file.

    Args:
        query (str): The query to analyze. An empty query yields False without asking the model.

    Returns:
        bool: True if the model's reply indicates that changes on the computer are required.
    """
    if query == "":
        return False

    uses_gemma = Configuration.get()['MODEL_TYPE'] == config.ModelType.GEMMA
    # Gemma is not able to pass the unit tests when using 'Answer with "yes" or "no" only'.
    base_question = (
        "Does this request involve modifying files or states on the computer? To what extent?"
        if uses_gemma
        else 'Does this request involve changes to the computer? Answer with "yes" or "no" only:'
    )

    english_query = text_processors_ai.ensure_language(query, "English")
    question = f'''{base_question}

"{english_query}"'''

    changes_required = answers_yes(question)

    if changes_required:
        verdict = "requires changes on computer"
    else:
        verdict = "does not require changes on computer"
    OutputManager.print_formatted(verdict, query)

    return changes_required
164 | 
165 | 
166 | # deprecated
def request_is_answerable(query: str, messages: List[Dict[Literal["role", "content"], str]]) -> bool:
    """
    Ask the LLM whether the dialog held so far already contains the answer to the query.

    Args:
        query (str): The query to analyze. An empty query yields False without asking the model.
        messages (List[Dict[Literal["role", "content"], str]]): The dialog so far.

    Returns:
        bool: True if the model considers the query answerable from the dialog, False otherwise.
    """
    if query == "":
        return False

    base_question = 'Does our conversation so far more or less contain the answer to the following request? Answer with "yes" or "no" only:'
    english_query = text_processors_ai.ensure_language(query, "English")
    question = f'''{base_question}

"{english_query}"'''

    answerable = answers_yes(question, messages)

    if answerable:
        verdict = "previous dialog contains the answer"
    else:
        verdict = "previous dialog does not contain the answer"
    OutputManager.print_formatted(verdict, query)

    return answerable
192 | 
193 | 
def prompt_can_be_split(prompt: str) -> bool:
    """
    Ask the LLM whether it makes sense to divide the given prompt into two subtasks.

    Args:
        prompt (str): The prompt to examine; it is embedded in a fenced block inside the question.

    Returns:
        bool: True if the model answers affirmatively, False otherwise.
    """
    # The question is assembled line by line; the second line consists of four spaces
    # only and is kept so that the text sent to the model stays unchanged.
    question = (
        'Does it make sense to divide the following prompt into two subtasks in order to tackle them systematically?\n'
        '    \n'
        '```\n'
        f'{prompt}\n'
        '```\n'
        '\n'
        'Please answer only with "yes" or "no".\n'
    )
    return answers_yes(question)
205 | 
206 | 
def request_is_complex(query: str, messages: List[Dict[Literal["role", "content"], str]]) -> bool:
    """
    Ask the LLM whether answering the request needs skills such as applying, analyzing
    or evaluating information, rather than just remembering or understanding facts.

    Args:
        query (str): The query to analyze.
        messages (List[Dict[Literal["role", "content"], str]]): The dialog so far.

    Returns:
        bool: True if the model classifies the request as complex, False otherwise
            (always False for an empty query).
    """
    # An empty query is not complex, so the model does not need to be asked.
    if query == "":
        return False

    question = f'''Does answering this prompt require skills like applying, analyzing, or evaluating information, rather than just remembering or understanding facts? Please answer only with "yes" or "no":
    ```
    {query}
    ```'''

    return answers_yes(question, messages)
227 | 
228 | 
def are_you_sure(assistant_answer, messages) -> bool:
    """
    Ask the LLM whether it is sure about an answer it suggested.

    Args:
        assistant_answer: the suggested answer of the LLM
        messages: the dialog so far

    Returns:
        True if the model replies affirmatively to "Are you sure?", False otherwise.
    """
    from ditana_assistant.engine.conversation_manager import ConversationManager

    # NOTE(review): the answer is appended through a temporary ConversationManager, while the
    # follow-up question below is asked with `messages`. This presumably relies on the manager
    # sharing (not copying) the `messages` list - confirm against ConversationManager.
    ConversationManager(messages).append_assistant_message(assistant_answer)

    follow_up = 'Are you sure? Please answer only with "yes" or "no".'
    return answers_yes(follow_up, messages)
243 | 
244 | 
def is_likely_code(text):
    """
    Ask the LLM whether the given text is code.

    The model is asked if at least part of the text is natural language; the text is
    regarded as code exactly when the model does not answer "yes".

    Args:
        text (str): The input text to be analyzed for code-like content.

    Returns:
        tuple: A tuple containing two elements:
            - bool: True if the text is likely code, False otherwise.
            - int: A confidence score that is 1 when the text is classified as code and 0 otherwise.
    """
    contains_natural_language = answers_yes(f"""Is the following text (at least part of it) natural language? Please answer in English "yes" or "no".

```
{text}
```""")

    if contains_natural_language:
        return False, 0
    return True, 1
265 | 
266 | 
267 | def is_likely_code_delegate(text):
268 |     """
269 |     Delegates the code detection process to the appropriate method based on the current model type.
270 | 
271 |     This function serves as a wrapper to handle code detection for different language models.
272 |     For OpenAI models, it uses the AI-based 'is_likely_code' function. For Gemma models,
273 |     it falls back to a regex-based solution using weighted patterns.
274 | 
275 |     Args:
276 |         text (str): The input text to be analyzed for code-like content.
277 | 
278 |     Returns:
279 |         tuple: A tuple containing two elements:
280 |             - bool: True if the text is likely code, False otherwise.
281 |             - float: A confidence score between 0 and 1, where 1 indicates high confidence
282 |                      that the text is code. Note: This score is always only meaningful for the
283 |                      regex solution. For LLMs, it’s always 0 or 1.
284 | 
285 |     Note:
286 |         The behavior of this function depends on the current model type set in the Configuration.
287 |         It's designed to provide a unified interface for code detection across different model types.
288 |     """
289 |     match Configuration.get()['MODEL_TYPE']:
290 |         case config.ModelType.OPENAI:
291 |             return is_likely_code(text)
292 |         case config.ModelType.GEMMA:
293 |             return input_analyzers_regex.is_likely_code(text)
294 | 


--------------------------------------------------------------------------------
/ditana_assistant/engine/input_analyzers_regex.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
This module provides functions that analyze a given text (typically user input)
and report, usually as a `bool`, whether a certain attribute was found in it.
All of these functions make use of regular expressions. The parallel
module `input_analyzers_ai` uses the LLM instead.
 29 | """
 30 | 
 31 | from typing import Final, Tuple
 32 | import re
 33 | 
 34 | 
 35 | def likely_contains_multiple_sentences(text: str) -> bool:
 36 |     """
 37 |     Estimate whether the given text likely contains multiple sentences.
 38 | 
 39 |     This function checks for patterns that are common in texts with multiple sentences:
 40 |     two lowercase letters followed by a period, question mark, or exclamation mark,
 41 |     and then any whitespace (including newlines). These patterns are often found at
 42 |     the end of a sentence within a larger text, but are less likely to occur in texts
 43 |     containing only a single sentence.
 44 | 
 45 |     Args:
 46 |         text (str): The input text to analyze.
 47 | 
 48 |     Returns:
 49 |         bool: True if the text likely contains multiple sentences, False otherwise.
 50 | 
 51 |     Note:
 52 |         - The text is stripped of leading and trailing whitespace before analysis.
 53 |         - This method has limitations:
 54 |           - It may give false positives for certain abbreviations followed by punctuation.
 55 |           - It doesn't account for unconventional writing styles or specific formatting.
 56 |     """
 57 |     return bool(re.search(r'[a-z.]{2}[.!?]\s', text.strip()))
 58 | 
 59 | 
 60 | def is_likely_code(text: str) -> Tuple[bool, float]:
 61 |     """
 62 |     Determines whether the given text is likely to be code based on a set of heuristic features.
 63 | 
 64 |     This function employs a weighted scoring system with multiple features to assess the likelihood
 65 |     of the input text being code. The weights and threshold have been optimized through an iterative
 66 |     process using unit tests, specifically by maximizing the difference between the minimum 'true'
 67 |     score and the maximum 'false' score across a diverse set of test cases.
 68 | 
 69 |     The optimization approach, while unconventional, proves effective for this specific use case.
 70 |     It allows for fine-tuning based on real-world examples encountered in the application, rather
 71 |     than relying on artificially generated test cases. New test cases are added as edge cases are
 72 |     discovered during actual usage, ensuring the function’s robustness and adaptability.
 73 | 
 74 |     Args:
 75 |         text (str): The input text to be analyzed.
 76 | 
 77 |     Returns:
 78 |         Tuple[bool, float]: A tuple containing:
 79 |             - bool: True if the text is likely code, False otherwise.
 80 |             - float: The confidence score of the classification.
 81 | 
 82 |     Note:
 83 |         The magic numbers (weights, bias, and threshold) in this function are the result of
 84 |         the aforementioned optimization process. They should be adjusted with caution and
 85 |         only after thorough testing with an expanded set of test cases.
 86 | 
 87 |     Warning:
 88 |         An empty string input will always return (False, NaN).
 89 |     """
 90 |     if text == "":
 91 |         return False, float('nan')
 92 | 
 93 |     features = [
 94 |         (count_programming_tokens, 13),
 95 |         (count_special_characters, 21),
 96 |         (check_indentation, 1),
 97 |         (check_line_starts, 25),
 98 |         (check_camel_case, 1),
 99 |         (count_single_letter_variables, 44),
100 |     ]
101 | 
102 |     total_score = 0
103 |     total_weight = 0
104 | 
105 |     bias: Final = 2/3
106 | 
107 |     for feature_func, weight in features:
108 |         score = feature_func(text)
109 |         total_score += score ** bias * weight
110 |         total_weight += weight
111 | 
112 |     confidence = total_score / total_weight
113 |     return confidence >= 0.22640178458793886, confidence
114 | 
115 | 
def count_programming_tokens(text: str) -> float:
    """
    Relate the number of programming-like patterns in the text to its number of words.

    Args:
        text (str): The text to analyze.

    Returns:
        float: Matches per whitespace-separated word, capped at 1 (0 if nothing matches).
    """
    patterns = (
        r'\b(if|else|return|for|do|while|print|function|def|class|import|from)\b',  # Common keywords
        r'\.[^\s0-9]',  # A dot followed by a non-whitespace and non-digit character (e.g., method calls, property access)
        r'-[A-Z]',  # A hyphen followed by an uppercase letter (e.g., PowerShell cmdlets)
        r'[\[\]]',  # Square brackets
        r'==|!=|<=|>=|&&|\|\|',  # Common comparison and logical operators
        r'#.*$',  # Single-line comments
        r'//.*$',  # Alternative single-line comments
        r'/\*[\s\S]*?\*/',  # Multi-line comments
        r'"\w+":',  # JSON-style key definitions
        r'(?<=\s)@\w+',  # Decorators or annotations
        r'\$\w+',  # Variable names in shell scripts or PHP
        r'(?<!:)//[^/\s]+',  # URLs in code (excluding http:// or https://)
    )

    # MULTILINE lets the `$` anchors of the comment patterns match at every line end.
    hits = sum(len(re.findall(pattern, text, re.MULTILINE)) for pattern in patterns)

    # Normalize by the number of words; max() guards against division by zero.
    word_count = len(text.split())
    return min(hits / max(word_count, 1), 1)
151 | 
152 | 
def count_special_characters(text: str) -> float:
    """
    Measure how densely the text is populated with characters that are typical for code.

    Args:
        text (str): The text to analyze.

    Returns:
        float: Ten times the share of special characters in the text, capped at 1;
            0 for an empty text.
    """
    special_chars = '|$#[]<>&_{}~/\\'
    occurrences = sum(map(text.count, special_chars))

    if not text:
        return 0

    # The factor 10 means that a share of 10 % special characters already yields the maximum.
    return min(occurrences / len(text) * 10, 1)
166 | 
167 | 
def check_indentation(text: str) -> float:
    """
    Determine the share of indented lines among all lines of the text.

    A line counts as indented if it is not blank, begins with a whitespace character
    and its first non-whitespace character is not a hyphen (hyphens are excluded,
    presumably to skip indented bullet lists).

    Args:
        text (str): The text to analyze.

    Returns:
        float: A score between 0 and 1 representing the proportion of indented lines.
    """
    # str.split with an explicit separator always yields at least one element,
    # so the division below cannot fail.
    lines = text.split('\n')

    indented = 0
    for line in lines:
        if not line.strip():
            continue
        if line[0].isspace() and not line.lstrip().startswith('-'):
            indented += 1

    return indented / len(lines)
182 | 
183 | 
def check_line_starts(text: str) -> float:
    """
    Determine the share of lines whose very first character is a lowercase letter.

    Args:
        text (str): The text to analyze.

    Returns:
        float: A score between 0 and 1 representing the proportion of lines starting
            with lowercase letters (blank lines count towards the total only).
    """
    # str.split with an explicit separator always yields at least one element.
    lines = text.split('\n')

    lowercase_starts = 0
    for line in lines:
        if line.strip() and line[0].islower():
            lowercase_starts += 1

    return lowercase_starts / len(lines)
197 | 
198 | 
def check_camel_case(text: str) -> float:
    """
    Relate the number of camelCase occurrences to the number of words in the text.

    Args:
        text (str): The text to analyze.

    Returns:
        float: camelCase matches per whitespace-separated word, capped at 1;
            0 if the text contains no words.
    """
    camel_case_pattern = r'[a-z]+([A-Z][a-z]+)+'
    occurrences = len(re.findall(camel_case_pattern, text))

    words = text.split()
    if words:
        ratio = occurrences / len(words)
    else:
        ratio = 0

    # One word may contain several matches (e.g. joined by hyphens), hence the cap.
    return min(ratio, 1)
212 | 
213 | 
def count_single_letter_variables(text: str) -> float:
    """
    Relate the number of standalone single letters to the number of words in the text.

    Args:
        text (str): The text to analyze.

    Returns:
        float: Single-letter tokens per whitespace-separated word, capped at 1;
            0 if the text contains no words.
    """
    # A letter counts if it is enclosed by word boundaries, e.g. `x` in "x = 1".
    letter_count = len(re.findall(r'\b[a-zA-Z]\b', text))

    words = text.split()
    if words:
        ratio = letter_count / len(words)
    else:
        ratio = 0

    return min(ratio, 1)
226 | 


--------------------------------------------------------------------------------
/ditana_assistant/engine/pastime.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module is somewhat isolated from the rest of the code because it is essentially
 26 | intended to be fun. It does not help with the assistance functions, but aims to provide
 27 | a human-like conversation in the sense that the other person has their own interests.
 28 | 
 29 | We deliberately try not to influence the behaviour of the dialogue partner, but to get
 30 | the most human-like behaviour possible. This can lead to unexpected dialogue, which is
 31 | part of the fun.
 32 | 
 33 | We deliberately do not try to make the model impersonate a character, so we do not use
 34 | the message list. The entire dialogue is always represented in a single message from the
 35 | user, so the assistant’s response is the next line of dialogue, which is then appended
 36 | to that single message.
 37 | """
 38 | 
 39 | from typing import List, Tuple, Optional
 40 | import re
 41 | 
 42 | from ditana_assistant.base.config import Configuration, ModelType
 43 | 
 44 | from ditana_assistant.engine import context
 45 | from ditana_assistant.engine import text_processors_ai
 46 | from ditana_assistant.engine import text_processors_regex
 47 | 
 48 | 
 49 | class DialogContainer:
 50 |     """
 51 |     A class to manage and format dialog entries.
 52 | 
 53 |     This class provides functionality to add dialog entries, store them internally,
 54 |     and format them into a single string representation.
 55 | 
 56 |     Attributes:
 57 |         _dialog_container (List[Tuple[bool, str]]): A list to store dialog entries.
 58 |         _character_name: The name of the dialog partner that is not the user
 59 |     """
 60 | 
 61 |     def __init__(self):
 62 |         """Initialize an empty dialog container."""
 63 |         from ditana_assistant.engine.conversation_manager import ConversationManager
 64 |         # Gemma gets confused when translating below texts (even sentence-wise). Also, translation of the dialog proves suboptimal. So we use English for Gemma.
 65 |         self.use_user_language: bool = Configuration.get()['MODEL_TYPE'] != ModelType.GEMMA
 66 |         self._dialog_container: List[Tuple[bool, str]] = []
 67 |         self._character_name: str = ConversationManager.impersonate() if ConversationManager.impersonate() else "Jean-Baptiste Clamence from the novel of Albert Camus"
 68 |         self._short_name: str = get_short_name(self._character_name)
 69 |         self._translated_character_name: str = self.translate(self._character_name)
 70 |         self._translated_short_name: str = get_short_name(self._translated_character_name)
 71 |         self._user_name: str = self.translate("Stranger")
 72 | 
 73 |         self.request_for_response_of_fictional_character = self.translate(f"""This is a fictional dialog between {self._character_name} and a stranger that I wrote. \
 74 | Please suggest what {self._short_name} could say next to behave in a typical way, but still respond to what the stranger has said and encourage them to continue the conversation. \
 75 | Please just write a single suggestion for {self._short_name}’s next line of dialog without commenting or questioning.""")
 76 | 
 77 |         self.request_for_initial_line_of_fictional_character = self.translate(f"""\
 78 | Please suggest a line of dialog for a fictional dialog between {self._character_name} and a stranger. \
 79 | In this line of dialogue, {self._short_name} meets the stranger for the first time and behaves in a way that is typical of their behaviour. \
 80 | Please just write a single suggestion for {self._short_name}’s next line of dialog without commenting or questioning.""")
 81 | 
 82 |     @staticmethod
 83 |     def extract_cited_block(text: str):
 84 |         """
 85 |         If the given string contains exactly two lines that contain only three backticks ```,
 86 |         then return the lines between these two, otherwise the whole string. Some LLMs
 87 |         put generated text between such lines and comment it, even if instructed not to comment.
 88 |         Args:
 89 |             text: the string, potentially containing cited text
 90 | 
 91 |         Returns:
 92 |             the cleaned string
 93 |         """
 94 |         lines = text.split('\n')
 95 |         backtick_lines = [i for i, line in enumerate(lines) if line.strip() == '```']
 96 | 
 97 |         if len(backtick_lines) == 2:
 98 |             start, end = backtick_lines
 99 |             return '\n'.join(lines[start + 1:end])
100 |         else:
101 |             return text
102 | 
103 |     def add_dialog_entry(self, is_user: bool, dialog: str) -> None:
104 |         """
105 |         Add a new dialog entry to the internal dialog container.
106 |         If `dialog` is enclosed in quotation marks, they are removed.
107 | 
108 |         Args:
109 |             is_user (bool): True, if the dialog line is from the user, otherwise False
110 |             dialog (str): The dialog line(s) for the character.
111 |         """
112 |         self._dialog_container.append((is_user, dialog.strip('" ')))
113 | 
114 |     def format_dialog(self) -> str:
115 |         """
116 |         Format all stored dialog entries into a single string.
117 | 
118 |         Returns:
119 |             str: A formatted string representation of all dialog entries.
120 |         """
121 |         formatted_dialog = ""
122 |         for is_user, lines in self._dialog_container:
123 |             name = self._user_name if is_user else self._short_name
124 |             formatted_dialog += f'{name}: "{lines}"\n\n'
125 |         return formatted_dialog.strip()
126 | 
127 |     def response_of_fictional_character(self) -> str:
128 |         """
129 |         Generate the next thing that the fictional character says to the user.
130 | 
131 |         Returns:
132 |             The new dialog line of the fictional character.
133 | 
134 |         """
135 |         from ditana_assistant.engine.conversation_manager import ConversationManager
136 | 
137 |         return self.filter_response(ConversationManager().process_input(f"""```
138 | {self.format_dialog()}
139 | ```
140 | 
141 | {self.request_for_response_of_fictional_character}""")[0])
142 | 
143 |     def initial_line_of_fictional_character(self) -> str:
144 |         """
145 |         Return the first thing that the fictional character says to the user.
146 |         We deliberately do not influence what this should be, but leave
147 |         it to the model.
148 | 
149 |         Returns:
150 |             The first thing that the fictional character tells the user.
151 | 
152 |         """
153 |         from ditana_assistant.engine.conversation_manager import ConversationManager
154 | 
155 |         return self.filter_response(ConversationManager().process_input(self.request_for_initial_line_of_fictional_character)[0])
156 | 
157 |     @staticmethod
158 |     def extract_text(response):
159 |         """
160 |         Extract text from a string based on a specific pattern.
161 | 
162 |         This function looks for patterns of the form "Name: " or "Name-With-Hyphens: "
163 |         at the beginning of lines. It has two modes of operation:
164 | 
165 |         1. If the pattern occurs multiple times:
166 |            It returns the text between the first and second occurrence of the pattern.
167 | 
168 |         2. If the pattern occurs once or not at all:
169 |            It removes the first occurrence of the pattern (if present) and returns the rest of the text.
170 | 
171 |         Args:
172 |         response (str): The input string to process.
173 | 
174 |         Returns:
175 |         str: The extracted text based on the above rules.
176 |         """
177 |         # Define the pattern for the prefix (Name: or similar at the beginning of the line)
178 |         pattern = r'^[\w-]+:\s*"?'
179 | 
180 |         # Find all occurrences of the pattern
181 |         matches = list(re.finditer(pattern, response, re.MULTILINE))
182 | 
183 |         if len(matches) > 1:
184 |             # If there’s more than one occurrence, extract the text between the first and second occurrence
185 |             start = matches[0].end()
186 |             end = matches[1].start()
187 |             return response[start:end].strip()
188 |         else:
189 |             # If there’s only one or no occurrence, simply remove the first prefix
190 |             return re.sub(pattern, '', response, count=1).strip()
191 | 
192 |     def filter_response(self, response) -> str:
193 |         """
194 |         - Removes the name of the fictional character at the beginning of the given text
195 |         - Replaces the fictional character name with "Ditana" (in case it occurs anywhere else than at then beginning)
196 |         - Removes quotation marks
197 |         Args:
198 |             response: the input text
199 | 
200 |         Returns:
201 |             the cleaned text
202 |         """
203 |         response = DialogContainer.extract_cited_block(response)
204 |         response = DialogContainer.extract_text(response)
205 |         response = response.strip(' \n"')
206 |         from ditana_assistant.engine.conversation_manager import ConversationManager
207 |         if not ConversationManager.impersonate():
208 |             response = text_processors_regex.remove_words_and_phrases(response, self._translated_character_name, "Ditana")
209 |             response = text_processors_regex.remove_words_and_phrases(response, self._character_name, "Ditana")
210 |         return response
211 | 
212 |     def translate(self, text: str) -> str:
213 |         """
214 |         Convenience function to translate from English to the user’s language.
215 |         Args:
216 |             text: the english text
217 | 
218 |         Returns:
219 |             the translated text
220 |         """
221 |         if self.use_user_language:
222 |             return text_processors_ai.translate_from_defined_language("English", context.get_user_language(), text)
223 |         else:
224 |             return text
225 | 
226 | 
# Module-wide dialog state. It stays None until reply() or initial_line() is called
# for the first time; both create the DialogContainer on demand and reuse it afterwards.
dialog_container: Optional[DialogContainer] = None
228 | 
229 | 
def get_short_name(character_name: str) -> str:
    """
    Pick the first whitespace-separated word of the name that has at least 4 characters.

    Punctuation attached to a word counts towards its length. If no word is long
    enough, the complete input string is returned unchanged.

    Args:
        character_name (str): The input string to process.

    Returns:
        str: The first word with at least 4 characters, or the entire input string
             if no such word is found.

    Examples:
        >>> get_short_name("John Doe")
        'John'
        >>> get_short_name("Dr. Smith")
        'Smith'
        >>> get_short_name("Bob")
        'Bob'
    """
    long_enough = (word for word in character_name.split() if len(word) >= 4)
    return next(long_enough, character_name)
258 | 
259 | 
def reply(user_input: str) -> str:
    """
    Feed the user's message into the dialog and return the character's answer.

    The module-level `dialog_container` is created on first use. The user input
    is added to the dialog, the fictional character's response is requested, and
    that response is added to the dialog as well before it is returned.

    Args:
        user_input (str): the user input text

    Returns:
        the response of the fictional character
    """
    global dialog_container
    if not dialog_container:
        dialog_container = DialogContainer()

    dialog_container.add_dialog_entry(True, user_input)
    character_response = dialog_container.response_of_fictional_character()
    dialog_container.add_dialog_entry(False, character_response)
    return character_response
282 | 
283 | 
def initial_line() -> str:
    """
    Let the fictional character open the conversation when the user has said nothing yet.

    The module-level `dialog_container` is created on first use; the generated
    opening line is added to the dialog before it is returned.

    Returns:
        the response of the fictional character
    """
    global dialog_container
    if not dialog_container:
        dialog_container = DialogContainer()

    opening = dialog_container.initial_line_of_fictional_character()
    dialog_container.add_dialog_entry(False, opening)
    return opening
300 | 


--------------------------------------------------------------------------------
/ditana_assistant/engine/terminal_interaction.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module handles the terminal-based interaction for the Ditana Assistant.
 26 | It manages user input, command execution, and output display in terminal mode.
 27 | When the application runs with a GUI, this module handles terminal commands
 28 | that may be required during the conversation.
 29 | """
 30 | 
 31 | from ditana_assistant.gui.assistant_window import AssistantWindow
 32 | from ditana_assistant.base import terminal
 33 | 
 34 | from ditana_assistant.engine import text_processors_regex
 35 | from ditana_assistant.engine.conversation_manager import ConversationManager
 36 | 
 37 | 
 38 | def terminal_thread(conversation: ConversationManager, window: AssistantWindow, user_input: str, quiet: bool) -> None:
 39 |     """
 40 |     Manages the terminal-based interaction loop for the Ditana Assistant.
 41 | 
 42 |     This function handles user input, processes it through the conversation manager,
 43 |     executes terminal commands when necessary, and manages output display.
 44 |     In GUI mode, it only activates for terminal command execution.
 45 | 
 46 |     Args:
 47 |         conversation (ConversationManager): The conversation manager object.
 48 |         window (AssistantWindow): The UI window object (None in terminal-only mode).
 49 |         user_input (str): Initial user input.
 50 |         quiet (bool): Whether the assistant is running in quiet mode.
 51 |     """
 52 |     while not ConversationManager.stop_thread().is_set():
 53 |         try:
 54 |             if window.is_open:
 55 |                 ConversationManager.code_input_event().wait()
 56 |                 if ConversationManager.stop_thread().is_set():
 57 |                     break
 58 |                 code = ConversationManager.code_input_global().get()
 59 |                 ConversationManager.code_input_event().clear()
 60 |                 assistant_answer = ""
 61 |             else:
 62 |                 if user_input == "":
 63 |                     if quiet:
 64 |                         ConversationManager.stop_thread().set()
 65 |                         break
 66 |                     else:
 67 |                         print()
 68 |                         user_input = input("Your Message ('enter' to quit): ").strip()
 69 |                         if user_input.strip() == "":
 70 |                             ConversationManager.stop_thread().set()
 71 |                             break
 72 |                 assistant_answer, code = conversation.process_input(user_input, meta_call=False)
 73 | 
 74 |             if not quiet:
 75 |                 print()
 76 | 
 77 |             if code:
 78 |                 print(code)
 79 |                 reply = terminal.get_valid_input("Execute above command?")
 80 |                 if reply == 'n':
 81 |                     user_input = "I do not execute this command."
 82 |                     conversation.append_user_message(user_input)
 83 |                     if window.is_open:
 84 |                         window.set_ui_response(text_processors_regex.add_markdown_italics(user_input+"_"))
 85 |                         print("Ok, please focus the UI window.")
 86 |                         continue
 87 |                     else:
 88 |                         ConversationManager.stop_thread().set()
 89 |                         break
 90 | 
 91 |                 user_input = execute_code(code, conversation, window)
 92 |             else:
 93 |                 if window.is_open:
 94 |                     window.set_ui_response(text_processors_regex.ensure_markdown_horizontal_line(assistant_answer))
 95 |                     print("I answered in the UI window - please focus it.")
 96 |                 else:
 97 |                     print(assistant_answer)
 98 | 
 99 |                 user_input = ""
100 |         except Exception as e:  # pylint: disable=broad-exception-caught
101 |             print(e)
102 | 
103 | 
def execute_code(code: str, conversation: ConversationManager, window: AssistantWindow) -> str:
    """
    Run a command through the terminal helper and report the outcome.

    The command is executed with `terminal.run_interactive_command`. Depending on the
    return code and on whether the UI window is open, the result is shown in the UI,
    appended to the conversation history, and/or returned to the caller.

    Args:
        code (str): The command to be executed.
        conversation (ConversationManager): The conversation manager object.
        window (AssistantWindow): The UI window object; `window.is_open` selects between
            GUI behaviour and terminal-only behaviour.

    Returns:
        str: The message describing the command result, or an empty string.
            - Success, terminal-only mode: empty string (the result was appended to the history).
            - Success, GUI mode: the success message (also appended to the history).
            - Failure, GUI mode: the failure message (also placed into the UI input field).
            - Failure, terminal-only mode: the failure message if the user wants a fix,
              otherwise an empty string (the message was appended to the history).

    Side effects:
        - Executes the given command using the terminal.
        - Updates the UI with the command and its output if in GUI mode.
        - May append the command result to the conversation history.
        - Prompts the user for action if the command fails in terminal-only mode.
    """
    if window.is_open:
        window.set_ui_response(text_processors_regex.ensure_markdown_horizontal_line(code))

    return_code, output = terminal.run_interactive_command(code)

    if return_code != 0:
        next_message = f"""Command failed with return code {return_code}. Output:
{output}"""

        if window.is_open:
            window.set_ui_input(next_message)
        else:
            # Answering anything but 'n' keeps next_message, which terminal_thread then
            # submits as the next user message so the assistant can attempt a fix.
            # Answering 'n' records the failure in the history and stops the automatic follow-up.
            wants_fix = terminal.get_valid_input(f"The command failed with return code {return_code}. Do you want me to try to fix it?")
            if wants_fix == 'n':
                conversation.append_user_message(next_message)
                next_message = ""
    else:
        next_message = f"""Command executed successfully. Output:
{output}"""

        if window.is_open:
            window.set_ui_response(text_processors_regex.add_markdown_italics(next_message + "_"))
            conversation.append_user_message(next_message)
        else:
            conversation.append_user_message(next_message)
            next_message = ""

    if window.is_open:
        print("Please focus the UI window.")

    # In terminal mode, a non-empty return value is used by terminal_thread
    # as the next user message, continuing the conversation flow.
    return next_message
165 | 


--------------------------------------------------------------------------------
/ditana_assistant/engine/text_processors_regex.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module contains utility functions for text processing in the Ditana Assistant.
 26 | 
 27 | It provides functions to modify text and prepare it for different output contexts.
 28 | It uses regular expressions for this. The parallel module `text_processors_ai` uses an LLM instead.
 29 | """
 30 | 
 31 | import re
 32 | 
 33 | from ditana_assistant.engine import context
 34 | 
 35 | 
 36 | def add_markdown_italics(text: str) -> str:
 37 |     """
 38 |     Add markdown italics formatting to each non-empty line of the input text.
 39 | 
 40 |     Args:
 41 |         text (str): The input text to be formatted.
 42 | 
 43 |     Returns:
 44 |         str: The text with markdown italics formatting applied.
 45 |     """
 46 |     lines = text.split('\n')
 47 |     modified_lines = ['_' + line.strip() + '_' if line.strip() else line for line in lines]
 48 |     return '\n\n'.join(modified_lines)
 49 | 
 50 | 
 51 | def ensure_markdown_horizontal_line(text: str) -> str:
 52 |     """
 53 |     Ensure the text ends with a markdown horizontal line.
 54 | 
 55 |     If the text doesn't end with a valid markdown horizontal line,
 56 |     this function adds one.
 57 | 
 58 |     Args:
 59 |         text (str): The input text to be modified.
 60 | 
 61 |     Returns:
 62 |         str: The text with a markdown horizontal line at the end.
 63 |     """
 64 |     valid_patterns = ['---', '***', '___']
 65 |     lines = text.strip().split('\n')
 66 |     if lines and lines[-1].strip() in valid_patterns:
 67 |         return text
 68 |     if lines and lines[-1].strip():
 69 |         lines.append('')
 70 |     lines.append('---')
 71 |     lines.append('')
 72 |     return '\n'.join(lines)
 73 | 
 74 | 
 75 | def remove_comments(text: str) -> str:
 76 |     """
 77 |     Remove comments from the input text based on the current shell’s comment identifier.
 78 | 
 79 |     Args:
 80 |         text (str): The input text containing comments to be removed.
 81 | 
 82 |     Returns:
 83 |         str: The text with comments removed.
 84 |     """
 85 |     comment_identifier = context.get_comment_identifier()
 86 |     result_lines = []
 87 | 
 88 |     for line in text.split('\n'):
 89 |         stripped_line = line.strip()
 90 |         if not stripped_line.startswith(comment_identifier):
 91 |             if comment_identifier in line:
 92 |                 line = line.split(comment_identifier)[0].rstrip()
 93 |             if line.strip():
 94 |                 result_lines.append(line)
 95 | 
 96 |     return '\n'.join(result_lines)
 97 | 
 98 | 
 99 | def edit_output_for_terminal(assistant_answer: str) -> str:
100 |     """
101 |     Edit the assistant's answer to make it suitable for terminal output.
102 | 
103 |     This function removes backticks, unnecessary whitespace, single quotes
104 |     at the beginning and end (if present on both sides), and other
105 |     formatting that might interfere with the actual code.
106 | 
107 |     Args:
108 |         assistant_answer (str): The original answer from the assistant.
109 | 
110 |     Returns:
111 |         str: The edited answer suitable for terminal output.
112 |     """
113 |     code = assistant_answer.strip()
114 | 
115 |     code = re.sub(r'^```.*?$', '', code, flags=re.MULTILINE)
116 |     code = re.sub(r'^\s*$\n', '', code, flags=re.MULTILINE)
117 |     code = re.sub(r'^`', '', code, flags=re.MULTILINE)
118 |     code = re.sub(r'`.*$', '', code, flags=re.MULTILINE)
119 |     code = re.sub(r'^\s*powershell\s*$', '', code, flags=re.MULTILINE | re.IGNORECASE)
120 | 
121 |     if re.match(r'^#\s*[a-z]', code) and code.count('\n') <= 1:
122 |         code = code.lstrip('#').lstrip()
123 | 
124 |     if code.startswith("'") and code.endswith("'"):
125 |         code = code[1:-1]
126 | 
127 |     if code.startswith("ditana-assistant "):
128 |         code = "ditana-assistant -q " + code[16:]
129 | 
130 |     return code
131 | 
132 | 
def remove_words_and_phrases(input_text: str, remove_string: str, new_string: str) -> str:
    """
    Replace a phrase, and the words it consists of, with another string.

    This function replaces occurrences of the entire remove_string with
    new_string, then progressively shorter runs of consecutive words from the
    remove_string, down to individual words. Partial phrases shorter than
    4 characters are left alone. Word boundaries in the regular expressions
    ensure that only complete words or phrases are replaced. Matching is
    case-sensitive.

    Afterwards every run of whitespace (including newlines) is collapsed into
    a single space and the result is stripped.

    Args:
        input_text (str): The text to process.
        remove_string (str): String containing the words/phrase to replace. If it
            is empty or contains only whitespace, no replacement takes place.
        new_string (str): The replacement text. It is inserted literally;
            backslashes are not interpreted as regex replacement escapes.

    Returns:
        str: Processed text with the specified words and phrases replaced.
    """
    def literal_replacement(_match):
        # A callable replacement keeps re.sub from interpreting backslashes in new_string.
        return new_string

    # An empty pattern between two \b anchors would match at every word boundary
    # and scatter new_string all over the text, so skip replacing in that case.
    if remove_string and remove_string.strip():
        # First, replace the entire remove_string if it exists
        input_text = re.sub(r'\b' + re.escape(remove_string) + r'\b', literal_replacement, input_text)

        remove_words = remove_string.split()

        # All runs of consecutive words, from longest to shortest
        for length in range(len(remove_words), 0, -1):
            for i in range(len(remove_words) - length + 1):
                phrase = " ".join(remove_words[i:i + length])
                if len(phrase) >= 4:
                    input_text = re.sub(r'\b' + re.escape(phrase) + r'\b', literal_replacement, input_text)

    # Collapse whitespace runs that might have been created
    input_text = re.sub(r'\s+', ' ', input_text)

    # Trim leading and trailing whitespace
    return input_text.strip()
167 | 
168 | 
def split_multiline_string(input_string):
    """
    Splits a given multiline string into multiple substrings.

    The function splits the input string at numeric labels in round brackets,
    i.e. `(1)`, `(2)`, `(17)`. Other parenthesized text such as `(a)` or
    `(see below)` is not a separator. This is particularly useful for parsing
    lists where items are prefixed with numbers in parentheses.
    Everything before the first separator is ignored. Each resulting substring
    is stripped of leading and trailing whitespace, but may contain newlines.
    Empty substrings are omitted.

    Args:
    input_string (str): The input multiline string to be split.

    Returns:
    list: A list of substrings resulting from the split operation; empty if
          the input contains no separator.

    Example:
    >>> text = "Ignore this\\n(1) first part (see below)\\n(2) second part\\n(3) third part"
    >>> split_multiline_string(text)
    ['first part (see below)', 'second part', 'third part']
    """
    # Separator: round brackets that contain digits only
    pieces = re.split(r'\(\d+\)', input_string)

    # pieces[0] is the text before the first separator (or the whole input when
    # there is no separator at all); it is never part of the result.
    stripped_pieces = (piece.strip() for piece in pieces[1:])
    return [piece for piece in stripped_pieces if piece]
216 | 
217 | 
def extract_backtick_content(text: str):
    """
    Return the stripped text found between the outermost backtick runs.

    The opening delimiter is the first run of consecutive backticks in the
    stripped input, the closing delimiter is the last run. Everything between
    the two runs is extracted and stripped; backticks nested inside that span
    are left untouched.

    The whole stripped input is returned instead when
    - it contains no backtick,
    - it contains only a single run of backticks, or
    - the text between the two runs is empty after stripping.

    Args:
        text (str): The input string to process.

    Returns:
        str: The extracted and stripped content between backticks, or the entire
             stripped input if no valid backtick-enclosed content is found.
    """
    content = text.strip()

    first_tick = content.find('`')
    if first_tick < 0:
        return content

    # Index of the last backtick of the opening run
    from_first = content[first_tick:]
    opening_run = len(from_first) - len(from_first.lstrip('`'))
    opening_end = first_tick + opening_run - 1

    last_tick = content.rfind('`')
    if last_tick == opening_end:
        # Only one run of backticks exists, so nothing is enclosed
        return content

    # Index of the first backtick of the closing run
    closing_start = len(content[:last_tick + 1].rstrip('`'))

    inner = content[opening_end + 1:closing_start].strip()
    return inner or content
265 | 


--------------------------------------------------------------------------------
/ditana_assistant/gui/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana_assistant/gui/__init__.py


--------------------------------------------------------------------------------
/ditana_assistant/gui/assistant_window.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module manages the graphical user interface window for the Ditana Assistant.
 26 | It handles the creation, updating, and interaction with the webview-based GUI.
 27 | """
 28 | 
 29 | import json
 30 | import os
 31 | import queue
 32 | 
 33 | import typing
 34 | import webview  # https://pywebview.flowrl.com/guide/
 35 | 
 36 | from ditana_assistant.engine.conversation_manager import ConversationManager
 37 | from ditana_assistant.engine import text_processors_regex
 38 | 
 39 | 
 40 | class AssistantWindow:
 41 |     """
 42 |     Manages the graphical user interface window for the Ditana Assistant.
 43 |     """
 44 | 
 45 |     def __init__(self, is_open: bool, conversation: ConversationManager):
 46 |         """
 47 |         Initialize the AssistantWindow.
 48 | 
 49 |         Args:
 50 |             is_open (bool): Whether the window should be open.
 51 |             conversation (ConversationManager): The conversation manager object.
 52 |         """
 53 |         self.window: typing.Optional[webview.Window] = None
 54 |         self.is_open = is_open
 55 |         self.ui_update_queue = queue.Queue()
 56 | 
 57 |         if is_open:
 58 |             self.window = webview.create_window(title='Ditana Assistant',
 59 |                                                 url=os.path.join(os.path.dirname(__file__), 'index.html'),
 60 |                                                 js_api=conversation,
 61 |                                                 width=1280,
 62 |                                                 height=1024)
 63 | 
 64 |             def on_closed():
 65 |                 print("Main window closed, stopping thread...")
 66 |                 ConversationManager.stop_thread().set()
 67 |                 ConversationManager.code_input_event().set()
 68 | 
 69 |             self.window.events.closed += on_closed
 70 | 
 71 |     def set_version(self, version_info: str) -> None:
 72 |         """
 73 |         Set the version info for the about-dialog.
 74 |         Args:
 75 |             version_info (str): The version info string
 76 | 
 77 |         """
 78 |         if self.window is not None:
 79 |             self.ui_update_queue.put(('set_version', version_info))
 80 | 
 81 |     def set_ui_input(self, user_input: str) -> None:
 82 |         """
 83 |         Set the user input in the UI.
 84 | 
 85 |         Args:
 86 |             user_input (str): The user input to be set in the UI.
 87 |         """
 88 |         if self.window is not None:
 89 |             self.ui_update_queue.put(('input', user_input))
 90 | 
 91 |     def set_ui_response(self, response: str) -> None:
 92 |         """
 93 |         Set the assistant’s response in the UI.
 94 | 
 95 |         Args:
 96 |             response (str): The assistant’s response to be displayed in the UI.
 97 |         """
 98 |         if self.window is not None:
 99 |             self.ui_update_queue.put(('response', response))
100 | 
101 |     def click_send_button(self) -> None:
102 |         """
103 |         Simulate clicking the send button in the UI.
104 |         """
105 |         if self.window is not None:
106 |             self.ui_update_queue.put(('click_send', None))
107 | 
108 |     def process_ui_updates(self) -> None:
109 |         """
110 |         Process any pending UI updates from the queue.
111 |         """
112 |         while not self.ui_update_queue.empty():
113 |             update_type, content = self.ui_update_queue.get()
114 |             if update_type == 'input':
115 |                 escaped_content = json.dumps(content)
116 |                 self.window.evaluate_js(f"document.getElementById('input').value = {escaped_content}")
117 |             elif update_type == 'response':
118 |                 content = text_processors_regex.ensure_markdown_horizontal_line(content)
119 |                 escaped_content = json.dumps(content)
120 |                 self.window.evaluate_js(f"appendToResponse({escaped_content})")
121 |             elif update_type == 'click_send':
122 |                 self.window.evaluate_js("document.getElementById('sendButton').click()")
123 |             elif update_type == 'set_version':
124 |                 escaped_content = json.dumps(content)
125 |                 self.window.evaluate_js(f"setVersion({escaped_content})")
126 | 


--------------------------------------------------------------------------------
/ditana_assistant/gui/ditana-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana_assistant/gui/ditana-logo.png


--------------------------------------------------------------------------------
/ditana_assistant/gui/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 | <!--
  4 | Copyright (c) 2024, 2025 acrion innovations GmbH
  5 | Authors: Stefan Zipproth, s.zipproth@acrion.ch
  6 | 
  7 | This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  8 | 
  9 | Ditana Assistant is offered under a commercial and under the AGPL license.
 10 | For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
 11 | 
 12 | AGPL licensing:
 13 | 
 14 | Ditana Assistant is free software: you can redistribute it and/or modify
 15 | it under the terms of the GNU Affero General Public License as published by
 16 | the Free Software Foundation, either version 3 of the License, or
 17 | (at your option) any later version.
 18 | 
 19 | Ditana Assistant is distributed in the hope that it will be useful,
 20 | but WITHOUT ANY WARRANTY; without even the implied warranty of
 21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 22 | GNU Affero General Public License for more details.
 23 | 
 24 | You should have received a copy of the GNU Affero General Public License
 25 | along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 26 | -->
 27 | <head>
 28 |     <meta charset="UTF-8">
 29 |     <meta name="viewport" content="width=device-width, initial-scale=1.0">
 30 |     <title>Ditana Assistant</title>
 31 |     <script src="https://cdnjs.cloudflare.com/ajax/libs/marked/4.0.2/marked.min.js"></script>
 32 |     <style>
 33 |         body {
 34 |             background-color: #121212;
 35 |             color: #FFFFFF;
 36 |             font-family: Arial, sans-serif;
 37 |             margin: 0;
 38 |             height: 100vh;
 39 |             display: flex;
 40 |             flex-direction: column;
 41 |         }
 42 |         #logo-container {
 43 |             display: flex;
 44 |             justify-content: center;
 45 |             align-items: center;
 46 |             margin-top: 20px;
 47 |             height: 100px;
 48 |         }
 49 |         img {
 50 |             max-width: 80px;
 51 |             height: auto;
 52 |         }
 53 |         #response-container {
 54 |             flex: 1;
 55 |             display: flex;
 56 |             flex-direction: column;
 57 |             margin: 10px;
 58 |             overflow-y: auto;
 59 |         }
 60 |         #response {
 61 |             flex: 1;
 62 |             margin-bottom: 10px;
 63 |             background-color: #1e1e1e;
 64 |             color: #ffffff;
 65 |             border: 1px solid #444;
 66 |             padding: 10px;
 67 |             overflow-y: auto;
 68 |             user-select: text;
 69 |             -webkit-user-select: text;
 70 |             -moz-user-select: text;
 71 |             -ms-user-select: text;
 72 |             cursor: text;
 73 |         }
 74 |         #input-container {
 75 |             display: flex;
 76 |             flex-direction: column;
 77 |             margin: 10px;
 78 |         }
 79 |         #input {
 80 |             margin-bottom: 10px;
 81 |             background-color: #1e1e1e;
 82 |             color: #ffffff;
 83 |             border: 1px solid #444;
 84 |             padding: 10px;
 85 |             resize: none;
 86 |             width: 100%;
 87 |             box-sizing: border-box;
 88 |         }
 89 |         .button-container {
 90 |             display: flex;
 91 |             flex-direction: row;
 92 |             justify-content: flex-start;
 93 |         }
 94 |         button {
 95 |             background-color: #444;
 96 |             color: #ffffff;
 97 |             border: none;
 98 |             padding: 10px;
 99 |             cursor: pointer;
100 |             width: 150px;
101 |             height: 50px;
102 |             margin-right: 10px;
103 |         }
104 |         button:hover {
105 |             background-color: #555;
106 |         }
107 |         button:disabled {
108 |             background-color: #333;
109 |             cursor: not-allowed;
110 |         }
111 |         label {
112 |             color: #ffffff;
113 |         }
114 |         #processing-message {
115 |             color: #ffaa00;
116 |             margin-top: 10px;
117 |             height: 20px;
118 |             line-height: 20px;
119 |         }
120 |         .colored-italic {
121 |             color: #ffaa00;
122 |             font-style: italic;
123 |         }
124 |         #about-dialog {
125 |             display: none;
126 |             position: fixed;
127 |             z-index: 1;
128 |             left: 0;
129 |             top: 0;
130 |             width: 100%;
131 |             height: 100%;
132 |             overflow: auto;
133 |             background-color: rgba(0,0,0,0.4);
134 |         }
135 |         .dialog-content {
136 |             background-color: #1e1e1e;
137 |             margin: 15% auto;
138 |             padding: 20px;
139 |             border: 1px solid #444;
140 |             width: 80%;
141 |             max-width: 600px;
142 |         }
143 |         .close {
144 |             color: #aaa;
145 |             float: right;
146 |             font-size: 28px;
147 |             font-weight: bold;
148 |         }
149 |         .close:hover,
150 |         .close:focus {
151 |             color: #fff;
152 |             text-decoration: none;
153 |             cursor: pointer;
154 |         }
155 |     </style>
156 | </head>
157 | <body>
158 |     <div id="logo-container">
159 |         <img id="ditana-logo" src="ditana-logo.png" alt="Ditana Logo">
160 |     </div>
161 | 
162 |     <div id="response-container">
163 |         <div id="response"></div>
164 |     </div>
165 | 
166 |     <div id="input-container">
167 |         <label for="input">Your Request</label>
168 |         <textarea id="input" rows="5"></textarea>
169 |         <div class="button-container">
170 |             <button id="sendButton" onclick="sendInput()">Send</button>
171 |             <button id="aboutButton" onclick="openAboutDialog()">About</button>
172 |         </div>
173 |         <div id="processing-message"></div>
174 |     </div>
175 | 
176 |     <div id="about-dialog">
177 |         <div class="dialog-content">
178 |             <span class="close" onclick="closeAboutDialog()">&times;</span>
179 |             <div id="about-content"></div>
180 |         </div>
181 |     </div>
182 | 
183 |     <script>
        // Shared UI state used by the functions below.
        // True while a request started by sendInput() has not finished yet.
        let isProcessing = false;
        // Version string shown in the About dialog; overwritten by setVersion().
        let versionInfo = "unknown";

        // Custom renderer for marked: emphasized text is emitted as a
        // <span class="colored-italic"> (styled in the stylesheet above).
        // NOTE(review): this assumes a marked version whose renderer.em callback
        // receives the already-rendered inner text as a string - confirm against
        // the marked version loaded in <head>.
        const renderer = new marked.Renderer();
        renderer.em = function(text) {
            return `<span class="colored-italic">${text}</span>`;
        };

        // Set the custom renderer
        marked.setOptions({
            renderer: renderer
        });
197 | 
198 |         function addMarkdownItalics(text) {
199 |             return text
200 |                 .split('\n')
201 |                 .map(line => line.trim() ? `_${line.trim()}_` : line)
202 |                 .join('\n');
203 |         }
204 | 
205 |         function sendInput() {
206 |             if (isProcessing) {
207 |                 alert("Please wait, your previous request is still being processed.");
208 |                 return;
209 |             }
210 | 
211 |             var input = document.getElementById("input").value;
212 | 
213 |             if (input.trim()) {
214 |                 appendToResponse(addMarkdownItalics(input) + "\n\n");
215 |             }
216 |             else {
217 |                 return;
218 |             }
219 | 
220 |             isProcessing = true;
221 |             updateUIState();
222 | 
223 |             window.pywebview.api.process_input_direct(input).then(function(response) {
224 |                 appendToResponse(response + "\n\n");
225 |                 isProcessing = false;
226 |                 updateUIState();
227 |             }).catch(function(e) {
228 |                 console.error("Error in process_input_direct:", e);
229 |                 isProcessing = false;
230 |                 updateUIState();
231 |             });
232 | 
233 |             document.getElementById("input").value = "";
234 |         }
235 | 
236 |         function appendToResponse(text) {
237 |             var responseDiv = document.getElementById("response");
238 |             responseDiv.innerHTML += marked.parse(text);
239 |             responseDiv.scrollTop = responseDiv.scrollHeight;
240 |         }
241 | 
242 |         function updateUIState() {
243 |             const sendButton = document.getElementById("sendButton");
244 |             const processingMessage = document.getElementById("processing-message");
245 |             const inputArea = document.getElementById("input");
246 | 
247 |             if (isProcessing) {
248 |                 sendButton.disabled = true;
249 |                 processingMessage.textContent = "Processing... Please wait.";
250 |                 inputArea.disabled = true;
251 |             } else {
252 |                 sendButton.disabled = false;
253 |                 processingMessage.textContent = "";
254 |                 inputArea.disabled = false;
255 |             }
256 |         }
257 | 
258 |         document.getElementById("input").addEventListener("keydown", function(event) {
259 |             if (event.key === "Enter" && !event.shiftKey) {
260 |                 event.preventDefault();
261 |                 sendInput();
262 |             }
263 |         });
264 | 
265 |         function openAboutDialog() {
266 |             document.getElementById("about-dialog").style.display = "block";
267 |             document.getElementById("about-content").innerHTML = `
268 |                 <h2>About Ditana Assistant</h2>
269 |                 <p>Version: ${versionInfo}</p>
270 |                 <p>Ditana Assistant is an innovative AI-powered application that combines a graphical user interface with terminal functionality and, optionally, introspective contextual augmentation via Wolfram|Alpha.</p>
271 |                 <p>For detailed information about features, usage, and licensing, please visit:</p>
272 |                 <p><a href="https://ditana.org/assistant" target="_blank">https://ditana.org/assistant</a></p>
273 |                 <p>Copyright © 2024 Stefan Zipproth, acrion innovations GmbH, Switzerland</p>
274 |                 <p><a href="https://acrion.ch" target="_blank">https://acrion.ch</a></p>
275 |             `;
276 |         }
277 | 
278 |         function closeAboutDialog() {
279 |             document.getElementById("about-dialog").style.display = "none";
280 |         }
281 | 
282 |         function setVersion(version) {
283 |             versionInfo = version;
284 |         }
285 |     </script>
286 | </body>
287 | </html>
288 | 


--------------------------------------------------------------------------------
/ditana_assistant/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana_assistant/tests/__init__.py


--------------------------------------------------------------------------------
/ditana_assistant/tests/test_base/__int__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana_assistant/tests/test_base/__int__.py


--------------------------------------------------------------------------------
/ditana_assistant/tests/test_base/test_string_cache.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
"""
This module contains unit tests for the StringCache class.

The tests cover various aspects of the StringCache functionality, including:
- Basic set and get operations
- Entry expiration
- Lifetime extension and reduction
- Priority cache behavior
- Maximum size limit enforcement
- Data persistence between instances
- Cache clearing
- Key existence checking
- Cache size reporting
- File reading and writing consistency
- Stress testing with random key/value pairs

These tests ensure that the StringCache class behaves correctly under different scenarios
and maintains data integrity across multiple instances and file operations.
"""
 42 | 
 43 | import unittest
 44 | import time
 45 | import tempfile
 46 | import json
 47 | import os
 48 | from pathlib import Path
 49 | from typing import Dict, Tuple
 50 | 
 51 | from ditana_assistant.base import string_cache
 52 | 
 53 | """
 54 | This module contains unit tests for the StringCache class.
 55 | 
 56 | The tests cover various aspects of the StringCache functionality, including:
 57 | - Basic set and get operations
 58 | - Entry expiration
 59 | - Lifetime extension and reduction
 60 | - Priority cache behavior
 61 | - Maximum size limit enforcement
 62 | - Data persistence between instances
 63 | - Cache clearing
 64 | - Key existence checking
 65 | - Cache size reporting
 66 | - File reading and writing consistency
 67 | - Stress testing with random key/value pairs
 68 | 
 69 | These tests ensure that the StringCache class behaves correctly under different scenarios
 70 | and maintains data integrity across multiple instances and file operations.
 71 | """
 72 | 
 73 | import unittest
 74 | import time
 75 | import tempfile
 76 | import json
 77 | import os
 78 | import random
 79 | import string
 80 | import logging
 81 | from pathlib import Path
 82 | from typing import Dict, Tuple
 83 | 
 84 | from ditana_assistant.base import string_cache
 85 | 
# Configure the root logger at DEBUG level as soon as this module is imported.
logging.basicConfig(level=logging.DEBUG)
# Logger named after this module.
logger = logging.getLogger(__name__)
 88 | 
 89 | 
 90 | class TestStringCache(unittest.TestCase):
 91 |     """Unit tests for the StringCache class."""
 92 | 
 93 |     def setUp(self) -> None:
 94 |         """Set up a StringCache instance for each test."""
 95 |         self.cache = string_cache.StringCache("unit-test", default_lifetime=0.5)
 96 |         self.cache.clear()  # Ensure a clean state even if previous test was interrupted
 97 | 
 98 |     def tearDown(self) -> None:
 99 |         """Clean up after each test."""
100 |         self.cache.clear()
101 | 
102 |     def test_set_and_get_immediately(self) -> None:
103 |         """Test setting a cache entry and retrieving it immediately."""
104 |         self.cache.set("key1", "value1")
105 |         self.assertEqual(self.cache.get("key1"), "value1")
106 | 
107 |     def test_expiration(self) -> None:
108 |         """Test that a cache entry expires after its lifetime."""
109 |         self.cache.set("key2", "value2")
110 |         time.sleep(1)  # Wait for the entry to expire
111 |         self.assertIsNone(self.cache.get("key2"))
112 | 
113 |     def test_extend_lifetime_same_value(self) -> None:
114 |         """Test that setting the same value extends the lifetime."""
115 |         self.cache.set("key3", "value3")
116 |         time.sleep(1)  # Wait for the entry to expire
117 |         self.cache.set("key3", "value3")
118 |         self.assertGreaterEqual(self.cache.get_lifetime("key3"), 1.0)
119 | 
120 |     def test_reduce_lifetime_different_value(self) -> None:
121 |         """Test that setting a different value reduces the lifetime."""
122 |         self.cache.set("key4", "value4")
123 |         time.sleep(1)  # Wait for the entry to expire
124 |         self.cache.set("key4", "new_value4")
125 |         self.assertLess(self.cache.get_lifetime("key4"), 0.75)
126 | 
127 |     def test_priority_cache(self) -> None:
128 |         """Test the priority cache functionality."""
129 |         # Create a temporary file for the priority cache
130 |         with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
131 |             # Create a cache and set some values
132 |             temp_cache = string_cache.StringCache("temp-cache", default_lifetime=1000)
133 |             temp_cache.set("priority_key", "priority_value")
134 | 
135 |             # Copy the contents of the cache file to the temporary file
136 |             with open(temp_cache.file_path, 'r') as source_file:
137 |                 temp_file.write(source_file.read())
138 | 
139 |         # Create a new cache with the priority cache file
140 |         priority_cache = string_cache.StringCache("unit-test", default_lifetime=0.5,
141 |                                                   priority_cache_path=Path(temp_file.name))
142 | 
143 |         # Test that the priority cache entry is accessible
144 |         self.assertEqual(priority_cache.get("priority_key"), "priority_value")
145 | 
146 |         # Test that the priority cache entry doesn't expire
147 |         time.sleep(1)
148 |         self.assertEqual(priority_cache.get("priority_key"), "priority_value")
149 | 
150 |         # Clean up
151 |         Path(temp_file.name).unlink()
152 | 
153 |     def test_max_size_limit(self) -> None:
154 |         """Test that the cache respects the maximum size limit."""
155 |         # Set a small max_size for testing
156 |         small_cache = string_cache.StringCache("unit-test", default_lifetime=0.5, max_size=100)
157 | 
158 |         # Add entries until we exceed the limit
159 |         for i in range(10):
160 |             key = f"key{i}"
161 |             value = f"value{i}" * 5  # Make the value long enough to exceed the limit quickly
162 |             small_cache.set(key, value)
163 | 
164 |         # Check that the cache size is below or equal to the max_size
165 |         self.assertLessEqual(small_cache.current_size, 100)
166 | 
167 |     def test_persistence(self) -> None:
168 |         """Test that the cache persists data between instances."""
169 |         self.cache.set("persist_key", "persist_value")
170 | 
171 |         # Create a new cache instance to test persistence
172 |         new_cache = string_cache.StringCache("unit-test", default_lifetime=0.5)
173 |         self.assertEqual(new_cache.get("persist_key"), "persist_value")
174 | 
175 |     def test_clear(self) -> None:
176 |         """Test the clear method."""
177 |         self.cache.set("key_to_clear", "value_to_clear")
178 |         self.cache.clear()
179 |         self.assertIsNone(self.cache.get("key_to_clear"))
180 |         self.assertEqual(len(self.cache), 0)
181 | 
182 |     def test_contains(self) -> None:
183 |         """Test the __contains__ method."""
184 |         self.cache.set("contain_key", "contain_value")
185 |         self.assertIn("contain_key", self.cache)
186 |         self.assertNotIn("non_existent_key", self.cache)
187 | 
188 |     def test_len(self) -> None:
189 |         """Test the __len__ method."""
190 |         self.cache.set("len_key1", "len_value1")
191 |         self.cache.set("len_key2", "len_value2")
192 |         self.assertEqual(len(self.cache), 2)
193 | 
194 |     def test_file_read_write_consistency(self) -> None:
195 |         """Test that the stored file is correctly read when creating a new instance."""
196 |         # Set some values in the cache
197 |         self.cache.set("key1", "value1")
198 |         self.cache.set("key2", "value2")
199 | 
200 |         # Create a new instance to read from the file
201 |         new_cache = string_cache.StringCache("unit-test", default_lifetime=0.5)
202 | 
203 |         # Check if the values are correctly read
204 |         self.assertEqual(new_cache.get("key1"), "value1")
205 |         self.assertEqual(new_cache.get("key2"), "value2")
206 | 
207 |         # Check if the number of entries is correct
208 |         self.assertEqual(len(new_cache), 2)
209 | 
210 |     def test_stress(self) -> None:
211 |         """Stress test the cache with random key/value pairs."""
212 |         # Create a new cache with a larger max_size for stress testing
213 |         stress_cache = string_cache.StringCache("stress-test", default_lifetime=60, max_size=1024 * 1024)  # 1 MiB
214 | 
215 |         # Generate random key/value pairs
216 |         def random_string(length: int) -> str:
217 |             return ''.join(random.choice(string.ascii_letters) for _ in range(length))
218 | 
219 |         num_operations = 1000
220 |         keys = [random_string(10) for _ in range(num_operations)]
221 |         values = [random_string(50) for _ in range(num_operations)]
222 | 
223 |         # Perform set operations
224 |         for key, value in zip(keys, values):
225 |             stress_cache.set(key, value)
226 | 
227 |         # Verify all values are retrievable
228 |         for key, value in zip(keys, values):
229 |             self.assertEqual(stress_cache.get(key), value)
230 | 
231 |         # Perform mixed set and get operations
232 |         for _ in range(num_operations):
233 |             operation = random.choice(['set', 'get'])
234 |             key = random.choice(keys)
235 |             if operation == 'set':
236 |                 new_value = random_string(50)
237 |                 stress_cache.set(key, new_value)
238 |             else:
239 |                 stress_cache.get(key)
240 | 
241 |         # Verify cache size is within limits
242 |         self.assertLessEqual(stress_cache.current_size, 1024 * 1024)
243 | 
244 |         # Clean up
245 |         stress_cache.clear()
246 | 
247 | 
# Run the tests of this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
250 | 


--------------------------------------------------------------------------------
/ditana_assistant/tests/test_engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/ditana_assistant/tests/test_engine/__init__.py


--------------------------------------------------------------------------------
/ditana_assistant/tests/test_engine/code_detection_test_cases.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module contains test cases for code detection functions.
 26 | It provides input strings and expected outputs for use in multiple test classes.
 27 | """
 28 | 
 29 | from typing import List, Tuple, Union
 30 | 
 31 | # Each test case is a tuple of (description, input_text, expected_output)
 32 | TestCase = Tuple[str, str, Union[bool, Tuple[bool, float]]]
 33 | 
 34 | 
 35 | def get_test_cases() -> List[TestCase]:
 36 |     """
 37 |     Returns a list of test cases for code detection.
 38 | 
 39 |     Each test case is a tuple containing:
 40 |     - A description of the test case
 41 |     - The input text to be analyzed
 42 |     - The expected output (either a boolean or a tuple of boolean and float)
 43 | 
 44 |     Returns:
 45 |         List[TestCase]: A list of test cases
 46 |     """
 47 |     return [
 48 |         (
 49 |             "Test if a single letter variable assignment is identified as code",
 50 |             "x = 5",
 51 |             True
 52 |         ),
 53 |         (
 54 |             "Test if multiple single letter variables in a loop are identified as code",
 55 |             "for i in range(n): a += b * c",
 56 |             True
 57 |         ),
 58 |         (
 59 |             "Test if normal text is correctly identified as non-code",
 60 |             "Dies ist ein normaler Satz ohne Code.",
 61 |             False
 62 |         ),
 63 |         (
 64 |             "Test if a PowerShell command is correctly identified as code",
 65 |             "Get-Content D:\\TestAusgabe.txt",
 66 |             True
 67 |         ),
 68 |         (
 69 |             "Test if a command without backticks is correctly identified as code",
 70 |             "systemctl status systemd-resolved",
 71 |             True
 72 |         ),
 73 |         (
 74 |             "Test if normal text that contains monospace formatted text is correctly identified as non-code",
 75 |             """The content of the file `~/test.txt` is:
 76 | 
 77 | ```
 78 | testtest
 79 | ``` 
 80 | 
 81 | Let me know if you'd like to explore other file operations!""",
 82 |             False
 83 |         ),
 84 |         (
 85 |             "Test if a short bash command is correctly identified as code",
 86 |             "cat ~/test.txt",
 87 |             True
 88 |         ),
 89 |         (
 90 |             "Test if meta execution of assistant is correctly identified as code",
 91 |             """```bash
 92 | ditana-assistant "Was kannst du mir über mein System erzählen?" $(uname -a)
 93 | ```""",
 94 |             True
 95 |         ),
 96 |         (
 97 |             "Test if the output of a mathematical calculation is identified as non-code",
 98 |             "Der Wert von \\( 3^{50} \\) ist 7.625.597.484.987.",
 99 |             False
100 |         ),
101 |         (
102 |             "Test if commented maths is correctly identified as not code",
103 |             """```To find the angle between the ladder and the ground, we can use trigonometry. Let's denote the angle we are looking for as θ.
104 | We have a right triangle formed by the ladder, the wall, and the ground. The ladder is the hypotenuse of the triangle, and its length is 4 meters. The distance from the bottom of the ladder to the wall is the adjacent side of the triangle, and its length is 3 meters.
105 | We can use the cosine function to find the angle θ:
106 | cos(θ) = adjacent / hypotenuse
107 | cos(θ) = 3 / 4
108 | θ = arccos(3/4)
109 | θ ≈ 36.87 degrees
110 | Therefore, the angle between the ladder and the ground is approximately 36.87 degrees.""",
111 |             False
112 |         ),
113 |         (
114 |             "Test if powershell code is correctly identified as code",
115 |             """```powershell
116 | $file = "~/test.txt"
117 | (Get-Content $file)
118 | ```""",
119 |             True
120 |         ),
121 |     ]
122 | 


--------------------------------------------------------------------------------
/ditana_assistant/tests/test_engine/input_analyzers_ai_code_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
 2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
 3 | #
 4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
 5 | #
 6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
 7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
 8 | 
 9 | # AGPL licensing:
10 | #
11 | # Ditana Assistant is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU Affero General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # Ditana Assistant is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU Affero General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU Affero General Public License
22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
23 | 
24 | """
25 | This module contains unit tests for the input_analyzers_ai.is_likely_code function.
26 | It tests the same things as the code related tests in input_analyzers_regex_code_test.py.
27 | """
28 | 
29 | import unittest
30 | 
31 | from ditana_assistant.base.config import Configuration, ModelType
32 | from ditana_assistant.base.output_manager import OutputManager
33 | from ditana_assistant.engine import input_analyzers_ai
34 | from ditana_assistant.tests.test_engine.code_detection_test_cases import get_test_cases
35 | 
# Set the class-level flag on OutputManager for the whole test module.
# NOTE(review): presumably this suppresses the assistant's console messages
# during the tests - confirm in base/output_manager.py.
OutputManager.hide_messages = True
37 | 
38 | 
class TestIsLikelyCode(unittest.TestCase):
    """
    Checks input_analyzers_ai.is_likely_code against the shared code detection cases.

    The cases are executed once per entry of ``model_configs``. At present that list
    contains only the OpenAI configuration; the Gemma entry is commented out.
    """

    @classmethod
    def setUpClass(cls):
        """
        Define the model configurations the tests are run with.

        Note:
        ModelType.GEMMA is deliberately not part of the list because of limitations
        in its code detection capabilities. For ModelType.GEMMA the production code
        falls back to a regex-based solution in
        input_analyzers_ai.is_likely_code_delegate.

        Consequences of this setup:
        1. The tests concentrate on models with reliable code detection (currently OpenAI).
        2. Further models can be added simply by extending the list.
        3. The tests stay compatible with the fallback mechanism of the production code.

        When a new model type becomes available and passes these tests, add it here.
        Models that need the regex-based approach should instead be handled in the
        fallback logic of input_analyzers_ai.is_likely_code_delegate.
        """
        openai_config = {"type": ModelType.OPENAI, "openai_model": "gpt-4o-mini"}
        # gemma_config = {"type": ModelType.GEMMA, "openai_model": None}
        cls.model_configs = [openai_config]

    def run_test_for_all_models(self, test_func):
        """
        Call ``test_func`` once per configured model, each inside its own subTest.

        Before each call the model type is applied through Configuration.set; for
        OpenAI configurations the OpenAI model name is applied as well.
        """
        for entry in self.model_configs:
            model_type, openai_model = entry["type"], entry["openai_model"]

            with self.subTest(model=model_type, openai_model=openai_model):
                Configuration.set(model_type=model_type)
                if model_type == ModelType.OPENAI:
                    Configuration.set(openai_model=openai_model)

                model_label = openai_model or 'N/A'
                print(f"---- Model type: {model_type}, OpenAI Model: {model_label}  ----")
                test_func()

    def test_is_likely_code(self):
        """Compare the first element of is_likely_code's result with the expected value of every shared case."""
        def check_all_cases():
            for case in get_test_cases():
                description, input_text, expected_output = case
                with self.subTest(description=description):
                    detected = input_analyzers_ai.is_likely_code(input_text)[0]
                    self.assertEqual(detected, expected_output, f"Failed test case: {description}")

        self.run_test_for_all_models(check_all_cases)
94 | 
95 | 
# Run the tests of this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
98 | 


--------------------------------------------------------------------------------
/ditana_assistant/tests/test_engine/input_analyzers_ai_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module contains unit tests for the input_analyzers_ai module of the Ditana Assistant.
 26 | It tests various functions related to code detection and input analysis.
 27 | """
 28 | 
 29 | import unittest
 30 | 
 31 | from ditana_assistant.base.config import Configuration, ModelType
 32 | from ditana_assistant.base.output_manager import OutputManager
 33 | 
 34 | from ditana_assistant.engine import context
 35 | from ditana_assistant.engine import input_analyzers_ai
 36 | from ditana_assistant.engine.conversation_manager import ConversationManager
 37 | 
 38 | 
# Set the class-level flag on OutputManager for the whole test module.
# NOTE(review): presumably this suppresses the assistant's console messages
# during the tests - confirm in base/output_manager.py.
OutputManager.hide_messages = True
 40 | 
 41 | 
 42 | class TestRequestIsAnswerable(unittest.TestCase):
 43 |     """
 44 |     Test cases for the request_is_answerable function in the input_analyzer module.
 45 |     Tests are performed for both OpenAI and Gemma models.
 46 |     """
 47 | 
 48 |     conversation: ConversationManager
 49 | 
 50 |     @classmethod
 51 |     def setUpClass(cls):
 52 |         cls.model_types = [ModelType.GEMMA, ModelType.OPENAI]
 53 |         cls.conversation = ConversationManager()
 54 |         cls.conversation.append_user_message('Ich arbeite auf Arch Linux mit XFCE und Kitty. Es ist derzeit Montag, der 23. September 2024 um 14:00 Uhr. Die folgenden Desktop-Anwendungen sind geöffnet:\n\nxfce4-panel\nxfdesktop  Desktop\n/usr/share/pycharm/jbr//bin/java  003-ditana-assistant – input_analyzers_ai_test.py\n/usr/lib/chromium/chromium Claude -\n/usr/share/pycharm/jbr//bin/java  /media/stefan/data/Documents/git/my-projects/acrion/ditana/packages/003-ditana-assistant/src/input_analyzers_ai.py')
 55 | 
 56 |     def run_test_for_both_models(self, test_func):
 57 |         """Run the given test function for both OpenAI and Gemma models."""
 58 |         for model_type in self.model_types:
 59 |             with self.subTest(model=model_type):
 60 |                 Configuration.set(model_type=model_type)
 61 |                 print(f"---- Model type: {model_type}  ----")
 62 |                 test_func()
 63 | 
 64 |     def test_available_disk_space(self):
 65 |         """On Linux, test if dialog so far is categorized correctly to not help telling the available disk space."""
 66 | 
 67 |         def test_func():
 68 |             if context.get_os_info() == "Linux":
 69 |                 text = "Please check the available storage space on the drive containing the current directory."
 70 |                 self.assertFalse(input_analyzers_ai.request_is_answerable(text, self.conversation.messages))
 71 | 
 72 |         self.run_test_for_both_models(test_func)
 73 | 
 74 |     def test_summarize_running_apps(self):
 75 |         """On Linux, test if dialog so far is categorized correctly to help listing the open apps."""
 76 | 
 77 |         def test_func():
 78 |             if context.get_os_info() == "Linux":
 79 |                 text = "Kannst du die Informationen über meine laufenden Desktop-Anwendungen zusammenfassen?"
 80 |                 self.assertTrue(input_analyzers_ai.request_is_answerable(text, self.conversation.messages))
 81 | 
 82 |         self.run_test_for_both_models(test_func)
 83 | 
 84 |     def test_output_file(self):
 85 |         """On Linux, test if dialog so far is categorized correctly to not contain information about a file content"""
 86 | 
 87 |         def test_func():
 88 |             if context.get_os_info() == "Linux":
 89 |                 text = "Gib den Inhalt der Datei ~/test.txt aus."
 90 |                 self.assertFalse(input_analyzers_ai.request_is_answerable(text, self.conversation.messages))
 91 | 
 92 |         self.run_test_for_both_models(test_func)
 93 | 
 94 | 
 95 | class TestQueryCanBeSolvedWithTerminal(unittest.TestCase):
 96 |     """
 97 |     Test cases for the query_can_be_solved_with_terminal function in the input_analyzer module.
 98 |     Tests are performed for both OpenAI and Gemma models.
 99 |     """
100 | 
101 |     @classmethod
102 |     def setUpClass(cls):
103 |         cls.model_types = [ModelType.GEMMA, ModelType.OPENAI]
104 | 
105 |     def run_test_for_both_models(self, test_func):
106 |         """Run the given test function for both OpenAI and Gemma models."""
107 |         for model_type in self.model_types:
108 |             with self.subTest(model=model_type):
109 |                 Configuration.set(model_type=model_type)
110 |                 print(f"---- Model type: {model_type}  ----")
111 |                 test_func()
112 | 
113 |     def test_systemd_resolved(self):
114 |         """On Linux, test if question about systemd service is categorized correctly as a terminal task."""
115 | 
116 |         def test_func():
117 |             if context.get_os_info() == "Linux":
118 |                 text = "Is systemd-resolved running stably?"
119 |                 self.assertTrue(input_analyzers_ai.query_refers_to_a_computer(text))
120 |                 self.assertFalse(input_analyzers_ai.query_requires_changes_on_computer(text))
121 | 
122 |         self.run_test_for_both_models(test_func)
123 | 
124 |     def test_file_content_output(self):
125 |         """Test if question about outputting file content is categorized as a terminal task."""
126 | 
127 |         def test_func():
128 |             text = "Gib den Inhalt der Datei ~/test.txt aus"
129 |             self.assertTrue(input_analyzers_ai.query_refers_to_a_computer(text))
130 |             self.assertFalse(input_analyzers_ai.query_requires_changes_on_computer(text))
131 | 
132 |         self.run_test_for_both_models(test_func)
133 | 
134 |     def test_query_refers_to_a_computer(self):
135 |         """On Linux, test if question about the largest files in the current directory is correctly categorized as a terminal task."""
136 | 
137 |         def test_func():
138 |             if context.get_os_info() == "Linux":
139 |                 text = "What are the largest files in the current directory and below?"
140 |                 self.assertTrue(input_analyzers_ai.query_refers_to_a_computer(text))
141 |                 self.assertFalse(input_analyzers_ai.query_requires_changes_on_computer(text))
142 | 
143 |         self.run_test_for_both_models(test_func)
144 | 
145 |     def test_open_application(self):
146 |         """On Linux, test if a prompt to open libreoffice is correctly categorized as a terminal task."""
147 | 
148 |         def test_func():
149 |             if context.get_os_info() == "Linux":
150 |                 text = "Öffne libreoffice"
151 |                 self.assertTrue(input_analyzers_ai.query_refers_to_a_computer(text))
152 | 
153 |         self.run_test_for_both_models(test_func)
154 | 
155 |     def test_current_audio_device(self):
156 |         """Test if question about current audio device is categorized as a terminal task."""
157 | 
158 |         def test_func():
159 |             text = "What’s my current audio device?"
160 |             self.assertTrue(input_analyzers_ai.query_refers_to_a_computer(text))
161 |             self.assertFalse(input_analyzers_ai.query_requires_changes_on_computer(text))
162 | 
163 |         self.run_test_for_both_models(test_func)
164 | 
165 |     def test_replace_in_file(self):
166 |         """Test if task to replace text in a file is categorized as a terminal task."""
167 | 
168 |         def test_func():
169 |             text = "Replace all occurrences of the word 'sun' with 'moon' in the file ./project/test"
170 |             self.assertTrue(input_analyzers_ai.query_refers_to_a_computer(text))
171 |             self.assertTrue(input_analyzers_ai.query_requires_changes_on_computer(text))
172 | 
173 |         self.run_test_for_both_models(test_func)
174 | 
175 |     def test_cpu_consumption_query(self):
176 |         """Test if question about CPU consumption is categorized as a terminal task."""
177 | 
178 |         def test_func():
179 |             text = "Which process is consuming so much CPU time?"
180 |             self.assertTrue(input_analyzers_ai.query_refers_to_a_computer(text))
181 |             self.assertFalse(input_analyzers_ai.query_requires_changes_on_computer(text))
182 | 
183 |         self.run_test_for_both_models(test_func)
184 | 
185 |     def test_boot_log_errors_query(self):
186 |         """Test if question about errors in the boot log is categorized as a terminal task."""
187 | 
188 |         def test_func():
189 |             text = "List errors in the boot log."
190 |             self.assertTrue(input_analyzers_ai.query_refers_to_a_computer(text))
191 |             self.assertFalse(input_analyzers_ai.query_requires_changes_on_computer(text))
192 | 
193 |         self.run_test_for_both_models(test_func)
194 | 
195 |     def test_technical_non_terminal_query_blockchain(self):
196 |         """Test if a technical question about blockchain technology is correctly not categorized as a terminal task."""
197 | 
198 |         def test_func():
199 |             text = "Can you explain how the proof-of-stake consensus mechanism differs from proof-of-work in blockchain technology?"
200 |             self.assertFalse(input_analyzers_ai.query_refers_to_a_computer(text))
201 | 
202 |         self.run_test_for_both_models(test_func)
203 | 
204 |     def test_technical_non_terminal_query_quantum_computing(self):
205 |         """Test if a technical question about quantum computing is correctly not categorized as a terminal task."""
206 | 
207 |         def test_func():
208 |             text = "What are the potential implications of Shor’s algorithm for current encryption methods if large-scale quantum computers become available?"
209 |             self.assertFalse(input_analyzers_ai.query_refers_to_a_computer(text))
210 | 
211 |         self.run_test_for_both_models(test_func)
212 | 
213 |     def test_technical_non_terminal_query_ai_ethics(self):
214 |         """Test if a technical question about AI ethics is correctly not categorized as a terminal task."""
215 | 
216 |         def test_func():
217 |             text = "How can we implement fairness constraints in machine learning models to mitigate algorithmic bias without significantly compromising model performance?"
218 |             self.assertFalse(input_analyzers_ai.query_refers_to_a_computer(text))
219 | 
220 |         self.run_test_for_both_models(test_func)
221 | 
222 | 
223 | if __name__ == '__main__':
224 |     unittest.main()
225 | 


--------------------------------------------------------------------------------
/ditana_assistant/tests/test_engine/input_analyzers_regex_code_test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
  2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
  3 | #
  4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
  5 | #
  6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
  7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
  8 | 
  9 | # AGPL licensing:
 10 | #
 11 | # Ditana Assistant is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU Affero General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # Ditana Assistant is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU Affero General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU Affero General Public License
 22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
 23 | 
 24 | """
 25 | This module contains unit tests for the input_analyzers_regex module of the Ditana Assistant.
 26 | It tests various functions related to code detection and input analysis.
 27 | """
 28 | 
 29 | import unittest
 30 | from typing import Tuple
 31 | 
 32 | from ditana_assistant.engine import input_analyzers_regex
 33 | from ditana_assistant.tests.test_engine.code_detection_test_cases import get_test_cases
 34 | 
 35 | 
 36 | def is_likely_code_wrapper(text: str) -> Tuple[bool, float]:
 37 |     """Wrapper for is_likely_code to capture the score"""
 38 |     result, score = input_analyzers_regex.is_likely_code(text)
 39 |     return result, score
 40 | 
 41 | 
 42 | class TestIsLikelyCode(unittest.TestCase):
 43 |     """
 44 |     Test cases for the is_likely_code function in the input_analyzer module.
 45 |     """
 46 | 
 47 |     test_results = []  # Class variable to store all test results
 48 | 
 49 |     def run_test(self, description: str, text: str, expected_result: bool):
 50 |         """Run a single test case and store the result"""
 51 |         result, score = is_likely_code_wrapper(text)
 52 |         self.__class__.test_results.append((result, score, text, expected_result))
 53 |         if expected_result:
 54 |             self.assertTrue(result, f"Failed: {description}")
 55 |         else:
 56 |             self.assertFalse(result, f"Failed: {description}")
 57 | 
 58 |     def test_code_detection(self):
 59 |         """Run all test cases from the imported module"""
 60 |         test_cases = get_test_cases()
 61 |         for description, text, expected_result in test_cases:
 62 |             with self.subTest(description=description):
 63 |                 self.run_test(description, text, expected_result)
 64 | 
 65 |     @classmethod
 66 |     def tearDownClass(cls):
 67 |         """Print the analysis results after all tests have run"""
 68 |         print("\nAll tests completed. Analyzing results:")
 69 |         true_scores = []
 70 |         false_scores = []
 71 | 
 72 |         for result, score, _, expected_result in cls.test_results:
 73 |             if expected_result:
 74 |                 true_scores.append(score)
 75 |             else:
 76 |                 false_scores.append(score)
 77 | 
 78 |         if true_scores and false_scores:
 79 |             min_true_score = min(true_scores)
 80 |             max_false_score = max(false_scores)
 81 |             difference = min_true_score - max_false_score
 82 | 
 83 |             print(f"Minimum score for 'true' results: {min_true_score}")
 84 |             print(f"Maximum score for 'false' results: {max_false_score}")
 85 | 
 86 |             if difference < 0:
 87 |                 print("Optimum threshold: Some tests failed, calculation does not make sense.")
 88 |             else:
 89 |                 print(f"Optimum threshold: {(min_true_score + max_false_score) / 2}")
 90 | 
 91 |             print(f"Difference between min true and max false: {difference}")
 92 |         else:
 93 |             print("Not enough data to calculate the difference.")
 94 | 
 95 |         print("\nDetailed results:")
 96 |         for result, score, text, expected_result in cls.test_results:
 97 |             print(f"Text: {text}")
 98 |             print(f"Expected result: {'Code' if expected_result else 'Not code'}")
 99 |             print(f"Actual result: {'Code' if result else 'Not code'}")
100 |             print(f"Score: {score}")
101 |             print(f"Test {'passed' if result == expected_result else 'failed'}\n")
102 | 
103 | 
104 | if __name__ == '__main__':
105 |     unittest.main()
106 | 


--------------------------------------------------------------------------------
/ditana_assistant/tests/test_engine/text_processors_regex_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2024, 2025 acrion innovations GmbH
 2 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
 3 | #
 4 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
 5 | #
 6 | # Ditana Assistant is offered under a commercial and under the AGPL license.
 7 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
 8 | 
 9 | # AGPL licensing:
10 | #
11 | # Ditana Assistant is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU Affero General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # Ditana Assistant is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU Affero General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU Affero General Public License
22 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
23 | 
24 | """
25 | This module contains unit tests for the text_processors_regex module of the Ditana Assistant.
26 | """
27 | 
28 | import unittest
29 | 
30 | from ditana_assistant.engine import text_processors_regex
31 | 
32 | 
33 | class TestRegex(unittest.TestCase):
34 |     """
35 |     Test various input analyzer functions.
36 |     """
37 |     def test_remove_works_and_phrases(self) -> None:
38 |         """
39 |         Simple case of remove_words_and_phrases. Just replace two words with one.
40 |         """
41 |         text = 'Ich bin Sherlock Holmes, der berühmte Detektiv. Und ich weiß noch viel mehr über Sie, als Sie sich vorstellen können. Bitte erzählen Sie mir mehr über Ihre Reise aus Baskerville.'
42 |         result = text_processors_regex.remove_words_and_phrases(text, "Sherlock Holmes", "Ditana")
43 |         self.assertEqual("Ich bin Ditana, der berühmte Detektiv. Und ich weiß noch viel mehr über Sie, als Sie sich vorstellen können. Bitte erzählen Sie mir mehr über Ihre Reise aus Baskerville.", result)
44 | 


--------------------------------------------------------------------------------
/evaluate-ditana-assistant:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | # Copyright (c) 2024, 2025 acrion innovations GmbH
 4 | # Authors: Stefan Zipproth, s.zipproth@acrion.ch
 5 | #
 6 | # This file is part of Ditana Assistant, see https://github.com/acrion/ditana-assistant and https://ditana.org/assistant
 7 | #
 8 | # Ditana Assistant is offered under a commercial and under the AGPL license.
 9 | # For commercial licensing, contact us at https://acrion.ch/sales. For AGPL licensing, see below.
10 | 
11 | # AGPL licensing:
12 | #
13 | # Ditana Assistant is free software: you can redistribute it and/or modify
14 | # it under the terms of the GNU Affero General Public License as published by
15 | # the Free Software Foundation, either version 3 of the License, or
16 | # (at your option) any later version.
17 | #
18 | # Ditana Assistant is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 | # GNU Affero General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Affero General Public License
24 | # along with Ditana Assistant. If not, see <https://www.gnu.org/licenses/>.
25 | 
26 | """
27 | This is the entry point for the benchmark (evaluation) of the Ditana Assistant.
28 | It imports the ditana_assistant.benchmark package and runs its main function.
29 | """
30 | 
31 | from ditana_assistant.benchmark import __main__
32 | 
33 | if __name__ == '__main__':
34 |     __main__.main()
35 | 


--------------------------------------------------------------------------------
/packaging/arch/.SRCINFO:
--------------------------------------------------------------------------------
 1 | pkgbase = ditana-assistant
 2 | 	pkgdesc = AI-powered assistant with GUI, capable of generating and executing terminal commands, offering optional contextual augmentation and Wolfram|Alpha integration
 3 | 	pkgver = 0.90.13
 4 | 	pkgrel = 1
 5 | 	url = https://github.com/acrion/ditana-assistant
 6 | 	arch = any
 7 | 	license = AGPL-3.0-or-later AND PSF-2.0 AND BSD-3-Clause AND Apache-2.0 AND MIT AND GPL
 8 | 	makedepends = gzip
 9 | 	makedepends = python-poetry
10 | 	depends = python
11 | 	depends = python-pywebview
12 | 	depends = python-requests
13 | 	depends = python-platformdirs
14 | 	depends = wmctrl
15 | 	source = file:///media/stefan/data/Documents/git/my-projects/acrion/ditana/packages/003-ditana-assistant/packaging/arch/../../ditana-assistant.desktop
16 | 	source = file:///media/stefan/data/Documents/git/my-projects/acrion/ditana/packages/003-ditana-assistant/packaging/arch/../../ditana-assistant.png
17 | 	source = file:///media/stefan/data/Documents/git/my-projects/acrion/ditana/packages/003-ditana-assistant/packaging/arch/../../ditana-assistant.1
18 | 	source = file:///media/stefan/data/Documents/git/my-projects/acrion/ditana/packages/003-ditana-assistant/packaging/arch/../../LICENSE
19 | 	source = file:///wheel-sources.tar.gz
20 | 	sha256sums = SKIP
21 | 	sha256sums = SKIP
22 | 	sha256sums = SKIP
23 | 	sha256sums = SKIP
24 | 	sha256sums = SKIP
25 | 
26 | pkgname = ditana-assistant
27 | 


--------------------------------------------------------------------------------
/packaging/arch/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !PKGBUILD
3 | !*.install
4 | !pre-build-hook
5 | !.gitignore
6 | !.SRCINFO
7 | 


--------------------------------------------------------------------------------
/packaging/arch/PKGBUILD:
--------------------------------------------------------------------------------
 1 | # Maintainer: Stefan Zipproth <s.zipproth@ditana.org>
 2 | # Author: Stefan Zipproth <s.zipproth@ditana.org>
 3 | 
 4 | pkgname=ditana-assistant
 5 | pkgver=$(grep '^version\s*=\s*' ../../pyproject.toml | sed 's/^version\s*=\s*"\(.*\)"$/\1/')  # version is read from ../../pyproject.toml
 6 | pkgrel=1
 7 | pkgdesc="$(grep '^description\s*=\s*' ../../pyproject.toml | sed 's/^description\s*=\s*"\(.*\)"$/\1/')"  # description is read from ../../pyproject.toml
 8 | arch=(any)
 9 | url="https://github.com/acrion/ditana-assistant"
10 | license=('AGPL-3.0-or-later AND PSF-2.0 AND BSD-3-Clause AND Apache-2.0 AND MIT AND GPL')
11 | conflicts=()
12 | depends=(python python-pywebview python-requests python-platformdirs wmctrl)
13 | makedepends=(gzip python-poetry)
14 | source=(  # all sources are local files addressed relative to the directory the PKGBUILD is evaluated in
15 |     "file://$(pwd)/../../ditana-assistant.desktop"
16 |     "file://$(pwd)/../../ditana-assistant.png"
17 |     "file://$(pwd)/../../ditana-assistant.1"
18 |     "file://$(pwd)/../../LICENSE"
19 |     "file://$(pwd)/wheel-sources.tar.gz"  # created by pre-build-hook; must be $(pwd), the unset variable ${pwd} expanded to file:///wheel-sources.tar.gz
20 | )
21 | 
22 | sha256sums=(  # one entry per source above; checksum verification is skipped for all of them
23 |     'SKIP'
24 |     'SKIP'
25 |     'SKIP'
26 |     'SKIP'
27 |     'SKIP'
28 | )
29 | 
30 | build() {  # Unpack the bundled sources and build the wheel with Poetry.
31 |     tar --no-same-owner --preserve-permissions -xzf "$srcdir/wheel-sources.tar.gz" -C ./  # extract into the current directory without restoring the archived file owner
32 |     echo "Current directory : $(pwd)"  # log where the build takes place
33 |     poetry build -f wheel  # build the wheel format only
34 |     echo "Built wheel:"
35 |     ls dist/*.whl  # list the wheel file(s) present in dist/
36 | }
37 | 
38 | package() {  # Populate $pkgdir with the wheel contents and the desktop integration files.
39 |     # Install the wheel into the package directory
40 |     python -m installer --destdir="$pkgdir" dist/*.whl  # NOTE(review): needs the Python "installer" module, which makedepends does not list explicitly - confirm it is always present
41 | 
42 |     # Install additional files
43 |     install -Dm644 "ditana-assistant.desktop" "$pkgdir/usr/share/applications/ditana-assistant.desktop"  # desktop entry
44 |     install -Dm644 "ditana-assistant.png"     "$pkgdir/usr/share/icons/hicolor/256x256/apps/ditana-assistant.png"  # application icon
45 |     install -Dm644 "ditana-assistant.1"       "$pkgdir/usr/share/man/man1/ditana-assistant.1"  # man page, section 1
46 |     install -Dm644 "LICENSE"             "$pkgdir/usr/share/licenses/$pkgname/LICENSE"  # license text
47 | }
48 | 


--------------------------------------------------------------------------------
/packaging/arch/pre-build-hook:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e  # pre-build step that packs the wheel sources into wheel-sources.tar.gz; abort on the first failing command
 3 | target_dir=$(pwd)  # remember the invocation directory, the tarball is written there
 4 | cd ../..  # move two levels up so the relative paths below resolve (presumably the repository root - confirm the hook is always run from packaging/arch)
 5 | tar -czvf "$target_dir/wheel-sources.tar.gz" \
 6 |     pyproject.toml \
 7 |     poetry.lock \
 8 |     ditana_assistant/ \
 9 |     ditana-assistant \
10 |     README.md
11 | 


--------------------------------------------------------------------------------
/pastime-sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/pastime-sample.png


--------------------------------------------------------------------------------
/wolfram_alpha_short_answers_api_key.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acrion/ditana-assistant/f813ff3e1690882719fd9ccea1a7005dbc9068fb/wolfram_alpha_short_answers_api_key.png


--------------------------------------------------------------------------------