├── Dockerfile
├── LICENSE
├── README.md
├── agents
    ├── base.py
    ├── code_exec_agent.py
    ├── default_agent.py
    ├── pdf_file_agent.py
    ├── pdf_link_agent.py
    └── selector.py
├── config.py
├── llm
    └── phi_wrapper.py
├── main.py
├── requirements.txt
├── sanitizer
    └── prompt_sanitizer.py
├── society_mind
    └── autogen_society.py
├── templates
    ├── code_instruction.txt
    ├── critic_instruction.txt
    ├── default_instruction.txt
    ├── finalizer_instruction.txt
    ├── generator_instruction.txt
    └── pdf_instruction.txt
└── utils
    ├── cache.py
    ├── docker_sandbox.py
    ├── exceptions.py
    ├── io.py
    ├── logger.py
    └── pdf_utils.py


/Dockerfile:
--------------------------------------------------------------------------------
  1 | #
  2 | # NOTE: THIS DOCKERFILE IS GENERATED VIA "apply-templates.sh"
  3 | #
  4 | # PLEASE DO NOT EDIT IT DIRECTLY.
  5 | #
  6 | 
  7 | FROM debian:bookworm-slim
  8 | 
  9 | # ensure local python is preferred over distribution python
 10 | ENV PATH /usr/local/bin:$PATH
 11 | 
 12 | # runtime dependencies
 13 | RUN set -eux; \
 14 | 	apt-get update; \
 15 | 	apt-get install -y --no-install-recommends \
 16 | 		ca-certificates \
 17 | 		netbase \
 18 | 		tzdata \
 19 | 	; \
 20 | 	rm -rf /var/lib/apt/lists/*
 21 | 
 22 | ENV PYTHON_VERSION 3.14.0a7
 23 | ENV PYTHON_SHA256 71adbcec3ac9edf93308e55cfb4184f2eb4b16fda2bb0a5a382929ed29c8386d
 24 | 
 25 | RUN set -eux; \
 26 | 	\
 27 | 	savedAptMark="$(apt-mark showmanual)"; \
 28 | 	apt-get update; \
 29 | 	apt-get install -y --no-install-recommends \
 30 | 		dpkg-dev \
 31 | 		gcc \
 32 | 		gnupg \
 33 | 		libbluetooth-dev \
 34 | 		libbz2-dev \
 35 | 		libc6-dev \
 36 | 		libdb-dev \
 37 | 		libffi-dev \
 38 | 		libgdbm-dev \
 39 | 		liblzma-dev \
 40 | 		libncursesw5-dev \
 41 | 		libreadline-dev \
 42 | 		libsqlite3-dev \
 43 | 		libssl-dev \
 44 | 		make \
 45 | 		tk-dev \
 46 | 		uuid-dev \
 47 | 		wget \
 48 | 		xz-utils \
 49 | 		zlib1g-dev \
 50 | 	; \
 51 | 	\
 52 | 	wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz"; \
 53 | 	echo "$PYTHON_SHA256 *python.tar.xz" | sha256sum -c -; \
 54 | 	mkdir -p /usr/src/python; \
 55 | 	tar --extract --directory /usr/src/python --strip-components=1 --file python.tar.xz; \
 56 | 	rm python.tar.xz; \
 57 | 	\
 58 | 	cd /usr/src/python; \
 59 | 	gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)"; \
 60 | 	./configure \
 61 | 		--build="$gnuArch" \
 62 | 		--enable-loadable-sqlite-extensions \
 63 | 		--enable-optimizations \
 64 | 		--enable-option-checking=fatal \
 65 | 		--enable-shared \
 66 | 		--with-lto \
 67 | 		--with-ensurepip \
 68 | 	; \
 69 | 	nproc="$(nproc)"; \
 70 | 	EXTRA_CFLAGS="$(dpkg-buildflags --get CFLAGS)"; \
 71 | 	LDFLAGS="$(dpkg-buildflags --get LDFLAGS)"; \
 72 | 	LDFLAGS="${LDFLAGS:--Wl},--strip-all"; \
 73 | 		arch="$(dpkg --print-architecture)"; arch="${arch##*-}"; \
 74 | # https://docs.python.org/3.12/howto/perf_profiling.html
 75 | # https://github.com/docker-library/python/pull/1000#issuecomment-2597021615
 76 | 		case "$arch" in \
 77 | 			amd64|arm64) \
 78 | 				# only add "-mno-omit-leaf" on arches that support it
 79 | 				# https://gcc.gnu.org/onlinedocs/gcc-14.2.0/gcc/x86-Options.html#index-momit-leaf-frame-pointer-2
 80 | 				# https://gcc.gnu.org/onlinedocs/gcc-14.2.0/gcc/AArch64-Options.html#index-momit-leaf-frame-pointer
 81 | 				EXTRA_CFLAGS="${EXTRA_CFLAGS:-} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer"; \
 82 | 				;; \
 83 | 			i386) \
 84 | 				# don't enable frame-pointers on 32bit x86 due to performance drop.
 85 | 				;; \
 86 | 			*) \
 87 | 				# other arches don't support "-mno-omit-leaf"
 88 | 				EXTRA_CFLAGS="${EXTRA_CFLAGS:-} -fno-omit-frame-pointer"; \
 89 | 				;; \
 90 | 		esac; \
 91 | 	make -j "$nproc" \
 92 | 		"EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \
 93 | 		"LDFLAGS=${LDFLAGS:-}" \
 94 | 	; \
 95 | # https://github.com/docker-library/python/issues/784
 96 | # prevent accidental usage of a system installed libpython of the same version
 97 | 	rm python; \
 98 | 	make -j "$nproc" \
 99 | 		"EXTRA_CFLAGS=${EXTRA_CFLAGS:-}" \
100 | 		"LDFLAGS=${LDFLAGS:--Wl},-rpath='\$\$ORIGIN/../lib'" \
101 | 		python \
102 | 	; \
103 | 	make install; \
104 | 	\
105 | 	cd /; \
106 | 	rm -rf /usr/src/python; \
107 | 	\
108 | 	find /usr/local -depth \
109 | 		\( \
110 | 			\( -type d -a \( -name test -o -name tests -o -name idle_test \) \) \
111 | 			-o \( -type f -a \( -name '*.pyc' -o -name '*.pyo' -o -name 'libpython*.a' \) \) \
112 | 		\) -exec rm -rf '{}' + \
113 | 	; \
114 | 	\
115 | 	ldconfig; \
116 | 	\
117 | 	apt-mark auto '.*' > /dev/null; \
118 | 	apt-mark manual $savedAptMark; \
119 | 	find /usr/local -type f -executable -not \( -name '*tkinter*' \) -exec ldd '{}' ';' \
120 | 		| awk '/=>/ { so = $(NF-1); if (index(so, "/usr/local/") == 1) { next }; gsub("^/(usr/)?", "", so); printf "*%s\n", so }' \
121 | 		| sort -u \
122 | 		| xargs -r dpkg-query --search \
123 | 		| cut -d: -f1 \
124 | 		| sort -u \
125 | 		| xargs -r apt-mark manual \
126 | 	; \
127 | 	apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; \
128 | 	rm -rf /var/lib/apt/lists/*; \
129 | 	\
130 | 	export PYTHONDONTWRITEBYTECODE=1; \
131 | 	python3 --version; \
132 | 	pip3 --version
133 | 
134 | # make some useful symlinks that are expected to exist ("/usr/local/bin/python" and friends)
135 | RUN set -eux; \
136 | 	for src in idle3 pip3 pydoc3 python3 python3-config; do \
137 | 		dst="$(echo "$src" | tr -d 3)"; \
138 | 		[ -s "/usr/local/bin/$src" ]; \
139 | 		[ ! -e "/usr/local/bin/$dst" ]; \
140 | 		ln -svT "$src" "/usr/local/bin/$dst"; \
141 | 	done
142 | 
143 | CMD ["python3"]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2025 by Enigma
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 🧠 MindForce: Intelligent Context-Aware Assistant [beta-version]
 2 | 
 3 | **Умный ассистент с поддержкой контекста, анализом документов и безопасным выполнением кода**
 4 | 
 5 | ---
 6 | 
 7 | ## 🌟 Особенности
 8 | 
 9 | - 📄 **Анализ PDF** (по ссылкам и локальным файлам)
10 | - ⌨️ **Безопасное выполнение кода** в Docker-песочнице
11 | - 🤖 **Итеративное улучшение ответов** (Society of Mind)
12 | - 🔒 **Защита от инъекций** с помощью BERT-модели
13 | - 🧩 **Модульная архитектура** с переиспользуемыми агентами
14 | - ⚡ **Умное кэширование** с учетом версий и данных
15 | 
16 | ---
17 | 
18 | ## 🏗 Архитектура системы
19 | 
20 | ```mermaid
21 | graph TD
22 |     A[Пользовательский запрос] --> B{Тип контента}
23 |     B -->|PDF-ссылка| C[PDFLinkAgent]
24 |     B -->|Код| D[CodeExecutionAgent]
25 |     B -->|Локальный PDF| E[PDFFileAgent]
26 |     B -->|Текст| F[DefaultAgent]
27 |     C --> G[Извлечение текста]
28 |     D --> H[Запуск в Docker]
29 |     E --> G
30 |     G --> I[Семантический поиск]
31 |     H --> J[Сбор результатов]
32 |     I --> K[Society of Mind]
33 |     J --> K
34 |     K --> L[Генерация ответа]
35 |     L --> M[Кэширование]
36 |     M --> N[Пользователь]
37 | ```


--------------------------------------------------------------------------------
/agents/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Any, Dict
 3 | from utils.exceptions import ProcessingError
 4 | 
 5 | class Agent(ABC):
 6 |     def __init__(self, config: Dict[str, Any] = None):
 7 |         self.config = config or {}
 8 |         self._validate_config()
 9 |         
10 |     def _validate_config(self):
11 |         """Проверка конфигурации агента"""
12 |         required_params = self.required_params()
13 |         for param in required_params:
14 |             if param not in self.config:
15 |                 raise ProcessingError(f"Missing required parameter: {param}")
16 | 
17 |     @staticmethod
18 |     @abstractmethod
19 |     def required_params() -> list:
20 |         """Список обязательных параметров конфигурации"""
21 |         return []
22 | 
23 |     @abstractmethod
24 |     async def execute(self, input_data: str) -> str:
25 |         """Основной метод выполнения задачи"""
26 |         pass
27 | 
28 |     def __repr__(self):
29 |         return f"<{self.__class__.__name__} config={self.config}>"


--------------------------------------------------------------------------------
/agents/code_exec_agent.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import Dict, Any
 3 | from .base import Agent
 4 | from utils.docker_sandbox import DockerSandbox
 5 | from utils.exceptions import (CodeExecutionError, ResourceLimitExceeded,
 6 |                          DockerSecurityException)
 7 | 
 8 | class CodeExecutionAgent(Agent):
 9 |     MAX_OUTPUT_LENGTH = 10000
10 |     BLACKLIST_PATTERNS = [
11 |         r"os\.system",
12 |         r"subprocess\.",
13 |         r"open\(",
14 |         r"import\s+(os|sys|subprocess)",
15 |         r"__import__",
16 |         r"eval\(",
17 |         r"exec\(",
18 |         r"pickle\.",
19 |         r"shutil\.",
20 |         r"socket\."
21 |     ]
22 | 
23 |     @staticmethod
24 |     def required_params():
25 |         return ["docker_config"]
26 | 
27 |     def __init__(self, config: Dict[str, Any]):
28 |         super().__init__(config)
29 |         self.sandbox = DockerSandbox(config["docker_config"])
30 |         
31 |     async def execute(self, input_data: str) -> str:
32 |         """Безопасное выполнение кода"""
33 |         try:
34 |             self._validate_code(input_data)
35 |             result = await self.sandbox.execute(input_data)
36 |             return self._sanitize_output(result)
37 |         except DockerSecurityException as e:
38 |             raise CodeExecutionError(f"Security violation: {str(e)}") from e
39 |         except Exception as e:
40 |             raise CodeExecutionError(str(e)) from e
41 | 
42 |     def _validate_code(self, code: str):
43 |         """Проверка кода на опасные паттерны"""
44 |         for pattern in self.BLACKLIST_PATTERNS:
45 |             if re.search(pattern, code):
46 |                 raise DockerSecurityException(f"Blocked pattern: {pattern}")
47 | 
48 |     def _sanitize_output(self, output: str) -> str:
49 |         """Санобработка вывода"""
50 |         if len(output) > self.MAX_OUTPUT_LENGTH:
51 |             raise ResourceLimitExceeded("Output too large")
52 |             
53 |         # Удаление чувствительной информации
54 |         cleaned = re.sub(r"(API_KEY|SECRET|PASSWORD)\s*=\s*'.*?'", "[REDACTED]", output)
55 |         return cleaned[:self.MAX_OUTPUT_LENGTH]


--------------------------------------------------------------------------------
/agents/default_agent.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Any
 2 | from .base import Agent
 3 | from utils.exceptions import ProcessingError
 4 | 
 5 | class DefaultAgent(Agent):
 6 |     async def execute(self, input_data: str) -> str:
 7 |         """Дефолтная обработка запроса"""
 8 |         try:
 9 |             return input_data  
10 |         except Exception as e:
11 |             raise ProcessingError(f"Default processing failed: {str(e)}") from e


--------------------------------------------------------------------------------
/agents/pdf_file_agent.py:
--------------------------------------------------------------------------------
 1 | import fitz
 2 | import os
 3 | from pathlib import Path
 4 | from typing import Dict, Any
 5 | from .base import Agent
 6 | from utils.exceptions import PDFProcessingError, ResourceLimitExceeded
 7 | 
 8 | class PDFFileAgent(Agent):
 9 |     MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB
10 |     ALLOWED_MIME_TYPES = ["application/pdf"]
11 |     
12 |     @staticmethod
13 |     def required_params():
14 |         return ["upload_dir", "embedding_model"]
15 | 
16 |     def __init__(self, config: Dict[str, Any]):
17 |         super().__init__(config)
18 |         self.upload_dir = Path(config["upload_dir"])
19 |         self.embedding_model = config["embedding_model"]
20 |         self._validate_upload_dir()
21 | 
22 |     def _validate_upload_dir(self):
23 |         """Проверка директории для загрузок"""
24 |         if not self.upload_dir.exists():
25 |             self.upload_dir.mkdir(parents=True)
26 |         if not os.access(self.upload_dir, os.W_OK):
27 |             raise PDFProcessingError("Upload directory not writable")
28 | 
29 |     async def execute(self, input_data: str) -> str:
30 |         """Обработка загруженного PDF"""
31 |         try:
32 |             file_path = self._validate_file(input_data)
33 |             text = self._parse_pdf(file_path)
34 |             return self._find_relevant_sections(text, input_data)
35 |         except Exception as e:
36 |             raise PDFProcessingError(str(e)) from e
37 | 
38 |     def _validate_file(self, input_data: str) -> Path:
39 |         """Валидация загруженного файла"""
40 |         file_match = re.search(r"<uploaded_file>(.+?)</uploaded_file>", input_data)
41 |         if not file_match:
42 |             raise PDFProcessingError("Invalid file format")
43 |             
44 |         file_path = self.upload_dir / file_match.group(1)
45 |         if not file_path.exists():
46 |             raise PDFProcessingError("File not found")
47 |             
48 |         if file_path.stat().st_size > self.MAX_FILE_SIZE:
49 |             raise ResourceLimitExceeded("File size exceeds limit")
50 |             
51 |         return file_path
52 | 
53 |     def _parse_pdf(self, file_path: Path) -> str:
54 |         """Парсинг PDF файла"""
55 |         try:
56 |             doc = fitz.open(file_path)
57 |             return "\n".join([page.get_text() for page in doc])
58 |         except fitz.FileDataError:
59 |             raise PDFProcessingError("Invalid PDF file structure")
60 |         except Exception as e:
61 |             raise PDFProcessingError(f"PDF parsing error: {str(e)}")
62 | 
63 |     def _find_relevant_sections(self, text: str, query: str) -> str:
64 |         """Поиск релевантных разделов"""
65 |         try:
66 |             chunks = text.split("\n\n")
67 |             query_embedding = self.embedding_model.encode(query)
68 |             doc_embeddings = self.embedding_model.encode(chunks)
69 |             
70 |             scores = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0]
71 |             top_indices = scores.argsort(descending=True)[:5]
72 |             
73 |             return "\n".join([chunks[i] for i in top_indices])
74 |         except Exception as e:
75 |             raise PDFProcessingError(f"Relevance search failed: {str(e)}")


--------------------------------------------------------------------------------
/agents/pdf_link_agent.py:
--------------------------------------------------------------------------------
import asyncio
import re
from io import BytesIO
from typing import Any, Dict, Optional

import fitz
import requests
from sentence_transformers import util

from .base import Agent
from utils.exceptions import (PDFProcessingError, NetworkError, 
                         ResourceLimitExceeded, SecurityException)
10 | 
class PDFLinkAgent(Agent):
    """Agent that downloads a PDF linked in the prompt and returns the passages closest to it."""

    MAX_PDF_SIZE = 10 * 1024 * 1024  # 10MB
    TIMEOUT = 15  # passed to requests as the request timeout

    @staticmethod
    def required_params():
        """Return the configuration keys this agent requires."""
        return ["embedding_model"]

    def __init__(self, config: Dict[str, Any]):
        """Validate ``config`` and keep a reference to the embedding model."""
        super().__init__(config)
        self.embedding_model = config["embedding_model"]

    async def execute(self, input_data: str) -> str:
        """Download the PDF linked in ``input_data`` and return its relevant sections.

        Raises:
            PDFProcessingError: On any failure; the original exception is
                attached as the cause.
        """
        try:
            url = self._extract_url(input_data)
            content = await self._download_pdf(url)
            text = self._parse_pdf(content)
            return self._find_relevant_sections(text, input_data)
        except PDFProcessingError:
            # Already the right type; do not nest it in another wrapper.
            raise
        except Exception as e:
            raise PDFProcessingError(str(e)) from e

    def _extract_url(self, text: str) -> str:
        """Return the first http(s) URL ending in ``.pdf`` found in ``text``.

        Raises:
            PDFProcessingError: If no such URL is present.
        """
        match = re.search(r'(https?://\S+\.pdf)', text)
        if not match:
            raise PDFProcessingError("No valid PDF URL found")
        return match.group(1)

    async def _download_pdf(self, url: str) -> bytes:
        """Download ``url`` without blocking the event loop and return the bytes.

        Raises:
            NetworkError: If the HTTP request fails.
            ResourceLimitExceeded: If the PDF is larger than ``MAX_PDF_SIZE``.
        """
        try:
            # ``requests`` is synchronous: its Session is not an async context
            # manager and its calls are not awaitable, so the blocking work
            # runs in a worker thread.
            return await asyncio.to_thread(self._fetch_pdf, url)
        except requests.RequestException as e:
            raise NetworkError(f"Failed to download PDF: {str(e)}") from e

    def _fetch_pdf(self, url: str) -> bytes:
        """Blocking download of ``url`` with the size limit enforced while streaming."""
        # NOTE(review): ``url`` is taken from the prompt and nothing here
        # restricts the target host — confirm SSRF protection exists upstream.
        with requests.Session() as session:
            with session.get(
                url,
                stream=True,
                timeout=self.TIMEOUT,
                headers={"User-Agent": "Mozilla/5.0"}
            ) as response:
                response.raise_for_status()

                # Fast rejection when the server declares an oversized body;
                # a malformed header is ignored instead of crashing int().
                declared = response.headers.get('Content-Length', '')
                if declared.isdigit() and int(declared) > self.MAX_PDF_SIZE:
                    raise ResourceLimitExceeded("PDF file size exceeds limit")

                # Content-Length may be absent or wrong, so the limit is also
                # enforced on the bytes actually received.
                payload = bytearray()
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    payload.extend(chunk)
                    if len(payload) > self.MAX_PDF_SIZE:
                        raise ResourceLimitExceeded("PDF file size exceeds limit")
                return bytes(payload)

    def _parse_pdf(self, content: bytes) -> str:
        """Return the text of every page of the in-memory PDF, joined by newlines.

        Raises:
            PDFProcessingError: If the data cannot be opened or parsed.
        """
        try:
            # The context manager closes the document once the text is read.
            with fitz.open(stream=content, filetype="pdf") as doc:
                return "\n".join(page.get_text() for page in doc)
        except fitz.FileDataError as e:
            raise PDFProcessingError("Invalid PDF file structure") from e
        except Exception as e:
            raise PDFProcessingError(f"PDF parsing error: {str(e)}") from e

    def _find_relevant_sections(self, text: str, query: str) -> str:
        """Return up to five blank-line-separated chunks of ``text`` most similar to ``query``.

        Chunks are ranked by cosine similarity between their embeddings and
        the query embedding, and joined by newlines in descending order.

        Raises:
            PDFProcessingError: If embedding or ranking fails.
        """
        try:
            # Whitespace-only chunks carry no content worth ranking.
            chunks = [chunk for chunk in text.split("\n\n") if chunk.strip()]
            if not chunks:
                return ""

            query_embedding = self.embedding_model.encode(query)
            doc_embeddings = self.embedding_model.encode(chunks)

            scores = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0]
            top_indices = scores.argsort(descending=True)[:5].tolist()

            return "\n".join(chunks[i] for i in top_indices)
        except Exception as e:
            raise PDFProcessingError(f"Relevance search failed: {str(e)}") from e


--------------------------------------------------------------------------------
/agents/selector.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import mimetypes
 3 | from typing import Optional
 4 | from .base import Agent
 5 | from .pdf_link_agent import PDFLinkAgent
 6 | from .code_exec_agent import CodeExecutionAgent
 7 | from .pdf_file_agent import PDFFileAgent
 8 | from .default_agent import DefaultAgent
 9 | from utils.exceptions import AgentSelectionError, SecurityException
10 | 
class AgentSelector:
    """Choose the agent that should handle a prompt.

    Selection order: PDF link, inline code, uploaded file, default agent.
    A basic safety screen runs before any agent is chosen.
    """

    def __init__(self):
        # Regexes whose presence marks the prompt as containing Python code.
        self.code_patterns = [
            r'(def\s+\w+\s*\(.*\):)',
            r'(class\s+\w+)',
            r'(import\s+\w+)',
            r'(print\(.*\))',
            r'(\#\!.*python)'
        ]
        # Matches an http(s) URL that ends in ".pdf".
        self.url_pattern = r'(https?:\/\/(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+\.pdf)'

    def select_agent(self, prompt: str) -> Agent:
        """Return the agent instance responsible for ``prompt``.

        Raises:
            SecurityException: If the prompt contains a forbidden pattern.
            AgentSelectionError: If an uploaded file is malformed or of an
                unsupported type, or selection fails unexpectedly.
        """
        try:
            # Safety screen before any agent is chosen.
            self._check_prompt_safety(prompt)

            # Determine the task type.
            if self._is_pdf_url(prompt):
                return PDFLinkAgent()

            if self._is_code(prompt):
                return CodeExecutionAgent()

            if self._has_uploaded_file(prompt):
                return self._handle_file_upload(prompt)

            return DefaultAgent()

        except (SecurityException, AgentSelectionError):
            # Let domain errors through untouched: callers distinguish a
            # security block from an ordinary failure by exception type, and
            # an AgentSelectionError must not be wrapped a second time.
            raise
        except Exception as e:
            raise AgentSelectionError(f"Agent selection failed: {str(e)}") from e

    def _check_prompt_safety(self, prompt: str):
        """Raise SecurityException if the prompt matches a forbidden pattern."""
        forbidden_patterns = [
            r'(\/etc\/passwd)',
            r'(file:\/\/)',
            r'(localhost:\d+)'
        ]
        for pattern in forbidden_patterns:
            if re.search(pattern, prompt):
                raise SecurityException(f"Dangerous pattern detected: {pattern}")

    def _is_pdf_url(self, text: str) -> bool:
        """Return True if the text contains an http(s) link ending in .pdf."""
        return bool(re.search(self.url_pattern, text))

    def _is_code(self, text: str) -> bool:
        """Return True if any code pattern occurs in the text."""
        return any(re.search(pattern, text) for pattern in self.code_patterns)

    def _has_uploaded_file(self, text: str) -> bool:
        """Return True if the text carries an ``<uploaded_file>`` marker."""
        return '<uploaded_file>' in text

    def _handle_file_upload(self, prompt: str) -> Agent:
        """Pick an agent from the MIME type guessed from the uploaded file name.

        Raises:
            AgentSelectionError: If the upload tag is malformed or the file
                type is not supported.
        """
        file_info = self._parse_upload(prompt)
        mime_type, _ = mimetypes.guess_type(file_info['name'])

        if mime_type == 'application/pdf':
            return PDFFileAgent()
        if mime_type in ('text/plain', 'text/x-python'):
            return CodeExecutionAgent()

        raise AgentSelectionError(f"Unsupported file type: {mime_type}")

    def _parse_upload(self, prompt: str) -> dict:
        """Extract the file name from ``<uploaded_file>name</uploaded_file>``.

        Returns:
            A dict with the single key ``'name'``.

        Raises:
            AgentSelectionError: If no well-formed upload tag is present.
        """
        match = re.search(r'<uploaded_file>(?P<name>.+?)</uploaded_file>', prompt)
        if not match:
            raise AgentSelectionError("Invalid file upload format")
        return {'name': match.group('name')}


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LastGuardian89/Mind-Force/6fa3ed29f17b451cdb2f76d925a9efc4ec2f72c9/config.py


--------------------------------------------------------------------------------
/llm/phi_wrapper.py:
--------------------------------------------------------------------------------
 1 | from transformers import AutoTokenizer, AutoModelForCausalLM
 2 | import torch
 3 | import os
 4 | 
 5 | class PhiLLM:
 6 |     def __init__(self, model_id="microsoft/phi-2"):
 7 |         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
 8 |         self.model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).cuda()
 9 | 
10 |     def _load_template(self, template_name):
11 |         path = os.path.join("templates", template_name)
12 |         with open(path) as f:
13 |             return f.read()
14 | 
15 |     def generate(self, prompt, context="", mode="auto"):
16 |         if mode == "pdf":
17 |             template = self._load_template("pdf_instruction.txt")
18 |             filled = template.format(context=context, question=prompt)
19 |         elif mode == "code":
20 |             template = self._load_template("code_instruction.txt")
21 |             filled = template.format(code=prompt, question="What does this code do?")
22 |         else:
23 |             template = self._load_template("default_instruction.txt")
24 |             filled = template.format(question=prompt)
25 | 
26 |         inputs = self.tokenizer(filled, return_tensors="pt").to(self.model.device)
27 |         outputs = self.model.generate(**inputs, max_new_tokens=300)
28 |         return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
29 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from typing import Optional
 3 | from agents.selector import AgentSelector
 4 | from .llm.phi_wrapper import PhiLLM
 5 | from .society_mind.autogen_society import SocietyMind
 6 | from .sanitizer.prompt_sanitizer import SanitizationPipeline
 7 | from .utils.io import get_input_data, send_response, log_request
 8 | from .utils.cache import check_cache, save_cache
 9 | from .utils.logger import setup_logging, RequestLogger
10 | from utils.exceptions import (SecurityException, ProcessingError, 
11 |                         NetworkError, ResourceLimitExceeded)
12 | 
class AIOrchestrator:
    """Run one user request through the full processing pipeline.

    Pipeline: sanitize input, consult the cache, select and run an agent,
    generate a draft answer, refine it with SocietyMind, store and return it.
    """

    def __init__(self):
        setup_logging()
        self.logger = RequestLogger()
        self.selector = AgentSelector()
        self.llm = PhiLLM()
        self.society = SocietyMind(self.llm)
        self.cache_enabled = True

    async def process_request(self, user_input: str) -> str:
        """Process a request and return the text to show to the user.

        Never raises: security violations, processing errors and unexpected
        failures are logged and mapped to fixed user-facing messages.

        Args:
            user_input: Raw text entered by the user.

        Returns:
            The refined answer, a cached answer, or an error message.
        """
        # Defined up front so the ``finally`` block can always log it.
        final_response: Optional[str] = None
        try:
            # Step 1: sanitize the input.
            clean_input = await SanitizationPipeline.process(user_input)

            # Step 2: check the cache.
            # NOTE(review): check_cache/save_cache are imported at module
            # level from utils.cache; confirm that module exposes them as
            # functions taking the raw prompt as key.
            if self.cache_enabled:
                cached = check_cache(clean_input)
                if cached:
                    self.logger.log("CACHE_HIT", {"input": clean_input})
                    final_response = cached
                    return cached

            # Step 3: select and run the agent.
            agent = self.selector.select_agent(clean_input)
            context = await agent.execute(clean_input)

            # Step 4: generate the draft answer. PhiLLM.generate is
            # synchronous, so run it in a worker thread to keep the event
            # loop responsive.
            raw_response = await asyncio.to_thread(
                self.llm.generate, clean_input, context
            )

            # Step 5: refinement rounds in SocietyMind.
            final_response = await self.society.refine_response(
                query=clean_input,
                context=context,
                initial_response=raw_response
            )

            # Step 6: store and return the result.
            if self.cache_enabled:
                save_cache(clean_input, final_response)
            return final_response

        except SecurityException as e:
            self.logger.log("SECURITY_BLOCK", {
                "input": user_input,
                "reason": str(e)
            })
            return "Request blocked for security reasons"

        except ProcessingError as e:
            self.logger.log("PROCESSING_ERROR", {
                "input": user_input,
                "error": str(e)
            })
            return "Error processing your request"

        except Exception as e:
            self.logger.log("INTERNAL_ERROR", {
                "input": user_input,
                "error": str(e)
            })
            return "Internal server error"

        finally:
            log_request(user_input, final_response)
75 | 
async def main_flow():
    """Interactive loop: read a request, process it, print the answer.

    The loop ends on Ctrl-C or when standard input is closed (EOF).
    """
    orchestrator = AIOrchestrator()
    while True:
        try:
            user_input = get_input_data()
            response = await orchestrator.process_request(user_input)
            send_response(response)
        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError once stdin is exhausted (piped input,
            # Ctrl-D); treat it like Ctrl-C and exit cleanly.
            break


if __name__ == "__main__":
    asyncio.run(main_flow())


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.30
2 | torch>=2.0
3 | python-dotenv>=0.19
4 | sentence-transformers>=2.2
5 | pymupdf>=1.22
6 | docker>=6.0
7 | aiohttp>=3.8


--------------------------------------------------------------------------------
/sanitizer/prompt_sanitizer.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import torch
 3 | from transformers import BertTokenizer, BertForSequenceClassification
 4 | from typing import Optional
 5 | from utils.exceptions import InjectionAttemptError, SecurityException
 6 | 
 7 | class PromptSanitizer:
 8 |     def __init__(self, model_path: str = "bert-prompt-sanitizer"):
 9 |         self.patterns = [
10 |             (r'(?i)(delete|drop|truncate)', "SQL injection"),
11 |             (r'<script.*?>', "HTML injection"),
12 |             (r'\{%|%\}', "Template injection"),
13 |             (r'__import__|eval\(|exec\(', "Code injection"),
14 |             (r'(ftp|ssh|sftp)://', "Dangerous protocol"),
15 |             (r'/etc/passwd', "Sensitive file access")
16 |         ]
17 |         
18 |         try:
19 |             self.tokenizer = BertTokenizer.from_pretrained(model_path)
20 |             self.model = BertForSequenceClassification.from_pretrained(model_path).eval()
21 |         except Exception as e:
22 |             raise RuntimeError(f"Failed to load security model: {str(e)}")
23 | 
24 |     def sanitize(self, prompt: str) -> str:
25 |         self._check_patterns(prompt)
26 |         self._check_ml(prompt)
27 |         return prompt
28 | 
29 |     def _check_patterns(self, text: str):
30 |         for pattern, description in self.patterns:
31 |             if re.search(pattern, text):
32 |                 raise InjectionAttemptError(f"Pattern detected: {description} - {pattern}")
33 | 
34 |     def _check_ml(self, text: str):
35 |         try:
36 |             inputs = self.tokenizer(
37 |                 text,
38 |                 return_tensors="pt",
39 |                 max_length=512,
40 |                 truncation=True
41 |             )
42 |             
43 |             with torch.no_grad():
44 |                 outputs = self.model(**inputs)
45 |             
46 |             probs = torch.softmax(outputs.logits, dim=1)
47 |             if probs[0][1].item() > 0.85:
48 |                 raise SecurityException("ML model detected malicious intent")
49 |                 
50 |         except Exception as e:
51 |             raise SecurityException(f"Security check failed: {str(e)}")
52 | 
class SanitizationPipeline:
    """Entry point that runs a prompt through a shared PromptSanitizer."""

    # Created on first use and reused afterwards, so the classifier is not
    # reloaded for every request.
    _sanitizer = None

    @staticmethod
    async def process(prompt: str) -> str:
        """Return ``prompt`` if it passes sanitization.

        Raises:
            SecurityException: If the prompt is rejected (the original
                subclass, e.g. InjectionAttemptError, is preserved) or if
                the sanitizer cannot be created or run (fail closed).
        """
        try:
            if SanitizationPipeline._sanitizer is None:
                SanitizationPipeline._sanitizer = PromptSanitizer()
            return SanitizationPipeline._sanitizer.sanitize(prompt)
        except SecurityException:
            # Already the right type; re-wrapping would lose the subclass.
            raise
        except Exception as e:
            raise SecurityException(str(e)) from e


--------------------------------------------------------------------------------
/society_mind/autogen_society.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import asyncio
  3 | import torch
  4 | import re
  5 | from typing import Optional, Tuple
  6 | from sentence_transformers import SentenceTransformer, util
  7 | from llm.phi_wrapper import PhiLLM
  8 | from utils.exceptions import QualityThresholdReached
  9 | 
 10 | class SocietyMind:
 11 |     def __init__(
 12 |         self,
 13 |         model: PhiLLM,
 14 |         max_rounds: int = 3,
 15 |         similarity_threshold: float = 0.85,
 16 |         quality_threshold: float = 0.7
 17 |     ):
 18 |         self.model = model
 19 |         self.max_rounds = max_rounds
 20 |         self.similarity_threshold = similarity_threshold
 21 |         self.quality_threshold = quality_threshold
 22 |         self.similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
 23 |         
 24 |         self.templates = {
 25 |             'generator': self._load_template("generator_instruction.txt"),
 26 |             'critic': self._load_template("critic_instruction.txt"),
 27 |             'finalizer': self._load_template("finalizer_instruction.txt")
 28 |         }
 29 | 
 30 |     async def refine_response(
 31 |         self,
 32 |         query: str,
 33 |         context: str,
 34 |         initial_response: str
 35 |     ) -> str:
 36 |         current_response = initial_response
 37 |         previous_response = ""
 38 |         iteration = 0
 39 |         
 40 |         while iteration < self.max_rounds:
 41 |             # 1. Generate critique with context
 42 |             critique = await self._generate_critique(query, current_response, context)
 43 |             
 44 |             # 2. Check stopping conditions
 45 |             stop_reason = self._check_stopping_conditions(
 46 |                 current_response,
 47 |                 previous_response,
 48 |                 context
 49 |             )
 50 |             if stop_reason:
 51 |                 print(f"Stopping iteration: {stop_reason}")
 52 |                 break
 53 |                 
 54 |             # 3. Generate improved response
 55 |             previous_response = current_response
 56 |             current_response = await self._generate_improved(
 57 |                 query,
 58 |                 context,
 59 |                 critique
 60 |             )
 61 |             
 62 |             iteration += 1
 63 |             
 64 |         return await self._finalize_response(current_response, context)
 65 | 
 66 |     def _check_stopping_conditions(
 67 |         self,
 68 |         current: str,
 69 |         previous: str,
 70 |         context: str
 71 |     ) -> Optional[str]:
 72 |         # 1. Check similarity between iterations
 73 |         iteration_similarity = self._calculate_similarity(current, previous)
 74 |         if iteration_similarity > self.similarity_threshold:
 75 |             return f"Iteration similarity {iteration_similarity:.2f}"
 76 |             
 77 |         # 2. Check quality score
 78 |         quality_score = self._calculate_quality_score(current, context)
 79 |         if quality_score >= self.quality_threshold:
 80 |             return f"Quality threshold {quality_score:.2f}"
 81 |             
 82 |         return None
 83 | 
 84 |     def _calculate_quality_score(self, response: str, context: str) -> float:
 85 | 
 86 |         context_sim = self._calculate_similarity(response, context)
 87 |         
 88 |         key_terms = self._extract_key_terms(context)
 89 |         coverage = sum(1 for term in key_terms if term in response) / len(key_terms)
 90 |         
 91 |         
 92 |         length_factor = min(max(len(response)/500, 0.5), 1.0)
 93 |         
 94 |         
 95 |         return 0.6*context_sim + 0.3*coverage + 0.1*length_factor
 96 | 
 97 |     def _extract_key_terms(self, text: str, top_n: int = 10) -> list:
 98 |         words = re.findall(r'\w+', text.lower())
 99 |         freq = {}
100 |         for word in words:
101 |             if word in freq:
102 |                 freq[word] += 1
103 |             else:
104 |                 freq[word] = 1
105 |         return sorted(freq.items(), key=lambda x: x[1], reverse=True)[:top_n]
106 | 
107 |     async def _generate_critique(
108 |         self,
109 |         query: str,
110 |         response: str,
111 |         context: str
112 |     ) -> str:
113 |         prompt = self.templates['critic'].format(
114 |             query=query,
115 |             response=response,
116 |             context=context
117 |         )
118 |         return await self._safe_generate(prompt)
119 | 
120 |     async def _generate_improved(
121 |         self,
122 |         query: str,
123 |         context: str,
124 |         critique: str
125 |     ) -> str:
126 |         prompt = self.templates['generator'].format(
127 |             query=query,
128 |             context=context,
129 |             feedback=critique
130 |         )
131 |         return await self._safe_generate(prompt)
132 | 
133 |     def _calculate_similarity(self, text1: str, text2: str) -> float:
134 |         if not text1 or not text2:
135 |             return 0.0
136 |         embeddings = self.similarity_model.encode([text1, text2])
137 |         return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()
138 | 
139 |     async def _safe_generate(self, prompt: str) -> str:
140 |         try:
141 |             inputs = self.model.tokenizer(
142 |                 prompt,
143 |                 return_tensors="pt",
144 |                 max_length=1024,
145 |                 truncation=True
146 |             ).to(self.model.device)
147 | 
148 |             outputs = await asyncio.to_thread(
149 |                 self.model.model.generate,
150 |                 **inputs,
151 |                 max_new_tokens=500,
152 |                 temperature=0.7,
153 |                 top_p=0.9,
154 |                 repetition_penalty=1.1
155 |             )
156 |             
157 |             return self.model.tokenizer.decode(
158 |                 outputs[0],
159 |                 skip_special_tokens=True
160 |             ).strip()
161 |         except Exception as e:
162 |             raise RuntimeError(f"Generation failed: {str(e)}")
163 | 
164 |     def _load_template(self, filename: str) -> str:
165 |         template_path = os.path.join("templates", filename)
166 |         with open(template_path, "r") as f:
167 |             return f.read()
168 | 
169 |     async def _finalize_response(self, response: str, context: str) -> str:
170 |         prompt = self.templates['finalizer'].format(
171 |             response=response,
172 |             context=context
173 |         )
174 |         return await self._safe_generate(prompt)


--------------------------------------------------------------------------------
/templates/code_instruction.txt:
--------------------------------------------------------------------------------
1 | Instruction: You are an AI engineer. The user will send a code snippet and possibly a related question. You must simulate the code in a secure Python environment, return the result, and optionally explain it.
2 | Input (Code):
3 | {code}
4 | 
5 | Question: {question}
6 | Output:


--------------------------------------------------------------------------------
/templates/critic_instruction.txt:
--------------------------------------------------------------------------------
 1 | [ROLE]
 2 | You are a quality assurance expert. Analyze this response considering the context from user documents/code.
 3 | 
 4 | [USER QUESTION]
 5 | {query}
 6 | 
 7 | [RESPONSE TO CRITIQUE]
 8 | {response}
 9 | 
10 | [CONTEXT FROM USER DATA]
11 | {context}
12 | 
13 | [INSTRUCTIONS]
14 | 1. Identify factual inconsistencies with context
15 | 2. Check technical accuracy
16 | 3. Verify source citations
17 | 4. Assess clarity for non-experts
18 | 5. Rate 1-5 with justification


--------------------------------------------------------------------------------
/templates/default_instruction.txt:
--------------------------------------------------------------------------------
1 | Instruction: You are a general-purpose assistant. Provide clear, accurate, and helpful answers to the user's questions.
2 | Question: {question}
3 | Output:


--------------------------------------------------------------------------------
/templates/finalizer_instruction.txt:
--------------------------------------------------------------------------------
 1 | [ROLE]
 2 | You are an editor. Refine this response to meet quality standards.
 3 | 
 4 | [RESPONSE DRAFT]
 5 | {response}
 6 | 
 7 | [CONTEXT]
 8 | {context}
 9 | 
10 | [INSTRUCTIONS]
11 | - Fix grammar and style issues
12 | - Ensure proper formatting
13 | - Add disclaimer if needed
14 | - Keep under 500 words


--------------------------------------------------------------------------------
/templates/generator_instruction.txt:
--------------------------------------------------------------------------------
 1 | [ROLE]
 2 | You are an AI assistant. Improve this response using feedback and context.
 3 | 
 4 | [ORIGINAL QUESTION]
 5 | {query}
 6 | 
 7 | [CRITIQUE FEEDBACK]
 8 | {feedback}
 9 | 
10 | [USER PROVIDED CONTEXT]
11 | {context}
12 | 
13 | [INSTRUCTIONS]
14 | 1. Address all feedback points
15 | 2. Cite relevant context sections
16 | 3. Maintain technical accuracy
17 | 4. Use markdown formatting


--------------------------------------------------------------------------------
/templates/pdf_instruction.txt:
--------------------------------------------------------------------------------
1 | Instruction: You are a helpful assistant. The user will provide a question and a PDF document (either via link or upload). Your task is to answer the question using only the information relevant from the PDF document.
2 | Input: {context}
3 | Question: {question}
4 | Output:
5 | 


--------------------------------------------------------------------------------
/utils/cache.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import json
  3 | import os
  4 | from datetime import datetime
  5 | from pathlib import Path
  6 | from typing import Optional, Dict, Any
  7 | 
  8 | class SmartCache:
  9 |     def __init__(self, cache_dir: str = "cache", ttl: int = 86400):
 10 |         self.cache_dir = Path(cache_dir)
 11 |         self.ttl = ttl  # Время жизни записи в секундах (по умолчанию 24 часа)
 12 |         self._init_cache_dir()
 13 | 
 14 |     def _init_cache_dir(self):
 15 |         self.cache_dir.mkdir(exist_ok=True, parents=True)
 16 | 
 17 |     def _get_key_path(self, key: str) -> Path:
 18 |         return self.cache_dir / f"{key}.json"
 19 | 
 20 |     def generate_key(
 21 |         self,
 22 |         prompt: str,
 23 |         context: str,
 24 |         model_version: str,
 25 |         data_hash: str
 26 |     ) -> str:
 27 |         """Генерация уникального ключа кэша"""
 28 |         key_data = f"{prompt}-{context}-{model_version}-{data_hash}"
 29 |         return hashlib.sha256(key_data.encode()).hexdigest()
 30 | 
 31 |     def check_cache(self, key: str) -> Optional[Dict[str, Any]]:
 32 |         """Проверка наличия записи в кэше"""
 33 |         key_path = self._get_key_path(key)
 34 |         
 35 |         if not key_path.exists():
 36 |             return None
 37 | 
 38 |         with open(key_path, 'r') as f:
 39 |             entry = json.load(f)
 40 |             
 41 |         if self._is_expired(entry['timestamp']):
 42 |             key_path.unlink()
 43 |             return None
 44 |             
 45 |         return entry['response']
 46 | 
 47 |     def save_cache(
 48 |         self,
 49 |         key: str,
 50 |         response: str,
 51 |         metadata: Optional[Dict] = None
 52 |     ):
 53 |         """Сохранение записи в кэш"""
 54 |         entry = {
 55 |             'timestamp': datetime.now().isoformat(),
 56 |             'response': response,
 57 |             'metadata': metadata or {}
 58 |         }
 59 |         
 60 |         with open(self._get_key_path(key), 'w') as f:
 61 |             json.dump(entry, f)
 62 | 
 63 |     def _is_expired(self, timestamp: str) -> bool:
 64 |         """Проверка истечения срока жизни записи"""
 65 |         entry_time = datetime.fromisoformat(timestamp)
 66 |         return (datetime.now() - entry_time).total_seconds() > self.ttl
 67 | 
 68 | class DataHasher:
 69 |     @staticmethod
 70 |     def hash_content(content: bytes) -> str:
 71 |         return hashlib.sha256(content).hexdigest()
 72 | 
 73 |     @classmethod
 74 |     def hash_file(cls, file_path: Path) -> str:
 75 |         with open(file_path, 'rb') as f:
 76 |             return cls.hash_content(f.read())
 77 | 
 78 |     @classmethod
 79 |     def hash_code(cls, code: str) -> str:
 80 |         return cls.hash_content(code.encode())
 81 | 
 82 | class CacheManager:
 83 |     def __init__(self, model: PhiLLM): #? Решить вопрос с PhiLLM
 84 |         self.cache = SmartCache()
 85 |         self.model = model
 86 |         self.hasher = DataHasher()
 87 | 
 88 |     async def process_request(
 89 |         self,
 90 |         prompt: str,
 91 |         context: str,
 92 |         data_source: Optional[Path] = None,
 93 |         code: Optional[str] = None
 94 |     ) -> Optional[str]:
 95 |         data_hash = self._get_data_hash(data_source, code)
 96 |         
 97 |         cache_key = self.cache.generate_key(
 98 |             prompt=prompt,
 99 |             context=context,
100 |             model_version=self.model.version,
101 |             data_hash=data_hash
102 |         )
103 |         
104 |         if cached := self.cache.check_cache(cache_key):
105 |             return cached
106 |         
107 |         return None
108 | 
109 |     def _get_data_hash(
110 |         self,
111 |         data_source: Optional[Path],
112 |         code: Optional[str]
113 |     ) -> str:
114 |         if data_source:
115 |             return self.hasher.hash_file(data_source)
116 |         if code:
117 |             return self.hasher.hash_code(code)
118 |         return "no_data"
119 | 
async def handle_user_request(prompt: str, context: str, file_path: Path):
    """Answer a request, serving it from the cache when possible.

    Args:
        prompt: The user prompt.
        context: Context text that accompanies the prompt.
        file_path: File whose content is part of the cache key.

    Returns:
        The cached response if one exists, otherwise the freshly computed
        response (which is stored in the cache before returning).
    """
    # Imported locally: this module has no top-level import of PhiLLM.
    from llm.phi_wrapper import PhiLLM

    model = PhiLLM()
    cache_manager = CacheManager(model)
    model_version = getattr(model, "version", "unknown")

    # Build the key once so lookup and store are guaranteed to agree and the
    # file is hashed a single time.
    cache_key = cache_manager.cache.generate_key(
        prompt=prompt,
        context=context,
        model_version=model_version,
        data_hash=cache_manager._get_data_hash(file_path, None)
    )

    cached_response = cache_manager.cache.check_cache(cache_key)
    if cached_response:
        return cached_response

    # NOTE(review): `process_request` is neither defined nor imported in the
    # visible part of this module — confirm which function is meant here.
    response = await process_request(prompt, context, file_path)

    cache_manager.cache.save_cache(
        key=cache_key,
        response=response,
        metadata={
            'source': str(file_path),
            'model_version': model_version
        }
    )

    return response


--------------------------------------------------------------------------------
/utils/docker_sandbox.py:
--------------------------------------------------------------------------------
 1 | import docker
 2 | from docker.errors import DockerException
 3 | from .exceptions import DockerSecurityException, ResourceLimitExceeded, CodeExecutionError
 4 | 
 5 | class DockerSandbox:
 6 |     def __init__(self):
 7 |         self.client = docker.from_env()
 8 |         self._validate_docker()
 9 |         
10 |     def _validate_docker(self):
11 |         try:
12 |             self.client.ping()
13 |         except DockerException:
14 |             raise RuntimeError("Docker daemon not available")
15 | 
16 |     async def execute(self, code: str, timeout=10, mem_limit='100m') -> str:
17 |         self._check_code_safety(code)
18 |         
19 |         try:
20 |             container = self.client.containers.run(
21 |                 image="python-sandbox:secure",
22 |                 command=f"timeout -s KILL {timeout} python -c '{code}'",
23 |                 mem_limit=mem_limit,
24 |                 network_mode="none",
25 |                 pids_limit=100,
26 |                 read_only=True,
27 |                 detach=True
28 |             )
29 |             
30 |             try:
31 |                 result = container.wait(timeout=timeout + 2)
32 |                 if result['StatusCode'] != 0:
33 |                     raise CodeExecutionError(f"Exit code {result['StatusCode']}")
34 |                     
35 |                 logs = container.logs().decode()
36 |                 self._check_output_safety(logs)
37 |                 return logs
38 |                 
39 |             except docker.errors.ContainerError as e:
40 |                 raise CodeExecutionError(str(e))
41 |             finally:
42 |                 container.remove(force=True)
43 |                 
44 |         except docker.errors.ImageNotFound:
45 |             raise CodeExecutionError("Sandbox image not found")
46 |         except Exception as e:
47 |             raise CodeExecutionError(str(e))
48 | 
49 |     def _check_code_safety(self, code: str):
50 |         dangerous_patterns = [
51 |             'os.system', 'subprocess', 'open(',
52 |             'import socket', 'import shutil',
53 |             '__import__', 'eval(', 'exec('
54 |         ]
55 |         
56 |         if any(pattern in code for pattern in dangerous_patterns):
57 |             raise DockerSecurityException(code)
58 | 
59 |     def _check_output_safety(self, output: str):
60 |         if len(output) > 10_000:
61 |             raise ResourceLimitExceeded("Output size")


--------------------------------------------------------------------------------
/utils/exceptions.py:
--------------------------------------------------------------------------------
 1 | class SecurityException(Exception):
 2 |     def __init__(self, message="Security violation detectted"):
 3 |         super().__init__(message)
 4 | 
 5 | class InjectionAttemptError(SecurityException):
 6 |     def __init__(self, pattern):
 7 |         super().__init__(f"Potential injection attempt detect: {pattern}")
 8 |     
 9 | class DockerSecurityException(SecurityException):
10 |     def __init__(self, code_snippet):
11 |         super().__init__(f"Dangerous code blocked: {code_snippet}")
12 | 
class ProcessingError(Exception):
    """Base class for processing errors"""
    def __init__(self, message="Processing failed"):
        super().__init__(message)

class PDFProcessingError(ProcessingError):
    """PDF-related errors"""
    def __init__(self, reason):
        super().__init__(f"PDF processing failed: {reason}")

class CodeExecutionError(ProcessingError):
    """Code execution errors"""
    def __init__(self, reason):
        super().__init__(f"Code execution failed: {reason}")

class ResourceLimitExceeded(ProcessingError):
    """Resource limitation errors"""
    def __init__(self, resource_type):
        super().__init__(f"{resource_type} limit exceeded")

class NetworkError(ProcessingError):
    """Network-related errors"""
    def __init__(self, url):
        super().__init__(f"Network operation failed for: {url}")

class AgentSelectionError(ProcessingError):
    """No agent could be selected for a request.

    Imported by agents/selector.py, which passes a complete message.
    """
    def __init__(self, message="Agent selection failed"):
        super().__init__(message)

class QualityThresholdReached(Exception):
    """Signal that a response reached the required quality.

    Imported by society_mind/autogen_society.py; defined here so that
    import succeeds.
    """
    def __init__(self, message="Quality threshold reached"):
        super().__init__(message)


--------------------------------------------------------------------------------
/utils/io.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import os
 3 | 
 4 | def get_input_data():
 5 |     return input("Enter your question/code/link or upload: ")
 6 | 
 7 | def send_response_to_user(response):
 8 |     print("\n\n[Final Response]:\n", response)
 9 | 
def log_request(prompt, response, log_path="logs/request_log.txt"):
    """Append one prompt/response record to the request log.

    Each record is a local-time ISO timestamp, the prompt, the response, and
    a line of 80 ``=`` characters as a separator.

    Args:
        prompt: The prompt to record.
        response: The response to record.
        log_path: File to append to. Defaults to ``logs/request_log.txt``,
            relative to the current working directory.
    """
    # open(..., "a") creates the file but not its parent directory, so a
    # missing "logs/" folder used to raise FileNotFoundError.
    log_dir = os.path.dirname(log_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    timestamp = datetime.datetime.now().isoformat()
    log_line = f"{timestamp} | PROMPT: {prompt}\nRESPONSE: {response}\n{'='*80}\n"
    # Explicit UTF-8 so non-ASCII text is written the same on every platform.
    with open(log_path, "a", encoding="utf-8") as log_file:
        log_file.write(log_line)


--------------------------------------------------------------------------------
/utils/logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import json
 3 | from datetime import datetime
 4 | 
 5 | def setup_logging():
 6 |     logging.basicConfig(
 7 |         level=logging.INFO,
 8 |         format='%(asctime)s - %(levelname)s - %(message)s',
 9 |         handlers=[
10 |             logging.FileHandler('app.log'),
11 |             logging.StreamHandler()
12 |         ]
13 |     )
14 | 
class RequestLogger:
    """Writes structured (JSON) event records to the 'security' logger."""

    def __init__(self):
        self.logger = logging.getLogger('security')

    def log(self, event_type: str, details: dict) -> None:
        """Log one event as a single JSON object at INFO level.

        Args:
            event_type: Label stored under the ``type`` key.
            details: Event payload stored under the ``details`` key.
        """
        # Imported locally because the module only imports ``datetime`` itself.
        from datetime import timezone

        log_entry = {
            # datetime.utcnow() is deprecated since Python 3.12 and returns a
            # naive value; an aware UTC timestamp carries an explicit "+00:00".
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'type': event_type,
            'details': details,
        }
        # default=str is deliberate: a logging call should not raise because
        # the payload holds a value JSON cannot encode (exception, path, ...).
        self.logger.info(json.dumps(log_entry, default=str))


--------------------------------------------------------------------------------
/utils/pdf_utils.py:
--------------------------------------------------------------------------------
 1 | import fitz  # PyMuPDF
 2 | import requests
 3 | from sentence_transformers import SentenceTransformer, util
 4 | 
# Sentence-embedding model, created once when this module is imported and
# reused by find_relevant_passages() on every call.
model = SentenceTransformer('all-MiniLM-L6-v2')
 6 | 
 7 | def extract_text_from_url_pdf(url):
 8 |     response = requests.get(url)
 9 |     with open("temp.pdf", 'wb') as f:
10 |         f.write(response.content)
11 |     return extract_text_from_uploaded_pdf("temp.pdf")
12 | 
def extract_text_from_uploaded_pdf(path='temp.pdf'):
    """Return the text of every page of the PDF at ``path``, joined by newlines.

    Args:
        path: Location of the PDF file. Defaults to ``temp.pdf`` in the
            current working directory.

    Returns:
        One string holding each page's ``get_text()`` output, separated by
        a newline.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises; previously the document was never closed.
    with fitz.open(path) as doc:
        return "\n".join(page.get_text() for page in doc)
16 | 
def find_relevant_passages(text, question, k=5):
    """Return up to ``k`` passages of ``text`` that best match ``question``.

    ``text`` is split into passages on blank lines. Passages and the question
    are embedded with the module-level ``model`` and ranked with
    ``util.semantic_search``.

    Args:
        text: Document text to search.
        question: Query to compare the passages against.
        k: Maximum number of passages to return.

    Returns:
        The selected passages joined by newlines, in the order returned by
        ``util.semantic_search``. An empty string when ``text`` holds no
        non-blank passage or ``k`` is not positive.
    """
    # Whitespace-only chunks carry no content; embedding them wastes work and
    # lets empty strings show up among the results.
    chunks = [chunk for chunk in text.split("\n\n") if chunk.strip()]
    if not chunks or k <= 0:
        return ""

    embeddings = model.encode(chunks, convert_to_tensor=True)
    question_emb = model.encode(question, convert_to_tensor=True)
    # semantic_search returns one hit list per query; there is a single query.
    top_k = util.semantic_search(question_emb, embeddings, top_k=k)[0]
    return "\n".join(chunks[hit['corpus_id']] for hit in top_k)


--------------------------------------------------------------------------------