├── .cursor ├── mcp.json └── rules │ └── restrictions.mdc ├── .env.example ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── changelog ├── config.yaml ├── config.yaml.bak ├── docs ├── development │ └── component-configuration-schemas.md ├── diagrams │ ├── class.mmd │ ├── error-handling.mmd │ ├── flow.mmd │ ├── history.mmd │ ├── ingestion.mmd │ ├── initialisation.mmd │ ├── multi-ingestor.mmd │ ├── query.mmd │ ├── state-management.mmd │ └── system.mmd ├── features │ ├── default-ingestion-pipeline.md │ ├── feat-dynamic-ingestor-pipeline.md │ ├── feat-repo-restructure.md │ ├── whatsapp-ingestor.md │ ├── whatsapp-nodejs-service.md │ └── whatsapp-python-ingestor.md ├── functional-requirements.md ├── guides │ ├── connector.md │ ├── embedder.md │ ├── generator.md │ ├── ingestor.md │ ├── preprocessor.md │ ├── prompt_builder.md │ ├── validator.md │ └── vector_store.md ├── project_structure.md ├── prompts.md └── tech-specs.md ├── examples ├── README.md ├── advanced_vector_query.py ├── chat_example.py ├── chroma_vector_store_example.py ├── config_utils_example.py ├── default_pipeline_example.py ├── query_vectorstore_example.py ├── sentence_transformer_embedder_example.py ├── structured_logger_example.py ├── telegram_debug.py ├── telegram_ingestor_example.py ├── telegram_session_string.py └── telegram_session_test.py ├── ici ├── __init__.py ├── adapters │ ├── __init__.py │ ├── chat │ │ ├── __init__.py │ │ └── json_chat_history_manager.py │ ├── controller │ │ ├── __init__.py │ │ └── command_line.py │ ├── embedders │ │ ├── __init__.py │ │ └── sentence_transformer.py │ ├── generators │ │ ├── __init__.py │ │ ├── factory.py │ │ ├── langchain_generator.py │ │ └── openai_generator.py │ ├── ingestors │ │ ├── __init__.py │ │ ├── telegram.py │ │ └── whatsapp.py │ ├── loggers │ │ ├── __init__.py │ │ └── structured_logger.py │ ├── orchestrators │ │ ├── __init__.py │ │ └── default_orchestrator.py │ ├── pipelines │ │ ├── __init__.py │ │ 
└── default.py │ ├── preprocessors │ │ ├── __init__.py │ │ ├── telegram.py │ │ └── whatsapp.py │ ├── prompt_builders │ │ ├── __init__.py │ │ └── basic_prompt_builder.py │ ├── user_id │ │ ├── __init__.py │ │ └── default_user_id_generator.py │ ├── validators │ │ ├── __init__.py │ │ └── rule_based.py │ └── vector_stores │ │ ├── __init__.py │ │ └── chroma.py ├── core │ ├── __init__.py │ ├── exceptions │ │ └── __init__.py │ └── interfaces │ │ ├── __init__.py │ │ ├── chat_history_manager.py │ │ ├── embedder.py │ │ ├── generator.py │ │ ├── ingestor.py │ │ ├── logger.py │ │ ├── orchestrator.py │ │ ├── pipeline.py │ │ ├── preprocessor.py │ │ ├── prompt_builder.py │ │ ├── user_id_generator.py │ │ ├── validator.py │ │ └── vector_store.py └── utils │ ├── __init__.py │ ├── component_loader.py │ ├── config.py │ ├── datetime_utils.py │ ├── load_env.py │ ├── print_banner.py │ └── state_manager.py ├── install.bat ├── install.sh ├── main.py ├── pytest.ini ├── requirements.txt ├── services └── whatsapp-service │ ├── .gitignore │ ├── README.md │ ├── config.js │ ├── package-lock.json │ ├── package.json │ └── src │ ├── api │ └── routes │ │ ├── auth.js │ │ ├── index.js │ │ ├── messages.js │ │ └── sessions.js │ ├── client │ ├── client-manager.js │ └── whatsapp-client.js │ ├── index.js │ ├── public │ └── index.html │ ├── utils │ ├── event-emitter.js │ ├── logger.js │ └── message-formatter.js │ └── websocket │ └── ws-server.js ├── setup.bat ├── setup.py ├── setup.sh ├── tests ├── __init__.py ├── adapters │ ├── generators │ │ ├── test_langchain_generator.py │ │ └── test_openai_generator.py │ ├── orchestrators │ │ └── test_default_orchestrator.py │ ├── prompt_builders │ │ └── test_basic_prompt_builder.py │ └── validators │ │ └── test_rule_based.py └── unit │ ├── __init__.py │ ├── adapters │ ├── __init__.py │ ├── test_chroma_store.py │ ├── test_sentence_transformer_embedder.py │ ├── test_structured_logger.py │ └── test_telegram_ingestor.py │ └── core │ └── __init__.py └── troubleshoot.md 
/.cursor/mcp.json: -------------------------------------------------------------------------------- 1 | { 2 | "mcpServers": { 3 | "linear": { 4 | "command": "npx", 5 | "args": [ 6 | "-y", 7 | "linear-mcp-server" 8 | ], 9 | "env": { 10 | "LINEAR_API_KEY": "lin_oauth_88b2bafda66d8df6b590f1f83e74611057c322074cd9de4768dfae5d4bf31239" 11 | } 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /.cursor/rules/restrictions.mdc: -------------------------------------------------------------------------------- 1 | --- 2 | description: 3 | globs: 4 | alwaysApply: true 5 | --- 6 | 7 | # Restrictions 8 | Use YAML file which exists in root "./config.yaml", do not create new yaml file or create function to create a test YAML file. 9 | Do not change function interface until specifically told to do so. Maintain the function name, arguments, output. -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # Telegram API credentials 2 | TELEGRAM_API_ID=your_telegram_api_id 3 | TELEGRAM_API_HASH=your_telegram_api_hash 4 | TELEGRAM_PHONE_NUMBER=your_phone_number_with_country_code 5 | TELEGRAM_SESSION_STRING=your_telegram_session_string 6 | 7 | # Generator API key 8 | GENERATOR_API_KEY=your_generator_api_key 9 | 10 | # Logger settings 11 | INGESTION_HOST=your_betterstack_host 12 | SOURCE_TOKEN=your_betterstack_source_token 13 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | 
*.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # Unit test / coverage reports 28 | htmlcov/ 29 | .tox/ 30 | .coverage 31 | .coverage.* 32 | .cache 33 | nosetests.xml 34 | coverage.xml 35 | *.cover 36 | .hypothesis/ 37 | .pytest_cache/ 38 | 39 | # Environments 40 | .env 41 | .venv 42 | env/ 43 | venv/ 44 | ENV/ 45 | env.bak/ 46 | venv.bak/ 47 | ici-env/ 48 | 49 | # IDE specific files 50 | .idea/ 51 | .vscode/ 52 | *.swp 53 | *.swo 54 | 55 | # Project specific 56 | examples/logs/ 57 | logs/ 58 | 59 | # DB 60 | db/ 61 | 62 | # Logs 63 | **.log 64 | logs/ 65 | 66 | **/chats/ 67 | services/**/data/ 68 | services/whatsapp-service/.wwebjs_cache/ 69 | services/ -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to ICI Core 2 | 3 | Thank you for your interest in contributing to ICI Core! This document provides guidelines and instructions for contributing to the project. 4 | 5 | ## Code of Conduct 6 | 7 | By participating in this project, you agree to uphold our Code of Conduct: 8 | 9 | - Be respectful and inclusive of all contributors 10 | - Exercise empathy and kindness in all interactions 11 | - Focus on constructive feedback and collaboration 12 | - Respect differing viewpoints and experiences 13 | 14 | ## Getting Started 15 | 16 | 1. Fork the repository 17 | 2. Clone your fork: `git clone https://github.com/yourusername/ici-core.git` 18 | 3. Create a branch for your changes: `git checkout -b feature/your-feature-name` 19 | 4. 
Install dependencies using the setup scripts or manual setup as described in the README 20 | 21 | ## Development Environment 22 | 23 | We recommend using a virtual environment for development: 24 | 25 | ```bash 26 | # Create a virtual environment 27 | python -m venv venv 28 | 29 | # Activate it 30 | # On macOS/Linux: 31 | source venv/bin/activate 32 | # On Windows: 33 | venv\Scripts\activate 34 | 35 | # Install dependencies 36 | pip install -r requirements.txt 37 | ``` 38 | 39 | ## Project Structure 40 | 41 | Please review the [Project Structure](docs/project_structure.md) documentation to understand how the codebase is organized. This will help you place your contributions in the correct locations. 42 | 43 | ## How to Contribute 44 | 45 | ### Reporting Bugs 46 | 47 | If you find a bug, please create an issue with: 48 | 49 | 1. A clear, descriptive title 50 | 2. Detailed steps to reproduce the bug 51 | 3. Expected and actual behavior 52 | 4. System information (OS, Python version, etc.) 53 | 5. Any relevant logs or screenshots 54 | 55 | ### Suggesting Features 56 | 57 | For feature suggestions: 58 | 59 | 1. Check if the feature has already been suggested or implemented 60 | 2. Create an issue with a clear title and detailed description 61 | 3. Explain the use case and benefits of the feature 62 | 4. If possible, outline a potential implementation approach 63 | 64 | ### Pull Requests 65 | 66 | When submitting a pull request: 67 | 68 | 1. Update the README.md with details of changes if applicable 69 | 2. Update any relevant documentation 70 | 3. Include tests that verify your changes 71 | 4. Ensure all tests pass locally 72 | 5. Link to any related issues 73 | 6. Follow the existing code style 74 | 75 | ## Coding Standards 76 | 77 | We follow these coding standards: 78 | 79 | 1. **PEP 8**: Follow Python's PEP 8 style guide 80 | 2. **Type Hints**: Use Python type hints for function parameters and return values 81 | 3. 
**Docstrings**: Include docstrings for all functions, classes, and modules 82 | 4. **Comments**: Add comments for complex logic 83 | 5. **File Structure**: Follow the project structure guidelines 84 | 85 | ### Code Style 86 | 87 | - We use the `black` formatter for Python code 88 | - Include type hints for function parameters and return values 89 | - Follow our naming conventions: 90 | - Classes: CamelCase (`MyClass`) 91 | - Functions/methods: snake_case (`my_function`) 92 | - Variables: snake_case (`my_variable`) 93 | - Constants: UPPER_CASE (`MY_CONSTANT`) 94 | 95 | ### Example Function 96 | 97 | ```python 98 | from typing import Dict, Any, Optional 99 | 100 | def process_data(data: Dict[str, Any], options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: 101 | """ 102 | Process the input data with optional configuration. 103 | 104 | Args: 105 | data: The input data to process 106 | options: Optional configuration parameters 107 | 108 | Returns: 109 | Processed data as a dictionary 110 | 111 | Raises: 112 | ValueError: If data is empty or invalid 113 | """ 114 | if not data: 115 | raise ValueError("Input data cannot be empty") 116 | 117 | processed_data = dict(data)  # Replace with the real processing logic 118 | 119 | return processed_data 120 | ``` 121 | 122 | ## Testing 123 | 124 | All contributions should include appropriate tests: 125 | 126 | - **Unit Tests**: Test individual components in isolation 127 | - **Integration Tests**: Test interactions between components 128 | - **Doctest Examples**: Include examples in docstrings when helpful 129 | 130 | Run tests using: 131 | 132 | ```bash 133 | pytest 134 | ``` 135 | 136 | ## Documentation 137 | 138 | Please update documentation for any new features or changes: 139 | 140 | - Update README.md if introducing new features 141 | - Update/create documentation in the docs folder 142 | - Include examples where appropriate 143 | - Update function/class docstrings 144 | 145 | ## Commit Messages 146 | 147 | Follow these guidelines for commit messages: 148 | 149 
| - Use the present tense ("Add feature" not "Added feature") 150 | - Use the imperative mood ("Move cursor to..." not "Moves cursor to...") 151 | - Limit the first line to 72 characters or less 152 | - Reference issues and pull requests after the first line 153 | 154 | ## Pull Request Process 155 | 156 | 1. Update documentation as needed 157 | 2. Include tests for your changes 158 | 3. Ensure all tests pass 159 | 4. Update the CHANGELOG.md with details of changes 160 | 5. Your PR will be reviewed by at least one maintainer 161 | 6. Once approved, a maintainer will merge your PR 162 | 163 | ## License 164 | 165 | By contributing to ICI Core, you agree that your contributions will be licensed under the project's MIT License. 166 | 167 | ## Questions? 168 | 169 | If you have any questions or need help, please create an issue with the "question" label or reach out to the project maintainers. 170 | 171 | Thank you for contributing to ICI Core! -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 ICI Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | # Core System Configuration 2 | system: 3 | loggers: 4 | structured_logger: 5 | console_output: true 6 | level: WARNING 7 | log_file: ./logs/structured_logger.log 8 | name: structured_logger 9 | source_token: $SOURCE_TOKEN 10 | host: $INGESTION_HOST 11 | use_betterstack: false 12 | state_manager: 13 | db_path: ./db/sql/ingestor_state.db 14 | validator: 15 | allowed_sources: 16 | - COMMAND_LINE 17 | rules: [] 18 | chat_history_manager: 19 | json: 20 | base_path: ./db/chat 21 | use_subdirectories: true 22 | file_permissions: 0o600 23 | default_message_limit: 20 24 | max_messages_per_chat: 1000 25 | user_id_generator: 26 | default: 27 | sources: 28 | - cli 29 | - web 30 | - api 31 | - test 32 | default_identifier: anonymous 33 | 34 | # Main Orchestrator Configuration 35 | orchestrator: 36 | # Core orchestrator settings 37 | error_messages: 38 | generation_failed: I'm having trouble generating a response right now. Please try again later. 39 | no_documents: I don't have specific information on that topic yet. 40 | validation_failed: I'm unable to process your request due to security restrictions. 
41 | generation_options: 42 | max_tokens: 1024 43 | temperature: 0.7 44 | num_results: 5 45 | similarity_threshold: 0.7 46 | rules_source: config 47 | user_context: 48 | default: 49 | permission_level: user 50 | validation_rules: 51 | default: [] 52 | 53 | # Components used by orchestrator 54 | generator: 55 | api_key: $GENERATOR_API_KEY 56 | base_url: http://localhost:11434 57 | base_retry_delay: 1 58 | chain_type: simple 59 | default_options: 60 | frequency_penalty: 0.0 61 | max_tokens: 1024 62 | presence_penalty: 0.0 63 | temperature: 0.7 64 | top_p: 1.0 65 | max_retries: 3 66 | memory: 67 | k: 5 68 | type: buffer 69 | model: deepseek-r1:32b 70 | provider: ollama 71 | type: langchain 72 | 73 | embedder: 74 | sentence_transformer: 75 | batch_size: 32 76 | device: cpu 77 | model_name: all-MiniLM-L6-v2 78 | 79 | vector_store: 80 | chroma: 81 | type: chroma 82 | collection_name: messages 83 | embedding_function: sentence_transformer 84 | persist_directory: ./db/vector/chroma_db 85 | 86 | prompt_builder: 87 | error_template: 'Unable to process: {error}' 88 | fallback_template: 'Answer based on general knowledge: {question}' 89 | template: "Context:\n{context}\n\nQuestion: {question}\n\nYour response should be conversational and helpful. Remember to continue from any previous conversation if relevant." 
90 | 91 | # Pipeline configurations 92 | pipelines: 93 | telegram: 94 | schedule: 95 | interval_minutes: 1 96 | ingestor: 97 | telegram: 98 | # Telegram API credentials 99 | api_hash: $TELEGRAM_API_HASH 100 | api_id: $TELEGRAM_API_ID 101 | phone_number: $TELEGRAM_PHONE_NUMBER 102 | session_string: $TELEGRAM_SESSION_STRING 103 | 104 | # Chat and message limits 105 | max_chats: 100 # Maximum number of chats to fetch (-1 for all available) 106 | max_messages_per_chat: 200 # Maximum messages per chat (-1 for all available) 107 | ignored_chats: [] # List of chat IDs to exclude from fetching 108 | 109 | # Rate limiting and batching settings 110 | batch_size: 50 # Number of messages to fetch in each request 111 | request_delay: 1.0 # Delay between batched requests in seconds 112 | preprocessor: 113 | chunk_size: 512 114 | include_overlap: true 115 | max_messages_per_chunk: 10 116 | time_window_minutes: 15 117 | store_chat_history: true 118 | chat_history_dir: "db/telegram_chats" 119 | 120 | whatsapp: 121 | batch_size: 100 122 | schedule: 123 | interval_minutes: 5 124 | ingestor: 125 | whatsapp: 126 | service_url: "http://localhost:3006" 127 | session_id: "default_session" 128 | request_timeout: 30 129 | preprocessor: 130 | chunk_size: 512 131 | include_overlap: true 132 | max_messages_per_chunk: 10 133 | time_window_minutes: 15 134 | store_chat_history: true 135 | chat_history_dir: "db/whatsapp_chats" 136 | -------------------------------------------------------------------------------- /config.yaml.bak: -------------------------------------------------------------------------------- 1 | embedders: 2 | sentence_transformer: 3 | batch_size: 32 4 | device: cpu 5 | model_name: all-MiniLM-L6-v2 6 | generator: 7 | api_key: $GENERATOR_API_KEY 8 | base_retry_delay: 1 9 | chain_type: simple 10 | default_options: 11 | frequency_penalty: 0.0 12 | max_tokens: 1024 13 | presence_penalty: 0.0 14 | temperature: 0.7 15 | top_p: 1.0 16 | max_retries: 3 17 | memory: 18 | k: 5 19 | type: 
buffer 20 | model: deepseek/deepseek-chat-v3-0324:free 21 | provider: openrouter 22 | type: langchain 23 | ingestors: 24 | telegram: 25 | api_hash: $TELEGRAM_API_HASH 26 | api_id: $TELEGRAM_API_ID 27 | phone_number: $TELEGRAM_PHONE_NUMBER 28 | request_delay: 0.5 29 | session_string: $TELEGRAM_SESSION_STRING 30 | loggers: 31 | structured_logger: 32 | console_output: true 33 | level: ERROR 34 | log_file: ./logs/structured_logger.log 35 | name: structured_logger 36 | source_token: $SOURCE_TOKEN 37 | host: $INGESTION_HOST 38 | use_betterstack: false 39 | orchestrator: 40 | error_messages: 41 | generation_failed: I'm having trouble generating a response right now. Please 42 | try again later. 43 | no_documents: I don't have specific information on that topic yet. 44 | validation_failed: I'm unable to process your request due to security restrictions. 45 | generation_options: 46 | max_tokens: 1024 47 | temperature: 0.7 48 | num_results: 5 49 | pipeline: 50 | auto_start: true 51 | ingestor_id: telegram 52 | rules_source: config 53 | similarity_threshold: 0.7 54 | user_context: 55 | default: 56 | permission_level: user 57 | validation_rules: 58 | default: [] 59 | pipelines: 60 | telegram: 61 | batch_size: 100 62 | schedule: 63 | interval_minutes: 1 64 | preprocessors: 65 | telegram: 66 | chunk_size: 512 67 | include_overlap: true 68 | max_messages_per_chunk: 10 69 | time_window_minutes: 15 70 | prompt_builder: 71 | error_template: 'Unable to process: {error}' 72 | fallback_template: 'Answer based on general knowledge: {question}' 73 | template: "Context:\n{context}\n\nQuestion: {question}.\n Your tone should be pleasant and friendly" 74 | state_manager: 75 | db_path: ./db/sql/ingestor_state.db 76 | validator: 77 | allowed_sources: 78 | - COMMAND_LINE 79 | rules: [] 80 | vector_stores: 81 | chroma: 82 | collection_name: telegram_messages 83 | embedding_function: sentence_transformer 84 | persist_directory: ./db/vector/chroma_db 85 | chat_history_manager: 86 | json: 87 | 
base_path: ./db/chat 88 | use_subdirectories: true 89 | file_permissions: 0o600 90 | default_message_limit: 20 91 | max_messages_per_chat: 1000 92 | user_id_generator: 93 | default: 94 | sources: 95 | - cli 96 | - web 97 | - api 98 | - test 99 | default_identifier: anonymous 100 | -------------------------------------------------------------------------------- /docs/diagrams/class.mmd: -------------------------------------------------------------------------------- 1 | classDiagram 2 | %% Ingestion Pipeline Classes 3 | class IngestionPipeline { 4 | +run_ingestion(ingestor_id: str) None 5 | +start() None 6 | } 7 | class Ingestor { 8 | +fetch_full_data() Any 9 | +fetch_new_data(since: Optional[datetime]) Any 10 | +fetch_data_in_range(start: datetime, end: datetime) Any 11 | } 12 | class Preprocessor { 13 | +preprocess(raw_data: Any) List[Dict[str, Any]] 14 | } 15 | class Embedder { 16 | +embed(text: str) List[float] 17 | } 18 | class VectorStore { 19 | +store_documents(documents: List[Dict[str, Any]]) None 20 | +search(query_vector: List[float], num_results: int, filters: Dict[str, Any]) List[Dict[str, Any]] 21 | } 22 | 23 | %% Query Pipeline Classes 24 | class Orchestrator { 25 | +process_query(user_id: str, input: str, chat_id: Optional[str]) Dict[str, Any] 26 | -get_rules(user_id: str) List[Dict[str, Any]] 27 | -build_context(user_id: str) Dict[str, Any] 28 | } 29 | class Validator { 30 | +validate(input: str, context: Dict[str, Any], rules: List[Dict[str, Any]]) bool 31 | } 32 | class PromptBuilder { 33 | +build_prompt(input: str, documents: List[Dict[str, Any]], chat_history: List[Dict[str, Any]]) str 34 | } 35 | class Generator { 36 | +generate(prompt: str) str 37 | } 38 | class ChatHistoryManager { 39 | +create_chat(user_id: str) str 40 | +add_message(chat_id: str, content: str, role: str, metadata: Optional[Dict[str, Any]]) str 41 | +get_messages(chat_id: str, limit: Optional[int]) List[Dict[str, Any]] 42 | +list_chats(user_id: str) List[Dict[str, Any]] 43 | 
+generate_title(chat_id: str) Optional[str] 44 | +rename_chat(chat_id: str, new_title: str) bool 45 | +delete_chat(chat_id: str) bool 46 | +export_chat(chat_id: str, format: str) Any 47 | } 48 | 49 | %% Shared Components 50 | class Logger { 51 | +debug(message: str, *args: Any) None 52 | +info(message: str, *args: Any) None 53 | +warning(message: str, *args: Any) None 54 | +error(message: str, *args: Any) None 55 | +critical(message: str, *args: Any) None 56 | } 57 | class ingestor_state { 58 | +ingestor_id: TEXT 59 | +last_timestamp: INTEGER 60 | +additional_metadata: TEXT 61 | } 62 | 63 | %% Relationships 64 | IngestionPipeline --> Ingestor : uses 65 | IngestionPipeline --> Preprocessor : uses 66 | IngestionPipeline --> Embedder : uses 67 | IngestionPipeline --> VectorStore : uses 68 | IngestionPipeline --> ingestor_state : manages state 69 | 70 | Orchestrator --> Validator : uses 71 | Orchestrator --> Embedder : uses 72 | Orchestrator --> VectorStore : uses 73 | Orchestrator --> PromptBuilder : uses 74 | Orchestrator --> Generator : uses 75 | Orchestrator --> ChatHistoryManager : uses 76 | 77 | PromptBuilder --> ChatHistoryManager : uses history from 78 | 79 | Logger <.. IngestionPipeline : logs 80 | Logger <.. Orchestrator : logs 81 | Logger <.. Ingestor : logs 82 | Logger <.. Preprocessor : logs 83 | Logger <.. Embedder : logs 84 | Logger <.. VectorStore : logs 85 | Logger <.. Validator : logs 86 | Logger <.. PromptBuilder : logs 87 | Logger <.. Generator : logs 88 | Logger <.. ChatHistoryManager : logs 89 | 90 | %% Comments 91 | note for IngestionPipeline "Coordinates ingestion components and manages scheduling." 92 | note for Orchestrator "Manages the query workflow from validation to response generation." 93 | note for ingestor_state "Database table for tracking ingestion progress." 94 | note for ChatHistoryManager "Manages persistent chat sessions and message history." 
-------------------------------------------------------------------------------- /docs/diagrams/error-handling.mmd: -------------------------------------------------------------------------------- 1 | sequenceDiagram 2 | participant IngestionPipeline 3 | participant Ingestor 4 | participant Logger 5 | 6 | loop Retry up to 3 times 7 | IngestionPipeline->>Ingestor: fetch_new_data(since=last_timestamp) 8 | alt Fetch succeeds 9 | Ingestor-->>IngestionPipeline: raw_data 10 | IngestionPipeline->>Logger: Log successful fetch 11 | else Fetch fails 12 | Ingestor-->>IngestionPipeline: IngestorError 13 | IngestionPipeline->>Logger: Log error and retry 14 | end 15 | end 16 | alt Max retries exceeded 17 | IngestionPipeline->>Logger: Log critical error, skip cycle 18 | end -------------------------------------------------------------------------------- /docs/diagrams/flow.mmd: -------------------------------------------------------------------------------- 1 | graph TD 2 | Start --> InitializeSystem 3 | InitializeSystem --> LoadConfig 4 | LoadConfig --> CreateDatabaseConnection 5 | CreateDatabaseConnection --> InitializeLogger 6 | InitializeLogger --> InitializeComponents 7 | InitializeComponents --> StartIngestionPipeline 8 | StartIngestionPipeline --> IngestionLoop 9 | IngestionLoop --> FetchData 10 | FetchData --> ProcessData 11 | ProcessData --> EmbedData 12 | EmbedData --> StoreData 13 | StoreData --> UpdateState 14 | UpdateState --> LogIngestion 15 | LogIngestion --> WaitForNextCycle 16 | 17 | InitializeComponents --> ReadyForQueries 18 | ReadyForQueries --> ReceiveQuery 19 | ReceiveQuery --> ValidateQuery 20 | ValidateQuery --> EmbedQuery 21 | EmbedQuery --> RetrieveDocuments 22 | RetrieveDocuments --> BuildPrompt 23 | BuildPrompt --> GenerateResponse 24 | GenerateResponse --> ReturnResponse 25 | ReturnResponse --> LogQuery 26 | 27 | WaitForNextCycle --> IngestionLoop 28 | LogQuery --> ReceiveQuery 
-------------------------------------------------------------------------------- /docs/diagrams/history.mmd: -------------------------------------------------------------------------------- 1 | classDiagram 2 | class User { 3 | +user_id: string 4 | +source: string 5 | +identifier: string 6 | } 7 | 8 | class Chat { 9 | +chat_id: string 10 | +user_id: string 11 | +title: string 12 | +created_at: datetime 13 | +updated_at: datetime 14 | +message_count: int 15 | +is_pinned: bool 16 | +last_message_preview: string 17 | } 18 | 19 | class Message { 20 | +message_id: string 21 | +chat_id: string 22 | +role: string 23 | +content: string 24 | +created_at: datetime 25 | +metadata: Dict 26 | } 27 | 28 | class ChatHistoryManager { 29 | +create_chat(user_id: str) str 30 | +add_message(chat_id: str, content: str, role: str, metadata: Dict) str 31 | +get_messages(chat_id: str, limit: int) List[Dict] 32 | +list_chats(user_id: str) List[Dict] 33 | +generate_title(chat_id: str) Optional[str] 34 | +rename_chat(chat_id: str, new_title: str) bool 35 | +delete_chat(chat_id: str) bool 36 | +export_chat(chat_id: str, format: str) Any 37 | } 38 | 39 | class JSONStorage { 40 | -base_path: str 41 | +save_chat(chat: Chat) None 42 | +save_message(message: Message) None 43 | +load_chat(chat_id: str) Chat 44 | +load_messages(chat_id: str) List[Message] 45 | +list_user_chats(user_id: str) List[Chat] 46 | +delete_chat(chat_id: str) bool 47 | } 48 | 49 | class DatabaseStorage { 50 | -conn: Connection 51 | +save_chat(chat: Chat) None 52 | +save_message(message: Message) None 53 | +load_chat(chat_id: str) Chat 54 | +load_messages(chat_id: str) List[Message] 55 | +list_user_chats(user_id: str) List[Chat] 56 | +delete_chat(chat_id: str) bool 57 | } 58 | 59 | User "1" --> "many" Chat : has 60 | Chat "1" --> "many" Message : contains 61 | ChatHistoryManager --> JSONStorage : uses 62 | ChatHistoryManager --> DatabaseStorage : uses 63 | 64 | note for User "Represents a user with a unique composite ID" 65 | 
note for Chat "Represents a single conversation thread" 66 | note for Message "Individual message in a chat session" 67 | note for JSONStorage "Stores chats as JSON files in directory structure" 68 | note for DatabaseStorage "Stores chats in database tables (alternative)" 69 | 70 | %% Example file storage structure 71 | note "JSON Storage Example:\n./chats/{user_id}/{chat_id}.json" -------------------------------------------------------------------------------- /docs/diagrams/ingestion.mmd: -------------------------------------------------------------------------------- 1 | sequenceDiagram 2 | participant IngestionPipeline 3 | participant Database 4 | participant Ingestor 5 | participant Preprocessor 6 | participant Embedder 7 | participant VectorStore 8 | participant Logger 9 | 10 | loop Every Interval 11 | IngestionPipeline->>Database: Retrieve last_timestamp for ingestor_id 12 | Database-->>IngestionPipeline: last_timestamp 13 | IngestionPipeline->>Ingestor: fetch_new_data(since=last_timestamp) 14 | Ingestor-->>IngestionPipeline: raw_data 15 | IngestionPipeline->>Preprocessor: preprocess(raw_data) 16 | Preprocessor-->>IngestionPipeline: processed_data 17 | IngestionPipeline->>Embedder: embed(processed_data['text']) 18 | Embedder-->>IngestionPipeline: vectors 19 | IngestionPipeline->>VectorStore: store_documents(processed_data, vectors) 20 | VectorStore-->>IngestionPipeline: success 21 | IngestionPipeline->>Database: Update last_timestamp for ingestor_id 22 | Database-->>IngestionPipeline: success 23 | IngestionPipeline->>Logger: Log ingestion cycle completion 24 | end -------------------------------------------------------------------------------- /docs/diagrams/initialisation.mmd: -------------------------------------------------------------------------------- 1 | sequenceDiagram 2 | participant Main 3 | participant ConfigLoader 4 | participant Database 5 | participant Logger 6 | participant Ingestor 7 | participant Preprocessor 8 | participant 
Embedder 9 | participant VectorStore 10 | participant Validator 11 | participant PromptBuilder 12 | participant Generator 13 | participant IngestionPipeline 14 | participant Orchestrator 15 | 16 | Main->>ConfigLoader: Load configuration from YAML 17 | ConfigLoader-->>Main: config 18 | Main->>Database: Connect to SQLite (config["database"]["path"]) 19 | Database-->>Main: db_connection 20 | Main->>Logger: Initialize Logger (config["logger"]) 21 | Logger-->>Main: logger_instance 22 | Main->>Ingestor: Initialize Ingestor (config["ingestors"]) 23 | Ingestor-->>Main: ingestor_instance 24 | Main->>Preprocessor: Initialize Preprocessor 25 | Preprocessor-->>Main: preprocessor_instance 26 | Main->>Embedder: Initialize Embedder (config["embedder"]) 27 | Embedder-->>Main: embedder_instance 28 | Main->>VectorStore: Initialize VectorStore (config["vector_store"]) 29 | VectorStore-->>Main: vector_store_instance 30 | Main->>Validator: Initialize Validator (config["validator"]) 31 | Validator-->>Main: validator_instance 32 | Main->>PromptBuilder: Initialize PromptBuilder (config["prompt_builder"]) 33 | PromptBuilder-->>Main: prompt_builder_instance 34 | Main->>Generator: Initialize Generator (config["generator"]) 35 | Generator-->>Main: generator_instance 36 | Main->>IngestionPipeline: Initialize IngestionPipeline(ingestor, preprocessor, embedder, vector_store, db_connection, logger, config["ingestion_pipeline"]) 37 | IngestionPipeline-->>Main: ingestion_pipeline_instance 38 | Main->>Orchestrator: Initialize Orchestrator(validator, embedder, vector_store, prompt_builder, generator, logger, config["orchestrator"]) 39 | Orchestrator->>Orchestrator: Configure rules_source and context_filters 40 | Orchestrator->>Orchestrator: Set up retry mechanisms 41 | Orchestrator-->>Main: orchestrator_instance 42 | Main->>IngestionPipeline: Start ingestion in a separate thread 43 | Main->>Orchestrator: Ready to process queries 
-------------------------------------------------------------------------------- /docs/diagrams/multi-ingestor.mmd: -------------------------------------------------------------------------------- 1 | sequenceDiagram 2 | participant IngestionPipeline 3 | participant Database 4 | participant Ingestor1 5 | participant Ingestor2 6 | participant Logger 7 | 8 | loop Every Interval 9 | IngestionPipeline->>Database: Retrieve last_timestamp for ingestor_id1 10 | Database-->>IngestionPipeline: last_timestamp1 11 | IngestionPipeline->>Ingestor1: fetch_new_data(since=last_timestamp1) 12 | Ingestor1-->>IngestionPipeline: raw_data1 13 | IngestionPipeline->>Logger: Log ingestion for ingestor_id1 14 | 15 | IngestionPipeline->>Database: Retrieve last_timestamp for ingestor_id2 16 | Database-->>IngestionPipeline: last_timestamp2 17 | IngestionPipeline->>Ingestor2: fetch_new_data(since=last_timestamp2) 18 | Ingestor2-->>IngestionPipeline: raw_data2 19 | IngestionPipeline->>Logger: Log ingestion for ingestor_id2 20 | end -------------------------------------------------------------------------------- /docs/diagrams/query.mmd: -------------------------------------------------------------------------------- 1 | sequenceDiagram 2 | participant User 3 | participant Orchestrator 4 | participant ChatHistoryManager 5 | participant Validator 6 | participant Embedder 7 | participant VectorStore 8 | participant PromptBuilder 9 | participant Generator 10 | participant Logger 11 | 12 | User->>Orchestrator: process_query(user_id, input, chat_id?) 
13 | 14 | alt chat_id is None 15 | Orchestrator->>ChatHistoryManager: create_chat(user_id) 16 | ChatHistoryManager-->>Orchestrator: new_chat_id 17 | else 18 | Note over Orchestrator: Use provided chat_id 19 | end 20 | 21 | Orchestrator->>Orchestrator: get_rules(user_id) 22 | Orchestrator->>Orchestrator: build_context(user_id) 23 | Orchestrator->>Validator: validate(input, context, rules) 24 | Validator-->>Orchestrator: validation_result 25 | 26 | alt validation_result == True 27 | Orchestrator->>ChatHistoryManager: get_messages(chat_id, limit) 28 | ChatHistoryManager-->>Orchestrator: chat_history 29 | 30 | Orchestrator->>Embedder: embed(input) 31 | Embedder-->>Orchestrator: query_vector 32 | 33 | Orchestrator->>VectorStore: search(query_vector, num_results, filters) 34 | VectorStore-->>Orchestrator: documents 35 | 36 | Orchestrator->>PromptBuilder: build_prompt(input, documents, chat_history) 37 | PromptBuilder-->>Orchestrator: prompt 38 | 39 | Orchestrator->>Generator: generate(prompt) 40 | Generator-->>Orchestrator: response 41 | 42 | Orchestrator->>ChatHistoryManager: add_message(chat_id, input, "user") 43 | Orchestrator->>ChatHistoryManager: add_message(chat_id, response, "assistant") 44 | 45 | alt New or short chat 46 | Orchestrator->>ChatHistoryManager: generate_title(chat_id) 47 | ChatHistoryManager-->>Orchestrator: title 48 | end 49 | 50 | Orchestrator-->>User: {"response": response, "chat_id": chat_id} 51 | else 52 | Orchestrator-->>User: {"error": "Access denied"} 53 | end 54 | 55 | Orchestrator->>Logger: Log query processing -------------------------------------------------------------------------------- /docs/diagrams/state-management.mmd: -------------------------------------------------------------------------------- 1 | sequenceDiagram 2 | participant IngestionPipeline 3 | participant Database 4 | participant Logger 5 | 6 | IngestionPipeline->>Database: SELECT last_timestamp FROM ingestor_state WHERE ingestor_id='id' 7 | 
Database-->>IngestionPipeline: last_timestamp 8 | IngestionPipeline->>Logger: Log retrieved last_timestamp 9 | %% Ingestion process occurs here... 10 | IngestionPipeline->>Database: UPDATE ingestor_state SET last_timestamp=new_timestamp WHERE ingestor_id='id' 11 | Database-->>IngestionPipeline: success 12 | IngestionPipeline->>Logger: Log state update -------------------------------------------------------------------------------- /docs/diagrams/system.mmd: -------------------------------------------------------------------------------- 1 | graph TD 2 | %% Ingestion Pipeline 3 | subgraph Ingestion Pipeline 4 | Ingestor --> Preprocessor 5 | Preprocessor --> Embedder 6 | Embedder --> VectorStore 7 | IngestionPipeline --> Ingestor 8 | IngestionPipeline --> Preprocessor 9 | IngestionPipeline --> Embedder 10 | IngestionPipeline --> VectorStore 11 | IngestionPipeline --> Database[ingestor_state] 12 | end 13 | 14 | %% Query Pipeline 15 | subgraph Query Pipeline 16 | Orchestrator --> Validator 17 | Orchestrator --> Embedder 18 | Orchestrator --> VectorStore 19 | Orchestrator --> PromptBuilder 20 | Orchestrator --> Generator 21 | end 22 | 23 | %% Shared Components 24 | Logger --> IngestionPipeline 25 | Logger --> Orchestrator 26 | Logger --> Ingestor 27 | Logger --> Preprocessor 28 | Logger --> Embedder 29 | Logger --> VectorStore 30 | Logger --> Validator 31 | Logger --> PromptBuilder 32 | Logger --> Generator 33 | 34 | %% Database 35 | Database --> IngestionPipeline 36 | 37 | %% Notes 38 | classDef pipeline fill:#f9f,stroke:#333,stroke-width:2px; 39 | class IngestionPipeline,Orchestrator pipeline; -------------------------------------------------------------------------------- /docs/guides/prompt_builder.md: -------------------------------------------------------------------------------- 1 | # Prompt Builder Component Guide 2 | 3 | ## Overview 4 | 5 | A Prompt Builder creates the prompt that is sent to a language model for generating responses. 
It formats queries and retrieved documents into a structured prompt template. 6 | 7 | The Prompt Builder is an optional component - you don't need to implement a custom Prompt Builder when simply connecting a new data source. 8 | 9 | ## Interface 10 | 11 | All prompt builders must implement the `PromptBuilder` interface: 12 | 13 | ```python 14 | class PromptBuilder(ABC): 15 | @abstractmethod 16 | async def initialize(self) -> None: 17 | """Initialize the prompt builder with configuration parameters.""" 18 | pass 19 | 20 | @abstractmethod 21 | async def build_prompt( 22 | self, 23 | query: str, 24 | documents: List[Dict[str, Any]], 25 | **kwargs 26 | ) -> Dict[str, Any]: 27 | """ 28 | Build a prompt from a query and relevant documents. 29 | 30 | Args: 31 | query: The user's query 32 | documents: Relevant documents retrieved for the query 33 | **kwargs: Additional parameters for prompt building 34 | 35 | Returns: 36 | Dict[str, Any]: The built prompt with all necessary components 37 | """ 38 | pass 39 | ``` 40 | 41 | ## Basic Implementation 42 | 43 | A simple prompt builder might combine query and documents: 44 | 45 | ```python 46 | async def build_prompt( 47 | self, 48 | query: str, 49 | documents: List[Dict[str, Any]], 50 | **kwargs 51 | ) -> Dict[str, Any]: 52 | """Build a basic prompt from query and documents.""" 53 | 54 | # Format documents for the prompt 55 | formatted_docs = [] 56 | for i, doc in enumerate(documents): 57 | doc_text = doc.get("text", "") 58 | doc_source = doc.get("metadata", {}).get("source", f"Document {i+1}") 59 | formatted_docs.append(f"Source: {doc_source}\n{doc_text}") 60 | 61 | # Combine documents 62 | context = "\n\n".join(formatted_docs) 63 | 64 | # Build the prompt template 65 | prompt = f""" 66 | Answer the following question based on the provided context. If the answer cannot be determined from the context, say "I don't have enough information to answer this question." 
67 | 68 | Context: 69 | {context} 70 | 71 | Question: {query} 72 | 73 | Answer: 74 | """ 75 | 76 | return { 77 | "prompt": prompt, 78 | "template_type": "basic_rag" 79 | } 80 | ``` 81 | 82 | ## Configuration 83 | 84 | In your `config.yaml` file: 85 | 86 | ```yaml 87 | prompt_builders: 88 | default: 89 | template_type: basic_rag 90 | ``` 91 | 92 | ## Conclusion 93 | 94 | The Prompt Builder component formats queries and documents into prompts for language models. For most use cases, the default implementation will be sufficient. 95 | 96 | If you are connecting a new data source, you typically won't need to modify this component at all. 97 | -------------------------------------------------------------------------------- /docs/guides/validator.md: -------------------------------------------------------------------------------- 1 | # Validator Component Guide 2 | 3 | ## Overview 4 | 5 | A Validator checks the quality of responses generated by the system. It ensures that responses are accurate and relevant before being delivered to the user. 6 | 7 | The Validator is an optional component - you don't need to implement a custom Validator when simply connecting a new data source. 8 | 9 | ## Interface 10 | 11 | All validators must implement the `Validator` interface: 12 | 13 | ```python 14 | class Validator(ABC): 15 | @abstractmethod 16 | async def initialize(self) -> None: 17 | """Initialize the validator with configuration parameters.""" 18 | pass 19 | 20 | @abstractmethod 21 | async def validate( 22 | self, 23 | query: str, 24 | response: Dict[str, Any], 25 | documents: List[Dict[str, Any]], 26 | **kwargs 27 | ) -> Dict[str, Any]: 28 | """ 29 | Validate the generated response against the query and documents. 
30 | 31 | Args: 32 | query: The user's original query 33 | response: The generated response to validate 34 | documents: The documents used to generate the response 35 | **kwargs: Additional parameters for validation 36 | 37 | Returns: 38 | Dict[str, Any]: Validation result with potentially modified response 39 | """ 40 | pass 41 | ``` 42 | 43 | ## Basic Implementation 44 | 45 | A simple validator might check relevance and add validation metadata: 46 | 47 | ```python 48 | async def validate( 49 | self, 50 | query: str, 51 | response: Dict[str, Any], 52 | documents: List[Dict[str, Any]], 53 | **kwargs 54 | ) -> Dict[str, Any]: 55 | """Basic validation of response.""" 56 | 57 | # Extract the answer 58 | answer = response.get("answer", "") 59 | 60 | # Simple validation check 61 | is_valid = len(answer) > 0 62 | 63 | # Create modified response with validation metadata 64 | modified_response = response.copy() 65 | if "metadata" not in modified_response: 66 | modified_response["metadata"] = {} 67 | 68 | modified_response["metadata"]["validation"] = { 69 | "status": "passed" if is_valid else "failed" 70 | } 71 | 72 | # Return validation result 73 | result = { 74 | "is_valid": is_valid, 75 | "modified_response": modified_response, 76 | "warnings": [] if is_valid else ["Empty response detected"] 77 | } 78 | 79 | return result 80 | ``` 81 | 82 | ## Configuration 83 | 84 | In your `config.yaml` file, you can add basic configuration: 85 | 86 | ```yaml 87 | validators: 88 | default: 89 | enabled: true 90 | ``` 91 | 92 | ## Conclusion 93 | 94 | The Validator component performs basic quality checks on generated responses. For most use cases, the default implementation will be sufficient. 95 | 96 | If you are connecting a new data source, you typically won't need to modify this component at all. 
97 | -------------------------------------------------------------------------------- /docs/prompts.md: -------------------------------------------------------------------------------- 1 | # Effective Prompts for Working with ICI Core 2 | 3 | This guide provides detailed prompts that you can use when interacting with AI assistants to help you work with the ICI Core framework. Copy and paste these prompts to get targeted assistance with understanding the codebase or developing custom components. 4 | 5 | ## Understanding the Codebase 6 | 7 | ``` 8 | I want to understand the ICI Core codebase architecture. Please explain: 9 | 10 | 1. The overall architecture and how components interact 11 | 2. The main data flow from ingestion to response generation 12 | 3. The purpose of each major component (Ingestor, Preprocessor, Embedder, Vector Store, Generator, etc.) 13 | 4. How configuration works via config.yaml 14 | 5. Which interfaces need to be implemented when creating custom components 15 | 16 | Please reference specific files like core interfaces in ici/core/interfaces/ and example implementations in ici/adapters/ when explaining. 17 | ``` 18 | 19 | ``` 20 | Please analyze the DefaultIngestionPipeline in ici/adapters/pipelines/default.py and explain how data flows through the ingestion process step by step. Show how the data is transformed from raw input to database storage. 21 | ``` 22 | 23 | ## Building Custom Ingestors and Preprocessors 24 | 25 | ``` 26 | I want to build a custom ingestor and preprocessor for [XYZ] (replace with your specific data source like Twitter, Notion, Slack, etc.). 27 | 28 | For the ingestor: 29 | 1. Which interface must I implement? (Please reference ici/core/interfaces/ingestor.py) 30 | 2. What methods need to be implemented and what should each return? 31 | 3. How should I handle authentication with [XYZ]? 32 | 4. What's the expected format of the data I should return? 33 | 5. Where should I place my custom ingestor file? 34 | 6. 
How do I configure it in config.yaml? 35 | 36 | For the preprocessor: 37 | 1. Which interface must I implement? (Please reference ici/core/interfaces/preprocessor.py) 38 | 2. How should I structure my preprocess method to convert [XYZ] data into standard documents? 39 | 3. What metadata should I include in each document? 40 | 4. How do I handle message grouping or chunking? 41 | 5. Where should I place my custom preprocessor file? 42 | 6. How do I configure it in config.yaml? 43 | 44 | Please provide code examples that I can adapt. 45 | ``` 46 | 47 | ``` 48 | I'm implementing a custom [XYZ] ingestor and I'm stuck on the fetch_new_data method. I need to fetch [specific data type] from [XYZ API/data source]. Can you help me write this method with proper error handling and pagination? Here's what I have so far: 49 | 50 | [paste your code here] 51 | ``` 52 | 53 | ## Building a Custom Generator 54 | 55 | ``` 56 | I want to implement a custom Generator that uses [model/API of your choice, e.g., Claude, LLaMA, Gemini, etc.]. 57 | 58 | 1. Please explain the Generator interface from ici/core/interfaces/generator.py 59 | 2. Walk me through how to implement the required initialize() and generate() methods 60 | 3. How should I handle token limits and context windows? 61 | 4. How do I integrate this with the prompt template system? 62 | 5. How do I configure authentication and API settings in config.yaml? 63 | 6. Where should I place my custom generator file? 64 | 65 | Can you provide a code example with proper error handling and logging? 66 | ``` 67 | 68 | ``` 69 | I'm building a custom Generator that needs to support streaming responses. How can I implement this functionality while adhering to the Generator interface? Please provide examples of how to: 70 | 71 | 1. Structure the generate() method to support streaming 72 | 2. Handle errors during generation 73 | 3. 
Configure stream parameters in config.yaml 74 | ``` 75 | 76 | ## Debugging Common Issues 77 | 78 | ``` 79 | I'm encountering an error when running my ingestion pipeline with my custom [Component]. The error message is: 80 | 81 | [paste error message here] 82 | 83 | My implementation looks like: 84 | 85 | [paste relevant code here] 86 | 87 | How can I diagnose and fix this issue? 88 | ``` 89 | 90 | ## Extending the System 91 | 92 | ``` 93 | I want to extend the ICI Core system to add [feature/functionality]. Which components should I modify or extend? Are there existing interfaces I should use? Please provide guidance on the best approach and reference relevant files in the codebase. 94 | ``` 95 | 96 | ## Optimizing Performance 97 | 98 | ``` 99 | I'm experiencing performance issues with my [specific component, e.g., vector store, embedder, etc.]. The system is [describe issues, e.g., slow to respond, using excessive memory]. How can I optimize my implementation while still adhering to the required interfaces? 100 | ``` 101 | 102 | ## Testing Components 103 | 104 | ``` 105 | How should I properly test my custom [Component Type]? Please provide examples of: 106 | 107 | 1. Unit tests for each method 108 | 2. Integration tests with other components 109 | 3. Approaches for mocking dependencies 110 | 4. Performance benchmarking 111 | 112 | Please reference testing patterns used elsewhere in the codebase. 113 | ``` 114 | 115 | --- 116 | 117 | When using these prompts, replace the placeholders (text in brackets) with your specific details. For more information on each component, refer to the corresponding guide in the `docs/guides/` directory. -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | This directory contains example scripts demonstrating the usage of the ICI framework components. 
4 | 5 | ## Vector Store Query Tools 6 | 7 | This directory contains example scripts for querying the ChromaDB vector store where Telegram messages are stored. 8 | 9 | ### Basic Query Script 10 | 11 | The `query_vectorstore_example.py` script provides a simple interface for querying the vector store: 12 | 13 | ```bash 14 | # Basic query 15 | python query_vectorstore_example.py "your search query" 16 | 17 | # Specify number of results 18 | python query_vectorstore_example.py "your search query" --top_k 10 19 | 20 | # Filter by conversation 21 | python query_vectorstore_example.py "your search query" --conversation_id "123456789" 22 | ``` 23 | 24 | ### Advanced Query Tool 25 | 26 | The `advanced_vector_query.py` script offers more advanced features: 27 | 28 | ```bash 29 | # Basic search 30 | python advanced_vector_query.py "your search query" 31 | 32 | # List all documents 33 | python advanced_vector_query.py --list_all --top_k 20 34 | 35 | # Filter by conversation and date range 36 | python advanced_vector_query.py "your search query" --conversation_id "123456789" --date_from "2025-01-01" --date_to "2025-03-20" 37 | 38 | # Get conversation context around a specific message 39 | python advanced_vector_query.py --message_id "message_uuid" --context_window 5 40 | 41 | # Export results to JSON 42 | python advanced_vector_query.py "your search query" --format json --export results.json 43 | 44 | # Export results to CSV 45 | python advanced_vector_query.py "your search query" --export results.csv 46 | 47 | # Show full text in results 48 | python advanced_vector_query.py "your search query" --full_text 49 | ``` 50 | 51 | ## Telegram Ingestion Pipeline 52 | 53 | The examples also include scripts for running the Telegram ingestion pipeline: 54 | 55 | ### Single Run Pipeline 56 | 57 | The `async_telegram_pipeline_example.py` script demonstrates a single run of the Telegram ingestion pipeline: 58 | 59 | ```bash 60 | # Run the pipeline once 61 | python 
async_telegram_pipeline_example.py 62 | ``` 63 | 64 | ### Scheduled Pipeline 65 | 66 | The `scheduled_telegram_pipeline_example.py` script demonstrates scheduled ingestion with the Telegram pipeline: 67 | 68 | ```bash 69 | # Register the ingestor (first-time use) and start scheduled ingestion 70 | python scheduled_telegram_pipeline_example.py --register 71 | 72 | # Start scheduled ingestion (if already registered) 73 | python scheduled_telegram_pipeline_example.py 74 | 75 | # Just run once (don't schedule) 76 | python scheduled_telegram_pipeline_example.py --run-once 77 | ``` 78 | 79 | The scheduled pipeline will run at the interval specified in `config.yaml` (under `pipelines.telegram.schedule.interval_minutes`). 80 | 81 | ## Usage Notes 82 | 83 | - These scripts use the configuration from `config.yaml` in the project root. 84 | - Vector store queries require that data has been ingested using one of the pipeline examples. 85 | - For date filtering, use ISO format dates (YYYY-MM-DD). 86 | - The scheduled pipeline can be stopped with Ctrl+C, which will gracefully shut down the scheduler. 
87 | 88 | ## Examples 89 | 90 | **Search for messages about a specific topic:** 91 | ```bash 92 | python advanced_vector_query.py "cryptocurrency market updates" 93 | ``` 94 | 95 | **Find all messages from a specific conversation mentioning a topic:** 96 | ```bash 97 | python advanced_vector_query.py "login code" --conversation_id "777000" 98 | ``` 99 | 100 | **Export all messages from the last month:** 101 | ```bash 102 | python advanced_vector_query.py --list_all --date_from "2025-02-20" --export recent_messages.csv 103 | ``` 104 | 105 | **Get the conversation context around a specific message:** 106 | ```bash 107 | python advanced_vector_query.py --message_id "d290f1ee-6c54-4b01-90e6-d701748f0851" --context_window 3 108 | ``` -------------------------------------------------------------------------------- /examples/chat_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Chat system example for the ICI framework. 4 | 5 | This script demonstrates how to use the DefaultOrchestrator for 6 | multi-turn conversations with chat history. 7 | """ 8 | 9 | import os 10 | import sys 11 | import asyncio 12 | import time 13 | from typing import Dict, Any, List 14 | import json 15 | 16 | # Set up path to find ICI modules 17 | current_dir = os.path.dirname(os.path.abspath(__file__)) 18 | parent_dir = os.path.dirname(current_dir) 19 | if parent_dir not in sys.path: 20 | sys.path.insert(0, parent_dir) 21 | 22 | # Import the orchestrator 23 | try: 24 | from ici.adapters import DefaultOrchestrator 25 | except ImportError as e: 26 | print(f"Error importing ICI: {e}") 27 | sys.exit(1) 28 | 29 | async def main(): 30 | """ 31 | Main chat example function. 32 | 33 | This demonstrates multi-turn conversation capabilities 34 | using the DefaultOrchestrator to manage chat state and context. 
35 | """ 36 | print("Initializing orchestrator...") 37 | 38 | # Initialize orchestrator 39 | orchestrator = DefaultOrchestrator(logger_name="chat_example") 40 | await orchestrator.initialize() 41 | 42 | print("Orchestrator initialized successfully.") 43 | 44 | # User ID and source 45 | user_id = "example_user_1" 46 | source = "example" 47 | 48 | # Welcome the user 49 | print("\nWelcome to the ICI Chat Example!") 50 | print("This demonstrates a multi-turn conversation with history.") 51 | print("Type 'exit' or 'quit' to end the conversation.") 52 | print("Type '/new' to start a new conversation.") 53 | print("Type '/help' for help.\n") 54 | 55 | while True: 56 | # Get user input 57 | user_input = input("You: ").strip() 58 | 59 | # Check for exit command 60 | if user_input.lower() in ("exit", "quit"): 61 | break 62 | 63 | # Process the query 64 | try: 65 | response = await orchestrator.process_query( 66 | source=source, 67 | user_id=user_id, 68 | query=user_input, 69 | additional_info={} 70 | ) 71 | 72 | # Print response 73 | print(f"\nAssistant: {response}\n") 74 | 75 | except Exception as e: 76 | print(f"Error processing query: {e}") 77 | 78 | print("Chat example completed.") 79 | 80 | if __name__ == "__main__": 81 | # Set config path environment variable if needed 82 | if len(sys.argv) > 1: 83 | os.environ["ICI_CONFIG_PATH"] = sys.argv[1] 84 | 85 | # Run the example 86 | asyncio.run(main()) -------------------------------------------------------------------------------- /examples/config_utils_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Example script demonstrating how to use the configuration utilities. 4 | 5 | This script shows how to: 6 | 1. Load the full configuration from a file 7 | 2. Get configuration for a specific component 8 | 3. 
Handle different error cases 9 | """ 10 | 11 | import os 12 | import sys 13 | import json 14 | 15 | from ici.utils.config import load_config, get_component_config 16 | from ici.core.exceptions import ConfigurationError 17 | from ici.adapters.loggers import StructuredLogger 18 | 19 | 20 | # Setup logging 21 | logger = StructuredLogger(name="example.config") 22 | 23 | 24 | def pretty_print_json(data, title=None): 25 | """Print data as formatted JSON with optional title.""" 26 | if title: 27 | print(f"\n{title}") 28 | print("=" * len(title)) 29 | print(json.dumps(data, indent=2)) 30 | print() 31 | 32 | 33 | def main(): 34 | """Demonstrate the configuration utilities.""" 35 | print("Configuration Utilities Example") 36 | print("-------------------------------") 37 | 38 | # Ensure we have a config file to work with 39 | if not os.path.exists("config.yaml"): 40 | print("Creating sample config.yaml file...") 41 | with open("config.yaml", "w") as f: 42 | f.write("""# ICI Framework Configuration 43 | 44 | # Telegram Ingestor Configuration 45 | telegram: 46 | api_id: "YOUR_API_ID_HERE" 47 | api_hash: "YOUR_API_HASH_HERE" 48 | phone_number: "+12345678901" 49 | session_file: "telegram_session" 50 | request_delay: 1.0 51 | 52 | # Vector Store Configuration 53 | vector_store: 54 | type: "chroma" 55 | collection_name: "example_collection" 56 | persist_directory: "./data/chroma_db" 57 | 58 | # Embedder Configuration 59 | embedder: 60 | model_name: "sentence-transformers/all-MiniLM-L6-v2" 61 | device: "cpu" 62 | batch_size: 32 63 | """) 64 | print("Sample config.yaml file created.") 65 | 66 | try: 67 | # Example 1: Load the entire configuration 68 | print("\nExample 1: Loading the entire configuration") 69 | full_config = load_config() 70 | print(f"Successfully loaded configuration with {len(full_config)} sections:") 71 | for section in full_config.keys(): 72 | print(f" - {section}") 73 | 74 | # Example 2: Get configuration for a specific component 75 | print("\nExample 2: Getting 
configuration for the vector store component") 76 | try: 77 | vector_store_config = get_component_config("vector_store") 78 | pretty_print_json(vector_store_config, "Vector Store Configuration") 79 | except ConfigurationError as e: 80 | print(f"Error: {e}") 81 | 82 | # Example 3: Get configuration for a non-existent component 83 | print("\nExample 3: Getting configuration for a non-existent component") 84 | try: 85 | nonexistent_config = get_component_config("nonexistent_component") 86 | pretty_print_json(nonexistent_config, "Non-existent Component Configuration") 87 | print("Note: Returns an empty dictionary for non-existent components") 88 | except ConfigurationError as e: 89 | print(f"Error: {e}") 90 | 91 | # Example 4: Load configuration from a custom path 92 | print("\nExample 4: Loading configuration from a custom path") 93 | custom_config_path = "custom_config.yaml" 94 | try: 95 | # Create a custom config for demonstration 96 | with open(custom_config_path, "w") as f: 97 | f.write(""" 98 | custom_component: 99 | setting1: "value1" 100 | setting2: 42 101 | """) 102 | 103 | # Load the custom configuration 104 | custom_config = load_config(custom_config_path) 105 | pretty_print_json(custom_config, "Custom Configuration") 106 | 107 | # Clean up the temporary file 108 | os.remove(custom_config_path) 109 | 110 | except ConfigurationError as e: 111 | print(f"Error: {e}") 112 | 113 | # Example 5: Error handling for missing file 114 | print("\nExample 5: Error handling for missing file") 115 | try: 116 | missing_config = load_config("nonexistent_file.yaml") 117 | pretty_print_json(missing_config, "Missing File Configuration") 118 | except ConfigurationError as e: 119 | print(f"Error correctly handled: {e}") 120 | 121 | print("\nExample completed successfully!") 122 | return 0 123 | 124 | except Exception as e: 125 | print(f"Unexpected error: {e}") 126 | return 1 127 | 128 | 129 | if __name__ == "__main__": 130 | sys.exit(main()) 
-------------------------------------------------------------------------------- /examples/default_pipeline_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Default Ingestion Pipeline Example 4 | 5 | This script demonstrates how to use the DefaultIngestionPipeline with 6 | both WhatsApp and Telegram ingestors. 7 | 8 | The pipeline handles: 9 | 1. Loading and initializing all components for both ingestors 10 | 2. Tracking state separately for each ingestor 11 | 3. Fetching data from each ingestor based on its latest state 12 | 4. Processing the data through the appropriate preprocessor 13 | 5. Generating embeddings and storing in a vector database 14 | 15 | Usage: 16 | python default_pipeline_example.py [--config-path CONFIG_PATH] [--ingestor-id INGESTOR_ID] [--full] [--verbose] 17 | """ 18 | 19 | import os 20 | import sys 21 | import asyncio 22 | import argparse 23 | import logging 24 | from datetime import datetime, timezone 25 | 26 | # Add the project root to the Python path 27 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 28 | 29 | from ici.adapters.loggers import StructuredLogger 30 | from ici.adapters.pipelines import DefaultIngestionPipeline 31 | 32 | 33 | async def main(): 34 | # Parse command line arguments 35 | parser = argparse.ArgumentParser(description="Run the Default Ingestion Pipeline") 36 | parser.add_argument("--config-path", default="config.yaml", help="Path to configuration file") 37 | parser.add_argument("--ingestor-id", help="Specific ingestor ID to run (optional)") 38 | parser.add_argument("--full", action="store_true", help="Force a full ingestion run") 39 | parser.add_argument("--verbose", action="store_true", help="Enable verbose logging") 40 | args = parser.parse_args() 41 | 42 | # Set environment variable for config path 43 | os.environ["ICI_CONFIG_PATH"] = args.config_path 44 | 45 | # Setup logging 46 | logger = 
StructuredLogger(name="default_pipeline_example") 47 | 48 | # Set verbose logging if requested 49 | if args.verbose: 50 | # Configure root logger with more detail for debugging 51 | logging.basicConfig(level=logging.INFO) 52 | # Enable debug logs for aiohttp specifically 53 | logging.getLogger('aiohttp').setLevel(logging.DEBUG) 54 | 55 | try: 56 | # Create and initialize the pipeline 57 | pipeline = DefaultIngestionPipeline() 58 | await pipeline.initialize() 59 | 60 | # Run health check before ingestion 61 | health_info = await pipeline.healthcheck() 62 | logger.info({ 63 | "action": "HEALTH_CHECK", 64 | "message": "Pipeline health check result", 65 | "data": health_info 66 | }) 67 | 68 | if args.ingestor_id: 69 | # Run ingestion for a specific ingestor 70 | logger.info({ 71 | "action": "RUN_SPECIFIC_INGESTOR", 72 | "message": f"Starting ingestion for {args.ingestor_id}", 73 | "data": {"ingestor_id": args.ingestor_id} 74 | }) 75 | 76 | # Check if we should force a fresh start 77 | if args.full: 78 | # Reset the state to 0 for a full run 79 | logger.info({ 80 | "action": "RESET_STATE", 81 | "message": f"Resetting state for {args.ingestor_id} to perform full ingestion" 82 | }) 83 | pipeline.set_ingestor_state( 84 | ingestor_id=args.ingestor_id, 85 | last_timestamp=0, 86 | additional_metadata={ 87 | "reset_time": datetime.now(timezone.utc).isoformat(), 88 | "reset_reason": "Manual full ingestion requested" 89 | } 90 | ) 91 | 92 | # Run ingestion for the specific ingestor 93 | result = await pipeline.run_ingestion(args.ingestor_id) 94 | 95 | # Print results 96 | logger.info({ 97 | "action": "INGESTION_RESULT", 98 | "message": f"Ingestion result for {args.ingestor_id}", 99 | "data": { 100 | "success": result["success"], 101 | "documents_processed": result.get("documents_processed", 0), 102 | "errors": result.get("errors", []), 103 | "duration": result.get("duration", 0) 104 | } 105 | }) 106 | 107 | # Show the updated state 108 | state = 
pipeline.get_ingestor_state(args.ingestor_id) 109 | logger.info({ 110 | "action": "UPDATED_STATE", 111 | "message": f"Updated state for {args.ingestor_id}", 112 | "data": state 113 | }) 114 | 115 | else: 116 | # Run ingestion for all registered ingestors 117 | logger.info({ 118 | "action": "RUN_ALL_INGESTORS", 119 | "message": "Starting ingestion for all registered ingestors" 120 | }) 121 | 122 | await pipeline.start() 123 | 124 | logger.info({ 125 | "action": "INGESTION_COMPLETE", 126 | "message": "Completed ingestion for all ingestors" 127 | }) 128 | 129 | # Clean up resources 130 | await pipeline.close() 131 | 132 | except Exception as e: 133 | logger.error({ 134 | "action": "PIPELINE_ERROR", 135 | "message": f"Error running default pipeline: {str(e)}", 136 | "data": {"error": str(e), "error_type": type(e).__name__} 137 | }) 138 | sys.exit(1) 139 | 140 | if __name__ == "__main__": 141 | asyncio.run(main()) -------------------------------------------------------------------------------- /examples/query_vectorstore_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Example script that demonstrates how to query the ChromaDB vector store. 4 | 5 | This script: 6 | 1. Loads the configuration from config.yaml 7 | 2. Initializes the ChromaDB vector store and the sentence transformer embedder 8 | 3. Queries the vector store using a text query 9 | 4. 
async def query_vector_store(query: str, top_k: int = 5, filters: Dict[str, Any] = None) -> List[Dict[str, Any]]:
    """
    Query the vector store with a text query.

    The query text is embedded with SentenceTransformerEmbedder and the
    resulting vector is passed to ChromaDBStore.search().

    Args:
        query: The text query to search for
        top_k: Number of results to return
        filters: Optional metadata filters to apply to the search

    Returns:
        List of documents matching the query
    """
    # Initialize the vector store
    vector_store = ChromaDBStore()
    await vector_store.initialize()

    # Initialize the embedder
    embedder = SentenceTransformerEmbedder()
    await embedder.initialize()

    # embed() returns a pair; only the first element (the vector) is used here
    query_embedding, _ = await embedder.embed(query)

    # Search the vector store
    logger.info({
        "action": "QUERY_VECTOR_STORE",
        "message": f"Searching for: '{query}'",
        "data": {"top_k": top_k, "filters": filters}
    })

    results = vector_store.search(
        query_vector=query_embedding,
        num_results=top_k,
        filters=filters
    )

    logger.info({
        "action": "QUERY_RESULTS",
        "message": f"Found {len(results)} results",
        "data": {"result_count": len(results)}
    })

    return results


def format_results(results: List[Dict[str, Any]], max_chars: int = 500) -> None:
    """
    Format and print the search results.

    Missing or None fields fall back to a placeholder instead of raising, and
    the trailing "..." is only printed when the content was actually cut.

    Args:
        results: The search results from the vector store
        max_chars: Maximum number of content characters printed per result
    """
    print(f"\n{'='*80}\n{'SEARCH RESULTS':^80}\n{'='*80}")

    total = len(results)
    for i, result in enumerate(results, 1):
        print(f"\n--- Result {i} ---")
        print(f"Document ID: {result.get('id', 'N/A')}")

        # A key that is present but None would break the float format spec
        score = result.get('score') or 0
        print(f"Score: {score:.4f}")

        # Print metadata
        print("\nMetadata:")
        metadata = result.get('metadata') or {}
        for key, value in metadata.items():
            if key != 'text':  # Avoid duplicating the text content
                print(f" {key}: {value}")

        # Print the text content, marking truncation only when it happened
        print("\nContent:")
        text = str(result.get('text') or 'No content available')
        if len(text) > max_chars:
            print(f"{text[:max_chars]}...")
        else:
            print(text)

        if i < total:
            print("\n" + "-"*80)

    print("\n" + "="*80)


async def main():
    """Parse the command line, run the query and print the formatted results."""
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Query the ChromaDB vector store")
    parser.add_argument("query", help="The text query to search for")
    parser.add_argument("--top_k", type=int, default=5, help="Number of results to return")
    parser.add_argument("--conversation_id", help="Filter by conversation ID")
    parser.add_argument("--date_from", help="Filter by date (format: YYYY-MM-DD)")

    args = parser.parse_args()

    # Prepare filters based on command line arguments
    filters = {}
    if args.conversation_id:
        filters["conversation_id"] = args.conversation_id

    if args.date_from:
        # The option is accepted but no filter is built from it; tell the user
        # rather than silently returning results that ignore the date.
        logger.warning({
            "action": "FILTER_IGNORED",
            "message": "--date_from is not applied to the search",
            "data": {"date_from": args.date_from}
        })

    # Execute the query (an empty filter dict is passed as None)
    results = await query_vector_store(args.query, args.top_k, filters or None)

    # Format and display the results
    format_results(results)
async def main():
    """Demonstrate single-text and batch embedding with SentenceTransformerEmbedder."""
    log = StructuredLogger(
        name="embedder_example",
        log_level="INFO"
    )

    # Make sure config/embedders.yaml exists, writing a minimal one if absent
    settings_dir = Path("config")
    settings_dir.mkdir(exist_ok=True)
    settings_file = settings_dir / "embedders.yaml"

    if not settings_file.exists():
        default_settings = {
            "embedders": {
                "sentence_transformer": {
                    "model_name": "all-MiniLM-L6-v2"  # Small, fast model for demonstration
                }
            }
        }
        with settings_file.open("w") as handle:
            yaml.dump(default_settings, handle, default_flow_style=False)
        log.info(f"Created example config at {settings_file}")

    # Build and initialize the embedder
    embedder = SentenceTransformerEmbedder(logger_name="sentence_transformer_example")
    log.info("Initializing embedder...")
    await embedder.initialize()

    # Report what was loaded
    log.info(f"Embedder initialized with model: {embedder._model_name}")
    log.info(f"Embedding dimensions: {embedder.dimensions}")

    status = embedder.healthcheck()
    log.info(f"Embedder health: {status['healthy']} - {status['message']}")

    # --- Single text ---
    sample_text = "The quick brown fox jumps over the lazy dog."
    log.info(f"Generating embedding for: '{sample_text}'")
    vector, info = await embedder.embed(sample_text)

    log.info(f"Embedding generated: {len(vector)} dimensions")
    log.info(f"Metadata: {info}")
    log.info(f"First 5 dimensions: {vector[:5]}")

    # --- Batch ---
    batch_texts = [
        "Artificial intelligence is transforming the world.",
        "Machine learning models learn from data.",
        "Natural language processing helps computers understand human language.",
        "",  # Empty text to demonstrate handling
    ]

    log.info(f"Generating embeddings for batch of {len(batch_texts)} texts...")
    batch_results = await embedder.embed_batch(batch_texts)

    for number, (item_vector, item_info) in enumerate(batch_results, start=1):
        log.info(f"Text {number} embedding: {len(item_vector)} dimensions")
        log.info(f"Text {number} metadata: {item_info}")

    log.info("Example completed successfully.")
def example_function():
    """Emit one structured log entry at each severity level, including an error with an exception."""
    # Logger writing to both the console and a log file
    log = StructuredLogger(
        name="example",
        level="DEBUG",
        log_file="examples/logs/structured.log",
        console_output=True,
    )

    # Debug: function start
    start_event = {
        "action": "FUNCTION_START",
        "message": "Starting example function",
        "data": {"timestamp": time.time()},
    }
    log.debug(start_event)

    # Info: nested additional data
    config_event = {
        "action": "CONFIG_LOADED",
        "message": "Configuration loaded successfully",
        "data": {
            "config_file": "config.json",
            "settings": {"max_connections": 100, "timeout": 30},
        },
    }
    log.info(config_event)

    # Warning
    resource_event = {
        "action": "RESOURCE_LOW",
        "message": "System resources are running low",
        "data": {"memory_usage": "85%", "cpu_usage": "78%"},
    }
    log.warning(resource_event)

    # Error: provoke a failure so the exception object can be attached
    try:
        1 / 0
    except Exception as exc:
        failure_event = {
            "action": "CALCULATION_FAILED",
            "message": "Failed to perform calculation",
            "data": {"operation": "division", "parameters": [1, 0]},
            "exception": exc,
        }
        log.error(failure_event)

    # Critical
    outage_event = {
        "action": "SERVICE_UNAVAILABLE",
        "message": "Critical dependency unavailable",
        "data": {"service": "database", "attempts": 5, "retry_in": 60},
    }
    log.critical(outage_event)

    # Info: function end
    end_event = {
        "action": "FUNCTION_END",
        "message": "Example function completed",
        "data": {"duration_ms": 123},
    }
    log.info(end_event)


def database_example():
    """Log a connect / query / slow-query / disconnect sequence for a database session."""
    log = StructuredLogger(name="example.database")

    # (logger method name, payload) pairs, emitted in order
    events = [
        (
            "info",
            {
                "action": "DB_CONNECT",
                "message": "Connected to database",
                "data": {
                    "host": "localhost",
                    "database": "users",
                    "connection_id": "conn-123456",
                },
            },
        ),
        (
            "debug",
            {
                "action": "DB_QUERY",
                "message": "Executing database query",
                "data": {
                    "query": "SELECT * FROM users WHERE status = ?",
                    "parameters": ["active"],
                    "query_id": "q-987654",
                },
            },
        ),
        (
            "warning",
            {
                "action": "DB_SLOW_QUERY",
                "message": "Query execution time exceeded threshold",
                "data": {
                    "query_id": "q-987654",
                    "execution_time_ms": 1520,
                    "threshold_ms": 1000,
                },
            },
        ),
        (
            "info",
            {
                "action": "DB_DISCONNECT",
                "message": "Disconnected from database",
                "data": {"connection_id": "conn-123456", "duration_s": 35},
            },
        ),
    ]

    for level, payload in events:
        getattr(log, level)(payload)
async def run_debug():
    """Run one Telegram ingestion over the last three days with the debug flag set."""

    # config.yaml sits one directory above this script's directory
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    config_path = os.path.join(project_root, 'config.yaml')
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    telegram_config = config.get('telegram', {})

    # Storage backend handed to the pipeline
    storage = InMemoryStorage(logger=logger)

    # Restrict the ingestor to a short window ending now
    window_end = datetime.now()
    window_start = window_end - timedelta(days=3)  # Last 3 days

    ingestor_kwargs = {
        "api_id": telegram_config.get('api_id'),
        "api_hash": telegram_config.get('api_hash'),
        "phone": telegram_config.get('phone'),
        "session_name": "telegram_debug",
        "conversation_ids": telegram_config.get('conversation_ids', []),
        "start_date": window_start,
        "end_date": window_end,
        "logger": logger,
    }
    ingestor = TelegramIngestor(**ingestor_kwargs)

    pipeline = TelegramIngestionPipeline(
        ingestors={ingestor.id: ingestor},
        storage=storage,
        interval_seconds=3600,  # Not used in debug mode
        logger=logger
    )

    try:
        logger.info("Starting debug ingestion...")
        outcome = await pipeline.start(ingestor_id=ingestor.id, debug_first_message=True)
        logger.info(f"Debug ingestion complete: {outcome}")
    finally:
        # Disconnect whether or not the run succeeded
        await ingestor.disconnect()
        logger.info("Disconnected from Telegram")
def pretty_print_json(data, title=None):
    """Print data as formatted JSON with optional title.

    Args:
        data: Any JSON-serializable object.
        title: Optional heading printed above the JSON and underlined with "=".
    """
    if title:
        print(f"\n{title}")
        print("=" * len(title))
    # ensure_ascii=False keeps non-ASCII text readable instead of \uXXXX
    # escapes, matching how main_async() writes its JSON output file.
    print(json.dumps(data, indent=2, ensure_ascii=False))
    print()


async def main_async():
    """Run the Telegram ingestor example asynchronously.

    Returns:
        int: 0 on success; 1 when a sample config.yaml had to be created,
        when the ingestor reports itself unhealthy, or when any step raises.
    """
    print("Telegram Ingestor Example")
    print("-----------------------")

    try:
        # Create a sample config.yaml file if it doesn't exist
        if not os.path.exists("config.yaml"):
            create_sample_config()
            print("Created sample config.yaml file. Please edit it with your Telegram credentials.")
            return 1

        # Initialize using config.yaml
        print("Initializing ingestor from config.yaml...")
        ingestor = TelegramIngestor(logger_name="example.telegram")

        # Call the initialize method
        await ingestor.initialize()

        # Check health status
        health = ingestor.healthcheck()
        pretty_print_json(health, "Health Check")

        if not health["healthy"]:
            print("Ingestor is not healthy. Cannot proceed.")
            return 1

        # Example 1: Fetch recent data (last 7 days)
        print("\nFetching messages from the last 7 days...")
        since_date = datetime.now() - timedelta(days=7)
        recent_data = ingestor.fetch_new_data(since=since_date)

        print(f"Retrieved {len(recent_data['messages'])} messages from {len(recent_data['conversations'])} conversations")

        # Example 2: Fetch data for a specific date range
        print("\nFetching messages for a specific date range...")
        start_date = datetime.now() - timedelta(days=30)
        end_date = datetime.now() - timedelta(days=15)

        print(f"Date range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
        range_data = ingestor.fetch_data_in_range(start=start_date, end=end_date)

        print(f"Retrieved {len(range_data['messages'])} messages from {len(range_data['conversations'])} conversations")

        # Print sample message (if available)
        if recent_data["messages"]:
            sample_message = recent_data["messages"][0]
            pretty_print_json(sample_message, "Sample Message")

        # Save data to JSON file
        output_dir = "examples/data"
        os.makedirs(output_dir, exist_ok=True)

        output_file = os.path.join(output_dir, "telegram_messages.json")
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(recent_data, f, indent=2, ensure_ascii=False)

        print(f"\nSaved messages to {output_file}")

    except Exception as e:
        # Top-level boundary of the example: report the error and signal failure
        print(f"Error: {e}")
        return 1

    return 0
def main():
    """Drive main_async() to completion and hand back its exit code."""
    exit_code = asyncio.run(main_async())
    return exit_code
async def generate_session_string() -> str:
    """
    Produce a Telethon session string and print it.

    Credentials come from TELEGRAM_API_ID / TELEGRAM_API_HASH, falling back to
    interactive prompts; the session file name comes from
    TELEGRAM_SESSION_FILE (default "telegram_session").

    Returns:
        str: The generated session string.
    """
    # Environment first, prompt only for what is missing
    api_id = os.environ.get("TELEGRAM_API_ID") or input("Enter your Telegram API ID: ")
    api_hash = os.environ.get("TELEGRAM_API_HASH") or input("Enter your Telegram API hash: ")
    session_file = os.environ.get("TELEGRAM_SESSION_FILE", "telegram_session")

    separator = "-" * 50
    async with TelegramClient(session_file, api_id, api_hash) as client:
        # Serialize the client's session into string form
        session_string = StringSession.save(client.session)
        print("\nYour session string has been generated:")
        print(separator)
        print(session_string)
        print(separator)
        print("\nStore this string securely as it provides access to your Telegram account.")

    return session_string


async def use_session_string(session_string: str) -> None:
    """
    Connect a TelegramIngestor with a session string and fetch recent messages.

    Args:
        session_string: The session string to use.
    """
    # Environment first, prompt only for what is missing
    api_id = os.environ.get("TELEGRAM_API_ID") or input("Enter your Telegram API ID: ")
    api_hash = os.environ.get("TELEGRAM_API_HASH") or input("Enter your Telegram API hash: ")
    phone = os.environ.get("TELEGRAM_PHONE") or input(
        "Enter your phone number (with country code, e.g., +12345678901): "
    )

    print("\nInitializing TelegramIngestor with session string...")
    ingestor = TelegramIngestor(logger_name="example.telegram")

    # Connection settings carrying the session string
    connection_settings = {
        "api_id": api_id,
        "api_hash": api_hash,
        "phone_number": phone,
        "session_string": session_string,
        "request_delay": 1.0
    }
    await ingestor._connect(connection_settings)

    # Stop early when the ingestor reports itself unhealthy
    health = ingestor.healthcheck()
    status_label = 'Healthy' if health['healthy'] else 'Unhealthy'
    print(f"Health check: {status_label}")

    if not health["healthy"]:
        print("Ingestor is not healthy. Cannot proceed.")
        return

    # Fetch recent messages as a demonstration
    print("\nFetching messages from the last 3 days...")
    cutoff = datetime.now() - timedelta(days=3)
    recent_data = ingestor.fetch_new_data(since=cutoff)

    message_count = len(recent_data['messages'])
    conversation_count = len(recent_data['conversations'])
    print(f"Retrieved {message_count} messages from {conversation_count} conversations")

    # Show the first message, if any
    if recent_data["messages"]:
        first = recent_data["messages"][0]
        print("\nSample message:")
        print(f"From: {first['conversation_name']}")
        print(f"Date: {first['date']}")
        print(f"Message: {first['text'][:100]}...")


async def main_async() -> int:
    """
    Dispatch to the --generate or --use workflow.

    Returns:
        int: The exit code (0 on success, 1 when the workflow raised).
    """
    cli_args = parser.parse_args()
    exit_code = 0

    try:
        if cli_args.generate:
            await generate_session_string()
        elif cli_args.use:
            await use_session_string(cli_args.use)
    except Exception as failure:
        print(f"Error: {failure}")
        exit_code = 1

    return exit_code


def main() -> int:
    """
    Synchronous entry point wrapping main_async().

    Returns:
        int: The exit code.
    """
    exit_code = asyncio.run(main_async())
    return exit_code
| logger = StructuredLogger(name="telegram_test") 39 | logger.info({ 40 | "action": "TEST_START", 41 | "message": "Starting Telegram session test" 42 | }) 43 | 44 | # Create ingestor 45 | ingestor = TelegramIngestor(logger_name="telegram_test") 46 | 47 | try: 48 | # Initialize ingestor (this will validate or generate the session) 49 | await ingestor.initialize() 50 | 51 | # Run health check 52 | health_result = ingestor.healthcheck() 53 | 54 | # Check if health_result is a coroutine (async context) 55 | if asyncio.iscoroutine(health_result): 56 | logger.info({ 57 | "action": "HEALTH_CHECK_ASYNC", 58 | "message": "Health check returned coroutine - awaiting result" 59 | }) 60 | health = await health_result 61 | else: 62 | health = health_result 63 | 64 | if health["healthy"]: 65 | print(f"\n✅ Connection successful! Logged in as: {health['details'].get('name', 'Unknown')}") 66 | logger.info({ 67 | "action": "HEALTH_CHECK_SUCCESS", 68 | "message": "Telegram connection is healthy", 69 | "data": health 70 | }) 71 | else: 72 | print(f"\n❌ Connection failed: {health['message']}") 73 | logger.error({ 74 | "action": "HEALTH_CHECK_FAILED", 75 | "message": "Telegram connection is not healthy", 76 | "data": health 77 | }) 78 | 79 | except Exception as e: 80 | logger.error({ 81 | "action": "TEST_ERROR", 82 | "message": f"Test failed: {str(e)}", 83 | "data": { 84 | "exception": str(e), 85 | "exception_type": type(e).__name__ 86 | } 87 | }) 88 | print(f"\n❌ Error: {str(e)}") 89 | 90 | finally: 91 | # Clean up 92 | if ingestor._client and ingestor._is_connected: 93 | await ingestor._disconnect() 94 | 95 | logger.info({ 96 | "action": "TEST_COMPLETE", 97 | "message": "Telegram session test completed" 98 | }) 99 | 100 | 101 | if __name__ == "__main__": 102 | asyncio.run(main()) -------------------------------------------------------------------------------- /ici/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Intelligent 
Consciousness Interface (ICI) 3 | 4 | A modular framework for creating a personal AI assistant that is context-aware, 5 | style-aware, personality-aware, and security-aware. The system processes data 6 | through an Ingestion Pipeline and responds to queries via a Query Pipeline, 7 | leveraging vector databases for efficient retrieval. 8 | """ 9 | 10 | __version__ = "0.1.0" 11 | 12 | # Import core interfaces and exceptions 13 | from ici.core import ( 14 | # Interfaces 15 | Ingestor, 16 | Preprocessor, 17 | Embedder, 18 | VectorStore, 19 | Validator, 20 | PromptBuilder, 21 | Generator, 22 | Orchestrator, 23 | IngestionPipeline, 24 | Logger, 25 | # Exceptions 26 | ICIError, 27 | IngestionError, 28 | IngestorError, 29 | APIAuthenticationError, 30 | APIRateLimitError, 31 | DataFetchError, 32 | PreprocessorError, 33 | IngestionPipelineError, 34 | QueryError, 35 | ValidationError, 36 | EmbeddingError, 37 | VectorStoreError, 38 | PromptBuilderError, 39 | GenerationError, 40 | OrchestratorError, 41 | ConfigurationError, 42 | LoggerError, 43 | ) 44 | 45 | # Import utilities 46 | from ici.utils import ( 47 | load_config, 48 | get_component_config, 49 | ) 50 | 51 | # Export core interfaces and exceptions 52 | __all__ = [ 53 | # Interfaces 54 | "Ingestor", 55 | "Preprocessor", 56 | "Embedder", 57 | "VectorStore", 58 | "Validator", 59 | "PromptBuilder", 60 | "Generator", 61 | "Orchestrator", 62 | "IngestionPipeline", 63 | "Logger", 64 | # Exceptions 65 | "ICIError", 66 | "IngestionError", 67 | "IngestorError", 68 | "APIAuthenticationError", 69 | "APIRateLimitError", 70 | "DataFetchError", 71 | "PreprocessorError", 72 | "IngestionPipelineError", 73 | "QueryError", 74 | "ValidationError", 75 | "EmbeddingError", 76 | "VectorStoreError", 77 | "PromptBuilderError", 78 | "GenerationError", 79 | "OrchestratorError", 80 | "ConfigurationError", 81 | "LoggerError", 82 | # Utilities 83 | "load_config", 84 | "get_component_config", 85 | ] 86 | 
-------------------------------------------------------------------------------- /ici/adapters/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapters module for the ICI framework. 3 | 4 | This module contains concrete implementations of the interfaces defined in the core module. 5 | """ 6 | 7 | # Import logger implementations 8 | from ici.adapters.loggers import StructuredLogger 9 | 10 | # Import embedder implementations 11 | from ici.adapters.embedders import SentenceTransformerEmbedder 12 | 13 | # Import preprocessor implementations 14 | from ici.adapters.preprocessors import TelegramPreprocessor 15 | 16 | # Import chat history implementations 17 | from ici.adapters.chat import JSONChatHistoryManager 18 | 19 | # Import user ID generator implementations 20 | from ici.adapters.user_id import DefaultUserIDGenerator 21 | 22 | # Import orchestrator implementations 23 | from ici.adapters.orchestrators import DefaultOrchestrator 24 | 25 | # Export all implementations 26 | __all__ = [ 27 | # Logger implementations 28 | "StructuredLogger", 29 | 30 | # Embedder implementations 31 | "SentenceTransformerEmbedder", 32 | 33 | # Preprocessor implementations 34 | "TelegramPreprocessor", 35 | 36 | # Chat history implementations 37 | "JSONChatHistoryManager", 38 | 39 | # User ID generator implementations 40 | "DefaultUserIDGenerator", 41 | 42 | # Orchestrator implementations 43 | "DefaultOrchestrator", 44 | ] 45 | -------------------------------------------------------------------------------- /ici/adapters/chat/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Chat history management implementations. 
3 | """ 4 | 5 | from ici.adapters.chat.json_chat_history_manager import JSONChatHistoryManager 6 | 7 | __all__ = [ 8 | "JSONChatHistoryManager", 9 | ] -------------------------------------------------------------------------------- /ici/adapters/controller/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Controller implementations for the ICI Framework. 3 | 4 | This module contains adapter implementations for the Controller interface, 5 | providing controller functionality for the ICI Framework. 6 | """ 7 | 8 | # Import adapters 9 | from .command_line import command_line_controller 10 | 11 | __all__ = ["command_line_controller"] -------------------------------------------------------------------------------- /ici/adapters/embedders/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Embedder implementations for the ICI Framework. 3 | 4 | This module contains adapter implementations for the Embedder interface, 5 | providing embedding functionality for text data. 6 | """ 7 | 8 | # Import adapters 9 | from .sentence_transformer import SentenceTransformerEmbedder 10 | 11 | __all__ = ["SentenceTransformerEmbedder"] -------------------------------------------------------------------------------- /ici/adapters/generators/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generator implementations for the ICI framework. 3 | 4 | This package contains concrete implementations of the Generator interface 5 | for producing responses using various language models. 
def create_generator(config_type: Optional[str] = None, logger_name: str = "generator") -> Generator:
    """
    Build the Generator implementation selected by the caller or by configuration.

    The "generator" component configuration is always read. A truthy
    ``config_type`` takes precedence over it; otherwise the configuration's
    "type" entry is used, defaulting to "openai" when that entry is missing.

    Args:
        config_type: Optional generator type overriding the configured one
        logger_name: Logger name forwarded to the generator constructor

    Returns:
        Generator: An OpenAIGenerator for "openai", a LangchainGenerator for "langchain"

    Raises:
        ValueError: If the resolved type is neither "openai" nor "langchain"
    """
    settings = get_component_config("generator")

    # An explicit override wins; the configuration is only consulted without one.
    if config_type:
        selected = config_type
    else:
        selected = settings.get("type", "openai")

    if selected == "openai":
        implementation = OpenAIGenerator
    elif selected == "langchain":
        implementation = LangchainGenerator
    else:
        raise ValueError(f"Invalid generator type: {selected}")

    return implementation(logger_name=logger_name)
5 | """ 6 | 7 | from ici.adapters.loggers.structured_logger import StructuredLogger 8 | 9 | __all__ = ["StructuredLogger"] 10 | -------------------------------------------------------------------------------- /ici/adapters/orchestrators/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Orchestrator module. 3 | 4 | This package provides concrete implementations of the Orchestrator interface 5 | for coordinating the processing of user queries from validation to response generation. 6 | 7 | The DefaultOrchestrator uses the DefaultIngestionPipeline, supporting both 8 | Telegram and WhatsApp data sources. 9 | """ 10 | 11 | from ici.adapters.orchestrators.default_orchestrator import DefaultOrchestrator 12 | 13 | __all__ = ["DefaultOrchestrator"] -------------------------------------------------------------------------------- /ici/adapters/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pipeline implementation for ingesting data from various sources. 3 | """ 4 | 5 | from ici.adapters.pipelines.default import DefaultIngestionPipeline 6 | 7 | __all__ = ["DefaultIngestionPipeline"] -------------------------------------------------------------------------------- /ici/adapters/preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Preprocessor implementations for various data sources. 3 | """ 4 | 5 | from ici.adapters.preprocessors.telegram import TelegramPreprocessor 6 | from ici.adapters.preprocessors.whatsapp import WhatsAppPreprocessor 7 | 8 | __all__ = ["TelegramPreprocessor", "WhatsAppPreprocessor"] -------------------------------------------------------------------------------- /ici/adapters/prompt_builders/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Prompt Builder module. 
3 | 4 | This package provides concrete implementations of the PromptBuilder interface 5 | for building prompts for language models. 6 | """ 7 | 8 | from ici.adapters.prompt_builders.basic_prompt_builder import BasicPromptBuilder 9 | 10 | __all__ = ["BasicPromptBuilder"] -------------------------------------------------------------------------------- /ici/adapters/user_id/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | User ID generator implementations. 3 | """ 4 | 5 | from ici.adapters.user_id.default_user_id_generator import DefaultUserIDGenerator 6 | 7 | __all__ = [ 8 | "DefaultUserIDGenerator", 9 | ] -------------------------------------------------------------------------------- /ici/adapters/validators/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Validator implementations. 3 | 4 | This package provides concrete implementations of the Validator interface 5 | for validating user input against security rules. 6 | """ 7 | 8 | from ici.adapters.validators.rule_based import RuleBasedValidator 9 | 10 | __all__ = ["RuleBasedValidator"] -------------------------------------------------------------------------------- /ici/adapters/vector_stores/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Vector Store implementations for the ICI framework. 3 | 4 | This package contains implementations of the VectorStore interface 5 | for different vector database technologies. 
6 | 7 | Available implementations: 8 | - ChromaDBStore: Vector store implementation using ChromaDB 9 | """ 10 | 11 | from ici.adapters.vector_stores.chroma import ChromaDBStore 12 | 13 | __all__ = ["ChromaDBStore"] -------------------------------------------------------------------------------- /ici/core/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Core module for the Intelligent Consciousness Interface (ICI). 3 | 4 | This module contains the core interfaces and exceptions that define the 5 | architecture of the ICI framework, establishing the contract that all 6 | implementations must follow. 7 | """ 8 | 9 | # Import all interfaces 10 | from ici.core.interfaces import ( 11 | Ingestor, 12 | Preprocessor, 13 | Embedder, 14 | VectorStore, 15 | Validator, 16 | PromptBuilder, 17 | Generator, 18 | Orchestrator, 19 | IngestionPipeline, 20 | Logger, 21 | ) 22 | 23 | # Import all exceptions 24 | from ici.core.exceptions import ( 25 | ICIError, 26 | IngestionError, 27 | IngestorError, 28 | APIAuthenticationError, 29 | APIRateLimitError, 30 | DataFetchError, 31 | PreprocessorError, 32 | IngestionPipelineError, 33 | QueryError, 34 | ValidationError, 35 | EmbeddingError, 36 | VectorStoreError, 37 | PromptBuilderError, 38 | GenerationError, 39 | OrchestratorError, 40 | ConfigurationError, 41 | LoggerError, 42 | ) 43 | 44 | # Export all interfaces and exceptions 45 | __all__ = [ 46 | # Interfaces 47 | "Ingestor", 48 | "Preprocessor", 49 | "Embedder", 50 | "VectorStore", 51 | "Validator", 52 | "PromptBuilder", 53 | "Generator", 54 | "Orchestrator", 55 | "IngestionPipeline", 56 | "Logger", 57 | # Exceptions 58 | "ICIError", 59 | "IngestionError", 60 | "IngestorError", 61 | "APIAuthenticationError", 62 | "APIRateLimitError", 63 | "DataFetchError", 64 | "PreprocessorError", 65 | "IngestionPipelineError", 66 | "QueryError", 67 | "ValidationError", 68 | "EmbeddingError", 69 | "VectorStoreError", 70 | "PromptBuilderError", 71 
class ICIError(Exception):
    """Root of the ICI exception hierarchy; every framework error derives from it."""


# ---------------------------------------------------------------------------
# Ingestion pipeline errors
# ---------------------------------------------------------------------------


class IngestionError(ICIError):
    """Parent class for every ingestion-related error."""


class IngestorError(IngestionError):
    """Parent class for every ingestor-related error."""


class APIAuthenticationError(IngestorError):
    """Signals that authenticating against an API failed."""


class APIRateLimitError(IngestorError):
    """Signals that an API rate limit was exceeded."""


class DataFetchError(IngestorError):
    """Signals that fetching data failed."""


class AuthenticationError(IngestorError):
    """Signals that authentication failed."""


class PreprocessorError(IngestionError):
    """Signals that preprocessing failed."""


class IngestionPipelineError(IngestionError):
    """Signals an error encountered by the ingestion pipeline."""


# ---------------------------------------------------------------------------
# Query pipeline errors
# ---------------------------------------------------------------------------


class QueryError(ICIError):
    """Parent class for every query-related error."""


class ValidationError(QueryError):
    """Signals that input validation failed."""


# NOTE: EmbeddingError and VectorStoreError derive directly from ICIError,
# not from QueryError.
class EmbeddingError(ICIError):
    """Signals that embedding generation failed."""


class VectorStoreError(ICIError):
    """Parent class for every vector store related error."""


class PromptBuilderError(QueryError):
    """Signals that prompt construction failed."""


class GenerationError(QueryError):
    """Signals that text generation failed."""


class OrchestratorError(QueryError):
    """Signals an error encountered by the orchestrator."""


# ---------------------------------------------------------------------------
# Chat history errors
# ---------------------------------------------------------------------------


class ChatError(ICIError):
    """Parent class for every chat-related error."""


class ChatHistoryError(ChatError):
    """Signals that a chat history operation failed."""


class ChatIDError(ChatError):
    """Signals that a chat ID is invalid or could not be found."""


class ChatStorageError(ChatError):
    """Signals that a chat storage operation failed."""


class UserIDError(ICIError):
    """Signals that user ID generation or validation failed."""


# ---------------------------------------------------------------------------
# Other errors
# ---------------------------------------------------------------------------


class ConfigurationError(ICIError):
    """Signals invalid configuration."""


class LoggerError(ICIError):
    """Signals an error encountered while logging."""


class ComponentLoadError(ICIError):
    """Signals that a component could not be loaded."""


# Public names exported by this module.
__all__ = [
    "ICIError",
    "IngestionError",
    "IngestorError",
    "APIAuthenticationError",
    "APIRateLimitError",
    "DataFetchError",
    "AuthenticationError",
    "PreprocessorError",
    "IngestionPipelineError",
    "QueryError",
    "ValidationError",
    "EmbeddingError",
    "VectorStoreError",
    "PromptBuilderError",
    "GenerationError",
    "OrchestratorError",
    "ChatError",
    "ChatHistoryError",
    "ChatIDError",
    "ChatStorageError",
    "UserIDError",
    "ConfigurationError",
    "LoggerError",
    "ComponentLoadError",
]
class Embedder(ABC):
    """
    Interface for components that generate vector embeddings from text data.

    The Embedder is shared between Ingestion and Query Pipelines to ensure
    identical embedding logic, crucial for accurate similarity matching.
    """

    @abstractmethod
    async def initialize(self) -> None:
        """
        Initialize the embedder with configuration parameters.

        This method should be called after the embedder instance is created,
        before any other methods are used. Configuration should be loaded from
        a central configuration source (e.g., config.yaml).

        Returns:
            None

        Raises:
            Exception: If initialization fails for any reason.
        """
        pass

    @abstractmethod
    async def embed(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
        """
        Generates a vector embedding from the input text.

        Args:
            text: The text to embed

        Returns:
            Tuple[List[float], Optional[Dict[str, Any]]]: A pair whose first item is
                the fixed-length vector of floats representing the text embedding and
                whose second item is an optional dictionary (may be None); the
                dictionary's contents are defined by the implementation.

        Raises:
            EmbeddingError: If embedding generation fails for any reason
        """
        pass

    @abstractmethod
    async def embed_batch(self, texts: List[str]) -> List[Tuple[List[float], Optional[Dict[str, Any]]]]:
        """
        Generates vector embeddings for multiple texts.

        This method should optimize batch processing for efficiency when embedding
        multiple texts at once.

        Args:
            texts: List of texts to embed

        Returns:
            List[Tuple[List[float], Optional[Dict[str, Any]]]]: One pair per input
                text, each shaped like the return value of ``embed``: the
                fixed-length embedding vector plus an optional dictionary (may be None).

        Raises:
            EmbeddingError: If batch embedding generation fails for any reason
        """
        pass

    @property
    @abstractmethod
    def dimensions(self) -> int:
        """
        Returns the dimensionality of the embeddings produced by this embedder.

        Returns:
            int: The number of dimensions in the embedding vectors
        """
        pass

    @abstractmethod
    def healthcheck(self) -> Dict[str, Any]:
        """
        Checks if the embedder is properly configured and functioning.

        Unlike the coroutine methods above, this method is declared synchronous.

        Returns:
            Dict[str, Any]: A dictionary containing health status information:
                {
                    'healthy': bool,  # Whether the embedder is functioning properly
                    'message': str,   # Optional message providing more details
                    'details': dict   # Optional additional details about the health check
                }

        Raises:
            EmbeddingError: If the health check itself encounters an error
        """
        pass

    def arguments(self) -> Dict[str, Any]:
        """
        Get the arguments used to initialize this embedder.

        This method is not abstract: the base implementation returns an empty
        dictionary, and subclasses may override it.

        Returns:
            Dict[str, Any]: Dictionary of initialization arguments
        """
        return {}
27 | """ 28 | pass 29 | 30 | @abstractmethod 31 | async def generate( 32 | self, prompt: str, generation_options: Optional[Dict[str, Any]] = None 33 | ) -> str: 34 | """ 35 | Generates an output based on the provided prompt. 36 | 37 | Generation options can include parameters like: 38 | - temperature: Controls randomness (0.0-2.0) 39 | - max_tokens: Limits response length 40 | - top_p: Controls diversity via nucleus sampling 41 | - frequency_penalty: Reduces word repetition 42 | - presence_penalty: Reduces topic repetition 43 | 44 | Args: 45 | prompt: The input prompt for the language model 46 | generation_options: Optional parameters to override defaults 47 | 48 | Returns: 49 | str: The generated text response 50 | 51 | Raises: 52 | GenerationError: If text generation fails for any reason 53 | """ 54 | pass 55 | 56 | @abstractmethod 57 | async def set_model(self, model: str) -> None: 58 | """ 59 | Sets the specific model to use for generation. 60 | 61 | Args: 62 | model: The model identifier (e.g., 'gpt-4', 'claude-2', 'mistral-7b') 63 | 64 | Raises: 65 | GenerationError: If the model is invalid or unavailable 66 | """ 67 | pass 68 | 69 | @abstractmethod 70 | async def set_default_options(self, options: Dict[str, Any]) -> None: 71 | """ 72 | Sets default options for all generation requests. 73 | 74 | Args: 75 | options: Dictionary of default generation parameters 76 | 77 | Raises: 78 | GenerationError: If any option is invalid 79 | """ 80 | pass 81 | 82 | @abstractmethod 83 | async def healthcheck(self) -> Dict[str, Any]: 84 | """ 85 | Checks if the generator is properly configured and can connect to the language model. 
class Ingestor(ABC):
    """
    Contract for components that pull raw data out of an external source.

    One Ingestor targets one data source and owns the authentication and
    API-specific logic for it. Implementations should be stateless; state
    information is expected to be kept externally in dedicated state storage.
    """

    @abstractmethod
    async def initialize(self) -> None:
        """
        Prepare the ingestor for use from its configuration.

        Call this once after construction and before any other method.
        Configuration should come from a central configuration source
        (e.g., config.yaml).

        Returns:
            None

        Raises:
            Exception: If initialization fails for any reason.
        """

    @abstractmethod
    async def fetch_full_data(self) -> Any:
        """
        Retrieve everything available, for an initial ingestion.

        Returns:
            Any: Raw data in a source-native format for the Preprocessor to handle.

        Raises:
            IngestorError: If data fetching fails for any reason.
        """

    @abstractmethod
    async def fetch_new_data(self, since: Optional[datetime] = None) -> Any:
        """
        Retrieve only the data newer than a timestamp (incremental ingestion).

        Args:
            since: Optional timestamp to fetch data from. If None, should use
                a reasonable default (e.g., last hour or day).

        Returns:
            Any: Raw data in a source-native format for the Preprocessor to handle.

        Raises:
            IngestorError: If data fetching fails for any reason.
        """

    @abstractmethod
    async def fetch_data_in_range(self, start: datetime, end: datetime) -> Any:
        """
        Retrieve the data that falls inside a date range.

        Args:
            start: Start timestamp for data range.
            end: End timestamp for data range.

        Returns:
            Any: Raw data in a source-native format for the Preprocessor to handle.

        Raises:
            IngestorError: If data fetching fails for any reason.
        """

    @abstractmethod
    async def healthcheck(self) -> Dict[str, Any]:
        """
        Report whether the ingestor is configured and can reach its data source.

        Returns:
            Dict[str, Any]: A dictionary containing health status information:
                {
                    'healthy': bool,  # Whether the ingestor is functioning properly
                    'message': str,   # Optional message providing more details
                    'details': dict   # Optional additional details about the health check
                }

        Raises:
            IngestorError: If the health check itself encounters an error.
        """
49 | 50 | Args: 51 | log_data: A dictionary with the structure: 52 | { 53 | "action": "ACTION_NAME", # The action or event being logged 54 | "message": "MESSAGE TEXT", # The log message 55 | "data": {} # Optional additional data as a dictionary 56 | } 57 | """ 58 | pass 59 | 60 | @abstractmethod 61 | def warning(self, log_data: Dict[str, Any]) -> None: 62 | """ 63 | Log a warning message with structured data. 64 | 65 | Args: 66 | log_data: A dictionary with the structure: 67 | { 68 | "action": "ACTION_NAME", # The action or event being logged 69 | "message": "MESSAGE TEXT", # The log message 70 | "data": {} # Optional additional data as a dictionary 71 | } 72 | """ 73 | pass 74 | 75 | @abstractmethod 76 | def error(self, log_data: Dict[str, Any]) -> None: 77 | """ 78 | Log an error message with structured data. 79 | 80 | Args: 81 | log_data: A dictionary with the structure: 82 | { 83 | "action": "ACTION_NAME", # The action or event being logged 84 | "message": "MESSAGE TEXT", # The log message 85 | "data": {}, # Optional additional data as a dictionary 86 | "exception": Exception # Optional exception object 87 | } 88 | """ 89 | pass 90 | 91 | @abstractmethod 92 | def critical(self, log_data: Dict[str, Any]) -> None: 93 | """ 94 | Log a critical message with structured data. 
class Orchestrator(ABC):
    """
    Contract for components that run the query pipeline end to end, coordinating
    the other components from validation through response generation.

    The Orchestrator centralizes query handling and rule/context management so
    the workflow stays consistent while the actual work is delegated to
    specialized components.
    """

    @abstractmethod
    async def initialize(self) -> None:
        """
        Prepare the orchestrator for use from its configuration.

        Call this once after construction and before any other method.
        Configuration should come from a central configuration source
        (e.g., config.yaml).

        Returns:
            None

        Raises:
            Exception: If initialization fails for any reason.
        """

    @abstractmethod
    async def process_query(self, source: str, user_id: str, query: str, additional_info: Dict[str, Any]) -> str:
        """
        Drive a single query from validation to the generated answer.

        Intended workflow:
        1. Fetch the validation rules for user_id dynamically
        2. Build the validation context from user_id and runtime data
        3. Validate the input with the validator
        4. On validation failure, return an appropriate error message
        5. Generate the query embedding
        6. Retrieve relevant documents using user-specific filters
        7. Construct the prompt with the prompt_builder
        8. Generate the response with the generator
        9. Return the final output or an error message

        Args:
            source: The source of the query
            user_id: Identifier for the user making the request
            query: The user input/question to process
            additional_info: Dictionary containing additional attributes and values

        Returns:
            str: The final response to the user

        Raises:
            OrchestratorError: If the orchestration process fails
        """

    @abstractmethod
    async def configure(self, config: Dict[str, Any]) -> None:
        """
        Apply the given settings to the orchestrator.

        Configuration can include:
        - num_results: Number of documents to retrieve
        - rules_source: Where to fetch validation rules from
        - context_filters: Metadata filters to apply
        - error_messages: Custom error messages
        - retry: Retry configuration

        Args:
            config: Dictionary containing configuration options

        Raises:
            OrchestratorError: If configuration is invalid
        """

    @abstractmethod
    def get_rules(self, user_id: str) -> List[Dict[str, Any]]:
        """
        Look up the validation rules that apply to a user.

        Rules come from the configured rules source (database, config files).
        This method is declared synchronous, unlike the coroutine methods of
        this interface.

        Args:
            user_id: Identifier for the user

        Returns:
            List[Dict[str, Any]]: List of validation rule dictionaries

        Raises:
            OrchestratorError: If rules cannot be retrieved
        """

    @abstractmethod
    async def build_context(self, user_id: str) -> Dict[str, Any]:
        """
        Assemble the validation context for a user.

        The context gathers user information, the current time, and any other
        runtime data needed for validation.

        Args:
            user_id: Identifier for the user

        Returns:
            Dict[str, Any]: Context dictionary for validation

        Raises:
            OrchestratorError: If context cannot be built
        """

    @abstractmethod
    async def healthcheck(self) -> Dict[str, Any]:
        """
        Report whether the orchestrator and all its components are configured and working.

        Returns:
            Dict[str, Any]: A dictionary containing health status information:
                {
                    'healthy': bool,  # Whether the orchestrator is functioning properly
                    'message': str,   # Optional message providing more details
                    'details': dict,  # Optional additional details about the health check
                    'components': {   # Health status of individual components
                        'validator': {...},
                        'embedder': {...},
                        'vector_store': {...},
                        'prompt_builder': {...},
                        'generator': {...}
                    }
                }

        Raises:
            OrchestratorError: If the health check itself encounters an error
        """
Configuration should be loaded from 20 | a central configuration source (e.g., config.yaml). 21 | 22 | Returns: 23 | None 24 | 25 | Raises: 26 | Exception: If initialization fails for any reason. 27 | """ 28 | pass 29 | 30 | @abstractmethod 31 | async def preprocess(self, raw_data: Any) -> List[Dict[str, Any]]: 32 | """ 33 | Transforms raw data into a list of standardized documents. 34 | 35 | The standardized document format should include at minimum: 36 | - 'text': str - The primary content to be embedded 37 | - 'metadata': Dict[str, Any] - Contextual data about the document 38 | 39 | Args: 40 | raw_data: Source-specific data from an Ingestor 41 | 42 | Returns: 43 | List[Dict[str, Any]]: A list of standardized documents, each with 'text' and 'metadata' fields. 44 | 45 | Raises: 46 | PreprocessorError: If preprocessing fails for any reason. 47 | """ 48 | pass 49 | 50 | @abstractmethod 51 | def healthcheck(self) -> Dict[str, Any]: 52 | """ 53 | Checks if the preprocessor is properly configured and functioning. 54 | 55 | Returns: 56 | Dict[str, Any]: A dictionary containing health status information: 57 | { 58 | 'healthy': bool, # Whether the preprocessor is functioning properly 59 | 'message': str, # Optional message providing more details 60 | 'details': dict # Optional additional details about the health check 61 | } 62 | 63 | Raises: 64 | PreprocessorError: If the health check itself encounters an error. 65 | """ 66 | pass 67 | -------------------------------------------------------------------------------- /ici/core/interfaces/prompt_builder.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Dict, Any, Optional 3 | 4 | 5 | class PromptBuilder(ABC): 6 | """ 7 | Interface for components that construct prompts for language models by integrating 8 | user input with retrieved documents. 
9 | 10 | The PromptBuilder combines user input with relevant context to create effective prompts, 11 | handling edge cases and providing fallback mechanisms. 12 | """ 13 | 14 | @abstractmethod 15 | async def initialize(self) -> None: 16 | """ 17 | Initialize the prompt builder with configuration parameters. 18 | 19 | This method should be called after the prompt builder instance is created, 20 | before any other methods are used. Configuration should be loaded from 21 | a central configuration source (e.g., config.yaml). 22 | 23 | Returns: 24 | None 25 | 26 | Raises: 27 | Exception: If initialization fails for any reason. 28 | """ 29 | pass 30 | 31 | @abstractmethod 32 | async def build_prompt( 33 | self, 34 | input: str, 35 | documents: List[Dict[str, Any]], 36 | max_context_length: Optional[int] = None, 37 | ) -> str: 38 | """ 39 | Constructs a prompt from the input and retrieved documents. 40 | 41 | Handles edge cases through specific fallback mechanisms: 42 | - No documents: Uses a fallback template 43 | - Empty or invalid input: Returns standardized error prompt 44 | - Excessive content: Implements truncation strategies to fit model context windows 45 | 46 | Args: 47 | input: The user input/question 48 | documents: List of relevant documents from the vector store 49 | max_context_length: Optional maximum length for context section 50 | 51 | Returns: 52 | str: Complete prompt for the language model 53 | 54 | Raises: 55 | PromptBuilderError: If prompt construction fails for any reason 56 | """ 57 | pass 58 | 59 | @abstractmethod 60 | async def set_template(self, template: str) -> None: 61 | """ 62 | Sets a custom template for the prompt builder. 
class UserIDGenerator(ABC):
    """
    Contract for components that create and validate user IDs.

    Implementations produce consistent user identifiers across the
    different sources/connectors, following one standardized format.
    """

    @abstractmethod
    async def initialize(self) -> None:
        """
        Prepare the user ID generator for use.

        Call this once, right after construction and before any other method.
        Implementations are expected to read their settings from the central
        configuration source (e.g., config.yaml).

        Returns:
            None

        Raises:
            UserIDError: If initialization fails for any reason.
        """
        ...

    @abstractmethod
    async def generate_id(self, source: str, identifier: Optional[str] = None) -> str:
        """
        Build a unique user ID from a source and an identifier.

        Args:
            source: The connector/source type (e.g., 'cli', 'telegram', 'web')
            identifier: A unique identifier within that source. When None,
                the implementation generates an appropriate identifier.

        Returns:
            str: A unique composite user ID in the format "{source}:{identifier}"

        Raises:
            UserIDError: If ID generation fails or parameters are invalid
        """
        ...

    @abstractmethod
    async def validate_id(self, user_id: str) -> bool:
        """
        Check whether a user ID has a valid format.

        Args:
            user_id: The user ID to validate

        Returns:
            bool: True if valid, False otherwise
        """
        ...

    @abstractmethod
    async def parse_id(self, user_id: str) -> Dict[str, str]:
        """
        Split a user ID into its component parts.

        Args:
            user_id: The user ID to parse

        Returns:
            Dict[str, str]: A dictionary containing the 'source' and 'identifier'

        Raises:
            UserIDError: If the ID format is invalid and cannot be parsed
        """
        ...

    @abstractmethod
    async def healthcheck(self) -> Dict[str, Any]:
        """
        Report whether the user ID generator is configured and operational.

        Returns:
            Dict[str, Any]: Health status information:
                {
                    'healthy': bool,  # Whether the generator is functioning properly
                    'message': str,   # Optional message providing more details
                    'details': dict   # Optional additional details about the health check
                }

        Raises:
            UserIDError: If the health check itself encounters an error
        """
        ...
class VectorStore(ABC):
    """
    Contract for components that persist embedded documents and retrieve
    the most relevant ones by vector similarity.

    The interface hides the underlying storage technology, so a deployment
    can move from a local store to a distributed one, and it supports
    advanced metadata filtering.
    """

    @abstractmethod
    async def initialize(self) -> None:
        """
        Prepare the vector store for use.

        Call this once, right after construction and before any other method.
        Implementations are expected to read their settings from the central
        configuration source (e.g., config.yaml).

        Returns:
            None

        Raises:
            Exception: If initialization fails for any reason.
        """
        ...

    @abstractmethod
    def store_documents(self, documents: List[Dict[str, Any]]) -> None:
        """
        Persist documents together with their vectors, text, and metadata.

        Each input document is expected to look like:
        - 'vector': List[float] - Embedding vector
        - 'text': str - Original text content
        - 'metadata': Dict[str, Any] - Contextual data (e.g., source, timestamp)

        Args:
            documents: List of documents to store

        Raises:
            VectorStoreError: If document storage fails for any reason
        """
        ...

    @abstractmethod
    def search(
        self,
        query_vector: List[float],
        num_results: int = 5,
        filters: Optional[Dict[str, Any]] = None,
    ) -> List[Dict[str, Any]]:
        """
        Return the documents most similar to the query vector.

        Metadata filters may use comparison operators:
        - Equality: {'source': 'Twitter'}
        - Greater than/less than: {'timestamp': {'gte': 1698777600}}
        - Array containment: {'tags': {'in': ['important', 'urgent']}}
        - Logical combinations: {'$and': [{'source': 'Twitter'}, {'timestamp': {'gte': 1698777600}}]}

        Args:
            query_vector: The vector to search for
            num_results: Number of results to return
            filters: Optional metadata filters to apply during search

        Returns:
            List[Dict[str, Any]]: List of documents, each containing:
                - 'text': Original text content
                - 'metadata': Original metadata
                - 'score': Similarity score (higher is more similar)

        Raises:
            VectorStoreError: If the search operation fails for any reason
        """
        ...

    @abstractmethod
    def delete(
        self,
        document_ids: Optional[List[str]] = None,
        filters: Optional[Dict[str, Any]] = None,
    ) -> int:
        """
        Remove documents from the vector store, selected by ID or by filter.

        Args:
            document_ids: Optional list of document IDs to delete
            filters: Optional metadata filters to select documents for deletion

        Returns:
            int: Number of documents deleted

        Raises:
            VectorStoreError: If the delete operation fails for any reason
        """
        ...

    @abstractmethod
    def count(self, filters: Optional[Dict[str, Any]] = None) -> int:
        """
        Count stored documents, optionally restricted by metadata filters.

        Args:
            filters: Optional metadata filters to apply

        Returns:
            int: Number of documents matching the filter

        Raises:
            VectorStoreError: If the count operation fails for any reason
        """
        ...

    @abstractmethod
    def healthcheck(self) -> Dict[str, Any]:
        """
        Report whether the vector store is configured and operational.

        Returns:
            Dict[str, Any]: Health status information:
                {
                    'healthy': bool,  # Whether the vector store is functioning properly
                    'message': str,   # Optional message providing more details
                    'details': dict   # Optional additional details about the health check
                }

        Raises:
            VectorStoreError: If the health check itself encounters an error
        """
        ...

    @abstractmethod
    def add_documents(
        self, documents: List[Dict[str, Any]], vectors: List[List[float]]
    ) -> List[str]:
        """
        Persist documents alongside separately supplied vector embeddings.

        Args:
            documents: List of documents to store
            vectors: List of vector embeddings for the documents

        Returns:
            List[str]: List of document IDs

        Raises:
            VectorStoreError: If document storage fails for any reason
        """
        ...
def load_component_class(class_path: str) -> Type[Any]:
    """
    Dynamically load a class from a fully qualified path string.

    Args:
        class_path: Fully qualified class path
            (e.g., 'ici.adapters.ingestors.telegram.TelegramIngestor')

    Returns:
        Type[Any]: The loaded class

    Raises:
        ComponentLoadError: If the class cannot be loaded. The original
            exception is attached as ``__cause__``.
    """
    try:
        # Everything before the last dot is the module, the rest is the class name.
        # A path without any dot raises ValueError here and is reported by the
        # generic handler below.
        module_path, class_name = class_path.rsplit('.', 1)
        module = importlib.import_module(module_path)
        return getattr(module, class_name)

    except ImportError as e:
        raise ComponentLoadError(f"Failed to import module for component {class_path}: {str(e)}") from e
    except AttributeError as e:
        raise ComponentLoadError(f"Class not found in module: {class_path}: {str(e)}") from e
    except Exception as e:
        raise ComponentLoadError(f"Failed to load component class {class_path}: {str(e)}") from e


async def instantiate_component(class_path: str, config: Optional[Dict[str, Any]] = None) -> Any:
    """
    Dynamically instantiate a component from its class path and initialize it.

    The component is constructed without arguments when ``config`` is None,
    otherwise with ``config`` expanded as keyword arguments. If the instance
    exposes a callable ``initialize`` attribute it is invoked; when that call
    returns an awaitable (e.g. ``initialize`` is an ``async def``), the
    awaitable is awaited before the component is returned.

    Args:
        class_path: Fully qualified class path
        config: Configuration for the component

    Returns:
        Any: The instantiated and initialized component

    Raises:
        ComponentLoadError: If the component cannot be instantiated or initialized
    """
    import inspect  # local import: only needed for the awaitable check below

    try:
        component_class = load_component_class(class_path)

        component = component_class() if config is None else component_class(**config)

        initializer = getattr(component, 'initialize', None)
        if callable(initializer):
            # Inspect the *result* of the call, not the bound method: a
            # coroutine function has no __await__ attribute, only the coroutine
            # object it returns does. Checking the method itself meant async
            # initializers were called but never awaited.
            outcome = initializer()
            if inspect.isawaitable(outcome):
                await outcome

        return component

    except ComponentLoadError:
        # Already carries a precise message from load_component_class
        raise
    except Exception as e:
        raise ComponentLoadError(
            f"Failed to instantiate or initialize component {class_path}: {str(e)}"
        ) from e
def ensure_tz_aware(dt: Optional[datetime]) -> Optional[datetime]:
    """
    Ensure a datetime is timezone-aware (UTC if naive).

    A naive datetime is tagged as UTC without shifting its clock value;
    an aware datetime is returned unchanged.

    Args:
        dt: The datetime to process, can be None

    Returns:
        The timezone-aware datetime (or None if input was None)
    """
    if dt is None:
        return None

    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt


def to_utc(dt: Optional[datetime]) -> Optional[datetime]:
    """
    Convert a datetime to UTC.

    Naive datetimes are assumed to already be in UTC (see ensure_tz_aware).

    Args:
        dt: The datetime to convert, can be None

    Returns:
        The UTC datetime (or None if input was None)
    """
    if dt is None:
        return None

    # Make the value aware first so astimezone() never falls back to local time
    dt = ensure_tz_aware(dt)

    if dt.tzinfo != timezone.utc:
        return dt.astimezone(timezone.utc)
    return dt


def from_timestamp(timestamp: Union[int, float]) -> datetime:
    """
    Create a timezone-aware UTC datetime from a timestamp.

    Args:
        timestamp: Unix timestamp (seconds since epoch)

    Returns:
        Timezone-aware datetime in UTC
    """
    return datetime.fromtimestamp(timestamp, tz=timezone.utc)


def from_isoformat(iso_string: str) -> datetime:
    """
    Create a timezone-aware datetime from an ISO format string.

    If the string has no timezone info, UTC is assumed. A trailing 'Z'
    (or 'z') UTC designator is accepted on every supported Python version.

    Args:
        iso_string: ISO 8601 formatted datetime string

    Returns:
        Timezone-aware datetime

    Raises:
        ValueError: If the string is not a valid ISO 8601 datetime
    """
    # datetime.fromisoformat() only understands the 'Z' suffix from Python 3.11
    # onwards; rewrite it to the equivalent explicit offset so older
    # interpreters parse it too.
    if iso_string[-1:] in ("Z", "z"):
        iso_string = iso_string[:-1] + "+00:00"

    return ensure_tz_aware(datetime.fromisoformat(iso_string))
def load_env(env_file: Optional[str] = None) -> None:
    """
    Load environment variables from a .env file.

    Values from the file override variables that are already set in the
    process environment. When the file does not exist a warning is printed
    and nothing is loaded.

    Args:
        env_file: Path to the .env file. If None, looks for .env in the current directory.
    """
    if env_file is None:
        env_file = ".env"

    if not os.path.exists(env_file):
        print(f"Warning: Environment file {env_file} not found.")
        print("Create one by copying .env.example: cp .env.example .env")
        return

    # override=True: the file wins over variables already present in os.environ
    load_dotenv(env_file, override=True)


def _read_env_var_names(env_file: str) -> list:
    """
    Return the variable names declared in a .env file.

    Only ``KEY=value`` lines are considered (an optional leading ``export``
    is ignored); blank lines and ``#`` comments are skipped. An unreadable
    or missing file yields an empty list.
    """
    names = []
    try:
        with open(env_file, encoding="utf-8") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if not entry or entry.startswith("#") or "=" not in entry:
                    continue
                key = entry.split("=", 1)[0].strip()
                if key.startswith("export "):
                    key = key[len("export "):].strip()
                if key:
                    names.append(key)
    except OSError:
        return []
    return names


def main(argv=None) -> None:
    """
    Command-line entry point: load a .env file and list the loaded names.

    Args:
        argv: Argument list to parse; defaults to sys.argv[1:] when None.
    """
    parser = argparse.ArgumentParser(description="Load environment variables from a .env file")
    parser.add_argument("--env-file", type=str, help="Path to the .env file")
    args = parser.parse_args(argv)

    load_env(args.env_file)

    # Show names only, never values. Names are parsed from the file's KEY=value
    # lines rather than substring-matched against its raw text, so unrelated
    # environment variables are not listed and a missing file no longer
    # crashes after load_env() has already warned about it.
    declared = set(_read_env_var_names(args.env_file or ".env"))
    for var in sorted(name for name in declared if name in os.environ):
        print(f" - {var}: ***")


if __name__ == "__main__":
    main()
@echo off
setlocal enabledelayedexpansion

:: ICI Core installer for Windows.
:: Verifies that git and Python are available, locates (or clones) the
:: repository, then hands over to setup.bat inside the repository.

:: Colors for output
set GREEN=[92m
set YELLOW=[93m
set RED=[91m
set NC=[0m

:: Repository details
set REPO_URL=https://github.com/sidetrip-ai/ici-core.git
set REPO_NAME=ici-core

:: Check if git is installed
call :check_git || exit /b 1

:: Check if Python is installed
call :check_python || exit /b 1

:: Check if repository exists and clone if needed
call :check_repo || exit /b 1

:: Run the setup script
echo %YELLOW%Running setup script...%NC%
if exist "./setup.bat" (
    call setup.bat
) else (
    echo %RED%Setup script not found.%NC%
    exit /b 1
)

goto :eof

:check_git
:: Returns 0 when git is on PATH, 1 otherwise
where git >nul 2>&1
if %ERRORLEVEL% neq 0 (
    echo %RED%Git is not installed.%NC%
    echo %YELLOW%Please install git first:%NC%
    echo   For Windows: %GREEN%https://git-scm.com/download/win%NC%
    exit /b 1
)
echo %GREEN%Git is installed.%NC%
exit /b 0

:check_python
:: Returns 0 when python3 is on PATH, 1 otherwise
where python3 >nul 2>&1
if %ERRORLEVEL% neq 0 (
    echo %RED%Python 3 is not installed.%NC%
    echo %YELLOW%Please install Python 3 first:%NC%
    echo   For Windows: %GREEN%https://www.python.org/downloads/%NC%
    exit /b 1
)
echo %GREEN%Python is installed.%NC%
exit /b 0

:find_repo
:: Sets REPO_PATH and returns 0 when the repository directory is found in the
:: current directory, the parent directory or the user profile; returns 1 otherwise.

:: First check current directory
if exist "%REPO_NAME%" (
    set REPO_PATH=%CD%\%REPO_NAME%
    exit /b 0
)

:: Then check parent directory
if exist "..\%REPO_NAME%" (
    pushd ..
    set REPO_PATH=!CD!\%REPO_NAME%
    popd
    exit /b 0
)

:: Then check user home directory
if exist "%USERPROFILE%\%REPO_NAME%" (
    set REPO_PATH=%USERPROFILE%\%REPO_NAME%
    exit /b 0
)

exit /b 1

:check_repo
:: Changes into the repository directory, cloning it first when it is not found.
:: Each step is a separate top-level statement on purpose: %ERRORLEVEL% inside
:: a parenthesized block is expanded when the whole block is parsed, so the
:: clone result would otherwise be compared against the stale value left by
:: :find_repo and a successful clone would be reported as a failure.
call :find_repo
if %ERRORLEVEL% equ 0 (
    echo %GREEN%Repository found at: %REPO_PATH%%NC%
    cd /d "%REPO_PATH%"
    exit /b 0
)

:: :find_repo already established that %REPO_NAME% does not exist in the
:: current directory, so no clean-up of a stale directory is needed here.
echo %YELLOW%Repository not found. Cloning from %REPO_URL%...%NC%

:: Clone the repository
git clone "%REPO_URL%"
if %ERRORLEVEL% neq 0 (
    echo %RED%Failed to clone repository.%NC%
    echo %YELLOW%Please try the manual installation method from the README.%NC%
    exit /b 1
)

:: Change to the repository directory
cd /d "%REPO_NAME%"
if %ERRORLEVEL% neq 0 (
    echo %RED%Failed to change to repository directory.%NC%
    exit /b 1
)

echo %GREEN%Repository cloned successfully!%NC%
exit /b 0
# Repository details
REPO_URL="https://github.com/sidetrip-ai/ici-core.git"
REPO_NAME="ici-core"

# Abort with installation hints when git is not available on PATH
check_git() {
    command -v git &> /dev/null && return

    echo -e "${RED}Git is not installed.${NC}"
    echo -e "${YELLOW}Please install git first:${NC}"
    echo -e "  For Ubuntu/Debian: ${GREEN}sudo apt-get install git${NC}"
    echo -e "  For macOS: ${GREEN}brew install git${NC}"
    echo -e "  For Windows: ${GREEN}https://git-scm.com/download/win${NC}"
    exit 1
}

# Abort with installation hints when python3 is not available on PATH
check_python() {
    command -v python3 &> /dev/null && return

    echo -e "${RED}Python 3 is not installed.${NC}"
    echo -e "${YELLOW}Please install Python 3 first:${NC}"
    echo -e "  For Ubuntu/Debian: ${GREEN}sudo apt-get install python3 python3-venv${NC}"
    echo -e "  For macOS: ${GREEN}brew install python3${NC}"
    echo -e "  For Windows: ${GREEN}https://www.python.org/downloads/${NC}"
    exit 1
}

# Print the repository path and return 0 when it is found in the current,
# parent or home directory (checked in that order); return 1 otherwise
find_repo() {
    if [ -d "$REPO_NAME" ]; then
        echo "$(pwd)/$REPO_NAME"
    elif [ -d "../$REPO_NAME" ]; then
        echo "$(cd .. && pwd)/$REPO_NAME"
    elif [ -d "$HOME/$REPO_NAME" ]; then
        echo "$HOME/$REPO_NAME"
    else
        return 1
    fi
    return 0
}

# Enter the repository directory, cloning it first when it is not found.
# Returns 0 when an existing checkout was used and 1 after a fresh clone.
check_repo() {
    local repo_path
    repo_path=$(find_repo)

    if [ -n "$repo_path" ]; then
        echo -e "${GREEN}Repository found at: $repo_path${NC}"
        cd "$repo_path"
        return 0
    fi

    echo -e "${YELLOW}Repository not found. Cloning from $REPO_URL...${NC}"
    if ! git clone "$REPO_URL"; then
        echo -e "${RED}Failed to clone repository.${NC}"
        exit 1
    fi
    cd "$REPO_NAME"
    return 1
}

# Main execution
echo -e "${YELLOW}========== ICI Core Installation Script ==========${NC}"

# Prerequisites first, then make sure we are inside the repository
check_git
check_python
check_repo

# Hand over to the repository's own setup script
echo -e "${YELLOW}Running setup script...${NC}"
if [ ! -f "./setup.sh" ]; then
    echo -e "${RED}Setup script not found.${NC}"
    exit 1
fi
bash ./setup.sh
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | pyyaml>=6.0 3 | 4 | # Development dependencies 5 | pytest>=7.0.0 6 | pytest-cov>=4.0.0 7 | pytest-asyncio>=0.25.0 8 | black>=25.0.1 9 | 10 | sentence-transformers>=3.4.1 # For text embeddings 11 | torch>=2.2.0 # Required for sentence-transformers 12 | faiss-cpu>=1.7.0 # For vector similarity search 13 | telethon>=1.39.0 # For Telegram API access 14 | chromadb>=0.6.3 # For ChromaDB vector database 15 | numpy>=2.2.2 # Required for vector operations 16 | logtail-python>=0.3.3 17 | openai>=1.68.0 18 | langchain>=0.3.21 # Core LangChain functionality 19 | langchain-openai>=0.1.0 # For OpenAI integration 20 | langchain-community>=0.3.20 # For additional model providers (including Ollama) 21 | langchain-anthropic>=0.3.10 # For Claude models 22 | langchain-ollama>=0.3.0 23 | python-dotenv>=1.0.1 24 | -------------------------------------------------------------------------------- /services/whatsapp-service/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | 4 | # Session data 5 | data/sessions/ 6 | 7 | # Logs 8 | logs/ 9 | *.log 10 | npm-debug.log* 11 | 12 | # Environment variables 13 | .env 14 | 15 | # Editor directories and files 16 | .idea/ 17 | .vscode/ 18 | *.swp 19 | *.swo 20 | 21 | # OS files 22 | .DS_Store 23 | Thumbs.db -------------------------------------------------------------------------------- /services/whatsapp-service/README.md: -------------------------------------------------------------------------------- 1 | # WhatsApp Service 2 | 3 | This service provides a REST API and WebSocket interface for integrating WhatsApp messaging into the ICI system. It uses the [whatsapp-web.js](https://github.com/pedroslopez/whatsapp-web.js) library to interact with WhatsApp Web. 
4 | 5 | ## Features 6 | 7 | - Multiple WhatsApp sessions management 8 | - QR code authentication 9 | - REST API for sending and receiving messages 10 | - WebSocket interface for real-time updates 11 | - Session persistence 12 | 13 | ## Directory Structure 14 | 15 | ``` 16 | whatsapp-service/ 17 | ├── config.js # Configuration file 18 | ├── data/ 19 | │ └── sessions/ # WhatsApp session data 20 | ├── package.json # Dependencies and scripts 21 | ├── src/ 22 | │ ├── api/ # REST API endpoints 23 | │ │ └── routes/ # API routes 24 | │ ├── client/ # WhatsApp client implementation 25 | │ ├── utils/ # Utility functions 26 | │ ├── websocket/ # WebSocket server 27 | │ └── index.js # Main entry point 28 | └── README.md # This file 29 | ``` 30 | 31 | ## API Endpoints 32 | 33 | ### Session Management 34 | 35 | - `GET /api/sessions` - List all active WhatsApp sessions 36 | - `POST /api/sessions` - Create a new WhatsApp session 37 | - `GET /api/sessions/:sessionId` - Get session status 38 | - `GET /api/sessions/:sessionId/qr` - Get QR code for session authentication 39 | - `DELETE /api/sessions/:sessionId` - Logout and destroy a session 40 | 41 | ### Messaging 42 | 43 | - `POST /api/messages/:sessionId/send` - Send a message 44 | - `GET /api/messages/:sessionId/chats` - Get all chats 45 | - `GET /api/messages/:sessionId/chat/:chatId` - Get messages from a specific chat 46 | - `GET /api/messages/:sessionId/contacts` - Get all contacts 47 | 48 | ## WebSocket Interface 49 | 50 | The WebSocket server provides real-time updates for: 51 | 52 | - Connection state changes 53 | - Incoming messages 54 | 55 | ### Events 56 | 57 | - `connection_update` - When the connection state changes (e.g., QR code received, authenticated, connected) 58 | - `message` - When a new message is received 59 | 60 | ## Installation and Setup 61 | 62 | 1. Install dependencies: 63 | ``` 64 | npm install 65 | ``` 66 | 67 | 2. 
Start the service: 68 | ``` 69 | npm start 70 | ``` 71 | 72 | For development: 73 | ``` 74 | npm run dev 75 | ``` 76 | 77 | ## Authentication Flow 78 | 79 | 1. Create a new session using the REST API 80 | 2. Retrieve the QR code using the API 81 | 3. Scan the QR code with your WhatsApp mobile app 82 | 4. Once authenticated, the session will be saved and can be reused 83 | 84 | ## Notes 85 | 86 | - Puppeteer is required for WhatsApp Web interaction 87 | - Session data is stored in the `data/sessions` directory 88 | - Multiple WhatsApp accounts can be used simultaneously with different session IDs -------------------------------------------------------------------------------- /services/whatsapp-service/config.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | 3 | /** 4 | * Configuration for the WhatsApp service 5 | */ 6 | module.exports = { 7 | // Server settings 8 | port: process.env.PORT || 3006, 9 | wsPort: process.env.WS_PORT || 3005, 10 | 11 | // WhatsApp client settings 12 | clientOptions: { 13 | puppeteer: { 14 | headless: true, 15 | args: ['--no-sandbox', '--disable-setuid-sandbox'] 16 | } 17 | }, 18 | 19 | // Session settings 20 | sessions: { 21 | dataPath: path.join(__dirname, "services", "whatsapp-service", "data", "sessions"), 22 | sessionFile: 'session.json' 23 | }, 24 | 25 | // API settings 26 | api: { 27 | maxMessages: 1000, // Maximum number of messages to return in a single request 28 | } 29 | }; -------------------------------------------------------------------------------- /services/whatsapp-service/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "whatsapp-service", 3 | "version": "1.0.0", 4 | "description": "WhatsApp Web JS service for ICI system", 5 | "main": "src/index.js", 6 | "scripts": { 7 | "start": "node src/index.js", 8 | "dev": "nodemon src/index.js" 9 | }, 10 | "dependencies": { 11 | "cors": "^2.8.5", 
12 | "express": "^4.18.2", 13 | "qrcode": "^1.5.3", 14 | "whatsapp-web.js": "^1.27.0", 15 | "ws": "^8.14.2" 16 | }, 17 | "devDependencies": { 18 | "nodemon": "^3.0.1" 19 | } 20 | } -------------------------------------------------------------------------------- /services/whatsapp-service/src/api/routes/auth.js: -------------------------------------------------------------------------------- 1 | /** 2 | * WhatsApp authentication routes 3 | */ 4 | 5 | const express = require('express'); 6 | const whatsAppClient = require('../../client/whatsapp-client'); 7 | const eventEmitter = require('../../utils/event-emitter'); 8 | 9 | const router = express.Router(); 10 | 11 | /** 12 | * GET /api/status 13 | * Get the current status of the WhatsApp client 14 | */ 15 | router.get('/status', async (req, res) => { 16 | const status = whatsAppClient.getStatus(); 17 | res.json(status); 18 | }); 19 | 20 | /** 21 | * GET /api/qr 22 | * Get the current QR code or generate a new one 23 | */ 24 | router.get('/qr', async (req, res) => { 25 | try { 26 | // Initialize client if not already done 27 | if (!whatsAppClient.initialized) { 28 | await whatsAppClient.initialize(); 29 | } 30 | 31 | // If already connected, return an error 32 | if (whatsAppClient.status === 'CONNECTED') { 33 | return res.status(400).json({ 34 | success: false, 35 | message: 'Already connected to WhatsApp' 36 | }); 37 | } 38 | 39 | // If we have a QR code that's less than 2 minutes old, return it 40 | const qrCodeImage = whatsAppClient.getQrCodeImage(); 41 | if (qrCodeImage && whatsAppClient.lastQrTimestamp && 42 | Date.now() - whatsAppClient.lastQrTimestamp < 120000) { 43 | // Set content type to image/png 44 | res.setHeader('Content-Type', 'image/png'); 45 | 46 | // Convert data URL to buffer and send 47 | const imgData = qrCodeImage.split(',')[1]; 48 | const imgBuffer = Buffer.from(imgData, 'base64'); 49 | return res.send(imgBuffer); 50 | } 51 | 52 | // Generate a new QR code if none exists or it's too old 53 | 
const result = await whatsAppClient.generateNewQrCode(); 54 | 55 | if (!result.success) { 56 | return res.status(500).json({ 57 | success: false, 58 | message: result.message || 'Failed to generate QR code' 59 | }); 60 | } 61 | 62 | // Wait for a new QR code to be generated (with a timeout) 63 | let timeoutId = null; 64 | const waitForQrCode = new Promise((resolve, reject) => { 65 | // Set a timeout 66 | timeoutId = setTimeout(() => { 67 | reject(new Error('Timeout waiting for QR code')); 68 | }, 10000); 69 | 70 | // Listen for QR code event 71 | const handler = (data) => { 72 | clearTimeout(timeoutId); 73 | resolve(data); 74 | eventEmitter.off('whatsapp.qr', handler); // Remove listener 75 | }; 76 | 77 | eventEmitter.once('whatsapp.qr', handler); 78 | }); 79 | 80 | try { 81 | await waitForQrCode; 82 | 83 | // Get the newly generated QR code 84 | const newQrCodeImage = whatsAppClient.getQrCodeImage(); 85 | 86 | if (!newQrCodeImage) { 87 | return res.status(500).json({ 88 | success: false, 89 | message: 'Failed to generate QR code image' 90 | }); 91 | } 92 | 93 | // Set content type to image/png 94 | res.setHeader('Content-Type', 'image/png'); 95 | 96 | // Convert data URL to buffer and send 97 | const imgData = newQrCodeImage.split(',')[1]; 98 | const imgBuffer = Buffer.from(imgData, 'base64'); 99 | return res.send(imgBuffer); 100 | } catch (timeoutError) { 101 | clearTimeout(timeoutId); 102 | return res.status(500).json({ 103 | success: false, 104 | message: 'Timeout waiting for QR code' 105 | }); 106 | } 107 | } catch (error) { 108 | console.error('Error in QR code generation:', error); 109 | res.status(500).json({ 110 | success: false, 111 | message: error.message || 'Internal server error' 112 | }); 113 | } 114 | }); 115 | 116 | /** 117 | * POST /api/logout 118 | * Logout from WhatsApp Web 119 | */ 120 | router.post('/logout', async (req, res) => { 121 | try { 122 | const result = await whatsAppClient.logout(); 123 | res.json(result); 124 | } catch (error) { 
125 | console.error('Error in logout:', error); 126 | res.status(500).json({ 127 | success: false, 128 | message: error.message || 'Internal server error' 129 | }); 130 | } 131 | }); 132 | 133 | module.exports = router; -------------------------------------------------------------------------------- /services/whatsapp-service/src/api/routes/index.js: -------------------------------------------------------------------------------- 1 | const express = require('express'); 2 | const router = express.Router(); 3 | const sessionsRoutes = require('./sessions'); 4 | const messagesRoutes = require('./messages'); 5 | const logger = require('../../utils/logger'); 6 | 7 | // Health check endpoint 8 | router.get('/health', (req, res) => { 9 | res.json({ 10 | status: 'ok', 11 | service: 'whatsapp-service', 12 | timestamp: new Date().toISOString() 13 | }); 14 | }); 15 | 16 | // Mount routes 17 | router.use('/sessions', sessionsRoutes); 18 | router.use('/messages', messagesRoutes); 19 | 20 | // Handle 404 for API routes 21 | router.use((req, res) => { 22 | logger.warn(`API endpoint not found: ${req.method} ${req.originalUrl}`); 23 | res.status(404).json({ 24 | success: false, 25 | error: 'API endpoint not found' 26 | }); 27 | }); 28 | 29 | module.exports = router; -------------------------------------------------------------------------------- /services/whatsapp-service/src/api/routes/messages.js: -------------------------------------------------------------------------------- 1 | /** 2 | * WhatsApp messages routes 3 | */ 4 | 5 | const express = require('express'); 6 | const whatsAppClient = require('../../client/whatsapp-client'); 7 | 8 | const router = express.Router(); 9 | 10 | /** 11 | * GET /api/messages 12 | * Fetch messages from a specific chat 13 | */ 14 | router.get('/messages', async (req, res) => { 15 | try { 16 | const { chatId, limit } = req.query; 17 | 18 | if (!chatId) { 19 | return res.status(400).json({ 20 | success: false, 21 | message: 'Chat ID is required' 22 | 
}); 23 | } 24 | 25 | // Check if client is connected 26 | const status = whatsAppClient.getStatus(); 27 | if (status.status !== 'CONNECTED') { 28 | return res.status(400).json({ 29 | success: false, 30 | message: `WhatsApp is not connected. Current status: ${status.status}` 31 | }); 32 | } 33 | 34 | // Fetch messages 35 | const messages = await whatsAppClient.fetchMessages( 36 | chatId, 37 | limit ? parseInt(limit, 10) : 2000 38 | ); 39 | 40 | res.json({ 41 | success: true, 42 | messages 43 | }); 44 | } catch (error) { 45 | console.error('Error fetching messages:', error); 46 | res.status(500).json({ 47 | success: false, 48 | message: error.message || 'Failed to fetch messages' 49 | }); 50 | } 51 | }); 52 | 53 | /** 54 | * GET /api/fetch-all 55 | * Fetch all messages from all chats, optionally since a date 56 | */ 57 | router.get('/fetch-all', async (req, res) => { 58 | try { 59 | const { since } = req.query; 60 | 61 | // Check if client is connected 62 | const status = whatsAppClient.getStatus(); 63 | if (status.status !== 'CONNECTED') { 64 | return res.status(400).json({ 65 | success: false, 66 | message: `WhatsApp is not connected. 
Current status: ${status.status}` 67 | }); 68 | } 69 | 70 | // Parse since date if provided 71 | let sinceDate = null; 72 | if (since) { 73 | try { 74 | sinceDate = new Date(since); 75 | } catch (error) { 76 | return res.status(400).json({ 77 | success: false, 78 | message: 'Invalid date format for since parameter' 79 | }); 80 | } 81 | } 82 | 83 | // Fetch all messages 84 | const data = await whatsAppClient.fetchAllMessages(sinceDate); 85 | 86 | res.json({ 87 | success: true, 88 | ...data 89 | }); 90 | } catch (error) { 91 | console.error('Error fetching all messages:', error); 92 | res.status(500).json({ 93 | success: false, 94 | message: error.message || 'Failed to fetch messages' 95 | }); 96 | } 97 | }); 98 | 99 | /** 100 | * GET /api/chats 101 | * Get a list of all chats 102 | */ 103 | router.get('/chats', async (req, res) => { 104 | try { 105 | // Check if client is connected 106 | const status = whatsAppClient.getStatus(); 107 | if (status.status !== 'CONNECTED') { 108 | return res.status(400).json({ 109 | success: false, 110 | message: `WhatsApp is not connected. 
Current status: ${status.status}` 111 | }); 112 | } 113 | 114 | // Fetch all chats 115 | const data = await whatsAppClient.fetchAllMessages(); 116 | 117 | res.json({ 118 | success: true, 119 | chats: data.conversations 120 | }); 121 | } catch (error) { 122 | console.error('Error fetching chats:', error); 123 | res.status(500).json({ 124 | success: false, 125 | message: error.message || 'Failed to fetch chats' 126 | }); 127 | } 128 | }); 129 | 130 | module.exports = router; -------------------------------------------------------------------------------- /services/whatsapp-service/src/api/routes/sessions.js: -------------------------------------------------------------------------------- 1 | const express = require('express'); 2 | const router = express.Router(); 3 | const clientManager = require('../../client/client-manager'); 4 | const logger = require('../../utils/logger'); 5 | 6 | /** 7 | * GET /api/sessions 8 | * List all active WhatsApp sessions 9 | */ 10 | router.get('/', (req, res) => { 11 | try { 12 | const sessions = clientManager.getAllClientInfo(); 13 | res.json({ 14 | success: true, 15 | count: sessions.length, 16 | sessions 17 | }); 18 | } catch (error) { 19 | logger.error(`Error listing sessions: ${error.message}`, { error: error.stack }); 20 | res.status(500).json({ 21 | success: false, 22 | error: 'Failed to list sessions' 23 | }); 24 | } 25 | }); 26 | 27 | /** 28 | * POST /api/sessions 29 | * Create a new WhatsApp session 30 | */ 31 | router.post('/', async (req, res) => { 32 | try { 33 | let { sessionId } = req.body; 34 | 35 | // Generate a random session ID if not provided 36 | if (!sessionId) { 37 | sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 10)}`; 38 | } 39 | 40 | // Check if session already exists 41 | const existingClient = clientManager.getClient(sessionId); 42 | if (existingClient) { 43 | return res.status(409).json({ 44 | success: false, 45 | error: `Session ${sessionId} already exists`, 46 | sessionId 47 | 
}); 48 | } 49 | 50 | // Create new client 51 | const client = await clientManager.getOrCreateClient(sessionId); 52 | 53 | res.status(201).json({ 54 | success: true, 55 | message: 'WhatsApp session created', 56 | session: client.getInfo() 57 | }); 58 | } catch (error) { 59 | logger.error(`Error creating session: ${error.message}`, { error: error.stack }); 60 | res.status(500).json({ 61 | success: false, 62 | error: 'Failed to create WhatsApp session' 63 | }); 64 | } 65 | }); 66 | 67 | /** 68 | * GET /api/sessions/:sessionId 69 | * Get session status 70 | */ 71 | router.get('/:sessionId', (req, res) => { 72 | const { sessionId } = req.params; 73 | 74 | try { 75 | const client = clientManager.getClient(sessionId); 76 | if (!client) { 77 | return res.status(404).json({ 78 | success: false, 79 | error: `Session ${sessionId} not found` 80 | }); 81 | } 82 | 83 | res.json({ 84 | success: true, 85 | session: client.getInfo() 86 | }); 87 | } catch (error) { 88 | logger.error(`Error getting session ${sessionId}: ${error.message}`, { 89 | sessionId, 90 | error: error.stack 91 | }); 92 | res.status(500).json({ 93 | success: false, 94 | error: 'Failed to get session status' 95 | }); 96 | } 97 | }); 98 | 99 | /** 100 | * GET /api/sessions/:sessionId/qr 101 | * Get QR code for session 102 | */ 103 | router.get('/:sessionId/qr', async (req, res) => { 104 | const { sessionId } = req.params; 105 | 106 | try { 107 | const client = clientManager.getClient(sessionId); 108 | if (!client) { 109 | return res.status(404).json({ 110 | success: false, 111 | error: `Session ${sessionId} not found` 112 | }); 113 | } 114 | 115 | if (client.status !== 'qr_received') { 116 | return res.status(400).json({ 117 | success: false, 118 | error: `No QR code available for session ${sessionId}`, 119 | status: client.status 120 | }); 121 | } 122 | 123 | const qrDataUrl = await client.generateQRCodeDataUrl(); 124 | 125 | if (!qrDataUrl) { 126 | return res.status(404).json({ 127 | success: false, 128 | error: 
'QR code is not available' 129 | }); 130 | } 131 | 132 | res.json({ 133 | success: true, 134 | qrCode: qrDataUrl 135 | }); 136 | } catch (error) { 137 | logger.error(`Error getting QR code for session ${sessionId}: ${error.message}`, { 138 | sessionId, 139 | error: error.stack 140 | }); 141 | res.status(500).json({ 142 | success: false, 143 | error: 'Failed to get QR code' 144 | }); 145 | } 146 | }); 147 | 148 | /** 149 | * DELETE /api/sessions/:sessionId 150 | * Logout and destroy a session 151 | */ 152 | router.delete('/:sessionId', async (req, res) => { 153 | const { sessionId } = req.params; 154 | const { action = 'logout' } = req.query; // 'logout' or 'destroy' 155 | 156 | try { 157 | const client = clientManager.getClient(sessionId); 158 | if (!client) { 159 | return res.status(404).json({ 160 | success: false, 161 | error: `Session ${sessionId} not found` 162 | }); 163 | } 164 | 165 | // Perform the requested action 166 | let result = false; 167 | if (action === 'destroy') { 168 | result = await clientManager.closeClient(sessionId); 169 | } else { 170 | result = await clientManager.logoutClient(sessionId); 171 | } 172 | 173 | if (result) { 174 | res.json({ 175 | success: true, 176 | message: `Session ${sessionId} ${action === 'destroy' ? 'destroyed' : 'logged out'}` 177 | }); 178 | } else { 179 | res.status(500).json({ 180 | success: false, 181 | error: `Failed to ${action === 'destroy' ? 
'destroy' : 'logout'} session` 182 | }); 183 | } 184 | } catch (error) { 185 | logger.error(`Error deleting session ${sessionId}: ${error.message}`, { 186 | sessionId, 187 | error: error.stack 188 | }); 189 | res.status(500).json({ 190 | success: false, 191 | error: `Failed to delete session: ${error.message}` 192 | }); 193 | } 194 | }); 195 | 196 | module.exports = router; -------------------------------------------------------------------------------- /services/whatsapp-service/src/client/client-manager.js: -------------------------------------------------------------------------------- 1 | const WhatsAppClient = require('./whatsapp-client'); 2 | const logger = require('../utils/logger'); 3 | 4 | /** 5 | * Manager for multiple WhatsApp client instances 6 | */ 7 | class ClientManager { 8 | constructor() { 9 | this.clients = new Map(); 10 | } 11 | 12 | /** 13 | * Get or create a WhatsApp client 14 | * @param {string} sessionId - Session identifier 15 | * @returns {Promise} WhatsApp client instance 16 | */ 17 | async getOrCreateClient(sessionId) { 18 | if (!sessionId) { 19 | throw new Error('Session ID is required'); 20 | } 21 | 22 | // Return existing client if it exists 23 | if (this.clients.has(sessionId)) { 24 | logger.debug(`Using existing client for session ${sessionId}`); 25 | return this.clients.get(sessionId); 26 | } 27 | 28 | try { 29 | // Create new client 30 | logger.info(`Creating new client for session ${sessionId}`); 31 | const client = new WhatsAppClient(sessionId); 32 | this.clients.set(sessionId, client); 33 | 34 | // Initialize the client 35 | await client.initialize(); 36 | return client; 37 | } catch (error) { 38 | logger.error(`Failed to create client: ${error.message}`, { sessionId, error: error.stack }); 39 | throw error; 40 | } 41 | } 42 | 43 | /** 44 | * Get a client by session ID 45 | * @param {string} sessionId - Session identifier 46 | * @returns {WhatsAppClient|null} WhatsApp client instance or null if not found 47 | */ 48 | 
getClient(sessionId) { 49 | return this.clients.get(sessionId) || null; 50 | } 51 | 52 | /** 53 | * Close and destroy a client session 54 | * @param {string} sessionId - Session identifier 55 | * @returns {Promise} Success status 56 | */ 57 | async closeClient(sessionId) { 58 | const client = this.clients.get(sessionId); 59 | if (!client) { 60 | logger.warn(`Client session ${sessionId} not found for closing`); 61 | return false; 62 | } 63 | 64 | try { 65 | logger.info(`Closing client session ${sessionId}`); 66 | await client.destroy(); 67 | this.clients.delete(sessionId); 68 | return true; 69 | } catch (error) { 70 | logger.error(`Error closing client: ${error.message}`, { sessionId, error: error.stack }); 71 | return false; 72 | } 73 | } 74 | 75 | /** 76 | * Logout a client session 77 | * @param {string} sessionId - Session identifier 78 | * @returns {Promise} Success status 79 | */ 80 | async logoutClient(sessionId) { 81 | const client = this.clients.get(sessionId); 82 | if (!client) { 83 | logger.warn(`Client session ${sessionId} not found for logout`); 84 | return false; 85 | } 86 | 87 | try { 88 | logger.info(`Logging out client session ${sessionId}`); 89 | await client.logout(); 90 | this.clients.delete(sessionId); 91 | return true; 92 | } catch (error) { 93 | logger.error(`Error logging out client: ${error.message}`, { sessionId, error: error.stack }); 94 | return false; 95 | } 96 | } 97 | 98 | /** 99 | * Get all active clients 100 | * @returns {Array} List of client info 101 | */ 102 | getAllClientInfo() { 103 | const clientInfoList = []; 104 | 105 | for (const [sessionId, client] of this.clients.entries()) { 106 | clientInfoList.push(client.getInfo()); 107 | } 108 | 109 | return clientInfoList; 110 | } 111 | 112 | /** 113 | * Close all client sessions 114 | * @returns {Promise} 115 | */ 116 | async closeAllClients() { 117 | logger.info(`Closing all WhatsApp client sessions (${this.clients.size} clients)`); 118 | 119 | const closePromises = []; 120 | for 
(const [sessionId, client] of this.clients.entries()) { 121 | closePromises.push(client.destroy().catch(error => { 122 | logger.error(`Error closing client ${sessionId}: ${error.message}`, { error: error.stack }); 123 | })); 124 | } 125 | 126 | await Promise.all(closePromises); 127 | this.clients.clear(); 128 | } 129 | } 130 | 131 | // Create singleton instance 132 | const clientManager = new ClientManager(); 133 | 134 | module.exports = clientManager; -------------------------------------------------------------------------------- /services/whatsapp-service/src/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * WhatsApp Service - Main Entry Point 3 | */ 4 | const express = require('express'); 5 | const http = require('http'); 6 | const path = require('path'); 7 | const cors = require('cors'); 8 | const WebSocket = require('ws'); 9 | const whatsAppClient = require('./client/whatsapp-client'); 10 | const eventEmitter = require('./utils/event-emitter'); 11 | const config = require('../config'); 12 | 13 | // Import API routes 14 | const authRoutes = require('./api/routes/auth'); 15 | const messagesRoutes = require('./api/routes/messages'); 16 | 17 | // Create Express app 18 | const app = express(); 19 | const server = http.createServer(app); 20 | 21 | // Use middleware 22 | app.use(cors()); 23 | app.use(express.json()); 24 | app.use(express.urlencoded({ extended: true })); 25 | 26 | // Serve static files from 'public' directory 27 | app.use(express.static(path.join(__dirname, 'public'))); 28 | 29 | // Use API routes 30 | app.use('/api', authRoutes); 31 | app.use('/api', messagesRoutes); 32 | 33 | // Root route redirects to index.html 34 | app.get('/', (req, res) => { 35 | res.sendFile(path.join(__dirname, 'public', 'index.html')); 36 | }); 37 | 38 | // 404 handler 39 | app.use((req, res) => { 40 | res.status(404).json({ 41 | success: false, 42 | message: 'Route not found' 43 | }); 44 | }); 45 | 46 | // WebSocket 
server 47 | const wss = new WebSocket.Server({ 48 | server: server, 49 | path: '/ws' 50 | }); 51 | 52 | // WebSocket connection handler 53 | wss.on('connection', (ws) => { 54 | console.log('WebSocket client connected'); 55 | 56 | // Send initial status on connection 57 | ws.send(JSON.stringify({ 58 | type: 'status', 59 | data: whatsAppClient.getStatus() 60 | })); 61 | 62 | // Event handlers 63 | const handleQr = (data) => { 64 | ws.send(JSON.stringify({ 65 | type: 'qr', 66 | data: { 67 | timestamp: data.timestamp, 68 | hasQrCode: true 69 | } 70 | })); 71 | }; 72 | 73 | const handleStatusChange = (status) => { 74 | ws.send(JSON.stringify({ 75 | type: 'status', 76 | data: whatsAppClient.getStatus() 77 | })); 78 | }; 79 | 80 | // Register event listeners 81 | eventEmitter.on('whatsapp.qr', handleQr); 82 | eventEmitter.on('whatsapp.ready', handleStatusChange); 83 | eventEmitter.on('whatsapp.disconnected', handleStatusChange); 84 | eventEmitter.on('whatsapp.auth_failure', handleStatusChange); 85 | 86 | // Handle WebSocket messages (like ping) 87 | ws.on('message', (message) => { 88 | try { 89 | const data = JSON.parse(message); 90 | 91 | if (data.type === 'ping') { 92 | ws.send(JSON.stringify({ type: 'pong' })); 93 | } 94 | } catch (error) { 95 | console.error('Error parsing WebSocket message:', error); 96 | } 97 | }); 98 | 99 | // Handle WebSocket close 100 | ws.on('close', () => { 101 | console.log('WebSocket client disconnected'); 102 | 103 | // Remove event listeners 104 | eventEmitter.off('whatsapp.qr', handleQr); 105 | eventEmitter.off('whatsapp.ready', handleStatusChange); 106 | eventEmitter.off('whatsapp.disconnected', handleStatusChange); 107 | eventEmitter.off('whatsapp.auth_failure', handleStatusChange); 108 | }); 109 | }); 110 | 111 | // Initialize WhatsApp client 112 | whatsAppClient.initialize() 113 | .then(() => { 114 | console.log('WhatsApp client initialized'); 115 | }) 116 | .catch((error) => { 117 | console.error('Failed to initialize WhatsApp 
client:', error); 118 | }); 119 | 120 | // Start the server 121 | const port = config.port || 3000; 122 | server.listen(port, () => { 123 | console.log(`WhatsApp service running on port ${port}`); 124 | console.log(`Web interface available at http://localhost:${port}/`); 125 | }); -------------------------------------------------------------------------------- /services/whatsapp-service/src/utils/event-emitter.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Event emitter singleton for the WhatsApp service 3 | */ 4 | 5 | const EventEmitter = require('events'); 6 | 7 | // Create a singleton instance 8 | const eventEmitter = new EventEmitter(); 9 | 10 | // Increase max listeners to avoid warnings when many modules listen 11 | eventEmitter.setMaxListeners(20); 12 | 13 | module.exports = eventEmitter; -------------------------------------------------------------------------------- /services/whatsapp-service/src/utils/logger.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Simple logger utility for WhatsApp service 3 | */ 4 | 5 | // Log levels 6 | const LOG_LEVELS = { 7 | ERROR: 'ERROR', 8 | WARNING: 'WARNING', 9 | INFO: 'INFO', 10 | DEBUG: 'DEBUG' 11 | }; 12 | 13 | // Current log level 14 | const currentLevel = process.env.LOG_LEVEL || LOG_LEVELS.INFO; 15 | 16 | // Check if a level is enabled 17 | const isLevelEnabled = (level) => { 18 | const levels = Object.values(LOG_LEVELS); 19 | const currentIndex = levels.indexOf(currentLevel); 20 | const levelIndex = levels.indexOf(level); 21 | 22 | return levelIndex <= currentIndex; 23 | }; 24 | 25 | /** 26 | * Log a message at the specified level 27 | * @param {string} level - Log level 28 | * @param {string} message - Log message 29 | * @param {object} data - Additional data to log 30 | */ 31 | const log = (level, message, data = {}) => { 32 | if (!isLevelEnabled(level)) return; 33 | 34 | const timestamp = new 
Date().toISOString(); 35 | const logData = { 36 | timestamp, 37 | level, 38 | message, 39 | ...data 40 | }; 41 | 42 | console.log(JSON.stringify(logData)); 43 | }; 44 | 45 | // Export logger methods 46 | module.exports = { 47 | error: (message, data) => log(LOG_LEVELS.ERROR, message, data), 48 | warn: (message, data) => log(LOG_LEVELS.WARNING, message, data), 49 | info: (message, data) => log(LOG_LEVELS.INFO, message, data), 50 | debug: (message, data) => log(LOG_LEVELS.DEBUG, message, data), 51 | LOG_LEVELS 52 | }; -------------------------------------------------------------------------------- /services/whatsapp-service/src/utils/message-formatter.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Utility to format WhatsApp messages into a standardized format 3 | */ 4 | 5 | /** 6 | * Format a WhatsApp message object into a standardized format 7 | * @param {object} message - WhatsApp message object 8 | * @returns {object} Formatted message 9 | */ 10 | function formatMessage(message) { 11 | // Basic message info 12 | const formattedMessage = { 13 | id: message.id._serialized || message.id, 14 | timestamp: message.timestamp * 1000, // Convert to milliseconds 15 | from: message.from, 16 | fromMe: message.fromMe, 17 | chatId: message.chatId || message.from, 18 | type: message.type, 19 | }; 20 | 21 | // Handle different message types 22 | switch (message.type) { 23 | case 'chat': 24 | formattedMessage.body = message.body; 25 | break; 26 | 27 | case 'image': 28 | formattedMessage.body = message.caption || ''; 29 | formattedMessage.mimetype = message.mimetype; 30 | formattedMessage.hasMedia = true; 31 | break; 32 | 33 | case 'video': 34 | formattedMessage.body = message.caption || ''; 35 | formattedMessage.mimetype = message.mimetype; 36 | formattedMessage.hasMedia = true; 37 | break; 38 | 39 | case 'audio': 40 | formattedMessage.body = ''; 41 | formattedMessage.mimetype = message.mimetype; 42 | formattedMessage.hasMedia = 
true; 43 | break; 44 | 45 | case 'document': 46 | formattedMessage.body = message.caption || ''; 47 | formattedMessage.filename = message.filename; 48 | formattedMessage.mimetype = message.mimetype; 49 | formattedMessage.hasMedia = true; 50 | break; 51 | 52 | case 'location': 53 | formattedMessage.body = message.body || ''; 54 | formattedMessage.location = { 55 | latitude: message.location.latitude, 56 | longitude: message.location.longitude, 57 | description: message.location.description || '' 58 | }; 59 | break; 60 | 61 | case 'contact': 62 | formattedMessage.body = ''; 63 | formattedMessage.contacts = message.vCards.map(vcard => ({ vcard })); 64 | break; 65 | 66 | default: 67 | formattedMessage.body = message.body || ''; 68 | } 69 | 70 | // Handle optional properties if they exist 71 | if (message.quotedMsg) { 72 | formattedMessage.quotedMessage = { 73 | id: message.quotedMsg.id._serialized || message.quotedMsg.id, 74 | body: message.quotedMsg.body || '', 75 | type: message.quotedMsg.type 76 | }; 77 | } 78 | 79 | // Add metadata 80 | formattedMessage.metadata = { 81 | source: 'whatsapp', 82 | raw: { messageType: message.type } 83 | }; 84 | 85 | return formattedMessage; 86 | } 87 | 88 | /** 89 | * Format chat data into a standardized format 90 | * @param {object} chat - WhatsApp chat object 91 | * @returns {object} Formatted chat 92 | */ 93 | function formatChat(chat) { 94 | return { 95 | id: chat.id._serialized || chat.id, 96 | name: chat.name || '', 97 | isGroup: chat.isGroup, 98 | timestamp: chat.timestamp * 1000, // Convert to milliseconds 99 | unreadCount: chat.unreadCount, 100 | metadata: { 101 | source: 'whatsapp' 102 | } 103 | }; 104 | } 105 | 106 | /** 107 | * Format contact data into a standardized format 108 | * @param {object} contact - WhatsApp contact object 109 | * @returns {object} Formatted contact 110 | */ 111 | function formatContact(contact) { 112 | return { 113 | id: contact.id._serialized || contact.id, 114 | name: contact.name || 
contact.pushname || '', 115 | number: contact.number, 116 | metadata: { 117 | source: 'whatsapp', 118 | isMyContact: contact.isMyContact 119 | } 120 | }; 121 | } 122 | 123 | module.exports = { 124 | formatMessage, 125 | formatChat, 126 | formatContact 127 | }; -------------------------------------------------------------------------------- /setup.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | setlocal enabledelayedexpansion 3 | 4 | :: Colors for output 5 | set GREEN=[92m 6 | set YELLOW=[93m 7 | set RED=[91m 8 | set NC=[0m 9 | 10 | :: Default virtual environment directory name 11 | set VENV_DIR=venv 12 | 13 | :: Check if the script is being run from the project root directory 14 | if not exist "requirements.txt" ( 15 | echo %RED%Error: requirements.txt not found.%NC% 16 | echo %YELLOW%Please run this script from the project root directory.%NC% 17 | exit /b 1 18 | ) 19 | 20 | :: Function to check if running in an active virtual environment 21 | call :check_venv 22 | if %ERRORLEVEL% neq 0 ( 23 | call :setup_venv 24 | ) 25 | 26 | :: Install dependencies 27 | call :install_dependencies 28 | 29 | :: Verify all dependencies are installed 30 | call :verify_dependencies 31 | 32 | echo %GREEN%Setup completed successfully!%NC% 33 | echo %YELLOW%You can now run the application.%NC% 34 | 35 | :: Print next steps 36 | echo. 37 | echo %YELLOW%Next Steps:%NC% 38 | echo 1. To activate the virtual environment in a new terminal: 39 | echo %GREEN%%VENV_DIR%\Scripts\activate%NC% 40 | echo 2. Create your environment file: 41 | echo %GREEN%copy .env.example .env%NC% 42 | echo 3. Edit the .env file with your API keys: 43 | echo %GREEN%notepad .env%NC% 44 | echo Or use one of these commands: 45 | echo %GREEN%code .env%NC% # For Visual Studio Code 46 | echo %GREEN%start notepad .env%NC% # For Notepad 47 | echo %GREEN%start wordpad .env%NC% # For WordPad 48 | echo 4. 
:check_venv
:: Report whether the script is running inside an active virtual environment.
:: Exit code: 0 when VIRTUAL_ENV is set, 1 otherwise.
if not defined VIRTUAL_ENV (
    echo %YELLOW%No active virtual environment detected.%NC%
    exit /b 1
)
:: Echoed outside any parenthesised block and through delayed expansion so a
:: venv path that contains ")" cannot terminate a block early.
echo %GREEN%Active virtual environment detected: !VIRTUAL_ENV!%NC%
exit /b 0

:setup_venv
:: Create the virtual environment in VENV_DIR if it does not exist, then
:: activate it.
:: Exit code: 0 on success, 1 when creation or activation fails.
::
:: "if errorlevel 1" is evaluated at run time. The percent-form ERRORLEVEL
:: variable is expanded when the whole parenthesised block is parsed, i.e.
:: before "python3 -m venv" has run, so it can never see that command fail.
:: NOTE(review): python.org installs usually expose "python" / "py -3"
:: rather than "python3" on Windows - confirm the intended launcher.
if not exist "%VENV_DIR%\" (
    echo %YELLOW%Creating virtual environment in %VENV_DIR%...%NC%
    python3 -m venv "%VENV_DIR%"
    if errorlevel 1 (
        echo %RED%Failed to create virtual environment.%NC%
        echo %YELLOW%Please ensure Python 3 and venv are installed.%NC%
        exit /b 1
    )
) else (
    echo %GREEN%Virtual environment already exists in %VENV_DIR%.%NC%
)

echo %YELLOW%Activating virtual environment...%NC%
call "%VENV_DIR%\Scripts\activate"
if errorlevel 1 (
    echo %RED%Failed to activate virtual environment.%NC%
    exit /b 1
)
echo %GREEN%Virtual environment activated!%NC%
exit /b 0

:install_dependencies
:: Install dependencies from requirements.txt.
:: Exit code: 0 on success, 1 when pip reports a failure.
echo %YELLOW%Installing dependencies from requirements.txt...%NC%
python3 -m pip install -q -r requirements.txt
if errorlevel 1 (
    echo %RED%Failed to install dependencies.%NC%
    exit /b 1
)
echo %GREEN%Dependencies installed successfully!%NC%
exit /b 0

:verify_dependencies
:: Verify that every package listed in requirements.txt is installed.
:: Installed versions are only reported, not compared against the requirement.
:: Exit code: 0 when nothing is missing, 1 otherwise.
:: Comments inside the parenthesised blocks use "rem": a "::" label-style
:: comment inside a block can make cmd.exe fail to parse the block.
echo %YELLOW%Verifying installed dependencies...%NC%

set has_missing=0
set missing_packages=

:: Read requirements.txt and check each package
for /f "tokens=*" %%a in (requirements.txt) do (
    set line=%%a

    rem Skip comments and empty lines
    echo !line! | findstr /r "^#" > nul
    if !ERRORLEVEL! neq 0 (
        if not "!line!"=="" (
            rem Extract package name and version
            for /f "tokens=1,2 delims=>=" %%b in ("!line!") do (
                set package=%%b
                set package=!package: =!
                set version=%%c

                rem Check if package is installed
                python3 -m pip show !package! > nul 2>&1
                if !ERRORLEVEL! neq 0 (
                    set has_missing=1
                    set missing_packages=!missing_packages! - !line!
                ) else if not "!version!"=="" (
                    rem Version check is simplified in batch - just report the version
                    for /f "tokens=2" %%i in ('python3 -m pip show !package! ^| findstr "Version"') do (
                        set installed_version=%%i
                        echo %YELLOW%Package !package! installed version: !installed_version!, required: !version!%NC%
                    )
                )
            )
        )
    )
)

:: Report issues if any
if !has_missing! neq 0 (
    echo %RED%Some dependencies are missing.%NC%

    if not "!missing_packages!"=="" (
        echo %YELLOW%Missing packages:%NC%
        echo !missing_packages!
    )

    echo %YELLOW%Please run the following command to install packages:%NC%
    echo python3 -m pip install -r requirements.txt
    exit /b 1
)

echo %GREEN%All dependencies verified successfully!%NC%
:: Explicit success code: the loop above may leave a non-zero errorlevel
:: behind from the last findstr call.
exit /b 0
146 | ) 147 | 148 | echo %YELLOW%Please run the following command to install packages:%NC% 149 | echo python3 -m pip install -r requirements.txt 150 | exit /b 1 151 | ) 152 | 153 | echo %GREEN%All dependencies verified successfully!%NC% 154 | goto :eof 155 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="ici-core", 5 | version="0.1.0", 6 | description="Intelligent Consciousness Interface - Core Framework", 7 | author="ICI Team", 8 | packages=find_packages(), 9 | python_requires=">=3.8", 10 | install_requires=[ 11 | # Core dependencies - keep minimal 12 | "pyyaml>=6.0", # For configuration files 13 | "sentence-transformers>=3.4.1", # For text embeddings 14 | "torch>=2.6.0", # Required for sentence-transformers 15 | "faiss-cpu>=1.7.0", # For vector similarity search 16 | "chromadb>=0.6.3", # For ChromaDB vector database 17 | "numpy>=2.2.2", # Required for vector operations 18 | "telethon>=1.39.0", # For Telegram API access 19 | "logtail-python>=0.3.3", 20 | "openai>=1.68.0", 21 | "langchain>=0.3.21", # Core LangChain functionality 22 | "langchain-openai>=0.1.0", # For OpenAI integration 23 | "langchain-community>=0.3.20", # For additional model providers (including Ollama) 24 | "langchain-anthropic>=0.3.10", # For Claude models 25 | "langchain-ollama>=0.3.0", 26 | "python-dotenv>=1.0.1" 27 | ], 28 | extras_require={ 29 | "dev": [ 30 | "pytest>=7.0.0", # For testing 31 | "pytest-cov>=4.0.0", # For test coverage 32 | "black>=23.0.0", # For code formatting 33 | ] 34 | }, 35 | ) 36 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sidetrip-ai/ici-core/9f7fc8af18271522d08d47091b8143be42892122/tests/__init__.py 
@pytest.fixture
async def prompt_builder():
    """Provide a freshly initialized BasicPromptBuilder to each test."""
    instance = BasicPromptBuilder()
    await instance.initialize()
    return instance


@pytest.mark.asyncio
async def test_build_prompt_with_documents(prompt_builder):
    """Every document text and the question must appear in the prompt."""
    question = "What is the capital of France?"
    docs = [
        {"text": "Paris is the capital of France."},
        {"text": "France is a country in Western Europe."},
    ]

    rendered = await prompt_builder.build_prompt(question, docs)

    expected_fragments = (
        "Paris is the capital of France",
        "France is a country in Western Europe",
        "What is the capital of France?",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


@pytest.mark.asyncio
async def test_build_prompt_no_documents(prompt_builder):
    """An empty document list must select the fallback template."""
    question = "What is the capital of France?"

    rendered = await prompt_builder.build_prompt(question, [])

    # The fallback template is recognised by its "general knowledge" wording.
    assert "general knowledge" in rendered
    assert "What is the capital of France?" in rendered


@pytest.mark.asyncio
async def test_build_prompt_with_max_length(prompt_builder):
    """The context section must respect the max_context_length argument."""
    question = "What is AI?"
    long_text = "Artificial Intelligence (AI) is a broad field of computer science..." * 20
    limit = 100

    rendered = await prompt_builder.build_prompt(question, [{"text": long_text}], limit)

    # Split the template around the context placeholder: the piece after it,
    # with the question filled in, marks where the context section ends.
    parts = prompt_builder._template.split("{context}")
    tail = parts[1].format(question=question)
    head_and_context = rendered[:rendered.index(tail)]

    # Everything before the tail is template prefix plus (truncated) context.
    assert len(head_and_context) <= limit + len(parts[0])


@pytest.mark.asyncio
async def test_set_template(prompt_builder):
    """A custom template must be used for subsequent prompts."""
    await prompt_builder.set_template("Custom {context}\n\nQuery: {question}")

    rendered = await prompt_builder.build_prompt(
        "test question", [{"text": "test document"}]
    )

    assert rendered.startswith("Custom test document")
    assert "Query: test question" in rendered


@pytest.mark.asyncio
async def test_set_fallback_template(prompt_builder):
    """A custom fallback template must be used when no documents are given."""
    fallback = "No information available. Please answer: {question}"

    # NOTE(review): called without await, unlike set_template - confirm that
    # set_fallback_template is synchronous.
    prompt_builder.set_fallback_template(fallback)

    rendered = await prompt_builder.build_prompt("test question", [])

    assert rendered == "No information available. Please answer: test question"


@pytest.mark.asyncio
async def test_invalid_template(prompt_builder):
    """A template without the {question} placeholder must be rejected."""
    missing_question = "Context: {context}"

    with pytest.raises(PromptBuilderError):
        await prompt_builder.set_template(missing_question)


@pytest.mark.asyncio
async def test_healthcheck(prompt_builder):
    """The healthcheck must report a healthy status with message and details."""
    report = await prompt_builder.healthcheck()

    assert isinstance(report, dict)
    assert "healthy" in report
    assert report["healthy"] is True
    assert "message" in report
    assert "details" in report
5 | """ 6 | 7 | import pytest 8 | from typing import Dict, Any 9 | 10 | from ici.adapters.validators.rule_based import RuleBasedValidator 11 | from ici.core.exceptions import ValidationError 12 | 13 | 14 | @pytest.fixture 15 | async def validator(): 16 | """Create and initialize a RuleBasedValidator for testing.""" 17 | validator = RuleBasedValidator() 18 | await validator.initialize() 19 | return validator 20 | 21 | 22 | @pytest.mark.asyncio 23 | async def test_validate_command_line_source(validator): 24 | """Test validation with COMMAND_LINE source.""" 25 | # Setup 26 | rules = [] # Not used in current implementation 27 | failure_reasons = [] 28 | 29 | # Valid source (COMMAND_LINE) 30 | command_line_context = {"source": "COMMAND_LINE"} 31 | result = await validator.validate("test input", command_line_context, rules, failure_reasons) 32 | assert result is True 33 | assert len(failure_reasons) == 0 34 | 35 | # Invalid source (not COMMAND_LINE) 36 | failure_reasons.clear() 37 | web_context = {"source": "WEB"} 38 | result = await validator.validate("test input", web_context, rules, failure_reasons) 39 | assert result is False 40 | assert len(failure_reasons) == 1 41 | assert "not from COMMAND_LINE" in failure_reasons[0] 42 | 43 | # Missing source 44 | failure_reasons.clear() 45 | empty_context = {} 46 | result = await validator.validate("test input", empty_context, rules, failure_reasons) 47 | assert result is False 48 | assert len(failure_reasons) == 1 49 | 50 | 51 | @pytest.mark.asyncio 52 | async def test_healthcheck(validator): 53 | """Test the healthcheck method.""" 54 | # Run healthcheck 55 | health_result = await validator.healthcheck() 56 | 57 | # Verify response structure 58 | assert isinstance(health_result, dict) 59 | assert "healthy" in health_result 60 | assert "message" in health_result 61 | assert "details" in health_result 62 | 63 | # Should be healthy 64 | assert health_result["healthy"] is True 
class TestStructuredLogger:
    """Test cases for the StructuredLogger."""

    @staticmethod
    def _new_temp_path():
        """Create an empty temporary file and return its path."""
        handle = tempfile.NamedTemporaryFile(delete=False)
        handle.close()
        return handle.name

    @staticmethod
    def _remove(path):
        """Delete *path* if it is still present."""
        if os.path.exists(path):
            os.unlink(path)

    @staticmethod
    def _load_record(path):
        """Parse the whole log file as a single JSON document."""
        with open(path, "r") as stream:
            return json.loads(stream.read())

    def test_initialization(self):
        """Test logger initialization."""
        logger = StructuredLogger(name="test_logger")

        assert logger.name == "test_logger"
        assert logger.logger.level == 20  # INFO level

    def test_log_levels(self):
        """Test all log levels work correctly."""
        logger = StructuredLogger(name="test_logger")

        # None of these calls may raise.
        logger.debug({"action": "TEST", "message": "Debug message"})
        logger.info({"action": "TEST", "message": "Info message"})
        logger.warning({"action": "TEST", "message": "Warning message"})
        logger.error({"action": "TEST", "message": "Error message"})
        logger.critical({"action": "TEST", "message": "Critical message"})

    def test_log_to_file(self):
        """Test logging to a file."""
        log_path = self._new_temp_path()

        try:
            logger = StructuredLogger(
                name="test_file_logger",
                level="INFO",
                log_file=log_path,
                console_output=False,
            )

            text = "File logging test"
            logger.info({"action": "FILE_TEST", "message": text})

            # The file must hold exactly the record that was just logged.
            record = self._load_record(log_path)
            assert record["action"] == "FILE_TEST"
            assert record["message"] == text
            assert record["logger"] == "test_file_logger"
        finally:
            self._remove(log_path)

    def test_structured_data(self):
        """Test logging with structured data."""
        log_path = self._new_temp_path()

        try:
            logger = StructuredLogger(
                name="test_data_logger", log_file=log_path, console_output=False
            )

            payload = {
                "user_id": 123,
                "items": ["apple", "banana"],
                "metadata": {"source": "test"},
            }
            entry = {
                "action": "DATA_TEST",
                "message": "Testing structured data",
                "data": payload,
            }
            logger.info(entry)

            # Nested data must survive the round trip unchanged.
            record = self._load_record(log_path)
            assert record["action"] == "DATA_TEST"
            assert record["data"] == payload
        finally:
            self._remove(log_path)

    def test_exception_logging(self):
        """Test logging exceptions."""
        log_path = self._new_temp_path()

        try:
            logger = StructuredLogger(
                name="test_exception_logger", log_file=log_path, console_output=False
            )

            # Raise for real so the exception object carries a traceback.
            try:
                raise ValueError("Test exception")
            except ValueError as caught:
                logger.error(
                    {
                        "action": "EXCEPTION_TEST",
                        "message": "Testing exception logging",
                        "exception": caught,
                    }
                )

            # The exception must be logged as type, message and traceback.
            record = self._load_record(log_path)
            assert record["action"] == "EXCEPTION_TEST"
            assert "exception" in record
            logged = record["exception"]
            assert logged["type"] == "ValueError"
            assert logged["message"] == "Test exception"
            assert isinstance(logged["traceback"], list)
        finally:
            self._remove(log_path)