├── .cursor
    ├── mcp.json
    └── rules
    │   └── restrictions.mdc
├── .env.example
├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── changelog
├── config.yaml
├── config.yaml.bak
├── docs
    ├── development
    │   └── component-configuration-schemas.md
    ├── diagrams
    │   ├── class.mmd
    │   ├── error-handling.mmd
    │   ├── flow.mmd
    │   ├── history.mmd
    │   ├── ingestion.mmd
    │   ├── initialisation.mmd
    │   ├── multi-ingestor.mmd
    │   ├── query.mmd
    │   ├── state-management.mmd
    │   └── system.mmd
    ├── features
    │   ├── default-ingestion-pipeline.md
    │   ├── feat-dynamic-ingestor-pipeline.md
    │   ├── feat-repo-restructure.md
    │   ├── whatsapp-ingestor.md
    │   ├── whatsapp-nodejs-service.md
    │   └── whatsapp-python-ingestor.md
    ├── functional-requirements.md
    ├── guides
    │   ├── connector.md
    │   ├── embedder.md
    │   ├── generator.md
    │   ├── ingestor.md
    │   ├── preprocessor.md
    │   ├── prompt_builder.md
    │   ├── validator.md
    │   └── vector_store.md
    ├── project_structure.md
    ├── prompts.md
    └── tech-specs.md
├── examples
    ├── README.md
    ├── advanced_vector_query.py
    ├── chat_example.py
    ├── chroma_vector_store_example.py
    ├── config_utils_example.py
    ├── default_pipeline_example.py
    ├── query_vectorstore_example.py
    ├── sentence_transformer_embedder_example.py
    ├── structured_logger_example.py
    ├── telegram_debug.py
    ├── telegram_ingestor_example.py
    ├── telegram_session_string.py
    └── telegram_session_test.py
├── ici
    ├── __init__.py
    ├── adapters
    │   ├── __init__.py
    │   ├── chat
    │   │   ├── __init__.py
    │   │   └── json_chat_history_manager.py
    │   ├── controller
    │   │   ├── __init__.py
    │   │   └── command_line.py
    │   ├── embedders
    │   │   ├── __init__.py
    │   │   └── sentence_transformer.py
    │   ├── generators
    │   │   ├── __init__.py
    │   │   ├── factory.py
    │   │   ├── langchain_generator.py
    │   │   └── openai_generator.py
    │   ├── ingestors
    │   │   ├── __init__.py
    │   │   ├── telegram.py
    │   │   └── whatsapp.py
    │   ├── loggers
    │   │   ├── __init__.py
    │   │   └── structured_logger.py
    │   ├── orchestrators
    │   │   ├── __init__.py
    │   │   └── default_orchestrator.py
    │   ├── pipelines
    │   │   ├── __init__.py
    │   │   └── default.py
    │   ├── preprocessors
    │   │   ├── __init__.py
    │   │   ├── telegram.py
    │   │   └── whatsapp.py
    │   ├── prompt_builders
    │   │   ├── __init__.py
    │   │   └── basic_prompt_builder.py
    │   ├── user_id
    │   │   ├── __init__.py
    │   │   └── default_user_id_generator.py
    │   ├── validators
    │   │   ├── __init__.py
    │   │   └── rule_based.py
    │   └── vector_stores
    │   │   ├── __init__.py
    │   │   └── chroma.py
    ├── core
    │   ├── __init__.py
    │   ├── exceptions
    │   │   └── __init__.py
    │   └── interfaces
    │   │   ├── __init__.py
    │   │   ├── chat_history_manager.py
    │   │   ├── embedder.py
    │   │   ├── generator.py
    │   │   ├── ingestor.py
    │   │   ├── logger.py
    │   │   ├── orchestrator.py
    │   │   ├── pipeline.py
    │   │   ├── preprocessor.py
    │   │   ├── prompt_builder.py
    │   │   ├── user_id_generator.py
    │   │   ├── validator.py
    │   │   └── vector_store.py
    └── utils
    │   ├── __init__.py
    │   ├── component_loader.py
    │   ├── config.py
    │   ├── datetime_utils.py
    │   ├── load_env.py
    │   ├── print_banner.py
    │   └── state_manager.py
├── install.bat
├── install.sh
├── main.py
├── pytest.ini
├── requirements.txt
├── services
    └── whatsapp-service
    │   ├── .gitignore
    │   ├── README.md
    │   ├── config.js
    │   ├── package-lock.json
    │   ├── package.json
    │   └── src
    │       ├── api
    │           └── routes
    │           │   ├── auth.js
    │           │   ├── index.js
    │           │   ├── messages.js
    │           │   └── sessions.js
    │       ├── client
    │           ├── client-manager.js
    │           └── whatsapp-client.js
    │       ├── index.js
    │       ├── public
    │           └── index.html
    │       ├── utils
    │           ├── event-emitter.js
    │           ├── logger.js
    │           └── message-formatter.js
    │       └── websocket
    │           └── ws-server.js
├── setup.bat
├── setup.py
├── setup.sh
├── tests
    ├── __init__.py
    ├── adapters
    │   ├── generators
    │   │   ├── test_langchain_generator.py
    │   │   └── test_openai_generator.py
    │   ├── orchestrators
    │   │   └── test_default_orchestrator.py
    │   ├── prompt_builders
    │   │   └── test_basic_prompt_builder.py
    │   └── validators
    │   │   └── test_rule_based.py
    └── unit
    │   ├── __init__.py
    │   ├── adapters
    │       ├── __init__.py
    │       ├── test_chroma_store.py
    │       ├── test_sentence_transformer_embedder.py
    │       ├── test_structured_logger.py
    │       └── test_telegram_ingestor.py
    │   └── core
    │       └── __init__.py
└── troubleshoot.md


/.cursor/mcp.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mcpServers": {
 3 |       "linear": {
 4 |         "command": "npx",
 5 |         "args": [
 6 |           "-y",
 7 |           "linear-mcp-server"
 8 |         ],
 9 |         "env": {
10 |           "LINEAR_API_KEY": "<YOUR_LINEAR_API_KEY>"
11 |         }
12 |       }
13 |     }
14 |   }


--------------------------------------------------------------------------------
/.cursor/rules/restrictions.mdc:
--------------------------------------------------------------------------------
1 | ---
2 | description: 
3 | globs: 
4 | alwaysApply: true
5 | ---
6 | 
7 | # Restrictions
8 | Use the YAML file that exists in the root ("./config.yaml"); do not create a new YAML file or a function that creates a test YAML file.
9 | Do not change a function's interface unless specifically told to do so. Maintain the function name, arguments, and output.


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
 1 | # Telegram API credentials
 2 | TELEGRAM_API_ID=your_telegram_api_id
 3 | TELEGRAM_API_HASH=your_telegram_api_hash
 4 | TELEGRAM_PHONE_NUMBER=your_phone_number_with_country_code
 5 | TELEGRAM_SESSION_STRING=your_telegram_session_string
 6 | 
 7 | # Generator API key
 8 | GENERATOR_API_KEY=your_generator_api_key
 9 | 
10 | # Logger settings
11 | INGESTION_HOST=your_betterstack_host
12 | SOURCE_TOKEN=your_betterstack_source_token
13 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # Unit test / coverage reports
28 | htmlcov/
29 | .tox/
30 | .coverage
31 | .coverage.*
32 | .cache
33 | nosetests.xml
34 | coverage.xml
35 | *.cover
36 | .hypothesis/
37 | .pytest_cache/
38 | 
39 | # Environments
40 | .env
41 | .venv
42 | env/
43 | venv/
44 | ENV/
45 | env.bak/
46 | venv.bak/
47 | ici-env/
48 | 
49 | # IDE specific files
50 | .idea/
51 | .vscode/
52 | *.swp
53 | *.swo
54 | 
55 | # Project specific
56 | examples/logs/
57 | logs/
58 | 
59 | # DB
60 | db/
61 | 
62 | # Logs
63 | **.log
64 | logs/
65 | 
66 | **/chats/
67 | services/**/data/
68 | services/whatsapp-service/.wwebjs_cache/
69 | services/


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # Contributing to ICI Core
  2 | 
  3 | Thank you for your interest in contributing to ICI Core! This document provides guidelines and instructions for contributing to the project.
  4 | 
  5 | ## Code of Conduct
  6 | 
  7 | By participating in this project, you agree to uphold our Code of Conduct:
  8 | 
  9 | - Be respectful and inclusive of all contributors
 10 | - Exercise empathy and kindness in all interactions
 11 | - Focus on constructive feedback and collaboration
 12 | - Respect differing viewpoints and experiences
 13 | 
 14 | ## Getting Started
 15 | 
 16 | 1. Fork the repository
 17 | 2. Clone your fork: `git clone https://github.com/yourusername/ici-core.git`
 18 | 3. Create a branch for your changes: `git checkout -b feature/your-feature-name`
 19 | 4. Install dependencies using the setup scripts or manual setup as described in the README
 20 | 
 21 | ## Development Environment
 22 | 
 23 | We recommend using a virtual environment for development:
 24 | 
 25 | ```bash
 26 | # Create a virtual environment
 27 | python -m venv venv
 28 | 
 29 | # Activate it
 30 | # On macOS/Linux:
 31 | source venv/bin/activate
 32 | # On Windows:
 33 | venv\Scripts\activate
 34 | 
 35 | # Install dependencies
 36 | pip install -r requirements.txt
 37 | ```
 38 | 
 39 | ## Project Structure
 40 | 
 41 | Please review the [Project Structure](docs/project_structure.md) documentation to understand how the codebase is organized. This will help you place your contributions in the correct locations.
 42 | 
 43 | ## How to Contribute
 44 | 
 45 | ### Reporting Bugs
 46 | 
 47 | If you find a bug, please create an issue with:
 48 | 
 49 | 1. A clear, descriptive title
 50 | 2. Detailed steps to reproduce the bug
 51 | 3. Expected and actual behavior
 52 | 4. System information (OS, Python version, etc.)
 53 | 5. Any relevant logs or screenshots
 54 | 
 55 | ### Suggesting Features
 56 | 
 57 | For feature suggestions:
 58 | 
 59 | 1. Check if the feature has already been suggested or implemented
 60 | 2. Create an issue with a clear title and detailed description
 61 | 3. Explain the use case and benefits of the feature
 62 | 4. If possible, outline a potential implementation approach
 63 | 
 64 | ### Pull Requests
 65 | 
 66 | When submitting a pull request:
 67 | 
 68 | 1. Update the README.md with details of changes if applicable
 69 | 2. Update any relevant documentation
 70 | 3. Include tests that verify your changes
 71 | 4. Ensure all tests pass locally
 72 | 5. Link to any related issues
 73 | 6. Follow the existing code style
 74 | 
 75 | ## Coding Standards
 76 | 
 77 | We follow these coding standards:
 78 | 
 79 | 1. **PEP 8**: Follow Python's PEP 8 style guide
 80 | 2. **Type Hints**: Use Python type hints for function parameters and return values
 81 | 3. **Docstrings**: Include docstrings for all functions, classes, and modules
 82 | 4. **Comments**: Add comments for complex logic
 83 | 5. **File Structure**: Follow the project structure guidelines
 84 | 
 85 | ### Code Style
 86 | 
 87 | - We use the `black` formatter for Python code
 88 | - Include type hints for function parameters and return values
 89 | - Follow our naming conventions:
 90 |   - Classes: CamelCase (`MyClass`)
 91 |   - Functions/methods: snake_case (`my_function`)
 92 |   - Variables: snake_case (`my_variable`)
 93 |   - Constants: UPPER_CASE (`MY_CONSTANT`)
 94 | 
 95 | ### Example Function
 96 | 
 97 | ```python
 98 | from typing import Dict, Any, Optional
 99 | 
100 | def process_data(data: Dict[str, Any], options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
101 |     """
102 |     Process the input data with optional configuration.
103 |     
104 |     Args:
105 |         data: The input data to process
106 |         options: Optional configuration parameters
107 |         
108 |     Returns:
109 |         Processed data as a dictionary
110 |         
111 |     Raises:
112 |         ValueError: If data is empty or invalid
113 |     """
114 |     if not data:
115 |         raise ValueError("Input data cannot be empty")
116 |         
117 |     processed_data = dict(data)  # Placeholder: real processing logic goes here
118 |     
119 |     return processed_data
120 | ```
121 | 
122 | ## Testing
123 | 
124 | All contributions should include appropriate tests:
125 | 
126 | - **Unit Tests**: Test individual components in isolation
127 | - **Integration Tests**: Test interactions between components
128 | - **Doctest Examples**: Include examples in docstrings when helpful
129 | 
130 | Run tests using:
131 | 
132 | ```bash
133 | pytest
134 | ```
135 | 
136 | ## Documentation
137 | 
138 | Please update documentation for any new features or changes:
139 | 
140 | - Update README.md if introducing new features
141 | - Update/create documentation in the docs folder
142 | - Include examples where appropriate
143 | - Update function/class docstrings
144 | 
145 | ## Commit Messages
146 | 
147 | Follow these guidelines for commit messages:
148 | 
149 | - Use the present tense ("Add feature" not "Added feature")
150 | - Use the imperative mood ("Move cursor to..." not "Moves cursor to...")
151 | - Limit the first line to 72 characters or less
152 | - Reference issues and pull requests after the first line
153 | 
154 | ## Pull Request Process
155 | 
156 | 1. Update documentation as needed
157 | 2. Include tests for your changes
158 | 3. Ensure all tests pass
159 | 4. Update the CHANGELOG.md with details of changes
160 | 5. Your PR will be reviewed by at least one maintainer
161 | 6. Once approved, a maintainer will merge your PR
162 | 
163 | ## License
164 | 
165 | By contributing to ICI Core, you agree that your contributions will be licensed under the project's MIT License.
166 | 
167 | ## Questions?
168 | 
169 | If you have any questions or need help, please create an issue with the "question" label or reach out to the project maintainers.
170 | 
171 | Thank you for contributing to ICI Core! 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 ICI Team
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. 


--------------------------------------------------------------------------------
/config.yaml:
--------------------------------------------------------------------------------
  1 | # Core System Configuration
  2 | system:
  3 |   loggers:
  4 |     structured_logger:
  5 |       console_output: true
  6 |       level: WARNING
  7 |       log_file: ./logs/structured_logger.log
  8 |       name: structured_logger
  9 |       source_token: $SOURCE_TOKEN
 10 |       host: $INGESTION_HOST
 11 |       use_betterstack: false
 12 |   state_manager:
 13 |     db_path: ./db/sql/ingestor_state.db
 14 |   validator:
 15 |     allowed_sources:
 16 |       - COMMAND_LINE
 17 |     rules: []
 18 |   chat_history_manager:
 19 |     json:
 20 |       base_path: ./db/chat
 21 |       use_subdirectories: true
 22 |       file_permissions: 0o600
 23 |       default_message_limit: 20
 24 |       max_messages_per_chat: 1000
 25 |   user_id_generator:
 26 |     default:
 27 |       sources:
 28 |         - cli
 29 |         - web
 30 |         - api
 31 |         - test
 32 |       default_identifier: anonymous
 33 | 
 34 | # Main Orchestrator Configuration
 35 | orchestrator:
 36 |   # Core orchestrator settings
 37 |   error_messages:
 38 |     generation_failed: I'm having trouble generating a response right now. Please try again later.
 39 |     no_documents: I don't have specific information on that topic yet.
 40 |     validation_failed: I'm unable to process your request due to security restrictions.
 41 |   generation_options:
 42 |     max_tokens: 1024
 43 |     temperature: 0.7
 44 |   num_results: 5
 45 |   similarity_threshold: 0.7
 46 |   rules_source: config
 47 |   user_context:
 48 |     default:
 49 |       permission_level: user
 50 |   validation_rules:
 51 |     default: []
 52 |   
 53 |   # Components used by orchestrator
 54 |   generator:
 55 |     api_key: $GENERATOR_API_KEY
 56 |     base_url: http://localhost:11434
 57 |     base_retry_delay: 1
 58 |     chain_type: simple
 59 |     default_options:
 60 |       frequency_penalty: 0.0
 61 |       max_tokens: 1024
 62 |       presence_penalty: 0.0
 63 |       temperature: 0.7
 64 |       top_p: 1.0
 65 |     max_retries: 3
 66 |     memory:
 67 |       k: 5
 68 |       type: buffer
 69 |     model: deepseek-r1:32b
 70 |     provider: ollama
 71 |     type: langchain
 72 |   
 73 |   embedder:
 74 |     sentence_transformer:
 75 |       batch_size: 32
 76 |       device: cpu
 77 |       model_name: all-MiniLM-L6-v2
 78 |   
 79 |   vector_store:
 80 |     chroma:
 81 |       type: chroma
 82 |       collection_name: messages
 83 |       embedding_function: sentence_transformer
 84 |       persist_directory: ./db/vector/chroma_db
 85 |   
 86 |   prompt_builder:
 87 |     error_template: 'Unable to process: {error}'
 88 |     fallback_template: 'Answer based on general knowledge: {question}'
 89 |     template: "Context:\n{context}\n\nQuestion: {question}\n\nYour response should be conversational and helpful. Remember to continue from any previous conversation if relevant."
 90 |   
 91 |   # Pipeline configurations
 92 |   pipelines:
 93 |     telegram:
 94 |       schedule:
 95 |         interval_minutes: 1
 96 |       ingestor:
 97 |         telegram:
 98 |           # Telegram API credentials
 99 |           api_hash: $TELEGRAM_API_HASH
100 |           api_id: $TELEGRAM_API_ID
101 |           phone_number: $TELEGRAM_PHONE_NUMBER
102 |           session_string: $TELEGRAM_SESSION_STRING
103 |           
104 |           # Chat and message limits
105 |           max_chats: 100            # Maximum number of chats to fetch (-1 for all available)
106 |           max_messages_per_chat: 200  # Maximum messages per chat (-1 for all available)
107 |           ignored_chats: []         # List of chat IDs to exclude from fetching
108 |           
109 |           # Rate limiting and batching settings
110 |           batch_size: 50            # Number of messages to fetch in each request
111 |           request_delay: 1.0        # Delay between batched requests in seconds
112 |       preprocessor:
113 |         chunk_size: 512
114 |         include_overlap: true
115 |         max_messages_per_chunk: 10
116 |         time_window_minutes: 15
117 |         store_chat_history: true
118 |         chat_history_dir: "db/telegram_chats"
119 |     
120 |     whatsapp:
121 |       batch_size: 100
122 |       schedule:
123 |         interval_minutes: 5
124 |       ingestor:
125 |         whatsapp:
126 |           service_url: "http://localhost:3006"
127 |           session_id: "default_session"
128 |           request_timeout: 30
129 |       preprocessor:
130 |         chunk_size: 512
131 |         include_overlap: true
132 |         max_messages_per_chunk: 10
133 |         time_window_minutes: 15
134 |         store_chat_history: true
135 |         chat_history_dir: "db/whatsapp_chats"
136 | 


--------------------------------------------------------------------------------
/config.yaml.bak:
--------------------------------------------------------------------------------
  1 | embedders:
  2 |   sentence_transformer:
  3 |     batch_size: 32
  4 |     device: cpu
  5 |     model_name: all-MiniLM-L6-v2
  6 | generator:
  7 |   api_key: $GENERATOR_API_KEY
  8 |   base_retry_delay: 1
  9 |   chain_type: simple
 10 |   default_options:
 11 |     frequency_penalty: 0.0
 12 |     max_tokens: 1024
 13 |     presence_penalty: 0.0
 14 |     temperature: 0.7
 15 |     top_p: 1.0
 16 |   max_retries: 3
 17 |   memory:
 18 |     k: 5
 19 |     type: buffer
 20 |   model: deepseek/deepseek-chat-v3-0324:free
 21 |   provider: openrouter
 22 |   type: langchain
 23 | ingestors:
 24 |   telegram:
 25 |     api_hash: $TELEGRAM_API_HASH
 26 |     api_id: $TELEGRAM_API_ID
 27 |     phone_number: $TELEGRAM_PHONE_NUMBER
 28 |     request_delay: 0.5
 29 |     session_string: $TELEGRAM_SESSION_STRING
 30 | loggers:
 31 |   structured_logger:
 32 |     console_output: true
 33 |     level: ERROR
 34 |     log_file: ./logs/structured_logger.log
 35 |     name: structured_logger
 36 |     source_token: $SOURCE_TOKEN
 37 |     host: $INGESTION_HOST
 38 |     use_betterstack: false
 39 | orchestrator:
 40 |   error_messages:
 41 |     generation_failed: I'm having trouble generating a response right now. Please
 42 |       try again later.
 43 |     no_documents: I don't have specific information on that topic yet.
 44 |     validation_failed: I'm unable to process your request due to security restrictions.
 45 |   generation_options:
 46 |     max_tokens: 1024
 47 |     temperature: 0.7
 48 |   num_results: 5
 49 |   pipeline:
 50 |     auto_start: true
 51 |     ingestor_id: telegram
 52 |   rules_source: config
 53 |   similarity_threshold: 0.7
 54 |   user_context:
 55 |     default:
 56 |       permission_level: user
 57 |   validation_rules:
 58 |     default: []
 59 | pipelines:
 60 |   telegram:
 61 |     batch_size: 100
 62 |     schedule:
 63 |       interval_minutes: 1
 64 | preprocessors:
 65 |   telegram:
 66 |     chunk_size: 512
 67 |     include_overlap: true
 68 |     max_messages_per_chunk: 10
 69 |     time_window_minutes: 15
 70 | prompt_builder:
 71 |   error_template: 'Unable to process: {error}'
 72 |   fallback_template: 'Answer based on general knowledge: {question}'
 73 |   template: "Context:\n{context}\n\nQuestion: {question}.\n Your tone should be pleasant and friendly"
 74 | state_manager:
 75 |   db_path: ./db/sql/ingestor_state.db
 76 | validator:
 77 |   allowed_sources:
 78 |   - COMMAND_LINE
 79 |   rules: []
 80 | vector_stores:
 81 |   chroma:
 82 |     collection_name: telegram_messages
 83 |     embedding_function: sentence_transformer
 84 |     persist_directory: ./db/vector/chroma_db
 85 | chat_history_manager:
 86 |   json:
 87 |     base_path: ./db/chat
 88 |     use_subdirectories: true
 89 |     file_permissions: 0o600
 90 |     default_message_limit: 20
 91 |     max_messages_per_chat: 1000
 92 | user_id_generator:
 93 |   default:
 94 |     sources:
 95 |       - cli
 96 |       - web
 97 |       - api
 98 |       - test
 99 |     default_identifier: anonymous
100 | 


--------------------------------------------------------------------------------
/docs/diagrams/class.mmd:
--------------------------------------------------------------------------------
 1 | classDiagram
 2 |     %% Ingestion Pipeline Classes
 3 |     class IngestionPipeline {
 4 |         +run_ingestion(ingestor_id: str) None
 5 |         +start() None
 6 |     }
 7 |     class Ingestor {
 8 |         +fetch_full_data() Any
 9 |         +fetch_new_data(since: Optional[datetime]) Any
10 |         +fetch_data_in_range(start: datetime, end: datetime) Any
11 |     }
12 |     class Preprocessor {
13 |         +preprocess(raw_data: Any) List[Dict[str, Any]]
14 |     }
15 |     class Embedder {
16 |         +embed(text: str) List[float]
17 |     }
18 |     class VectorStore {
19 |         +store_documents(documents: List[Dict[str, Any]]) None
20 |         +search(query_vector: List[float], num_results: int, filters: Dict[str, Any]) List[Dict[str, Any]]
21 |     }
22 | 
23 |     %% Query Pipeline Classes
24 |     class Orchestrator {
25 |         +process_query(user_id: str, input: str, chat_id: Optional[str]) Dict[str, Any]
26 |         -get_rules(user_id: str) List[Dict[str, Any]]
27 |         -build_context(user_id: str) Dict[str, Any]
28 |     }
29 |     class Validator {
30 |         +validate(input: str, context: Dict[str, Any], rules: List[Dict[str, Any]]) bool
31 |     }
32 |     class PromptBuilder {
33 |         +build_prompt(input: str, documents: List[Dict[str, Any]], chat_history: List[Dict[str, Any]]) str
34 |     }
35 |     class Generator {
36 |         +generate(prompt: str) str
37 |     }
38 |     class ChatHistoryManager {
39 |         +create_chat(user_id: str) str
40 |         +add_message(chat_id: str, content: str, role: str, metadata: Optional[Dict[str, Any]]) str
41 |         +get_messages(chat_id: str, limit: Optional[int]) List[Dict[str, Any]]
42 |         +list_chats(user_id: str) List[Dict[str, Any]]
43 |         +generate_title(chat_id: str) Optional[str]
44 |         +rename_chat(chat_id: str, new_title: str) bool
45 |         +delete_chat(chat_id: str) bool
46 |         +export_chat(chat_id: str, format: str) Any
47 |     }
48 | 
49 |     %% Shared Components
50 |     class Logger {
51 |         +debug(message: str, *args: Any) None
52 |         +info(message: str, *args: Any) None
53 |         +warning(message: str, *args: Any) None
54 |         +error(message: str, *args: Any) None
55 |         +critical(message: str, *args: Any) None
56 |     }
57 |     class ingestor_state {
58 |         +ingestor_id: TEXT
59 |         +last_timestamp: INTEGER
60 |         +additional_metadata: TEXT
61 |     }
62 | 
63 |     %% Relationships
64 |     IngestionPipeline --> Ingestor : uses
65 |     IngestionPipeline --> Preprocessor : uses
66 |     IngestionPipeline --> Embedder : uses
67 |     IngestionPipeline --> VectorStore : uses
68 |     IngestionPipeline --> ingestor_state : manages state
69 | 
70 |     Orchestrator --> Validator : uses
71 |     Orchestrator --> Embedder : uses
72 |     Orchestrator --> VectorStore : uses
73 |     Orchestrator --> PromptBuilder : uses
74 |     Orchestrator --> Generator : uses
75 |     Orchestrator --> ChatHistoryManager : uses
76 | 
77 |     PromptBuilder --> ChatHistoryManager : uses history from
78 | 
79 |     Logger <.. IngestionPipeline : logs
80 |     Logger <.. Orchestrator : logs
81 |     Logger <.. Ingestor : logs
82 |     Logger <.. Preprocessor : logs
83 |     Logger <.. Embedder : logs
84 |     Logger <.. VectorStore : logs
85 |     Logger <.. Validator : logs
86 |     Logger <.. PromptBuilder : logs
87 |     Logger <.. Generator : logs
88 |     Logger <.. ChatHistoryManager : logs
89 | 
90 |     %% Comments
91 |     note for IngestionPipeline "Coordinates ingestion components and manages scheduling."
92 |     note for Orchestrator "Manages the query workflow from validation to response generation."
93 |     note for ingestor_state "Database table for tracking ingestion progress."
94 |     note for ChatHistoryManager "Manages persistent chat sessions and message history."


--------------------------------------------------------------------------------
/docs/diagrams/error-handling.mmd:
--------------------------------------------------------------------------------
 1 | sequenceDiagram
 2 |     participant IngestionPipeline
 3 |     participant Ingestor
 4 |     participant Logger
 5 | 
 6 |     loop Retry up to 3 times
 7 |         IngestionPipeline->>Ingestor: fetch_new_data(since=last_timestamp)
 8 |         alt Fetch succeeds
 9 |             Ingestor-->>IngestionPipeline: raw_data
10 |             IngestionPipeline->>Logger: Log successful fetch
11 |         else Fetch fails
12 |             Ingestor-->>IngestionPipeline: IngestorError
13 |             IngestionPipeline->>Logger: Log error and retry
14 |         end
15 |     end
16 |     alt Max retries exceeded
17 |         IngestionPipeline->>Logger: Log critical error, skip cycle
18 |     end


--------------------------------------------------------------------------------
/docs/diagrams/flow.mmd:
--------------------------------------------------------------------------------
 1 | graph TD
 2 |     Start --> InitializeSystem
 3 |     InitializeSystem --> LoadConfig
 4 |     LoadConfig --> CreateDatabaseConnection
 5 |     CreateDatabaseConnection --> InitializeLogger
 6 |     InitializeLogger --> InitializeComponents
 7 |     InitializeComponents --> StartIngestionPipeline
 8 |     StartIngestionPipeline --> IngestionLoop
 9 |     IngestionLoop --> FetchData
10 |     FetchData --> ProcessData
11 |     ProcessData --> EmbedData
12 |     EmbedData --> StoreData
13 |     StoreData --> UpdateState
14 |     UpdateState --> LogIngestion
15 |     LogIngestion --> WaitForNextCycle
16 | 
17 |     InitializeComponents --> ReadyForQueries
18 |     ReadyForQueries --> ReceiveQuery
19 |     ReceiveQuery --> ValidateQuery
20 |     ValidateQuery --> EmbedQuery
21 |     EmbedQuery --> RetrieveDocuments
22 |     RetrieveDocuments --> BuildPrompt
23 |     BuildPrompt --> GenerateResponse
24 |     GenerateResponse --> ReturnResponse
25 |     ReturnResponse --> LogQuery
26 | 
27 |     WaitForNextCycle --> IngestionLoop
28 |     LogQuery --> ReceiveQuery


--------------------------------------------------------------------------------
/docs/diagrams/history.mmd:
--------------------------------------------------------------------------------
 1 | classDiagram
 2 |     class User {
 3 |         +user_id: string
 4 |         +source: string
 5 |         +identifier: string
 6 |     }
 7 | 
 8 |     class Chat {
 9 |         +chat_id: string
10 |         +user_id: string
11 |         +title: string
12 |         +created_at: datetime
13 |         +updated_at: datetime
14 |         +message_count: int
15 |         +is_pinned: bool
16 |         +last_message_preview: string
17 |     }
18 | 
19 |     class Message {
20 |         +message_id: string
21 |         +chat_id: string
22 |         +role: string
23 |         +content: string
24 |         +created_at: datetime
25 |         +metadata: Dict
26 |     }
27 | 
28 |     class ChatHistoryManager {
29 |         +create_chat(user_id: str) str
30 |         +add_message(chat_id: str, content: str, role: str, metadata: Dict) str
31 |         +get_messages(chat_id: str, limit: int) List[Dict]
32 |         +list_chats(user_id: str) List[Dict]
33 |         +generate_title(chat_id: str) Optional[str]
34 |         +rename_chat(chat_id: str, new_title: str) bool
35 |         +delete_chat(chat_id: str) bool
36 |         +export_chat(chat_id: str, format: str) Any
37 |     }
38 | 
39 |     class JSONStorage {
40 |         -base_path: str
41 |         +save_chat(chat: Chat) None
42 |         +save_message(message: Message) None
43 |         +load_chat(chat_id: str) Chat
44 |         +load_messages(chat_id: str) List[Message]
45 |         +list_user_chats(user_id: str) List[Chat]
46 |         +delete_chat(chat_id: str) bool
47 |     }
48 | 
49 |     class DatabaseStorage {
50 |         -conn: Connection
51 |         +save_chat(chat: Chat) None
52 |         +save_message(message: Message) None
53 |         +load_chat(chat_id: str) Chat
54 |         +load_messages(chat_id: str) List[Message]
55 |         +list_user_chats(user_id: str) List[Chat]
56 |         +delete_chat(chat_id: str) bool
57 |     }
58 | 
59 |     User "1" --> "many" Chat : has
60 |     Chat "1" --> "many" Message : contains
61 |     ChatHistoryManager --> JSONStorage : uses
62 |     ChatHistoryManager --> DatabaseStorage : uses
63 |     
64 |     note for User "Represents a user with a unique composite ID"
65 |     note for Chat "Represents a single conversation thread"
66 |     note for Message "Individual message in a chat session"
67 |     note for JSONStorage "Stores chats as JSON files in directory structure"
68 |     note for DatabaseStorage "Stores chats in database tables (alternative)"
69 | 
70 |     %% Example file storage structure
71 |     note "JSON Storage Example:\n./chats/{user_id}/{chat_id}.json"


--------------------------------------------------------------------------------
/docs/diagrams/ingestion.mmd:
--------------------------------------------------------------------------------
 1 | sequenceDiagram
 2 |     participant IngestionPipeline
 3 |     participant Database
 4 |     participant Ingestor
 5 |     participant Preprocessor
 6 |     participant Embedder
 7 |     participant VectorStore
 8 |     participant Logger
 9 | 
10 |     loop Every Interval
11 |         IngestionPipeline->>Database: Retrieve last_timestamp for ingestor_id
12 |         Database-->>IngestionPipeline: last_timestamp
13 |         IngestionPipeline->>Ingestor: fetch_new_data(since=last_timestamp)
14 |         Ingestor-->>IngestionPipeline: raw_data
15 |         IngestionPipeline->>Preprocessor: preprocess(raw_data)
16 |         Preprocessor-->>IngestionPipeline: processed_data
17 |         IngestionPipeline->>Embedder: embed(processed_data['text'])
18 |         Embedder-->>IngestionPipeline: vectors
19 |         IngestionPipeline->>VectorStore: store_documents(processed_data, vectors)
20 |         VectorStore-->>IngestionPipeline: success
21 |         IngestionPipeline->>Database: Update last_timestamp for ingestor_id
22 |         Database-->>IngestionPipeline: success
23 |         IngestionPipeline->>Logger: Log ingestion cycle completion
24 |     end


--------------------------------------------------------------------------------
/docs/diagrams/initialisation.mmd:
--------------------------------------------------------------------------------
 1 | sequenceDiagram
 2 |     participant Main
 3 |     participant ConfigLoader
 4 |     participant Database
 5 |     participant Logger
 6 |     participant Ingestor
 7 |     participant Preprocessor
 8 |     participant Embedder
 9 |     participant VectorStore
10 |     participant Validator
11 |     participant PromptBuilder
12 |     participant Generator
13 |     participant IngestionPipeline
14 |     participant Orchestrator
15 | 
16 |     Main->>ConfigLoader: Load configuration from YAML
17 |     ConfigLoader-->>Main: config
18 |     Main->>Database: Connect to SQLite (config["database"]["path"])
19 |     Database-->>Main: db_connection
20 |     Main->>Logger: Initialize Logger (config["logger"])
21 |     Logger-->>Main: logger_instance
22 |     Main->>Ingestor: Initialize Ingestor (config["ingestors"])
23 |     Ingestor-->>Main: ingestor_instance
24 |     Main->>Preprocessor: Initialize Preprocessor
25 |     Preprocessor-->>Main: preprocessor_instance
26 |     Main->>Embedder: Initialize Embedder (config["embedder"])
27 |     Embedder-->>Main: embedder_instance
28 |     Main->>VectorStore: Initialize VectorStore (config["vector_store"])
29 |     VectorStore-->>Main: vector_store_instance
30 |     Main->>Validator: Initialize Validator (config["validator"])
31 |     Validator-->>Main: validator_instance
32 |     Main->>PromptBuilder: Initialize PromptBuilder (config["prompt_builder"])
33 |     PromptBuilder-->>Main: prompt_builder_instance
34 |     Main->>Generator: Initialize Generator (config["generator"])
35 |     Generator-->>Main: generator_instance
36 |     Main->>IngestionPipeline: Initialize IngestionPipeline(ingestor, preprocessor, embedder, vector_store, db_connection, logger, config["ingestion_pipeline"])
37 |     IngestionPipeline-->>Main: ingestion_pipeline_instance
38 |     Main->>Orchestrator: Initialize Orchestrator(validator, embedder, vector_store, prompt_builder, generator, logger, config["orchestrator"])
39 |     Orchestrator->>Orchestrator: Configure rules_source and context_filters
40 |     Orchestrator->>Orchestrator: Set up retry mechanisms
41 |     Orchestrator-->>Main: orchestrator_instance
42 |     Main->>IngestionPipeline: Start ingestion in a separate thread
43 |     Main->>Orchestrator: Ready to process queries


--------------------------------------------------------------------------------
/docs/diagrams/multi-ingestor.mmd:
--------------------------------------------------------------------------------
 1 | sequenceDiagram
 2 |     participant IngestionPipeline
 3 |     participant Database
 4 |     participant Ingestor1
 5 |     participant Ingestor2
 6 |     participant Logger
 7 | 
 8 |     loop Every Interval
 9 |         IngestionPipeline->>Database: Retrieve last_timestamp for ingestor_id1
10 |         Database-->>IngestionPipeline: last_timestamp1
11 |         IngestionPipeline->>Ingestor1: fetch_new_data(since=last_timestamp1)
12 |         Ingestor1-->>IngestionPipeline: raw_data1
13 |         IngestionPipeline->>Logger: Log ingestion for ingestor_id1
14 | 
15 |         IngestionPipeline->>Database: Retrieve last_timestamp for ingestor_id2
16 |         Database-->>IngestionPipeline: last_timestamp2
17 |         IngestionPipeline->>Ingestor2: fetch_new_data(since=last_timestamp2)
18 |         Ingestor2-->>IngestionPipeline: raw_data2
19 |         IngestionPipeline->>Logger: Log ingestion for ingestor_id2
20 |     end


--------------------------------------------------------------------------------
/docs/diagrams/query.mmd:
--------------------------------------------------------------------------------
 1 | sequenceDiagram
 2 |     participant User
 3 |     participant Orchestrator
 4 |     participant ChatHistoryManager
 5 |     participant Validator
 6 |     participant Embedder
 7 |     participant VectorStore
 8 |     participant PromptBuilder
 9 |     participant Generator
10 |     participant Logger
11 | 
12 |     User->>Orchestrator: process_query(user_id, input, chat_id?)
13 |     
14 |     alt chat_id is None
15 |         Orchestrator->>ChatHistoryManager: create_chat(user_id)
16 |         ChatHistoryManager-->>Orchestrator: new_chat_id
17 |     else
18 |         Note over Orchestrator: Use provided chat_id
19 |     end
20 |     
21 |     Orchestrator->>Orchestrator: get_rules(user_id)
22 |     Orchestrator->>Orchestrator: build_context(user_id)
23 |     Orchestrator->>Validator: validate(input, context, rules)
24 |     Validator-->>Orchestrator: validation_result
25 |     
26 |     alt validation_result == True
27 |         Orchestrator->>ChatHistoryManager: get_messages(chat_id, limit)
28 |         ChatHistoryManager-->>Orchestrator: chat_history
29 |         
30 |         Orchestrator->>Embedder: embed(input)
31 |         Embedder-->>Orchestrator: query_vector
32 |         
33 |         Orchestrator->>VectorStore: search(query_vector, num_results, filters)
34 |         VectorStore-->>Orchestrator: documents
35 |         
36 |         Orchestrator->>PromptBuilder: build_prompt(input, documents, chat_history)
37 |         PromptBuilder-->>Orchestrator: prompt
38 |         
39 |         Orchestrator->>Generator: generate(prompt)
40 |         Generator-->>Orchestrator: response
41 |         
42 |         Orchestrator->>ChatHistoryManager: add_message(chat_id, input, "user")
43 |         Orchestrator->>ChatHistoryManager: add_message(chat_id, response, "assistant")
44 |         
45 |         alt New or short chat
46 |             Orchestrator->>ChatHistoryManager: generate_title(chat_id)
47 |             ChatHistoryManager-->>Orchestrator: title
48 |         end
49 |         
50 |         Orchestrator-->>User: {"response": response, "chat_id": chat_id}
51 |     else
52 |         Orchestrator-->>User: {"error": "Access denied"}
53 |     end
54 |     
55 |     Orchestrator->>Logger: Log query processing


--------------------------------------------------------------------------------
/docs/diagrams/state-management.mmd:
--------------------------------------------------------------------------------
 1 | sequenceDiagram
 2 |     participant IngestionPipeline
 3 |     participant Database
 4 |     participant Logger
 5 | 
 6 |     IngestionPipeline->>Database: SELECT last_timestamp FROM ingestor_state WHERE ingestor_id='id'
 7 |     Database-->>IngestionPipeline: last_timestamp
 8 |     IngestionPipeline->>Logger: Log retrieved last_timestamp
 9 |     %% Ingestion process occurs here...
10 |     IngestionPipeline->>Database: UPDATE ingestor_state SET last_timestamp=new_timestamp WHERE ingestor_id='id'
11 |     Database-->>IngestionPipeline: success
12 |     IngestionPipeline->>Logger: Log state update


--------------------------------------------------------------------------------
/docs/diagrams/system.mmd:
--------------------------------------------------------------------------------
 1 | graph TD
 2 |     %% Ingestion Pipeline
 3 |     subgraph Ingestion Pipeline
 4 |         Ingestor --> Preprocessor
 5 |         Preprocessor --> Embedder
 6 |         Embedder --> VectorStore
 7 |         IngestionPipeline --> Ingestor
 8 |         IngestionPipeline --> Preprocessor
 9 |         IngestionPipeline --> Embedder
10 |         IngestionPipeline --> VectorStore
11 |         IngestionPipeline --> Database[ingestor_state]
12 |     end
13 | 
14 |     %% Query Pipeline
15 |     subgraph Query Pipeline
16 |         Orchestrator --> Validator
17 |         Orchestrator --> Embedder
18 |         Orchestrator --> VectorStore
19 |         Orchestrator --> PromptBuilder
20 |         Orchestrator --> Generator
21 |     end
22 | 
23 |     %% Shared Components
24 |     Logger --> IngestionPipeline
25 |     Logger --> Orchestrator
26 |     Logger --> Ingestor
27 |     Logger --> Preprocessor
28 |     Logger --> Embedder
29 |     Logger --> VectorStore
30 |     Logger --> Validator
31 |     Logger --> PromptBuilder
32 |     Logger --> Generator
33 | 
34 |     %% Database
35 |     Database --> IngestionPipeline
36 | 
37 |     %% Notes
38 |     classDef pipeline fill:#f9f,stroke:#333,stroke-width:2px;
39 |     class IngestionPipeline,Orchestrator pipeline;


--------------------------------------------------------------------------------
/docs/guides/prompt_builder.md:
--------------------------------------------------------------------------------
 1 | # Prompt Builder Component Guide
 2 | 
 3 | ## Overview
 4 | 
 5 | A Prompt Builder creates the prompt that is sent to a language model for generating responses. It formats queries and retrieved documents into a structured prompt template.
 6 | 
 7 | The Prompt Builder is an optional component - you don't need to implement a custom Prompt Builder when simply connecting a new data source.
 8 | 
 9 | ## Interface
10 | 
11 | All prompt builders must implement the `PromptBuilder` interface:
12 | 
13 | ```python
14 | class PromptBuilder(ABC):
15 |     @abstractmethod
16 |     async def initialize(self) -> None:
17 |         """Initialize the prompt builder with configuration parameters."""
18 |         pass
19 |         
20 |     @abstractmethod
21 |     async def build_prompt(
22 |         self, 
23 |         query: str, 
24 |         documents: List[Dict[str, Any]], 
25 |         **kwargs
26 |     ) -> Dict[str, Any]:
27 |         """
28 |         Build a prompt from a query and relevant documents.
29 |         
30 |         Args:
31 |             query: The user's query
32 |             documents: Relevant documents retrieved for the query
33 |             **kwargs: Additional parameters for prompt building
34 |             
35 |         Returns:
36 |             Dict[str, Any]: The built prompt with all necessary components
37 |         """
38 |         pass
39 | ```
40 | 
41 | ## Basic Implementation
42 | 
43 | A simple prompt builder might combine query and documents:
44 | 
45 | ```python
46 | async def build_prompt(
47 |     self, 
48 |     query: str, 
49 |     documents: List[Dict[str, Any]], 
50 |     **kwargs
51 | ) -> Dict[str, Any]:
52 |     """Build a basic prompt from query and documents."""
53 |     
54 |     # Format documents for the prompt
55 |     formatted_docs = []
56 |     for i, doc in enumerate(documents):
57 |         doc_text = doc.get("text", "")
58 |         doc_source = doc.get("metadata", {}).get("source", f"Document {i+1}")
59 |         formatted_docs.append(f"Source: {doc_source}\n{doc_text}")
60 |     
61 |     # Combine documents
62 |     context = "\n\n".join(formatted_docs)
63 |     
64 |     # Build the prompt template
65 |     prompt = f"""
66 |     Answer the following question based on the provided context. If the answer cannot be determined from the context, say "I don't have enough information to answer this question."
67 |     
68 |     Context:
69 |     {context}
70 |     
71 |     Question: {query}
72 |     
73 |     Answer:
74 |     """
75 |     
76 |     return {
77 |         "prompt": prompt,
78 |         "template_type": "basic_rag"
79 |     }
80 | ```
81 | 
82 | ## Configuration
83 | 
84 | In your `config.yaml` file:
85 | 
86 | ```yaml
87 | prompt_builders:
88 |   default:
89 |     template_type: basic_rag
90 | ```
91 | 
92 | ## Conclusion
93 | 
94 | The Prompt Builder component formats queries and documents into prompts for language models. For most use cases, the default implementation will be sufficient.
95 | 
96 | If you are connecting a new data source, you typically won't need to modify this component at all.
97 | 


--------------------------------------------------------------------------------
/docs/guides/validator.md:
--------------------------------------------------------------------------------
 1 | # Validator Component Guide
 2 | 
 3 | ## Overview
 4 | 
 5 | A Validator checks the quality of responses generated by the system. It ensures that responses are accurate and relevant before being delivered to the user.
 6 | 
 7 | The Validator is an optional component - you don't need to implement a custom Validator when simply connecting a new data source.
 8 | 
 9 | ## Interface
10 | 
11 | All validators must implement the `Validator` interface:
12 | 
13 | ```python
14 | class Validator(ABC):
15 |     @abstractmethod
16 |     async def initialize(self) -> None:
17 |         """Initialize the validator with configuration parameters."""
18 |         pass
19 |         
20 |     @abstractmethod
21 |     async def validate(
22 |         self, 
23 |         query: str, 
24 |         response: Dict[str, Any], 
25 |         documents: List[Dict[str, Any]],
26 |         **kwargs
27 |     ) -> Dict[str, Any]:
28 |         """
29 |         Validate the generated response against the query and documents.
30 |         
31 |         Args:
32 |             query: The user's original query
33 |             response: The generated response to validate
34 |             documents: The documents used to generate the response
35 |             **kwargs: Additional parameters for validation
36 |             
37 |         Returns:
38 |             Dict[str, Any]: Validation result with potentially modified response
39 |         """
40 |         pass
41 | ```
42 | 
43 | ## Basic Implementation
44 | 
45 | A simple validator might check relevance and add validation metadata:
46 | 
47 | ```python
48 | async def validate(
49 |     self, 
50 |     query: str, 
51 |     response: Dict[str, Any], 
52 |     documents: List[Dict[str, Any]],
53 |     **kwargs
54 | ) -> Dict[str, Any]:
55 |     """Basic validation of response."""
56 |     
57 |     # Extract the answer
58 |     answer = response.get("answer", "")
59 |     
60 |     # Simple validation check
61 |     is_valid = len(answer) > 0
62 |     
63 |     # Create modified response with validation metadata
64 |     modified_response = response.copy()
65 |     if "metadata" not in modified_response:
66 |         modified_response["metadata"] = {}
67 |         
68 |     modified_response["metadata"]["validation"] = {
69 |         "status": "passed" if is_valid else "failed"
70 |     }
71 |     
72 |     # Return validation result
73 |     result = {
74 |         "is_valid": is_valid,
75 |         "modified_response": modified_response,
76 |         "warnings": [] if is_valid else ["Empty response detected"]
77 |     }
78 |     
79 |     return result
80 | ```
81 | 
82 | ## Configuration
83 | 
84 | In your `config.yaml` file, you can add basic configuration:
85 | 
86 | ```yaml
87 | validators:
88 |   default:
89 |     enabled: true
90 | ```
91 | 
92 | ## Conclusion
93 | 
94 | The Validator component performs basic quality checks on generated responses. For most use cases, the default implementation will be sufficient.
95 | 
96 | If you are connecting a new data source, you typically won't need to modify this component at all.
97 | 


--------------------------------------------------------------------------------
/docs/prompts.md:
--------------------------------------------------------------------------------
  1 | # Effective Prompts for Working with ICI Core
  2 | 
  3 | This guide provides detailed prompts that you can use when interacting with AI assistants to help you work with the ICI Core framework. Copy and paste these prompts to get targeted assistance with understanding the codebase or developing custom components.
  4 | 
  5 | ## Understanding the Codebase
  6 | 
  7 | ```
  8 | I want to understand the ICI Core codebase architecture. Please explain:
  9 | 
 10 | 1. The overall architecture and how components interact
 11 | 2. The main data flow from ingestion to response generation
 12 | 3. The purpose of each major component (Ingestor, Preprocessor, Embedder, Vector Store, Generator, etc.)
 13 | 4. How configuration works via config.yaml
 14 | 5. Which interfaces need to be implemented when creating custom components
 15 | 
 16 | Please reference specific files like core interfaces in ici/core/interfaces/ and example implementations in ici/adapters/ when explaining.
 17 | ```
 18 | 
 19 | ```
 20 | Please analyze the DefaultIngestionPipeline in ici/core/pipelines/default.py and explain how data flows through the ingestion process step by step. Show how the data is transformed from raw input into embedded documents stored in the vector store.
 21 | ```
 22 | 
 23 | ## Building Custom Ingestors and Preprocessors
 24 | 
 25 | ```
 26 | I want to build a custom ingestor and preprocessor for [XYZ] (replace with your specific data source like Twitter, Notion, Slack, etc.). 
 27 | 
 28 | For the ingestor:
 29 | 1. Which interface must I implement? (Please reference ici/core/interfaces/ingestor.py)
 30 | 2. What methods need to be implemented and what should each return?
 31 | 3. How should I handle authentication with [XYZ]?
 32 | 4. What's the expected format of the data I should return?
 33 | 5. Where should I place my custom ingestor file?
 34 | 6. How do I configure it in config.yaml?
 35 | 
 36 | For the preprocessor:
 37 | 1. Which interface must I implement? (Please reference ici/core/interfaces/preprocessor.py)
 38 | 2. How should I structure my preprocess method to convert [XYZ] data into standard documents?
 39 | 3. What metadata should I include in each document?
 40 | 4. How do I handle message grouping or chunking?
 41 | 5. Where should I place my custom preprocessor file?
 42 | 6. How do I configure it in config.yaml?
 43 | 
 44 | Please provide code examples that I can adapt.
 45 | ```
 46 | 
 47 | ```
 48 | I'm implementing a custom [XYZ] ingestor and I'm stuck on the fetch_new_data method. I need to fetch [specific data type] from [XYZ API/data source]. Can you help me write this method with proper error handling and pagination? Here's what I have so far:
 49 | 
 50 | [paste your code here]
 51 | ```
 52 | 
 53 | ## Building a Custom Generator
 54 | 
 55 | ```
 56 | I want to implement a custom Generator that uses [model/API of your choice, e.g., Claude, LLaMA, Gemini, etc.].
 57 | 
 58 | 1. Please explain the Generator interface from ici/core/interfaces/generator.py
 59 | 2. Walk me through how to implement the required initialize() and generate() methods
 60 | 3. How should I handle token limits and context windows?
 61 | 4. How do I integrate this with the prompt template system?
 62 | 5. How do I configure authentication and API settings in config.yaml?
 63 | 6. Where should I place my custom generator file?
 64 | 
 65 | Can you provide a code example with proper error handling and logging?
 66 | ```
 67 | 
 68 | ```
 69 | I'm building a custom Generator that needs to support streaming responses. How can I implement this functionality while adhering to the Generator interface? Please provide examples of how to:
 70 | 
 71 | 1. Structure the generate() method to support streaming
 72 | 2. Handle errors during generation
 73 | 3. Configure stream parameters in config.yaml
 74 | ```
 75 | 
 76 | ## Debugging Common Issues
 77 | 
 78 | ```
 79 | I'm encountering an error when running my ingestion pipeline with my custom [Component]. The error message is:
 80 | 
 81 | [paste error message here]
 82 | 
 83 | My implementation looks like:
 84 | 
 85 | [paste relevant code here]
 86 | 
 87 | How can I diagnose and fix this issue?
 88 | ```
 89 | 
 90 | ## Extending the System
 91 | 
 92 | ```
 93 | I want to extend the ICI Core system to add [feature/functionality]. Which components should I modify or extend? Are there existing interfaces I should use? Please provide guidance on the best approach and reference relevant files in the codebase.
 94 | ```
 95 | 
 96 | ## Optimizing Performance
 97 | 
 98 | ```
 99 | I'm experiencing performance issues with my [specific component, e.g., vector store, embedder, etc.]. The system is [describe issues, e.g., slow to respond, using excessive memory]. How can I optimize my implementation while still adhering to the required interfaces?
100 | ```
101 | 
102 | ## Testing Components
103 | 
104 | ```
105 | How should I properly test my custom [Component Type]? Please provide examples of:
106 | 
107 | 1. Unit tests for each method
108 | 2. Integration tests with other components
109 | 3. Approaches for mocking dependencies
110 | 4. Performance benchmarking
111 | 
112 | Please reference testing patterns used elsewhere in the codebase.
113 | ```
114 | 
115 | ---
116 | 
117 | When using these prompts, replace the placeholders (text in brackets) with your specific details. For more information on each component, refer to the corresponding guide in the `docs/guides/` directory. 


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
  1 | # Examples
  2 | 
  3 | This directory contains example scripts demonstrating the usage of the ICI framework components.
  4 | 
  5 | ## Vector Store Query Tools
  6 | 
  7 | The scripts in this section query the ChromaDB vector store where ingested Telegram messages are stored.
  8 | 
  9 | ### Basic Query Script
 10 | 
 11 | The `query_vectorstore_example.py` script provides a simple interface for querying the vector store:
 12 | 
 13 | ```bash
 14 | # Basic query
 15 | python query_vectorstore_example.py "your search query"
 16 | 
 17 | # Specify number of results
 18 | python query_vectorstore_example.py "your search query" --top_k 10
 19 | 
 20 | # Filter by conversation
 21 | python query_vectorstore_example.py "your search query" --conversation_id "123456789"
 22 | ```
 23 | 
 24 | ### Advanced Query Tool
 25 | 
 26 | The `advanced_vector_query.py` script offers more advanced features:
 27 | 
 28 | ```bash
 29 | # Basic search
 30 | python advanced_vector_query.py "your search query"
 31 | 
 32 | # List all documents
 33 | python advanced_vector_query.py --list_all --top_k 20
 34 | 
 35 | # Filter by conversation and date range
 36 | python advanced_vector_query.py "your search query" --conversation_id "123456789" --date_from "2025-01-01" --date_to "2025-03-20"
 37 | 
 38 | # Get conversation context around a specific message
 39 | python advanced_vector_query.py --message_id "message_uuid" --context_window 5
 40 | 
 41 | # Export results to JSON
 42 | python advanced_vector_query.py "your search query" --format json --export results.json
 43 | 
 44 | # Export results to CSV
 45 | python advanced_vector_query.py "your search query" --export results.csv
 46 | 
 47 | # Show full text in results
 48 | python advanced_vector_query.py "your search query" --full_text
 49 | ```
 50 | 
 51 | ## Telegram Ingestion Pipeline
 52 | 
 53 | The examples also include scripts for running the Telegram ingestion pipeline:
 54 | 
 55 | ### Single Run Pipeline
 56 | 
 57 | The `async_telegram_pipeline_example.py` script demonstrates a single run of the Telegram ingestion pipeline:
 58 | 
 59 | ```bash
 60 | # Run the pipeline once
 61 | python async_telegram_pipeline_example.py
 62 | ```
 63 | 
 64 | ### Scheduled Pipeline
 65 | 
 66 | The `scheduled_telegram_pipeline_example.py` script demonstrates scheduled ingestion with the Telegram pipeline:
 67 | 
 68 | ```bash
 69 | # Register the ingestor (first-time use) and start scheduled ingestion
 70 | python scheduled_telegram_pipeline_example.py --register
 71 | 
 72 | # Start scheduled ingestion (if already registered)
 73 | python scheduled_telegram_pipeline_example.py
 74 | 
 75 | # Just run once (don't schedule)
 76 | python scheduled_telegram_pipeline_example.py --run-once
 77 | ```
 78 | 
 79 | The scheduled pipeline will run at the interval specified in `config.yaml` (under `pipelines.telegram.schedule.interval_minutes`).
 80 | 
 81 | ## Usage Notes
 82 | 
 83 | - These scripts use the configuration from `config.yaml` in the project root.
 84 | - Vector store queries require that data has been ingested using one of the pipeline examples.
 85 | - For date filtering, use ISO format dates (YYYY-MM-DD).
 86 | - The scheduled pipeline can be stopped with Ctrl+C, which will gracefully shut down the scheduler.
 87 | 
 88 | ## Example Queries
 89 | 
 90 | **Search for messages about a specific topic:**
 91 | ```bash
 92 | python advanced_vector_query.py "cryptocurrency market updates"
 93 | ```
 94 | 
 95 | **Find all messages from a specific conversation mentioning a topic:**
 96 | ```bash
 97 | python advanced_vector_query.py "login code" --conversation_id "777000"
 98 | ```
 99 | 
100 | **Export all messages since a given date (e.g., the last month):**
101 | ```bash
102 | python advanced_vector_query.py --list_all --date_from "2025-02-20" --export recent_messages.csv
103 | ```
104 | 
105 | **Get the conversation context around a specific message:**
106 | ```bash
107 | python advanced_vector_query.py --message_id "d290f1ee-6c54-4b01-90e6-d701748f0851" --context_window 3
108 | ``` 


--------------------------------------------------------------------------------
/examples/chat_example.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Chat system example for the ICI framework.
 4 | 
 5 | This script demonstrates how to use the DefaultOrchestrator for
 6 | multi-turn conversations with chat history.
 7 | """
 8 | 
 9 | import os
10 | import sys
11 | import asyncio
12 | import time
13 | from typing import Dict, Any, List
14 | import json
15 | 
16 | # Set up path to find ICI modules
17 | current_dir = os.path.dirname(os.path.abspath(__file__))
18 | parent_dir = os.path.dirname(current_dir)
19 | if parent_dir not in sys.path:
20 |     sys.path.insert(0, parent_dir)
21 | 
22 | # Import the orchestrator
23 | try:
24 |     from ici.adapters import DefaultOrchestrator
25 | except ImportError as e:
26 |     print(f"Error importing ICI: {e}")
27 |     sys.exit(1)
28 | 
29 | async def main():
30 |     """
31 |     Main chat example function.
32 |     
33 |     This demonstrates multi-turn conversation capabilities
34 |     using the DefaultOrchestrator to manage chat state and context.
35 |     """
36 |     print("Initializing orchestrator...")
37 |     
38 |     # Initialize orchestrator
39 |     orchestrator = DefaultOrchestrator(logger_name="chat_example")
40 |     await orchestrator.initialize()
41 |     
42 |     print("Orchestrator initialized successfully.")
43 |     
44 |     # User ID and source
45 |     user_id = "example_user_1"
46 |     source = "example"
47 |     
48 |     # Welcome the user
49 |     print("\nWelcome to the ICI Chat Example!")
50 |     print("This demonstrates a multi-turn conversation with history.")
51 |     print("Type 'exit' or 'quit' to end the conversation.")
52 |     print("Type '/new' to start a new conversation.")
53 |     print("Type '/help' for help.\n")
54 |     
55 |     while True:
56 |         # Get user input
57 |         user_input = input("You: ").strip()
58 |         
59 |         # Check for exit command
60 |         if user_input.lower() in ("exit", "quit"):
61 |             break
62 |         
63 |         # Process the query
64 |         try:
65 |             response = await orchestrator.process_query(
66 |                 source=source,
67 |                 user_id=user_id,
68 |                 query=user_input,
69 |                 additional_info={}
70 |             )
71 |             
72 |             # Print response
73 |             print(f"\nAssistant: {response}\n")
74 |             
75 |         except Exception as e:
76 |             print(f"Error processing query: {e}")
77 |     
78 |     print("Chat example completed.")
79 | 
80 | if __name__ == "__main__":
81 |     # Set config path environment variable if needed
82 |     if len(sys.argv) > 1:
83 |         os.environ["ICI_CONFIG_PATH"] = sys.argv[1]
84 |     
85 |     # Run the example
86 |     asyncio.run(main()) 


--------------------------------------------------------------------------------
/examples/config_utils_example.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Example script demonstrating how to use the configuration utilities.
  4 | 
  5 | This script shows how to:
  6 | 1. Load the full configuration from a file
  7 | 2. Get configuration for a specific component
  8 | 3. Handle different error cases
  9 | """
 10 | 
 11 | import os
 12 | import sys
 13 | import json
 14 | 
 15 | from ici.utils.config import load_config, get_component_config
 16 | from ici.core.exceptions import ConfigurationError
 17 | from ici.adapters.loggers import StructuredLogger
 18 | 
 19 | 
 20 | # Setup logging
 21 | logger = StructuredLogger(name="example.config")
 22 | 
 23 | 
 24 | def pretty_print_json(data, title=None):
 25 |     """Print data as formatted JSON with optional title."""
 26 |     if title:
 27 |         print(f"\n{title}")
 28 |         print("=" * len(title))
 29 |     print(json.dumps(data, indent=2))
 30 |     print()
 31 | 
 32 | 
 33 | def main():
 34 |     """Demonstrate the configuration utilities."""
 35 |     print("Configuration Utilities Example")
 36 |     print("-------------------------------")
 37 |     
 38 |     # Ensure we have a config file to work with
 39 |     if not os.path.exists("config.yaml"):
 40 |         print("Creating sample config.yaml file...")
 41 |         with open("config.yaml", "w") as f:
 42 |             f.write("""# ICI Framework Configuration
 43 | 
 44 | # Telegram Ingestor Configuration
 45 | telegram:
 46 |   api_id: "YOUR_API_ID_HERE"
 47 |   api_hash: "YOUR_API_HASH_HERE"
 48 |   phone_number: "+12345678901"
 49 |   session_file: "telegram_session"
 50 |   request_delay: 1.0
 51 | 
 52 | # Vector Store Configuration
 53 | vector_store:
 54 |   type: "chroma"
 55 |   collection_name: "example_collection"
 56 |   persist_directory: "./data/chroma_db"
 57 | 
 58 | # Embedder Configuration
 59 | embedder:
 60 |   model_name: "sentence-transformers/all-MiniLM-L6-v2"
 61 |   device: "cpu"
 62 |   batch_size: 32
 63 | """)
 64 |         print("Sample config.yaml file created.")
 65 |     
 66 |     try:
 67 |         # Example 1: Load the entire configuration
 68 |         print("\nExample 1: Loading the entire configuration")
 69 |         full_config = load_config()
 70 |         print(f"Successfully loaded configuration with {len(full_config)} sections:")
 71 |         for section in full_config.keys():
 72 |             print(f"  - {section}")
 73 |         
 74 |         # Example 2: Get configuration for a specific component
 75 |         print("\nExample 2: Getting configuration for the vector store component")
 76 |         try:
 77 |             vector_store_config = get_component_config("vector_store")
 78 |             pretty_print_json(vector_store_config, "Vector Store Configuration")
 79 |         except ConfigurationError as e:
 80 |             print(f"Error: {e}")
 81 |         
 82 |         # Example 3: Get configuration for a non-existent component
 83 |         print("\nExample 3: Getting configuration for a non-existent component")
 84 |         try:
 85 |             nonexistent_config = get_component_config("nonexistent_component")
 86 |             pretty_print_json(nonexistent_config, "Non-existent Component Configuration")
 87 |             print("Note: Returns an empty dictionary for non-existent components")
 88 |         except ConfigurationError as e:
 89 |             print(f"Error: {e}")
 90 |         
 91 |         # Example 4: Load configuration from a custom path
 92 |         print("\nExample 4: Loading configuration from a custom path")
 93 |         custom_config_path = "custom_config.yaml"
 94 |         try:
 95 |             # Create a custom config for demonstration
 96 |             with open(custom_config_path, "w") as f:
 97 |                 f.write("""
 98 | custom_component:
 99 |   setting1: "value1"
100 |   setting2: 42
101 | """)
102 |             
103 |             # Load the custom configuration
104 |             custom_config = load_config(custom_config_path)
105 |             pretty_print_json(custom_config, "Custom Configuration")
106 |             
107 |             # Clean up the temporary file
108 |             os.remove(custom_config_path)
109 |             
110 |         except ConfigurationError as e:
111 |             print(f"Error: {e}")
112 |         
113 |         # Example 5: Error handling for missing file
114 |         print("\nExample 5: Error handling for missing file")
115 |         try:
116 |             missing_config = load_config("nonexistent_file.yaml")
117 |             pretty_print_json(missing_config, "Missing File Configuration")
118 |         except ConfigurationError as e:
119 |             print(f"Error correctly handled: {e}")
120 |         
121 |         print("\nExample completed successfully!")
122 |         return 0
123 |         
124 |     except Exception as e:
125 |         print(f"Unexpected error: {e}")
126 |         return 1
127 | 
128 | 
if __name__ == "__main__":
    # Use main()'s return value (0 on success, 1 on failure) as the exit status.
    sys.exit(main())


--------------------------------------------------------------------------------
/examples/default_pipeline_example.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Default Ingestion Pipeline Example
  4 | 
  5 | This script demonstrates how to use the DefaultIngestionPipeline with
  6 | both WhatsApp and Telegram ingestors.
  7 | 
  8 | The pipeline handles:
  9 | 1. Loading and initializing all components for both ingestors
 10 | 2. Tracking state separately for each ingestor
 11 | 3. Fetching data from each ingestor based on its latest state
 12 | 4. Processing the data through the appropriate preprocessor
 13 | 5. Generating embeddings and storing in a vector database
 14 | 
 15 | Usage:
 16 |   python default_pipeline_example.py [--config-path CONFIG_PATH] [--ingestor-id INGESTOR_ID] [--full] [--verbose]
 17 | """
 18 | 
 19 | import os
 20 | import sys
 21 | import asyncio
 22 | import argparse
 23 | import logging
 24 | from datetime import datetime, timezone
 25 | 
 26 | # Add the project root to the Python path
 27 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 28 | 
 29 | from ici.adapters.loggers import StructuredLogger
 30 | from ici.adapters.pipelines import DefaultIngestionPipeline
 31 | 
 32 | 
 33 | async def main():
 34 |     # Parse command line arguments
 35 |     parser = argparse.ArgumentParser(description="Run the Default Ingestion Pipeline")
 36 |     parser.add_argument("--config-path", default="config.yaml", help="Path to configuration file")
 37 |     parser.add_argument("--ingestor-id", help="Specific ingestor ID to run (optional)")
 38 |     parser.add_argument("--full", action="store_true", help="Force a full ingestion run")
 39 |     parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
 40 |     args = parser.parse_args()
 41 |     
 42 |     # Set environment variable for config path
 43 |     os.environ["ICI_CONFIG_PATH"] = args.config_path
 44 |     
 45 |     # Setup logging
 46 |     logger = StructuredLogger(name="default_pipeline_example")
 47 |     
 48 |     # Set verbose logging if requested
 49 |     if args.verbose:
 50 |         # Configure root logger with more detail for debugging
 51 |         logging.basicConfig(level=logging.INFO)
 52 |         # Enable debug logs for aiohttp specifically
 53 |         logging.getLogger('aiohttp').setLevel(logging.DEBUG)
 54 |     
 55 |     try:
 56 |         # Create and initialize the pipeline
 57 |         pipeline = DefaultIngestionPipeline()
 58 |         await pipeline.initialize()
 59 |         
 60 |         # Run health check before ingestion
 61 |         health_info = await pipeline.healthcheck()
 62 |         logger.info({
 63 |             "action": "HEALTH_CHECK",
 64 |             "message": "Pipeline health check result",
 65 |             "data": health_info
 66 |         })
 67 |         
 68 |         if args.ingestor_id:
 69 |             # Run ingestion for a specific ingestor
 70 |             logger.info({
 71 |                 "action": "RUN_SPECIFIC_INGESTOR",
 72 |                 "message": f"Starting ingestion for {args.ingestor_id}",
 73 |                 "data": {"ingestor_id": args.ingestor_id}
 74 |             })
 75 |             
 76 |             # Check if we should force a fresh start
 77 |             if args.full:
 78 |                 # Reset the state to 0 for a full run
 79 |                 logger.info({
 80 |                     "action": "RESET_STATE",
 81 |                     "message": f"Resetting state for {args.ingestor_id} to perform full ingestion"
 82 |                 })
 83 |                 pipeline.set_ingestor_state(
 84 |                     ingestor_id=args.ingestor_id,
 85 |                     last_timestamp=0,
 86 |                     additional_metadata={
 87 |                         "reset_time": datetime.now(timezone.utc).isoformat(),
 88 |                         "reset_reason": "Manual full ingestion requested"
 89 |                     }
 90 |                 )
 91 |             
 92 |             # Run ingestion for the specific ingestor
 93 |             result = await pipeline.run_ingestion(args.ingestor_id)
 94 |             
 95 |             # Print results
 96 |             logger.info({
 97 |                 "action": "INGESTION_RESULT",
 98 |                 "message": f"Ingestion result for {args.ingestor_id}",
 99 |                 "data": {
100 |                     "success": result["success"],
101 |                     "documents_processed": result.get("documents_processed", 0),
102 |                     "errors": result.get("errors", []),
103 |                     "duration": result.get("duration", 0)
104 |                 }
105 |             })
106 |             
107 |             # Show the updated state
108 |             state = pipeline.get_ingestor_state(args.ingestor_id)
109 |             logger.info({
110 |                 "action": "UPDATED_STATE",
111 |                 "message": f"Updated state for {args.ingestor_id}",
112 |                 "data": state
113 |             })
114 |             
115 |         else:
116 |             # Run ingestion for all registered ingestors
117 |             logger.info({
118 |                 "action": "RUN_ALL_INGESTORS",
119 |                 "message": "Starting ingestion for all registered ingestors"
120 |             })
121 |             
122 |             await pipeline.start()
123 |             
124 |             logger.info({
125 |                 "action": "INGESTION_COMPLETE",
126 |                 "message": "Completed ingestion for all ingestors"
127 |             })
128 |         
129 |         # Clean up resources
130 |         await pipeline.close()
131 |         
132 |     except Exception as e:
133 |         logger.error({
134 |             "action": "PIPELINE_ERROR",
135 |             "message": f"Error running default pipeline: {str(e)}",
136 |             "data": {"error": str(e), "error_type": type(e).__name__}
137 |         })
138 |         sys.exit(1)
139 | 
if __name__ == "__main__":
    # main() is a coroutine; asyncio.run drives it to completion.
    asyncio.run(main())


--------------------------------------------------------------------------------
/examples/query_vectorstore_example.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | Example script that demonstrates how to query the ChromaDB vector store.
  4 | 
  5 | This script:
  6 | 1. Loads the configuration from config.yaml
  7 | 2. Initializes the ChromaDB vector store and the sentence transformer embedder
  8 | 3. Queries the vector store using a text query
  9 | 4. Displays the results
 10 | """
 11 | 
 12 | import os
 13 | import asyncio
 14 | import argparse
 15 | from typing import List, Dict, Any
 16 | 
 17 | from ici.adapters.vector_stores.chroma import ChromaDBStore
 18 | from ici.adapters.embedders.sentence_transformer import SentenceTransformerEmbedder
 19 | from ici.utils.config import load_config
 20 | from ici.adapters.loggers import StructuredLogger
 21 | 
 22 | # Set up logger
 23 | logger = StructuredLogger(name="vector_store_query")
 24 | 
 25 | async def query_vector_store(query: str, top_k: int = 5, filters: Dict[str, Any] = None) -> List[Dict[str, Any]]:
 26 |     """
 27 |     Query the vector store with a text query.
 28 |     
 29 |     Args:
 30 |         query: The text query to search for
 31 |         top_k: Number of results to return
 32 |         filters: Optional metadata filters to apply to the search
 33 |         
 34 |     Returns:
 35 |         List of documents matching the query
 36 |     """
 37 |     # Initialize the vector store
 38 |     vector_store = ChromaDBStore()
 39 |     await vector_store.initialize()
 40 |     
 41 |     # Initialize the embedder
 42 |     embedder = SentenceTransformerEmbedder()
 43 |     await embedder.initialize()
 44 |     
 45 |     # Generate embedding for the query
 46 |     query_embedding, _ = await embedder.embed(query)
 47 |     
 48 |     # Search the vector store
 49 |     logger.info({
 50 |         "action": "QUERY_VECTOR_STORE",
 51 |         "message": f"Searching for: '{query}'",
 52 |         "data": {"top_k": top_k, "filters": filters}
 53 |     })
 54 |     
 55 |     results = vector_store.search(
 56 |         query_vector=query_embedding,
 57 |         num_results=top_k,
 58 |         filters=filters
 59 |     )
 60 |     
 61 |     logger.info({
 62 |         "action": "QUERY_RESULTS",
 63 |         "message": f"Found {len(results)} results",
 64 |         "data": {"result_count": len(results)}
 65 |     })
 66 |     
 67 |     return results
 68 | 
 69 | def format_results(results: List[Dict[str, Any]]) -> None:
 70 |     """
 71 |     Format and print the search results.
 72 |     
 73 |     Args:
 74 |         results: The search results from the vector store
 75 |     """
 76 |     print(f"\n{'='*80}\n{'SEARCH RESULTS':^80}\n{'='*80}")
 77 |     
 78 |     for i, result in enumerate(results, 1):
 79 |         print(f"\n--- Result {i} ---")
 80 |         print(f"Document ID: {result.get('id', 'N/A')}")
 81 |         print(f"Score: {result.get('score', 0):.4f}")
 82 |         
 83 |         # Print metadata
 84 |         print("\nMetadata:")
 85 |         metadata = result.get('metadata', {})
 86 |         for key, value in metadata.items():
 87 |             if key != 'text':  # Avoid duplicating the text content
 88 |                 print(f"  {key}: {value}")
 89 |         
 90 |         # Print the text content
 91 |         print("\nContent:")
 92 |         print(f"{result.get('text', 'No content available')[:500]}...")
 93 |         
 94 |         if i < len(results):
 95 |             print("\n" + "-"*80)
 96 |     
 97 |     print("\n" + "="*80)
 98 | 
 99 | async def main():
100 |     # Parse command line arguments
101 |     parser = argparse.ArgumentParser(description="Query the ChromaDB vector store")
102 |     parser.add_argument("query", help="The text query to search for")
103 |     parser.add_argument("--top_k", type=int, default=5, help="Number of results to return")
104 |     parser.add_argument("--conversation_id", help="Filter by conversation ID")
105 |     parser.add_argument("--date_from", help="Filter by date (format: YYYY-MM-DD)")
106 |     
107 |     args = parser.parse_args()
108 |     
109 |     # Prepare filters based on command line arguments
110 |     filters = {}
111 |     if args.conversation_id:
112 |         filters["conversation_id"] = args.conversation_id
113 |     
114 |     # Execute the query
115 |     results = await query_vector_store(args.query, args.top_k, filters if filters else None)
116 |     
117 |     # Format and display the results
118 |     format_results(results)
119 | 
if __name__ == "__main__":
    # Default ICI_CONFIG_PATH to ./config.yaml when it is unset or empty
    configured_path = os.environ.get("ICI_CONFIG_PATH")
    if not configured_path:
        fallback_path = os.path.join(os.getcwd(), "config.yaml")
        os.environ["ICI_CONFIG_PATH"] = fallback_path

    # Drive the async entry point to completion
    asyncio.run(main())


--------------------------------------------------------------------------------
/examples/sentence_transformer_embedder_example.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Example demonstrating the use of the SentenceTransformerEmbedder.
 4 | 
 5 | This script initializes the SentenceTransformerEmbedder and uses it to generate
 6 | embeddings for sample texts, demonstrating both single and batch embedding.
 7 | """
 8 | 
 9 | import os
10 | import asyncio
11 | import yaml
12 | from pathlib import Path
13 | 
14 | from ici.adapters import SentenceTransformerEmbedder
15 | from ici.adapters.loggers import StructuredLogger
16 | 
17 | 
async def main():
    """Embed one sample sentence and a small batch, logging the outcome.

    Also writes ``config/embedders.yaml`` with a minimal example
    configuration when that file does not exist yet.
    """
    log = StructuredLogger(name="embedder_example", log_level="INFO")

    # Make sure the configuration directory and example file are present
    config_path = Path("config") / "embedders.yaml"
    config_path.parent.mkdir(exist_ok=True)

    if not config_path.exists():
        # Small, fast model for demonstration
        model_settings = {"model_name": "all-MiniLM-L6-v2"}
        example_config = {"embedders": {"sentence_transformer": model_settings}}

        with open(config_path, "w") as handle:
            yaml.dump(example_config, handle, default_flow_style=False)

        log.info(f"Created example config at {config_path}")

    # Build and initialize the embedder
    embedder = SentenceTransformerEmbedder(logger_name="sentence_transformer_example")
    log.info("Initializing embedder...")
    await embedder.initialize()

    # Report what was loaded
    log.info(f"Embedder initialized with model: {embedder._model_name}")
    log.info(f"Embedding dimensions: {embedder.dimensions}")

    # Health check
    status = embedder.healthcheck()
    log.info(f"Embedder health: {status['healthy']} - {status['message']}")

    # Single-text embedding
    sample_text = "The quick brown fox jumps over the lazy dog."
    log.info(f"Generating embedding for: '{sample_text}'")
    vector, info = await embedder.embed(sample_text)

    log.info(f"Embedding generated: {len(vector)} dimensions")
    log.info(f"Metadata: {info}")
    log.info(f"First 5 dimensions: {vector[:5]}")

    # Batch embedding; the trailing empty string demonstrates empty-text handling
    batch_texts = [
        "Artificial intelligence is transforming the world.",
        "Machine learning models learn from data.",
        "Natural language processing helps computers understand human language.",
        "",
    ]

    log.info(f"Generating embeddings for batch of {len(batch_texts)} texts...")
    batch_results = await embedder.embed_batch(batch_texts)

    # Each batch entry is an (embedding, metadata) pair; number them from 1
    for position, (item_vector, item_info) in enumerate(batch_results, start=1):
        log.info(f"Text {position} embedding: {len(item_vector)} dimensions")
        log.info(f"Text {position} metadata: {item_info}")

    log.info("Example completed successfully.")
91 | 
92 | 
if __name__ == "__main__":
    # main() is a coroutine; asyncio.run drives it to completion.
    asyncio.run(main())


--------------------------------------------------------------------------------
/examples/structured_logger_example.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | Example script demonstrating the use of the StructuredLogger implementation.
  4 | 
  5 | This script shows how to use the StructuredLogger to create well-structured
  6 | log entries with action names, messages, and additional data.
  7 | """
  8 | 
  9 | import sys
 10 | import os
 11 | import time
 12 | 
 13 | # Add the parent directory to the path to allow importing the ici package
 14 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 15 | 
 16 | from ici.adapters.loggers import StructuredLogger
 17 | 
 18 | 
 19 | def example_function():
 20 |     """Example function that demonstrates logging various events."""
 21 |     # Create a logger that logs to both console and file
 22 |     logger = StructuredLogger(
 23 |         name="example",
 24 |         level="DEBUG",
 25 |         log_file="examples/logs/structured.log",
 26 |         console_output=True,
 27 |     )
 28 | 
 29 |     # Log a simple debug message
 30 |     logger.debug(
 31 |         {
 32 |             "action": "FUNCTION_START",
 33 |             "message": "Starting example function",
 34 |             "data": {"timestamp": time.time()},
 35 |         }
 36 |     )
 37 | 
 38 |     # Log an info message with additional data
 39 |     logger.info(
 40 |         {
 41 |             "action": "CONFIG_LOADED",
 42 |             "message": "Configuration loaded successfully",
 43 |             "data": {
 44 |                 "config_file": "config.json",
 45 |                 "settings": {"max_connections": 100, "timeout": 30},
 46 |             },
 47 |         }
 48 |     )
 49 | 
 50 |     # Log a warning message
 51 |     logger.warning(
 52 |         {
 53 |             "action": "RESOURCE_LOW",
 54 |             "message": "System resources are running low",
 55 |             "data": {"memory_usage": "85%", "cpu_usage": "78%"},
 56 |         }
 57 |     )
 58 | 
 59 |     # Log an error message with exception
 60 |     try:
 61 |         # Simulate an error
 62 |         result = 1 / 0
 63 |     except Exception as e:
 64 |         logger.error(
 65 |             {
 66 |                 "action": "CALCULATION_FAILED",
 67 |                 "message": "Failed to perform calculation",
 68 |                 "data": {"operation": "division", "parameters": [1, 0]},
 69 |                 "exception": e,
 70 |             }
 71 |         )
 72 | 
 73 |     # Log a critical message
 74 |     logger.critical(
 75 |         {
 76 |             "action": "SERVICE_UNAVAILABLE",
 77 |             "message": "Critical dependency unavailable",
 78 |             "data": {"service": "database", "attempts": 5, "retry_in": 60},
 79 |         }
 80 |     )
 81 | 
 82 |     # Log the end of the function
 83 |     logger.info(
 84 |         {
 85 |             "action": "FUNCTION_END",
 86 |             "message": "Example function completed",
 87 |             "data": {"duration_ms": 123},
 88 |         }
 89 |     )
 90 | 
 91 | 
 92 | def database_example():
 93 |     """Example function demonstrating structured logging for database operations."""
 94 |     logger = StructuredLogger(name="example.database")
 95 | 
 96 |     # Log a database connection
 97 |     logger.info(
 98 |         {
 99 |             "action": "DB_CONNECT",
100 |             "message": "Connected to database",
101 |             "data": {
102 |                 "host": "localhost",
103 |                 "database": "users",
104 |                 "connection_id": "conn-123456",
105 |             },
106 |         }
107 |     )
108 | 
109 |     # Log a database query
110 |     logger.debug(
111 |         {
112 |             "action": "DB_QUERY",
113 |             "message": "Executing database query",
114 |             "data": {
115 |                 "query": "SELECT * FROM users WHERE status = ?",
116 |                 "parameters": ["active"],
117 |                 "query_id": "q-987654",
118 |             },
119 |         }
120 |     )
121 | 
122 |     # Log a slow query warning
123 |     logger.warning(
124 |         {
125 |             "action": "DB_SLOW_QUERY",
126 |             "message": "Query execution time exceeded threshold",
127 |             "data": {
128 |                 "query_id": "q-987654",
129 |                 "execution_time_ms": 1520,
130 |                 "threshold_ms": 1000,
131 |             },
132 |         }
133 |     )
134 | 
135 |     # Log a database disconnect
136 |     logger.info(
137 |         {
138 |             "action": "DB_DISCONNECT",
139 |             "message": "Disconnected from database",
140 |             "data": {"connection_id": "conn-123456", "duration_s": 35},
141 |         }
142 |     )
143 | 
144 | 
if __name__ == "__main__":
    # The file logger writes under examples/logs; create it up front
    log_dir = "examples/logs"
    os.makedirs(log_dir, exist_ok=True)

    print("Running structured logging examples...")
    for demo in (example_function, database_example):
        demo()

    # Closing notes for the user
    print("\nExamples completed. Check logs directory for file output.")
    print("The logs are in JSON format for easy parsing and analysis.")
156 | 


--------------------------------------------------------------------------------
/examples/telegram_debug.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Example script to run Telegram ingestion with debug mode enabled.
 4 | This will fetch just one message and print detailed diagnostics about it.
 5 | """
 6 | 
 7 | import os
 8 | import asyncio
 9 | import logging
10 | import yaml
11 | from datetime import datetime, timedelta
12 | 
13 | from ici.adapters.ingestors.telegram import TelegramIngestor
14 | from ici.adapters.pipelines.telegram import TelegramIngestionPipeline
15 | from ici.adapters.storage.memory import InMemoryStorage
16 | 
17 | # Configure logging
18 | logging.basicConfig(
19 |     level=logging.INFO,
20 |     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
21 | )
22 | logger = logging.getLogger("telegram_debug")
23 | 
async def run_debug():
    """Run a single ingestion with debug mode enabled.

    Loads the ``telegram`` section from ``config.yaml`` in the repository
    root, builds a ``TelegramIngestor`` covering the last three days, runs it
    once through a ``TelegramIngestionPipeline`` with
    ``debug_first_message=True``, and always disconnects the ingestor.
    """
    # Load configuration
    config_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'config.yaml')
    with open(config_path, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty document; fall back to {}
        config = yaml.safe_load(f) or {}

    telegram_config = config.get('telegram') or {}

    # The sample configs in this repository store the number under
    # `phone_number`; the legacy `phone` key is still accepted as a fallback.
    phone = telegram_config.get('phone_number') or telegram_config.get('phone')

    # Create storage
    storage = InMemoryStorage(logger=logger)

    # Create ingestor with short timeframe
    now = datetime.now()
    start_date = now - timedelta(days=3)  # Last 3 days

    ingestor = TelegramIngestor(
        api_id=telegram_config.get('api_id'),
        api_hash=telegram_config.get('api_hash'),
        phone=phone,
        session_name="telegram_debug",
        conversation_ids=telegram_config.get('conversation_ids', []),
        start_date=start_date,
        end_date=now,
        logger=logger
    )

    # Create pipeline
    pipeline = TelegramIngestionPipeline(
        ingestors={ingestor.id: ingestor},
        storage=storage,
        interval_seconds=3600,  # Not used in debug mode
        logger=logger
    )

    try:
        # Run with debug flag
        logger.info("Starting debug ingestion...")
        result = await pipeline.start(ingestor_id=ingestor.id, debug_first_message=True)

        logger.info(f"Debug ingestion complete: {result}")
    finally:
        # Always disconnect the ingestor
        await ingestor.disconnect()
        logger.info("Disconnected from Telegram")
70 | 
if __name__ == "__main__":
    # run_debug() is a coroutine; asyncio.run drives it to completion.
    asyncio.run(run_debug())


--------------------------------------------------------------------------------
/examples/telegram_ingestor_example.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Example script demonstrating how to use the Telegram ingestor.
  4 | 
  5 | This script shows how to:
  6 | 1. Initialize a TelegramIngestor using config.yaml
  7 | 2. Fetch all direct message history
  8 | 3. Fetch messages from a specific date range
  9 | 4. Check ingestor health status
 10 | 
 11 | Usage:
 12 |     1. Create a Telegram API application at https://my.telegram.org/apps
 13 |     2. Get your API ID and API hash
 14 |     3. Create a config.yaml file with your credentials
 15 |     4. Run the script
 16 | 
 17 | Example config.yaml:
 18 | ```yaml
 19 | telegram:
 20 |   api_id: "your_api_id"
 21 |   api_hash: "your_api_hash"
 22 |   phone_number: "+12345678901"
 23 |   session_file: "examples/data/telegram_session"
 24 |   request_delay: 1.0
 25 | ```
 26 | """
 27 | 
 28 | import os
 29 | import sys
 30 | import json
 31 | import logging
 32 | import asyncio
 33 | from datetime import datetime, timedelta
 34 | 
 35 | from ici.adapters.ingestors.telegram import TelegramIngestor
 36 | from ici.adapters.loggers import StructuredLogger
 37 | 
 38 | 
 39 | # Setup logging
 40 | logging.basicConfig(level=logging.INFO)
 41 | logger = StructuredLogger(name="example")
 42 | 
 43 | 
 44 | def pretty_print_json(data, title=None):
 45 |     """Print data as formatted JSON with optional title."""
 46 |     if title:
 47 |         print(f"\n{title}")
 48 |         print("=" * len(title))
 49 |     print(json.dumps(data, indent=2))
 50 |     print()
 51 | 
 52 | 
 53 | async def main_async():
 54 |     """Run the Telegram ingestor example asynchronously."""
 55 |     print("Telegram Ingestor Example")
 56 |     print("-----------------------")
 57 |     
 58 |     try:
 59 |         # Create a sample config.yaml file if it doesn't exist
 60 |         if not os.path.exists("config.yaml"):
 61 |             create_sample_config()
 62 |             print("Created sample config.yaml file. Please edit it with your Telegram credentials.")
 63 |             return 1
 64 |             
 65 |         # Initialize using config.yaml
 66 |         print("Initializing ingestor from config.yaml...")
 67 |         ingestor = TelegramIngestor(logger_name="example.telegram")
 68 |         
 69 |         # Call the initialize method
 70 |         await ingestor.initialize()
 71 |         
 72 |         # Check health status
 73 |         health = ingestor.healthcheck()
 74 |         pretty_print_json(health, "Health Check")
 75 |         
 76 |         if not health["healthy"]:
 77 |             print("Ingestor is not healthy. Cannot proceed.")
 78 |             return 1
 79 |         
 80 |         # Example 1: Fetch recent data (last 7 days)
 81 |         print("\nFetching messages from the last 7 days...")
 82 |         since_date = datetime.now() - timedelta(days=7)
 83 |         recent_data = ingestor.fetch_new_data(since=since_date)
 84 |         
 85 |         print(f"Retrieved {len(recent_data['messages'])} messages from {len(recent_data['conversations'])} conversations")
 86 |         
 87 |         # Example 2: Fetch data for a specific date range
 88 |         print("\nFetching messages for a specific date range...")
 89 |         start_date = datetime.now() - timedelta(days=30)
 90 |         end_date = datetime.now() - timedelta(days=15)
 91 |         
 92 |         print(f"Date range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
 93 |         range_data = ingestor.fetch_data_in_range(start=start_date, end=end_date)
 94 |         
 95 |         print(f"Retrieved {len(range_data['messages'])} messages from {len(range_data['conversations'])} conversations")
 96 |         
 97 |         # Print sample message (if available)
 98 |         if recent_data["messages"]:
 99 |             sample_message = recent_data["messages"][0]
100 |             pretty_print_json(sample_message, "Sample Message")
101 |         
102 |         # Save data to JSON file
103 |         output_dir = "examples/data"
104 |         os.makedirs(output_dir, exist_ok=True)
105 |         
106 |         output_file = os.path.join(output_dir, "telegram_messages.json")
107 |         with open(output_file, 'w', encoding='utf-8') as f:
108 |             json.dump(recent_data, f, indent=2, ensure_ascii=False)
109 |             
110 |         print(f"\nSaved messages to {output_file}")
111 |         
112 |     except Exception as e:
113 |         print(f"Error: {e}")
114 |         return 1
115 |     
116 |     return 0
117 | 
118 | 
def create_sample_config():
    """Write a template ``config.yaml`` into the current working directory.

    The template holds placeholder Telegram credentials. An existing
    ``config.yaml`` is overwritten; callers check for it beforehand.
    """
    template = """# ICI Framework Configuration

# Telegram Ingestor Configuration
telegram:
  # Get these values from https://my.telegram.org/apps
  api_id: "YOUR_API_ID_HERE"
  api_hash: "YOUR_API_HASH_HERE"
  phone_number: "+12345678901"  # Your phone number with country code
  
  # Authentication options - use either session_file OR session_string
  session_file: "examples/data/telegram_session"  # Option 1: Session file path
  # session_string: "1BQANOTEuMTA4LjU..."  # Option 2: Session string
  
  request_delay: 1.0  # Seconds between API requests to avoid rate limiting
"""
    target = "config.yaml"
    with open(target, "w") as handle:
        handle.write(template)
138 | 
139 | 
def main():
    """Drive ``main_async`` to completion with ``asyncio.run`` and return its result."""
    exit_code = asyncio.run(main_async())
    return exit_code
143 | 
144 | 
145 | if __name__ == "__main__":
146 |     sys.exit(main()) 


--------------------------------------------------------------------------------
/examples/telegram_session_string.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Example script for generating and using Telegram session strings.
  4 | 
  5 | This script demonstrates how to:
  6 | 1. Generate a session string from existing session file or new authentication
  7 | 2. Connect to Telegram using a session string
  8 | 3. Use the session string with the TelegramIngestor
  9 | 
 10 | Usage:
 11 |     1. Run this script with --generate to create a session string
 12 |     2. Run this script with --use to demonstrate using a session string
 13 | 
 14 | Example:
 15 |     python examples/telegram_session_string.py --generate
 16 |     python examples/telegram_session_string.py --use "YOUR_SESSION_STRING"
 17 | """
 18 | 
 19 | import os
 20 | import sys
 21 | import json
 22 | import argparse
 23 | import asyncio
 24 | from datetime import datetime, timedelta
 25 | from typing import Optional
 26 | 
 27 | from telethon.sessions import StringSession
 28 | from telethon.sync import TelegramClient
 29 | 
 30 | from ici.adapters.ingestors.telegram import TelegramIngestor
 31 | from ici.adapters.loggers import StructuredLogger
 32 | 
 33 | 
 34 | # Setup CLI arguments
 35 | parser = argparse.ArgumentParser(description='Telegram Session String Example')
 36 | group = parser.add_mutually_exclusive_group(required=True)
 37 | group.add_argument('--generate', action='store_true', help='Generate a new session string')
 38 | group.add_argument('--use', type=str, metavar='SESSION_STRING', help='Use the provided session string')
 39 | 
 40 | 
 41 | async def generate_session_string() -> str:
 42 |     """
 43 |     Generate a session string using the Telethon client.
 44 |     
 45 |     Returns:
 46 |         str: The generated session string.
 47 |     """
 48 |     # Load Telegram credentials from environment or prompt the user
 49 |     api_id = os.environ.get("TELEGRAM_API_ID")
 50 |     api_hash = os.environ.get("TELEGRAM_API_HASH")
 51 |     session_file = os.environ.get("TELEGRAM_SESSION_FILE", "telegram_session")
 52 |     
 53 |     if not api_id:
 54 |         api_id = input("Enter your Telegram API ID: ")
 55 |     
 56 |     if not api_hash:
 57 |         api_hash = input("Enter your Telegram API hash: ")
 58 |     
 59 |     # Create a new Telegram client
 60 |     async with TelegramClient(session_file, api_id, api_hash) as client:
 61 |         # Generate the session string
 62 |         session_string = StringSession.save(client.session)
 63 |         print("\nYour session string has been generated:")
 64 |         print("-" * 50)
 65 |         print(session_string)
 66 |         print("-" * 50)
 67 |         print("\nStore this string securely as it provides access to your Telegram account.")
 68 |         
 69 |         return session_string
 70 | 
 71 | 
 72 | async def use_session_string(session_string: str) -> None:
 73 |     """
 74 |     Demonstrate using a session string with the TelegramIngestor.
 75 |     
 76 |     Args:
 77 |         session_string: The session string to use.
 78 |     """
 79 |     # Load Telegram credentials from environment or prompt the user
 80 |     api_id = os.environ.get("TELEGRAM_API_ID")
 81 |     api_hash = os.environ.get("TELEGRAM_API_HASH")
 82 |     phone = os.environ.get("TELEGRAM_PHONE")
 83 |     
 84 |     if not api_id:
 85 |         api_id = input("Enter your Telegram API ID: ")
 86 |     
 87 |     if not api_hash:
 88 |         api_hash = input("Enter your Telegram API hash: ")
 89 |     
 90 |     if not phone:
 91 |         phone = input("Enter your phone number (with country code, e.g., +12345678901): ")
 92 |     
 93 |     print("\nInitializing TelegramIngestor with session string...")
 94 |     ingestor = TelegramIngestor(logger_name="example.telegram")
 95 |     
 96 |     # Connect using the session string
 97 |     config = {
 98 |         "api_id": api_id,
 99 |         "api_hash": api_hash,
100 |         "phone_number": phone,
101 |         "session_string": session_string,
102 |         "request_delay": 1.0
103 |     }
104 |     
105 |     await ingestor._connect(config)
106 |     
107 |     # Check health status
108 |     health = ingestor.healthcheck()
109 |     print(f"Health check: {'Healthy' if health['healthy'] else 'Unhealthy'}")
110 |     
111 |     if not health["healthy"]:
112 |         print("Ingestor is not healthy. Cannot proceed.")
113 |         return
114 |     
115 |     # Fetch recent messages as a demonstration
116 |     print("\nFetching messages from the last 3 days...")
117 |     since_date = datetime.now() - timedelta(days=3)
118 |     recent_data = ingestor.fetch_new_data(since=since_date)
119 |     
120 |     print(f"Retrieved {len(recent_data['messages'])} messages from {len(recent_data['conversations'])} conversations")
121 |     
122 |     # Print a sample message
123 |     if recent_data["messages"]:
124 |         sample_message = recent_data["messages"][0]
125 |         print("\nSample message:")
126 |         print(f"From: {sample_message['conversation_name']}")
127 |         print(f"Date: {sample_message['date']}")
128 |         print(f"Message: {sample_message['text'][:100]}...")
129 | 
130 | 
async def main_async() -> int:
    """
    Parse the command line and run the requested demonstration.

    ``--generate`` runs ``generate_session_string``; a non-empty ``--use``
    value is forwarded to ``use_session_string``. Any exception raised by
    either is printed and turned into a failing exit code.

    Returns:
        int: 0 when no exception was raised, 1 otherwise.
    """
    cli = parser.parse_args()

    try:
        if cli.generate:
            await generate_session_string()
        elif cli.use:
            await use_session_string(cli.use)
    except Exception as exc:
        print(f"Error: {exc}")
        return 1
    else:
        return 0
151 | 
152 | 
def main() -> int:
    """
    Synchronous entry point for the script.

    Returns:
        int: The exit code produced by ``main_async``.
    """
    exit_code = asyncio.run(main_async())
    return exit_code
161 | 
162 | 
163 | if __name__ == "__main__":
164 |     sys.exit(main()) 


--------------------------------------------------------------------------------
/examples/telegram_session_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | Test script for Telegram session validation and generation.
  4 | 
  5 | This script demonstrates the use of the TelegramIngestor with its
  6 | built-in session validation and generation capabilities.
  7 | """
  8 | 
  9 | import os
 10 | import sys
 11 | import asyncio
 12 | import argparse
 13 | 
 14 | # Add the project root to the path
 15 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 16 | 
 17 | from ici.adapters.ingestors.telegram import TelegramIngestor
 18 | from ici.adapters.loggers import StructuredLogger
 19 | 
 20 | 
 21 | async def main():
 22 |     """Test the TelegramIngestor with session handling."""
 23 |     # Parse command line arguments
 24 |     parser = argparse.ArgumentParser(description="Test Telegram session handling")
 25 |     parser.add_argument("--config", default="config.yaml", help="Path to config file")
 26 |     args = parser.parse_args()
 27 |     
 28 |     # Set environment variable for config path
 29 |     os.environ["ICI_CONFIG_PATH"] = args.config
 30 |     
 31 |     # Ensure config file exists
 32 |     if not os.path.exists(args.config):
 33 |         print(f"Config file not found: {args.config}")
 34 |         print(f"Please copy config.example.yaml to {args.config} and update with your credentials.")
 35 |         return
 36 |     
 37 |     # Create logger
 38 |     logger = StructuredLogger(name="telegram_test")
 39 |     logger.info({
 40 |         "action": "TEST_START",
 41 |         "message": "Starting Telegram session test"
 42 |     })
 43 |     
 44 |     # Create ingestor
 45 |     ingestor = TelegramIngestor(logger_name="telegram_test")
 46 |     
 47 |     try:
 48 |         # Initialize ingestor (this will validate or generate the session)
 49 |         await ingestor.initialize()
 50 |         
 51 |         # Run health check
 52 |         health_result = ingestor.healthcheck()
 53 |         
 54 |         # Check if health_result is a coroutine (async context)
 55 |         if asyncio.iscoroutine(health_result):
 56 |             logger.info({
 57 |                 "action": "HEALTH_CHECK_ASYNC",
 58 |                 "message": "Health check returned coroutine - awaiting result"
 59 |             })
 60 |             health = await health_result
 61 |         else:
 62 |             health = health_result
 63 |         
 64 |         if health["healthy"]:
 65 |             print(f"\n✅ Connection successful! Logged in as: {health['details'].get('name', 'Unknown')}")
 66 |             logger.info({
 67 |                 "action": "HEALTH_CHECK_SUCCESS",
 68 |                 "message": "Telegram connection is healthy",
 69 |                 "data": health
 70 |             })
 71 |         else:
 72 |             print(f"\n❌ Connection failed: {health['message']}")
 73 |             logger.error({
 74 |                 "action": "HEALTH_CHECK_FAILED",
 75 |                 "message": "Telegram connection is not healthy",
 76 |                 "data": health
 77 |             })
 78 |     
 79 |     except Exception as e:
 80 |         logger.error({
 81 |             "action": "TEST_ERROR",
 82 |             "message": f"Test failed: {str(e)}",
 83 |             "data": {
 84 |                 "exception": str(e),
 85 |                 "exception_type": type(e).__name__
 86 |             }
 87 |         })
 88 |         print(f"\n❌ Error: {str(e)}")
 89 |     
 90 |     finally:
 91 |         # Clean up
 92 |         if ingestor._client and ingestor._is_connected:
 93 |             await ingestor._disconnect()
 94 |         
 95 |         logger.info({
 96 |             "action": "TEST_COMPLETE",
 97 |             "message": "Telegram session test completed"
 98 |         })
 99 | 
100 | 
101 | if __name__ == "__main__":
102 |     asyncio.run(main()) 


--------------------------------------------------------------------------------
/ici/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Intelligent Consciousness Interface (ICI)
 3 | 
 4 | A modular framework for creating a personal AI assistant that is context-aware,
 5 | style-aware, personality-aware, and security-aware. The system processes data 
 6 | through an Ingestion Pipeline and responds to queries via a Query Pipeline,
 7 | leveraging vector databases for efficient retrieval.
 8 | """
 9 | 
10 | __version__ = "0.1.0"
11 | 
12 | # Import core interfaces and exceptions
13 | from ici.core import (
14 |     # Interfaces
15 |     Ingestor,
16 |     Preprocessor,
17 |     Embedder,
18 |     VectorStore,
19 |     Validator,
20 |     PromptBuilder,
21 |     Generator,
22 |     Orchestrator,
23 |     IngestionPipeline,
24 |     Logger,
25 |     # Exceptions
26 |     ICIError,
27 |     IngestionError,
28 |     IngestorError,
29 |     APIAuthenticationError,
30 |     APIRateLimitError,
31 |     DataFetchError,
32 |     PreprocessorError,
33 |     IngestionPipelineError,
34 |     QueryError,
35 |     ValidationError,
36 |     EmbeddingError,
37 |     VectorStoreError,
38 |     PromptBuilderError,
39 |     GenerationError,
40 |     OrchestratorError,
41 |     ConfigurationError,
42 |     LoggerError,
43 | )
44 | 
45 | # Import utilities
46 | from ici.utils import (
47 |     load_config,
48 |     get_component_config,
49 | )
50 | 
51 | # Export core interfaces and exceptions
52 | __all__ = [
53 |     # Interfaces
54 |     "Ingestor",
55 |     "Preprocessor",
56 |     "Embedder",
57 |     "VectorStore",
58 |     "Validator",
59 |     "PromptBuilder",
60 |     "Generator",
61 |     "Orchestrator",
62 |     "IngestionPipeline",
63 |     "Logger",
64 |     # Exceptions
65 |     "ICIError",
66 |     "IngestionError",
67 |     "IngestorError",
68 |     "APIAuthenticationError",
69 |     "APIRateLimitError",
70 |     "DataFetchError",
71 |     "PreprocessorError",
72 |     "IngestionPipelineError",
73 |     "QueryError",
74 |     "ValidationError",
75 |     "EmbeddingError",
76 |     "VectorStoreError",
77 |     "PromptBuilderError",
78 |     "GenerationError",
79 |     "OrchestratorError",
80 |     "ConfigurationError",
81 |     "LoggerError",
82 |     # Utilities
83 |     "load_config",
84 |     "get_component_config",
85 | ]
86 | 


--------------------------------------------------------------------------------
/ici/adapters/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Adapters module for the ICI framework.
 3 | 
 4 | This module contains concrete implementations of the interfaces defined in the core module.
 5 | """
 6 | 
 7 | # Import logger implementations
 8 | from ici.adapters.loggers import StructuredLogger
 9 | 
10 | # Import embedder implementations
11 | from ici.adapters.embedders import SentenceTransformerEmbedder
12 | 
13 | # Import preprocessor implementations
14 | from ici.adapters.preprocessors import TelegramPreprocessor
15 | 
16 | # Import chat history implementations
17 | from ici.adapters.chat import JSONChatHistoryManager
18 | 
19 | # Import user ID generator implementations
20 | from ici.adapters.user_id import DefaultUserIDGenerator
21 | 
22 | # Import orchestrator implementations
23 | from ici.adapters.orchestrators import DefaultOrchestrator
24 | 
25 | # Export all implementations
26 | __all__ = [
27 |     # Logger implementations
28 |     "StructuredLogger",
29 |     
30 |     # Embedder implementations
31 |     "SentenceTransformerEmbedder",
32 |     
33 |     # Preprocessor implementations
34 |     "TelegramPreprocessor",
35 |     
36 |     # Chat history implementations
37 |     "JSONChatHistoryManager",
38 |     
39 |     # User ID generator implementations
40 |     "DefaultUserIDGenerator",
41 |     
42 |     # Orchestrator implementations
43 |     "DefaultOrchestrator",
44 | ]
45 | 


--------------------------------------------------------------------------------
/ici/adapters/chat/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Chat history management implementations.
3 | """
4 | 
5 | from ici.adapters.chat.json_chat_history_manager import JSONChatHistoryManager
6 | 
7 | __all__ = [
8 |     "JSONChatHistoryManager",
9 | ] 


--------------------------------------------------------------------------------
/ici/adapters/controller/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Controller implementations for the ICI Framework.
 3 | 
 4 | This module contains adapter implementations for the Controller interface,
 5 | providing controller functionality for the ICI Framework.
 6 | """
 7 | 
 8 | # Import adapters
 9 | from .command_line import command_line_controller 
10 | 
11 | __all__ = ["command_line_controller"]


--------------------------------------------------------------------------------
/ici/adapters/embedders/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Embedder implementations for the ICI Framework.
 3 | 
 4 | This module contains adapter implementations for the Embedder interface,
 5 | providing embedding functionality for text data.
 6 | """
 7 | 
 8 | # Import adapters
 9 | from .sentence_transformer import SentenceTransformerEmbedder 
10 | 
11 | __all__ = ["SentenceTransformerEmbedder"]


--------------------------------------------------------------------------------
/ici/adapters/generators/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Generator implementations for the ICI framework.
 3 | 
 4 | This package contains concrete implementations of the Generator interface
 5 | for producing responses using various language models.
 6 | """
 7 | 
 8 | from ici.adapters.generators.openai_generator import OpenAIGenerator
 9 | from ici.adapters.generators.langchain_generator import LangchainGenerator
10 | from ici.adapters.generators.factory import create_generator
11 | 
12 | __all__ = ["OpenAIGenerator", "LangchainGenerator", "create_generator"] 


--------------------------------------------------------------------------------
/ici/adapters/generators/factory.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Factory for creating Generator implementations.
 3 | 
 4 | This module provides a factory function to create the appropriate
 5 | Generator implementation based on configuration.
 6 | """
 7 | 
 8 | from typing import Optional, Dict, Any
 9 | 
10 | from ici.core.interfaces.generator import Generator
11 | from ici.adapters.generators.openai_generator import OpenAIGenerator
12 | from ici.adapters.generators.langchain_generator import LangchainGenerator
13 | from ici.utils.config import get_component_config
14 | 
15 | 
def create_generator(config_type: Optional[str] = None, logger_name: str = "generator") -> Generator:
    """
    Build the Generator implementation selected by configuration.

    The "generator" component configuration is always loaded. Its "type"
    entry (default "openai") decides the implementation unless a truthy
    ``config_type`` is supplied, in which case that value wins.

    Args:
        config_type: Optional override for the generator type from config
        logger_name: Name to use for the logger

    Returns:
        Generator: An OpenAIGenerator for "openai", a LangchainGenerator
            for "langchain"

    Raises:
        ValueError: If the resolved generator type is neither of the above
    """
    settings = get_component_config("generator")

    # An explicit, truthy override takes precedence over the configured type.
    if config_type:
        selected = config_type
    else:
        selected = settings.get("type", "openai")

    if selected == "openai":
        return OpenAIGenerator(logger_name=logger_name)
    if selected == "langchain":
        return LangchainGenerator(logger_name=logger_name)

    raise ValueError(f"Invalid generator type: {selected}")


--------------------------------------------------------------------------------
/ici/adapters/ingestors/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Ingestor implementations for various data sources.
3 | """ 
4 | 
5 | from ici.adapters.ingestors.telegram import TelegramIngestor
6 | from ici.adapters.ingestors.whatsapp import WhatsAppIngestor
7 | 
8 | __all__ = ["TelegramIngestor", "WhatsAppIngestor"] 


--------------------------------------------------------------------------------
/ici/adapters/loggers/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Logger implementations for the ICI framework.
 3 | 
 4 | This module provides the StructuredLogger implementation of the Logger interface.
 5 | """
 6 | 
 7 | from ici.adapters.loggers.structured_logger import StructuredLogger
 8 | 
 9 | __all__ = ["StructuredLogger"]
10 | 


--------------------------------------------------------------------------------
/ici/adapters/orchestrators/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Orchestrator module.
 3 | 
 4 | This package provides concrete implementations of the Orchestrator interface
 5 | for coordinating the processing of user queries from validation to response generation.
 6 | 
 7 | The DefaultOrchestrator uses the DefaultIngestionPipeline, supporting both
 8 | Telegram and WhatsApp data sources.
 9 | """
10 | 
11 | from ici.adapters.orchestrators.default_orchestrator import DefaultOrchestrator
12 | 
13 | __all__ = ["DefaultOrchestrator"] 


--------------------------------------------------------------------------------
/ici/adapters/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Pipeline implementation for ingesting data from various sources.
3 | """
4 | 
5 | from ici.adapters.pipelines.default import DefaultIngestionPipeline
6 | 
7 | __all__ = ["DefaultIngestionPipeline"]


--------------------------------------------------------------------------------
/ici/adapters/preprocessors/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Preprocessor implementations for various data sources.
3 | """
4 | 
5 | from ici.adapters.preprocessors.telegram import TelegramPreprocessor
6 | from ici.adapters.preprocessors.whatsapp import WhatsAppPreprocessor
7 | 
8 | __all__ = ["TelegramPreprocessor", "WhatsAppPreprocessor"] 


--------------------------------------------------------------------------------
/ici/adapters/prompt_builders/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Prompt Builder module.
 3 | 
 4 | This package provides concrete implementations of the PromptBuilder interface
 5 | for building prompts for language models.
 6 | """
 7 | 
 8 | from ici.adapters.prompt_builders.basic_prompt_builder import BasicPromptBuilder
 9 | 
10 | __all__ = ["BasicPromptBuilder"] 


--------------------------------------------------------------------------------
/ici/adapters/user_id/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | User ID generator implementations.
3 | """
4 | 
5 | from ici.adapters.user_id.default_user_id_generator import DefaultUserIDGenerator
6 | 
7 | __all__ = [
8 |     "DefaultUserIDGenerator",
9 | ] 


--------------------------------------------------------------------------------
/ici/adapters/validators/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Validator implementations.
 3 | 
 4 | This package provides concrete implementations of the Validator interface
 5 | for validating user input against security rules.
 6 | """
 7 | 
 8 | from ici.adapters.validators.rule_based import RuleBasedValidator
 9 | 
10 | __all__ = ["RuleBasedValidator"] 


--------------------------------------------------------------------------------
/ici/adapters/vector_stores/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Vector Store implementations for the ICI framework.
 3 | 
 4 | This package contains implementations of the VectorStore interface
 5 | for different vector database technologies.
 6 | 
 7 | Available implementations:
 8 | - ChromaDBStore: Vector store implementation using ChromaDB
 9 | """
10 | 
11 | from ici.adapters.vector_stores.chroma import ChromaDBStore
12 | 
13 | __all__ = ["ChromaDBStore"] 


--------------------------------------------------------------------------------
/ici/core/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Core module for the Intelligent Consciousness Interface (ICI).
 3 | 
 4 | This module contains the core interfaces and exceptions that define the
 5 | architecture of the ICI framework, establishing the contract that all
 6 | implementations must follow.
 7 | """
 8 | 
 9 | # Import all interfaces
10 | from ici.core.interfaces import (
11 |     Ingestor,
12 |     Preprocessor,
13 |     Embedder,
14 |     VectorStore,
15 |     Validator,
16 |     PromptBuilder,
17 |     Generator,
18 |     Orchestrator,
19 |     IngestionPipeline,
20 |     Logger,
21 | )
22 | 
23 | # Import all exceptions
24 | from ici.core.exceptions import (
25 |     ICIError,
26 |     IngestionError,
27 |     IngestorError,
28 |     APIAuthenticationError,
29 |     APIRateLimitError,
30 |     DataFetchError,
31 |     PreprocessorError,
32 |     IngestionPipelineError,
33 |     QueryError,
34 |     ValidationError,
35 |     EmbeddingError,
36 |     VectorStoreError,
37 |     PromptBuilderError,
38 |     GenerationError,
39 |     OrchestratorError,
40 |     ConfigurationError,
41 |     LoggerError,
42 | )
43 | 
44 | # Export all interfaces and exceptions
45 | __all__ = [
46 |     # Interfaces
47 |     "Ingestor",
48 |     "Preprocessor",
49 |     "Embedder",
50 |     "VectorStore",
51 |     "Validator",
52 |     "PromptBuilder",
53 |     "Generator",
54 |     "Orchestrator",
55 |     "IngestionPipeline",
56 |     "Logger",
57 |     # Exceptions
58 |     "ICIError",
59 |     "IngestionError",
60 |     "IngestorError",
61 |     "APIAuthenticationError",
62 |     "APIRateLimitError",
63 |     "DataFetchError",
64 |     "PreprocessorError",
65 |     "IngestionPipelineError",
66 |     "QueryError",
67 |     "ValidationError",
68 |     "EmbeddingError",
69 |     "VectorStoreError",
70 |     "PromptBuilderError",
71 |     "GenerationError",
72 |     "OrchestratorError",
73 |     "ConfigurationError",
74 |     "LoggerError",
75 | ]
76 | 


--------------------------------------------------------------------------------
/ici/core/exceptions/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Exception hierarchy for the ICI framework.
  3 | 
  4 | This module defines the base exception types for all components, providing
  5 | a structured hierarchy for error handling and recovery.
  6 | """
  7 | 
  8 | 
  9 | class ICIError(Exception):
 10 |     """Base exception for all ICI-related errors."""
 11 | 
 12 |     pass
 13 | 
 14 | 
 15 | # Ingestion Pipeline Errors
 16 | 
 17 | 
 18 | class IngestionError(ICIError):
 19 |     """Base exception for all ingestion-related errors."""
 20 | 
 21 |     pass
 22 | 
 23 | 
 24 | class IngestorError(IngestionError):
 25 |     """Base exception for all ingestor-related errors."""
 26 | 
 27 |     pass
 28 | 
 29 | 
 30 | class APIAuthenticationError(IngestorError):
 31 |     """Raised when API authentication fails."""
 32 | 
 33 |     pass
 34 | 
 35 | 
 36 | class APIRateLimitError(IngestorError):
 37 |     """Raised when API rate limits are exceeded."""
 38 | 
 39 |     pass
 40 | 
 41 | 
 42 | class DataFetchError(IngestorError):
 43 |     """Raised when data fetching fails."""
 44 | 
 45 |     pass
 46 | 
 47 | 
 48 | class AuthenticationError(IngestorError):
 49 |     """Raised when authentication fails."""
 50 | 
 51 |     pass
 52 | 
 53 | 
 54 | class PreprocessorError(IngestionError):
 55 |     """Raised when preprocessing fails."""
 56 | 
 57 |     pass
 58 | 
 59 | 
 60 | class IngestionPipelineError(IngestionError):
 61 |     """Raised when the ingestion pipeline encounters an error."""
 62 | 
 63 |     pass
 64 | 
 65 | 
 66 | # Query Pipeline Errors
 67 | 
 68 | 
 69 | class QueryError(ICIError):
 70 |     """Base exception for all query-related errors."""
 71 | 
 72 |     pass
 73 | 
 74 | 
 75 | class ValidationError(QueryError):
 76 |     """Raised when input validation fails."""
 77 | 
 78 |     pass
 79 | 
 80 | 
 81 | class EmbeddingError(ICIError):
 82 |     """Raised when embedding generation fails."""
 83 | 
 84 |     pass
 85 | 
 86 | 
 87 | class VectorStoreError(ICIError):
 88 |     """Base exception for all vector store related errors."""
 89 | 
 90 |     pass
 91 | 
 92 | 
 93 | class PromptBuilderError(QueryError):
 94 |     """Raised when prompt construction fails."""
 95 | 
 96 |     pass
 97 | 
 98 | 
 99 | class GenerationError(QueryError):
100 |     """Raised when text generation fails."""
101 | 
102 |     pass
103 | 
104 | 
class OrchestratorError(QueryError):
    """Signals an error encountered by the orchestrator."""
109 | 
110 | 
111 | # Chat History Errors
112 | 
113 | 
class ChatError(ICIError):
    """Common parent for chat-related errors."""
118 | 
119 | 
class ChatHistoryError(ChatError):
    """Signals a failed chat-history operation."""
124 | 
125 | 
class ChatIDError(ChatError):
    """Signals that a chat ID is invalid or could not be found."""
130 | 
131 | 
class ChatStorageError(ChatError):
    """Signals a failed chat-storage operation."""
136 | 
137 | 
class UserIDError(ICIError):
    """Signals that generating or validating a user ID failed."""
142 | 
143 | 
144 | # Other Errors
145 | 
146 | 
class ConfigurationError(ICIError):
    """Signals that the configuration is invalid."""
151 | 
152 | 
class LoggerError(ICIError):
    """Signals an error encountered while logging."""
157 | 
158 | 
class ComponentLoadError(ICIError):
    """Signals that a component could not be loaded."""
163 | 
164 | 
165 | # Exporting all exception types
166 | __all__ = [
167 |     "ICIError",
168 |     "IngestionError",
169 |     "IngestorError",
170 |     "APIAuthenticationError",
171 |     "APIRateLimitError",
172 |     "DataFetchError",
173 |     "AuthenticationError",
174 |     "PreprocessorError",
175 |     "IngestionPipelineError",
176 |     "QueryError",
177 |     "ValidationError",
178 |     "EmbeddingError",
179 |     "VectorStoreError",
180 |     "PromptBuilderError",
181 |     "GenerationError",
182 |     "OrchestratorError",
183 |     "ChatError",
184 |     "ChatHistoryError",
185 |     "ChatIDError",
186 |     "ChatStorageError",
187 |     "UserIDError",
188 |     "ConfigurationError",
189 |     "LoggerError",
190 |     "ComponentLoadError"
191 | ]
192 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/__init__.py:
--------------------------------------------------------------------------------
 1 | from ici.core.interfaces.ingestor import Ingestor
 2 | from ici.core.interfaces.preprocessor import Preprocessor
 3 | from ici.core.interfaces.embedder import Embedder
 4 | from ici.core.interfaces.vector_store import VectorStore
 5 | from ici.core.interfaces.validator import Validator
 6 | from ici.core.interfaces.prompt_builder import PromptBuilder
 7 | from ici.core.interfaces.generator import Generator
 8 | from ici.core.interfaces.orchestrator import Orchestrator
 9 | from ici.core.interfaces.pipeline import IngestionPipeline
10 | from ici.core.interfaces.logger import Logger
11 | from ici.core.interfaces.chat_history_manager import ChatHistoryManager
12 | from ici.core.interfaces.user_id_generator import UserIDGenerator
13 | 
14 | __all__ = [
15 |     "Ingestor",
16 |     "Preprocessor",
17 |     "Embedder",
18 |     "VectorStore",
19 |     "Validator",
20 |     "PromptBuilder",
21 |     "Generator",
22 |     "Orchestrator",
23 |     "IngestionPipeline",
24 |     "Logger",
25 |     "ChatHistoryManager",
26 |     "UserIDGenerator",
27 | ]
28 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/embedder.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | from typing import List, Dict, Any, Optional, Tuple
  3 | 
  4 | 
  5 | class Embedder(ABC):
  6 |     """
  7 |     Interface for components that generate vector embeddings from text data.
  8 | 
  9 |     The Embedder is shared between Ingestion and Query Pipelines to ensure
 10 |     identical embedding logic, crucial for accurate similarity matching.
 11 |     """
 12 | 
 13 |     @abstractmethod
 14 |     async def initialize(self) -> None:
 15 |         """
 16 |         Initialize the embedder with configuration parameters.
 17 |         
 18 |         This method should be called after the embedder instance is created,
 19 |         before any other methods are used. Configuration should be loaded from
 20 |         a central configuration source (e.g., config.yaml).
 21 |         
 22 |         Returns:
 23 |             None
 24 |         
 25 |         Raises:
 26 |             Exception: If initialization fails for any reason.
 27 |         """
 28 |         pass
 29 | 
 30 |     @abstractmethod
 31 |     async def embed(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
 32 |         """
 33 |         Generates a vector embedding from the input text.
 34 | 
 35 |         Args:
 36 |             text: The text to embed
 37 | 
 38 |         Returns:
 39 |             Tuple[List[float], Optional[Dict[str, Any]]]: The fixed-length embedding vector plus an optional metadata dict (may be None)
 40 | 
 41 |         Raises:
 42 |             EmbeddingError: If embedding generation fails for any reason
 43 |         """
 44 |         pass
 45 | 
 46 |     @abstractmethod
 47 |     async def embed_batch(self, texts: List[str]) -> List[Tuple[List[float], Optional[Dict[str, Any]]]]:
 48 |         """
 49 |         Generates vector embeddings for multiple texts.
 50 | 
 51 |         This method should optimize batch processing for efficiency when embedding
 52 |         multiple texts at once.
 53 | 
 54 |         Args:
 55 |             texts: List of texts to embed
 56 | 
 57 |         Returns:
 58 |             List[Tuple[List[float], Optional[Dict[str, Any]]]]: One (vector, optional metadata) tuple for each input text
 59 | 
 60 |         Raises:
 61 |             EmbeddingError: If batch embedding generation fails for any reason
 62 |         """
 63 |         pass
 64 | 
 65 |     @property
 66 |     @abstractmethod
 67 |     def dimensions(self) -> int:
 68 |         """
 69 |         Returns the dimensionality of the embeddings produced by this embedder.
 70 | 
 71 |         Returns:
 72 |             int: The number of dimensions in the embedding vectors
 73 |         """
 74 |         pass
 75 | 
 76 |     @abstractmethod
 77 |     def healthcheck(self) -> Dict[str, Any]:
 78 |         """
 79 |         Checks if the embedder is properly configured and functioning (synchronous call, not a coroutine).
 80 | 
 81 |         Returns:
 82 |             Dict[str, Any]: A dictionary containing health status information:
 83 |                 {
 84 |                     'healthy': bool,  # Whether the embedder is functioning properly
 85 |                     'message': str,   # Optional message providing more details
 86 |                     'details': dict   # Optional additional details about the health check
 87 |                 }
 88 | 
 89 |         Raises:
 90 |             EmbeddingError: If the health check itself encounters an error
 91 |         """
 92 |         pass
 93 | 
 94 |     def arguments(self) -> Dict[str, Any]:
 95 |         """
 96 |         Get the arguments used to initialize this embedder (non-abstract; has a default).
 97 |         
 98 |         Returns:
 99 |             Dict[str, Any]: Dictionary of initialization arguments; this base implementation returns {}
100 |         """
101 |         return {}
102 | 
103 |         
104 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/generator.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Dict, Any, Optional
 3 | 
 4 | 
 5 | class Generator(ABC):
 6 |     """
 7 |     Interface for components that produce responses using language models.
 8 | 
 9 |     The Generator abstracts the language model implementation, supporting multiple
10 |     providers including OpenAI, xAI, Anthropic, and local models, with configurable parameters.
11 |     """
12 | 
13 |     @abstractmethod
14 |     async def initialize(self) -> None:
15 |         """
16 |         Initialize the generator with configuration parameters.
17 |         
18 |         This method should be called after the generator instance is created,
19 |         before any other methods are used. Configuration should be loaded from
20 |         a central configuration source (e.g., config.yaml).
21 |         
22 |         Returns:
23 |             None
24 |         
25 |         Raises:
26 |             Exception: If initialization fails for any reason.
27 |         """
28 |         pass
29 | 
30 |     @abstractmethod
31 |     async def generate(
32 |         self, prompt: str, generation_options: Optional[Dict[str, Any]] = None
33 |     ) -> str:
34 |         """
35 |         Generates an output based on the provided prompt.
36 | 
37 |         Generation options can include parameters like:
38 |         - temperature: Controls randomness (0.0-2.0)
39 |         - max_tokens: Limits response length
40 |         - top_p: Controls diversity via nucleus sampling
41 |         - frequency_penalty: Reduces word repetition
42 |         - presence_penalty: Reduces topic repetition
43 | 
44 |         Args:
45 |             prompt: The input prompt for the language model
46 |             generation_options: Optional per-call parameters overriding the defaults (see set_default_options)
47 | 
48 |         Returns:
49 |             str: The generated text response
50 | 
51 |         Raises:
52 |             GenerationError: If text generation fails for any reason
53 |         """
54 |         pass
55 | 
56 |     @abstractmethod
57 |     async def set_model(self, model: str) -> None:
58 |         """
59 |         Sets the specific model to use for generation.
60 | 
61 |         Args:
62 |             model: The model identifier (e.g., 'gpt-4', 'claude-2', 'mistral-7b')
63 | 
64 |         Raises:
65 |             GenerationError: If the model is invalid or unavailable
66 |         """
67 |         pass
68 | 
69 |     @abstractmethod
70 |     async def set_default_options(self, options: Dict[str, Any]) -> None:
71 |         """
72 |         Sets default options for all generation requests.
73 | 
74 |         Args:
75 |             options: Dictionary of default generation parameters (same keys as generate's generation_options)
76 | 
77 |         Raises:
78 |             GenerationError: If any option is invalid
79 |         """
80 |         pass
81 | 
82 |     @abstractmethod
83 |     async def healthcheck(self) -> Dict[str, Any]:
84 |         """
85 |         Checks if the generator is properly configured and can connect to the language model.
86 | 
87 |         Returns:
88 |             Dict[str, Any]: A dictionary containing health status information:
89 |                 {
90 |                     'healthy': bool,  # Whether the generator is functioning properly
91 |                     'message': str,   # Optional message providing more details
92 |                     'details': dict   # Optional additional details about the health check
93 |                 }
94 | 
95 |         Raises:
96 |             GenerationError: If the health check itself encounters an error
97 |         """
98 |         pass
99 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/ingestor.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Any, Optional, Dict
 3 | from datetime import datetime
 4 | 
 5 | 
 6 | class Ingestor(ABC):
 7 |     """
 8 |     Interface for components that fetch raw data from external sources.
 9 | 
10 |     Each Ingestor is designed for a specific data source, handling authentication
11 |     and API-specific logic. Ingestors should be stateless, with state information
12 |     maintained externally in a dedicated state storage.
13 |     """
14 | 
15 |     @abstractmethod
16 |     async def initialize(self) -> None:
17 |         """
18 |         Initialize the ingestor with configuration parameters.
19 |         
20 |         This method should be called after the ingestor instance is created,
21 |         before any other methods are used. Configuration should be loaded from
22 |         a central configuration source (e.g., config.yaml).
23 |         
24 |         Returns:
25 |             None
26 |         
27 |         Raises:
28 |             Exception: If initialization fails for any reason.
29 |         """
30 |         pass
31 | 
32 |     @abstractmethod
33 |     async def fetch_full_data(self) -> Any:
34 |         """
35 |         Fetches all available data for initial ingestion.
36 | 
37 |         Returns:
38 |             Any: Raw data in a source-native format for the Preprocessor to handle.
39 | 
40 |         Raises:
41 |             IngestorError: If data fetching fails for any reason.
42 |         """
43 |         pass
44 | 
45 |     @abstractmethod
46 |     async def fetch_new_data(self, since: Optional[datetime] = None) -> Any:
47 |         """
48 |         Fetches new data since the given timestamp.
49 | 
50 |         This method enables incremental ingestion by retrieving only data newer
51 |         than the specified timestamp.
52 | 
53 |         Args:
54 |             since: Optional timestamp to fetch data from. If None, should use
55 |                   a reasonable default (e.g., last hour or day).
56 | 
57 |         Returns:
58 |             Any: Raw data in a source-native format for the Preprocessor to handle.
59 | 
60 |         Raises:
61 |             IngestorError: If data fetching fails for any reason.
62 |         """
63 |         pass
64 | 
65 |     @abstractmethod
66 |     async def fetch_data_in_range(self, start: datetime, end: datetime) -> Any:
67 |         """
68 |         Fetches data within a specified date range.
69 | 
70 |         Args:
71 |             start: Start timestamp of the data range.
72 |             end: End timestamp of the data range (whether the bounds are inclusive is not specified by this interface).
73 | 
74 |         Returns:
75 |             Any: Raw data in a source-native format for the Preprocessor to handle.
76 | 
77 |         Raises:
78 |             IngestorError: If data fetching fails for any reason.
79 |         """
80 |         pass
81 | 
82 |     @abstractmethod
83 |     async def healthcheck(self) -> Dict[str, Any]:
84 |         """
85 |         Checks if the ingestor is properly configured and can connect to its data source.
86 | 
87 |         Returns:
88 |             Dict[str, Any]: A dictionary containing health status information:
89 |                 {
90 |                     'healthy': bool,  # Whether the ingestor is functioning properly
91 |                     'message': str,   # Optional message providing more details
92 |                     'details': dict   # Optional additional details about the health check
93 |                 }
94 | 
95 |         Raises:
96 |             IngestorError: If the health check itself encounters an error.
97 |         """
98 |         pass
99 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/logger.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | from typing import Any, Dict, Optional
  3 | 
  4 | 
  5 | class Logger(ABC):
  6 |     """
  7 |     Interface for logging functionality in the ICI framework.
  8 | 
  9 |     Provides standard methods for logging at different severity levels with a structured format.
 10 |     Each log entry should contain: action name, message, and optional data dictionary (error/critical also accept an optional exception).
 11 |     """
 12 | 
 13 |     @abstractmethod
 14 |     def initialize(self) -> None:
 15 |         """
 16 |         Initialize the logger with configuration parameters (synchronous call, not a coroutine).
 17 |         
 18 |         This method should be called after the logger instance is created,
 19 |         before any other methods are used. Configuration should be loaded from
 20 |         a central configuration source (e.g., config.yaml).
 21 |         
 22 |         Returns:
 23 |             None
 24 |         
 25 |         Raises:
 26 |             Exception: If initialization fails for any reason.
 27 |         """
 28 |         pass
 29 | 
 30 |     @abstractmethod
 31 |     def debug(self, log_data: Dict[str, Any]) -> None:
 32 |         """
 33 |         Log a debug message with structured data.
 34 | 
 35 |         Args:
 36 |             log_data: A dictionary with the structure:
 37 |                 {
 38 |                     "action": "ACTION_NAME",     # The action or event being logged
 39 |                     "message": "MESSAGE TEXT",   # The log message
 40 |                     "data": {}                   # Optional additional data as a dictionary
 41 |                 }
 42 |         """
 43 |         pass
 44 | 
 45 |     @abstractmethod
 46 |     def info(self, log_data: Dict[str, Any]) -> None:
 47 |         """
 48 |         Log an info message with structured data.
 49 | 
 50 |         Args:
 51 |             log_data: A dictionary with the structure:
 52 |                 {
 53 |                     "action": "ACTION_NAME",     # The action or event being logged
 54 |                     "message": "MESSAGE TEXT",   # The log message
 55 |                     "data": {}                   # Optional additional data as a dictionary
 56 |                 }
 57 |         """
 58 |         pass
 59 | 
 60 |     @abstractmethod
 61 |     def warning(self, log_data: Dict[str, Any]) -> None:
 62 |         """
 63 |         Log a warning message with structured data.
 64 | 
 65 |         Args:
 66 |             log_data: A dictionary with the structure:
 67 |                 {
 68 |                     "action": "ACTION_NAME",     # The action or event being logged
 69 |                     "message": "MESSAGE TEXT",   # The log message
 70 |                     "data": {}                   # Optional additional data as a dictionary
 71 |                 }
 72 |         """
 73 |         pass
 74 | 
 75 |     @abstractmethod
 76 |     def error(self, log_data: Dict[str, Any]) -> None:
 77 |         """
 78 |         Log an error message with structured data.
 79 | 
 80 |         Args:
 81 |             log_data: A dictionary with the structure:
 82 |                 {
 83 |                     "action": "ACTION_NAME",     # The action or event being logged
 84 |                     "message": "MESSAGE TEXT",   # The log message
 85 |                     "data": {},                  # Optional additional data as a dictionary
 86 |                     "exception": Exception       # Optional exception object
 87 |                 }
 88 |         """
 89 |         pass
 90 | 
 91 |     @abstractmethod
 92 |     def critical(self, log_data: Dict[str, Any]) -> None:
 93 |         """
 94 |         Log a critical message with structured data.
 95 | 
 96 |         Args:
 97 |             log_data: A dictionary with the structure:
 98 |                 {
 99 |                     "action": "ACTION_NAME",     # The action or event being logged
100 |                     "message": "MESSAGE TEXT",   # The log message
101 |                     "data": {},                  # Optional additional data as a dictionary
102 |                     "exception": Exception       # Optional exception object
103 |                 }
104 |         """
105 |         pass
106 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/orchestrator.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | from typing import Dict, Any, List, Optional
  3 | 
  4 | 
  5 | class Orchestrator(ABC):
  6 |     """
  7 |     Interface for components that manage the query pipeline, coordinating components
  8 |     from validation to response generation.
  9 | 
 10 |     The Orchestrator centralizes query handling and rule/context management, ensuring a
 11 |     consistent workflow while delegating tasks to specialized components.
 12 |     """
 13 | 
 14 |     @abstractmethod
 15 |     async def initialize(self) -> None:
 16 |         """
 17 |         Initialize the orchestrator with configuration parameters.
 18 |         
 19 |         This method should be called after the orchestrator instance is created,
 20 |         before any other methods are used. Configuration should be loaded from
 21 |         a central configuration source (e.g., config.yaml).
 22 |         
 23 |         Returns:
 24 |             None
 25 |         
 26 |         Raises:
 27 |             Exception: If initialization fails for any reason.
 28 |         """
 29 |         pass
 30 | 
 31 |     @abstractmethod
 32 |     async def process_query(self, source: str, user_id: str, query: str, additional_info: Dict[str, Any]) -> str:
 33 |         """
 34 |         Manages query processing from validation to generation.
 35 | 
 36 |         Workflow:
 37 |         1. Retrieves validation rules dynamically based on user_id (see get_rules)
 38 |         2. Builds context for validation based on user_id and runtime data (see build_context)
 39 |         3. Validates input with validator
 40 |         4. If validation fails, returns appropriate error message
 41 |         5. Generates query embedding
 42 |         6. Retrieves relevant documents with user-specific filters
 43 |         7. Constructs prompt with prompt_builder
 44 |         8. Generates response with generator
 45 |         9. Returns final output or error message
 46 | 
 47 |         Args:
 48 |             source: The source of the query (presumably the originating connector, e.g. 'cli'; TODO confirm)
 49 |             user_id: Identifier for the user making the request
 50 |             query: The user input/question to process
 51 |             additional_info: Dictionary containing additional attributes and values
 52 | 
 53 |         Returns:
 54 |             str: The final response to the user
 55 | 
 56 |         Raises:
 57 |             OrchestratorError: If the orchestration process fails
 58 |         """
 59 |         pass
 60 | 
 61 |     @abstractmethod
 62 |     async def configure(self, config: Dict[str, Any]) -> None:
 63 |         """
 64 |         Configures the orchestrator with the provided settings.
 65 | 
 66 |         Configuration can include:
 67 |         - num_results: Number of documents to retrieve
 68 |         - rules_source: Where to fetch validation rules from
 69 |         - context_filters: Metadata filters to apply
 70 |         - error_messages: Custom error messages
 71 |         - retry: Retry configuration
 72 | 
 73 |         Args:
 74 |             config: Dictionary containing configuration options
 75 | 
 76 |         Raises:
 77 |             OrchestratorError: If configuration is invalid
 78 |         """
 79 |         pass
 80 | 
 81 |     @abstractmethod
 82 |     def get_rules(self, user_id: str) -> List[Dict[str, Any]]:
 83 |         """
 84 |         Retrieves validation rules for the specified user (synchronous, unlike the other methods here).
 85 | 
 86 |         Retrieves rules from the configured rules source (database, config files).
 87 | 
 88 |         Args:
 89 |             user_id: Identifier for the user
 90 | 
 91 |         Returns:
 92 |             List[Dict[str, Any]]: List of validation rule dictionaries
 93 | 
 94 |         Raises:
 95 |             OrchestratorError: If rules cannot be retrieved
 96 |         """
 97 |         pass
 98 | 
 99 |     @abstractmethod
100 |     async def build_context(self, user_id: str) -> Dict[str, Any]:
101 |         """
102 |         Builds validation context for the specified user.
103 | 
104 |         Assembles context data including user information, current time,
105 |         and other relevant runtime data needed for validation.
106 | 
107 |         Args:
108 |             user_id: Identifier for the user
109 | 
110 |         Returns:
111 |             Dict[str, Any]: Context dictionary for validation
112 | 
113 |         Raises:
114 |             OrchestratorError: If context cannot be built
115 |         """
116 |         pass
117 | 
118 |     @abstractmethod
119 |     async def healthcheck(self) -> Dict[str, Any]:
120 |         """
121 |         Checks if the orchestrator and all its components are properly configured and functioning.
122 | 
123 |         Returns:
124 |             Dict[str, Any]: A dictionary containing health status information:
125 |                 {
126 |                     'healthy': bool,  # Whether the orchestrator is functioning properly
127 |                     'message': str,   # Optional message providing more details
128 |                     'details': dict,  # Optional additional details about the health check
129 |                     'components': {   # Health status of individual components
130 |                         'validator': {...},
131 |                         'embedder': {...},
132 |                         'vector_store': {...},
133 |                         'prompt_builder': {...},
134 |                         'generator': {...}
135 |                     }
136 |                 }
137 | 
138 |         Raises:
139 |             OrchestratorError: If the health check itself encounters an error
140 |         """
141 |         pass
142 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/preprocessor.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Any, List, Dict
 3 | 
 4 | 
 5 | class Preprocessor(ABC):
 6 |     """
 7 |     Interface for components that transform raw, source-specific data into a standardized format.
 8 | 
 9 |     Each Preprocessor is typically paired with a specific Ingestor to handle its unique data
10 |     structure. It transforms raw data into a consistent document format for downstream processing.
11 |     """
12 | 
13 |     @abstractmethod
14 |     async def initialize(self) -> None:
15 |         """
16 |         Initialize the preprocessor with configuration parameters.
17 |         
18 |         This method should be called after the preprocessor instance is created,
19 |         before any other methods are used. Configuration should be loaded from
20 |         a central configuration source (e.g., config.yaml).
21 |         
22 |         Returns:
23 |             None
24 |         
25 |         Raises:
26 |             Exception: If initialization fails for any reason.
27 |         """
28 |         pass
29 | 
30 |     @abstractmethod
31 |     async def preprocess(self, raw_data: Any) -> List[Dict[str, Any]]:
32 |         """
33 |         Transforms raw data into a list of standardized documents.
34 | 
35 |         The standardized document format should include at minimum:
36 |         - 'text': str - The primary content to be embedded
37 |         - 'metadata': Dict[str, Any] - Contextual data about the document
38 | 
39 |         Args:
40 |             raw_data: Source-specific data from an Ingestor
41 | 
42 |         Returns:
43 |             List[Dict[str, Any]]: A list of standardized documents, each with 'text' and 'metadata' fields.
44 | 
45 |         Raises:
46 |             PreprocessorError: If preprocessing fails for any reason.
47 |         """
48 |         pass
49 | 
50 |     @abstractmethod
51 |     def healthcheck(self) -> Dict[str, Any]:
52 |         """
53 |         Checks if the preprocessor is properly configured and functioning (synchronous, unlike initialize/preprocess).
54 | 
55 |         Returns:
56 |             Dict[str, Any]: A dictionary containing health status information:
57 |                 {
58 |                     'healthy': bool,  # Whether the preprocessor is functioning properly
59 |                     'message': str,   # Optional message providing more details
60 |                     'details': dict   # Optional additional details about the health check
61 |                 }
62 | 
63 |         Raises:
64 |             PreprocessorError: If the health check itself encounters an error.
65 |         """
66 |         pass
67 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/prompt_builder.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | from typing import List, Dict, Any, Optional
  3 | 
  4 | 
  5 | class PromptBuilder(ABC):
  6 |     """
  7 |     Interface for components that construct prompts for language models by integrating
  8 |     user input with retrieved documents.
  9 | 
 10 |     The PromptBuilder combines user input with relevant context to create effective prompts,
 11 |     handling edge cases and providing fallback mechanisms.
 12 |     """
 13 | 
 14 |     @abstractmethod
 15 |     async def initialize(self) -> None:
 16 |         """
 17 |         Initialize the prompt builder with configuration parameters.
 18 |         
 19 |         This method should be called after the prompt builder instance is created,
 20 |         before any other methods are used. Configuration should be loaded from
 21 |         a central configuration source (e.g., config.yaml).
 22 |         
 23 |         Returns:
 24 |             None
 25 |         
 26 |         Raises:
 27 |             Exception: If initialization fails for any reason.
 28 |         """
 29 |         pass
 30 | 
 31 |     @abstractmethod
 32 |     async def build_prompt(
 33 |         self,
 34 |         input: str,
 35 |         documents: List[Dict[str, Any]],
 36 |         max_context_length: Optional[int] = None,
 37 |     ) -> str:
 38 |         """
 39 |         Constructs a prompt from the input and retrieved documents.
 40 | 
 41 |         Handles edge cases through specific fallback mechanisms:
 42 |         - No documents: Uses a fallback template
 43 |         - Empty or invalid input: Returns standardized error prompt
 44 |         - Excessive content: Implements truncation strategies to fit model context windows
 45 | 
 46 |         Args:
 47 |             input: The user input/question
 48 |             documents: List of relevant documents from the vector store
 49 |             max_context_length: Optional maximum length for context section (unit, e.g. characters vs. tokens, is not specified by this interface)
 50 | 
 51 |         Returns:
 52 |             str: Complete prompt for the language model
 53 | 
 54 |         Raises:
 55 |             PromptBuilderError: If prompt construction fails for any reason
 56 |         """
 57 |         pass
 58 | 
 59 |     @abstractmethod
 60 |     async def set_template(self, template: str) -> None:
 61 |         """
 62 |         Sets a custom template for the prompt builder.
 63 | 
 64 |         The template should include placeholders for context and question:
 65 |         "Context:\n{context}\n\nQuestion: {question}"
 66 | 
 67 |         Args:
 68 |             template: The template string with {context} and {question} placeholders
 69 | 
 70 |         Raises:
 71 |             PromptBuilderError: If the template is invalid
 72 |         """
 73 |         pass
 74 | 
 75 |     @abstractmethod
 76 |     def set_fallback_template(self, template: str) -> None:
 77 |         """
 78 |         Sets a custom fallback template for when no documents are available (synchronous, unlike set_template).
 79 | 
 80 |         The template should include a placeholder for the question:
 81 |         "Answer based on general knowledge: {question}"
 82 | 
 83 |         Args:
 84 |             template: The fallback template string with {question} placeholder
 85 | 
 86 |         Raises:
 87 |             PromptBuilderError: If the template is invalid
 88 |         """
 89 |         pass
 90 | 
 91 |     @abstractmethod
 92 |     async def healthcheck(self) -> Dict[str, Any]:
 93 |         """
 94 |         Checks if the prompt builder is properly configured and functioning.
 95 | 
 96 |         Returns:
 97 |             Dict[str, Any]: A dictionary containing health status information:
 98 |                 {
 99 |                     'healthy': bool,  # Whether the prompt builder is functioning properly
100 |                     'message': str,   # Optional message providing more details
101 |                     'details': dict   # Optional additional details about the health check
102 |                 }
103 | 
104 |         Raises:
105 |             PromptBuilderError: If the health check itself encounters an error
106 |         """
107 |         pass
108 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/user_id_generator.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Optional, Dict, Any
 3 | 
 4 | 
 5 | class UserIDGenerator(ABC):
 6 |     """
 7 |     Interface for components that generate and validate user IDs.
 8 |     
 9 |     The UserIDGenerator is responsible for creating consistent user identifiers
10 |     across different sources/connectors, following a standardized format.
11 |     """
12 | 
13 |     @abstractmethod
14 |     async def initialize(self) -> None:
15 |         """
16 |         Initialize the user ID generator with configuration parameters.
17 |         
18 |         This method should be called after the generator instance is created,
19 |         before any other methods are used. Configuration should be loaded from
20 |         a central configuration source (e.g., config.yaml).
21 |         
22 |         Returns:
23 |             None
24 |         
25 |         Raises:
26 |             UserIDError: If initialization fails for any reason.
27 |         """
28 |         pass
29 | 
30 |     @abstractmethod
31 |     async def generate_id(self, source: str, identifier: Optional[str] = None) -> str:
32 |         """
33 |         Generates a unique user ID based on source and identifier.
34 |         
35 |         Args:
36 |             source: The connector/source type (e.g., 'cli', 'telegram', 'web')
37 |             identifier: A unique identifier within that source. If None,
38 |                         an appropriate identifier will be generated.
39 |             
40 |         Returns:
41 |             str: A unique composite user ID in the format "{source}:{identifier}"
42 |         
43 |         Raises:
44 |             UserIDError: If ID generation fails or parameters are invalid
45 |         """
46 |         pass
47 | 
48 |     @abstractmethod
49 |     async def validate_id(self, user_id: str) -> bool:
50 |         """
51 |         Validates a user ID format (presumably the "{source}:{identifier}" shape documented by generate_id; confirm).
52 |         
53 |         Args:
54 |             user_id: The user ID to validate
55 |             
56 |         Returns:
57 |             bool: True if valid, False otherwise
58 |         """
59 |         pass
60 | 
61 |     @abstractmethod
62 |     async def parse_id(self, user_id: str) -> Dict[str, str]:
63 |         """
64 |         Parses a user ID into its component parts.
65 |         
66 |         Args:
67 |             user_id: The user ID to parse
68 |             
69 |         Returns:
70 |             Dict[str, str]: A dictionary containing the 'source' and 'identifier'
71 |         
72 |         Raises:
73 |             UserIDError: If the ID format is invalid and cannot be parsed
74 |         """
75 |         pass
76 | 
77 |     @abstractmethod
78 |     async def healthcheck(self) -> Dict[str, Any]:
79 |         """
80 |         Checks if the user ID generator is properly configured and functioning.
81 |         
82 |         Returns:
83 |             Dict[str, Any]: A dictionary containing health status information:
84 |                 {
85 |                     'healthy': bool,  # Whether the generator is functioning properly
86 |                     'message': str,   # Optional message providing more details
87 |                     'details': dict   # Optional additional details about the health check
88 |                 }
89 |                 
90 |         Raises:
91 |             UserIDError: If the health check itself encounters an error
92 |         """
93 |         pass 


--------------------------------------------------------------------------------
/ici/core/interfaces/validator.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Dict, Any, List, Optional
 3 | 
 4 | 
 5 | class Validator(ABC):
 6 |     """
 7 |     Interface for components that ensure user input adheres to security and compliance rules.
 8 | 
 9 |     The Validator enforces security constraints on user input, providing a critical security
10 |     layer before any query processing occurs.
11 |     """
12 | 
13 |     @abstractmethod
14 |     async def initialize(self) -> None:
15 |         """
16 |         Initialize the validator with configuration parameters.
17 |         
18 |         This method should be called after the validator instance is created,
19 |         before any other methods are used. Configuration should be loaded from
20 |         a central configuration source (e.g., config.yaml).
21 |         
22 |         Returns:
23 |             None
24 |         
25 |         Raises:
26 |             Exception: If initialization fails for any reason.
27 |         """
28 |         pass
29 | 
30 |     @abstractmethod
31 |     async def validate(
32 |         self,
33 |         input: str,
34 |         context: Dict[str, Any],
35 |         rules: List[Dict[str, Any]],
36 |         failure_reasons: Optional[List[str]] = None,
37 |     ) -> bool:
38 |         """
39 |         Validates the input based on provided rules and context.
40 | 
41 |         Rules are dynamically supplied as structured dictionaries for maximum flexibility:
42 |         - Keyword filtering: {'type': 'keyword', 'forbidden': ['delete', 'drop']}
43 |         - Time restrictions: {'type': 'time', 'allowed_hours': [8, 18]}
44 |         - User permissions: {'type': 'permission', 'required_level': 'admin'}
45 |         - Content length: {'type': 'length', 'max': 1000, 'min': 5}
46 |         - Pattern matching: {'type': 'regex', 'pattern': '^[a-zA-Z0-9\\s]+$'}
47 | 
48 |         Args:
49 |             input: The user input to validate
50 |             context: Runtime data for rule evaluation (e.g., user_id, timestamp)
51 |             rules: List of validation rule dictionaries
52 |             failure_reasons: Optional list to populate with reasons for validation failure
53 | 
54 |         Returns:
55 |             bool: True if input passes all rules, False otherwise
56 | 
57 |         Raises:
58 |             ValidationError: If the validation process itself fails
59 |         """
60 |         pass
61 | 
62 |     @abstractmethod
63 |     async def healthcheck(self) -> Dict[str, Any]:
64 |         """
65 |         Checks if the validator is properly configured and functioning.
66 | 
67 |         Returns:
68 |             Dict[str, Any]: A dictionary containing health status information:
69 |                 {
70 |                     'healthy': bool,  # Whether the validator is functioning properly
71 |                     'message': str,   # Optional message providing more details
72 |                     'details': dict   # Optional additional details about the health check
73 |                 }
74 | 
75 |         Raises:
76 |             ValidationError: If the health check itself encounters an error
77 |         """
78 |         pass
79 | 


--------------------------------------------------------------------------------
/ici/core/interfaces/vector_store.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | from typing import List, Dict, Any, Optional
  3 | 
  4 | 
  5 | class VectorStore(ABC):
  6 |     """
  7 |     Interface for components that store processed documents with embeddings and
  8 |     retrieve relevant data based on vector similarity.
  9 | 
 10 |     The VectorStore abstracts the underlying storage technology, allowing flexibility
 11 |     in scaling from local to distributed systems while supporting advanced metadata filtering.
 12 |     """
 13 | 
 14 |     @abstractmethod
 15 |     async def initialize(self) -> None:
 16 |         """
 17 |         Initialize the vector store with configuration parameters.
 18 |         
 19 |         This method should be called after the vector store instance is created,
 20 |         before any other methods are used. Configuration should be loaded from
 21 |         a central configuration source (e.g., config.yaml).
 22 |         
 23 |         Returns:
 24 |             None
 25 |         
 26 |         Raises:
 27 |             Exception: If initialization fails for any reason.
 28 |         """
 29 |         pass
 30 | 
 31 |     @abstractmethod
 32 |     def store_documents(self, documents: List[Dict[str, Any]]) -> None:
 33 |         """
 34 |         Stores documents with their vectors, text, and metadata.
 35 | 
 36 |         Input documents should have the following structure:
 37 |         - 'vector': List[float] - Embedding vector
 38 |         - 'text': str - Original text content
 39 |         - 'metadata': Dict[str, Any] - Contextual data (e.g., source, timestamp)
 40 | 
 41 |         Args:
 42 |             documents: List of documents to store
 43 | 
 44 |         Raises:
 45 |             VectorStoreError: If document storage fails for any reason
 46 |         """
 47 |         pass
 48 | 
 49 |     @abstractmethod
 50 |     def search(
 51 |         self,
 52 |         query_vector: List[float],
 53 |         num_results: int = 5,
 54 |         filters: Optional[Dict[str, Any]] = None,
 55 |     ) -> List[Dict[str, Any]]:
 56 |         """
 57 |         Retrieves the most similar documents based on the query vector.
 58 | 
 59 |         Supports advanced metadata filtering with comparison operators:
 60 |         - Equality: {'source': 'Twitter'}
 61 |         - Greater than/less than: {'timestamp': {'gte': 1698777600}}
 62 |         - Array containment: {'tags': {'in': ['important', 'urgent']}}
 63 |         - Logical combinations: {'$and': [{'source': 'Twitter'}, {'timestamp': {'gte': 1698777600}}]}
 64 | 
 65 |         Args:
 66 |             query_vector: The vector to search for
 67 |             num_results: Number of results to return
 68 |             filters: Optional metadata filters to apply during search
 69 | 
 70 |         Returns:
 71 |             List[Dict[str, Any]]: List of documents, each containing:
 72 |                 - 'text': Original text content
 73 |                 - 'metadata': Original metadata
 74 |                 - 'score': Similarity score (higher is more similar)
 75 | 
 76 |         Raises:
 77 |             VectorStoreError: If the search operation fails for any reason
 78 |         """
 79 |         pass
 80 | 
 81 |     @abstractmethod
 82 |     def delete(
 83 |         self,
 84 |         document_ids: Optional[List[str]] = None,
 85 |         filters: Optional[Dict[str, Any]] = None,
 86 |     ) -> int:
 87 |         """
 88 |         Deletes documents from the vector store by ID or filter.
 89 | 
 90 |         Args:
 91 |             document_ids: Optional list of document IDs to delete
 92 |             filters: Optional metadata filters to select documents for deletion
 93 | 
 94 |         Returns:
 95 |             int: Number of documents deleted
 96 | 
 97 |         Raises:
 98 |             VectorStoreError: If the delete operation fails for any reason
 99 |         """
100 |         pass
101 | 
102 |     @abstractmethod
103 |     def count(self, filters: Optional[Dict[str, Any]] = None) -> int:
104 |         """
105 |         Counts documents in the vector store, optionally filtered by metadata.
106 | 
107 |         Args:
108 |             filters: Optional metadata filters to apply
109 | 
110 |         Returns:
111 |             int: Number of documents matching the filter
112 | 
113 |         Raises:
114 |             VectorStoreError: If the count operation fails for any reason
115 |         """
116 |         pass
117 | 
118 |     @abstractmethod
119 |     def healthcheck(self) -> Dict[str, Any]:
120 |         """
121 |         Checks if the vector store is properly configured and functioning.
122 | 
123 |         Returns:
124 |             Dict[str, Any]: A dictionary containing health status information:
125 |                 {
126 |                     'healthy': bool,  # Whether the vector store is functioning properly
127 |                     'message': str,   # Optional message providing more details
128 |                     'details': dict   # Optional additional details about the health check
129 |                 }
130 | 
131 |         Raises:
132 |             VectorStoreError: If the health check itself encounters an error
133 |         """
134 |         pass
135 | 
136 |     @abstractmethod
137 |     def add_documents(
138 |         self, documents: List[Dict[str, Any]], vectors: List[List[float]]
139 |     ) -> List[str]:
140 |         """
141 |         Stores documents along with their vector embeddings.
142 | 
143 |         Args:
144 |             documents: List of documents to store
145 |             vectors: List of vector embeddings for the documents
146 | 
147 |         Returns:
148 |             List[str]: List of document IDs
149 | 
150 |         Raises:
151 |             VectorStoreError: If document storage fails for any reason
152 |         """
153 |         pass
154 | 


--------------------------------------------------------------------------------
/ici/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility modules for the ICI framework.
 3 | 
 4 | This package contains utility modules that provide common functionality
 5 | across the framework.
 6 | """
 7 | 
 8 | from ici.utils.config import get_component_config, load_config
 9 | from ici.utils.state_manager import StateManager
10 | from ici.utils.datetime_utils import (
11 |     ensure_tz_aware, 
12 |     to_utc, 
13 |     from_timestamp, 
14 |     from_isoformat,
15 |     safe_compare
16 | )
17 | from ici.utils.load_env import load_env
18 | from ici.utils.component_loader import load_component_class
19 | from ici.utils.print_banner import print_banner
20 | 
# Names exported by `from ici.utils import *`; each entry is one of the
# names imported into this package above.
__all__ = [
    "get_component_config",
    "load_config",
    "StateManager",
    "ensure_tz_aware",
    "to_utc",
    "from_timestamp",
    "from_isoformat",
    "safe_compare",
    "load_env",
    "load_component_class",
    "print_banner",
] 


--------------------------------------------------------------------------------
/ici/utils/component_loader.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Component loader utility for dynamic instantiation of components.
  3 | 
  4 | This module provides utilities for dynamically loading and initializing
  5 | components from configuration based on class paths.
  6 | """
  7 | 
  8 | import importlib
  9 | from typing import Any, Dict, Optional, Type, TypeVar
 10 | 
 11 | from ici.core.exceptions import ComponentLoadError
 12 | 
 13 | T = TypeVar('T')
 14 | 
 15 | def load_component_class(class_path: str) -> Type[Any]:
 16 |     """
 17 |     Dynamically load a class from a fully qualified path string.
 18 |     
 19 |     Args:
 20 |         class_path: Fully qualified class path (e.g., 'ici.adapters.ingestors.telegram.TelegramIngestor')
 21 |         
 22 |     Returns:
 23 |         Type[Any]: The loaded class
 24 |         
 25 |     Raises:
 26 |         ComponentLoadError: If the class cannot be loaded
 27 |     """
 28 |     try:
 29 |         # Split into module path and class name
 30 |         module_path, class_name = class_path.rsplit('.', 1)
 31 |         
 32 |         # Import the module
 33 |         module = importlib.import_module(module_path)
 34 |         
 35 |         # Get the class
 36 |         component_class = getattr(module, class_name)
 37 |         
 38 |         return component_class
 39 |     
 40 |     except ImportError as e:
 41 |         raise ComponentLoadError(f"Failed to import module for component {class_path}: {str(e)}")
 42 |     except AttributeError as e:
 43 |         raise ComponentLoadError(f"Class not found in module: {class_path}: {str(e)}")
 44 |     except Exception as e:
 45 |         raise ComponentLoadError(f"Failed to load component class {class_path}: {str(e)}")
 46 | 
 47 | async def instantiate_component(class_path: str, config: Optional[Dict[str, Any]] = None) -> Any:
 48 |     """
 49 |     Dynamically instantiate a component from its class path and initialize it.
 50 |     
 51 |     Args:
 52 |         class_path: Fully qualified class path
 53 |         config: Configuration for the component
 54 |         
 55 |     Returns:
 56 |         Any: The instantiated and initialized component
 57 |         
 58 |     Raises:
 59 |         ComponentLoadError: If the component cannot be instantiated or initialized
 60 |     """
 61 |     try:
 62 |         # Load the class
 63 |         component_class = load_component_class(class_path)
 64 |         
 65 |         # Create an instance with config if provided
 66 |         component = component_class() if config is None else component_class(**config)
 67 |         
 68 |         # Initialize the component if it has an initialize method
 69 |         if hasattr(component, 'initialize') and callable(component.initialize):
 70 |             # Check if initialize is a coroutine function
 71 |             if hasattr(component.initialize, '__await__'):
 72 |                 await component.initialize()
 73 |             else:
 74 |                 component.initialize()
 75 |         
 76 |         return component
 77 |     
 78 |     except ComponentLoadError:
 79 |         # Re-raise ComponentLoadError from load_component_class
 80 |         raise
 81 |     except Exception as e:
 82 |         raise ComponentLoadError(f"Failed to instantiate or initialize component {class_path}: {str(e)}")
 83 | 
 84 | def load_component_by_type(component_type: str, component_config: Dict[str, Any], base_class: Type[T]) -> T:
 85 |     """
 86 |     Load a component by type string and validate it against a base class.
 87 |     
 88 |     Args:
 89 |         component_type: String identifier for the component type
 90 |         component_config: Configuration for the component
 91 |         base_class: Base class that the component should inherit from
 92 |         
 93 |     Returns:
 94 |         T: The instantiated component
 95 |         
 96 |     Raises:
 97 |         ComponentLoadError: If the component cannot be loaded or is not a subclass of base_class
 98 |     """
 99 |     try:
100 |         # Load the class
101 |         component_class = load_component_class(component_type)
102 |         
103 |         # Verify it's a subclass of the base class
104 |         if not issubclass(component_class, base_class):
105 |             raise ComponentLoadError(
106 |                 f"Component {component_type} is not a subclass of {base_class.__name__}"
107 |             )
108 |         
109 |         # Create an instance with config
110 |         return component_class(**component_config)
111 |     
112 |     except ComponentLoadError:
113 |         # Re-raise ComponentLoadError from other functions
114 |         raise
115 |     except Exception as e:
116 |         raise ComponentLoadError(f"Failed to load component {component_type}: {str(e)}") 


--------------------------------------------------------------------------------
/ici/utils/datetime_utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Datetime utilities for the ICI framework.
  3 | 
  4 | This module provides standardized datetime handling functions to ensure
  5 | consistent timezone handling throughout the application.
  6 | """
  7 | 
  8 | from datetime import datetime, timezone
  9 | from typing import Optional, Union
 10 | 
 11 | 
 12 | def ensure_tz_aware(dt: Optional[datetime]) -> Optional[datetime]:
 13 |     """
 14 |     Ensure a datetime is timezone-aware (UTC if naive).
 15 |     
 16 |     Args:
 17 |         dt: The datetime to process, can be None
 18 |         
 19 |     Returns:
 20 |         The timezone-aware datetime (or None if input was None)
 21 |     """
 22 |     if dt is None:
 23 |         return None
 24 |         
 25 |     if dt.tzinfo is None:
 26 |         return dt.replace(tzinfo=timezone.utc)
 27 |     return dt
 28 | 
 29 | 
 30 | def to_utc(dt: Optional[datetime]) -> Optional[datetime]:
 31 |     """
 32 |     Convert a datetime to UTC.
 33 |     
 34 |     Args:
 35 |         dt: The datetime to convert, can be None
 36 |         
 37 |     Returns:
 38 |         The UTC datetime (or None if input was None)
 39 |     """
 40 |     if dt is None:
 41 |         return None
 42 |         
 43 |     # First ensure it's timezone-aware
 44 |     dt = ensure_tz_aware(dt)
 45 |     
 46 |     # Then convert to UTC if it's not already
 47 |     if dt.tzinfo != timezone.utc:
 48 |         return dt.astimezone(timezone.utc)
 49 |     return dt
 50 | 
 51 | 
 52 | def from_timestamp(timestamp: Union[int, float]) -> datetime:
 53 |     """
 54 |     Create a timezone-aware UTC datetime from a timestamp.
 55 |     
 56 |     Args:
 57 |         timestamp: Unix timestamp (seconds since epoch)
 58 |         
 59 |     Returns:
 60 |         Timezone-aware datetime in UTC
 61 |     """
 62 |     return datetime.fromtimestamp(timestamp, tz=timezone.utc)
 63 | 
 64 | 
 65 | def from_isoformat(iso_string: str) -> datetime:
 66 |     """
 67 |     Create a timezone-aware datetime from an ISO format string.
 68 |     
 69 |     If the string has no timezone info, UTC is assumed.
 70 |     
 71 |     Args:
 72 |         iso_string: ISO 8601 formatted datetime string
 73 |         
 74 |     Returns:
 75 |         Timezone-aware datetime
 76 |     """
 77 |     dt = datetime.fromisoformat(iso_string)
 78 |     return ensure_tz_aware(dt)
 79 | 
 80 | 
 81 | def safe_compare(dt1: Optional[datetime], dt2: Optional[datetime]) -> bool:
 82 |     """
 83 |     Safely compare two datetimes that may have different timezone information.
 84 |     
 85 |     Args:
 86 |         dt1: First datetime (may be None)
 87 |         dt2: Second datetime (may be None)
 88 |         
 89 |     Returns:
 90 |         True if dt1 is less than dt2, False otherwise
 91 |         If either is None, returns False
 92 |     """
 93 |     if dt1 is None or dt2 is None:
 94 |         return False
 95 |         
 96 |     # Ensure both datetimes are timezone-aware before comparison
 97 |     dt1 = ensure_tz_aware(dt1)
 98 |     dt2 = ensure_tz_aware(dt2)
 99 |     
100 |     return dt1 < dt2 


--------------------------------------------------------------------------------
/ici/utils/load_env.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Environment Variable Loader
 4 | 
 5 | This script loads environment variables from a .env file and can be imported
 6 | or run before other scripts to ensure environment variables are set properly.
 7 | """
 8 | 
 9 | import os
10 | import sys
11 | import argparse
12 | from typing import Optional
13 | 
# Import python-dotenv, installing it on the fly when it is missing.
# NOTE(review): the fallback runs pip at import time, a module-level side
# effect that normally needs network access and modifies the active
# environment. python-dotenv is already listed in requirements.txt, so
# confirm whether this fallback is still wanted.
try:
    from dotenv import load_dotenv
except ImportError:
    print("python-dotenv not installed. Installing now...")
    import subprocess
    # sys.executable makes pip install into the interpreter running this module.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "python-dotenv"])
    from dotenv import load_dotenv
21 | 
22 | 
def load_env(env_file: Optional[str] = None) -> None:
    """
    Populate the process environment from a .env file.

    Values found in the file replace variables that are already set
    (``override=True``). When the file does not exist, a warning and a hint
    are printed and nothing is loaded.

    Args:
        env_file: Path to the .env file. If None, looks for .env in the current directory.
    """
    path = ".env" if env_file is None else env_file

    if os.path.exists(path):
        load_dotenv(path, override=True)
        return

    print(f"Warning: Environment file {path} not found.")
    print("Create one by copying .env.example: cp .env.example .env")
43 | 
44 | 
if __name__ == "__main__":
    # Only the command-line entry point needs the parser-based helper.
    from dotenv import dotenv_values

    parser = argparse.ArgumentParser(description="Load environment variables from a .env file")
    parser.add_argument("--env-file", type=str, help="Path to the .env file")
    args = parser.parse_args()

    # Load environment variables
    load_env(args.env_file)

    # List the names (never the values) of the variables defined in the file.
    # The file is parsed once and closed by dotenv_values, and names are
    # matched exactly instead of by substring search over the raw file text.
    env_path = args.env_file or ".env"
    if os.path.isfile(env_path):
        declared = dotenv_values(env_path)
        for var in sorted(name for name in declared if name in os.environ):
            print(f"  - {var}: ***")
    # When the file is missing, load_env has already printed a warning, so
    # there is nothing to list and no reason to crash on open().


--------------------------------------------------------------------------------
/ici/utils/print_banner.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Banner printing utility for ICI Core.
 3 | 
 4 | This module provides functions for displaying ASCII art banners
 5 | for the ICI Core application.
 6 | """
 7 | 
 8 | def print_banner():
 9 |     """Print ASCII art banner for ICI Core"""
10 |     banner = r"""
11 | 
12 |    _____ _     _      _        _       
13 |   / ____(_)   | |    | |      (_)      
14 |  | (___  _  __| | ___| |_ _ __ _ _ __  
15 |   \___ \| |/ _` |/ _ \ __| '__| | '_ \ 
16 |   ____) | | (_| |  __/ |_| |  | | |_) |
17 |  |_____/|_|\__,_|\___|\__|_|  |_| .__/ 
18 |                                 | |    
19 |                                 |_|    
20 |                                                
21 |     Intelligent Consciousness Interface Core
22 |     """
23 |     print(banner)
24 |     print("=" * 50) 


--------------------------------------------------------------------------------
/install.bat:
--------------------------------------------------------------------------------
  1 | @echo off
  2 | setlocal enabledelayedexpansion
  3 | 
  4 | :: Colors for output
  5 | set GREEN=[92m
  6 | set YELLOW=[93m
  7 | set RED=[91m
  8 | set NC=[0m
  9 | 
:: Repository details: clone URL and the directory name git creates for it
set REPO_URL=https://github.com/sidetrip-ai/ici-core.git
set REPO_NAME=ici-core

:: Main flow. Each step is a subroutine defined further down in this file;
:: a non-zero exit code from any of them stops the script with exit code 1.

:: Check if git is installed
call :check_git || exit /b 1

:: Check if Python is installed
call :check_python || exit /b 1

:: Check if repository exists and clone if needed.
:: On success check_repo has changed the working directory into the repository.
call :check_repo || exit /b 1

:: Run the setup script, looked up relative to the current directory
echo %YELLOW%Running setup script...%NC%
if exist "./setup.bat" (
    call setup.bat
) else (
    echo %RED%Setup script not found.%NC%
    exit /b 1
)

:: Stop here so execution does not fall through into the subroutines below
goto :eof
 33 | 
 34 | :check_git
 35 | :: Check if git is installed
 36 | where git >nul 2>&1
 37 | if %ERRORLEVEL% neq 0 (
 38 |     echo %RED%Git is not installed.%NC%
 39 |     echo %YELLOW%Please install git first:%NC%
 40 |     echo   For Windows: %GREEN%https://git-scm.com/download/win%NC%
 41 |     exit /b 1
 42 | )
 43 | echo %GREEN%Git is installed.%NC%
 44 | exit /b 0
 45 | goto :eof
 46 | 
 47 | :check_python
 48 | :: Check if Python is installed
 49 | where python3 >nul 2>&1
 50 | if %ERRORLEVEL% neq 0 (
 51 |     echo %RED%Python 3 is not installed.%NC%
 52 |     echo %YELLOW%Please install Python 3 first:%NC%
 53 |     echo   For Windows: %GREEN%https://www.python.org/downloads/%NC%
 54 |     exit /b 1
 55 | )
 56 | echo %GREEN%Python is installed.%NC%
 57 | exit /b 0
 58 | goto :eof
 59 | 
:find_repo
:: Look for an existing REPO_NAME entry and store its full path in REPO_PATH.
:: Search order: current directory, parent directory, user profile directory.
:: Returns 0 as soon as one is found and 1 when none exists.
:: Only existence is tested; nothing here checks that the entry is a git clone.

:: First check current directory
if exist "%REPO_NAME%" (
    set REPO_PATH=%CD%\%REPO_NAME%
    exit /b 0
)

:: Then check parent directory.
:: The delayed form of CD is needed below because pushd changes the directory
:: after the parenthesised block has already been parsed.
if exist "..\%REPO_NAME%" (
    pushd ..
    set REPO_PATH=!CD!\%REPO_NAME%
    popd
    exit /b 0
)

:: Then check user home directory
if exist "%USERPROFILE%\%REPO_NAME%" (
    set REPO_PATH=%USERPROFILE%\%REPO_NAME%
    exit /b 0
)

:: Not found in any of the three locations
exit /b 1
goto :eof
 83 | 
 84 | :check_repo
 85 | :: Check if repository is already cloned
 86 | call :find_repo
 87 | if %ERRORLEVEL% equ 0 (
 88 |     echo %GREEN%Repository found at: %REPO_PATH%%NC%
 89 |     cd /d "%REPO_PATH%"
 90 |     exit /b 0
 91 | ) else (
 92 |     echo %YELLOW%Repository not found. Cloning from %REPO_URL%...%NC%
 93 |     
 94 |     :: Check if directory exists and is empty
 95 |     if exist "%REPO_NAME%" (
 96 |         echo %YELLOW%Directory %REPO_NAME% exists but is not a git repository.%NC%
 97 |         echo %YELLOW%Removing existing directory...%NC%
 98 |         rmdir /s /q "%REPO_NAME%"
 99 |     )
100 |     
101 |     :: Clone the repository
102 |     git clone "%REPO_URL%"
103 |     if %ERRORLEVEL% neq 0 (
104 |         echo %RED%Failed to clone repository.%NC%
105 |         echo %YELLOW%Please try the manual installation method from the README.%NC%
106 |         exit /b 1
107 |     )
108 |     
109 |     :: Change to the repository directory
110 |     cd /d "%REPO_NAME%"
111 |     if %ERRORLEVEL% neq 0 (
112 |         echo %RED%Failed to change to repository directory.%NC%
113 |         exit /b 1
114 |     )
115 |     
116 |     echo %GREEN%Repository cloned successfully!%NC%
117 |     exit /b 0
118 | )
119 | goto :eof 
120 | 


--------------------------------------------------------------------------------
/install.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Colors for output
 4 | GREEN='\033[0;32m'
 5 | YELLOW='\033[1;33m'
 6 | RED='\033[0;31m'
 7 | NC='\033[0m' # No Color
 8 | 
 9 | # Repository details
10 | REPO_URL="https://github.com/sidetrip-ai/ici-core.git"
11 | REPO_NAME="ici-core"
12 | 
13 | # Function to check if git is installed
check_git() {
    # Nothing to report when git is already on PATH.
    command -v git > /dev/null 2>&1 && return 0

    # git is missing: show per-platform install hints and abort the script.
    echo -e "${RED}Git is not installed.${NC}"
    echo -e "${YELLOW}Please install git first:${NC}"
    echo -e "  For Ubuntu/Debian: ${GREEN}sudo apt-get install git${NC}"
    echo -e "  For macOS: ${GREEN}brew install git${NC}"
    echo -e "  For Windows: ${GREEN}https://git-scm.com/download/win${NC}"
    exit 1
}
24 | 
25 | # Function to check if Python is installed
check_python() {
    # Nothing to report when python3 is already on PATH.
    command -v python3 > /dev/null 2>&1 && return 0

    # python3 is missing: show per-platform install hints and abort the script.
    echo -e "${RED}Python 3 is not installed.${NC}"
    echo -e "${YELLOW}Please install Python 3 first:${NC}"
    echo -e "  For Ubuntu/Debian: ${GREEN}sudo apt-get install python3 python3-venv${NC}"
    echo -e "  For macOS: ${GREEN}brew install python3${NC}"
    echo -e "  For Windows: ${GREEN}https://www.python.org/downloads/${NC}"
    exit 1
}
36 | 
37 | # Function to find repository location
find_repo() {
    # Print the absolute path of the first existing $REPO_NAME directory,
    # searching the current directory, then its parent, then $HOME.
    # Returns 0 when a directory was found and 1 (printing nothing) otherwise.
    local location

    if [ -d "$REPO_NAME" ]; then
        location="$(pwd)/$REPO_NAME"
    elif [ -d "../$REPO_NAME" ]; then
        location="$(cd .. && pwd)/$REPO_NAME"
    elif [ -d "$HOME/$REPO_NAME" ]; then
        location="$HOME/$REPO_NAME"
    else
        return 1
    fi

    echo "$location"
    return 0
}
59 | 
60 | # Function to check if repository is already cloned
check_repo() {
    # Make sure the repository is available and switch into it.
    # Reuses a directory reported by find_repo, or clones $REPO_URL into the
    # current directory. Returns 0 with the working directory set to the
    # repository; exits the script with status 1 when cloning or changing
    # directory fails.
    local repo_path
    repo_path=$(find_repo)

    if [ -n "$repo_path" ]; then
        echo -e "${GREEN}Repository found at: $repo_path${NC}"
        # Stop rather than continue from the wrong directory.
        if ! cd "$repo_path"; then
            echo -e "${RED}Failed to change to repository directory.${NC}"
            exit 1
        fi
        return 0
    fi

    echo -e "${YELLOW}Repository not found. Cloning from $REPO_URL...${NC}"
    if ! git clone "$REPO_URL"; then
        echo -e "${RED}Failed to clone repository.${NC}"
        exit 1
    fi

    if ! cd "$REPO_NAME"; then
        echo -e "${RED}Failed to change to repository directory.${NC}"
        exit 1
    fi

    # A fresh clone is a success too, so report 0 like the "found" branch.
    return 0
}
79 | 
# Main execution: verify prerequisites, locate or clone the repository,
# then hand over to the repository's own setup script.
echo -e "${YELLOW}========== ICI Core Installation Script ==========${NC}"

# Prerequisites (each helper exits the script itself when its tool is missing)
check_git
check_python

# Locate or clone the repository; this also changes into its directory
check_repo

# Run the setup script from the current directory
echo -e "${YELLOW}Running setup script...${NC}"
if [ ! -f "./setup.sh" ]; then
    echo -e "${RED}Setup script not found.${NC}"
    exit 1
fi
bash ./setup.sh


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import sys
 3 | import traceback
 4 | 
 5 | from ici.adapters.controller import command_line_controller
 6 | from ici.utils import print_banner
 7 | 
 8 | # set an env variable
 9 | import os
10 | os.environ["TOKENIZERS_PARALLELISM"] = "false"
11 | 
12 | import warnings
13 | warnings.simplefilter("ignore")  # Ignore all warnings
14 | 
if __name__ == "__main__":
    # Show the ASCII banner before anything else is printed.
    print_banner()

    # Drive the command-line controller to completion and use its return
    # value as the process exit code. sys.exit raises SystemExit, which is
    # not an Exception subclass, so the handler below does not intercept it.
    try:
        print("Starting main function...")
        sys.exit(asyncio.run(command_line_controller()))
    except Exception as exc:
        # Last-resort boundary: report the failure with a traceback, exit 1.
        print(f"Fatal error: {exc}")
        traceback.print_exc()
        sys.exit(1)


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
 1 | [pytest]
 2 | testpaths = tests
 3 | python_files = test_*.py
 4 | python_classes = Test*
 5 | python_functions = test_*
 6 | 
 7 | # Display more test info
 8 | addopts = --verbose
 9 | 
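# Code coverage settings
# NOTE: coverage.py does not read pytest.ini by default; pass
# --cov-config=pytest.ini or move this section to .coveragerc / setup.cfg.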
11 | [coverage:run]
12 | source = ici
13 | omit = 
14 |     tests/*
15 |     examples/*
16 |     setup.py 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # Core dependencies
 2 | pyyaml>=6.0
 3 | 
 4 | # Development dependencies
 5 | pytest>=7.0.0
 6 | pytest-cov>=4.0.0
 7 | pytest-asyncio>=0.25.0
 8 | black>=25.0.1
 9 | 
10 | sentence-transformers>=3.4.1  # For text embeddings
11 | torch>=2.2.0  # Required for sentence-transformers
12 | faiss-cpu>=1.7.0  # For vector similarity search
13 | telethon>=1.39.0  # For Telegram API access
14 | chromadb>=0.6.3  # For ChromaDB vector database
15 | numpy>=2.2.2  # Required for vector operations
16 | logtail-python>=0.3.3
17 | openai>=1.68.0
18 | langchain>=0.3.21  # Core LangChain functionality
19 | langchain-openai>=0.1.0  # For OpenAI integration
20 | langchain-community>=0.3.20  # For additional model providers (including Ollama)
21 | langchain-anthropic>=0.3.10  # For Claude models
22 | langchain-ollama>=0.3.0
23 | python-dotenv>=1.0.1
24 | 


--------------------------------------------------------------------------------
/services/whatsapp-service/.gitignore:
--------------------------------------------------------------------------------
 1 | # Dependencies
 2 | node_modules/
 3 | 
 4 | # Session data
 5 | data/sessions/
 6 | 
 7 | # Logs
 8 | logs/
 9 | *.log
10 | npm-debug.log*
11 | 
12 | # Environment variables
13 | .env
14 | 
15 | # Editor directories and files
16 | .idea/
17 | .vscode/
18 | *.swp
19 | *.swo
20 | 
21 | # OS files
22 | .DS_Store
23 | Thumbs.db 


--------------------------------------------------------------------------------
/services/whatsapp-service/README.md:
--------------------------------------------------------------------------------
 1 | # WhatsApp Service
 2 | 
 3 | This service provides a REST API and WebSocket interface for integrating WhatsApp messaging into the ICI system. It uses the [whatsapp-web.js](https://github.com/pedroslopez/whatsapp-web.js) library to interact with WhatsApp Web.
 4 | 
 5 | ## Features
 6 | 
 7 | - Management of multiple WhatsApp sessions
 8 | - QR code authentication
 9 | - REST API for sending and receiving messages
10 | - WebSocket interface for real-time updates
11 | - Session persistence
12 | 
13 | ## Directory Structure
14 | 
15 | ```
16 | whatsapp-service/
17 | ├── config.js           # Configuration file
18 | ├── data/
19 | │   └── sessions/       # WhatsApp session data
20 | ├── package.json        # Dependencies and scripts
21 | ├── src/
22 | │   ├── api/            # REST API endpoints
23 | │   │   └── routes/     # API routes
24 | │   ├── client/         # WhatsApp client implementation
25 | │   ├── utils/          # Utility functions
26 | │   ├── websocket/      # WebSocket server
27 | │   └── index.js        # Main entry point
28 | └── README.md           # This file
29 | ```
30 | 
31 | ## API Endpoints
32 | 
33 | ### Session Management
34 | 
35 | - `GET /api/sessions` - List all active WhatsApp sessions
36 | - `POST /api/sessions` - Create a new WhatsApp session
37 | - `GET /api/sessions/:sessionId` - Get session status
38 | - `GET /api/sessions/:sessionId/qr` - Get QR code for session authentication
39 | - `DELETE /api/sessions/:sessionId` - Log out of a session (default), or destroy it with `?action=destroy`
40 | 
41 | ### Messaging
42 | 
43 | - `POST /api/messages/:sessionId/send` - Send a message
44 | - `GET /api/messages/:sessionId/chats` - Get all chats
45 | - `GET /api/messages/:sessionId/chat/:chatId` - Get messages from a specific chat
46 | - `GET /api/messages/:sessionId/contacts` - Get all contacts
47 | 
48 | ## WebSocket Interface
49 | 
50 | The WebSocket server provides real-time updates for:
51 | 
52 | - Connection state changes
53 | - Incoming messages
54 | 
55 | ### Events
56 | 
57 | - `connection_update` - When the connection state changes (e.g., QR code received, authenticated, connected)
58 | - `message` - When a new message is received
59 | 
60 | ## Installation and Setup
61 | 
62 | 1. Install dependencies:
63 | ```
64 | npm install
65 | ```
66 | 
67 | 2. Start the service:
68 | ```
69 | npm start
70 | ```
71 | 
72 | For development:
73 | ```
74 | npm run dev
75 | ```
76 | 
77 | ## Authentication Flow
78 | 
79 | 1. Create a new session using the REST API
80 | 2. Retrieve the QR code using the API
81 | 3. Scan the QR code with your WhatsApp mobile app
82 | 4. Once authenticated, the session will be saved and can be reused
83 | 
84 | ## Notes
85 | 
86 | - Puppeteer is required for WhatsApp Web interaction
87 | - Session data is stored in the `data/sessions` directory
88 | - Multiple WhatsApp accounts can be used simultaneously with different session IDs 


--------------------------------------------------------------------------------
/services/whatsapp-service/config.js:
--------------------------------------------------------------------------------
 1 | const path = require('path');
 2 | 
 3 | /**
 4 |  * Configuration for the WhatsApp service
 5 |  */
 6 | module.exports = {
 7 |   // Server settings
 8 |   port: process.env.PORT || 3006,
 9 |   wsPort: process.env.WS_PORT || 3005,
10 |   
11 |   // WhatsApp client settings
12 |   clientOptions: {
13 |     puppeteer: {
14 |       headless: true,
15 |       args: ['--no-sandbox', '--disable-setuid-sandbox']
16 |     }
17 |   },
18 |   
19 |   // Session settings
20 |   sessions: {
21 |     dataPath: path.join(__dirname, "services", "whatsapp-service", "data", "sessions"),
22 |     sessionFile: 'session.json'
23 |   },
24 |   
25 |   // API settings
26 |   api: {
27 |     maxMessages: 1000, // Maximum number of messages to return in a single request
28 |   }
29 | }; 


--------------------------------------------------------------------------------
/services/whatsapp-service/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "whatsapp-service",
 3 |   "version": "1.0.0",
 4 |   "description": "WhatsApp Web JS service for ICI system",
 5 |   "main": "src/index.js",
 6 |   "scripts": {
 7 |     "start": "node src/index.js",
 8 |     "dev": "nodemon src/index.js"
 9 |   },
10 |   "dependencies": {
11 |     "cors": "^2.8.5",
12 |     "express": "^4.18.2",
13 |     "qrcode": "^1.5.3",
14 |     "whatsapp-web.js": "^1.27.0",
15 |     "ws": "^8.14.2"
16 |   },
17 |   "devDependencies": {
18 |     "nodemon": "^3.0.1"
19 |   }
20 | } 


--------------------------------------------------------------------------------
/services/whatsapp-service/src/api/routes/auth.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * WhatsApp authentication routes
  3 |  */
  4 | 
  5 | const express = require('express');
  6 | const whatsAppClient = require('../../client/whatsapp-client');
  7 | const eventEmitter = require('../../utils/event-emitter');
  8 | 
  9 | const router = express.Router();
 10 | 
 11 | /**
 12 |  * GET /api/status
 13 |  * Get the current status of the WhatsApp client
 14 |  */
 15 | router.get('/status', async (req, res) => {
 16 |   const status = whatsAppClient.getStatus();
 17 |   res.json(status);
 18 | });
 19 | 
 20 | /**
 21 |  * GET /api/qr
 22 |  * Get the current QR code or generate a new one
 23 |  */
 24 | router.get('/qr', async (req, res) => {
 25 |   try {
 26 |     // Initialize client if not already done
 27 |     if (!whatsAppClient.initialized) {
 28 |       await whatsAppClient.initialize();
 29 |     }
 30 | 
 31 |     // If already connected, return an error
 32 |     if (whatsAppClient.status === 'CONNECTED') {
 33 |       return res.status(400).json({
 34 |         success: false,
 35 |         message: 'Already connected to WhatsApp'
 36 |       });
 37 |     }
 38 | 
 39 |     // If we have a QR code that's less than 2 minutes old, return it
 40 |     const qrCodeImage = whatsAppClient.getQrCodeImage();
 41 |     if (qrCodeImage && whatsAppClient.lastQrTimestamp && 
 42 |         Date.now() - whatsAppClient.lastQrTimestamp < 120000) {
 43 |       // Set content type to image/png
 44 |       res.setHeader('Content-Type', 'image/png');
 45 |       
 46 |       // Convert data URL to buffer and send
 47 |       const imgData = qrCodeImage.split(',')[1];
 48 |       const imgBuffer = Buffer.from(imgData, 'base64');
 49 |       return res.send(imgBuffer);
 50 |     }
 51 | 
 52 |     // Generate a new QR code if none exists or it's too old
 53 |     const result = await whatsAppClient.generateNewQrCode();
 54 |     
 55 |     if (!result.success) {
 56 |       return res.status(500).json({
 57 |         success: false,
 58 |         message: result.message || 'Failed to generate QR code'
 59 |       });
 60 |     }
 61 | 
 62 |     // Wait for a new QR code to be generated (with a timeout)
 63 |     let timeoutId = null;
 64 |     const waitForQrCode = new Promise((resolve, reject) => {
 65 |       // Set a timeout
 66 |       timeoutId = setTimeout(() => {
 67 |         reject(new Error('Timeout waiting for QR code'));
 68 |       }, 10000);
 69 | 
 70 |       // Listen for QR code event
 71 |       const handler = (data) => {
 72 |         clearTimeout(timeoutId);
 73 |         resolve(data);
 74 |         eventEmitter.off('whatsapp.qr', handler); // Remove listener
 75 |       };
 76 | 
 77 |       eventEmitter.once('whatsapp.qr', handler);
 78 |     });
 79 | 
 80 |     try {
 81 |       await waitForQrCode;
 82 |       
 83 |       // Get the newly generated QR code
 84 |       const newQrCodeImage = whatsAppClient.getQrCodeImage();
 85 |       
 86 |       if (!newQrCodeImage) {
 87 |         return res.status(500).json({
 88 |           success: false,
 89 |           message: 'Failed to generate QR code image'
 90 |         });
 91 |       }
 92 |       
 93 |       // Set content type to image/png
 94 |       res.setHeader('Content-Type', 'image/png');
 95 |       
 96 |       // Convert data URL to buffer and send
 97 |       const imgData = newQrCodeImage.split(',')[1];
 98 |       const imgBuffer = Buffer.from(imgData, 'base64');
 99 |       return res.send(imgBuffer);
100 |     } catch (timeoutError) {
101 |       clearTimeout(timeoutId);
102 |       return res.status(500).json({
103 |         success: false,
104 |         message: 'Timeout waiting for QR code'
105 |       });
106 |     }
107 |   } catch (error) {
108 |     console.error('Error in QR code generation:', error);
109 |     res.status(500).json({
110 |       success: false,
111 |       message: error.message || 'Internal server error'
112 |     });
113 |   }
114 | });
115 | 
116 | /**
117 |  * POST /api/logout
118 |  * Logout from WhatsApp Web
119 |  */
120 | router.post('/logout', async (req, res) => {
121 |   try {
122 |     const result = await whatsAppClient.logout();
123 |     res.json(result);
124 |   } catch (error) {
125 |     console.error('Error in logout:', error);
126 |     res.status(500).json({
127 |       success: false,
128 |       message: error.message || 'Internal server error'
129 |     });
130 |   }
131 | });
132 | 
133 | module.exports = router; 


--------------------------------------------------------------------------------
/services/whatsapp-service/src/api/routes/index.js:
--------------------------------------------------------------------------------
 1 | const express = require('express');
 2 | const router = express.Router();
 3 | const sessionsRoutes = require('./sessions');
 4 | const messagesRoutes = require('./messages');
 5 | const logger = require('../../utils/logger');
 6 | 
 7 | // Health check endpoint
 8 | router.get('/health', (req, res) => {
 9 |   res.json({
10 |     status: 'ok',
11 |     service: 'whatsapp-service',
12 |     timestamp: new Date().toISOString()
13 |   });
14 | });
15 | 
16 | // Mount routes
17 | router.use('/sessions', sessionsRoutes);
18 | router.use('/messages', messagesRoutes);
19 | 
20 | // Handle 404 for API routes
21 | router.use((req, res) => {
22 |   logger.warn(`API endpoint not found: ${req.method} ${req.originalUrl}`);
23 |   res.status(404).json({
24 |     success: false,
25 |     error: 'API endpoint not found'
26 |   });
27 | });
28 | 
29 | module.exports = router; 


--------------------------------------------------------------------------------
/services/whatsapp-service/src/api/routes/messages.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * WhatsApp messages routes
  3 |  */
  4 | 
  5 | const express = require('express');
  6 | const whatsAppClient = require('../../client/whatsapp-client');
  7 | 
  8 | const router = express.Router();
  9 | 
 10 | /**
 11 |  * GET /api/messages
 12 |  * Fetch messages from a specific chat
 13 |  */
 14 | router.get('/messages', async (req, res) => {
 15 |   try {
 16 |     const { chatId, limit } = req.query;
 17 |     
 18 |     if (!chatId) {
 19 |       return res.status(400).json({
 20 |         success: false,
 21 |         message: 'Chat ID is required'
 22 |       });
 23 |     }
 24 |     
 25 |     // Check if client is connected
 26 |     const status = whatsAppClient.getStatus();
 27 |     if (status.status !== 'CONNECTED') {
 28 |       return res.status(400).json({
 29 |         success: false,
 30 |         message: `WhatsApp is not connected. Current status: ${status.status}`
 31 |       });
 32 |     }
 33 |     
 34 |     // Fetch messages
 35 |     const messages = await whatsAppClient.fetchMessages(
 36 |       chatId, 
 37 |       limit ? parseInt(limit, 10) : 2000
 38 |     );
 39 |     
 40 |     res.json({
 41 |       success: true,
 42 |       messages
 43 |     });
 44 |   } catch (error) {
 45 |     console.error('Error fetching messages:', error);
 46 |     res.status(500).json({
 47 |       success: false,
 48 |       message: error.message || 'Failed to fetch messages'
 49 |     });
 50 |   }
 51 | });
 52 | 
 53 | /**
 54 |  * GET /api/fetch-all
 55 |  * Fetch all messages from all chats, optionally since a date
 56 |  */
 57 | router.get('/fetch-all', async (req, res) => {
 58 |   try {
 59 |     const { since } = req.query;
 60 |     
 61 |     // Check if client is connected
 62 |     const status = whatsAppClient.getStatus();
 63 |     if (status.status !== 'CONNECTED') {
 64 |       return res.status(400).json({
 65 |         success: false,
 66 |         message: `WhatsApp is not connected. Current status: ${status.status}`
 67 |       });
 68 |     }
 69 |     
 70 |     // Parse since date if provided
 71 |     let sinceDate = null;
 72 |     if (since) {
 73 |       try {
 74 |         sinceDate = new Date(since);
 75 |       } catch (error) {
 76 |         return res.status(400).json({
 77 |           success: false,
 78 |           message: 'Invalid date format for since parameter'
 79 |         });
 80 |       }
 81 |     }
 82 |     
 83 |     // Fetch all messages
 84 |     const data = await whatsAppClient.fetchAllMessages(sinceDate);
 85 |     
 86 |     res.json({
 87 |       success: true,
 88 |       ...data
 89 |     });
 90 |   } catch (error) {
 91 |     console.error('Error fetching all messages:', error);
 92 |     res.status(500).json({
 93 |       success: false,
 94 |       message: error.message || 'Failed to fetch messages'
 95 |     });
 96 |   }
 97 | });
 98 | 
 99 | /**
100 |  * GET /api/chats
101 |  * Get a list of all chats
102 |  */
103 | router.get('/chats', async (req, res) => {
104 |   try {
105 |     // Check if client is connected
106 |     const status = whatsAppClient.getStatus();
107 |     if (status.status !== 'CONNECTED') {
108 |       return res.status(400).json({
109 |         success: false,
110 |         message: `WhatsApp is not connected. Current status: ${status.status}`
111 |       });
112 |     }
113 |     
114 |     // Fetch all chats
115 |     const data = await whatsAppClient.fetchAllMessages();
116 |     
117 |     res.json({
118 |       success: true,
119 |       chats: data.conversations
120 |     });
121 |   } catch (error) {
122 |     console.error('Error fetching chats:', error);
123 |     res.status(500).json({
124 |       success: false,
125 |       message: error.message || 'Failed to fetch chats'
126 |     });
127 |   }
128 | });
129 | 
130 | module.exports = router; 


--------------------------------------------------------------------------------
/services/whatsapp-service/src/api/routes/sessions.js:
--------------------------------------------------------------------------------
  1 | const express = require('express');
  2 | const router = express.Router();
  3 | const clientManager = require('../../client/client-manager');
  4 | const logger = require('../../utils/logger');
  5 | 
  6 | /**
  7 |  * GET /api/sessions
  8 |  * List all active WhatsApp sessions
  9 |  */
 10 | router.get('/', (req, res) => {
 11 |   try {
 12 |     const sessions = clientManager.getAllClientInfo();
 13 |     res.json({
 14 |       success: true,
 15 |       count: sessions.length,
 16 |       sessions
 17 |     });
 18 |   } catch (error) {
 19 |     logger.error(`Error listing sessions: ${error.message}`, { error: error.stack });
 20 |     res.status(500).json({
 21 |       success: false,
 22 |       error: 'Failed to list sessions'
 23 |     });
 24 |   }
 25 | });
 26 | 
 27 | /**
 28 |  * POST /api/sessions
 29 |  * Create a new WhatsApp session
 30 |  */
 31 | router.post('/', async (req, res) => {
 32 |   try {
 33 |     let { sessionId } = req.body;
 34 |     
 35 |     // Generate a random session ID if not provided
 36 |     if (!sessionId) {
 37 |       sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 10)}`;
 38 |     }
 39 |     
 40 |     // Check if session already exists
 41 |     const existingClient = clientManager.getClient(sessionId);
 42 |     if (existingClient) {
 43 |       return res.status(409).json({
 44 |         success: false,
 45 |         error: `Session ${sessionId} already exists`,
 46 |         sessionId
 47 |       });
 48 |     }
 49 |     
 50 |     // Create new client
 51 |     const client = await clientManager.getOrCreateClient(sessionId);
 52 |     
 53 |     res.status(201).json({
 54 |       success: true,
 55 |       message: 'WhatsApp session created',
 56 |       session: client.getInfo()
 57 |     });
 58 |   } catch (error) {
 59 |     logger.error(`Error creating session: ${error.message}`, { error: error.stack });
 60 |     res.status(500).json({
 61 |       success: false,
 62 |       error: 'Failed to create WhatsApp session'
 63 |     });
 64 |   }
 65 | });
 66 | 
 67 | /**
 68 |  * GET /api/sessions/:sessionId
 69 |  * Get session status
 70 |  */
 71 | router.get('/:sessionId', (req, res) => {
 72 |   const { sessionId } = req.params;
 73 |   
 74 |   try {
 75 |     const client = clientManager.getClient(sessionId);
 76 |     if (!client) {
 77 |       return res.status(404).json({
 78 |         success: false,
 79 |         error: `Session ${sessionId} not found`
 80 |       });
 81 |     }
 82 |     
 83 |     res.json({
 84 |       success: true,
 85 |       session: client.getInfo()
 86 |     });
 87 |   } catch (error) {
 88 |     logger.error(`Error getting session ${sessionId}: ${error.message}`, { 
 89 |       sessionId, 
 90 |       error: error.stack 
 91 |     });
 92 |     res.status(500).json({
 93 |       success: false,
 94 |       error: 'Failed to get session status'
 95 |     });
 96 |   }
 97 | });
 98 | 
 99 | /**
100 |  * GET /api/sessions/:sessionId/qr
101 |  * Get QR code for session
102 |  */
103 | router.get('/:sessionId/qr', async (req, res) => {
104 |   const { sessionId } = req.params;
105 |   
106 |   try {
107 |     const client = clientManager.getClient(sessionId);
108 |     if (!client) {
109 |       return res.status(404).json({
110 |         success: false,
111 |         error: `Session ${sessionId} not found`
112 |       });
113 |     }
114 |     
115 |     if (client.status !== 'qr_received') {
116 |       return res.status(400).json({
117 |         success: false,
118 |         error: `No QR code available for session ${sessionId}`,
119 |         status: client.status
120 |       });
121 |     }
122 |     
123 |     const qrDataUrl = await client.generateQRCodeDataUrl();
124 |     
125 |     if (!qrDataUrl) {
126 |       return res.status(404).json({
127 |         success: false,
128 |         error: 'QR code is not available'
129 |       });
130 |     }
131 |     
132 |     res.json({
133 |       success: true,
134 |       qrCode: qrDataUrl
135 |     });
136 |   } catch (error) {
137 |     logger.error(`Error getting QR code for session ${sessionId}: ${error.message}`, { 
138 |       sessionId, 
139 |       error: error.stack 
140 |     });
141 |     res.status(500).json({
142 |       success: false,
143 |       error: 'Failed to get QR code'
144 |     });
145 |   }
146 | });
147 | 
148 | /**
149 |  * DELETE /api/sessions/:sessionId
150 |  * Logout and destroy a session
151 |  */
152 | router.delete('/:sessionId', async (req, res) => {
153 |   const { sessionId } = req.params;
154 |   const { action = 'logout' } = req.query; // 'logout' or 'destroy'
155 |   
156 |   try {
157 |     const client = clientManager.getClient(sessionId);
158 |     if (!client) {
159 |       return res.status(404).json({
160 |         success: false,
161 |         error: `Session ${sessionId} not found`
162 |       });
163 |     }
164 |     
165 |     // Perform the requested action
166 |     let result = false;
167 |     if (action === 'destroy') {
168 |       result = await clientManager.closeClient(sessionId);
169 |     } else {
170 |       result = await clientManager.logoutClient(sessionId);
171 |     }
172 |     
173 |     if (result) {
174 |       res.json({
175 |         success: true,
176 |         message: `Session ${sessionId} ${action === 'destroy' ? 'destroyed' : 'logged out'}`
177 |       });
178 |     } else {
179 |       res.status(500).json({
180 |         success: false,
181 |         error: `Failed to ${action === 'destroy' ? 'destroy' : 'logout'} session`
182 |       });
183 |     }
184 |   } catch (error) {
185 |     logger.error(`Error deleting session ${sessionId}: ${error.message}`, { 
186 |       sessionId, 
187 |       error: error.stack 
188 |     });
189 |     res.status(500).json({
190 |       success: false,
191 |       error: `Failed to delete session: ${error.message}`
192 |     });
193 |   }
194 | });
195 | 
196 | module.exports = router; 


--------------------------------------------------------------------------------
/services/whatsapp-service/src/client/client-manager.js:
--------------------------------------------------------------------------------
  1 | const WhatsAppClient = require('./whatsapp-client');
  2 | const logger = require('../utils/logger');
  3 | 
  4 | /**
  5 |  * Manager for multiple WhatsApp client instances
  6 |  */
  7 | class ClientManager {
  8 |   constructor() {
  9 |     this.clients = new Map();
 10 |   }
 11 | 
 12 |   /**
 13 |    * Get or create a WhatsApp client
 14 |    * @param {string} sessionId - Session identifier
 15 |    * @returns {Promise<WhatsAppClient>} WhatsApp client instance
 16 |    */
 17 |   async getOrCreateClient(sessionId) {
 18 |     if (!sessionId) {
 19 |       throw new Error('Session ID is required');
 20 |     }
 21 | 
 22 |     // Return existing client if it exists
 23 |     if (this.clients.has(sessionId)) {
 24 |       logger.debug(`Using existing client for session ${sessionId}`);
 25 |       return this.clients.get(sessionId);
 26 |     }
 27 | 
 28 |     try {
 29 |       // Create new client
 30 |       logger.info(`Creating new client for session ${sessionId}`);
 31 |       const client = new WhatsAppClient(sessionId);
 32 |       this.clients.set(sessionId, client);
 33 |       
 34 |       // Initialize the client
 35 |       await client.initialize();
 36 |       return client;
 37 |     } catch (error) {
 38 |       logger.error(`Failed to create client: ${error.message}`, { sessionId, error: error.stack });
 39 |       throw error;
 40 |     }
 41 |   }
 42 | 
 43 |   /**
 44 |    * Get a client by session ID
 45 |    * @param {string} sessionId - Session identifier
 46 |    * @returns {WhatsAppClient|null} WhatsApp client instance or null if not found
 47 |    */
 48 |   getClient(sessionId) {
 49 |     return this.clients.get(sessionId) || null;
 50 |   }
 51 | 
 52 |   /**
 53 |    * Close and destroy a client session
 54 |    * @param {string} sessionId - Session identifier
 55 |    * @returns {Promise<boolean>} Success status
 56 |    */
 57 |   async closeClient(sessionId) {
 58 |     const client = this.clients.get(sessionId);
 59 |     if (!client) {
 60 |       logger.warn(`Client session ${sessionId} not found for closing`);
 61 |       return false;
 62 |     }
 63 | 
 64 |     try {
 65 |       logger.info(`Closing client session ${sessionId}`);
 66 |       await client.destroy();
 67 |       this.clients.delete(sessionId);
 68 |       return true;
 69 |     } catch (error) {
 70 |       logger.error(`Error closing client: ${error.message}`, { sessionId, error: error.stack });
 71 |       return false;
 72 |     }
 73 |   }
 74 | 
 75 |   /**
 76 |    * Logout a client session
 77 |    * @param {string} sessionId - Session identifier
 78 |    * @returns {Promise<boolean>} Success status
 79 |    */
 80 |   async logoutClient(sessionId) {
 81 |     const client = this.clients.get(sessionId);
 82 |     if (!client) {
 83 |       logger.warn(`Client session ${sessionId} not found for logout`);
 84 |       return false;
 85 |     }
 86 | 
 87 |     try {
 88 |       logger.info(`Logging out client session ${sessionId}`);
 89 |       await client.logout();
 90 |       this.clients.delete(sessionId);
 91 |       return true;
 92 |     } catch (error) {
 93 |       logger.error(`Error logging out client: ${error.message}`, { sessionId, error: error.stack });
 94 |       return false;
 95 |     }
 96 |   }
 97 | 
 98 |   /**
 99 |    * Get all active clients
100 |    * @returns {Array<object>} List of client info
101 |    */
102 |   getAllClientInfo() {
103 |     const clientInfoList = [];
104 |     
105 |     for (const [sessionId, client] of this.clients.entries()) {
106 |       clientInfoList.push(client.getInfo());
107 |     }
108 |     
109 |     return clientInfoList;
110 |   }
111 | 
112 |   /**
113 |    * Close all client sessions
114 |    * @returns {Promise<void>}
115 |    */
116 |   async closeAllClients() {
117 |     logger.info(`Closing all WhatsApp client sessions (${this.clients.size} clients)`);
118 |     
119 |     const closePromises = [];
120 |     for (const [sessionId, client] of this.clients.entries()) {
121 |       closePromises.push(client.destroy().catch(error => {
122 |         logger.error(`Error closing client ${sessionId}: ${error.message}`, { error: error.stack });
123 |       }));
124 |     }
125 |     
126 |     await Promise.all(closePromises);
127 |     this.clients.clear();
128 |   }
129 | }
130 | 
131 | // Create singleton instance
132 | const clientManager = new ClientManager();
133 | 
134 | module.exports = clientManager; 


--------------------------------------------------------------------------------
/services/whatsapp-service/src/index.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * WhatsApp Service - Main Entry Point
  3 |  */
  4 | const express = require('express');
  5 | const http = require('http');
  6 | const path = require('path');
  7 | const cors = require('cors');
  8 | const WebSocket = require('ws');
  9 | const whatsAppClient = require('./client/whatsapp-client');
 10 | const eventEmitter = require('./utils/event-emitter');
 11 | const config = require('../config');
 12 | 
 13 | // Import API routes
 14 | const authRoutes = require('./api/routes/auth');
 15 | const messagesRoutes = require('./api/routes/messages');
 16 | 
 17 | // Create Express app and wrap it in an HTTP server (shared with the WebSocket server below)
 18 | const app = express();
 19 | const server = http.createServer(app);
 20 | 
 21 | // Use middleware: CORS with default options, then JSON and URL-encoded body parsing
 22 | app.use(cors());
 23 | app.use(express.json());
 24 | app.use(express.urlencoded({ extended: true }));
 25 | 
 26 | // Serve static files from the 'public' directory next to this file
 27 | app.use(express.static(path.join(__dirname, 'public')));
 28 | 
 29 | // Mount the auth and message routers under the shared /api prefix
 30 | app.use('/api', authRoutes);
 31 | app.use('/api', messagesRoutes);
 32 | 
 33 | // Root route sends public/index.html directly (no redirect is issued)
 34 | app.get('/', (req, res) => {
 35 |   res.sendFile(path.join(__dirname, 'public', 'index.html'));
 36 | });
 37 | 
 38 | // 404 handler: registered last so it only sees requests nothing above answered
 39 | app.use((req, res) => {
 40 |   res.status(404).json({
 41 |     success: false,
 42 |     message: 'Route not found'
 43 |   });
 44 | });
 45 | 
 46 | // WebSocket server
 47 | const wss = new WebSocket.Server({ 
 48 |   server: server,
 49 |   path: '/ws'
 50 | });
 51 | 
 52 | // WebSocket connection handler
 53 | wss.on('connection', (ws) => {
 54 |   console.log('WebSocket client connected');
 55 |   
 56 |   // Send initial status on connection
 57 |   ws.send(JSON.stringify({
 58 |     type: 'status',
 59 |     data: whatsAppClient.getStatus()
 60 |   }));
 61 |   
 62 |   // Event handlers
 63 |   const handleQr = (data) => {
 64 |     ws.send(JSON.stringify({
 65 |       type: 'qr',
 66 |       data: {
 67 |         timestamp: data.timestamp,
 68 |         hasQrCode: true
 69 |       }
 70 |     }));
 71 |   };
 72 |   
 73 |   const handleStatusChange = (status) => {
 74 |     ws.send(JSON.stringify({
 75 |       type: 'status',
 76 |       data: whatsAppClient.getStatus()
 77 |     }));
 78 |   };
 79 |   
 80 |   // Register event listeners
 81 |   eventEmitter.on('whatsapp.qr', handleQr);
 82 |   eventEmitter.on('whatsapp.ready', handleStatusChange);
 83 |   eventEmitter.on('whatsapp.disconnected', handleStatusChange);
 84 |   eventEmitter.on('whatsapp.auth_failure', handleStatusChange);
 85 |   
 86 |   // Handle WebSocket messages (like ping)
 87 |   ws.on('message', (message) => {
 88 |     try {
 89 |       const data = JSON.parse(message);
 90 |       
 91 |       if (data.type === 'ping') {
 92 |         ws.send(JSON.stringify({ type: 'pong' }));
 93 |       }
 94 |     } catch (error) {
 95 |       console.error('Error parsing WebSocket message:', error);
 96 |     }
 97 |   });
 98 |   
 99 |   // Handle WebSocket close
100 |   ws.on('close', () => {
101 |     console.log('WebSocket client disconnected');
102 |     
103 |     // Remove event listeners
104 |     eventEmitter.off('whatsapp.qr', handleQr);
105 |     eventEmitter.off('whatsapp.ready', handleStatusChange);
106 |     eventEmitter.off('whatsapp.disconnected', handleStatusChange);
107 |     eventEmitter.off('whatsapp.auth_failure', handleStatusChange);
108 |   });
109 | });
110 | 
111 | // Initialize WhatsApp client
112 | whatsAppClient.initialize()
113 |   .then(() => {
114 |     console.log('WhatsApp client initialized');
115 |   })
116 |   .catch((error) => {
117 |     console.error('Failed to initialize WhatsApp client:', error);
118 |   });
119 | 
120 | // Start the server
121 | const port = config.port || 3000;
122 | server.listen(port, () => {
123 |   console.log(`WhatsApp service running on port ${port}`);
124 |   console.log(`Web interface available at http://localhost:${port}/`);
125 | }); 


--------------------------------------------------------------------------------
/services/whatsapp-service/src/utils/event-emitter.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Event emitter singleton for the WhatsApp service
 3 |  */
 4 | 
 5 | const EventEmitter = require('events');
 6 | 
 7 | // Create a singleton instance (Node caches modules, so every require() of this file gets the same emitter)
 8 | const eventEmitter = new EventEmitter();
 9 | 
10 | // Raise the per-event listener limit to 20 (Node's default is 10) to avoid warnings when many modules listen
11 | eventEmitter.setMaxListeners(20);
12 | 
13 | module.exports = eventEmitter; 


--------------------------------------------------------------------------------
/services/whatsapp-service/src/utils/logger.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Simple logger utility for WhatsApp service
 3 |  */
 4 | 
 5 | // Log levels
 6 | const LOG_LEVELS = {
 7 |   ERROR: 'ERROR',
 8 |   WARNING: 'WARNING',
 9 |   INFO: 'INFO',
10 |   DEBUG: 'DEBUG'
11 | };
12 | 
13 | // Current log level
14 | const currentLevel = process.env.LOG_LEVEL || LOG_LEVELS.INFO;
15 | 
16 | // Check if a level is enabled
17 | const isLevelEnabled = (level) => {
18 |   const levels = Object.values(LOG_LEVELS);
19 |   const currentIndex = levels.indexOf(currentLevel);
20 |   const levelIndex = levels.indexOf(level);
21 |   
22 |   return levelIndex <= currentIndex;
23 | };
24 | 
25 | /**
26 |  * Log a message at the specified level
27 |  * @param {string} level - Log level
28 |  * @param {string} message - Log message
29 |  * @param {object} data - Additional data to log
30 |  */
31 | const log = (level, message, data = {}) => {
32 |   if (!isLevelEnabled(level)) return;
33 |   
34 |   const timestamp = new Date().toISOString();
35 |   const logData = {
36 |     timestamp,
37 |     level,
38 |     message,
39 |     ...data
40 |   };
41 |   
42 |   console.log(JSON.stringify(logData));
43 | };
44 | 
45 | // Export one logger method per level (warn maps to WARNING) plus the level names
46 | module.exports = {
47 |   error: (message, data) => log(LOG_LEVELS.ERROR, message, data),
48 |   warn: (message, data) => log(LOG_LEVELS.WARNING, message, data),
49 |   info: (message, data) => log(LOG_LEVELS.INFO, message, data),
50 |   debug: (message, data) => log(LOG_LEVELS.DEBUG, message, data),
51 |   LOG_LEVELS
52 | }; 


--------------------------------------------------------------------------------
/services/whatsapp-service/src/utils/message-formatter.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Utility to format WhatsApp messages into a standardized format
  3 |  */
  4 | 
  5 | /**
  6 |  * Format a WhatsApp message object into a standardized format
  7 |  * @param {object} message - WhatsApp message object
  8 |  * @returns {object} Formatted message
  9 |  */
 10 | function formatMessage(message) {
 11 |   // Basic message info
 12 |   const formattedMessage = {
 13 |     id: message.id._serialized || message.id,
 14 |     timestamp: message.timestamp * 1000, // Convert to milliseconds
 15 |     from: message.from,
 16 |     fromMe: message.fromMe,
 17 |     chatId: message.chatId || message.from,
 18 |     type: message.type,
 19 |   };
 20 | 
 21 |   // Handle different message types
 22 |   switch (message.type) {
 23 |     case 'chat':
 24 |       formattedMessage.body = message.body;
 25 |       break;
 26 |       
 27 |     case 'image':
 28 |       formattedMessage.body = message.caption || '';
 29 |       formattedMessage.mimetype = message.mimetype;
 30 |       formattedMessage.hasMedia = true;
 31 |       break;
 32 |       
 33 |     case 'video':
 34 |       formattedMessage.body = message.caption || '';
 35 |       formattedMessage.mimetype = message.mimetype;
 36 |       formattedMessage.hasMedia = true;
 37 |       break;
 38 |       
 39 |     case 'audio':
 40 |       formattedMessage.body = '';
 41 |       formattedMessage.mimetype = message.mimetype;
 42 |       formattedMessage.hasMedia = true;
 43 |       break;
 44 |       
 45 |     case 'document':
 46 |       formattedMessage.body = message.caption || '';
 47 |       formattedMessage.filename = message.filename;
 48 |       formattedMessage.mimetype = message.mimetype;
 49 |       formattedMessage.hasMedia = true;
 50 |       break;
 51 |       
 52 |     case 'location':
 53 |       formattedMessage.body = message.body || '';
 54 |       formattedMessage.location = {
 55 |         latitude: message.location.latitude,
 56 |         longitude: message.location.longitude,
 57 |         description: message.location.description || ''
 58 |       };
 59 |       break;
 60 |       
 61 |     case 'contact':
 62 |       formattedMessage.body = '';
 63 |       formattedMessage.contacts = message.vCards.map(vcard => ({ vcard }));
 64 |       break;
 65 |       
 66 |     default:
 67 |       formattedMessage.body = message.body || '';
 68 |   }
 69 | 
 70 |   // Handle optional properties if they exist
 71 |   if (message.quotedMsg) {
 72 |     formattedMessage.quotedMessage = {
 73 |       id: message.quotedMsg.id._serialized || message.quotedMsg.id,
 74 |       body: message.quotedMsg.body || '',
 75 |       type: message.quotedMsg.type
 76 |     };
 77 |   }
 78 | 
 79 |   // Add metadata
 80 |   formattedMessage.metadata = {
 81 |     source: 'whatsapp',
 82 |     raw: { messageType: message.type }
 83 |   };
 84 | 
 85 |   return formattedMessage;
 86 | }
 87 | 
 88 | /**
 89 |  * Format chat data into a standardized format
 90 |  * @param {object} chat - WhatsApp chat object
 91 |  * @returns {object} Formatted chat
 92 |  */
 93 | function formatChat(chat) {
 94 |   return {
 95 |     id: chat.id._serialized || chat.id,
 96 |     name: chat.name || '',
 97 |     isGroup: chat.isGroup,
 98 |     timestamp: chat.timestamp * 1000, // Convert to milliseconds
 99 |     unreadCount: chat.unreadCount,
100 |     metadata: {
101 |       source: 'whatsapp'
102 |     }
103 |   };
104 | }
105 | 
/**
 * Convert a WhatsApp contact object into the standardized contact shape.
 * @param {object} contact - WhatsApp contact object
 * @returns {object} Formatted contact: id, display name, number and metadata
 */
function formatContact(contact) {
  // Prefer the serialized id; fall back to the id value itself
  const contactId = contact.id._serialized || contact.id;
  // Name preference: explicit name, then pushname, then empty string
  const displayName = contact.name || contact.pushname || '';

  return {
    id: contactId,
    name: displayName,
    number: contact.number,
    metadata: {
      source: 'whatsapp',
      isMyContact: contact.isMyContact
    }
  };
}
122 | 
// Public API of this module: the three formatter functions.
module.exports = { formatMessage, formatChat, formatContact };


--------------------------------------------------------------------------------
/setup.bat:
--------------------------------------------------------------------------------
  1 | @echo off
  2 | setlocal enabledelayedexpansion
  3 | 
  4 | :: Colors for output
  5 | set GREEN=[92m
  6 | set YELLOW=[93m
  7 | set RED=[91m
  8 | set NC=[0m
  9 | 
 10 | :: Default virtual environment directory name
 11 | set VENV_DIR=venv
 12 | 
 13 | :: Check if the script is being run from the project root directory
 14 | if not exist "requirements.txt" (
 15 |     echo %RED%Error: requirements.txt not found.%NC%
 16 |     echo %YELLOW%Please run this script from the project root directory.%NC%
 17 |     exit /b 1
 18 | )
 19 | 
 20 | :: Function to check if running in an active virtual environment
 21 | call :check_venv
 22 | if %ERRORLEVEL% neq 0 (
 23 |     call :setup_venv
 24 | )
 25 | 
 26 | :: Install dependencies
 27 | call :install_dependencies
 28 | 
 29 | :: Verify all dependencies are installed
 30 | call :verify_dependencies
 31 | 
 32 | echo %GREEN%Setup completed successfully!%NC%
 33 | echo %YELLOW%You can now run the application.%NC%
 34 | 
 35 | :: Print next steps
 36 | echo.
 37 | echo %YELLOW%Next Steps:%NC%
 38 | echo 1. To activate the virtual environment in a new terminal:
 39 | echo    %GREEN%%VENV_DIR%\Scripts\activate%NC%
 40 | echo 2. Create your environment file:
 41 | echo    %GREEN%copy .env.example .env%NC%
 42 | echo 3. Edit the .env file with your API keys:
 43 | echo    %GREEN%notepad .env%NC%
 44 | echo    Or use one of these commands:
 45 | echo    %GREEN%code .env%NC%     # For Visual Studio Code
 46 | echo    %GREEN%start notepad .env%NC%  # For Notepad
 47 | echo    %GREEN%start wordpad .env%NC%  # For WordPad
 48 | echo 4. To run the Telegram Application:
 49 | echo    %GREEN%python3 main.py%NC%
 50 | echo.
 51 | echo %YELLOW%Note: Make sure you have configured your Telegram API credentials in the config file before running the application.%NC%
 52 | 
 53 | goto :eof
 54 | 
 55 | :check_venv
 56 | :: Check if running in an active virtual environment
 57 | if "%VIRTUAL_ENV%"=="" (
 58 |     echo %YELLOW%No active virtual environment detected.%NC%
 59 |     exit /b 1
 60 | ) else (
 61 |     echo %GREEN%Active virtual environment detected: %VIRTUAL_ENV%%NC%
 62 |     exit /b 0
 63 | )
 64 | goto :eof
 65 | 
 66 | :setup_venv
 67 | :: Create and activate virtual environment if it doesn't exist
 68 | if not exist "%VENV_DIR%\" (
 69 |     echo %YELLOW%Creating virtual environment in %VENV_DIR%...%NC%
 70 |     python3 -m venv %VENV_DIR%
 71 |     if %ERRORLEVEL% neq 0 (
 72 |         echo %RED%Failed to create virtual environment.%NC%
 73 |         echo %YELLOW%Please ensure Python 3 and venv are installed.%NC%
 74 |         exit /b 1
 75 |     )
 76 | ) else (
 77 |     echo %GREEN%Virtual environment already exists in %VENV_DIR%.%NC%
 78 | )
 79 | 
 80 | echo %YELLOW%Activating virtual environment...%NC%
 81 | call %VENV_DIR%\Scripts\activate
 82 | if %ERRORLEVEL% neq 0 (
 83 |     echo %RED%Failed to activate virtual environment.%NC%
 84 |     exit /b 1
 85 | )
 86 | echo %GREEN%Virtual environment activated!%NC%
 87 | goto :eof
 88 | 
 89 | :install_dependencies
 90 | :: Install dependencies from requirements.txt
 91 | echo %YELLOW%Installing dependencies from requirements.txt...%NC%
 92 | python3 -m pip install -q -r requirements.txt
 93 | if %ERRORLEVEL% neq 0 (
 94 |     echo %RED%Failed to install dependencies.%NC%
 95 |     exit /b 1
 96 | )
 97 | echo %GREEN%Dependencies installed successfully!%NC%
 98 | goto :eof
 99 | 
:verify_dependencies
:: Verify that every package listed in requirements.txt is installed.
:: Returns errorlevel 1 and prints the offending lines when a package is
:: missing, 0 otherwise. Installed versions are only reported next to the
:: required one; they are not compared.
:: Comments inside the loops use "rem" because "::" is a label and is not
:: reliable inside parenthesised blocks.
echo %YELLOW%Verifying installed dependencies...%NC%

set has_missing=0
set missing_packages=

:: Read requirements.txt and check each package.
:: for /f skips blank lines, and tokens=* strips leading blanks.
for /f "tokens=*" %%a in (requirements.txt) do (
    set "line=%%a"

    rem Skip comment lines by looking at the first character only
    if not "!line:~0,1!"=="#" (
        rem Split on the version operator characters: name first, version second
        for /f "tokens=1,2 delims=<>=~" %%b in ("!line!") do (
            set "package=%%b"
            set "package=!package: =!"
            set "version=%%c"

            rem Check if package is installed
            python -m pip show !package! > nul 2>&1
            if errorlevel 1 (
                set has_missing=1
                set "missing_packages=!missing_packages!  - !line!"
            ) else if not "!version!"=="" (
                rem Version check is simplified in batch - just report the version
                for /f "tokens=2" %%i in ('python -m pip show !package! ^| findstr /b "Version:"') do (
                    echo %YELLOW%Package !package! installed version: %%i, required: !version!%NC%
                )
            )
        )
    )
)

:: Report issues if any
if "!has_missing!"=="1" (
    echo %RED%Some dependencies are missing.%NC%

    if defined missing_packages (
        echo %YELLOW%Missing packages:%NC%
        echo !missing_packages!
    )

    echo %YELLOW%Please run the following command to install packages:%NC%
    echo python -m pip install -r requirements.txt
    exit /b 1
)

echo %GREEN%All dependencies verified successfully!%NC%
exit /b 0
155 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |     name="ici-core",
 5 |     version="0.1.0",
 6 |     description="Intelligent Consciousness Interface - Core Framework",
 7 |     author="ICI Team",
 8 |     packages=find_packages(),
 9 |     python_requires=">=3.8",
10 |     install_requires=[
11 |         # Core dependencies - keep minimal
12 |         "pyyaml>=6.0",        # For configuration files
13 |         "sentence-transformers>=3.4.1",  # For text embeddings
14 |         "torch>=2.6.0",  # Required for sentence-transformers
15 |         "faiss-cpu>=1.7.0",  # For vector similarity search
16 |         "chromadb>=0.6.3",  # For ChromaDB vector database
17 |         "numpy>=2.2.2",     # Required for vector operations
18 |         "telethon>=1.39.0",  # For Telegram API access
19 |         "logtail-python>=0.3.3",
20 |         "openai>=1.68.0",
21 |         "langchain>=0.3.21",  # Core LangChain functionality
22 |         "langchain-openai>=0.1.0",  # For OpenAI integration
23 |         "langchain-community>=0.3.20",  # For additional model providers (including Ollama)
24 |         "langchain-anthropic>=0.3.10",  # For Claude models
25 |         "langchain-ollama>=0.3.0",
26 |         "python-dotenv>=1.0.1"
27 |     ],
28 |     extras_require={
29 |         "dev": [
30 |             "pytest>=7.0.0",   # For testing
31 |             "pytest-cov>=4.0.0", # For test coverage
32 |             "black>=23.0.0",   # For code formatting
33 |         ]
34 |     },
35 | )
36 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidetrip-ai/ici-core/9f7fc8af18271522d08d47091b8143be42892122/tests/__init__.py


--------------------------------------------------------------------------------
/tests/adapters/prompt_builders/test_basic_prompt_builder.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Tests for BasicPromptBuilder.
  3 | 
  4 | This module contains tests for the BasicPromptBuilder implementation.
  5 | """
  6 | 
  7 | import pytest
  8 | from typing import Dict, Any
  9 | 
 10 | from ici.adapters.prompt_builders.basic_prompt_builder import BasicPromptBuilder
 11 | from ici.core.exceptions import PromptBuilderError
 12 | 
 13 | 
 14 | @pytest.fixture
 15 | async def prompt_builder():
 16 |     """Create and initialize a BasicPromptBuilder for testing."""
 17 |     builder = BasicPromptBuilder()
 18 |     await builder.initialize()
 19 |     return builder
 20 | 
 21 | 
 22 | @pytest.mark.asyncio
 23 | async def test_build_prompt_with_documents(prompt_builder):
 24 |     """Test building a prompt with documents."""
 25 |     # Setup
 26 |     input_text = "What is the capital of France?"
 27 |     documents = [
 28 |         {"text": "Paris is the capital of France."},
 29 |         {"text": "France is a country in Western Europe."}
 30 |     ]
 31 |     
 32 |     # Build prompt
 33 |     prompt = await prompt_builder.build_prompt(input_text, documents)
 34 |     
 35 |     # Verify
 36 |     assert "Paris is the capital of France" in prompt
 37 |     assert "France is a country in Western Europe" in prompt
 38 |     assert "What is the capital of France?" in prompt
 39 | 
 40 | 
 41 | @pytest.mark.asyncio
 42 | async def test_build_prompt_no_documents(prompt_builder):
 43 |     """Test building a prompt with no documents."""
 44 |     # Setup
 45 |     input_text = "What is the capital of France?"
 46 |     
 47 |     # Build prompt with empty documents list
 48 |     prompt = await prompt_builder.build_prompt(input_text, [])
 49 |     
 50 |     # Verify fallback template is used
 51 |     assert "general knowledge" in prompt
 52 |     assert "What is the capital of France?" in prompt
 53 | 
 54 | 
 55 | @pytest.mark.asyncio
 56 | async def test_build_prompt_with_max_length(prompt_builder):
 57 |     """Test building a prompt with max_context_length."""
 58 |     # Setup
 59 |     input_text = "What is AI?"
 60 |     documents = [{"text": "Artificial Intelligence (AI) is a broad field of computer science..." * 20}]
 61 |     max_length = 100
 62 |     
 63 |     # Build prompt with length restriction
 64 |     prompt = await prompt_builder.build_prompt(input_text, documents, max_length)
 65 |     
 66 |     # Count context length (excluding template parts and question)
 67 |     template = prompt_builder._template
 68 |     question_part = template.split("{context}")[1].format(question=input_text)
 69 |     context_part = prompt[:prompt.index(question_part)]
 70 |     
 71 |     # Verify context is truncated
 72 |     assert len(context_part) <= max_length + len(template.split("{context}")[0])
 73 | 
 74 | 
 75 | @pytest.mark.asyncio
 76 | async def test_set_template(prompt_builder):
 77 |     """Test setting a custom template."""
 78 |     # Setup
 79 |     custom_template = "Custom {context}\n\nQuery: {question}"
 80 |     
 81 |     # Set custom template
 82 |     await prompt_builder.set_template(custom_template)
 83 |     
 84 |     # Build prompt
 85 |     input_text = "test question"
 86 |     documents = [{"text": "test document"}]
 87 |     prompt = await prompt_builder.build_prompt(input_text, documents)
 88 |     
 89 |     # Verify
 90 |     assert prompt.startswith("Custom test document")
 91 |     assert "Query: test question" in prompt
 92 | 
 93 | 
 94 | @pytest.mark.asyncio 
 95 | async def test_set_fallback_template(prompt_builder):
 96 |     """Test setting a custom fallback template."""
 97 |     # Setup
 98 |     custom_fallback = "No information available. Please answer: {question}"
 99 |     
100 |     # Set custom fallback template
101 |     prompt_builder.set_fallback_template(custom_fallback)
102 |     
103 |     # Build prompt with no documents
104 |     input_text = "test question"
105 |     prompt = await prompt_builder.build_prompt(input_text, [])
106 |     
107 |     # Verify
108 |     assert prompt == "No information available. Please answer: test question"
109 | 
110 | 
@pytest.mark.asyncio
async def test_invalid_template(prompt_builder):
    """set_template raises PromptBuilderError for an invalid template."""
    # The template deliberately lacks the {question} placeholder
    template_without_question = "Context: {context}"

    with pytest.raises(PromptBuilderError):
        await prompt_builder.set_template(template_without_question)
120 | 
121 | 
@pytest.mark.asyncio
async def test_healthcheck(prompt_builder):
    """healthcheck returns a dict reporting a healthy builder."""
    report = await prompt_builder.healthcheck()

    assert isinstance(report, dict)
    assert "healthy" in report
    assert report["healthy"] is True
    # The remaining keys only need to be present
    for key in ("message", "details"):
        assert key in report


--------------------------------------------------------------------------------
/tests/adapters/validators/test_rule_based.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Tests for RuleBasedValidator.
 3 | 
 4 | This module contains tests for the RuleBasedValidator implementation.
 5 | """
 6 | 
 7 | import pytest
 8 | from typing import Dict, Any
 9 | 
10 | from ici.adapters.validators.rule_based import RuleBasedValidator
11 | from ici.core.exceptions import ValidationError
12 | 
13 | 
@pytest.fixture
async def validator():
    """Provide an initialized RuleBasedValidator to each test.

    NOTE(review): this is an async fixture declared with plain
    ``pytest.fixture``; whether it is awaited depends on the async pytest
    plugin and its mode - confirm against the project's pytest configuration.
    """
    instance = RuleBasedValidator()
    await instance.initialize()
    return instance
20 | 
21 | 
@pytest.mark.asyncio
async def test_validate_command_line_source(validator):
    """Only input whose context source is COMMAND_LINE passes validation."""
    rules = []  # Not used in current implementation
    # One list is shared by all three calls and emptied between them
    failure_reasons = []

    # Source is COMMAND_LINE: accepted, nothing reported
    accepted = await validator.validate(
        "test input", {"source": "COMMAND_LINE"}, rules, failure_reasons
    )
    assert accepted is True
    assert len(failure_reasons) == 0

    # Source is something else: rejected with an explanatory reason
    failure_reasons.clear()
    accepted = await validator.validate(
        "test input", {"source": "WEB"}, rules, failure_reasons
    )
    assert accepted is False
    assert len(failure_reasons) == 1
    assert "not from COMMAND_LINE" in failure_reasons[0]

    # No source at all: rejected with exactly one reason
    failure_reasons.clear()
    accepted = await validator.validate("test input", {}, rules, failure_reasons)
    assert accepted is False
    assert len(failure_reasons) == 1
49 | 
50 | 
@pytest.mark.asyncio
async def test_healthcheck(validator):
    """healthcheck returns a well-formed dict reporting a healthy validator."""
    report = await validator.healthcheck()

    # Response structure
    assert isinstance(report, dict)
    for key in ("healthy", "message", "details"):
        assert key in report

    # Should be healthy
    assert report["healthy"] is True


--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidetrip-ai/ici-core/9f7fc8af18271522d08d47091b8143be42892122/tests/unit/__init__.py


--------------------------------------------------------------------------------
/tests/unit/adapters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidetrip-ai/ici-core/9f7fc8af18271522d08d47091b8143be42892122/tests/unit/adapters/__init__.py


--------------------------------------------------------------------------------
/tests/unit/adapters/test_structured_logger.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Unit tests for the StructuredLogger implementation.
  3 | """
  4 | 
  5 | import json
  6 | import os
  7 | import tempfile
  8 | from typing import Dict, Any
  9 | 
 10 | import pytest
 11 | 
 12 | from ici.adapters.loggers import StructuredLogger
 13 | 
 14 | 
 15 | class TestStructuredLogger:
 16 |     """Test cases for the StructuredLogger."""
 17 | 
 18 |     def test_initialization(self):
 19 |         """Test logger initialization."""
 20 |         logger = StructuredLogger(name="test_logger")
 21 |         assert logger.name == "test_logger"
 22 |         assert logger.logger.level == 20  # INFO level
 23 | 
 24 |     def test_log_levels(self):
 25 |         """Test all log levels work correctly."""
 26 |         logger = StructuredLogger(name="test_logger")
 27 | 
 28 |         # These should not raise exceptions
 29 |         logger.debug({"action": "TEST", "message": "Debug message"})
 30 |         logger.info({"action": "TEST", "message": "Info message"})
 31 |         logger.warning({"action": "TEST", "message": "Warning message"})
 32 |         logger.error({"action": "TEST", "message": "Error message"})
 33 |         logger.critical({"action": "TEST", "message": "Critical message"})
 34 | 
 35 |     def test_log_to_file(self):
 36 |         """Test logging to a file."""
 37 |         # Create a temporary file
 38 |         with tempfile.NamedTemporaryFile(delete=False) as tmp:
 39 |             tmp_path = tmp.name
 40 | 
 41 |         try:
 42 |             # Create logger with file output
 43 |             logger = StructuredLogger(
 44 |                 name="test_file_logger",
 45 |                 level="INFO",
 46 |                 log_file=tmp_path,
 47 |                 console_output=False,
 48 |             )
 49 | 
 50 |             # Log a message
 51 |             test_message = "File logging test"
 52 |             logger.info({"action": "FILE_TEST", "message": test_message})
 53 | 
 54 |             # Verify file contains the log
 55 |             with open(tmp_path, "r") as f:
 56 |                 content = f.read()
 57 |                 log_data = json.loads(content)
 58 |                 assert log_data["action"] == "FILE_TEST"
 59 |                 assert log_data["message"] == test_message
 60 |                 assert log_data["logger"] == "test_file_logger"
 61 | 
 62 |         finally:
 63 |             # Clean up
 64 |             if os.path.exists(tmp_path):
 65 |                 os.unlink(tmp_path)
 66 | 
 67 |     def test_structured_data(self):
 68 |         """Test logging with structured data."""
 69 |         # Create a temporary file to capture log output
 70 |         with tempfile.NamedTemporaryFile(delete=False) as tmp:
 71 |             tmp_path = tmp.name
 72 | 
 73 |         try:
 74 |             # Create logger
 75 |             logger = StructuredLogger(
 76 |                 name="test_data_logger", log_file=tmp_path, console_output=False
 77 |             )
 78 | 
 79 |             # Log with structured data
 80 |             test_data = {
 81 |                 "user_id": 123,
 82 |                 "items": ["apple", "banana"],
 83 |                 "metadata": {"source": "test"},
 84 |             }
 85 | 
 86 |             logger.info(
 87 |                 {
 88 |                     "action": "DATA_TEST",
 89 |                     "message": "Testing structured data",
 90 |                     "data": test_data,
 91 |                 }
 92 |             )
 93 | 
 94 |             # Verify the structured data was logged correctly
 95 |             with open(tmp_path, "r") as f:
 96 |                 content = f.read()
 97 |                 log_data = json.loads(content)
 98 |                 assert log_data["action"] == "DATA_TEST"
 99 |                 assert log_data["data"] == test_data
100 | 
101 |         finally:
102 |             # Clean up
103 |             if os.path.exists(tmp_path):
104 |                 os.unlink(tmp_path)
105 | 
106 |     def test_exception_logging(self):
107 |         """Test logging exceptions."""
108 |         # Create a temporary file to capture log output
109 |         with tempfile.NamedTemporaryFile(delete=False) as tmp:
110 |             tmp_path = tmp.name
111 | 
112 |         try:
113 |             # Create logger
114 |             logger = StructuredLogger(
115 |                 name="test_exception_logger", log_file=tmp_path, console_output=False
116 |             )
117 | 
118 |             # Create and log an exception
119 |             try:
120 |                 raise ValueError("Test exception")
121 |             except ValueError as e:
122 |                 logger.error(
123 |                     {
124 |                         "action": "EXCEPTION_TEST",
125 |                         "message": "Testing exception logging",
126 |                         "exception": e,
127 |                     }
128 |                 )
129 | 
130 |             # Verify the exception was logged correctly
131 |             with open(tmp_path, "r") as f:
132 |                 content = f.read()
133 |                 log_data = json.loads(content)
134 |                 assert log_data["action"] == "EXCEPTION_TEST"
135 |                 assert "exception" in log_data
136 |                 assert log_data["exception"]["type"] == "ValueError"
137 |                 assert log_data["exception"]["message"] == "Test exception"
138 |                 assert isinstance(log_data["exception"]["traceback"], list)
139 | 
140 |         finally:
141 |             # Clean up
142 |             if os.path.exists(tmp_path):
143 |                 os.unlink(tmp_path)
144 | 


--------------------------------------------------------------------------------
/tests/unit/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sidetrip-ai/ici-core/9f7fc8af18271522d08d47091b8143be42892122/tests/unit/core/__init__.py


--------------------------------------------------------------------------------