24 | 2D Document Chunk Visualization 25 |
26 |
27 | An interactive PCA plot of vectorized document chunks. Each point represents a piece of text, clustered by semantic similarity. Powered by SafeStore.
28 |
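A minimal sketch of how a plot like this can be produced from the sample files in `temp_docs_point_cloud/`, using the libraries listed in `examples/requirements.txt` (sentence-transformers, scikit-learn, pandas, plotly). The per-sentence chunking and the output file name below are illustrative assumptions, not SafeStore's own chunking pipeline.

```python
# Illustrative sketch only: embed chunks, project to 2D with PCA, and plot them.
# Assumes sentence-transformers, scikit-learn, pandas and plotly are installed
# (all listed in examples/requirements.txt). The per-sentence chunking and the
# output file name are placeholders, not SafeStore's own chunking pipeline.
from pathlib import Path

import pandas as pd
import plotly.express as px
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA

docs = {p.name: p.read_text(encoding="utf-8")
        for p in Path("temp_docs_point_cloud").glob("*.txt")}

# Naive chunking: one chunk per sentence.
chunks = [(name, sentence.strip())
          for name, text in docs.items()
          for sentence in text.split(".") if sentence.strip()]

model = SentenceTransformer("all-MiniLM-L6-v2")
vectors = model.encode([text for _, text in chunks], convert_to_numpy=True)

coords = PCA(n_components=2).fit_transform(vectors)
df = pd.DataFrame({
    "x": coords[:, 0], "y": coords[:, 1],
    "document": [name for name, _ in chunks],
    "text": [text for _, text in chunks],
})

fig = px.scatter(df, x="x", y="y", color="document", hover_data=["text"],
                 title="2D Document Chunk Visualization")
fig.write_html("chunk_point_cloud.html")  # placeholder output path
```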
├── LICENSE ├── safe_store ├── config.py ├── core │ ├── __init__.py │ ├── models.py │ └── exceptions.py ├── utils │ ├── __init__.py │ ├── concurrency.py │ └── json_parsing.py ├── depricated.py ├── indexing │ └── __init__.py ├── search │ ├── __init__.py │ └── similarity.py ├── security │ ├── __init__.py │ └── encryption.py ├── vectorization │ ├── __init__.py │ ├── methods │ │ ├── __init__.py │ │ ├── sentense_transformer │ │ │ ├── description.yaml │ │ │ └── __init__.py │ │ ├── ollama │ │ │ ├── description.yaml │ │ │ └── __init__.py │ │ ├── lollms │ │ │ ├── description.yaml │ │ │ └── __init__.py │ │ ├── openai │ │ │ └── description.yaml │ │ ├── tf_idf │ │ │ ├── description.yaml │ │ │ └── __init__.py │ │ └── cohere │ │ │ ├── description.yaml │ │ │ └── __init__.py │ ├── base.py │ ├── utils.py │ └── manager.py ├── graph │ ├── __init__.py │ └── prompts │ │ ├── entity_fusion_prompt.md │ │ ├── query_parsing_prompt.md │ │ ├── graph_extraction_prompt.md │ │ └── graph_extraction_prompt_with_ontology.md ├── __init__.py └── processing │ ├── tokenizers.py │ └── text_cleaning.py ├── examples ├── SafeStoreGraph │ ├── .gitignore │ ├── icon.png │ ├── requirements.txt │ └── description.yaml ├── requirements.txt ├── basic_usage_text.py ├── encryption_usage.py ├── dynamic_model_selection.py ├── custom_logging.py ├── metadata_generation.py ├── basic_usage.py └── graph_usage.py ├── tests ├── security │ ├── __init__.py │ └── test_encryption.py ├── fixtures │ ├── sample.docx │ ├── sample.pdf │ └── sample.html ├── test_chunking.py ├── test_store_phase4.py ├── conftest.py └── test_store_phase3.py ├── docs ├── requirements.txt ├── api.rst ├── index.rst ├── conf.py ├── installation.rst ├── logging.rst ├── quickstart.rst └── encryption.rst ├── temp_docs_point_cloud ├── animals.txt ├── tech.txt └── space.txt ├── point_cloud_web_app ├── data.json └── index.html ├── pyproject.toml ├── .gitignore └── scripts └── migration_v1_v2.py /LICENSE: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/config.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/core/models.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/depricated.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /safe_store/indexing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/search/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/security/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/utils/concurrency.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/vectorization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /safe_store/vectorization/methods/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/SafeStoreGraph/.gitignore: -------------------------------------------------------------------------------- 1 | projects 2 | config.json -------------------------------------------------------------------------------- /tests/security/__init__.py: -------------------------------------------------------------------------------- 1 | # tests/security/__init__.py 2 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx>=5.0 2 | sphinx-rtd-theme>=1.0 3 | # Add other Sphinx extensions if needed 4 | -------------------------------------------------------------------------------- /tests/fixtures/sample.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParisNeo/safe_store/HEAD/tests/fixtures/sample.docx -------------------------------------------------------------------------------- /tests/fixtures/sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParisNeo/safe_store/HEAD/tests/fixtures/sample.pdf -------------------------------------------------------------------------------- /examples/SafeStoreGraph/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ParisNeo/safe_store/HEAD/examples/SafeStoreGraph/icon.png -------------------------------------------------------------------------------- /temp_docs_point_cloud/animals.txt: -------------------------------------------------------------------------------- 1 | The quick brown fox jumps over the lazy dog. A fast red fox is athletic. The sleepy dog rests. -------------------------------------------------------------------------------- /tests/fixtures/sample.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | This is HTML content.
5 | 6 | 7 | -------------------------------------------------------------------------------- /safe_store/graph/__init__.py: -------------------------------------------------------------------------------- 1 | # safe_store/graph/__init__.py 2 | from .graph_store import GraphStore 3 | 4 | __all__ = [ 5 | "GraphStore", 6 | ] -------------------------------------------------------------------------------- /temp_docs_point_cloud/tech.txt: -------------------------------------------------------------------------------- 1 | Python is a versatile programming language. Many developers use Python for AI. RAG pipelines are a common use case. -------------------------------------------------------------------------------- /examples/SafeStoreGraph/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn[standard] 3 | python-multipart 4 | lollms_client>=1.3.0 5 | safe_store>=2.7.0 6 | pipmaster -------------------------------------------------------------------------------- /temp_docs_point_cloud/space.txt: -------------------------------------------------------------------------------- 1 | The sun is a star at the center of our solar system. The Earth revolves around the sun. Space exploration is fascinating. -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | safe-store 2 | extract-msg 3 | beautifulsoup4 4 | sentence-transformers 5 | scikit-learn 6 | pandas 7 | plotly 8 | lollms_client 9 | -------------------------------------------------------------------------------- /examples/SafeStoreGraph/description.yaml: -------------------------------------------------------------------------------- 1 | author: ParisNeo & Lollms 2 | category: Data 3 | creation_date: '2025-08-18T10:05:00.000000' 4 | description: This application allows the user to upload files and convert them to a Graph with an optional ontology. The user can then query the graph using natural language questions. The application uses SafeStore for graph storage and retrieval, and Gemini Pro as the LLM for processing and answering questions. 5 | disclaimer: null 6 | last_update_date: '2025-08-18T10:05:00.000000' 7 | model: Gemini Pro 8 | name: SafeStoreGraph 9 | version: 1.0 10 | -------------------------------------------------------------------------------- /safe_store/vectorization/methods/sentense_transformer/description.yaml: -------------------------------------------------------------------------------- 1 | title: Sentence Transformers Vectorizer 2 | author: ParisNeo 3 | creation_date: 2025-10-10 4 | last_update_date: 2025-10-10 5 | class_name: STVectorizer 6 | description: > 7 | A local vectorizer that uses models from the sentence-transformers library. 8 | Models are downloaded and run directly on your machine. 9 | input_parameters: 10 | - name: model 11 | type: str 12 | description: "The name of the Sentence Transformer model to use from the Hugging Face Hub." 
13 | mandatory: true 14 | default: "all-MiniLM-L6-v2" -------------------------------------------------------------------------------- /safe_store/vectorization/methods/ollama/description.yaml: -------------------------------------------------------------------------------- 1 | title: Ollama Vectorizer 2 | author: ParisNeo 3 | creation_date: 2025-10-10 4 | last_update_date: 2025-10-10 5 | class_name: OllamaVectorizer 6 | description: > 7 | A vectorizer that uses a local Ollama instance to generate text embeddings. 8 | Requires a running Ollama server. 9 | input_parameters: 10 | - name: model 11 | type: str 12 | description: "The name of the embedding model to use from your Ollama server (e.g., 'nomic-embed-text')." 13 | mandatory: true 14 | default: "" 15 | - name: host 16 | type: str 17 | description: "The URL of the Ollama server. If not provided, it defaults to http://localhost:11434 or the OLLAMA_HOST environment variable." 18 | mandatory: false 19 | default: "" -------------------------------------------------------------------------------- /safe_store/vectorization/methods/lollms/description.yaml: -------------------------------------------------------------------------------- 1 | title: Lollms Vectorizer 2 | author: ParisNeo 3 | creation_date: 2025-10-10 4 | last_update_date: 2025-10-10 5 | class_name: LollmsVectorizer 6 | description: > 7 | A vectorizer that connects to any OpenAI-compatible API, such as a local Lollms 8 | instance, for generating embeddings. 9 | input_parameters: 10 | - name: model 11 | type: str 12 | description: "The name of the embedding model served by the Lollms instance." 13 | mandatory: true 14 | default: "nomic-embed-text" 15 | - name: base_url 16 | type: str 17 | description: "The base URL of the OpenAI-compatible API endpoint." 18 | mandatory: true 19 | default: "http://localhost:9600" 20 | - name: api_key 21 | type: str 22 | description: "The API key for the service. Often not required for local instances." 23 | mandatory: false 24 | default: "not_needed" -------------------------------------------------------------------------------- /safe_store/vectorization/methods/openai/description.yaml: -------------------------------------------------------------------------------- 1 | title: OpenAI Vectorizer 2 | author: ParisNeo 3 | creation_date: 2025-10-10 4 | last_update_date: 2025-10-10 5 | class_name: OpenAIVectorizer 6 | description: > 7 | A vectorizer that uses OpenAI's API to generate text embeddings. 8 | Requires an OpenAI API key. 9 | input_parameters: 10 | - name: model 11 | type: str 12 | description: "The name of the OpenAI embedding model to use." 13 | mandatory: true 14 | default: "text-embedding-3-small" 15 | - name: api_key 16 | type: str 17 | description: "Your OpenAI API key. If not provided, the OPENAI_API_KEY environment variable will be used." 18 | mandatory: false 19 | default: "" 20 | - name: base_url 21 | type: str 22 | description: "Optional custom base URL for the OpenAI API, for use with proxies or other compatible services." 23 | mandatory: false 24 | default: "" -------------------------------------------------------------------------------- /safe_store/vectorization/methods/tf_idf/description.yaml: -------------------------------------------------------------------------------- 1 | title: TF-IDF Vectorizer 2 | author: ParisNeo 3 | creation_date: 2025-10-10 4 | last_update_date: 2025-10-10 5 | class_name: TfidfVectorizerWrapper 6 | description: > 7 | A classic, local vectorizer based on Term Frequency-Inverse Document Frequency. 
8 | This vectorizer must be 'fit' on your data, so its performance is data-dependent. 9 | It does not capture semantic meaning like deep learning models. 10 | input_parameters: 11 | - name: name 12 | type: str 13 | description: "A unique name to identify this specific fitted TF-IDF model within the database." 14 | mandatory: true 15 | default: "default_tfidf" 16 | - name: params 17 | type: dict 18 | description: "Optional dictionary of parameters to pass to the underlying scikit-learn TfidfVectorizer, such as 'ngram_range' or 'max_features'." 19 | mandatory: false 20 | default: {} -------------------------------------------------------------------------------- /safe_store/graph/prompts/entity_fusion_prompt.md: -------------------------------------------------------------------------------- 1 | # [NEW & COMPLETE] prompts/entity_fusion_prompt.md 2 | Your task is to determine if two entities of the same type are, in fact, the same entity based on their properties. 3 | 4 | **Entity Type:** {entity_label} 5 | 6 | --- 7 | 8 | **Entity A Properties:** 9 | ```json 10 | {node_a_properties} 11 | ``` 12 | 13 | --- 14 | 15 | **Entity B Properties:** 16 | ```json 17 | {node_b_properties} 18 | ``` 19 | 20 | --- 21 | 22 | **Analysis:** 23 | Carefully compare the properties of Entity A and Entity B. Do they refer to the same real-world entity? Consider variations in naming, partial information, or different levels of detail. 24 | 25 | **Output Format:** 26 | You MUST respond with only a single, well-formed JSON object in a markdown code block. The JSON object must have two keys: 27 | 1. `"is_same"`: A boolean (`true` or `false`). 28 | 2. `"reasoning"`: A brief, one-sentence explanation for your decision. 29 | 30 | **Example Response:** 31 | ```json 32 | {{ 33 | "is_same": true, 34 | "reasoning": "Both entities share the same unique identifier and have highly similar descriptive properties." 35 | }} 36 | ``` -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | === 2 | API 3 | === 4 | 5 | This section provides detailed documentation for the ``safe_store`` library's public API. 6 | 7 | Core Class 8 | ---------- 9 | 10 | .. automodule:: SafeStore.store 11 | :members: SafeStore, LogLevel 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Exceptions 16 | ---------- 17 | 18 | .. automodule:: SafeStore.core.exceptions 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Vectorizers 24 | ----------- 25 | 26 | .. automodule:: SafeStore.vectorization.base 27 | :members: BaseVectorizer 28 | :undoc-members: 29 | 30 | .. automodule:: SafeStore.vectorization.methods.sentence_transformer 31 | :members: SentenceTransformerVectorizer 32 | :undoc-members: 33 | 34 | .. automodule:: SafeStore.vectorization.methods.tfidf 35 | :members: TfidfVectorizerWrapper 36 | :undoc-members: 37 | 38 | Utilities 39 | --------- 40 | While primarily used internally, the ``ascii_colors`` library is exposed for configuration. 41 | 42 | .. 
automodule:: ascii_colors 43 | :members: ASCIIColors, LogLevel, FileHandler, Formatter, JSONFormatter 44 | :undoc-members: 45 | 46 | (Add other modules/classes as needed) 47 | -------------------------------------------------------------------------------- /safe_store/vectorization/methods/cohere/description.yaml: -------------------------------------------------------------------------------- 1 | title: Cohere Vectorizer 2 | author: ParisNeo 3 | creation_date: 2025-10-10 4 | last_update_date: 2025-10-10 5 | class_name: CohereVectorizer 6 | description: > 7 | A vectorizer that uses Cohere's API to generate text embeddings. 8 | Requires a Cohere API key, which can be provided via the 'api_key' parameter 9 | or the COHERE_API_KEY environment variable. 10 | input_parameters: 11 | - name: model 12 | type: str 13 | description: "The name of the Cohere embedding model to use." 14 | mandatory: true 15 | default: "embed-english-v3.0" 16 | - name: api_key 17 | type: str 18 | description: "Your Cohere API key. If not provided, the COHERE_API_KEY environment variable will be used." 19 | mandatory: false 20 | default: "" 21 | - name: input_type 22 | type: str 23 | description: "The type of input being embedded, e.g., 'search_document' or 'search_query'." 24 | mandatory: false 25 | default: "search_document" 26 | - name: truncate 27 | type: str 28 | description: "The truncation strategy for inputs longer than the model's context window ('NONE', 'START', 'END')." 29 | mandatory: false 30 | default: "END" -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. safe_store documentation master file, created by 2 | sphinx-quickstart onHTML content can also be indexed.
", encoding='utf-8') 20 | 21 | print(f"Created sample files in: {doc_dir.resolve()}") 22 | 23 | # --- 2. Initialize safe_store --- 24 | # Use DEBUG level for more verbose output, adjust lock timeout if needed 25 | # Add encryption_key="your-secret-password" to enable encryption 26 | store = safe_store.SafeStore( 27 | "my_vector_store.db", 28 | log_level=safe_store.LogLevel.DEBUG, 29 | lock_timeout=10 # Wait up to 10s for write lock 30 | # encryption_key="your-secret-password" # Uncomment to enable 31 | ) 32 | 33 | # Best practice: Use safe_store as a context manager 34 | try: 35 | with store: 36 | # --- 3. Add Documents (acquires write lock) --- 37 | print("\n--- Indexing Documents ---") 38 | # Requires safe_store[sentence-transformers] 39 | store.add_document(doc1_path, vectorizer_name="st:all-MiniLM-L6-v2", chunk_size=50, chunk_overlap=10) 40 | 41 | # Requires safe_store[parsing] for HTML 42 | store.add_document(doc2_path, vectorizer_name="st:all-MiniLM-L6-v2") 43 | 44 | # Add TF-IDF vectors as well (requires safe_store[tfidf]) 45 | # This will fit TF-IDF on all documents 46 | print("\n--- Adding TF-IDF Vectorization ---") 47 | store.add_vectorization("tfidf:my_analysis") 48 | 49 | # --- 4. Query (read operation, concurrent with WAL) --- 50 | print("\n--- Querying using Sentence Transformer ---") 51 | query_st = "simple storage" 52 | results_st = store.query(query_st, vectorizer_name="st:all-MiniLM-L6-v2", top_k=2) 53 | for i, res in enumerate(results_st): 54 | print(f"ST Result {i+1}: Score={res['similarity']:.4f}, Path='{Path(res['file_path']).name}', Text='{res['chunk_text'][:60]}...'") 55 | 56 | print("\n--- Querying using TF-IDF ---") 57 | query_tfidf = "html index" 58 | results_tfidf = store.query(query_tfidf, vectorizer_name="tfidf:my_analysis", top_k=2) 59 | for i, res in enumerate(results_tfidf): 60 | print(f"TFIDF Result {i+1}: Score={res['similarity']:.4f}, Path='{Path(res['file_path']).name}', Text='{res['chunk_text'][:60]}...'") 61 | 62 | # --- 5. 
List Methods --- 63 | print("\n--- Listing Vectorization Methods ---") 64 | methods = store.list_vectorization_methods() 65 | for method in methods: 66 | print(f"- ID: {method['method_id']}, Name: {method['method_name']}, Type: {method['method_type']}, Dim: {method['vector_dim']}") 67 | 68 | except safe_store.ConfigurationError as e: 69 | print(f"\n[ERROR] Missing dependency: {e}") 70 | print("Please install the required extras (e.g., pip install safe_store[all])") 71 | except safe_store.ConcurrencyError as e: 72 | print(f"\n[ERROR] Lock timeout or concurrency issue: {e}") 73 | except Exception as e: 74 | print(f"\n[ERROR] An unexpected error occurred: {e}") 75 | finally: 76 | # Connection is closed automatically by the 'with' statement exit 77 | print("\n--- Store context closed ---") 78 | # Cleanup (optional) 79 | # import shutil 80 | # shutil.rmtree(doc_dir) 81 | # Path("my_vector_store.db").unlink(missing_ok=True) 82 | # Path("my_vector_store.db.lock").unlink(missing_ok=True) 83 | 84 | print("\nCheck 'my_vector_store.db' and console logs.") 85 | 86 | -------------------------------------------------------------------------------- /safe_store/vectorization/methods/sentense_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | # safe_store/vectorization/methods/st.py 2 | import numpy as np 3 | from typing import List, Optional, Dict, Any 4 | from safe_store.vectorization.base import BaseVectorizer 5 | from safe_store.core.exceptions import ConfigurationError, VectorizationError 6 | from safe_store.processing.tokenizers import HuggingFaceTokenizerWrapper 7 | from ascii_colors import ASCIIColors, trace_exception 8 | import pipmaster as pm 9 | 10 | class_name="STVectorizer" 11 | 12 | try: 13 | pm.ensure_packages(["torch","torchvision","sentence-transformers"]) 14 | from sentence_transformers import SentenceTransformer 15 | except Exception as e: 16 | trace_exception(e) 17 | SentenceTransformer = None 18 | 19 | 20 | def list_available_models(**kwargs) -> List[str]: 21 | """ 22 | Returns a curated list of popular and effective Sentence Transformer models. 23 | This list is static as querying the Hugging Face Hub dynamically is not practical. 24 | """ 25 | return [ 26 | "all-MiniLM-L6-v2", 27 | "all-mpnet-base-v2", 28 | "multi-qa-mpnet-base-dot-v1", 29 | "all-distilroberta-v1", 30 | "paraphrase-albert-small-v2", 31 | "LaBSE" 32 | ] 33 | 34 | class STVectorizer(BaseVectorizer): 35 | """Vectorizes text using models from the sentence-transformers library.""" 36 | 37 | DEFAULT_MODEL: str = "all-MiniLM-L6-v2" 38 | 39 | def __init__(self, model_config: Dict[str, Any], cache_folder: Optional[str] = None, **kwargs): 40 | super().__init__(vectorizer_name="st") 41 | 42 | if SentenceTransformer is None: 43 | raise ConfigurationError("STVectorizer requires 'sentence-transformers'. Install with: pip install safe_store[sentence-transformers]") 44 | 45 | self.model_name: str = model_config.get("model", self.DEFAULT_MODEL) 46 | if not self.model_name: 47 | raise ConfigurationError("STVectorizer config must include a 'model' key.") 48 | 49 | try: 50 | self.model: SentenceTransformer = SentenceTransformer(self.model_name, cache_folder=cache_folder) 51 | self._dim: int = self.model.get_sentence_embedding_dimension() 52 | self._dtype: np.dtype = np.dtype(np.float32) 53 | ASCIIColors.info(f"Model '{self.model_name}' loaded. 
Dimension: {self._dim}") 54 | except Exception as e: 55 | raise VectorizationError(f"Failed to load Sentence Transformer model '{self.model_name}': {e}") from e 56 | 57 | def get_tokenizer(self) -> Optional[HuggingFaceTokenizerWrapper]: 58 | """Returns the tokenizer from the loaded SentenceTransformer model, wrapped.""" 59 | if hasattr(self.model, 'tokenizer'): 60 | return HuggingFaceTokenizerWrapper(self.model.tokenizer) 61 | return None 62 | 63 | def vectorize(self, texts: List[str]) -> np.ndarray: 64 | if not texts: 65 | return np.empty((0, self.dim), dtype=self.dtype) 66 | try: 67 | embeddings = self.model.encode(texts, convert_to_numpy=True, show_progress_bar=False) 68 | if not isinstance(embeddings, np.ndarray): 69 | raise VectorizationError("SentenceTransformer model did not return a NumPy array.") 70 | if embeddings.dtype != self._dtype: 71 | embeddings = embeddings.astype(self._dtype) 72 | return embeddings 73 | except Exception as e: 74 | raise VectorizationError(f"Error during sentence-transformer encoding: {e}") from e 75 | 76 | @property 77 | def dim(self) -> int: 78 | return self._dim 79 | 80 | @property 81 | def dtype(self) -> np.dtype: 82 | return self._dtype 83 | 84 | @staticmethod 85 | def list_models(**kwargs) -> List[str]: 86 | """ 87 | Returns a list of popular SentenceTransformer models. 88 | This is not an exhaustive list from an API but a curated selection. 89 | """ 90 | return [ 91 | "all-MiniLM-L6-v2", 92 | "all-mpnet-base-v2", 93 | "paraphrase-multilingual-MiniLM-L12-v2", 94 | "distiluse-base-multilingual-cased-v1", 95 | "all-roberta-large-v1" 96 | ] -------------------------------------------------------------------------------- /examples/dynamic_model_selection.py: -------------------------------------------------------------------------------- 1 | # examples/dynamic_model_selection.py 2 | """ 3 | This example demonstrates how to use the `list_available_models` class method 4 | to dynamically discover and select a model from a running Ollama instance, 5 | and then use it to create and query a SafeStore. 6 | """ 7 | import safe_store 8 | from pathlib import Path 9 | import shutil 10 | 11 | # --- Configuration --- 12 | DB_FILE = "dynamic_ollama_store.db" 13 | # This example assumes an Ollama server is running at the default host. 14 | # If your Ollama server is elsewhere, you can specify it: 15 | # OLLAMA_HOST = "http://192.168.1.10:11434" 16 | OLLAMA_HOST = "http://localhost:11434" 17 | 18 | def cleanup(): 19 | """Removes the database file from previous runs.""" 20 | Path(DB_FILE).unlink(missing_ok=True) 21 | Path(f"{DB_FILE}.lock").unlink(missing_ok=True) 22 | print(f"--- Cleaned up old database file: {DB_FILE} ---") 23 | 24 | if __name__ == "__main__": 25 | cleanup() 26 | 27 | # --- 1. 
Discover available Ollama models --- 28 | print(f"\n--- Step 1: Discovering models from Ollama at {OLLAMA_HOST} ---") 29 | try: 30 | # Use the class method to get a list of models from the Ollama server 31 | available_models = safe_store.SafeStore.list_available_models( 32 | vectorizer_name="ollama", 33 | host=OLLAMA_HOST # Pass the host to the method 34 | ) 35 | 36 | if not available_models: 37 | print("\n[ERROR] No models found on the Ollama server.") 38 | print("Please make sure Ollama is running and you have pulled at least one model, for example:") 39 | print(" ollama pull nomic-embed-text") 40 | exit() 41 | 42 | print("Found available models:") 43 | for model in available_models: 44 | print(f" - {model}") 45 | 46 | except safe_store.VectorizationError as e: 47 | print(f"\n[ERROR] Could not connect to the Ollama server: {e}") 48 | print("Please ensure your Ollama server is running and accessible.") 49 | exit() 50 | except Exception as e: 51 | print(f"\n[ERROR] An unexpected error occurred: {e}") 52 | exit() 53 | 54 | # --- 2. Select a model and configure the store --- 55 | print("\n--- Step 2: Selecting a model ---") 56 | # For this example, we'll just pick the first model from the list. 57 | # In a real application, you might let the user choose. 58 | selected_model = available_models[0] 59 | print(f"Selected model: {selected_model}") 60 | 61 | # Prepare the configuration for the SafeStore instance 62 | vectorizer_name = "ollama" 63 | vectorizer_config = { 64 | "model": selected_model, 65 | "host": OLLAMA_HOST 66 | } 67 | 68 | # --- 3. Initialize SafeStore with the selected model --- 69 | print("\n--- Step 3: Initializing SafeStore ---") 70 | store = safe_store.SafeStore( 71 | db_path=DB_FILE, 72 | vectorizer_name=vectorizer_name, 73 | vectorizer_config=vectorizer_config, 74 | log_level=safe_store.LogLevel.INFO 75 | ) 76 | print("SafeStore initialized successfully.") 77 | 78 | # --- 4. Use the store to add and query text --- 79 | print("\n--- Step 4: Adding text and querying ---") 80 | with store: 81 | # Add some sample text 82 | store.add_text( 83 | unique_id="tech-report-01", 84 | text="The new quantum processor shows a 200% performance increase in benchmark tests." 85 | ) 86 | store.add_text( 87 | unique_id="finance-summary-01", 88 | text="Quarterly earnings are up by 15%, driven by the new hardware division." 89 | ) 90 | print("Added two text entries to the store.") 91 | 92 | # Perform a query 93 | query_text = "What were the results of the processor benchmarks?" 94 | print(f"\nQuerying for: '{query_text}'") 95 | results = store.query(query_text, top_k=1) 96 | 97 | if results: 98 | result = results[0] 99 | print(f"Found a relevant chunk with {result['similarity_percent']:.2f}% similarity:") 100 | print(f" -> Text: '{result['chunk_text']}'") 101 | else: 102 | print("No relevant results found for the query.") 103 | 104 | print("\n--- Example Finished ---") -------------------------------------------------------------------------------- /examples/custom_logging.py: -------------------------------------------------------------------------------- 1 | # examples/custom_logging.py 2 | """ 3 | Demonstrates how to configure ascii_colors globally to customize 4 | safe_store's logging output (and any other ascii_colors usage). 
5 | """ 6 | import safe_store 7 | from ascii_colors import ASCIIColors, LogLevel, FileHandler, Formatter 8 | from pathlib import Path 9 | import shutil 10 | 11 | # --- Configuration --- 12 | DB_FILE = "custom_log_store.db" 13 | LOG_FILE = "safe_store_custom.log" 14 | DOC_DIR = Path("temp_docs_custom_log") 15 | 16 | # --- Helper Functions --- 17 | def print_header(title): 18 | print("\n" + "="*10 + f" {title} " + "="*10) 19 | 20 | def cleanup(): 21 | print_header("Cleaning Up") 22 | db_path = Path(DB_FILE) 23 | log_path = Path(LOG_FILE) 24 | lock_path = Path(f"{DB_FILE}.lock") 25 | wal_path = Path(f"{DB_FILE}-wal") 26 | shm_path = Path(f"{DB_FILE}-shm") 27 | 28 | if DOC_DIR.exists(): shutil.rmtree(DOC_DIR) 29 | if db_path.exists(): db_path.unlink() 30 | if log_path.exists(): log_path.unlink() 31 | if lock_path.exists(): lock_path.unlink(missing_ok=True) 32 | if wal_path.exists(): wal_path.unlink(missing_ok=True) 33 | if shm_path.exists(): shm_path.unlink(missing_ok=True) 34 | print("- Cleanup complete.") 35 | 36 | # --- Main Script --- 37 | if __name__ == "__main__": 38 | cleanup() # Start fresh 39 | 40 | print_header("Configuring Global Logging") 41 | 42 | # 1. Set the global minimum log level (e.g., show DEBUG messages) 43 | ASCIIColors.set_log_level(LogLevel.DEBUG) 44 | print(f"- Global log level set to: {LogLevel.DEBUG.name}") 45 | 46 | # 2. Create a file handler to log messages to a file 47 | file_handler = FileHandler(LOG_FILE, encoding='utf-8') 48 | print(f"- Configured file logging to: {LOG_FILE}") 49 | 50 | # 3. Define a format for the file logger 51 | # Example format: Timestamp - Level Name - Message 52 | file_formatter = Formatter( 53 | "%(asctime)s [%(levelname)-8s] %(message)s", 54 | datefmt="%Y-%m-%d %H:%M:%S" 55 | ) 56 | file_handler.setFormatter(file_formatter) 57 | print(f"- Set custom format for file logger.") 58 | 59 | # 4. Add the configured file handler to ascii_colors 60 | ASCIIColors.add_handler(file_handler) 61 | print(f"- Added file handler globally.") 62 | 63 | # Optional: Remove the default console handler if you *only* want file logging 64 | # default_console_handler = ASCIIColors.get_default_handler() 65 | # if default_console_handler: 66 | # ASCIIColors.remove_handler(default_console_handler) 67 | # print("- Removed default console handler.") 68 | # else: 69 | # print("- Default console handler not found or already removed.") 70 | print("- Default console handler remains active (logs will go to console AND file).") 71 | 72 | 73 | # --- Initialize and use safe_store --- 74 | # It will now use the global logging configuration we just set. 
75 | print_header("Initializing and Using safe_store") 76 | print("safe_store actions will now be logged according to the global settings.") 77 | print(f"Check the console output AND the '{LOG_FILE}' file.") 78 | 79 | try: 80 | store = safe_store.SafeStore(DB_FILE) # Uses global log level (DEBUG) 81 | 82 | # Prepare a sample document 83 | DOC_DIR.mkdir(exist_ok=True) 84 | doc_path = DOC_DIR / "logging_test.txt" 85 | doc_path.write_text("This is a test document for custom logging.", encoding='utf-8') 86 | 87 | with store: 88 | # Add the document - DEBUG messages should appear in the log file 89 | store.add_document(doc_path, vectorizer_name="st:all-MiniLM-L6-v2") 90 | 91 | # Perform a query 92 | results = store.query("custom logging test") 93 | print("\n--- Query Results ---") 94 | if results: 95 | print(f"Found {len(results)} result(s).") 96 | else: 97 | print("No results found.") 98 | 99 | except safe_store.ConfigurationError as e: 100 | print(f"\n[ERROR] Missing dependency: {e}") 101 | print("Please install required extras (e.g., pip install safe_store[sentence-transformers])") 102 | except Exception as e: 103 | print(f"\n[ERROR] An unexpected error occurred: {e.__class__.__name__}: {e}") 104 | finally: 105 | print("\n--- End of Script ---") 106 | print(f"Review console output and '{LOG_FILE}' for detailed logs.") 107 | 108 | -------------------------------------------------------------------------------- /safe_store/search/similarity.py: -------------------------------------------------------------------------------- 1 | # safe_store/search/similarity.py 2 | import numpy as np 3 | from ascii_colors import ASCIIColors 4 | from typing import Union 5 | 6 | # Type hint for vectors 7 | VectorInput = Union[np.ndarray, list[float]] # Allow lists as input for query? No, enforce ndarray. 8 | Vector1D = np.ndarray # Shape (D,) 9 | Matrix2D = np.ndarray # Shape (N, D) 10 | 11 | def cosine_similarity(query_vector: Vector1D, vectors: Matrix2D) -> np.ndarray: 12 | """ 13 | Calculates cosine similarity between a single query vector and a matrix of vectors. 14 | 15 | Handles normalization and potential zero vectors gracefully. 16 | 17 | Args: 18 | query_vector: A 1D NumPy array representing the query vector (shape D). 19 | vectors: A 2D NumPy array where each row is a vector to compare against 20 | (shape N, D). Can also handle the case where vectors is 1D 21 | (shape D) representing a single comparison vector, by reshaping it. 22 | 23 | Returns: 24 | A 1D NumPy array of shape (N,) containing the cosine similarity scores, 25 | where each score is between -1.0 and 1.0. 26 | 27 | Raises: 28 | TypeError: If inputs are not NumPy arrays. 29 | ValueError: If input shapes are incompatible (e.g., query is not 1D, 30 | matrix is not 1D or 2D, or dimensions mismatch). 
31 | """ 32 | if not isinstance(query_vector, np.ndarray) or not isinstance(vectors, np.ndarray): 33 | raise TypeError("Input query_vector and vectors must be NumPy arrays.") 34 | 35 | # Validate query_vector shape 36 | if query_vector.ndim != 1: 37 | raise ValueError(f"Query vector must be 1D, but got shape {query_vector.shape}") 38 | 39 | # Validate and potentially reshape vectors matrix 40 | if vectors.ndim == 1: 41 | # Allow comparing query to a single vector passed as 1D array 42 | if query_vector.shape[0] == vectors.shape[0]: 43 | vectors = vectors.reshape(1, -1) # Reshape to (1, D) 44 | ASCIIColors.debug("Reshaped 1D input 'vectors' to 2D for single vector comparison.") 45 | else: 46 | raise ValueError( 47 | f"If 'vectors' is 1D, its dimension ({vectors.shape[0]}) must match " 48 | f"query_vector dimension ({query_vector.shape[0]})" 49 | ) 50 | elif vectors.ndim != 2: 51 | raise ValueError(f"Input 'vectors' must be a 1D or 2D array, but got shape {vectors.shape}") 52 | 53 | # Dimension compatibility check 54 | if query_vector.shape[0] != vectors.shape[1]: 55 | raise ValueError( 56 | f"Query vector dimension ({query_vector.shape[0]}) must match " 57 | f"the dimension of vectors in the matrix ({vectors.shape[1]})" 58 | ) 59 | 60 | num_vectors = vectors.shape[0] 61 | if num_vectors == 0: 62 | ASCIIColors.debug("Input 'vectors' matrix is empty, returning empty similarity array.") 63 | return np.array([], dtype=query_vector.dtype) # Return empty array of appropriate type 64 | 65 | ASCIIColors.debug(f"Calculating cosine similarity: query_shape={query_vector.shape}, matrix_shape={vectors.shape}") 66 | 67 | # Calculate norms, adding epsilon for numerical stability and avoiding zero division 68 | epsilon = np.finfo(query_vector.dtype).eps # Use machine epsilon for the data type 69 | query_norm = np.linalg.norm(query_vector) 70 | vectors_norm = np.linalg.norm(vectors, axis=1) # Norm of each row vector 71 | 72 | # Handle potential zero vectors by replacing norm with epsilon 73 | query_norm_safe = query_norm if query_norm > epsilon else epsilon 74 | vectors_norm_safe = np.where(vectors_norm > epsilon, vectors_norm, epsilon) 75 | 76 | # Normalize vectors 77 | # Using np.divide with 'out' and 'where' could be slightly more robust, but direct division is common 78 | norm_query = query_vector / query_norm_safe 79 | # Use broadcasting for matrix normalization: vectors_norm_safe[:, np.newaxis] ensures (N, 1) shape 80 | norm_vectors = vectors / vectors_norm_safe[:, np.newaxis] 81 | 82 | # Calculate dot product between normalized matrix rows and the normalized query vector 83 | # Result is (N, D) dot (D,) -> (N,) 84 | similarity_scores = np.dot(norm_vectors, norm_query) 85 | 86 | # Clip scores to be strictly within [-1, 1] due to potential floating point inaccuracies 87 | similarity_scores = np.clip(similarity_scores, -1.0, 1.0) 88 | 89 | ASCIIColors.debug(f"Similarity calculation complete. Output shape: {similarity_scores.shape}") 90 | return similarity_scores -------------------------------------------------------------------------------- /docs/encryption.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Encryption 3 | ========== 4 | 5 | ``safe_store`` provides optional encryption at rest for the text content of document chunks stored in the database. This helps protect sensitive information if the database file itself is exposed. 
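The sketch below shows what enabling this can look like in practice; it mirrors the commented-out ``encryption_key`` parameter in ``examples/basic_usage_text.py``, and the password and file names are placeholders only::

    import safe_store

    # Passing encryption_key encrypts chunk text at rest (placeholder password).
    store = safe_store.SafeStore(
        "my_encrypted_store.db",
        encryption_key="your-secret-password",
    )
    with store:
        store.add_document("temp_docs/my_text_doc.txt",
                           vectorizer_name="st:all-MiniLM-L6-v2")
        results = store.query("simple storage",
                              vectorizer_name="st:all-MiniLM-L6-v2", top_k=2)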
6 | 7 | How it Works 8 | ------------ 9 | 10 | * **Algorithm:** Uses Fernet symmetric authenticated cryptography from the `cryptography
Efficient retrieval is crucial for RAG pipelines.
", 60 | encoding='utf-8' 61 | ) 62 | print(f"- Documents created in: {DOC_DIR.resolve()}") 63 | 64 | # --- Main Script --- 65 | if __name__ == "__main__": 66 | # --- Discover and Print Available Vectorizers --- 67 | print_header("Discovering Available Vectorizers") 68 | available_vectorizers = safe_store.SafeStore.list_available_vectorizers() 69 | for vec in available_vectorizers: 70 | print(f"\n- Vectorizer: {vec['name']} ({vec.get('title', 'No Title')})") 71 | print(f" Description: {vec.get('description', 'N/A').strip()}") 72 | if vec.get('input_parameters'): 73 | print(" Parameters:") 74 | for param in vec['input_parameters']: 75 | default_val = f" (default: {param['default']})" if 'default' in param else "" 76 | mandatory_flag = "[MANDATORY]" if param.get('mandatory') else "[OPTIONAL]" 77 | print(f" - {param['name']}: {param.get('description', 'N/A')} {mandatory_flag}{default_val}") 78 | 79 | DOC_DIR = Path("temp_docs_basic") 80 | prepare_documents(DOC_DIR) 81 | 82 | # --- Example 1: Sentence Transformer (ST) --- 83 | if USE_ST: 84 | db_file_st = "st_store.db" 85 | print_header(f"Sentence Transformer Example (DB: {db_file_st})") 86 | cleanup_db_files(db_file_st) 87 | try: 88 | store_st = safe_store.SafeStore( 89 | db_path=db_file_st, 90 | vectorizer_name="st", 91 | vectorizer_config=st_config, 92 | log_level=safe_store.LogLevel.INFO 93 | ) 94 | with store_st: 95 | store_st.add_document(DOC_DIR / "intro.txt", metadata={"topic": "introduction"}) 96 | if USE_PARSING: 97 | store_st.add_document(DOC_DIR / "web_snippet.html", metadata={"source": "web"}) 98 | 99 | results_st = store_st.query("local database library", top_k=1) 100 | if results_st: 101 | res = results_st[0] 102 | print(f" Query Result: Score={res['similarity_percent']:.2f}%, Text='{res['chunk_text'][:60]}...'") 103 | 104 | # NEW: Demonstrate vectorizing with metadata 105 | print("\n Demonstrating vectorization with metadata...") 106 | store_st.add_text( 107 | unique_id="metadata_vectorization_test", 108 | text="This text is about oranges and lemons.", 109 | metadata={"topic": "citrus fruits", "author": "test"}, 110 | vectorize_with_metadata=True, # This is the new option 111 | force_reindex=True 112 | ) 113 | # This query should be more similar to the metadata ("citrus") than the other documents. 
114 | results_meta = store_st.query("information about citrus", top_k=1) 115 | if results_meta: 116 | res = results_meta[0] 117 | print(f" Querying with metadata context ('citrus'): Score={res['similarity_percent']:.2f}%, Path='{res['file_path']}'") 118 | if res['file_path'] == 'metadata_vectorization_test': 119 | print(" SUCCESS: The most relevant result came from the document with vectorized metadata.") 120 | else: 121 | print(" NOTE: The top result was not the one with vectorized metadata, which might happen with some models.") 122 | 123 | print("\n Demonstrating file update...") 124 | (DOC_DIR / "update_later.txt").write_text("This content is new and improved for re-indexing.") 125 | store_st.add_document(DOC_DIR / "update_later.txt", force_reindex=True) 126 | print(" 'update_later.txt' has been re-indexed.") 127 | 128 | except safe_store.ConfigurationError as e: 129 | print(f" [SKIP] Could not run ST example: {e}") 130 | except Exception as e: 131 | print(f" [ERROR] An unexpected error occurred: {e}") 132 | 133 | # --- Example 2: TF-IDF --- 134 | if USE_TFIDF: 135 | db_file_tfidf = "tfidf_store.db" 136 | print_header(f"TF-IDF Example (DB: {db_file_tfidf})") 137 | cleanup_db_files(db_file_tfidf) 138 | try: 139 | store_tfidf = safe_store.SafeStore( 140 | db_path=db_file_tfidf, 141 | vectorizer_name="tfidf", 142 | vectorizer_config=tfidf_config, 143 | chunking_strategy='character' 144 | ) 145 | with store_tfidf: 146 | print(" Adding documents (this will fit the TF-IDF model)...") 147 | store_tfidf.add_document(DOC_DIR / "intro.txt") 148 | if USE_PARSING: 149 | store_tfidf.add_document(DOC_DIR / "web_snippet.html") 150 | 151 | results_tfidf = store_tfidf.query("SQLite backend storage", top_k=1) 152 | if results_tfidf: 153 | res = results_tfidf[0] 154 | print(f" Query Result: Score={res['similarity_percent']:.2f}%, Text='{res['chunk_text'][:60]}...'") 155 | 156 | except safe_store.ConfigurationError as e: 157 | print(f" [SKIP] Could not run TF-IDF example: {e}") 158 | except Exception as e: 159 | print(f" [ERROR] An unexpected error occurred: {e}") 160 | 161 | # --- Example 3: Ollama --- 162 | if USE_OLLAMA: 163 | db_file_ollama = "ollama_store.db" 164 | print_header(f"Ollama Example with Custom Tokenizer (DB: {db_file_ollama})") 165 | cleanup_db_files(db_file_ollama) 166 | try: 167 | available_models = safe_store.SafeStore.list_models("ollama") 168 | print(f" Found Ollama models: {available_models}") 169 | if ollama_config["model"] not in available_models: 170 | print(f" [SKIP] Model '{ollama_config['model']}' not found in Ollama.") 171 | else: 172 | store_ollama = safe_store.SafeStore( 173 | db_path=db_file_ollama, 174 | vectorizer_name="ollama", 175 | vectorizer_config=ollama_config, 176 | # --- NOUVEAUTÉ : Utiliser le chunking par token en fournissant un tokenizer personnalisé --- 177 | chunking_strategy='token', 178 | custom_tokenizer={"name": "tiktoken", "model": "cl100k_base"} 179 | ) 180 | with store_ollama: 181 | store_ollama.add_document(DOC_DIR / "intro.txt") 182 | results_ollama = store_ollama.query("file-based vector db", top_k=1) 183 | if results_ollama: 184 | res = results_ollama[0] 185 | print(f" Query Result: Score={res['similarity_percent']:.2f}%, Text='{res['chunk_text'][:60]}...'") 186 | 187 | except safe_store.VectorizationError as e: 188 | print(f" [SKIP] Could not connect to Ollama server: {e}") 189 | except Exception as e: 190 | print(f" [ERROR] An unexpected error occurred: {e}") 191 | 192 | 193 | # --- API-based examples --- 194 | if USE_OPENAI: 195 | 
db_file_openai = "openai_store.db" 196 | print_header(f"OpenAI Example (DB: {db_file_openai})") 197 | cleanup_db_files(db_file_openai) 198 | try: 199 | store_openai = safe_store.SafeStore( 200 | db_path=db_file_openai, 201 | vectorizer_name="openai", 202 | vectorizer_config=openai_config, 203 | chunking_strategy='character' # Also required for OpenAI 204 | ) 205 | with store_openai: 206 | store_openai.add_document(DOC_DIR / "intro.txt") 207 | results_openai = store_openai.query("python tool for embeddings", top_k=1) 208 | if results_openai: 209 | print(f" Query Result: Score={results_openai[0]['similarity_percent']:.2f}%") 210 | except Exception as e: 211 | print(f" [ERROR] OpenAI example failed: {e}") 212 | 213 | if USE_COHERE: 214 | db_file_cohere = "cohere_store.db" 215 | print_header(f"Cohere Example (DB: {db_file_cohere})") 216 | cleanup_db_files(db_file_cohere) 217 | try: 218 | store_cohere = safe_store.SafeStore( 219 | db_path=db_file_cohere, 220 | vectorizer_name="cohere", 221 | vectorizer_config=cohere_config, 222 | chunking_strategy='character' # Also required for Cohere 223 | ) 224 | with store_cohere: 225 | store_cohere.add_document(DOC_DIR / "intro.txt") 226 | results_cohere = store_cohere.query("library for vector search", top_k=1) 227 | if results_cohere: 228 | print(f" Query Result: Score={results_cohere[0]['similarity_percent']:.2f}%") 229 | except Exception as e: 230 | print(f" [ERROR] Cohere example failed: {e}") 231 | 232 | print("\n--- Final Cleanup ---") 233 | if DOC_DIR.exists(): 234 | shutil.rmtree(DOC_DIR) 235 | print(f"- Removed directory: {DOC_DIR}") 236 | 237 | print("\n--- End of Script ---") -------------------------------------------------------------------------------- /scripts/migration_v1_v2.py: -------------------------------------------------------------------------------- 1 | # migrate_v1_to_v2_argparse.py 2 | import sqlite3 3 | from pathlib import Path 4 | from typing import Union, Optional, Any 5 | import argparse 6 | from ascii_colors import ASCIIColors 7 | 8 | # --- DatabaseError and connect_db remain the same --- 9 | class DatabaseError(Exception): 10 | pass 11 | 12 | def connect_db(db_path: Union[str, Path]) -> sqlite3.Connection: 13 | db_path_obj = Path(db_path).resolve() 14 | try: 15 | db_path_obj.parent.mkdir(parents=True, exist_ok=True) 16 | conn = sqlite3.connect( 17 | str(db_path_obj), 18 | detect_types=sqlite3.PARSE_DECLTYPES, 19 | check_same_thread=False 20 | ) 21 | conn.execute("PRAGMA journal_mode=WAL;") 22 | conn.execute("PRAGMA foreign_keys = ON;") 23 | ASCIIColors.debug(f"Connected to database: {db_path_obj} (WAL enabled)") 24 | return conn 25 | except sqlite3.Error as e: 26 | msg = f"Database connection error to {db_path_obj}: {e}" 27 | ASCIIColors.error(msg, exc_info=True) 28 | raise DatabaseError(msg) from e 29 | 30 | # --- set_store_metadata and get_store_metadata remain the same --- 31 | def set_store_metadata(conn: sqlite3.Connection, key: str, value: str) -> None: 32 | sql = "INSERT OR REPLACE INTO store_metadata (key, value) VALUES (?, ?)" 33 | cursor = conn.cursor() 34 | try: 35 | cursor.execute(sql, (key, value)) 36 | ASCIIColors.debug(f"Set store_metadata: {key} = {value}") 37 | except sqlite3.Error as e: 38 | msg = f"Error setting store metadata '{key}': {e}" 39 | ASCIIColors.error(msg, exc_info=True) 40 | raise DatabaseError(msg) from e 41 | 42 | def get_store_metadata(conn: sqlite3.Connection, key: str) -> Optional[str]: 43 | cursor = conn.cursor() 44 | try: 45 | cursor.execute("SELECT 1 FROM sqlite_master WHERE 
type='table' AND name='store_metadata';") 46 | if not cursor.fetchone(): 47 | return None 48 | 49 | sql = "SELECT value FROM store_metadata WHERE key = ?" 50 | cursor.execute(sql, (key,)) 51 | result = cursor.fetchone() 52 | return result[0] if result else None 53 | except sqlite3.Error as e: 54 | ASCIIColors.warning(f"Could not get store metadata for key '{key}' (may not exist yet): {e}") 55 | return None 56 | 57 | def table_exists(cursor: sqlite3.Cursor, table_name: str) -> bool: 58 | """Checks if a table exists in the database.""" 59 | cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?;", (table_name,)) 60 | return cursor.fetchone() is not None 61 | 62 | def migrate_v1_to_v2(db_path: Path, auto_yes: bool = False): 63 | """ 64 | Migrates the SafeStore database from v1.0 schema to v2.0 schema. 65 | Adds graph-related tables and columns. 66 | 67 | Args: 68 | db_path: Path object to the database file. 69 | auto_yes: If True, skips interactive prompts. 70 | """ 71 | ASCIIColors.info(f"Attempting migration for database: {db_path}") 72 | 73 | if not db_path.exists(): 74 | ASCIIColors.error(f"Database file {db_path} does not exist. Cannot migrate.") 75 | ASCIIColors.info("If this is a new setup, the main application will initialize it to v2.0.") 76 | return False 77 | 78 | if not auto_yes: 79 | ASCIIColors.warning("IMPORTANT: Please backup your database file before proceeding!") 80 | try: 81 | if not Path("/dev/tty").is_char_device(): 82 | ASCIIColors.info("Non-interactive environment detected, proceeding without prompt.") 83 | elif input("Press Enter to continue or Ctrl+C to abort..."): 84 | ASCIIColors.info("Migration aborted by user input.") 85 | return False 86 | except (EOFError, KeyboardInterrupt): 87 | ASCIIColors.info("Migration aborted.") 88 | return False 89 | except Exception: 90 | ASCIIColors.info("Could not get interactive input, proceeding with caution. Use --yes to bypass.") 91 | 92 | conn = None 93 | try: 94 | conn = connect_db(db_path) 95 | cursor = conn.cursor() 96 | 97 | # --- Pre-migration V1 Schema Check --- 98 | ASCIIColors.info("Performing pre-migration schema check...") 99 | required_v1_tables = ["documents", "vectorization_methods", "chunks", "vectors"] 100 | missing_v1_tables = [] 101 | for table_name in required_v1_tables: 102 | if not table_exists(cursor, table_name): 103 | missing_v1_tables.append(table_name) 104 | 105 | if missing_v1_tables: 106 | ASCIIColors.error(f"The database at '{db_path}' is missing essential v1.0 tables: {', '.join(missing_v1_tables)}.") 107 | ASCIIColors.error("This script expects a database with a valid v1.0 schema.") 108 | ASCIIColors.info("If this is an empty database, your application should initialize it directly to v2.0.") 109 | return False 110 | ASCIIColors.green("Basic v1.0 schema tables found.") 111 | 112 | 113 | # --- Version Check (after confirming basic tables exist) --- 114 | current_version = get_store_metadata(conn, 'schema_version') 115 | if current_version == '2.0': 116 | ASCIIColors.success(f"Database '{db_path}' is already at schema version 2.0. No migration needed.") 117 | return True 118 | elif current_version: 119 | ASCIIColors.warning(f"Database '{db_path}' has an existing schema version: '{current_version}'.") 120 | ASCIIColors.warning("This script is designed for v1.0 (no version marker) to v2.0 migration.") 121 | if not auto_yes: 122 | if input(f"Continue migration from '{current_version}' to '2.0'? 
(yes/NO): ").lower() != 'yes': 123 | ASCIIColors.info("Migration aborted by user.") 124 | return False 125 | else: 126 | ASCIIColors.info(f"Auto-proceeding with migration from '{current_version}' to '2.0'.") 127 | else: 128 | ASCIIColors.info("No schema_version metadata found. Assuming v1.0 database.") 129 | 130 | 131 | ASCIIColors.info("Proceeding with v1.0 to v2.0 migration tasks...") 132 | 133 | cursor.execute("PRAGMA foreign_keys=OFF;") 134 | 135 | # 1. Add 'graph_processed_at' column and index to 'chunks' table 136 | ASCIIColors.info("Updating 'chunks' table (guaranteed to exist by pre-check)...") 137 | cursor.execute("PRAGMA table_info(chunks);") 138 | columns_in_chunks = [info[1] for info in cursor.fetchall()] 139 | if 'graph_processed_at' not in columns_in_chunks: 140 | cursor.execute("ALTER TABLE chunks ADD COLUMN graph_processed_at DATETIME;") 141 | ASCIIColors.info("Added 'graph_processed_at' column to 'chunks'.") 142 | cursor.execute("CREATE INDEX IF NOT EXISTS idx_chunk_graph_processed_at ON chunks (graph_processed_at);") 143 | ASCIIColors.green("'chunks' table updated and indexed.") 144 | 145 | # 2. Create 'store_metadata' table 146 | ASCIIColors.info("Ensuring 'store_metadata' table exists...") 147 | cursor.execute(""" 148 | CREATE TABLE IF NOT EXISTS store_metadata (key TEXT PRIMARY KEY, value TEXT); 149 | """) 150 | ASCIIColors.green("'store_metadata' table ensured.") 151 | 152 | # 3. Create 'graph_nodes' table and indexes 153 | ASCIIColors.info("Ensuring 'graph_nodes' table and indexes...") 154 | cursor.execute(""" 155 | CREATE TABLE IF NOT EXISTS graph_nodes ( 156 | node_id INTEGER PRIMARY KEY AUTOINCREMENT, node_label TEXT NOT NULL, 157 | node_properties TEXT, unique_signature TEXT UNIQUE); 158 | """) 159 | cursor.execute("CREATE INDEX IF NOT EXISTS idx_graph_node_label ON graph_nodes (node_label);") 160 | cursor.execute("CREATE INDEX IF NOT EXISTS idx_graph_node_signature ON graph_nodes (unique_signature);") 161 | ASCIIColors.green("'graph_nodes' table and indexes ensured.") 162 | 163 | # 4. Create 'graph_relationships' table and indexes 164 | ASCIIColors.info("Ensuring 'graph_relationships' table and indexes...") 165 | cursor.execute(""" 166 | CREATE TABLE IF NOT EXISTS graph_relationships ( 167 | relationship_id INTEGER PRIMARY KEY AUTOINCREMENT, source_node_id INTEGER NOT NULL, 168 | target_node_id INTEGER NOT NULL, relationship_type TEXT NOT NULL, 169 | relationship_properties TEXT, 170 | FOREIGN KEY (source_node_id) REFERENCES graph_nodes (node_id) ON DELETE CASCADE, 171 | FOREIGN KEY (target_node_id) REFERENCES graph_nodes (node_id) ON DELETE CASCADE); 172 | """) 173 | cursor.execute("CREATE INDEX IF NOT EXISTS idx_graph_rel_source_type ON graph_relationships (source_node_id, relationship_type);") 174 | cursor.execute("CREATE INDEX IF NOT EXISTS idx_graph_rel_target_type ON graph_relationships (target_node_id, relationship_type);") 175 | cursor.execute("CREATE INDEX IF NOT EXISTS idx_graph_rel_type ON graph_relationships (relationship_type);") 176 | ASCIIColors.green("'graph_relationships' table and indexes ensured.") 177 | 178 | # 5. 
Create 'node_chunk_links' table and indexes 179 | ASCIIColors.info("Ensuring 'node_chunk_links' table and indexes...") 180 | cursor.execute(""" 181 | CREATE TABLE IF NOT EXISTS node_chunk_links ( 182 | node_id INTEGER NOT NULL, chunk_id INTEGER NOT NULL, 183 | FOREIGN KEY (node_id) REFERENCES graph_nodes (node_id) ON DELETE CASCADE, 184 | FOREIGN KEY (chunk_id) REFERENCES chunks (chunk_id) ON DELETE CASCADE, 185 | PRIMARY KEY (node_id, chunk_id)); 186 | """) 187 | cursor.execute("CREATE INDEX IF NOT EXISTS idx_ncl_node_id ON node_chunk_links (node_id);") 188 | cursor.execute("CREATE INDEX IF NOT EXISTS idx_ncl_chunk_id ON node_chunk_links (chunk_id);") 189 | ASCIIColors.green("'node_chunk_links' table and indexes ensured.") 190 | 191 | # 6. Update schema version in store_metadata 192 | ASCIIColors.info("Updating schema version to 2.0 in 'store_metadata'.") 193 | cursor.execute("INSERT OR REPLACE INTO store_metadata (key, value) VALUES (?, ?)", ('schema_version', '2.0')) 194 | 195 | cursor.execute("PRAGMA foreign_keys=ON;") 196 | 197 | conn.commit() 198 | ASCIIColors.success(f"Database migration to v2.0 completed successfully for: {db_path}") 199 | return True 200 | 201 | except sqlite3.Error as e: 202 | ASCIIColors.error(f"SQLite error during migration: {e}") 203 | if conn: 204 | ASCIIColors.warning("Rolling back changes due to error.") 205 | conn.rollback() 206 | return False 207 | except DatabaseError as e: 208 | ASCIIColors.error(f"Database operation error during migration: {e}") 209 | if conn: 210 | ASCIIColors.warning("Rolling back changes due to error.") 211 | conn.rollback() 212 | return False 213 | except Exception as e: 214 | ASCIIColors.error(f"An unexpected error occurred during migration: {e}", exc_info=True) 215 | if conn: 216 | ASCIIColors.warning("Rolling back changes due to error.") 217 | conn.rollback() 218 | return False 219 | finally: 220 | if conn: 221 | conn.close() 222 | ASCIIColors.debug("Database connection closed.") 223 | 224 | # --- main() function with argparse remains the same --- 225 | def main(): 226 | parser = argparse.ArgumentParser( 227 | description="Migrate SafeStore SQLite database from v1.0 schema to v2.0 schema.", 228 | formatter_class=argparse.RawTextHelpFormatter, 229 | epilog=""" 230 | Example usage: 231 | python %(prog)s /path/to/your/safestore.db 232 | python %(prog)s my_database.sqlite --yes 233 | 234 | This script adds new tables and columns for graph database functionality. 235 | It is designed to be run on a database created with a pre-graph version of SafeStore. 236 | Ensure you have a backup of your database before running this script. 237 | """ 238 | ) 239 | parser.add_argument( 240 | "db_path", 241 | type=Path, 242 | help="Path to the SQLite database file to migrate." 243 | ) 244 | parser.add_argument( 245 | "--yes", 246 | "-y", 247 | action="store_true", 248 | help="Automatically answer 'yes' to confirmation prompts (use with caution)." 249 | ) 250 | 251 | args = parser.parse_args() 252 | 253 | if migrate_v1_to_v2(args.db_path, auto_yes=args.yes): 254 | ASCIIColors.highlight("Migration process finished.") 255 | else: 256 | ASCIIColors.critical("Migration process failed or was aborted. 
--------------------------------------------------------------------------------
/examples/graph_usage.py:
--------------------------------------------------------------------------------
1 | # [FINAL & ROBUST] examples/graph_usage.py
2 | import safe_store
3 | from safe_store import GraphStore, LogLevel, SafeStore
4 | import pipmaster as pm
5 | 
6 | pm.ensure_packages(["lollms_client"])
7 | from lollms_client import LollmsClient
8 | from ascii_colors import ASCIIColors, trace_exception
9 | import sqlite3
10 | from pathlib import Path
11 | import json
12 | import shutil
13 | from typing import Dict, List, Any, Optional
14 | 
15 | # --- Configuration ---
16 | DB_FILE = "graph_example_store.db"
17 | DOC_DIR = Path("temp_docs_graph_example")
18 | 
19 | # --- LOLLMS Client Configuration ---
20 | BINDING_NAME = "ollama"
21 | HOST_ADDRESS = "http://localhost:11434"
22 | MODEL_NAME = "mistral:latest"
23 | 
24 | # --- Ontology Definitions ---
25 | DETAILED_ONTOLOGY = {
26 |     "nodes": {
27 |         "Person": {"description": "A human individual.", "properties": {"name": "string", "title": "string"}},
28 |         "Company": {"description": "A commercial business.", "properties": {"name": "string", "location": "string"}},
29 |         "Product": {"description": "A product created by a company.", "properties": {"name": "string"}},
30 |         "ResearchPaper": {"description": "An academic publication.", "properties": {"title": "string"}},
31 |         "University": {"description": "An institution of higher education.", "properties": {"name": "string"}}
32 |     },
33 |     "relationships": {
34 |         "WORKS_AT": {"description": "Person is employed by Company.", "source": "Person", "target": "Company"},
35 |         "CEO_OF": {"description": "Person is the CEO of Company.", "source": "Person", "target": "Company"},
36 |         "FOUNDED": {"description": "Person founded a Company.", "source": "Person", "target": "Company"},
37 |         "COMPETITOR_OF": {"description": "Company is a competitor of another Company.", "source": "Company", "target": "Company"},
38 |         "PRODUCES": {"description": "Company creates a Product.", "source": "Company", "target": "Product"},
39 |         "AUTHOR_OF": {"description": "Person wrote a ResearchPaper.", "source": "Person", "target": "ResearchPaper"},
40 |         "AFFILIATED_WITH": {"description": "Person is associated with a University.", "source": "Person", "target": "University"}
41 |     }
42 | }
43 | SIMPLE_ONTOLOGY = {
44 |     "nodes": {"Entity": {"description": "A person, company, or organization.", "properties": {"name": "string"}}},
45 |     "relationships": {"IS_RELATED_TO": {"description": "Indicates a general connection between two entities.", "source": "Entity", "target": "Entity"}}
46 | }
47 | 
48 | # NEW: Ontology as a simple string of instructions
49 | STRING_ONTOLOGY = """
50 | - Extract People, Companies, and Products as nodes.
51 | - For 'People' nodes, extract their full name and any job title mentioned as properties.
52 | - For 'Companies' nodes, extract their full name and location as properties.
53 | - For 'Products' nodes, extract their name.
54 | - Create relationships like WORKS_AT, CEO_OF, and PRODUCES between these nodes.
55 | """ 56 | 57 | 58 | LC_CLIENT: Optional[LollmsClient] = None 59 | 60 | def initialize_lollms_client() -> bool: 61 | global LC_CLIENT 62 | if LC_CLIENT is None: 63 | ASCIIColors.info(f"Initializing LollmsClient: Binding='{BINDING_NAME}', Host='{HOST_ADDRESS}', Model='{MODEL_NAME}'") 64 | try: 65 | LC_CLIENT = LollmsClient(llm_binding_name=BINDING_NAME, llm_binding_config={"host_address": HOST_ADDRESS, "model_name": MODEL_NAME}) 66 | if not LC_CLIENT.llm: 67 | ASCIIColors.error(f"LollmsClient binding '{BINDING_NAME}' is not ready."); LC_CLIENT = None; return False 68 | ASCIIColors.success("LollmsClient initialized and ready.") 69 | return True 70 | except Exception as e: 71 | ASCIIColors.error(f"Failed to initialize LollmsClient: {e}"); trace_exception(e); LC_CLIENT = None; return False 72 | return True 73 | 74 | def llm_executor_callback(full_prompt: str) -> str: 75 | global LC_CLIENT 76 | if LC_CLIENT is None: raise ConnectionError("LollmsClient not initialized.") 77 | try: 78 | return LC_CLIENT.generate_code(full_prompt, language="json", temperature=0.05, top_k=10) 79 | except Exception as e: 80 | raise RuntimeError(f"LLM execution for JSON failed: {e}") from e 81 | 82 | def generate_answer_from_context(question: str, graph_data: Dict, chunks_data: Optional[List[Dict]] = None) -> str: 83 | global LC_CLIENT 84 | if LC_CLIENT is None: return "LLM not available." 85 | context_lines = ["--- CONTEXT ---"] 86 | if graph_data and graph_data.get("nodes"): 87 | context_lines.append("\n[Graph Information]:") 88 | node_map = {n['node_id']: n for n in graph_data['nodes']} 89 | 90 | def get_node_instance_name(node_id: int) -> str: 91 | """Helper to get the best possible name for a node instance.""" 92 | node = node_map.get(node_id) 93 | if not node: 94 | return f"ID:{node_id}" 95 | props = node.get('properties', {}) 96 | # Prioritize 'identifying_value', then 'name', then 'title' before falling back to ID. 97 | return props.get('identifying_value') or props.get('name') or props.get('title') or f"ID:{node_id}" 98 | 99 | for node in graph_data['nodes']: 100 | instance_name = get_node_instance_name(node['node_id']) 101 | context_lines.append(f"- Instance '{instance_name}' (type: {node['label']}): {json.dumps(node.get('properties', {}))}") 102 | 103 | for rel in graph_data.get('relationships', []): 104 | src_name = get_node_instance_name(rel['source_node_id']) 105 | tgt_name = get_node_instance_name(rel['target_node_id']) 106 | context_lines.append(f"- Relationship: '{src_name}' --[{rel['type']}]--> '{tgt_name}'") 107 | 108 | if chunks_data: 109 | context_lines.append("\n[Relevant Text Snippets]:") 110 | for i, chunk in enumerate(chunks_data): 111 | context_lines.append(f"- Snippet {i+1}: \"{chunk['chunk_text']}\"") 112 | context_lines.append("\n--- END OF CONTEXT ---") 113 | context_str = "\n".join(context_lines) 114 | 115 | prompt = (f"Answer the user's question based ONLY on the provided context. Do not use prior knowledge.\n\n" 116 | f"{context_str}\n\nQuestion: {question}") 117 | 118 | ASCIIColors.magenta("--- Sending Synthesis Prompt to LLM ---") 119 | try: 120 | return LC_CLIENT.generate_text(prompt, n_predict=512) 121 | except Exception as e: 122 | ASCIIColors.error(f"Error during answer synthesis: {e}") 123 | return "Error generating the answer." 
124 | 
125 | def print_header(title: str):
126 |     print("\n" + "="*25 + f" {title} " + "="*25)
127 | 
128 | def cleanup():
129 |     print_header("Cleaning Up Previous Run")
130 |     paths = [Path(DB_FILE), Path(f"{DB_FILE}.lock"), Path(f"{DB_FILE}-wal"), Path(f"{DB_FILE}-shm"), DOC_DIR]
131 |     for p in paths:
132 |         try:
133 |             if p.is_file(): p.unlink(missing_ok=True); print(f"- Removed file: {p}")
134 |             elif p.is_dir(): shutil.rmtree(p, ignore_errors=True); print(f"- Removed directory: {p}")
135 |         except OSError as e: print(f"- Warning: Could not remove {p}: {e}")
136 | 
137 | def clear_graph_data(conn: sqlite3.Connection):
138 |     ASCIIColors.warning("\nClearing all existing graph data from the database...")
139 |     try:
140 |         conn.execute("BEGIN")
141 |         conn.execute("DELETE FROM node_chunk_links;")
142 |         conn.execute("DELETE FROM graph_relationships;")
143 |         conn.execute("DELETE FROM graph_nodes;")
144 |         conn.execute("UPDATE chunks SET graph_processed_at = NULL;")
145 |         conn.commit()
146 |         ASCIIColors.success("Graph data cleared.")
147 |     except sqlite3.Error as e:
148 |         conn.rollback()
149 |         ASCIIColors.error(f"Failed to clear graph data: {e}")
150 | 
151 | if __name__ == "__main__":
152 |     cleanup()
153 |     if not initialize_lollms_client():
154 |         ASCIIColors.error("Exiting: LollmsClient initialization failure."); exit(1)
155 | 
156 |     ASCIIColors.set_log_level(LogLevel.INFO)
157 | 
158 |     try:
159 |         print_header("Preparing Documents (One-time setup)")
160 |         DOC_DIR.mkdir(exist_ok=True, parents=True)
161 |         doc1_content = "Acme Innovations, led by CEO Dr. Evelyn Reed, is a tech company based in Silicon Valley. Their flagship product, 'NovaCore', was launched in 2023. John Doe works as a Senior Engineer at Acme Innovations and reports to Dr. Reed. Acme Innovations is a competitor of Beta Solutions."
162 |         (DOC_DIR / "company_info.txt").write_text(doc1_content.strip(), encoding='utf-8')
163 |         doc2_content = "The research paper 'Quantum Entanglement in Nanostructures' by Dr. Alice Smith cites work by Dr. Evelyn Reed on early quantum theories. Dr. Reed is also known for her work at Acme Innovations."
164 |         (DOC_DIR / "research_paper_snippet.txt").write_text(doc2_content.strip(), encoding='utf-8')
165 | 
166 |         with SafeStore(db_path=DB_FILE) as store:
167 |             store.add_document(DOC_DIR / "company_info.txt")
168 |             store.add_document(DOC_DIR / "research_paper_snippet.txt")
169 | 
170 |             print_header("PASS 1: Building Graph with DETAILED Ontology")
171 |             graph_store_detailed = GraphStore(store=store, llm_executor_callback=llm_executor_callback, ontology=DETAILED_ONTOLOGY)
172 |             graph_store_detailed.build_graph_for_all_documents()
173 |             ASCIIColors.success("Graph building with detailed ontology complete.")
174 | 
175 |             print_header("DEMO 1.1: RAG Query (Who is Dr. Evelyn Reed?)")
176 |             query = "Who is Dr. Evelyn Reed and what companies is she associated with?"
177 |             result = graph_store_detailed.query_graph(query, output_mode="full")
178 |             full_answer = generate_answer_from_context(query, result.get('graph'), result.get('chunks'))
179 |             ASCIIColors.green("Final Answer (from Graph + Chunks):")
180 |             print(full_answer)
181 | 
182 |             print_header("DEMO 1.2: Manually Editing the Graph")
183 |             ASCIIColors.info("We will manually add a new product 'ChronoLeap' and link it to an 'Acme' company.")
184 | 
185 |             company_nodes = graph_store_detailed.get_nodes_by_label("Company")
186 |             acme_node = next((n for n in company_nodes if 'acme' in n.get('properties', {}).get('name', '').lower()), None)
187 | 
188 |             if acme_node:
189 |                 acme_id = acme_node['node_id']
190 |                 acme_name = acme_node['properties']['name']
191 |                 ASCIIColors.info(f"Found '{acme_name}' with Node ID: {acme_id}")
192 | 
193 |                 product_id = graph_store_detailed.add_node(label="Product", properties={"name": "ChronoLeap"})
194 |                 ASCIIColors.info(f"Created new 'ChronoLeap' product with Node ID: {product_id}")
195 | 
196 |                 rel_id = graph_store_detailed.add_relationship(acme_id, product_id, "PRODUCES")
197 |                 ASCIIColors.info(f"Linked them with 'PRODUCES' relationship (ID: {rel_id})")
198 | 
199 |                 print_header("DEMO 1.3: Querying the Manually Added Data")
200 |                 manual_query = "What new products does Acme produce?"
201 |                 manual_result = graph_store_detailed.query_graph(manual_query, output_mode="full")
202 |                 manual_answer = generate_answer_from_context(manual_query, manual_result.get('graph'))
203 |                 ASCIIColors.green("Final Answer (from Graph-Only):")
204 |                 print(manual_answer)
205 |             else:
206 |                 ASCIIColors.warning("Could not find any 'Acme' company node to perform manual edit demo.")
207 | 
208 |             print_header("PASS 2: Rebuilding Graph with SIMPLE Ontology")
209 |             clear_graph_data(store.conn)
210 | 
211 |             graph_store_simple = GraphStore(store=store, llm_executor_callback=llm_executor_callback, ontology=SIMPLE_ONTOLOGY)
212 |             graph_store_simple.build_graph_for_all_documents()
213 |             ASCIIColors.success("Graph building with simple ontology complete.")
214 | 
215 |             print_header("DEMO 2.1: Observing the new simple graph structure")
216 |             simple_nodes = graph_store_simple.get_nodes_by_label("Entity", limit=10)
217 |             ASCIIColors.blue("\nNodes extracted with the simple 'Entity' label:")
218 |             if simple_nodes:
219 |                 for n in simple_nodes: print(f" - ID: {n['node_id']}, Props: {n.get('properties')}")
220 |             else:
221 |                 print(" No 'Entity' nodes found.")
222 | 
223 |             print_header("PASS 3: Rebuilding Graph with STRING-BASED Ontology")
224 |             clear_graph_data(store.conn)
225 | 
226 |             graph_store_string = GraphStore(store=store, llm_executor_callback=llm_executor_callback, ontology=STRING_ONTOLOGY)
227 |             graph_store_string.build_graph_for_all_documents()
228 |             ASCIIColors.success("Graph building with string-based ontology complete.")
229 | 
230 |             print_header("DEMO 3.1: Observing the graph from string ontology")
231 |             string_nodes_viz = graph_store_string.get_all_nodes_for_visualization(limit=15)
232 |             ASCIIColors.blue("\nNodes extracted with the string ontology:")
233 |             if string_nodes_viz:
234 |                 for n in string_nodes_viz: print(f" - Label: {n['label']}, Props: {n.get('properties')}")
235 |             else:
236 |                 print(" No nodes found.")
237 | 
238 | 
239 |     except Exception as e:
240 |         ASCIIColors.error(f"An unexpected error occurred in the main process: {e}")
241 |         trace_exception(e)
242 |     finally:
243 |         print_header("Example Finished")
244 |         ASCIIColors.info(f"Database file is at: {Path(DB_FILE).resolve()}")
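The example above assumes a running Ollama server. The llm_executor_callback contract itself is just a callable that receives the fully formatted prompt string and returns a string, so the wiring can be exercised offline with a stub. Below is a minimal sketch under two stated assumptions: the empty extraction result is tolerated by GraphStore, and the exact JSON it really expects is defined by the prompts shipped in safe_store/graph/prompts; the stub and database path are hypothetical, not part of the repository:

    import json
    from safe_store import GraphStore, SafeStore

    def stub_llm_callback(full_prompt: str) -> str:
        # Hypothetical stand-in for a real LLM backend: it always reports that nothing was extracted,
        # so build_graph_for_all_documents() can run without contacting any server.
        # A real callback must return the JSON structure requested by the extraction prompt.
        return json.dumps({"nodes": [], "relationships": []})

    tiny_ontology = {
        "nodes": {"Entity": {"description": "Any named thing.", "properties": {"name": "string"}}},
        "relationships": {"IS_RELATED_TO": {"description": "Generic link.", "source": "Entity", "target": "Entity"}},
    }

    with SafeStore(db_path="stub_graph_store.db") as store:  # placeholder throwaway database
        graph_store = GraphStore(store=store, llm_executor_callback=stub_llm_callback, ontology=tiny_ontology)
        graph_store.build_graph_for_all_documents()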
--------------------------------------------------------------------------------