├── sample_docs
│   ├── new_added.txt
│   ├── Can LLMs Generate Novel Research Ideas.pdf
│   └── healthcare_records.json
├── assets
│   ├── chat_snapshot.png
│   └── nexusync_logo.png
├── MANIFEST.in
├── src
│   └── nexusync
│       ├── core
│       │   ├── __init__.py
│       │   ├── querier.py
│       │   ├── indexing_functions.py
│       │   ├── chat_engine.py
│       │   └── indexer.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── embedding_models.py
│       │   └── language_models.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── logging_config.py
│       │   └── file_operations.py
│       ├── __init__.py
│       └── nexusync.py
├── requirements.txt
├── dummy_dataset.json
├── LICENSE.txt
├── setup.py
├── .gitignore
├── back_end_api.py
├── README.md
├── index.html
└── notebooks
    ├── data_structure_generator.ipynb
    └── NHS_Application_Test.ipynb
/sample_docs/new_added.txt:
--------------------------------------------------------------------------------
1 | Breaking News: Trump and Harris had a fight!!!!
--------------------------------------------------------------------------------
/assets/chat_snapshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zakk-Yang/nexusync/HEAD/assets/chat_snapshot.png
--------------------------------------------------------------------------------
/assets/nexusync_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zakk-Yang/nexusync/HEAD/assets/nexusync_logo.png
--------------------------------------------------------------------------------
/sample_docs/Can LLMs Generate Novel Research Ideas.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zakk-Yang/nexusync/HEAD/sample_docs/Can LLMs Generate Novel Research Ideas.pdf
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE.txt
2 | include README.md
3 | include requirements.txt
4 | recursive-include src *.py
5 | recursive-include docs *.md
6 | recursive-include tests *.py
--------------------------------------------------------------------------------
/src/nexusync/core/__init__.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/core/__init__.py
2 | from .indexer import Indexer
3 | from .querier import Querier
4 | from .chat_engine import ChatEngine
5 |
6 | __all__ = ["Indexer", "Querier", "ChatEngine"]
7 |
--------------------------------------------------------------------------------
/src/nexusync/models/__init__.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/models/__init__.py
2 |
3 | from .embedding_models import set_embedding_model
4 | from .language_models import set_language_model
5 |
6 | __all__ = ["set_embedding_model", "set_language_model"]
7 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | llama_index
2 | llama-index-llms-ollama
3 | llama-index-embeddings-huggingface
4 | chromadb
5 | llama-index-vector-stores-chroma
6 | transformers>=4.45.2
7 | python-pptx
8 | Pillow
9 | docx2txt
10 | openpyxl
11 | python-dotenv
12 | spacy
13 | flask
14 | llama-index-llms-openai
15 | llama-index-embeddings-openai
--------------------------------------------------------------------------------
/src/nexusync/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/utils/__init__.py
2 |
3 | from .logging_config import get_logger
4 | from .file_operations import get_all_files, get_file_hash, get_changed_files
5 |
6 | __all__ = [
7 | "get_logger",
8 | "get_all_files",
9 | "get_file_hash",
10 | "get_changed_files",
11 | ]
12 |
--------------------------------------------------------------------------------
/src/nexusync/__init__.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/__init__.py
2 |
3 | from .core.indexer import Indexer
4 | from .core.querier import Querier
5 | from .core.chat_engine import ChatEngine
6 | from .core.indexing_functions import rebuild_index
7 | from .nexusync import NexuSync
8 |
9 | __all__ = [
10 | "NexuSync",
11 | "Indexer",
12 | "Querier",
13 | "ChatEngine",
14 | "rebuild_index",
15 | ]
16 |
--------------------------------------------------------------------------------
/dummy_dataset.json:
--------------------------------------------------------------------------------
1 | {
2 | "demographics": {
3 | "patient_id": "P123456789",
4 | "first_name": "Jane",
5 | "last_name": "Doe",
6 | "date_of_birth": "1985-07-24",
7 | "gender": "Female",
8 | "contact_information": {
9 | "address": "123 Elm Street, Springfield, IL, 62704",
10 | "phone": "+44 7911 123456",
11 | "email": "jane.doe@example.com"
12 | }
13 | },
14 | "emergency_contact": {
15 | "name": "John Doe",
16 | "relationship": "Spouse",
17 | "phone": "+44 7911 654321"
18 | }
19 | }
--------------------------------------------------------------------------------
/src/nexusync/utils/logging_config.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/utils/logging_config.py
2 |
3 |
4 | import logging
5 | import warnings
6 |
7 |
8 | def silence_all_warnings():
9 | # Ignore all warnings
10 | warnings.filterwarnings("ignore")
11 |
12 |
13 | def get_logger(name):
14 | # Silence all warnings
15 | silence_all_warnings()
16 |
17 | logger = logging.getLogger(name)
18 |
19 | if not logger.handlers:
20 | logger.setLevel(logging.INFO)
21 | handler = logging.StreamHandler()
22 | formatter = logging.Formatter(
23 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
24 | )
25 | handler.setFormatter(formatter)
26 | logger.addHandler(handler)
27 | logger.propagate = False # Prevent propagation to ancestor loggers
28 |
29 | return logger
30 |
--------------------------------------------------------------------------------
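A minimal usage sketch for the logging helper above (the logger name below is an arbitrary example):

```python
from nexusync.utils.logging_config import get_logger

# First call attaches a stream handler and formatter; later calls reuse them.
logger = get_logger("nexusync.example")
logger.info("Handler, formatter, and propagation are configured on first use.")
```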
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Zakk-Yang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # setup.py
2 |
3 | from setuptools import setup, find_packages
4 |
5 | with open("README.md", "r", encoding="utf-8") as fh:
6 | long_description = fh.read()
7 |
8 | setup(
9 | packages=find_packages(where="src"),
10 | package_dir={"": "src"},
11 | name="nexusync",
12 | version="0.3.6",
13 | author="Zakk Yang",
14 | author_email="zakkyang@protonmail.com",
15 | description="A powerful document indexing and querying tool built on top of LlamaIndex",
16 | long_description=long_description,
17 | long_description_content_type="text/markdown",
18 | url="https://github.com/Zakk-Yang/nexusync.git",
19 | classifiers=[
20 | "Programming Language :: Python :: 3",
21 | "License :: OSI Approved :: MIT License",
22 | "Operating System :: OS Independent",
23 | ],
24 | python_requires=">=3.10",
25 | install_requires=[
26 | "llama_index",
27 | "llama-index-llms-ollama",
28 | "llama-index-embeddings-huggingface",
29 | "chromadb",
30 | "llama-index-vector-stores-chroma",
31 | "transformers>=4.45.2",
32 | "python-pptx",
33 | "Pillow",
34 | "docx2txt",
35 | "openpyxl",
36 | "python-dotenv",
37 | "spacy",
38 |         "flask",
39 |         "llama-index-llms-openai",
40 |         "llama-index-embeddings-openai",
41 |     ],
42 |     include_package_data=True,  # Ensures files specified in MANIFEST.in are included
43 | )
44 |
--------------------------------------------------------------------------------
/src/nexusync/models/embedding_models.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/models/embedding_models.py
2 |
3 | from typing import Optional
4 | from llama_index.embeddings.huggingface import HuggingFaceEmbedding
5 | from llama_index.embeddings.openai import OpenAIEmbedding
6 | from llama_index.core import Settings
7 | import os
8 | from dotenv import load_dotenv
9 | from nexusync.utils.logging_config import get_logger
10 |
11 |
12 | def set_embedding_model(
13 | openai_model: Optional[str] = None, huggingface_model: Optional[str] = None
14 | ) -> None:
15 | """
16 | Set up the embedding model for the index.
17 |
18 | Args:
19 | openai_model (Optional[str]): Name of the OpenAI embedding model.
20 | huggingface_model (Optional[str]): Name of the HuggingFace embedding model.
21 |
22 | Raises:
23 | ValueError: If both or neither embedding model is specified.
24 | """
25 |     logger = get_logger("nexusync.models.embedding_models.set_embedding_model")
26 | load_dotenv()
27 |
28 | if (openai_model and huggingface_model) or (
29 | not openai_model and not huggingface_model
30 | ):
31 | raise ValueError(
32 | "Specify either OpenAI or HuggingFace embedding model, not both or neither."
33 | )
34 |
35 | if openai_model:
36 | openai_api_key = os.getenv("OPENAI_API_KEY")
37 | if not openai_api_key:
38 | raise ValueError("OpenAI API key not found in environment variables.")
39 | Settings.embed_model = OpenAIEmbedding(
40 | model=openai_model, api_key=openai_api_key
41 | )
42 | logger.info(f"Using OpenAI embedding model: {openai_model}")
43 | else:
44 | Settings.embed_model = HuggingFaceEmbedding(model_name=huggingface_model)
45 | logger.info(f"Using HuggingFace embedding model: {huggingface_model}")
46 |
--------------------------------------------------------------------------------
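A usage sketch for `set_embedding_model`: exactly one backend must be specified, or a `ValueError` is raised. The model names below are the ones used elsewhere in this repository:

```python
from nexusync.models.embedding_models import set_embedding_model

# Local HuggingFace embeddings:
set_embedding_model(huggingface_model="BAAI/bge-base-en-v1.5")

# Or OpenAI embeddings (requires OPENAI_API_KEY in the environment or .env):
# set_embedding_model(openai_model="text-embedding-3-large")
```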
/src/nexusync/models/language_models.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/models/language_models.py
2 |
3 | from typing import Optional
4 | from llama_index.llms.ollama import Ollama
5 | from llama_index.llms.openai import OpenAI
6 | from llama_index.core import Settings
7 | import os
8 | from dotenv import load_dotenv
9 | from nexusync.utils.logging_config import get_logger
10 |
11 |
12 | def set_language_model(
13 | openai_model: Optional[str] = None,
14 | ollama_model: Optional[str] = None,
15 | temperature: Optional[float] = 0.7,
16 | base_url: Optional[str] = None,
17 | ) -> None:
18 | """
19 | Set up the language model for the index.
20 |
21 | Args:
22 | openai_model (Optional[str]): Name of the OpenAI model.
23 | ollama_model (Optional[str]): Name of the Ollama model.
24 | temperature (Optional[float]): Temperature for the language model.
25 |         base_url (Optional[str]): Ollama base URL (defaults to http://localhost:11434 when omitted).
26 |
27 | Raises:
28 | ValueError: If both or neither model is specified, or if OpenAI API key is missing.
29 | """
30 |     logger = get_logger("nexusync.models.language_models.set_language_model")
31 | load_dotenv()
32 |
33 | if (openai_model and ollama_model) or (not openai_model and not ollama_model):
34 | raise ValueError("Specify either OpenAI or Ollama model, not both or neither.")
35 |
36 | if openai_model:
37 | openai_api_key = os.getenv("OPENAI_API_KEY")
38 | if not openai_api_key:
39 | raise ValueError("OpenAI API key not found in environment variables.")
40 | Settings.llm = OpenAI(
41 | model=openai_model, temperature=temperature, api_key=openai_api_key
42 | )
43 | logger.info(f"Using OpenAI LLM model: {openai_model}")
44 |     else:
45 |         # Fall back to Ollama's default local endpoint when no base_url is given
46 |         base_url = base_url or "http://localhost:11434"
47 |         Settings.llm = Ollama(
48 |             model=ollama_model, temperature=temperature, base_url=base_url
49 |         )
50 |         logger.info(f"Ollama LLM initialized with model: {ollama_model} and base_url: {base_url}")
51 |
--------------------------------------------------------------------------------
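The same pattern applies to `set_language_model`; again exactly one backend may be chosen (model names as used in `back_end_api.py`):

```python
from nexusync.models.language_models import set_language_model

# Local Ollama model:
set_language_model(ollama_model="llama3.2", temperature=0.4, base_url="http://localhost:11434")

# Or OpenAI (requires OPENAI_API_KEY):
# set_language_model(openai_model="gpt-4o-mini", temperature=0.4)
```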
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # Distribution / packaging
7 | .Python
8 | build/
9 | develop-eggs/
10 | dist/
11 | downloads/
12 | eggs/
13 | .eggs/
14 | lib/
15 | lib64/
16 | parts/
17 | sdist/
18 | var/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 |
23 | # PyInstaller
24 | # Usually these files are written by a python script from a template
25 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
26 | *.manifest
27 | *.spec
28 |
29 | # Installer logs
30 | pip-log.txt
31 | pip-delete-this-directory.txt
32 |
33 | # Unit test / coverage reports
34 | htmlcov/
35 | .tox/
36 | .coverage
37 | .coverage.*
38 | .cache
39 | nosetests.xml
40 | coverage.xml
41 | *.cover
42 | *.py,cover
43 | .hypothesis/
44 | .pytest_cache/
45 |
46 | # Translations
47 | *.mo
48 | *.pot
49 |
50 | # Django stuff:
51 | *.log
52 | local_settings.py
53 | db.sqlite3
54 | db.sqlite3-journal
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 | env/
85 |
86 | # Spyder project settings
87 | .spyderproject
88 | .spyproject
89 |
90 | # Rope project settings
91 | .ropeproject
92 |
93 |
94 | # Project data and index directories
95 | documents/
96 | storage/
97 | chroma_db/
98 | index_storage/
99 | indexed_files.json
100 |
101 | # OS-specific files
102 | .DS_Store
103 | Thumbs.db
104 |
105 | # Temporary files
106 | *.tmp
107 |
108 | # Scratch files
109 | test.ipynb
110 | test.py
--------------------------------------------------------------------------------
/src/nexusync/utils/file_operations.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/utils/file_operations.py
2 |
3 | import os
4 | from typing import List, Tuple
5 | import hashlib
6 |
7 |
8 | def get_all_files(directory: str, recursive: bool = True) -> List[str]:
9 | """
10 | Get all file paths in the given directory.
11 |
12 | Args:
13 | directory (str): The directory to search for files.
14 | recursive (bool): If True, search subdirectories as well. Defaults to True.
15 |
16 | Returns:
17 | List[str]: A list of file paths.
18 | """
19 | file_paths = []
20 | if recursive:
21 | for root, _, files in os.walk(directory):
22 | for file in files:
23 | file_paths.append(os.path.join(root, file))
24 | else:
25 | file_paths = [
26 | os.path.join(directory, f)
27 | for f in os.listdir(directory)
28 | if os.path.isfile(os.path.join(directory, f))
29 | ]
30 | return file_paths
31 |
32 |
33 | def get_file_hash(file_path: str) -> str:
34 | """
35 | Compute the MD5 hash of a file.
36 |
37 | Args:
38 | file_path (str): The path to the file.
39 |
40 | Returns:
41 | str: The MD5 hash of the file.
42 | """
43 | hasher = hashlib.md5()
44 | with open(file_path, "rb") as file:
45 | buf = file.read()
46 | hasher.update(buf)
47 | return hasher.hexdigest()
48 |
49 |
50 | def get_changed_files(
51 | directory: str, old_hashes: dict
52 | ) -> Tuple[List[str], List[str], List[str]]:
53 | """
54 | Determine which files in the directory have been added, modified, or deleted.
55 |
56 | Args:
57 | directory (str): The directory to check for changes.
58 | old_hashes (dict): A dictionary of file paths and their previous hashes.
59 |
60 | Returns:
61 | Tuple[List[str], List[str], List[str]]: Lists of added, modified, and deleted file paths.
62 | """
63 | current_files = get_all_files(directory)
64 | current_hashes = {file: get_file_hash(file) for file in current_files}
65 |
66 | added = [file for file in current_files if file not in old_hashes]
67 | modified = [
68 | file
69 | for file in current_files
70 | if file in old_hashes and current_hashes[file] != old_hashes[file]
71 | ]
72 | deleted = [file for file in old_hashes if file not in current_files]
73 |
74 | return added, modified, deleted
75 |
--------------------------------------------------------------------------------
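A short sketch of the change-detection round trip these helpers enable (the directory path is an example from this repository):

```python
from nexusync.utils.file_operations import get_all_files, get_file_hash, get_changed_files

directory = "sample_docs"
old_hashes = {path: get_file_hash(path) for path in get_all_files(directory)}

# ... files get added, edited, or removed on disk ...

added, modified, deleted = get_changed_files(directory, old_hashes)
print(f"added={len(added)}, modified={len(modified)}, deleted={len(deleted)}")
```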
/src/nexusync/core/querier.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/core/querier.py
2 |
3 | from typing import List, Optional, Dict, Any
4 | from llama_index.core import (
5 | VectorStoreIndex,
6 | PromptTemplate,
7 | )
8 | import logging
9 | from llama_index.core.postprocessor import SentenceEmbeddingOptimizer
10 | from llama_index.core.postprocessor import KeywordNodePostprocessor
11 | from nexusync.utils.logging_config import get_logger
12 |
13 |
14 | class Querier:
15 | def __init__(self, index: VectorStoreIndex):
16 | """
17 | Initialize the Querier with a VectorStoreIndex.
18 |
19 | Args:
20 | index (VectorStoreIndex): The index to be used for querying.
21 | """
22 | self.index = index
23 | self.logger = get_logger("nexusync.core.querier")
24 |
25 | def query(
26 | self, text_qa_template: str, query: str, similarity_top_k: int = 3
27 | ) -> Dict[str, Any]:
28 | """
29 | Query the index using a query engine.
30 |
31 | Args:
32 | text_qa_template (str): The template for the QA prompt.
33 | query (str): The query string.
34 | similarity_top_k (int, optional): Number of top similar documents to consider. Defaults to 3.
35 |
36 | Returns:
37 | Dict[str, Any]: A dictionary containing the response and metadata.
38 | """
39 | try:
40 | qa_template = PromptTemplate(text_qa_template)
41 | query_engine = self.index.as_query_engine(
42 | text_qa_template=qa_template,
43 | similarity_top_k=similarity_top_k,
44 | node_postprocessors=[
45 | SentenceEmbeddingOptimizer(percentile_cutoff=0.5),
46 | KeywordNodePostprocessor(required_keywords=[]),
47 | ],
48 | )
49 |
50 | response = query_engine.query(query)
51 |
52 | answer = str(response)
53 | metadata = {"sources": []}
54 |
55 | if hasattr(response, "source_nodes"):
56 | for node in response.source_nodes:
57 | source_info = {
58 | "source_text": node.node.get_text(),
59 | "metadata": node.node.metadata,
60 | }
61 | metadata["sources"].append(source_info)
62 |
63 | return {"response": answer, "metadata": metadata}
64 |
65 | except Exception as e:
66 | self.logger.error(f"An error occurred during query: {e}", exc_info=True)
67 | return {
68 | "response": f"An error occurred while processing your request: {str(e)}",
69 | "metadata": {},
70 | }
71 |
72 | def get_relevant_documents(
73 | self, query: str, num_docs: int = 3
74 | ) -> List[Dict[str, Any]]:
75 | """
76 | Retrieve the most relevant documents for a given query.
77 |
78 | Args:
79 | query (str): The query string.
80 | num_docs (int): The number of documents to retrieve. Defaults to 3.
81 |
82 | Returns:
83 | List[Dict[str, Any]]: A list of dictionaries containing document info and relevance scores.
84 | """
85 | try:
86 | retriever = self.index.as_retriever(similarity_top_k=num_docs)
87 | nodes = retriever.retrieve(query)
88 |
89 | relevant_docs = []
90 | for node in nodes:
91 | doc_info = {
92 | "content": node.node.get_text(),
93 | "metadata": node.node.metadata,
94 | "score": node.score,
95 | }
96 | relevant_docs.append(doc_info)
97 |
98 | return relevant_docs
99 |
100 | except Exception as e:
101 | self.logger.error(
102 | f"An error occurred while retrieving relevant documents: {e}",
103 | exc_info=True,
104 | )
105 | return []
106 |
--------------------------------------------------------------------------------
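A sketch of using `Querier` against a previously persisted index; it assumes `index_storage` already exists and that the embedding/LLM `Settings` have been configured (see the models modules earlier in this snapshot):

```python
from llama_index.core import StorageContext, load_index_from_storage
from nexusync.core.querier import Querier

# Load an index persisted earlier (e.g. by Indexer.initialize_index()).
storage_context = StorageContext.from_defaults(persist_dir="index_storage")
index = load_index_from_storage(storage_context)

template = "Context Information:\n{context_str}\nQuery: {query_str}\nAnswer: "
result = Querier(index=index).query(text_qa_template=template, query="What is in the new document?")
print(result["response"])
for source in result["metadata"].get("sources", []):
    print(source["metadata"].get("file_path", "unknown"))
```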
/src/nexusync/core/indexing_functions.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/core/indexing_functions.py
2 |
3 | import os
4 | import shutil
5 | from typing import List, Optional
6 |
7 | import chromadb
8 | from llama_index.core import (
9 |     Settings,
10 |     SimpleDirectoryReader,
11 |     StorageContext,
12 |     VectorStoreIndex,
13 |     load_index_from_storage,
14 | )
15 | from llama_index.vector_stores.chroma import ChromaVectorStore
16 |
17 | from nexusync.models.embedding_models import set_embedding_model
18 | from nexusync.models.language_models import set_language_model
19 | from nexusync.utils.logging_config import get_logger
20 |
21 | logger = get_logger("nexusync.core.indexing_functions")
22 |
23 |
24 | def rebuild_index(
25 |     input_dirs: List[str],
26 |     openai_model_yn: bool,
27 |     embedding_model: str,
28 |     language_model: str,
29 |     temperature: float,
30 |     chroma_db_dir: str,
31 |     index_persist_dir: str,
32 |     chroma_collection_name: str,
33 |     chunk_overlap: int,
34 |     chunk_size: int,
35 |     recursive: bool,
36 |     base_url: Optional[str] = None,
37 | ):
38 |     """
39 |     Standalone function to rebuild the index from scratch.
40 |
41 |     This function can be called independently of NexuSync initialization. It
42 |     deletes any persisted index and Chroma database before rebuilding both
43 |     from the documents in input_dirs.
44 |     """
45 |     logger.info("Starting index rebuild process...")
46 |
47 |     Settings.chunk_overlap = chunk_overlap
48 |     Settings.chunk_size = chunk_size
49 |     # Initialize the embedding and language models
50 |     if openai_model_yn:
51 |         set_embedding_model(openai_model=embedding_model)
52 |         set_language_model(openai_model=language_model)
53 |     else:
54 |         set_embedding_model(huggingface_model=embedding_model)
55 |         set_language_model(
56 |             ollama_model=language_model, temperature=temperature, base_url=base_url
57 |         )
58 |
59 |     # Step 1: Delete the existing index directory
60 |     if os.path.exists(index_persist_dir):
61 |         logger.info(f"Deleting existing index directory: {index_persist_dir}")
62 |         shutil.rmtree(index_persist_dir)
63 |     else:
64 |         logger.warning(
65 |             f"Index directory {index_persist_dir} does not exist. Skipping deletion."
66 |         )
67 |
68 |     # Step 2: Delete the Chroma database directory
69 |     if os.path.exists(chroma_db_dir):
70 |         logger.info(f"Deleting existing Chroma DB directory: {chroma_db_dir}")
71 |         shutil.rmtree(chroma_db_dir)
72 |     else:
73 |         logger.warning(
74 |             f"Chroma DB directory {chroma_db_dir} does not exist. Skipping deletion."
75 |         )
76 |
77 |     try:
78 |         # After the deletions above this load is expected to fail, falling
79 |         # through to the rebuild in the except branch.
80 |         storage_context = StorageContext.from_defaults(persist_dir=index_persist_dir)
81 |         index = load_index_from_storage(storage_context)
82 |         logger.info("Index already built. Loading from disk.")
83 |     except FileNotFoundError:
84 |         logger.warning("Index not found. Building a new index.")
85 |         document_list = []
86 |         total_files = 0
87 |         for file_path in input_dirs:
88 |             if not os.path.isdir(file_path):
89 |                 logger.error(f"Directory {file_path} does not exist.")
90 |                 raise ValueError(f"Directory {file_path} does not exist.")
91 |             # Count files before loading (only the top level when not recursive)
92 |             file_count = sum(
93 |                 len(files)
94 |                 for root, _, files in os.walk(file_path)
95 |                 if recursive or root == file_path
96 |             )
97 |             total_files += file_count
98 |             documents = SimpleDirectoryReader(
99 |                 file_path, filename_as_id=True, recursive=recursive
100 |             ).load_data()
101 |             logger.info(f"Loaded {file_count} files from {file_path}.")
102 |             document_list.extend(documents)
103 |
104 |         if not document_list:
105 |             logger.error("No documents found to build the index.")
106 |             raise ValueError("No documents found to build the index.")
107 |
108 |         index = VectorStoreIndex.from_documents(document_list)
109 |         index.storage_context.persist(persist_dir=index_persist_dir)
110 |         chroma_client = chromadb.PersistentClient(path=chroma_db_dir)
111 |         chroma_collection = chroma_client.get_or_create_collection(
112 |             chroma_collection_name
113 |         )
114 |         vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
115 |         storage_context = StorageContext.from_defaults(
116 |             persist_dir=index_persist_dir, vector_store=vector_store
117 |         )
118 |
119 |         logger.info("Index Built.")
120 |     except Exception as e:
121 |         logger.error(f"An unexpected error occurred during rebuild: {e}")
122 |         raise
123 |
--------------------------------------------------------------------------------
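A sketch of calling `rebuild_index` directly; the argument values mirror the configuration constants in `back_end_api.py`:

```python
from nexusync import rebuild_index

rebuild_index(
    input_dirs=["sample_docs/"],
    openai_model_yn=False,
    embedding_model="BAAI/bge-base-en-v1.5",
    language_model="llama3.2",
    temperature=0.4,
    chroma_db_dir="chroma_db",
    index_persist_dir="index_storage",
    chroma_collection_name="my_collection",
    chunk_overlap=20,
    chunk_size=1024,
    recursive=True,
    base_url="http://localhost:11434",  # only used for the Ollama path
)
```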
/src/nexusync/nexusync.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/nexusync.py
2 |
3 | from .core.indexer import Indexer
4 | from .core.querier import Querier
5 | from .core.chat_engine import ChatEngine
6 | from .models.embedding_models import set_embedding_model
7 | from .models.language_models import set_language_model
8 | from nexusync.utils.logging_config import get_logger
9 | from typing import List, Dict, Any
10 | from dotenv import load_dotenv
11 |
12 |
13 | class NexuSync:
14 | def __init__(
15 | self,
16 | input_dirs: List[str],
17 | openai_model_yn: bool = None,
18 | language_model: str = None,
19 | base_url: str = None,
20 | embedding_model: str = None,
21 | temperature: float = 0.4,
22 | chroma_db_dir: str = "chroma_db",
23 | index_persist_dir: str = "index_storage",
24 | chroma_collection_name: str = "my_collection",
25 | chunk_size: int = 1024,
26 | chunk_overlap: int = 20,
27 | recursive: bool = True,
28 | ):
29 | load_dotenv()
30 | self.logger = get_logger("nexusync.NexuSync")
31 | self.input_dirs = input_dirs
32 | self.embedding_model = embedding_model
33 | self.language_model = language_model
34 | self.base_url = str(base_url) if base_url else None
35 | self.temperature = temperature
36 | self.chroma_db_dir = chroma_db_dir
37 | self.index_persist_dir = index_persist_dir
38 | self.chroma_collection_name = chroma_collection_name
39 | self.chunk_size = chunk_size
40 | self.chunk_overlap = chunk_overlap
41 | self.recursive = recursive
42 | self.openai_model_yn = openai_model_yn
43 | self._initialize_models()
44 | self.indexer = Indexer(
45 | input_dirs=self.input_dirs,
46 | recursive=self.recursive,
47 | chroma_db_dir=self.chroma_db_dir,
48 | index_persist_dir=self.index_persist_dir,
49 | chroma_collection_name=self.chroma_collection_name,
50 | chunk_size=self.chunk_size,
51 | chunk_overlap=self.chunk_overlap,
52 | )
53 |         self.index_vector_store = self.indexer.initialize_index()
54 |         self.logger.info("Index initialized successfully.")
55 |
56 | # Initialize querier with the indexer
57 | self.querier = Querier(index=self.index_vector_store)
58 |
59 | # Initialize chat engine with the indexer
60 | self.chat_engine = ChatEngine(index=self.index_vector_store)
61 |
62 | def _initialize_models(self):
63 | # Initialize the embedding and language model
64 | if self.openai_model_yn:
65 | set_embedding_model(openai_model=self.embedding_model)
66 | set_language_model(
67 | openai_model=self.language_model, temperature=self.temperature
68 | )
69 |
70 | else:
71 | set_embedding_model(huggingface_model=self.embedding_model)
72 | set_language_model(
73 | ollama_model=self.language_model,
74 | temperature=self.temperature,
75 | base_url=self.base_url,
76 | )
77 |
78 | def initialize_stream_chat(
79 | self,
80 | text_qa_template: str,
81 | chat_mode: str = "context",
82 | similarity_top_k: int = 3,
83 | ):
84 | self.chat_engine.initialize_chat_engine(
85 | text_qa_template=text_qa_template,
86 | chat_mode=chat_mode,
87 | similarity_top_k=similarity_top_k,
88 | )
89 |
90 | def start_chat_stream(self, query: str):
91 | if not self.chat_engine:
92 | raise ValueError(
93 | "Chat engine not initialized. Call initialize_stream_chat first."
94 | )
95 | return self.chat_engine.chat_stream(query)
96 |
97 | def start_query(
98 | self, text_qa_template: str, query: str, similarity_top_k: int = 3
99 | ) -> Dict[str, Any]:
100 | """
101 | Start a query using the initialized Querier.
102 |
103 | Args:
104 | text_qa_template (str): The template for the QA prompt.
105 | query (str): The query string.
106 | similarity_top_k (int, optional): Number of top similar documents to consider. Defaults to 3.
107 |
108 | Returns:
109 | Dict[str, Any]: A dictionary containing the response and metadata.
110 |
111 | Raises:
112 | ValueError: If the Querier is not initialized.
113 | """
114 |         if not self.querier:
115 |             self.logger.error("Querier not initialized; NexuSync construction did not complete.")
116 |             raise ValueError("Querier not initialized; NexuSync construction did not complete.")
117 |
118 | try:
119 | self.logger.info(f"Starting query: {query}")
120 | response = self.querier.query(text_qa_template, query, similarity_top_k)
121 | self.logger.info("Query completed successfully.")
122 | return response
123 | except Exception as e:
124 | self.logger.error(
125 | f"An error occurred during query: {str(e)}", exc_info=True
126 | )
127 | return {
128 | "response": f"An error occurred while processing your request: {str(e)}",
129 | "metadata": {},
130 | }
131 |
132 | def refresh_index(self):
133 | self.indexer.refresh()
134 |
135 | def get_index_stats(self):
136 | return self.indexer.get_index_stats()
137 |
--------------------------------------------------------------------------------
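A minimal end-to-end sketch of the `NexuSync` facade above, using the local Ollama + HuggingFace configuration (model names and directories mirror `back_end_api.py`):

```python
from nexusync import NexuSync

ns = NexuSync(
    input_dirs=["sample_docs/"],
    openai_model_yn=False,
    embedding_model="BAAI/bge-base-en-v1.5",
    language_model="llama3.2",
    base_url="http://localhost:11434",
    temperature=0.4,
)

template = "Context Information:\n{context_str}\nQuery: {query_str}\nAnswer: "
result = ns.start_query(text_qa_template=template, query="Summarize the indexed documents.")
print(result["response"])
```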
/src/nexusync/core/chat_engine.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Any, List, Generator
2 | from llama_index.core import VectorStoreIndex, PromptTemplate
3 | from llama_index.core.memory import ChatMemoryBuffer
4 | from llama_index.core.postprocessor import (
5 | SentenceEmbeddingOptimizer,
6 | KeywordNodePostprocessor,
7 | )
8 | from nexusync.utils.logging_config import get_logger
9 |
10 |
11 | class ChatEngine:
12 | def __init__(self, index: VectorStoreIndex):
13 | """
14 | Initialize the ChatEngine with a VectorStoreIndex.
15 |
16 | Args:
17 | index (VectorStoreIndex): The index to be used for querying in chat.
18 | """
19 | self.logger = get_logger("nexusync.core.chat_engine")
20 | self.chat_engine = None
21 | self.chat_history = []
22 | self.index = index
23 |
24 | def initialize_chat_engine(
25 | self,
26 | text_qa_template: str,
27 | chat_mode: str = "context",
28 | similarity_top_k: int = 3,
29 | ):
30 | """
31 | Initialize the chat engine.
32 |
33 | Args:
34 | text_qa_template (str): The template for the QA prompt.
35 | chat_mode (str, optional): The mode for the chat engine. Defaults to 'context'.
36 | similarity_top_k (int, optional): Number of top similar documents to consider. Defaults to 3.
37 | """
38 | qa_template = PromptTemplate(text_qa_template)
39 | memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
40 | if not isinstance(self.index, VectorStoreIndex):
41 | raise ValueError("The index does not contain a valid VectorStoreIndex")
42 |
43 | self.chat_engine = self.index.as_chat_engine(
44 | memory=memory,
45 | chat_mode=chat_mode,
46 | text_qa_template=qa_template,
47 | similarity_top_k=similarity_top_k,
48 | node_postprocessors=[
49 | SentenceEmbeddingOptimizer(percentile_cutoff=0.7),
50 | KeywordNodePostprocessor(required_keywords=[]),
51 | ],
52 | )
53 | self.logger.info("Chat engine initialized")
54 |
55 | def chat(self, query: str) -> Dict[str, Any]:
56 | """
57 | Process a query using the chat engine.
58 |
59 | Args:
60 | query (str): The user's query string.
61 |
62 | Returns:
63 | Dict[str, Any]: A dictionary containing the response and metadata.
64 |
65 | Raises:
66 | ValueError: If the chat engine is not initialized.
67 | """
68 | if self.chat_engine is None:
69 | raise ValueError(
70 | "Chat engine not initialized. Call initialize_chat_engine first."
71 | )
72 |
73 | try:
74 | response = self.chat_engine.chat(query)
75 |
76 | answer = str(response)
77 | metadata: Dict[str, List[Dict[str, Any]]] = {"sources": []}
78 |
79 | if hasattr(response, "source_nodes"):
80 | for node in response.source_nodes:
81 | source_info = {
82 | "source_text": node.node.get_text(),
83 | "metadata": node.node.metadata,
84 | }
85 | metadata["sources"].append(source_info)
86 |
87 | self.chat_history.append({"query": query, "response": answer})
88 |
89 | return {"response": answer, "metadata": metadata}
90 |
91 | except Exception as e:
92 | self.logger.error(f"An error occurred during chat: {e}", exc_info=True)
93 | return {
94 | "response": f"An error occurred while processing your request: {str(e)}",
95 | "metadata": {},
96 | }
97 |
98 | def chat_stream(self, query: str) -> Generator[str | Dict[str, Any], None, None]:
99 | if self.chat_engine is None:
100 | raise ValueError(
101 | "Chat engine not initialized. Call initialize_chat_engine first."
102 | )
103 |
104 | try:
105 | response_stream = self.chat_engine.stream_chat(query)
106 |
107 | full_response = ""
108 | for token in response_stream.response_gen:
109 | full_response += token
110 | yield token # Yield each token as it's generated
111 |
112 | # After all tokens have been yielded, prepare and yield the final response with metadata
113 | metadata = {"sources": []}
114 | if hasattr(response_stream, "source_nodes"):
115 | for node in response_stream.source_nodes:
116 | source_info = {
117 | "source_text": node.node.get_text(),
118 | "metadata": node.node.metadata,
119 | }
120 | metadata["sources"].append(source_info)
121 |
122 | # Append to chat history
123 | self.chat_history.append({"query": query, "response": full_response})
124 |
125 | # Yield the final response with metadata
126 | yield {
127 | "response": full_response,
128 | "metadata": metadata,
129 | }
130 |
131 | except Exception as e:
132 | self.logger.error(
133 | f"An error occurred during chat streaming: {e}", exc_info=True
134 | )
135 | yield {
136 | "response": f"An error occurred while processing your request: {str(e)}",
137 | "metadata": {},
138 | }
139 |
140 | def clear_chat_history(self):
141 | self.chat_history = []
142 | self.logger.info("Chat history cleared")
143 |
144 | if hasattr(self.chat_engine, "memory") and self.chat_engine.memory is not None:
145 | self.chat_engine.memory.clear()
146 | self.logger.info("Chat engine memory cleared")
147 |
148 | def get_chat_history(self) -> List[Dict[str, str]]:
149 | """
150 | Get the current chat history.
151 |
152 | Returns:
153 | List[Dict[str, str]]: A list of dictionaries containing queries and responses.
154 | """
155 | return self.chat_history
156 |
--------------------------------------------------------------------------------
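Continuing the sketch above: streamed chat yields string tokens first and a final metadata dict last, matching the generator contract of `chat_stream` (`ns` and `template` are from the previous sketch):

```python
ns.initialize_stream_chat(text_qa_template=template, chat_mode="context", similarity_top_k=3)

for item in ns.start_chat_stream("What changed in the latest documents?"):
    if isinstance(item, str):
        print(item, end="", flush=True)  # incremental tokens
    else:
        sources = item["metadata"].get("sources", [])
        print(f"\n[{len(sources)} source chunk(s) used]")
```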
/src/nexusync/core/indexer.py:
--------------------------------------------------------------------------------
1 | # src/nexusync/core/indexer.py
2 |
3 | import os
4 | from typing import List, Optional, Dict, Any
5 | from llama_index.core import (
6 | VectorStoreIndex,
7 | SimpleDirectoryReader,
8 | StorageContext,
9 | load_index_from_storage,
10 | )
11 |
12 | from llama_index.vector_stores.chroma import ChromaVectorStore
13 | import chromadb
14 | from nexusync.utils.logging_config import get_logger
15 | from llama_index.core import Settings
16 |
17 |
18 | class Indexer:
19 | """
20 | Indexer is responsible for managing the indexing operations, including creating, refreshing,
21 | and deleting documents from the index. It supports integration with Chroma for efficient similarity search.
22 |
23 | Attributes:
24 | input_dirs (List[str]): A list of directory paths containing documents to be indexed.
25 | recursive (bool): Indicates if subdirectories within input_dirs should be scanned for documents.
26 | chroma_db_dir (str): The directory where the Chroma database is stored.
27 | index_persist_dir (str): The directory where the index is persisted to disk for future use.
28 | chroma_collection_name (str): The name of the collection within the Chroma database.
29 | index (VectorStoreIndex): The current index instance, loaded or created during initialization.
30 | logger (logging.Logger): A logger instance for logging operations and errors.
31 | storage_context (StorageContext): The context for managing the storage and loading of the index.
32 | """
33 |
34 | def __init__(
35 | self,
36 | input_dirs: List[str],
37 | recursive: bool = True,
38 | chroma_db_dir: str = "chroma_db",
39 | index_persist_dir: str = "index_storage",
40 | chroma_collection_name: str = "my_collection",
41 | chunk_size: int = 1024, # Default from llamaindex
42 | chunk_overlap: int = 20, # Default from llamaindex
43 | ):
44 | """
45 | Initialize the Indexer with the given parameters.
46 |
47 | Args:
48 | input_dirs (List[str]): Directories containing documents to be indexed.
49 | recursive (bool, optional): Scan subdirectories if True. Defaults to True.
50 | chroma_db_dir (str, optional): Directory for Chroma database. Defaults to "chroma_db".
51 | index_persist_dir (str, optional): Directory to persist the index. Defaults to "index_storage".
52 | chroma_collection_name (str, optional): Name of the Chroma collection. Defaults to "my_collection".
53 | chunk_size (int, optional): Size of each text chunk. Defaults to 1024.
54 | chunk_overlap (int, optional): Overlap between chunks. Defaults to 20.
55 |
56 | Note:
57 | The __init__ method doesn't create the index immediately. Instead, it calls the _initiate method,
58 | which either loads an existing index or builds a new one.
59 | """
60 | self.logger = get_logger("nexusync.core.indexer") # Use full logger name
61 | self.input_dirs = input_dirs
62 | self.recursive = recursive
63 | self.chroma_db_dir = chroma_db_dir
64 | self.index_persist_dir = index_persist_dir
65 | self.chroma_collection_name = chroma_collection_name
66 | self.chunk_size = chunk_size
67 | self.chunk_overlap = chunk_overlap
68 | self.index = None
69 | Settings.chunk_overlap = chunk_overlap
70 | Settings.chunk_size = chunk_size
71 |
72 | def initialize_index(self):
73 | """
74 | Load an existing index from storage or create a new one if not found.
75 |
76 | Raises:
77 | ValueError: If no documents are found in the specified directories.
78 | """
79 |
80 | try:
81 | self.storage_context = StorageContext.from_defaults(
82 | persist_dir=self.index_persist_dir
83 | )
84 | self.index = load_index_from_storage(self.storage_context)
85 | self.logger.info("Index already built. Loading from disk.")
86 | except FileNotFoundError:
87 | self.logger.warning("Index not found. Building a new index.")
88 | self.document_list = []
89 | total_files = 0
90 | for file_path in self.input_dirs:
91 | if not os.path.isdir(file_path):
92 | self.logger.error(f"Directory {file_path} does not exist.")
93 | raise ValueError(f"Directory {file_path} does not exist.")
94 |                 file_count = sum(len(files) for root, _, files in os.walk(file_path) if self.recursive or root == file_path)
95 |                 total_files += file_count
96 |                 documents = SimpleDirectoryReader(
97 |                     file_path, filename_as_id=True, recursive=self.recursive
98 |                 ).load_data()
99 |                 self.logger.info(f"Loaded {file_count} files from {file_path}.")
100 |                 self.document_list.extend(documents)
101 |
102 |             if not self.document_list:
103 |                 self.logger.error("No documents found to build the index.")
104 |                 raise ValueError("No documents found to build the index.")
105 |
106 |             self.index = VectorStoreIndex.from_documents(self.document_list)
107 |             self.index.storage_context.persist(persist_dir=self.index_persist_dir)
108 |             chroma_client = chromadb.PersistentClient(path=self.chroma_db_dir)
109 |             chroma_collection = chroma_client.get_or_create_collection(
110 |                 self.chroma_collection_name
111 |             )
112 |             vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
113 |             self.storage_context = StorageContext.from_defaults(
114 |                 persist_dir=self.index_persist_dir, vector_store=vector_store
115 |             )
116 |
117 |             self.logger.info("Index Built.")
118 | except Exception as e:
119 | self.logger.error(f"An unexpected error occurred during initiation: {e}")
120 | raise
121 | return self.index
122 |
123 | def refresh(self):
124 | """
125 | Refresh the index by performing incremental updates and deletions based on the current
126 | state of the files.
127 |
128 | Raises:
129 | RuntimeError: If an error occurs during the refresh process.
130 | """
131 | self.logger.info("Starting index refresh process...")
132 | try:
133 | # Step 1: Collect current files
134 | current_files = set()
135 | for input_dir in self.input_dirs:
136 | for root, _, files in os.walk(input_dir):
137 | for file in files:
138 | current_files.add(os.path.abspath(os.path.join(root, file)))
139 |
140 | # Step 2: Perform upinsert (this will add new and update existing documents)
141 | self.upinsert()
142 |
143 | # Step 3: Perform delete (this will remove documents that no longer exist)
144 | self.delete(current_files)
145 |
146 | # Step 4: Verify and log the results
147 | updated_stats = self.get_index_stats()
148 |
149 | if updated_stats["num_documents"] != len(current_files):
150 | self.logger.warning(
151 | f"Mismatch between indexed documents ({updated_stats['num_documents']}) and files in directories ({len(current_files)})"
152 | )
153 |
154 | except Exception as e:
155 | self.logger.error(
156 | f"An error occurred during index refresh: {e}", exc_info=True
157 | )
158 | raise
159 |
160 | def upinsert(self):
161 | """
162 | Upsert (update or insert) documents into the index based on changes or new additions.
163 |
164 | Raises:
165 | RuntimeError: If an error occurs while performing the upinsert operation.
166 | """
167 | total_documents = 0
168 | total_refreshed = 0
169 |
170 | for input_dir in self.input_dirs:
171 | self.logger.info(f"Processing directory: {input_dir}")
172 | documents = SimpleDirectoryReader(
173 | input_dir, recursive=self.recursive, filename_as_id=True
174 | ).load_data()
175 | total_documents += len(documents)
176 |             loaded_count = len(documents)
177 |             self.logger.info(f"Loaded {loaded_count} document sections from {input_dir}")
178 |
179 | refreshed_docs = self.index.refresh_ref_docs(documents)
180 | num_refreshed = sum(1 for r in refreshed_docs if r)
181 | total_refreshed += num_refreshed
182 |
183 | if num_refreshed == 0:
184 | self.logger.info(f"No files were modified or added in {input_dir}")
185 | else:
186 | for doc, is_refreshed in zip(documents, refreshed_docs):
187 | if is_refreshed:
188 | doc_path = doc.metadata.get("file_path", "Unknown path")
189 | self.logger.info(f"Updated file: {doc_path}")
190 |
191 | if total_refreshed == 0:
192 | self.logger.info("No files were modified or added in any directory")
193 | else:
194 | self.logger.info(f"Total files modified or added: {total_refreshed}")
195 |
196 | def delete(self, current_files: set):
197 | """Delete documents from the index if their corresponding files have been deleted from the filesystem."""
198 | ref_doc_info = self.index.ref_doc_info
199 | deleted_docs = []
200 |
201 | for doc_id, info in ref_doc_info.items():
202 | file_path = info.metadata.get("file_path")
203 | if file_path and os.path.abspath(file_path) not in current_files:
204 | self.logger.info(f"Deleted file: {file_path}")
205 | deleted_docs.append(doc_id)
206 |
207 | if deleted_docs:
208 | self.logger.info(f"Deleting {len(deleted_docs)} chunks from the index.")
209 | for doc_id in deleted_docs:
210 | self.index.delete_ref_doc(doc_id, delete_from_docstore=True)
211 | self.logger.info("Deletion process completed.")
212 | else:
213 | self.logger.info("No deleted files found.")
214 |
215 | def get_index_stats(self) -> Dict[str, Any]:
216 | """Get statistics about the current index."""
217 | # Count unique file paths in the index
218 | unique_files = set()
219 | for doc_id, info in self.index.ref_doc_info.items():
220 | file_path = info.metadata.get("file_path")
221 | if file_path:
222 | unique_files.add(file_path)
223 |
224 | return {
225 | "num_documents": len(unique_files), # Count of unique documents
226 |             "num_nodes": len(self.index.ref_doc_info),  # Number of ref docs tracked by the index
227 | "index_persist_dir": self.index_persist_dir,
228 | "chroma_db_dir": self.chroma_db_dir,
229 | "chroma_collection_name": self.chroma_collection_name,
230 | }
231 |
--------------------------------------------------------------------------------
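A short sketch of the incremental-sync loop `Indexer` supports, via the `NexuSync` wrappers (again assuming the `ns` instance from the earlier sketch):

```python
# Re-scan input_dirs: upsert new/changed files, then drop deleted ones.
ns.refresh_index()

stats = ns.get_index_stats()
print(f"{stats['num_documents']} files indexed, persisted at {stats['index_persist_dir']}")
```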
/back_end_api.py:
--------------------------------------------------------------------------------
1 | # back_end_api.py
2 | from flask import Flask, request, jsonify, Response, send_from_directory
3 | import json
4 | import logging
5 | from nexusync import NexuSync, rebuild_index
6 |
7 | app = Flask(__name__)
8 |
9 | # Configure logging
10 | logging.basicConfig(level=logging.DEBUG)
11 |
12 | # Configuration Parameters
13 | # For non-openai model:
14 | # OPENAI_MODEL_YN = False
15 | # EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
16 | # LANGUAGE_MODEL = "llama3.2"
17 |
18 | # For openai model: create a .env file in the project root containing OPENAI_API_KEY = 'sk-xxx'
19 | OPENAI_MODEL_YN = True
20 | EMBEDDING_MODEL = "text-embedding-3-large"
21 | LANGUAGE_MODEL = "gpt-4o-mini"
22 | TEMPERATURE = 0.4
23 | INPUT_DIRS = ["sample_docs/"] # Can include multiple paths
24 | CHROMA_DB_DIR = "chroma_db"
25 | INDEX_PERSIST_DIR = "index_storage"
26 | CHROMA_COLLECTION_NAME = "my_collection"
27 | CHUNK_SIZE = 1024
28 | CHUNK_OVERLAP = 20
29 | RECURSIVE = True
30 |
31 |
32 | # Define the QA Prompt Template
33 | text_qa_template = """
34 | Context Information:
35 | {context_str}
36 | Query: {query_str}
37 | Instructions:
38 | You are helping NHS doctors review patients' medical records and give interpretations of the results.
39 | Carefully read the context information and the query.
40 | If the query is in the format [patient_id, summary_report], generate a summary report using the template below.
41 | Use the available information from the context to fill in each section.
42 | Include relevant dates and timeline information in each section.
43 | If information for a section is not available, state "No information available" for that section.
44 | Provide concise and accurate information based on the given context.
45 | Adapt the template as needed to fit the patient's specific medical history and conditions.
46 |
47 | Summary Report Template:
48 |
49 | Patient Summary Report for [patient_id]
50 | 1. Demographics
51 |
52 | Name: [First Name] [Last Name]
53 | Date of Birth: [DOB]
54 | Gender: [Gender]
55 | Contact Information:
56 |
57 | Address: [Address]
58 | Phone: [Phone Number]
59 | Email: [Email Address]
60 |
61 |
62 |
63 | 2. Past Medical History & Procedures
64 |
65 | Chronic Conditions: [List of chronic conditions with diagnosis dates]
66 | Major Illnesses: [List of major illnesses with dates]
67 | Surgical Procedures: [List of surgical procedures with dates]
68 | Other Significant Medical Events: [List with dates]
69 | Your interpretation: [Your interpretation of the medical records]
70 |
71 | 3. Medication History
72 | [List each current medication with the following information]
73 |
74 | Name: [Medication Name]
75 | Dosage: [Dosage]
76 | Frequency: [Frequency]
77 | Start Date: [Start Date]
78 | Prescriber: [Prescriber Name]
79 | Purpose: [Brief description of why the medication is prescribed]
80 |
81 | [Include a brief list of significant past medications, if available]
82 | 4. Allergies and Adverse Reactions
83 |
84 | Medication Allergies: [List or "No known medication allergies"]
85 | Other Allergies: [List or "No known other allergies"]
86 | Adverse Reactions: [List any significant adverse reactions to treatments or medications]
87 |
88 | 5. Social History & Occupation
89 |
90 | Occupation: [Current or most recent occupation]
91 | Smoking Status: [Current smoker, former smoker, never smoker]
92 | Alcohol Use: [Description of alcohol use]
93 | Recreational Drug Use: [If applicable]
94 | Exercise Habits: [Brief description]
95 | Diet: [Any significant dietary information]
96 | Other Relevant Social Factors: [e.g., living situation, support system]
97 | Your interpretation: [Your interpretation of the social history]
98 |
99 | 6. Physical Examination & Vital Signs
100 | Most Recent Vital Signs (Date: [Date of most recent vital signs])
101 |
102 | Blood Pressure: [BP]
103 | Heart Rate: [HR]
104 | Respiratory Rate: [RR]
105 | Temperature: [Temp]
106 | Oxygen Saturation: [O2 Sat]
107 | Weight: [Weight]
108 | Height: [Height]
109 | BMI: [BMI]
110 | Your interpretation: [Your interpretation of the vital signs]
111 | [Include any significant physical examination findings]
112 |
113 | 7. Laboratory Results
114 | [List most recent significant laboratory tests with dates, results, and normal ranges]
115 |
116 | 8. Imaging and Diagnostic Results
117 | [List recent imaging studies and other diagnostic tests with dates and summary of results]
118 |
119 | 9. Treatment Plan and Interventions
120 |
121 | Current Treatment Plans: [List current treatments or interventions]
122 | Ongoing Therapies: [e.g., physical therapy, chemotherapy, dialysis]
123 | Recent Changes in Management: [Any recent significant changes in treatment]
124 | Your interpretation: [Your interpretation of the treatment plan]
125 |
126 | 10. Immunizations
127 | [List relevant immunizations with dates]
128 |
129 | 11. Upcoming Appointments and Follow-ups
130 | [List any scheduled appointments with dates, types, and locations]
131 |
132 |
133 | [Generate the report based on the template above, filling in the available information from the context.]
134 |
135 | Answer: """
136 |
137 | ns = NexuSync(
138 | input_dirs=INPUT_DIRS,
139 | openai_model_yn=OPENAI_MODEL_YN,
140 | embedding_model=EMBEDDING_MODEL,
141 | language_model=LANGUAGE_MODEL,
142 | temperature=TEMPERATURE,
143 | chroma_db_dir=CHROMA_DB_DIR,
144 | index_persist_dir=INDEX_PERSIST_DIR,
145 | chroma_collection_name=CHROMA_COLLECTION_NAME,
146 | chunk_overlap=CHUNK_OVERLAP,
147 | chunk_size=CHUNK_SIZE,
148 | recursive=RECURSIVE,
149 | )
150 |
151 |
152 | # Initialize the Chat Engine Once
153 | ns.initialize_stream_chat(
154 | text_qa_template=text_qa_template, chat_mode="context", similarity_top_k=3
155 | )
156 |
157 |
158 | # Root Route - Serve the index.html file
159 | @app.route("/")
160 | def index():
161 | return send_from_directory(".", "index.html")
162 |
163 |
164 | @app.route("/chat", methods=["POST"])
165 | def chat():
166 | data = request.get_json()
167 | if not data or "message" not in data:
168 | return jsonify({"error": "Invalid request. 'message' field is required."}), 400
169 |
170 | user_input = data["message"]
171 |
172 | def generate_response():
173 | try:
174 | source_file_paths = []
175 | response_generator = ns.chat_engine.chat_stream(user_input)
176 |
177 | for item in response_generator:
178 | if isinstance(item, str):
179 | # Stream individual tokens
180 | yield json.dumps({"response": item}) + "\n"
181 | elif isinstance(item, dict):
182 | # Final response with metadata
183 | metadata = item.get("metadata", {})
184 | sources = metadata.get("sources", [])
185 |
186 | # Extract source file paths
187 | for source in sources:
188 | metadata_info = source.get("metadata", {})
189 | file_path = metadata_info.get("file_path", "Unknown source")
190 | source_file_paths.append(file_path)
191 |
192 | # Remove duplicates while preserving order
193 | source_file_paths = list(dict.fromkeys(source_file_paths))
194 |
195 | # Format the source file paths
196 | if source_file_paths:
197 | sources_formatted = "\n".join(
198 | f"- {path}" for path in source_file_paths
199 | )
200 | yield json.dumps(
201 | {"sources": sources_formatted, "final": True}
202 | ) + "\n"
203 | else:
204 | yield json.dumps(
205 | {"sources": "No sources found", "final": True}
206 | ) + "\n"
207 |
208 | except Exception as e:
209 | logging.error(f"Error in chat endpoint: {e}", exc_info=True)
210 | yield json.dumps(
211 | {"error": f"An error occurred while processing your request: {str(e)}"}
212 | ) + "\n"
213 |
214 | return Response(generate_response(), mimetype="application/json")
215 |
216 |
217 | @app.route("/rebuild_index", methods=["POST"])
218 | def rebuild_index_route():
219 | global ns, EMBEDDING_MODEL, LANGUAGE_MODEL, TEMPERATURE, INPUT_DIRS
220 |
221 | data = request.get_json()
222 | if not data:
223 | return jsonify({"error": "No data provided"}), 400
224 |
225 | try:
226 | # Update global variables
227 | EMBEDDING_MODEL = data.get("embedding_model", EMBEDDING_MODEL)
228 | LANGUAGE_MODEL = data.get("llm_model", LANGUAGE_MODEL)
229 | TEMPERATURE = data.get("temperature", TEMPERATURE)
230 | INPUT_DIRS = data.get("input_dirs", INPUT_DIRS)
231 |
232 | # Rebuild index
233 | rebuild_index(
234 | input_dirs=INPUT_DIRS,
235 | openai_model_yn=OPENAI_MODEL_YN,
236 | embedding_model=EMBEDDING_MODEL,
237 | language_model=LANGUAGE_MODEL,
238 | temperature=TEMPERATURE,
239 | chroma_db_dir=CHROMA_DB_DIR,
240 | index_persist_dir=INDEX_PERSIST_DIR,
241 | chroma_collection_name=CHROMA_COLLECTION_NAME,
242 | chunk_overlap=CHUNK_OVERLAP,
243 | chunk_size=CHUNK_SIZE,
244 | recursive=RECURSIVE,
245 | )
246 |
247 | # Reinitialize NexuSync
248 | ns = NexuSync(
249 | input_dirs=INPUT_DIRS,
250 | openai_model_yn=OPENAI_MODEL_YN,
251 | embedding_model=EMBEDDING_MODEL,
252 | language_model=LANGUAGE_MODEL,
253 | temperature=TEMPERATURE,
254 | chroma_db_dir=CHROMA_DB_DIR,
255 | index_persist_dir=INDEX_PERSIST_DIR,
256 | chroma_collection_name=CHROMA_COLLECTION_NAME,
257 | chunk_overlap=CHUNK_OVERLAP,
258 | chunk_size=CHUNK_SIZE,
259 | recursive=RECURSIVE,
260 | )
261 |
262 | # Reinitialize the chat engine
263 | ns.initialize_stream_chat(
264 | text_qa_template=text_qa_template, chat_mode="context", similarity_top_k=3
265 | )
266 |
267 | return jsonify({"status": "Index rebuilt successfully"}), 200
268 | except Exception as e:
269 | app.logger.error(f"Error rebuilding index: {e}", exc_info=True)
270 | return jsonify({"error": str(e)}), 500
271 |
272 |
273 | @app.route("/reset_chat", methods=["POST"])
274 | def reset_chat():
275 | try:
276 | ns.chat_engine.clear_chat_history()
277 | return jsonify({"status": "Chat history cleared successfully."}), 200
278 | except Exception as e:
279 | logging.error(f"Error resetting chat history: {e}", exc_info=True)
280 | return jsonify({"error": f"An error occurred: {str(e)}"}), 500
281 |
282 |
283 | @app.route("/refresh_index", methods=["POST"])
284 | def refresh_index():
285 | try:
286 | ns.indexer.refresh()
287 | return jsonify({"status": "Index refreshed successfully."}), 200
288 | except Exception as e:
289 | logging.error(f"Error refreshing index: {e}", exc_info=True)
290 | return jsonify({"error": f"An error occurred: {str(e)}"}), 500
291 |
292 |
293 | if __name__ == "__main__":
294 | # Run the Flask app
295 | app.run(host="0.0.0.0", port=2024, debug=True)
296 |
--------------------------------------------------------------------------------
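A hypothetical client for the `/chat` endpoint above; it assumes the Flask app is running locally on port 2024 and that the `requests` package is installed, and it follows the newline-delimited JSON protocol emitted by `generate_response`:

```python
import json

import requests

with requests.post(
    "http://localhost:2024/chat",
    json={"message": "[P123456789, summary_report]"},
    stream=True,
) as resp:
    for line in resp.iter_lines():
        if not line:
            continue
        chunk = json.loads(line)
        if "response" in chunk:
            print(chunk["response"], end="", flush=True)  # streamed tokens
        elif chunk.get("final"):
            print("\nSources:\n" + chunk.get("sources", ""))
        elif "error" in chunk:
            print("\nError:", chunk["error"])
```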
/README.md:
--------------------------------------------------------------------------------
1 | [pypi](https://pypi.org/project/nexusync/)
2 | [GitHub](https://github.com/Zakk-Yang/nexusync)
3 | [Downloads](https://pepy.tech/project/nexusync)
4 |
5 |
6 |
7 |
8 |
9 |
10 | Newest version = 0.3.6: the torch package needs to be installed separately so that it matches your system environment.
11 |
12 | Development Plan for the next version:
13 | - Add PDF OCR using the Ollama llama3.2-vision model
14 |
15 |
16 | # NexuSync
17 |
18 | *NexuSync* is a lightweight yet powerful library for building Retrieval-Augmented Generation (RAG) systems, built on top of **LlamaIndex**. It offers a simple and user-friendly interface for developers to configure and deploy RAG systems efficiently. Choose between using the **Ollama LLM** model for offline, privacy-focused applications or the **OpenAI API** for a hosted solution.
19 |
20 | ---
21 |
22 | ## 🚀 Features
23 |
24 | - **Lightweight Design**: Simplify the integration and configuration of RAG systems without unnecessary complexity.
25 | - **User-Friendly Interface**: Intuitive APIs and clear documentation make setup a breeze.
26 | - **Flexible Document Indexing**: Automatically index documents from specified directories, keeping your knowledge base up-to-date.
27 | - **Efficient Querying**: Use natural language to query your document collection and get relevant answers quickly.
28 | - **Conversational Interface**: Engage in chat-like interactions for more intuitive information retrieval.
29 | - **Customizable Embedding Options**: Choose between HuggingFace Embedding models or OpenAI's offerings.
30 | - **Incremental Updates**: Easily update existing documents and insert new ones into the index without rebuilding it from scratch.
31 | - **Automatic Deletion Handling**: Documents removed from the filesystem are automatically removed from the index.
32 | - **Extensive File Format Support**: Supports multiple file formats including `.csv`, `.docx`, `.epub`, `.hwp`, `.ipynb`, `.mbox`, `.md`, `.pdf`, `.png`, `.ppt`, `.pptm`, `.pptx`, `.json`, and more.
33 |
34 |
35 | ---
36 |
37 |
38 | ## 🛠 Prerequisites
39 | - Python 3.10 or higher
40 | - PyTorch: visit https://pytorch.org/get-started/locally/ for the command that matches your system
41 | - Ollama (https://ollama.com/download) or an OpenAI API key (create a `.env` file containing `OPENAI_API_KEY = 'sk-xxx'`)
42 | - Conda is suggested for environment management, to avoid environment conflicts:
43 |
44 | **Install `conda` for WSL2 (Windows Subsystem for Linux 2)**:
45 | 1. Open your WSL2 terminal
46 | 2. Download the Miniconda installer:
47 | `wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh`
48 | 3. Run the installer:
49 | `bash Miniconda3-latest-Linux-x86_64.sh`
50 | 4. Follow the prompts to complete the installation
51 | 5. Restart your terminal or run `source ~/.bashrc`
52 |
53 | **Install `conda` for Windows**:
54 | 1. Download the Miniconda installer for Windows from https://docs.conda.io/en/latest/miniconda.html
55 | 2. Run the .exe file and follow the installation prompts
56 | 3. Choose whether to add Conda to your PATH environment variable during installation
57 |
58 | **Install `conda` for Linux**:
59 | 1. Open a terminal
60 | 2. Download the Miniconda installer
61 | `wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh`
62 | 3. Run the installer:
63 | `bash Miniconda3-latest-Linux-x86_64.sh`
64 | 4. Follow the prompts to complete the installation
65 | 5. Restart your terminal or run `source ~/.bashrc`
66 |
67 | **Install `conda` for macOS**:
68 | 1. Open a terminal
69 | 2. Download the Miniconda installer
70 | `curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh`
71 | 3. Run the installer:
72 | `bash Miniconda3-latest-MacOSX-x86_64.sh`
73 | 4. Follow the prompts to complete the installation
74 | 5. Restart your terminal or run `source ~/.bash_profile`
75 |
76 | **After installation on any platform, verify the installation by running**:
77 | `conda --version`
78 |
79 | ---
80 |
81 |
82 | ## 📦 Installation
83 | 1. Use conda to create an env in your project folder (the env name `nexusync` below is just an example):
84 | ```bash
85 | conda create --name nexusync python=3.10
86 | conda activate nexusync
87 | ```
88 |
89 | 2. Then install NexuSync in your conda env by running:
90 |
91 | ```bash
92 | pip install nexusync
93 | ```
94 | Or `git clone https://github.com/Zakk-Yang/nexusync.git`
95 |
96 |
97 | 3. Install PyTorch (https://pytorch.org/get-started/locally/); a quick verification sketch follows this list:
98 | - If you are using CUDA, make sure the wheel matches your CUDA version:
99 | - For CUDA 11.8 (example, for Windows and WSL2/Linux)
100 | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118`
101 | - For CUDA 12.1 (example, for Windows and WSL2/Linux)
102 | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121`
103 | - For macOS
104 | `pip3 install torch torchvision torchaudio`
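To confirm the install (and CUDA visibility, if applicable), a quick check:

```python
# Sanity check: prints the installed torch version and whether CUDA is usable.
import torch

print(torch.__version__)
print("CUDA available:", torch.cuda.is_available())
```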
105 |
106 | ---
107 |
108 |
109 | ## 🌟 Quick Start
110 |
111 | Here's how you can get started with NexuSync:
112 |
113 | ### 1. Import NexuSync
114 | ```python
115 | from nexusync import NexuSync
116 | ```
117 | ### 2. Choose Your Model
118 | ##### **Option A: Using OpenAI Model**
119 | ```python
120 | #------- Use OpenAI Model -------
121 | # Customize your parameters for openai model, create .env file in the project folder to include OPENAI_API_KEY = 'sk-xxx'
122 | OPENAI_MODEL_YN = True
123 | EMBEDDING_MODEL = "text-embedding-3-large"
124 | LANGUAGE_MODEL = "gpt-4o-mini"
125 | TEMPERATURE = 0.4 # range from 0 to 1; higher means more creative output
126 | CHROMA_DB_DIR = 'chroma_db' # Your path to the chroma db
127 | INDEX_PERSIST_DIR = 'index_storage' # Your path to the index storage
128 | CHROMA_COLLECTION_NAME = 'my_collection'
129 | INPUT_DIRS = ["../sample_docs"] # can specify multiple document paths
130 | CHUNK_SIZE = 1024 # Size of text chunks for creating embeddings
131 | CHUNK_OVERLAP = 20 # Overlap between text chunks to maintain context
132 | RECURSIVE = True # Recursive or not under one folder
133 | ```
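If you're unsure whether your `.env` file is being picked up, here is a minimal sanity check (a sketch using `python-dotenv`, which is already listed in `requirements.txt`):

```python
# Sketch: confirm OPENAI_API_KEY is visible before initializing NexuSync.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory
assert os.getenv("OPENAI_API_KEY"), "OPENAI_API_KEY not found; check your .env file"
```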
134 |
135 | ##### **Option B: Using Ollama Model**
136 | ```python
137 | #------- Use Ollama Model -------
138 | # Customize your parameters for ollama model
139 | OPENAI_MODEL_YN = False # if False, you will use ollama model
140 | EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5" # suggested embedding model; you can replace it with any HuggingFace embedding model
141 | LANGUAGE_MODEL = 'llama3.2' # you need to download the Ollama model first, please check https://ollama.com/download
142 | BASE_URL = "http://localhost:11434" # you can switch to a different base_url for the Ollama model
143 | TEMPERATURE = 0.4 # range from 0 to 1; higher means more creative output
144 | CHROMA_DB_DIR = 'chroma_db' # Your path to the chroma db
145 | INDEX_PERSIST_DIR = 'index_storage' # Your path to the index storage
146 | CHROMA_COLLECTION_NAME = 'my_collection'
147 | INPUT_DIRS = ["../sample_docs"] # can specify multiple document paths
148 | CHUNK_SIZE = 1024 # Size of text chunks for creating embeddings
149 | CHUNK_OVERLAP = 20 # Overlap between text chunks to maintain context
150 | RECURSIVE = True # Recursive or not under one folder
151 | ```
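Before using this option, make sure the model has been pulled and the Ollama server is reachable (these commands assume a default local Ollama install):

```bash
ollama pull llama3.2        # download the model referenced by LANGUAGE_MODEL
curl http://localhost:11434 # responds with "Ollama is running" if the server is up
```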
152 |
153 | ### 3. Initialize Vector DB
154 | ```python
155 | # example for Ollama Model
156 | ns = NexuSync(input_dirs=INPUT_DIRS,
157 | openai_model_yn=False,
158 | embedding_model=EMBEDDING_MODEL,
159 | language_model=LANGUAGE_MODEL,
160 | base_url=BASE_URL, # the OpenAI model does not need base_url; here we use the Ollama model as an example
161 | temperature=TEMPERATURE,
162 | chroma_db_dir=CHROMA_DB_DIR,
163 | index_persist_dir=INDEX_PERSIST_DIR,
164 | chroma_collection_name=CHROMA_COLLECTION_NAME,
165 | chunk_overlap=CHUNK_OVERLAP,
166 | chunk_size=CHUNK_SIZE,
167 | recursive=RECURSIVE
168 | )
169 | ```
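After initialization, the persistence directories from the config above should exist on disk; a quick check, as a sketch:

```python
# Sketch: confirm the Chroma DB and index storage directories were created.
import os

for path in (CHROMA_DB_DIR, INDEX_PERSIST_DIR):
    print(path, "exists:", os.path.isdir(path))
```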
170 |
171 | ### 4. Start Querying (quick, one-off querying with no memory)
172 | ```python
173 | #------- Start Querying (one-time, no memory and without stream chat) -----
174 | query = "main result of the paper can llms generate novel research ideas"
175 |
176 | text_qa_template = """
177 | Context Information:
178 | --------------------
179 | {context_str}
180 | --------------------
181 |
182 | Query: {query_str}
183 |
184 | Instructions:
185 | 1. Carefully read the context information and the query.
186 | 2. Think through the problem step by step.
187 | 3. Provide a concise and accurate answer based on the given context.
188 | 4. If the answer cannot be determined from the context, state "Based on the given information, I cannot provide a definitive answer."
189 | 5. If you need to make any assumptions, clearly state them.
190 | 6. If relevant, provide a brief explanation of your reasoning.
191 |
192 | Answer: """
193 |
194 | response = ns.start_query(text_qa_template=text_qa_template, query=query)
195 |
196 | print(f"Query: {query}")
197 | print(f"Response: {response['response']}")
198 | print(f"Metadata: {response['metadata']}")
199 | ```
200 |
201 | ### 5. Engage in Stream Chat (token-by-token output, with memory)
202 | ```python
203 | # First, initialize the stream chat engine
204 | ns.initialize_stream_chat(
205 | text_qa_template=text_qa_template,
206 | chat_mode="context",
207 | similarity_top_k=3
208 | )
209 |
210 | query = "main result of the paper can llms generate novel research ideas"
211 |
212 | for item in ns.start_chat_stream(query):
213 | if isinstance(item, str):
214 | # This is a token, print or process as needed
215 | print(item, end='', flush=True)
216 | else:
217 | # This is the final response with metadata
218 | print("\n\nFull response:", item['response'])
219 | print("Metadata:", item['metadata'])
220 | break
221 | ```
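If you prefer the streamed answer as a single string (for logging, say), a small wrapper helps; `stream_to_text` below is a hypothetical helper built on the loop above, not part of the NexuSync API:

```python
def stream_to_text(ns, query):
    """Accumulate streamed tokens into one string; return (text, metadata)."""
    tokens = []
    for item in ns.start_chat_stream(query):
        if isinstance(item, str):
            tokens.append(item)  # token chunk
        else:
            return "".join(tokens), item["metadata"]  # final item carries metadata
    return "".join(tokens), None

answer, metadata = stream_to_text(ns, query)
print(answer)
```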
222 |
223 | ### 6. Access Chat History (for stream chat)
224 | ```python
225 | chat_history = ns.chat_engine.get_chat_history()
226 | print("Chat History:")
227 | for entry in chat_history:
228 | print(f"Human: {entry['query']}")
229 | print(f"AI: {entry['response']}\n")
230 | ```
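To persist a conversation between sessions, you could dump the history to JSON (a sketch; the `query`/`response` fields follow the loop above):

```python
import json

# Sketch: save the current chat history to a file for later inspection.
with open("chat_history.json", "w") as f:
    json.dump(ns.chat_engine.get_chat_history(), f, indent=2)
```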
231 |
232 | ### 7. Incrementally Refresh Index
233 | ```python
234 | #------- Incrementally Refresh the Index without Rebuilding It -----
235 | # If files have been modified, inserted, or deleted, you don't need to rebuild the entire index
236 | ns.refresh_index()
237 | ```
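If your documents change regularly, one option is to refresh on a schedule (a sketch; the 10-minute interval is arbitrary):

```python
import time

# Sketch: poll for document changes every 10 minutes.
while True:
    ns.refresh_index()
    time.sleep(600)
```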
238 | ### 8. Rebuild Index From Scratch
239 | ```python
240 | #------- Rebuild Index -----
241 | # Rebuild the index when either of the following is changed:
242 | # - openai_model_yn
243 | # - embedding_model
244 | # - language_model
245 | # - base_url
246 | # - chroma_db_dir
247 | # - index_persist_dir
248 | # - chroma_collection_name
249 | # - chunk_overlap
250 | # - chunk_size
251 | # - recursive
252 |
253 | from nexusync import rebuild_index
254 | from nexusync import NexuSync
255 |
256 | OPENAI_MODEL_YN = True # if False, you will use the ollama model
257 | EMBEDDING_MODEL = "text-embedding-3-large" # suggested embedding model
258 | LANGUAGE_MODEL = 'gpt-4o-mini' # an OpenAI chat model; no Ollama download is needed in this mode
259 | TEMPERATURE = 0.4 # range from 0 to 1; higher means more creative output
260 | CHROMA_DB_DIR = 'chroma_db'
261 | INDEX_PERSIST_DIR = 'index_storage'
262 | CHROMA_COLLECTION_NAME = 'my_collection'
263 | INPUT_DIRS = ["../sample_docs"] # can specify multiple document paths
264 | CHUNK_SIZE = 1024
265 | CHUNK_OVERLAP = 20
266 | RECURSIVE = True
267 |
268 | # Assume we changed the model from Ollama to OpenAI
269 | rebuild_index(input_dirs=INPUT_DIRS,
270 | openai_model_yn=OPENAI_MODEL_YN,
271 | embedding_model=EMBEDDING_MODEL,
272 | language_model=LANGUAGE_MODEL,
273 | temperature=TEMPERATURE,
274 | chroma_db_dir=CHROMA_DB_DIR,
275 | index_persist_dir=INDEX_PERSIST_DIR,
276 | chroma_collection_name=CHROMA_COLLECTION_NAME,
277 | chunk_overlap=CHUNK_OVERLAP,
278 | chunk_size=CHUNK_SIZE,
279 | recursive=RECURSIVE
280 | )
281 |
282 | # Reinitialize ns after rebuilding the index
283 | ns = NexuSync(input_dirs=INPUT_DIRS,
284 | openai_model_yn=OPENAI_MODEL_YN,
285 | embedding_model=EMBEDDING_MODEL,
286 | language_model=LANGUAGE_MODEL,
287 | temperature=TEMPERATURE,
288 | chroma_db_dir=CHROMA_DB_DIR,
289 | index_persist_dir=INDEX_PERSIST_DIR,
290 | chroma_collection_name=CHROMA_COLLECTION_NAME,
291 | chunk_overlap=CHUNK_OVERLAP,
292 | chunk_size=CHUNK_SIZE,
293 | recursive=RECURSIVE
294 | )
295 |
296 | # Test the newly built index
297 | query = "main result of the paper can llms generate novel research ideas"
298 |
299 | text_qa_template = """
300 | Context Information:
301 | --------------------
302 | {context_str}
303 | --------------------
304 |
305 | Query: {query_str}
306 |
307 | Instructions:
308 | 1. Carefully read the context information and the query.
309 | 2. Think through the problem step by step.
310 | 3. Provide a concise and accurate answer based on the given context.
311 | 4. If the answer cannot be determined from the context, state "Based on the given information, I cannot provide a definitive answer."
312 | 5. If you need to make any assumptions, clearly state them.
313 | 6. If relevant, provide a brief explanation of your reasoning.
314 |
315 | Answer: """
316 |
317 |
318 | response = ns.start_query(text_qa_template=text_qa_template, query=query)
319 |
320 | print(f"Query: {query}")
321 | print(f"Response: {response['response']}")
322 | print(f"Metadata: {response['metadata']}")
323 | ```
324 | ---
325 |
326 | ## 🎯 User Interface
327 | 1. Git clone or download this project:
328 | ```bash
329 | git clone https://github.com/Zakk-Yang/nexusync.git
330 | ```
331 | 2. Configure the backend:
332 | - Open `back_end_api.py` in your IDE.
333 | - Adjust the parameters according to your requirements.
334 |
335 | 3. Open the terminal and run:
336 | ```bash
337 | python back_end_api.py
338 | ```
339 | Ensure that the parameters in `back_end_api.py` align with the settings in the side panel of the interface. If not, copy and paste your desired Embedding Model and Language Model into the side panel and click "Apply Settings". (A quick health check with `curl` is sketched after this list.)
340 |
341 | 4. Start interacting with your data!
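Once the backend is running, you can sanity-check two of its endpoints from another terminal (`/reset_chat` and `/refresh_index` are defined in `back_end_api.py`, which serves on port 2024 by default):

```bash
curl -X POST http://localhost:2024/reset_chat     # clears the chat history
curl -X POST http://localhost:2024/refresh_index  # incrementally refreshes the index
```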
342 |
343 |
344 |
345 |
346 |
347 | ---
348 |
349 | ## 📚 Documentation & Examples
350 | For more detailed usage examples, check out the demo notebooks.
351 |
352 | ---
353 |
354 | ## 📝 License
355 | This project is licensed under the MIT License - see the LICENSE file for details.
356 |
357 | ---
358 |
359 | ## 📫 Contact
360 | For questions or suggestions, feel free to open an issue or contact the maintainer:
361 |
362 | Name: Zakk Yang
363 | Email: zakkyang@hotmail.com
364 | GitHub: Zakk-Yang
365 |
366 | ---
367 |
368 | ## 🌟 Support
369 | If you find this project helpful, please give it a ⭐ on [GitHub](https://github.com/Zakk-Yang/nexusync)! Your support is appreciated.
370 |
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
8 | NexuSync Chat Interface