├── sample_docs ├── new_added.txt ├── Can LLMs Generate Novel Research Ideas.pdf └── healthcare_records.json ├── assets ├── chat_snapshot.png └── nexusync_logo.png ├── MANIFEST.in ├── src └── nexusync │ ├── core │ ├── __init__.py │ ├── querier.py │ ├── indexing_functions.py │ ├── chat_engine.py │ └── indexer.py │ ├── models │ ├── __init__.py │ ├── embedding_models.py │ └── language_models.py │ ├── utils │ ├── __init__.py │ ├── logging_config.py │ └── file_operations.py │ ├── __init__.py │ └── nexusync.py ├── requirements.txt ├── dummy_dataset.json ├── LICENSE.txt ├── setup.py ├── .gitignore ├── back_end_api.py ├── README.md ├── index.html └── notebooks ├── data_structure_generator.ipynb └── NHS_Application_Test.ipynb /sample_docs/new_added.txt: -------------------------------------------------------------------------------- 1 | Breaking News: Trump and Harris had a fight!!!! -------------------------------------------------------------------------------- /assets/chat_snapshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zakk-Yang/nexusync/HEAD/assets/chat_snapshot.png -------------------------------------------------------------------------------- /assets/nexusync_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zakk-Yang/nexusync/HEAD/assets/nexusync_logo.png -------------------------------------------------------------------------------- /sample_docs/Can LLMs Generate Novel Research Ideas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zakk-Yang/nexusync/HEAD/sample_docs/Can LLMs Generate Novel Research Ideas.pdf -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | include 
requirements.txt 4 | recursive-include src *.py 5 | recursive-include docs *.md 6 | recursive-include tests *.py -------------------------------------------------------------------------------- /src/nexusync/core/__init__.py: -------------------------------------------------------------------------------- 1 | # src/core/__init__.py 2 | from .indexer import Indexer 3 | from .querier import Querier 4 | from .chat_engine import ChatEngine 5 | 6 | __all__ = ["Indexer", "Querier", "ChatEngine"] 7 | -------------------------------------------------------------------------------- /src/nexusync/models/__init__.py: -------------------------------------------------------------------------------- 1 | # src/models/__init__.py 2 | 3 | from .embedding_models import set_embedding_model 4 | from .language_models import set_language_model 5 | 6 | __all__ = ["set_embedding_model", "set_language_model"] 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | llama_index 2 | llama-index-llms-ollama 3 | llama-index-embeddings-huggingface 4 | chromadb 5 | llama-index-vector-stores-chroma 6 | transformers>=4.45.2 7 | python-pptx 8 | Pillow 9 | docx2txt 10 | openpyxl 11 | python-dotenv 12 | spacy 13 | flask -------------------------------------------------------------------------------- /src/nexusync/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # src/utils/__init__.py 2 | 3 | from .logging_config import get_logger 4 | from .file_operations import get_all_files, get_file_hash, get_changed_files 5 | 6 | __all__ = [ 7 | "get_logger", 8 | "get_all_files", 9 | "get_file_hash", 10 | "get_changed_files", 11 | ] 12 | -------------------------------------------------------------------------------- /src/nexusync/__init__.py: -------------------------------------------------------------------------------- 1 | # 
def silence_all_warnings():
    """Install a global filter that makes Python ignore every warning."""
    warnings.filterwarnings("ignore")


def get_logger(name):
    """
    Return the logger registered under ``name``, configuring it on first use.

    Every call silences Python warnings process-wide. A logger that has no
    handlers yet gets the INFO level, a single stream handler with a
    timestamped format, and propagation to ancestor loggers switched off.
    A logger that already has handlers is returned untouched.

    Args:
        name: Name passed to ``logging.getLogger``.

    Returns:
        logging.Logger: The (possibly newly configured) logger.
    """
    silence_all_warnings()

    log = logging.getLogger(name)
    if log.handlers:
        # Already configured by an earlier call; do not stack handlers.
        return log

    stream = logging.StreamHandler()
    stream.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    log.setLevel(logging.INFO)
    log.addHandler(stream)
    # Keep records from reaching ancestor loggers as well.
    log.propagate = False
    return log
/LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Zakk-Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | 3 | from setuptools import setup, find_packages 4 | 5 | with open("README.md", "r", encoding="utf-8") as fh: 6 | long_description = fh.read() 7 | 8 | setup( 9 | packages=find_packages(where="src"), 10 | package_dir={"": "src"}, 11 | name="nexusync", 12 | version="0.3.6", 13 | author="Zakk Yang", 14 | author_email="zakkyang@protonmail.com", 15 | description="A powerful document indexing and querying tool built on top of LlamaIndex", 16 | long_description=long_description, 17 | long_description_content_type="text/markdown", 18 | url="https://github.com/Zakk-Yang/nexusync.git", 19 | classifiers=[ 20 | "Programming Language :: Python :: 3", 21 | "License :: OSI Approved :: MIT License", 22 | "Operating System :: OS Independent", 23 | ], 24 | python_requires=">=3.10", 25 | install_requires=[ 26 | "llama_index", 27 | "llama-index-llms-ollama", 28 | "llama-index-embeddings-huggingface", 29 | "chromadb", 30 | "llama-index-vector-stores-chroma", 31 | "transformers>=4.45.2", 32 | "python-pptx", 33 | "Pillow", 34 | "docx2txt", 35 | "openpyxl", 36 | "python-dotenv", 37 | "spacy", 38 | "flask", 39 | ], 40 | include_package_data=True, # Ensures files specified in MANIFEST.in are included 41 | ) 42 | -------------------------------------------------------------------------------- /src/nexusync/models/embedding_models.py: -------------------------------------------------------------------------------- 1 | # src/utils/embedding_models.py 2 | 3 | from typing import Optional 4 | from llama_index.embeddings.huggingface import HuggingFaceEmbedding 5 | from llama_index.embeddings.openai import OpenAIEmbedding 6 | from llama_index.core import Settings 7 | import os 8 | from dotenv import load_dotenv 9 | from nexusync.utils.logging_config import get_logger 10 | 11 | 12 | def set_embedding_model( 
def set_language_model(
    openai_model: Optional[str] = None,
    ollama_model: Optional[str] = None,
    temperature: Optional[float] = 0.7,
    base_url: Optional[str] = None,
) -> None:
    """
    Set up the language model for the index.

    Exactly one of ``openai_model`` / ``ollama_model`` must be given. The
    selected client is installed as ``Settings.llm``.

    Args:
        openai_model (Optional[str]): Name of the OpenAI model.
        ollama_model (Optional[str]): Name of the Ollama model.
        temperature (Optional[float]): Temperature for the language model.
            ``None`` leaves the client's own default in place.
        base_url (Optional[str]): Ollama base url. ``None`` (or empty) leaves
            the Ollama client's own default endpoint in place.

    Raises:
        ValueError: If both or neither model is specified, or if OpenAI API key is missing.
    """
    # Validate first so bad arguments fail before any environment side effects.
    if bool(openai_model) == bool(ollama_model):
        raise ValueError("Specify either OpenAI or Ollama model, not both or neither.")

    logger = get_logger("nexusync.models.language_models.set_language_model")
    load_dotenv()

    # Forward only options that were actually supplied: passing an explicit
    # None would override the client's built-in default with an invalid value.
    llm_kwargs = {}
    if temperature is not None:
        llm_kwargs["temperature"] = temperature

    if openai_model:
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            raise ValueError("OpenAI API key not found in environment variables.")
        Settings.llm = OpenAI(model=openai_model, api_key=openai_api_key, **llm_kwargs)
        logger.info(f"Using OpenAI LLM model: {openai_model}")
        return

    if base_url:
        llm_kwargs["base_url"] = base_url
    Settings.llm = Ollama(model=ollama_model, **llm_kwargs)
    logger.info(
        f"Ollama LLM initialized with model: {ollama_model} and base_url: {base_url}"
    )
26 | *.manifest 27 | *.spec 28 | 29 | # Installer logs 30 | pip-log.txt 31 | pip-delete-this-directory.txt 32 | 33 | # Unit test / coverage reports 34 | htmlcov/ 35 | .tox/ 36 | .coverage 37 | .coverage.* 38 | .cache 39 | nosetests.xml 40 | coverage.xml 41 | *.cover 42 | *.py,cover 43 | .hypothesis/ 44 | .pytest_cache/ 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | local_settings.py 53 | db.sqlite3 54 | db.sqlite3-journal 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | env/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | .spyproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | 94 | # Ignore .DS_Store files 95 | .DS_Store 96 | 97 | 98 | # Ignore Python cache files 99 | __pycache__/ 100 | *.pyc 101 | 102 | # Ignore environment files 103 | .env 104 | 105 | # Ignore user documents and data directories 106 | documents/ 107 | storage/ 108 | chroma_db/ 109 | indexed_files.json 110 | 111 | # Ignore logs and temporary files 112 | *.log 113 | *.tmp 114 | 115 | # Ignore OS-specific files 116 | .DS_Store 117 | Thumbs.db 118 | 119 | # Ignore virtual environment directories (if any) 120 | venv/ 121 | env/ 122 | test.ipynb 123 | storage/ 124 | index_storage/ 125 | 126 | # Python Bytecode 127 | __pycache__/ 128 | *.py[cod] 129 | 130 | test.py -------------------------------------------------------------------------------- /src/nexusync/utils/file_operations.py: -------------------------------------------------------------------------------- 1 | # src/utils/file_operations.py 2 | 3 | import os 4 | from typing import List, Tuple 5 | import hashlib 
def get_all_files(directory: str, recursive: bool = True) -> List[str]:
    """
    Get all file paths in the given directory.

    Args:
        directory (str): The directory to search for files.
        recursive (bool): If True, search subdirectories as well. Defaults to True.

    Returns:
        List[str]: A list of file paths, each joined onto ``directory``.
    """
    if not recursive:
        return [
            os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, entry))
        ]

    file_paths: List[str] = []
    for root, _dirs, files in os.walk(directory):
        file_paths.extend(os.path.join(root, name) for name in files)
    return file_paths


def get_file_hash(file_path: str, chunk_size: int = 65536) -> str:
    """
    Compute the MD5 hash of a file.

    The file is read in fixed-size chunks so large documents are never held
    in memory in one piece.

    Args:
        file_path (str): The path to the file.
        chunk_size (int): Number of bytes read per iteration. Defaults to 64 KiB.

    Returns:
        str: The hexadecimal MD5 digest of the file contents.
    """
    # MD5 is used purely as a change fingerprint, not for security.
    hasher = hashlib.md5(usedforsecurity=False)
    with open(file_path, "rb") as file:
        while chunk := file.read(chunk_size):
            hasher.update(chunk)
    return hasher.hexdigest()


def get_changed_files(
    directory: str, old_hashes: dict, recursive: bool = True
) -> Tuple[List[str], List[str], List[str]]:
    """
    Determine which files in the directory have been added, modified, or deleted.

    Args:
        directory (str): The directory to check for changes.
        old_hashes (dict): A dictionary of file paths and their previous hashes.
        recursive (bool): If True, scan subdirectories as well. Defaults to True,
            which matches the previous behaviour.

    Returns:
        Tuple[List[str], List[str], List[str]]: Lists of added, modified, and deleted file paths.
    """
    current_files = get_all_files(directory, recursive=recursive)
    current_hashes = {path: get_file_hash(path) for path in current_files}

    added: List[str] = []
    modified: List[str] = []
    for path in current_files:
        if path not in old_hashes:
            added.append(path)
        elif current_hashes[path] != old_hashes[path]:
            modified.append(path)

    # Dict membership keeps this linear; a list lookup here would be O(n) each.
    deleted = [path for path in old_hashes if path not in current_hashes]

    return added, modified, deleted
38 | """ 39 | try: 40 | qa_template = PromptTemplate(text_qa_template) 41 | query_engine = self.index.as_query_engine( 42 | text_qa_template=qa_template, 43 | similarity_top_k=similarity_top_k, 44 | node_postprocessors=[ 45 | SentenceEmbeddingOptimizer(percentile_cutoff=0.5), 46 | KeywordNodePostprocessor(required_keywords=[]), 47 | ], 48 | ) 49 | 50 | response = query_engine.query(query) 51 | 52 | answer = str(response) 53 | metadata = {"sources": []} 54 | 55 | if hasattr(response, "source_nodes"): 56 | for node in response.source_nodes: 57 | source_info = { 58 | "source_text": node.node.get_text(), 59 | "metadata": node.node.metadata, 60 | } 61 | metadata["sources"].append(source_info) 62 | 63 | return {"response": answer, "metadata": metadata} 64 | 65 | except Exception as e: 66 | self.logger.error(f"An error occurred during query: {e}", exc_info=True) 67 | return { 68 | "response": f"An error occurred while processing your request: {str(e)}", 69 | "metadata": {}, 70 | } 71 | 72 | def get_relevant_documents( 73 | self, query: str, num_docs: int = 3 74 | ) -> List[Dict[str, Any]]: 75 | """ 76 | Retrieve the most relevant documents for a given query. 77 | 78 | Args: 79 | query (str): The query string. 80 | num_docs (int): The number of documents to retrieve. Defaults to 3. 81 | 82 | Returns: 83 | List[Dict[str, Any]]: A list of dictionaries containing document info and relevance scores. 
def _count_files(directory: str, recursive: bool) -> int:
    """Count files under ``directory``; top level only unless ``recursive``."""
    walker = os.walk(directory)
    if recursive:
        return sum(len(files) for _root, _dirs, files in walker)
    # The first tuple produced by os.walk describes ``directory`` itself.
    _root, _dirs, files = next(walker, (directory, [], []))
    return len(files)


def rebuild_index(
    input_dirs: List[str],
    openai_model_yn: bool,
    embedding_model: str,
    language_model: str,
    temperature: float,
    chroma_db_dir: str,
    index_persist_dir: str,
    chroma_collection_name: str,
    chunk_overlap: int,
    chunk_size: int,
    recursive: bool,
    base_url: Optional[str] = None,
):
    """
    Standalone function to rebuild the index.

    This function can be called independently of NexuSync initialization.
    It configures the global ``Settings`` (chunking, embedding model, LLM),
    loads every document from ``input_dirs``, removes the previously persisted
    index and Chroma directories, then builds a fresh ``VectorStoreIndex`` and
    persists it to ``index_persist_dir``.

    Inputs are validated and documents are loaded *before* anything is
    deleted, so a bad directory or an empty document set no longer destroys
    the existing index.

    Args:
        input_dirs: Directories whose documents are indexed.
        openai_model_yn: True to use OpenAI models, otherwise HuggingFace
            embeddings with an Ollama LLM.
        embedding_model: Name of the embedding model.
        language_model: Name of the language model.
        temperature: Temperature passed to the language model.
        chroma_db_dir: Directory of the Chroma database (deleted and recreated).
        index_persist_dir: Directory the index is persisted to (deleted and recreated).
        chroma_collection_name: Name of the Chroma collection to create.
        chunk_overlap: Value assigned to ``Settings.chunk_overlap``.
        chunk_size: Value assigned to ``Settings.chunk_size``.
        recursive: Whether subdirectories of ``input_dirs`` are read.
        base_url: Optional Ollama base url.

    Returns:
        The newly built ``VectorStoreIndex`` (earlier versions returned None).

    Raises:
        ValueError: If an input directory does not exist or no documents are found.
    """
    logger.info("Starting index rebuild process...")

    Settings.chunk_overlap = chunk_overlap
    Settings.chunk_size = chunk_size
    # Initialize the embedding and language model
    if openai_model_yn:
        set_embedding_model(openai_model=embedding_model)
        set_language_model(openai_model=language_model, temperature=temperature)
    else:
        set_embedding_model(huggingface_model=embedding_model)
        set_language_model(
            ollama_model=language_model, temperature=temperature, base_url=base_url
        )

    try:
        # Step 1: validate every input directory before touching existing data.
        for directory in input_dirs:
            if not os.path.isdir(directory):
                logger.error(f"Directory {directory} does not exist.")
                raise ValueError(f"Directory {directory} does not exist.")

        # Step 2: load all documents.
        document_list = []
        total_files = 0
        for directory in input_dirs:
            file_count = _count_files(directory, recursive)
            total_files += file_count
            documents = SimpleDirectoryReader(
                directory, filename_as_id=True, recursive=recursive
            ).load_data()
            logger.info(f"Loaded {file_count} files from {directory}.")
            document_list.extend(documents)

        if not document_list:
            logger.error("No documents found to build the index.")
            raise ValueError("No documents found to build the index.")
        logger.info(f"Loaded {total_files} files from all directories.")

        # Step 3: delete the existing index directory.
        if os.path.exists(index_persist_dir):
            logger.info(f"Deleting existing index directory: {index_persist_dir}")
            shutil.rmtree(index_persist_dir)
        else:
            logger.warning(
                f"Index directory {index_persist_dir} does not exist. Skipping deletion."
            )

        # Step 4: delete the Chroma database directory.
        if os.path.exists(chroma_db_dir):
            logger.info(f"Deleting existing Chroma DB directory: {chroma_db_dir}")
            shutil.rmtree(chroma_db_dir)
        else:
            logger.warning(
                f"Chroma DB directory {chroma_db_dir} does not exist. Skipping deletion."
            )

        # Step 5: build and persist. The persist directory was just removed,
        # so there is nothing to load from disk first.
        index = VectorStoreIndex.from_documents(document_list)
        index.storage_context.persist(persist_dir=index_persist_dir)

        # NOTE(review): as before, the index data is persisted under
        # index_persist_dir; the Chroma collection is only (re)created here and
        # nothing is written into it by this function. Confirm this matches
        # how Indexer loads the index.
        chroma_client = chromadb.PersistentClient(path=chroma_db_dir)
        chroma_client.get_or_create_collection(chroma_collection_name)

        logger.info("Index Built.")
        return index
    except Exception as e:
        logger.error(f"An unexpected error occurred during initiation: {e}")
        raise
nexusync.utils.logging_config import get_logger 9 | from typing import List, Dict, Any 10 | from dotenv import load_dotenv 11 | 12 | 13 | class NexuSync: 14 | def __init__( 15 | self, 16 | input_dirs: List[str], 17 | openai_model_yn: bool = None, 18 | language_model: str = None, 19 | base_url: str = None, 20 | embedding_model: str = None, 21 | temperature: float = 0.4, 22 | chroma_db_dir: str = "chroma_db", 23 | index_persist_dir: str = "index_storage", 24 | chroma_collection_name: str = "my_collection", 25 | chunk_size: int = 1024, 26 | chunk_overlap: int = 20, 27 | recursive: bool = True, 28 | ): 29 | load_dotenv() 30 | self.logger = get_logger("nexusync.NexuSync") 31 | self.input_dirs = input_dirs 32 | self.embedding_model = embedding_model 33 | self.language_model = language_model 34 | self.base_url = str(base_url) if base_url else None 35 | self.temperature = temperature 36 | self.chroma_db_dir = chroma_db_dir 37 | self.index_persist_dir = index_persist_dir 38 | self.chroma_collection_name = chroma_collection_name 39 | self.chunk_size = chunk_size 40 | self.chunk_overlap = chunk_overlap 41 | self.recursive = recursive 42 | self.openai_model_yn = openai_model_yn 43 | self._initialize_models() 44 | self.indexer = Indexer( 45 | input_dirs=self.input_dirs, 46 | recursive=self.recursive, 47 | chroma_db_dir=self.chroma_db_dir, 48 | index_persist_dir=self.index_persist_dir, 49 | chroma_collection_name=self.chroma_collection_name, 50 | chunk_size=self.chunk_size, 51 | chunk_overlap=self.chunk_overlap, 52 | ) 53 | self.logger.info("Vectors and Querier initialized successfully.") 54 | self.index_vector_store = self.indexer.initialize_index() 55 | 56 | # Initialize querier with the indexer 57 | self.querier = Querier(index=self.index_vector_store) 58 | 59 | # Initialize chat engine with the indexer 60 | self.chat_engine = ChatEngine(index=self.index_vector_store) 61 | 62 | def _initialize_models(self): 63 | # Initialize the embedding and language model 64 | if 
self.openai_model_yn: 65 | set_embedding_model(openai_model=self.embedding_model) 66 | set_language_model( 67 | openai_model=self.language_model, temperature=self.temperature 68 | ) 69 | 70 | else: 71 | set_embedding_model(huggingface_model=self.embedding_model) 72 | set_language_model( 73 | ollama_model=self.language_model, 74 | temperature=self.temperature, 75 | base_url=self.base_url, 76 | ) 77 | 78 | def initialize_stream_chat( 79 | self, 80 | text_qa_template: str, 81 | chat_mode: str = "context", 82 | similarity_top_k: int = 3, 83 | ): 84 | self.chat_engine.initialize_chat_engine( 85 | text_qa_template=text_qa_template, 86 | chat_mode=chat_mode, 87 | similarity_top_k=similarity_top_k, 88 | ) 89 | 90 | def start_chat_stream(self, query: str): 91 | if not self.chat_engine: 92 | raise ValueError( 93 | "Chat engine not initialized. Call initialize_stream_chat first." 94 | ) 95 | return self.chat_engine.chat_stream(query) 96 | 97 | def start_query( 98 | self, text_qa_template: str, query: str, similarity_top_k: int = 3 99 | ) -> Dict[str, Any]: 100 | """ 101 | Start a query using the initialized Querier. 102 | 103 | Args: 104 | text_qa_template (str): The template for the QA prompt. 105 | query (str): The query string. 106 | similarity_top_k (int, optional): Number of top similar documents to consider. Defaults to 3. 107 | 108 | Returns: 109 | Dict[str, Any]: A dictionary containing the response and metadata. 110 | 111 | Raises: 112 | ValueError: If the Querier is not initialized. 113 | """ 114 | if not self.querier: 115 | self.logger.error("Querier not initialized. Call initialize_vectors first.") 116 | raise ValueError("Querier not initialized. 
class ChatEngine:
    """
    Conversational front-end over a LlamaIndex ``VectorStoreIndex``.

    The underlying LlamaIndex chat engine is created by
    ``initialize_chat_engine``; ``chat`` and ``chat_stream`` refuse to run
    before that. Every successful exchange is also appended to
    ``chat_history`` as ``{"query": ..., "response": ...}``.
    """

    def __init__(self, index: VectorStoreIndex):
        """
        Initialize the ChatEngine with a VectorStoreIndex.

        Args:
            index (VectorStoreIndex): The index to be used for querying in chat.
        """
        self.logger = get_logger("nexusync.core.chat_engine")
        self.chat_engine = None
        self.chat_history = []
        self.index = index

    def initialize_chat_engine(
        self,
        text_qa_template: str,
        chat_mode: str = "context",
        similarity_top_k: int = 3,
    ):
        """
        Initialize the chat engine.

        Args:
            text_qa_template (str): The template for the QA prompt.
            chat_mode (str, optional): The mode for the chat engine. Defaults to 'context'.
            similarity_top_k (int, optional): Number of top similar documents to consider. Defaults to 3.

        Raises:
            ValueError: If ``self.index`` is not a ``VectorStoreIndex``.
        """
        qa_template = PromptTemplate(text_qa_template)
        memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
        if not isinstance(self.index, VectorStoreIndex):
            raise ValueError("The index does not contain a valid VectorStoreIndex")

        self.chat_engine = self.index.as_chat_engine(
            memory=memory,
            chat_mode=chat_mode,
            text_qa_template=qa_template,
            similarity_top_k=similarity_top_k,
            node_postprocessors=[
                SentenceEmbeddingOptimizer(percentile_cutoff=0.7),
                KeywordNodePostprocessor(required_keywords=[]),
            ],
        )
        self.logger.info("Chat engine initialized")

    @staticmethod
    def _collect_sources(response: Any) -> List[Dict[str, Any]]:
        """Return text and metadata for every source node attached to *response*."""
        return [
            {"source_text": hit.node.get_text(), "metadata": hit.node.metadata}
            for hit in getattr(response, "source_nodes", None) or []
        ]

    def _require_engine(self):
        """Return the initialized chat engine or raise ``ValueError``."""
        if self.chat_engine is None:
            raise ValueError(
                "Chat engine not initialized. Call initialize_chat_engine first."
            )
        return self.chat_engine

    def chat(self, query: str) -> Dict[str, Any]:
        """
        Process a query using the chat engine.

        Args:
            query (str): The user's query string.

        Returns:
            Dict[str, Any]: ``{"response": str, "metadata": {"sources": [...]}}``.
            If the engine call fails, the error is logged and a dictionary
            with an error message and empty ``metadata`` is returned instead.

        Raises:
            ValueError: If the chat engine is not initialized.
        """
        engine = self._require_engine()

        try:
            response = engine.chat(query)
            answer = str(response)
            metadata: Dict[str, List[Dict[str, Any]]] = {
                "sources": self._collect_sources(response)
            }
            self.chat_history.append({"query": query, "response": answer})
            return {"response": answer, "metadata": metadata}
        except Exception as e:
            self.logger.error(f"An error occurred during chat: {e}", exc_info=True)
            return {
                "response": f"An error occurred while processing your request: {str(e)}",
                "metadata": {},
            }

    def chat_stream(self, query: str) -> Generator[str | Dict[str, Any], None, None]:
        """
        Stream the answer to *query* token by token.

        Yields each token as a ``str`` and finally one ``dict`` holding the
        full response and its source metadata. If streaming fails, the error
        is logged and a single error ``dict`` with empty ``metadata`` is
        yielded instead.

        Raises:
            ValueError: If the chat engine is not initialized. Because this is
                a generator function, the check only runs once iteration starts.
        """
        engine = self._require_engine()

        try:
            response_stream = engine.stream_chat(query)

            tokens = []
            for token in response_stream.response_gen:
                tokens.append(token)
                yield token  # Yield each token as it's generated

            full_response = "".join(tokens)

            # Source nodes are read only after the stream has been consumed.
            metadata = {"sources": self._collect_sources(response_stream)}
            self.chat_history.append({"query": query, "response": full_response})

            # Final item: the assembled response together with its metadata.
            yield {
                "response": full_response,
                "metadata": metadata,
            }
        except Exception as e:
            self.logger.error(
                f"An error occurred during chat streaming: {e}", exc_info=True
            )
            yield {
                "response": f"An error occurred while processing your request: {str(e)}",
                "metadata": {},
            }

    def clear_chat_history(self):
        """
        Clear the recorded history and the chat engine's conversation memory.

        The engine is reset through its ``reset()`` method when it has one.
        The previous ``memory`` attribute probe is kept only as a fallback
        for engines that expose a memory object directly.
        """
        self.chat_history = []
        self.logger.info("Chat history cleared")

        engine = self.chat_engine
        if engine is None:
            return

        reset = getattr(engine, "reset", None)
        if callable(reset):
            reset()
            self.logger.info("Chat engine memory cleared")
            return

        memory = getattr(engine, "memory", None)
        if memory is not None:
            # Memory buffers may offer ``reset`` rather than ``clear``; use whichever exists.
            clear = getattr(memory, "reset", None) or getattr(memory, "clear", None)
            if callable(clear):
                clear()
                self.logger.info("Chat engine memory cleared")

    def get_chat_history(self) -> List[Dict[str, str]]:
        """
        Get the current chat history.

        Returns:
            List[Dict[str, str]]: A list of dictionaries containing queries and responses.
        """
        return self.chat_history
class Indexer:
    """
    Indexer is responsible for managing the indexing operations, including creating, refreshing,
    and deleting documents from the index.

    Attributes:
        input_dirs (List[str]): A list of directory paths containing documents to be indexed.
        recursive (bool): Indicates if subdirectories within input_dirs should be scanned for documents.
        chroma_db_dir (str): The directory where the Chroma database is stored.
        index_persist_dir (str): The directory where the index is persisted to disk for future use.
        chroma_collection_name (str): The name of the collection within the Chroma database.
        index (VectorStoreIndex): The current index instance; ``None`` until ``initialize_index`` runs.
        logger (logging.Logger): A logger instance for logging operations and errors.
        storage_context (StorageContext): The context for managing the storage and loading of the index.
    """

    def __init__(
        self,
        input_dirs: List[str],
        recursive: bool = True,
        chroma_db_dir: str = "chroma_db",
        index_persist_dir: str = "index_storage",
        chroma_collection_name: str = "my_collection",
        chunk_size: int = 1024,  # Default from llamaindex
        chunk_overlap: int = 20,  # Default from llamaindex
    ):
        """
        Initialize the Indexer with the given parameters.

        Args:
            input_dirs (List[str]): Directories containing documents to be indexed.
            recursive (bool, optional): Scan subdirectories if True. Defaults to True.
            chroma_db_dir (str, optional): Directory for Chroma database. Defaults to "chroma_db".
            index_persist_dir (str, optional): Directory to persist the index. Defaults to "index_storage".
            chroma_collection_name (str, optional): Name of the Chroma collection. Defaults to "my_collection".
            chunk_size (int, optional): Size of each text chunk. Defaults to 1024.
            chunk_overlap (int, optional): Overlap between chunks. Defaults to 20.

        Note:
            The constructor does not create the index. Call ``initialize_index``
            to load an existing index or build a new one. The chunk settings are
            written to the global LlamaIndex ``Settings`` object.
        """
        self.logger = get_logger("nexusync.core.indexer")  # Use full logger name
        self.input_dirs = input_dirs
        self.recursive = recursive
        self.chroma_db_dir = chroma_db_dir
        self.index_persist_dir = index_persist_dir
        self.chroma_collection_name = chroma_collection_name
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.index = None
        self.storage_context = None
        self.document_list = []
        Settings.chunk_overlap = chunk_overlap
        Settings.chunk_size = chunk_size

    def _require_index(self):
        """Return the active index, or raise ``ValueError`` if none is loaded yet."""
        if self.index is None:
            raise ValueError("Index not initialized. Call initialize_index first.")
        return self.index

    def _load_documents(self, input_dir: str):
        """Read the documents under *input_dir*, honouring ``self.recursive``."""
        return SimpleDirectoryReader(
            input_dir, recursive=self.recursive, filename_as_id=True
        ).load_data()

    def _build_index(self):
        """Build a new index from ``input_dirs`` and persist it to disk."""
        # Validate every directory before doing any expensive loading.
        for input_dir in self.input_dirs:
            if not os.path.isdir(input_dir):
                self.logger.error(f"Directory {input_dir} does not exist.")
                raise ValueError(f"Directory {input_dir} does not exist.")

        self.document_list = []
        for input_dir in self.input_dirs:
            documents = self._load_documents(input_dir)
            self.logger.info(f"Loaded {len(documents)} documents from {input_dir}")
            self.document_list.extend(documents)

        # Fail before building or persisting anything when there is nothing to index.
        if not self.document_list:
            self.logger.error("No documents found to build the index.")
            raise ValueError("No documents found to build the index.")

        self.index = VectorStoreIndex.from_documents(self.document_list)
        self.index.storage_context.persist(persist_dir=self.index_persist_dir)

        # NOTE(review): this Chroma-backed storage context is created after the
        # index above has been built and persisted, so that index does not use
        # the Chroma vector store. Kept in the original order to preserve
        # behaviour; confirm whether the index should be built on top of it.
        chroma_client = chromadb.PersistentClient(path=self.chroma_db_dir)
        chroma_collection = chroma_client.get_or_create_collection(
            self.chroma_collection_name
        )
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        self.storage_context = StorageContext.from_defaults(
            persist_dir=self.index_persist_dir, vector_store=vector_store
        )
        self.logger.info("Index Built.")

    def initialize_index(self):
        """
        Load an existing index from storage or create a new one if not found.

        Returns:
            The loaded or newly built index.

        Raises:
            ValueError: If an input directory does not exist or no documents
                are found in the specified directories.
        """
        try:
            self.storage_context = StorageContext.from_defaults(
                persist_dir=self.index_persist_dir
            )
            self.index = load_index_from_storage(self.storage_context)
            self.logger.info("Index already built. Loading from disk.")
        except FileNotFoundError:
            self.logger.warning("Index not found. Building a new index.")
            self._build_index()
        except Exception as e:
            self.logger.error(f"An unexpected error occurred during initiation: {e}")
            raise
        return self.index

    def refresh(self):
        """
        Refresh the index by performing incremental updates and deletions based on the current
        state of the files, then persist the result when anything changed.

        Raises:
            Exception: Any error raised during the refresh is logged and re-raised unchanged.
        """
        self.logger.info("Starting index refresh process...")
        try:
            # Step 1: Collect current files
            current_files = set()
            for input_dir in self.input_dirs:
                for root, _, files in os.walk(input_dir):
                    for file in files:
                        current_files.add(os.path.abspath(os.path.join(root, file)))

            # Step 2: Add new documents and update modified ones
            refreshed = self.upinsert()

            # Step 3: Remove documents whose files no longer exist
            removed = self.delete(current_files)

            # Step 4: Persist, otherwise the changes are lost on the next start
            if refreshed or removed:
                self.index.storage_context.persist(persist_dir=self.index_persist_dir)
                self.logger.info("Refreshed index persisted to disk.")

            # Step 5: Verify and log the results
            updated_stats = self.get_index_stats()

            if updated_stats["num_documents"] != len(current_files):
                self.logger.warning(
                    f"Mismatch between indexed documents ({updated_stats['num_documents']}) and files in directories ({len(current_files)})"
                )

        except Exception as e:
            self.logger.error(
                f"An error occurred during index refresh: {e}", exc_info=True
            )
            raise

    def upinsert(self) -> int:
        """
        Upsert (update or insert) documents into the index based on changes or new additions.

        Returns:
            int: Number of documents that were added or updated.

        Raises:
            ValueError: If the index has not been initialized.
        """
        index = self._require_index()
        total_refreshed = 0

        for input_dir in self.input_dirs:
            self.logger.info(f"Processing directory: {input_dir}")
            documents = self._load_documents(input_dir)
            self.logger.info(f"Loaded {len(documents)} documents from {input_dir}")

            refreshed_flags = index.refresh_ref_docs(documents)
            num_refreshed = sum(1 for flag in refreshed_flags if flag)
            total_refreshed += num_refreshed

            if num_refreshed == 0:
                self.logger.info(f"No files were modified or added in {input_dir}")
                continue

            for doc, is_refreshed in zip(documents, refreshed_flags):
                if is_refreshed:
                    doc_path = doc.metadata.get("file_path", "Unknown path")
                    self.logger.info(f"Updated file: {doc_path}")

        if total_refreshed == 0:
            self.logger.info("No files were modified or added in any directory")
        else:
            self.logger.info(f"Total files modified or added: {total_refreshed}")
        return total_refreshed

    def delete(self, current_files: set) -> int:
        """
        Delete documents from the index if their corresponding files have been deleted from the filesystem.

        Args:
            current_files (set): Absolute paths of the files that currently exist.

        Returns:
            int: Number of documents removed from the index.

        Raises:
            ValueError: If the index has not been initialized.
        """
        index = self._require_index()

        # Collect first, delete afterwards, so the mapping is not modified while iterating.
        stale_ids = []
        for doc_id, info in index.ref_doc_info.items():
            file_path = info.metadata.get("file_path")
            if file_path and os.path.abspath(file_path) not in current_files:
                self.logger.info(f"Deleted file: {file_path}")
                stale_ids.append(doc_id)

        if not stale_ids:
            self.logger.info("No deleted files found.")
            return 0

        self.logger.info(f"Deleting {len(stale_ids)} documents from the index.")
        for doc_id in stale_ids:
            index.delete_ref_doc(doc_id, delete_from_docstore=True)
        self.logger.info("Deletion process completed.")
        return len(stale_ids)

    def get_index_stats(self) -> Dict[str, Any]:
        """
        Get statistics about the current index.

        Returns:
            Dict[str, Any]: ``num_documents`` (unique source file paths),
            ``num_nodes`` (total nodes across all indexed documents) and the
            configured storage locations.

        Raises:
            ValueError: If the index has not been initialized.
        """
        ref_doc_info = self._require_index().ref_doc_info

        unique_files = set()
        num_nodes = 0
        for info in ref_doc_info.values():
            file_path = info.metadata.get("file_path")
            if file_path:
                unique_files.add(file_path)
            # Each ref doc lists the ids of the nodes (chunks) created from it.
            num_nodes += len(info.node_ids)

        return {
            "num_documents": len(unique_files),  # Count of unique documents
            "num_nodes": num_nodes,  # Total number of nodes
            "index_persist_dir": self.index_persist_dir,
            "chroma_db_dir": self.chroma_db_dir,
            "chroma_collection_name": self.chroma_collection_name,
        }
# back_end_api.py
"""Flask backend exposing the NexuSync chat, index refresh and index rebuild endpoints."""
from flask import Flask, request, jsonify, Response, send_from_directory
import json
import logging
import os
from nexusync import NexuSync, rebuild_index

app = Flask(__name__)

# Configure logging
logging.basicConfig(level=logging.DEBUG)

# Configuration Parameters
# For non-openai model:
# OPENAI_MODEL_YN = False
# EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
# LANGUAGE_MODEL = "llama3.2"

# For openai model: need to create .env in the src folder to include OPENAI_API_KEY = 'sk-xxx'
OPENAI_MODEL_YN = True
EMBEDDING_MODEL = "text-embedding-3-large"
LANGUAGE_MODEL = "gpt-4o-mini"
TEMPERATURE = 0.4
INPUT_DIRS = ["sample_docs/"]  # Can include multiple paths
CHROMA_DB_DIR = "chroma_db"
INDEX_PERSIST_DIR = "index_storage"
CHROMA_COLLECTION_NAME = "my_collection"
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 20
RECURSIVE = True


# Define the QA Prompt Template
# NOTE(review): besides {context_str} and {query_str} the template contains a
# {patient_id} placeholder; confirm the prompt is never formatted without it.
text_qa_template = """
Context Information:
{context_str}
Query: {query_str}
Instructions:
You are helping NHS doctors to review patients' medical records and give interperetations on the results.
Carefully read the context information and the query.
If the query is in the format [patient_id, summary_report], generate a summary report using the template below.
Use the available information from the context to fill in each section.
Include relevant dates and timeline information in each section.
If information for a section is not available, state "No information available" for that section.
Provide concise and accurate information based on the given context.
Adapt the template as needed to fit the patient's specific medical history and conditions.

Summary Report Template:

Patient Summary Report for {patient_id}
1. Demographics

Name: [First Name] [Last Name]
Date of Birth: [DOB]
Gender: [Gender]
Contact Information:

Address: [Address]
Phone: [Phone Number]
Email: [Email Address]



2. Past Medical History & Procedures

Chronic Conditions: [List of chronic conditions with diagnosis dates]
Major Illnesses: [List of major illnesses with dates]
Surgical Procedures: [List of surgical procedures with dates]
Other Significant Medical Events: [List with dates]
Your interpretation: [Your interpretation of the medical records]

3. Medication History
[List each current medication with the following information]

Name: [Medication Name]
Dosage: [Dosage]
Frequency: [Frequency]
Start Date: [Start Date]
Prescriber: [Prescriber Name]
Purpose: [Brief description of why the medication is prescribed]

[Include a brief list of significant past medications, if available]
4. Allergies and Adverse Reactions

Medication Allergies: [List or "No known medication allergies"]
Other Allergies: [List or "No known other allergies"]
Adverse Reactions: [List any significant adverse reactions to treatments or medications]

5. Social History & Occupation

Occupation: [Current or most recent occupation]
Smoking Status: [Current smoker, former smoker, never smoker]
Alcohol Use: [Description of alcohol use]
Recreational Drug Use: [If applicable]
Exercise Habits: [Brief description]
Diet: [Any significant dietary information]
Other Relevant Social Factors: [e.g., living situation, support system]
Your interpretation: [Your interpretation of the social history]

6. Physical Examination & Vital Signs
Most Recent Vital Signs (Date: [Date of most recent vital signs])

Blood Pressure: [BP]
Heart Rate: [HR]
Respiratory Rate: [RR]
Temperature: [Temp]
Oxygen Saturation: [O2 Sat]
Weight: [Weight]
Height: [Height]
BMI: [BMI]
Your interpretation: [Your interpretation of the vital signs]
[Include any significant physical examination findings]

7. Laboratory Results
[List most recent significant laboratory tests with dates, results, and normal ranges]

8. Imaging and Diagnostic Results
[List recent imaging studies and other diagnostic tests with dates and summary of results]

9. Treatment Plan and Interventions

Current Treatment Plans: [List current treatments or interventions]
Ongoing Therapies: [e.g., physical therapy, chemotherapy, dialysis]
Recent Changes in Management: [Any recent significant changes in treatment]
Your interpretation: [Your interpretation of the treatment plan]

10. Immunizations
[List relevant immunizations with dates]

11. Upcoming Appointments and Follow-ups
[List any scheduled appointments with dates, types, and locations]


Answer: [Generate the report based on the template above, filling in the available information from the context]

Answer: """


def _nexusync_settings(input_dirs, embedding_model, language_model, temperature):
    """Return the keyword arguments shared by ``NexuSync`` and ``rebuild_index``."""
    return {
        "input_dirs": input_dirs,
        "openai_model_yn": OPENAI_MODEL_YN,
        "embedding_model": embedding_model,
        "language_model": language_model,
        "temperature": temperature,
        "chroma_db_dir": CHROMA_DB_DIR,
        "index_persist_dir": INDEX_PERSIST_DIR,
        "chroma_collection_name": CHROMA_COLLECTION_NAME,
        "chunk_overlap": CHUNK_OVERLAP,
        "chunk_size": CHUNK_SIZE,
        "recursive": RECURSIVE,
    }


ns = NexuSync(
    **_nexusync_settings(INPUT_DIRS, EMBEDDING_MODEL, LANGUAGE_MODEL, TEMPERATURE)
)


# Initialize the Chat Engine Once
ns.initialize_stream_chat(
    text_qa_template=text_qa_template, chat_mode="context", similarity_top_k=3
)


# Root Route - Serve the index.html file
@app.route("/")
def index():
    """Serve the single-page front end."""
    return send_from_directory(".", "index.html")


@app.route("/chat", methods=["POST"])
def chat():
    """
    Stream the answer to the posted ``message`` as newline-delimited JSON.

    Each token is sent as ``{"response": token}``. The stream ends with
    ``{"sources": ..., "final": true}``, or with ``{"error": ...}`` when the
    chat engine reports a failure.
    """
    # silent=True lets malformed or non-JSON bodies reach the 400 response below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or "message" not in data:
        return jsonify({"error": "Invalid request. 'message' field is required."}), 400

    user_input = data["message"]

    def generate_response():
        try:
            source_file_paths = []

            for item in ns.chat_engine.chat_stream(user_input):
                if isinstance(item, str):
                    # Stream individual tokens
                    yield json.dumps({"response": item}) + "\n"
                    continue
                if not isinstance(item, dict):
                    continue

                metadata = item.get("metadata", {})
                if "sources" not in metadata:
                    # chat_stream reports failures as a dict with empty metadata;
                    # forward the message instead of claiming "No sources found".
                    yield json.dumps(
                        {
                            "error": item.get(
                                "response",
                                "An error occurred while processing your request.",
                            )
                        }
                    ) + "\n"
                    continue

                # Final response with metadata: extract source file paths
                for source in metadata.get("sources", []):
                    metadata_info = source.get("metadata", {})
                    source_file_paths.append(
                        metadata_info.get("file_path", "Unknown source")
                    )

                # Remove duplicates while preserving order
                source_file_paths = list(dict.fromkeys(source_file_paths))

                if source_file_paths:
                    sources_formatted = "\n".join(
                        f"- {path}" for path in source_file_paths
                    )
                else:
                    sources_formatted = "No sources found"
                yield json.dumps(
                    {"sources": sources_formatted, "final": True}
                ) + "\n"

        except Exception as e:
            logging.error(f"Error in chat endpoint: {e}", exc_info=True)
            yield json.dumps(
                {"error": f"An error occurred while processing your request: {str(e)}"}
            ) + "\n"

    return Response(generate_response(), mimetype="application/json")


@app.route("/rebuild_index", methods=["POST"])
def rebuild_index_route():
    """
    Rebuild the index with the posted settings and swap in a fresh NexuSync.

    The module-level settings and ``ns`` are replaced only after the rebuild
    and the chat-engine initialization have both succeeded, so a failed
    rebuild leaves the running configuration untouched.
    """
    global ns, EMBEDDING_MODEL, LANGUAGE_MODEL, TEMPERATURE, INPUT_DIRS

    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not data:
        return jsonify({"error": "No data provided"}), 400

    embedding_model = data.get("embedding_model", EMBEDDING_MODEL)
    language_model = data.get("llm_model", LANGUAGE_MODEL)
    temperature = data.get("temperature", TEMPERATURE)

    # NOTE(security): input_dirs comes from the client and selects which server
    # directories get indexed; restrict it if this API is exposed to untrusted users.
    input_dirs = data.get("input_dirs", INPUT_DIRS)
    if isinstance(input_dirs, str):
        input_dirs = [input_dirs]
    if (
        not isinstance(input_dirs, list)
        or not input_dirs
        or not all(isinstance(path, str) for path in input_dirs)
    ):
        return (
            jsonify(
                {"error": "'input_dirs' must be a non-empty list of directory paths."}
            ),
            400,
        )

    try:
        settings = _nexusync_settings(
            input_dirs, embedding_model, language_model, temperature
        )

        # Rebuild index
        rebuild_index(**settings)

        # Reinitialize NexuSync and its chat engine
        new_ns = NexuSync(**settings)
        new_ns.initialize_stream_chat(
            text_qa_template=text_qa_template, chat_mode="context", similarity_top_k=3
        )
    except Exception as e:
        app.logger.error(f"Error rebuilding index: {e}", exc_info=True)
        return jsonify({"error": str(e)}), 500

    # Commit the new configuration only now that everything succeeded.
    EMBEDDING_MODEL = embedding_model
    LANGUAGE_MODEL = language_model
    TEMPERATURE = temperature
    INPUT_DIRS = input_dirs
    ns = new_ns
    return jsonify({"status": "Index rebuilt successfully"}), 200


@app.route("/reset_chat", methods=["POST"])
def reset_chat():
    """Clear the chat history held by the active chat engine."""
    try:
        ns.chat_engine.clear_chat_history()
        return jsonify({"status": "Chat history cleared successfully."}), 200
    except Exception as e:
        logging.error(f"Error resetting chat history: {e}", exc_info=True)
        return jsonify({"error": f"An error occurred: {str(e)}"}), 500


@app.route("/refresh_index", methods=["POST"])
def refresh_index():
    """Incrementally refresh the index from the configured input directories."""
    try:
        ns.indexer.refresh()
        return jsonify({"status": "Index refreshed successfully."}), 200
    except Exception as e:
        logging.error(f"Error refreshing index: {e}", exc_info=True)
        return jsonify({"error": f"An error occurred: {str(e)}"}), 500


if __name__ == "__main__":
    # The Werkzeug debugger allows arbitrary code execution, so it must not be
    # enabled by default on a server bound to all interfaces. Opt in explicitly
    # with NEXUSYNC_DEBUG=1 during local development.
    debug_enabled = os.environ.get("NEXUSYNC_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(host="0.0.0.0", port=2024, debug=debug_enabled)
2 | [GitHub](https://github.com/Zakk-Yang/nexusync)
3 | [![Downloads](https://static.pepy.tech/badge/nexusync)](https://pepy.tech/project/nexusync) 4 | 5 |

6 | NexuSync Logo 7 |

8 | 9 | 10 | Newest version = 0.3.6: the torch package needs to be installed separately so that it matches your system environment. 11 | 12 | Development Plan for the next version: 13 | - Adding PDF OCR using ollama llama3.2 vision 14 | 15 | 16 | # NexuSync 17 | 18 | *NexuSync* is a lightweight yet powerful library for building Retrieval-Augmented Generation (RAG) systems, built on top of **LlamaIndex**. It offers a simple and user-friendly interface for developers to configure and deploy RAG systems efficiently. Choose between using the **Ollama LLM** model for offline, privacy-focused applications or the **OpenAI API** for a hosted solution. 19 | 20 | --- 21 | 22 | ## 🚀 Features 23 | 24 | - **Lightweight Design**: Simplify the integration and configuration of RAG systems without unnecessary complexity. 25 | - **User-Friendly Interface**: Intuitive APIs and clear documentation make setup a breeze. 26 | - **Flexible Document Indexing**: Automatically index documents from specified directories, keeping your knowledge base up-to-date. 27 | - **Efficient Querying**: Use natural language to query your document collection and get relevant answers quickly. 28 | - **Conversational Interface**: Engage in chat-like interactions for more intuitive information retrieval. 29 | - **Customizable Embedding Options**: Choose between HuggingFace Embedding models or OpenAI's offerings. 30 | - **Incremental Updates**: Easily update and insert new documents into the index or delete the index for removed documents. 31 | - **Automatic Deletion Handling**: Documents removed from the filesystem are automatically removed from the index. 32 | - **Extensive File Format Support**: Supports multiple file formats including `.csv`, `.docx`, `.epub`, `.hwp`, `.ipynb`, `.mbox`, `.md`, `.pdf`, `.png`, `.ppt`, `.pptm`, `.pptx`, `.json`, and more.
33 | 34 | 35 | --- 36 | 37 | 38 | ## 🛠 Prerequisites 39 | - Python 3.10 or higher 40 | - Install PyTorch, please visit https://pytorch.org/get-started/locally/ 41 | - Install Ollama: https://ollama.com/download or OpenAI API (need to create .env file to include OPENAI_API_KEY = 'sk-xxx') 42 | - We suggest using conda to manage your environment and avoid environment conflicts: 43 | 44 | **Install `conda` for WSL2 (Windows Subsystem for Linux 2)**: 45 | 1. Open your WSL2 terminal 46 | 2. Download the Miniconda installer: 47 | `wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh` 48 | 3. Run the installer: 49 | `bash Miniconda3-latest-Linux-x86_64.sh` 50 | 4. Follow the prompts to complete the installation 51 | 5. Restart your terminal or run `source ~/.bashrc` 52 | 53 | **Install `conda` for Windows**: 54 | 1. Download the Miniconda installer for Windows from https://docs.conda.io/en/latest/miniconda.html 55 | 2. Run the .exe file and follow the installation prompts 56 | 3. Choose whether to add Conda to your PATH environment variable during installation 57 | 58 | **Install `conda` for Linux**: 59 | 1. Open a terminal 60 | 2. Download the Miniconda installer: 61 | `wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh` 62 | 3. Run the installer: 63 | `bash Miniconda3-latest-Linux-x86_64.sh` 64 | 4. Follow the prompts to complete the installation 65 | 5. Restart your terminal or run `source ~/.bashrc` 66 | 67 | **Install `conda` for macOS**: 68 | 1. Open a terminal 69 | 2. Download the Miniconda installer: 70 | `curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh` 71 | 3. Run the installer: 72 | `bash Miniconda3-latest-MacOSX-x86_64.sh` 73 | 4. Follow the prompts to complete the installation 74 | 5. Restart your terminal or run `source ~/.bash_profile` 75 | 76 | **After installation on any platform, verify the installation by running**: 77 | `conda --version` 78 | 79 | --- 80 | 81 | 82 | ## 📦Installation 83 | 1.
Use conda to create an environment in your project folder (replace `<your_env_name>` with a name of your choice): 84 | ```bash 85 | conda create --name <your_env_name> python=3.10 86 | conda activate <your_env_name> 87 | ``` 88 | 89 | 2. Then install NexuSync inside your conda environment by running the following command: 90 | 91 | ```bash 92 | pip install nexusync 93 | ``` 94 | Or `git clone https://github.com/Zakk-Yang/nexusync.git` 95 | 96 | 97 | 3. Install PyTorch (https://pytorch.org/get-started/locally/): 98 | - If you are using CUDA, make sure your CUDA version matches: 99 | - For CUDA 11.8 (example, for Windows and WSL2/Linux) 100 | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118` 101 | - For CUDA 12.1 (example, for Windows and WSL2/Linux) 102 | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121` 103 | - For macOS 104 | `pip3 install torch torchvision torchaudio` 105 | 106 | --- 107 | 108 | 109 | ## 🌟 Quick Start 110 | 111 | Here's how you can get started with NexuSync: 112 | 113 | 1. ### Import NexuSync 114 | ```python 115 | from nexusync import NexuSync 116 | ``` 117 | 2.
### Choose Your Model 118 | ##### **Option A: Using OpenAI Model** 119 | ```python 120 | #------- Use OpenAI Model ------- 121 | # Customize your parameters for openai model, create .env file in the project folder to include OPENAI_API_KEY = 'sk-xxx' 122 | OPENAI_MODEL_YN = True 123 | EMBEDDING_MODEL = "text-embedding-3-large" 124 | LANGUAGE_MODEL = "gpt-4o-mini" 125 | TEMPERATURE = 0.4 # range from 0 to 1, higher means higher creativity level 126 | CHROMA_DB_DIR = 'chroma_db' # Your path to the chroma db 127 | INDEX_PERSIST_DIR = 'index_storage' # Your path to the index storage 128 | CHROMA_COLLECTION_NAME = 'my_collection' 129 | INPUT_DIRS = ["../sample_docs"] # can specify multiple document paths 130 | CHUNK_SIZE = 1024 # Size of text chunks for creating embeddings 131 | CHUNK_OVERLAP = 20 # Overlap between text chunks to maintain context 132 | RECURSIVE = True # Recursive or not under one folder 133 | ``` 134 | 135 | ##### **Option B: Using Ollama Model** 136 | ```python 137 | #------- Use Ollama Model ------- 138 | # Customize your parameters for ollama model 139 | OPENAI_MODEL_YN = False # if False, you will use ollama model 140 | EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5" # suggested embedding model, you can replace it with any HuggingFace embedding model 141 | LANGUAGE_MODEL = 'llama3.2' # you need to download ollama model first, please check https://ollama.com/download 142 | BASE_URL = "http://localhost:11434" # you can switch to a different base_url for the Ollama model 143 | TEMPERATURE = 0.4 # range from 0 to 1, higher means higher creativity level 144 | CHROMA_DB_DIR = 'chroma_db' # Your path to the chroma db 145 | INDEX_PERSIST_DIR = 'index_storage' # Your path to the index storage 146 | CHROMA_COLLECTION_NAME = 'my_collection' 147 | INPUT_DIRS = ["../sample_docs"] # can specify multiple document paths 148 | CHUNK_SIZE = 1024 # Size of text chunks for creating embeddings 149 | CHUNK_OVERLAP = 20 # Overlap between text chunks to maintain context 150 |
RECURSIVE = True # Recursive or not under one folder 151 | ``` 152 | 153 | ### 3. Initialize Vector DB 154 | ```python 155 | # example for Ollama Model 156 | ns = NexuSync(input_dirs=INPUT_DIRS, 157 | openai_model_yn=False, 158 | embedding_model=EMBEDDING_MODEL, 159 | language_model=LANGUAGE_MODEL, 160 | base_url = BASE_URL, # OpenAI model does not need base_url, here we use Ollama Model as an example 161 | temperature=TEMPERATURE, 162 | chroma_db_dir = CHROMA_DB_DIR, 163 | index_persist_dir = INDEX_PERSIST_DIR, 164 | chroma_collection_name=CHROMA_COLLECTION_NAME, 165 | chunk_overlap=CHUNK_OVERLAP, 166 | chunk_size=CHUNK_SIZE, 167 | recursive=RECURSIVE 168 | ) 169 | ``` 170 | 171 | ### 4. Start Querying (quick querying with no memory) 172 | ```python 173 | #------- Start Querying (one-time, no memory and without stream chat) ----- 174 | query = "main result of the paper can llm generate novel ideas" 175 | 176 | text_qa_template = """ 177 | Context Information: 178 | -------------------- 179 | {context_str} 180 | -------------------- 181 | 182 | Query: {query_str} 183 | 184 | Instructions: 185 | 1. Carefully read the context information and the query. 186 | 2. Think through the problem step by step. 187 | 3. Provide a concise and accurate answer based on the given context. 188 | 4. If the answer cannot be determined from the context, state "Based on the given information, I cannot provide a definitive answer." 189 | 5. If you need to make any assumptions, clearly state them. 190 | 6. If relevant, provide a brief explanation of your reasoning. 191 | 192 | Answer: """ 193 | 194 | response = ns.start_query(text_qa_template = text_qa_template, query = query ) 195 | 196 | print(f"Query: {query}") 197 | print(f"Response: {response['response']}") 198 | print(f"Metadata: {response['metadata']}") 199 | ``` 200 | 201 | ### 5.
Engage in Stream Chat (token by token output, with Memory) 202 | ```python 203 | # First, initialize the stream chat engine 204 | ns.initialize_stream_chat( 205 | text_qa_template=text_qa_template, 206 | chat_mode="context", 207 | similarity_top_k=3 208 | ) 209 | 210 | query = "main result of the paper can llm generate novel ideas" 211 | 212 | for item in ns.start_chat_stream(query): 213 | if isinstance(item, str): 214 | # This is a token, print or process as needed 215 | print(item, end='', flush=True) 216 | else: 217 | # This is the final response with metadata 218 | print("\n\nFull response:", item['response']) 219 | print("Metadata:", item['metadata']) 220 | break 221 | ``` 222 | 223 | ### 6. Access Chat History (for stream chat) 224 | ```python 225 | chat_history = ns.chat_engine.get_chat_history() 226 | print("Chat History:") 227 | for entry in chat_history: 228 | print(f"Human: {entry['query']}") 229 | print(f"AI: {entry['response']}\n") 230 | ``` 231 | 232 | ### 7. Incrementally Refresh Index 233 | ```python 234 | #------- Incrementally Refresh Index without Rebuilding it ----- 235 | # If you have files modified, inserted or deleted, you don't need to rebuild the whole index 236 | ns.refresh_index() 237 | ``` 238 | ### 8.
Rebuild Index From Scratch 239 | ```python 240 | #------- Rebuild Index ----- 241 | # Rebuild the index when any of the following is changed: 242 | # - openai_model_yn 243 | # - embedding_model 244 | # - language_model 245 | # - base_url 246 | # - chroma_db_dir 247 | # - index_persist_dir 248 | # - chroma_collection_name 249 | # - chunk_overlap 250 | # - chunk_size 251 | # - recursive 252 | 253 | from nexusync import rebuild_index 254 | from nexusync import NexuSync 255 | 256 | OPENAI_MODEL_YN = True # if False, you will use ollama model 257 | EMBEDDING_MODEL = "text-embedding-3-large" # suggested embedding model 258 | LANGUAGE_MODEL = 'gpt-4o-mini' # OpenAI model name; if you use an Ollama model instead, download it first, see https://ollama.com/download 259 | TEMPERATURE = 0.4 # range from 0 to 1, higher means higher creativity level 260 | CHROMA_DB_DIR = 'chroma_db' 261 | INDEX_PERSIST_DIR = 'index_storage' 262 | CHROMA_COLLECTION_NAME = 'my_collection' 263 | INPUT_DIRS = ["../sample_docs"] # can specify multiple document paths 264 | CHUNK_SIZE = 1024 265 | CHUNK_OVERLAP = 20 266 | RECURSIVE = True 267 | 268 | # Assume we changed the model from Ollama to OPENAI 269 | rebuild_index(input_dirs=INPUT_DIRS, 270 | openai_model_yn=OPENAI_MODEL_YN, 271 | embedding_model=EMBEDDING_MODEL, 272 | language_model=LANGUAGE_MODEL, 273 | temperature=TEMPERATURE, 274 | chroma_db_dir = CHROMA_DB_DIR, 275 | index_persist_dir = INDEX_PERSIST_DIR, 276 | chroma_collection_name=CHROMA_COLLECTION_NAME, 277 | chunk_overlap=CHUNK_OVERLAP, 278 | chunk_size=CHUNK_SIZE, 279 | recursive=RECURSIVE 280 | ) 281 | 282 | # Reinitialize ns after rebuilding the index 283 | ns = NexuSync(input_dirs=INPUT_DIRS, 284 | openai_model_yn=OPENAI_MODEL_YN, 285 | embedding_model=EMBEDDING_MODEL, 286 | language_model=LANGUAGE_MODEL, 287 | temperature=TEMPERATURE, 288 | chroma_db_dir = CHROMA_DB_DIR, 289 | index_persist_dir = INDEX_PERSIST_DIR, 290 | chroma_collection_name=CHROMA_COLLECTION_NAME, 291 | 
chunk_overlap=CHUNK_OVERLAP, 292 | chunk_size=CHUNK_SIZE, 293 | recursive=RECURSIVE 294 | ) 295 | 296 | # Test the newly built index 297 | query = "main result of the paper can llm generate novltive ideas" 298 | 299 | text_qa_template = """ 300 | Context Information: 301 | -------------------- 302 | {context_str} 303 | -------------------- 304 | 305 | Query: {query_str} 306 | 307 | Instructions: 308 | 1. Carefully read the context information and the query. 309 | 2. Think through the problem step by step. 310 | 3. Provide a concise and accurate answer based on the given context. 311 | 4. If the answer cannot be determined from the context, state "Based on the given information, I cannot provide a definitive answer." 312 | 5. If you need to make any assumptions, clearly state them. 313 | 6. If relevant, provide a brief explanation of your reasoning. 314 | 315 | Answer: """ 316 | 317 | 318 | response = ns.start_query(text_qa_template = text_qa_template, query = query ) 319 | 320 | print(f"Query: {query}") 321 | print(f"Response: {response['response']}") 322 | print(f"Response: {response['metadata']}") 323 | ``` 324 | --- 325 | 326 | ## 🎯 User Interface 327 | 1. git clone or download this project: 328 | ```bash 329 | git clone https://github.com/Zakk-Yang/nexusync.git 330 | ``` 331 | 2. Configure Backend 332 | - Open back_end_api.py in your IDE. 333 | - Adjust the parameters according to your requirements. 334 | 335 | 3. Open the terminal and run 336 | ``` 337 | python back_end_api.py 338 | ``` 339 | Ensure that the parameters in `back_end_api.py` align with the settings in the side panel of the interface. If not, copy and paste your desired Embedding Model and Language Model in the side panel and click "Apply Settings". 340 | 341 | 4. Start interacting with your data! 342 | 343 |

344 | Screen Shot 345 |

346 | 347 | --- 348 | 349 | ## 📚 Documentation & Examples 350 | For more detailed usage examples, check out the demo notebooks. 351 | 352 | --- 353 | 354 | ## 📝 License 355 | This project is licensed under the MIT License - see the LICENSE file for details. 356 | 357 | --- 358 | 359 | ## 📫 Contact 360 | For questions or suggestions, feel free to open an issue or contact the maintainer: 361 | 362 | Name: Zakk Yang 363 | Email: zakkyang@hotmail.com 364 | GitHub: Zakk-Yang 365 | 366 | --- 367 | 368 | ## 🌟 Support 369 | If you find this project helpful, please give it a ⭐ on [GitHub](https://github.com/Zakk-Yang/nexusync)! Your support is appreciated. 370 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | NexuSync Chat Interface 9 | 217 | 218 | 219 |
220 |
221 |
222 |

Settings

223 |
224 | 225 | 226 |
227 |
228 | 229 | 230 |
231 |
232 | 233 | 234 |
235 |
236 | 237 | 238 |
239 | 240 |
241 |
242 |
243 |
244 |
245 | 246 | NexuSync Chat 247 |
248 |
249 | 250 | 251 |
252 |
253 |
254 | 255 |
256 |
257 | 258 | 259 |
260 |
261 |
262 |
263 | 264 | 509 | 510 | -------------------------------------------------------------------------------- /notebooks/data_structure_generator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Sample json generator for the NHS application" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "'health_records.json'" 19 | ] 20 | }, 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "output_type": "execute_result" 24 | } 25 | ], 26 | "source": [ 27 | "import json\n", 28 | "from datetime import datetime\n", 29 | "import random\n", 30 | "\n", 31 | "# Sample JSON structure correction and creation of 10 patients with datetime and location_name\n", 32 | "\n", 33 | "# Function to generate random dates for testing purposes\n", 34 | "def random_date(start, end):\n", 35 | " return start + (end - start) * random.random()\n", 36 | "\n", 37 | "# Clinic locations\n", 38 | "clinic_names = [\n", 39 | " \"NHS Springfield Clinic\", \"City Health Centre\", \"Riverbend Medical Centre\",\n", 40 | " \"Hillside Clinic\", \"Springfield Wellness Center\", \"Pinewood Clinic\",\n", 41 | " \"Downtown Medical Facility\", \"Riverside Clinic\", \"Green Valley Medical\",\n", 42 | " \"Maple Grove Health Center\"\n", 43 | "]\n", 44 | "\n", 45 | "# Base patient data structure\n", 46 | "def generate_patient(patient_id):\n", 47 | " patient = {\n", 48 | " \"demographics\": {\n", 49 | " \"patient_id\": patient_id,\n", 50 | " \"first_name\": f\"Patient{patient_id}\",\n", 51 | " \"last_name\": \"Doe\",\n", 52 | " \"date_of_birth\": random_date(datetime(1970, 1, 1), datetime(2000, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 53 | " \"gender\": random.choice([\"Female\", \"Male\"]),\n", 54 | " \"contact_information\": {\n", 55 | " \"address\": f\"{random.randint(100, 
999)} Elm Street, Springfield, IL, 62704\",\n", 56 | " \"phone\": f\"+44 7911 {random.randint(100000, 999999)}\",\n", 57 | " \"email\": f\"patient{patient_id}@example.com\"\n", 58 | " }\n", 59 | " },\n", 60 | " \"emergency_contact\": {\n", 61 | " \"name\": f\"Spouse of Patient{patient_id}\",\n", 62 | " \"relationship\": \"Spouse\",\n", 63 | " \"phone\": f\"+44 7911 {random.randint(100000, 999999)}\"\n", 64 | " },\n", 65 | " \"medical_history\": [\n", 66 | " {\n", 67 | " \"condition\": \"Hypertension\",\n", 68 | " \"diagnosis_date\": random_date(datetime(2010, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 69 | " \"notes\": \"Managing blood pressure with medication and lifestyle changes.\"\n", 70 | " },\n", 71 | " {\n", 72 | " \"condition\": \"Type 2 Diabetes Mellitus\",\n", 73 | " \"diagnosis_date\": random_date(datetime(2010, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 74 | " \"notes\": \"Controlled through diet, exercise, and medication.\"\n", 75 | " }\n", 76 | " ],\n", 77 | " \"medications\": [\n", 78 | " {\n", 79 | " \"medication_id\": f\"M{random.randint(1000, 9999)}\",\n", 80 | " \"name\": \"Lisinopril\",\n", 81 | " \"dosage\": \"10 mg\",\n", 82 | " \"frequency\": \"Once daily\",\n", 83 | " \"start_date\": random_date(datetime(2010, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 84 | " \"end_date\": None,\n", 85 | " \"prescriber\": \"Dr. 
Emily Smith\",\n", 86 | " \"location_name\": random.choice(clinic_names)\n", 87 | " }\n", 88 | " ],\n", 89 | " \"immunizations\": [\n", 90 | " {\n", 91 | " \"immunization_id\": f\"I{random.randint(1000, 9999)}\",\n", 92 | " \"vaccine\": \"Influenza\",\n", 93 | " \"date_administered\": random_date(datetime(2010, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 94 | " \"administered_by\": \"NHS Clinic\",\n", 95 | " \"location_name\": random.choice(clinic_names)\n", 96 | " }\n", 97 | " ],\n", 98 | " \"blood_test_results\": [\n", 99 | " {\n", 100 | " \"lab_id\": f\"L{random.randint(1000, 9999)}\",\n", 101 | " \"test_name\": \"Complete Blood Count (CBC)\",\n", 102 | " \"date\": random_date(datetime(2010, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 103 | " \"results\": {\n", 104 | " \"Hemoglobin\": \"13.5 g/dL\",\n", 105 | " \"White Blood Cells\": \"6.2 x10^3/µL\",\n", 106 | " \"Platelets\": \"250 x10^3/µL\"\n", 107 | " },\n", 108 | " \"normal_ranges\": {\n", 109 | " \"Hemoglobin\": \"12-16 g/dL\",\n", 110 | " \"White Blood Cells\": \"4-11 x10^3/µL\",\n", 111 | " \"Platelets\": \"150-450 x10^3/µL\"\n", 112 | " },\n", 113 | " \"interpretation\": \"All values within normal limits.\"\n", 114 | " }\n", 115 | " ],\n", 116 | " \"appointments\": [\n", 117 | " {\n", 118 | " \"appointment_id\": f\"AP{random.randint(1000, 9999)}\",\n", 119 | " \"date_time\": random_date(datetime(2023, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 120 | " \"type\": \"General Consultation\",\n", 121 | " \"with\": \"Dr. 
Emily Smith\",\n", 122 | " \"location_name\": random.choice(clinic_names),\n", 123 | " \"status\": \"Scheduled\",\n", 124 | " \"notes\": \"Review HbA1c results.\"\n", 125 | " }\n", 126 | " ],\n", 127 | " \"vital_signs\": [\n", 128 | " {\n", 129 | " \"vital_id\": f\"V{random.randint(1000, 9999)}\",\n", 130 | " \"date\": random_date(datetime(2023, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 131 | " \"blood_pressure\": \"120/80 mmHg\",\n", 132 | " \"heart_rate\": \"72 bpm\",\n", 133 | " \"respiratory_rate\": \"16 breaths/min\",\n", 134 | " \"temperature\": \"98.6°F\",\n", 135 | " \"oxygen_saturation\": \"98%\",\n", 136 | " \"weight\": \"70 kg\",\n", 137 | " \"height\": \"165 cm\",\n", 138 | " \"bmi\": \"25.7\"\n", 139 | " }\n", 140 | " ]\n", 141 | " }\n", 142 | " return patient\n", 143 | "\n", 144 | "# Generate 10 patients\n", 145 | "patients_data = [generate_patient(f\"P{str(i).zfill(6)}\") for i in range(10)]\n", 146 | "\n", 147 | "# Save the JSON to a file for download\n", 148 | "file_path = 'health_records.json'\n", 149 | "with open(file_path, 'w') as file:\n", 150 | " json.dump(patients_data, file, indent=4)\n", 151 | "\n", 152 | "file_path\n" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 4, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | "Cancer patient health records have been generated and saved to cancer_health_records.json\n" 165 | ] 166 | } 167 | ], 168 | "source": [ 169 | "import json\n", 170 | "from datetime import datetime, timedelta\n", 171 | "import random\n", 172 | "\n", 173 | "def random_date(start, end):\n", 174 | " return start + timedelta(seconds=random.randint(0, int((end - start).total_seconds())))\n", 175 | "\n", 176 | "clinic_names = [\n", 177 | " \"NHS Oncology Centre\", \"City Cancer Institute\", \"Riverbend Oncology Clinic\",\n", 178 | " \"Hillside Cancer Care\", \"Springfield Cancer Center\", \"Pinewood 
Oncology\",\n", 179 | " \"Downtown Cancer Facility\", \"Riverside Oncology Clinic\", \"Green Valley Cancer Institute\",\n", 180 | " \"Maple Grove Oncology Center\"\n", 181 | "]\n", 182 | "\n", 183 | "cancer_types = [\n", 184 | " \"Breast Cancer\", \"Lung Cancer\", \"Colorectal Cancer\", \"Prostate Cancer\",\n", 185 | " \"Leukemia\", \"Lymphoma\", \"Melanoma\", \"Ovarian Cancer\", \"Pancreatic Cancer\", \"Thyroid Cancer\"\n", 186 | "]\n", 187 | "\n", 188 | "def generate_cancer_history(cancer_type):\n", 189 | " diagnosis_date = random_date(datetime(2010, 1, 1), datetime(2023, 12, 31))\n", 190 | " stages = [\"I\", \"II\", \"III\", \"IV\"]\n", 191 | " treatments = {\n", 192 | " \"Breast Cancer\": [\"Mastectomy\", \"Radiation therapy\", \"Chemotherapy\", \"Hormone therapy\"],\n", 193 | " \"Lung Cancer\": [\"Lobectomy\", \"Radiation therapy\", \"Chemotherapy\", \"Immunotherapy\"],\n", 194 | " \"Colorectal Cancer\": [\"Colectomy\", \"Radiation therapy\", \"Chemotherapy\"],\n", 195 | " \"Prostate Cancer\": [\"Prostatectomy\", \"Radiation therapy\", \"Hormone therapy\"],\n", 196 | " \"Leukemia\": [\"Chemotherapy\", \"Stem cell transplant\", \"Targeted therapy\"],\n", 197 | " \"Lymphoma\": [\"Chemotherapy\", \"Radiation therapy\", \"Immunotherapy\"],\n", 198 | " \"Melanoma\": [\"Wide excision\", \"Immunotherapy\", \"Targeted therapy\"],\n", 199 | " \"Ovarian Cancer\": [\"Oophorectomy\", \"Chemotherapy\", \"Targeted therapy\"],\n", 200 | " \"Pancreatic Cancer\": [\"Whipple procedure\", \"Chemotherapy\", \"Radiation therapy\"],\n", 201 | " \"Thyroid Cancer\": [\"Thyroidectomy\", \"Radioactive iodine therapy\", \"Targeted therapy\"]\n", 202 | " }\n", 203 | " \n", 204 | " stage = random.choice(stages)\n", 205 | " treatment = random.sample(treatments[cancer_type], k=random.randint(1, len(treatments[cancer_type])))\n", 206 | " \n", 207 | " return {\n", 208 | " \"condition\": cancer_type,\n", 209 | " \"diagnosis_date\": diagnosis_date.strftime('%Y-%m-%d %H:%M:%S'),\n", 210 | " 
\"stage\": stage,\n", 211 | " \"treatment\": treatment,\n", 212 | " \"notes\": f\"Stage {stage} {cancer_type} diagnosed. Treatment plan includes {', '.join(treatment)}.\"\n", 213 | " }\n", 214 | "\n", 215 | "def generate_medications(cancer_type, treatment):\n", 216 | " medications = []\n", 217 | " if \"Chemotherapy\" in treatment:\n", 218 | " chemo_drugs = {\n", 219 | " \"Breast Cancer\": [\"Doxorubicin\", \"Paclitaxel\", \"Cyclophosphamide\"],\n", 220 | " \"Lung Cancer\": [\"Cisplatin\", \"Carboplatin\", \"Pemetrexed\"],\n", 221 | " \"Colorectal Cancer\": [\"Fluorouracil\", \"Oxaliplatin\", \"Irinotecan\"],\n", 222 | " \"Prostate Cancer\": [\"Docetaxel\", \"Cabazitaxel\"],\n", 223 | " \"Leukemia\": [\"Imatinib\", \"Dasatinib\", \"Nilotinib\"],\n", 224 | " \"Lymphoma\": [\"Rituximab\", \"Cyclophosphamide\", \"Doxorubicin\"],\n", 225 | " \"Melanoma\": [\"Dacarbazine\", \"Temozolomide\"],\n", 226 | " \"Ovarian Cancer\": [\"Paclitaxel\", \"Carboplatin\"],\n", 227 | " \"Pancreatic Cancer\": [\"Gemcitabine\", \"Abraxane\"],\n", 228 | " \"Thyroid Cancer\": [\"Doxorubicin\", \"Cisplatin\"]\n", 229 | " }\n", 230 | " for drug in random.sample(chemo_drugs[cancer_type], k=random.randint(1, len(chemo_drugs[cancer_type]))):\n", 231 | " medications.append({\n", 232 | " \"medication_id\": f\"M{random.randint(1000, 9999)}\",\n", 233 | " \"name\": drug,\n", 234 | " \"dosage\": f\"{random.randint(50, 200)} mg\",\n", 235 | " \"frequency\": \"Every 3 weeks\",\n", 236 | " \"start_date\": random_date(datetime(2023, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 237 | " \"end_date\": None,\n", 238 | " \"prescriber\": \"Dr. 
John Oncologist\",\n", 239 | " \"location_name\": random.choice(clinic_names)\n", 240 | " })\n", 241 | " \n", 242 | " if \"Hormone therapy\" in treatment:\n", 243 | " hormone_drugs = [\"Tamoxifen\", \"Anastrozole\", \"Letrozole\"]\n", 244 | " medications.append({\n", 245 | " \"medication_id\": f\"M{random.randint(1000, 9999)}\",\n", 246 | " \"name\": random.choice(hormone_drugs),\n", 247 | " \"dosage\": \"20 mg\",\n", 248 | " \"frequency\": \"Once daily\",\n", 249 | " \"start_date\": random_date(datetime(2023, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 250 | " \"end_date\": None,\n", 251 | " \"prescriber\": \"Dr. Jane Endocrinologist\",\n", 252 | " \"location_name\": random.choice(clinic_names)\n", 253 | " })\n", 254 | " \n", 255 | " return medications\n", 256 | "\n", 257 | "def generate_blood_tests(cancer_type):\n", 258 | " tests = [\n", 259 | " {\n", 260 | " \"lab_id\": f\"L{random.randint(1000, 9999)}\",\n", 261 | " \"test_name\": \"Complete Blood Count (CBC)\",\n", 262 | " \"date\": random_date(datetime(2023, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 263 | " \"results\": {\n", 264 | " \"Hemoglobin\": f\"{random.uniform(8.0, 14.0):.1f} g/dL\",\n", 265 | " \"White Blood Cells\": f\"{random.uniform(3.0, 11.0):.1f} x10^3/µL\",\n", 266 | " \"Platelets\": f\"{random.randint(100, 400)} x10^3/µL\"\n", 267 | " },\n", 268 | " \"normal_ranges\": {\n", 269 | " \"Hemoglobin\": \"12-16 g/dL\",\n", 270 | " \"White Blood Cells\": \"4-11 x10^3/µL\",\n", 271 | " \"Platelets\": \"150-450 x10^3/µL\"\n", 272 | " },\n", 273 | " \"interpretation\": \"Values affected by ongoing cancer treatment.\"\n", 274 | " }\n", 275 | " ]\n", 276 | " \n", 277 | " if cancer_type in [\"Breast Cancer\", \"Prostate Cancer\", \"Ovarian Cancer\"]:\n", 278 | " tests.append({\n", 279 | " \"lab_id\": f\"L{random.randint(1000, 9999)}\",\n", 280 | " \"test_name\": \"Tumor Marker Test\",\n", 281 | " \"date\": random_date(datetime(2023, 1, 1), datetime(2024, 12, 
31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 282 | " \"results\": {\n", 283 | " \"CA-125\": f\"{random.uniform(0, 100):.1f} U/mL\" if cancer_type == \"Ovarian Cancer\" else None,\n", 284 | " \"PSA\": f\"{random.uniform(0, 10):.1f} ng/mL\" if cancer_type == \"Prostate Cancer\" else None,\n", 285 | " \"CA 15-3\": f\"{random.uniform(0, 50):.1f} U/mL\" if cancer_type == \"Breast Cancer\" else None\n", 286 | " },\n", 287 | " \"normal_ranges\": {\n", 288 | " \"CA-125\": \"<35 U/mL\",\n", 289 | " \"PSA\": \"<4 ng/mL\",\n", 290 | " \"CA 15-3\": \"<30 U/mL\"\n", 291 | " },\n", 292 | " \"interpretation\": \"Elevated levels may indicate disease activity or treatment response.\"\n", 293 | " })\n", 294 | " \n", 295 | " return tests\n", 296 | "\n", 297 | "def generate_appointments(cancer_type, treatment):\n", 298 | " appointments = [\n", 299 | " {\n", 300 | " \"appointment_id\": f\"AP{random.randint(1000, 9999)}\",\n", 301 | " \"date_time\": random_date(datetime(2024, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 302 | " \"type\": \"Oncology Follow-up\",\n", 303 | " \"with\": \"Dr. John Oncologist\",\n", 304 | " \"location_name\": random.choice(clinic_names),\n", 305 | " \"status\": \"Scheduled\",\n", 306 | " \"notes\": \"Review treatment progress and discuss next steps.\"\n", 307 | " }\n", 308 | " ]\n", 309 | " \n", 310 | " if \"Radiation therapy\" in treatment:\n", 311 | " appointments.append({\n", 312 | " \"appointment_id\": f\"AP{random.randint(1000, 9999)}\",\n", 313 | " \"date_time\": random_date(datetime(2024, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 314 | " \"type\": \"Radiation Therapy Session\",\n", 315 | " \"with\": \"Dr. 
Sarah Radiologist\",\n", 316 | " \"location_name\": random.choice(clinic_names),\n", 317 | " \"status\": \"Scheduled\",\n", 318 | " \"notes\": \"Continued radiation treatment as per plan.\"\n", 319 | " })\n", 320 | " \n", 321 | " return appointments\n", 322 | "\n", 323 | "def generate_vital_signs():\n", 324 | " return [\n", 325 | " {\n", 326 | " \"vital_id\": f\"V{random.randint(1000, 9999)}\",\n", 327 | " \"date\": random_date(datetime(2024, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 328 | " \"blood_pressure\": f\"{random.randint(110, 140)}/{random.randint(60, 90)} mmHg\",\n", 329 | " \"heart_rate\": f\"{random.randint(60, 100)} bpm\",\n", 330 | " \"respiratory_rate\": f\"{random.randint(12, 20)} breaths/min\",\n", 331 | " \"temperature\": f\"{random.uniform(97.0, 99.0):.1f}°F\",\n", 332 | " \"oxygen_saturation\": f\"{random.randint(95, 100)}%\",\n", 333 | " \"weight\": f\"{random.randint(50, 90)} kg\",\n", 334 | " \"height\": f\"{random.randint(150, 190)} cm\",\n", 335 | " \"bmi\": f\"{random.uniform(18.5, 29.9):.1f}\"\n", 336 | " }\n", 337 | " ]\n", 338 | "\n", 339 | "def generate_imaging_results(cancer_type):\n", 340 | " imaging_types = {\n", 341 | " \"Breast Cancer\": \"Mammogram\",\n", 342 | " \"Lung Cancer\": \"Chest CT\",\n", 343 | " \"Colorectal Cancer\": \"Abdominal CT\",\n", 344 | " \"Prostate Cancer\": \"Prostate MRI\",\n", 345 | " \"Leukemia\": \"PET-CT\",\n", 346 | " \"Lymphoma\": \"PET-CT\",\n", 347 | " \"Melanoma\": \"Skin and Lymph Node Ultrasound\",\n", 348 | " \"Ovarian Cancer\": \"Pelvic CT\",\n", 349 | " \"Pancreatic Cancer\": \"Abdominal CT\",\n", 350 | " \"Thyroid Cancer\": \"Thyroid Ultrasound\"\n", 351 | " }\n", 352 | " \n", 353 | " return [\n", 354 | " {\n", 355 | " \"imaging_id\": f\"IM{random.randint(1000, 9999)}\",\n", 356 | " \"date\": random_date(datetime(2023, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 357 | " \"type\": imaging_types[cancer_type],\n", 358 | " \"location_name\": 
random.choice(clinic_names),\n", 359 | " \"results\": f\"Follow-up {imaging_types[cancer_type]} shows {random.choice(['stable disease', 'partial response', 'complete response', 'progressive disease'])}.\",\n", 360 | " \"radiologist\": \"Dr. Emily Imaging\"\n", 361 | " }\n", 362 | " ]\n", 363 | "\n", 364 | "def generate_patient(patient_id):\n", 365 | " cancer_type = random.choice(cancer_types)\n", 366 | " cancer_history = generate_cancer_history(cancer_type)\n", 367 | " \n", 368 | " patient = {\n", 369 | " \"demographics\": {\n", 370 | " \"patient_id\": patient_id,\n", 371 | " \"first_name\": f\"Patient{patient_id}\",\n", 372 | " \"last_name\": \"Doe\",\n", 373 | " \"date_of_birth\": random_date(datetime(1950, 1, 1), datetime(1990, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 374 | " \"gender\": random.choice([\"Female\", \"Male\"]),\n", 375 | " \"contact_information\": {\n", 376 | " \"address\": f\"{random.randint(100, 999)} Elm Street, Springfield, IL, 62704\",\n", 377 | " \"phone\": f\"+44 7911 {random.randint(100000, 999999)}\",\n", 378 | " \"email\": f\"patient{patient_id}@example.com\"\n", 379 | " }\n", 380 | " },\n", 381 | " \"emergency_contact\": {\n", 382 | " \"name\": f\"Spouse of Patient{patient_id}\",\n", 383 | " \"relationship\": \"Spouse\",\n", 384 | " \"phone\": f\"+44 7911 {random.randint(100000, 999999)}\"\n", 385 | " },\n", 386 | " \"medical_history\": [cancer_history],\n", 387 | " \"medications\": generate_medications(cancer_type, cancer_history[\"treatment\"]),\n", 388 | " \"immunizations\": [\n", 389 | " {\n", 390 | " \"immunization_id\": f\"I{random.randint(1000, 9999)}\",\n", 391 | " \"vaccine\": \"Influenza\",\n", 392 | " \"date_administered\": random_date(datetime(2023, 1, 1), datetime(2024, 12, 31)).strftime('%Y-%m-%d %H:%M:%S'),\n", 393 | " \"administered_by\": \"NHS Oncology Clinic\",\n", 394 | " \"location_name\": random.choice(clinic_names)\n", 395 | " }\n", 396 | " ],\n", 397 | " \"blood_test_results\": 
generate_blood_tests(cancer_type),\n", 398 | " \"appointments\": generate_appointments(cancer_type, cancer_history[\"treatment\"]),\n", 399 | " \"vital_signs\": generate_vital_signs(),\n", 400 | " \"imaging_results\": generate_imaging_results(cancer_type)\n", 401 | " }\n", 402 | " return patient\n", 403 | "\n", 404 | "# Generate 10 patients\n", 405 | "patients_data = [generate_patient(f\"P{str(i).zfill(6)}\") for i in range(10)]\n", 406 | "\n", 407 | "# Save the JSON to a file for download\n", 408 | "file_path = 'cancer_health_records.json'\n", 409 | "with open(file_path, 'w') as file:\n", 410 | " json.dump(patients_data, file, indent=4)\n", 411 | "\n", 412 | "print(f\"Cancer patient health records have been generated and saved to {file_path}\")" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": {}, 419 | "outputs": [], 420 | "source": [] 421 | } 422 | ], 423 | "metadata": { 424 | "kernelspec": { 425 | "display_name": "nhs_hackthon", 426 | "language": "python", 427 | "name": "python3" 428 | }, 429 | "language_info": { 430 | "codemirror_mode": { 431 | "name": "ipython", 432 | "version": 3 433 | }, 434 | "file_extension": ".py", 435 | "mimetype": "text/x-python", 436 | "name": "python", 437 | "nbconvert_exporter": "python", 438 | "pygments_lexer": "ipython3", 439 | "version": "3.10.15" 440 | } 441 | }, 442 | "nbformat": 4, 443 | "nbformat_minor": 2 444 | } 445 | -------------------------------------------------------------------------------- /notebooks/NHS_Application_Test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 10, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "2024-10-12 16:52:18,027 - nexusync.utils.embedding_models.set_embedding_model - INFO - Using OpenAI embedding model: text-embedding-3-large\n", 13 | "2024-10-12 
16:52:18,029 - nexusync.utils.embedding_models.set_language_model - INFO - Using OpenAI LLM model: gpt-4o-mini\n", 14 | "2024-10-12 16:52:18,029 - nexusync.NexuSync - INFO - Vectors and Querier initialized successfully.\n", 15 | "2024-10-12 16:52:18,185 - nexusync.core.indexer - INFO - Index already built. Loading from disk.\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "from nexusync import NexuSync\n", 21 | "\n", 22 | "OPENAI_MODEL_YN = True\n", 23 | "EMBEDDING_MODEL = \"text-embedding-3-large\"\n", 24 | "LANGUAGE_MODEL = \"gpt-4o-mini\"\n", 25 | "TEMPERATURE = 0.01\n", 26 | "INPUT_DIRS = [\"../sample_docs\"]\n", 27 | "CHROMA_DB_DIR = \"chroma_db\"\n", 28 | "INDEX_PERSIST_DIR = \"index_storage\"\n", 29 | "CHROMA_COLLECTION_NAME = \"my_collection\"\n", 30 | "CHUNK_SIZE = 1024\n", 31 | "CHUNK_OVERLAP = 20\n", 32 | "RECURSIVE = True\n", 33 | "\n", 34 | "\n", 35 | "ns = NexuSync(\n", 36 | " input_dirs=INPUT_DIRS,\n", 37 | " openai_model_yn=OPENAI_MODEL_YN,\n", 38 | " embedding_model=EMBEDDING_MODEL,\n", 39 | " language_model=LANGUAGE_MODEL,\n", 40 | " temperature=TEMPERATURE,\n", 41 | " chroma_db_dir=CHROMA_DB_DIR,\n", 42 | " index_persist_dir=INDEX_PERSIST_DIR,\n", 43 | " chroma_collection_name=CHROMA_COLLECTION_NAME,\n", 44 | " chunk_overlap=CHUNK_OVERLAP,\n", 45 | " chunk_size=CHUNK_SIZE,\n", 46 | " recursive=RECURSIVE,\n", 47 | ")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 23, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "text_qa_template = \"\"\"\n", 57 | "Context Information:\n", 58 | "{context_str}\n", 59 | "Query: {query_str}\n", 60 | "Instructions:\n", 61 | "You are helping NHS doctors to review patients' medical records and give interperetations on the results.\n", 62 | "Carefully read the context information and the query.\n", 63 | "If the query is in the format [patient_id, summary_report], generate a summary report using the template below.\n", 64 | "Use the available information from the context to fill 
in each section.\n", 65 | "Include relevant dates and timeline information in each section.\n", 66 | "If information for a section is not available, state \"No information available\" for that section.\n", 67 | "Provide concise and accurate information based on the given context.\n", 68 | "Adapt the template as needed to fit the patient's specific medical history and conditions.\n", 69 | "\n", 70 | "Summary Report Template:\n", 71 | "\n", 72 | "Patient Summary Report for {patient_id}\n", 73 | "1. Demographics\n", 74 | "\n", 75 | "Name: [First Name] [Last Name]\n", 76 | "Date of Birth: [DOB]\n", 77 | "Gender: [Gender]\n", 78 | "Contact Information:\n", 79 | "\n", 80 | "Address: [Address]\n", 81 | "Phone: [Phone Number]\n", 82 | "Email: [Email Address]\n", 83 | "\n", 84 | "\n", 85 | "\n", 86 | "2. Past Medical History & Procedures\n", 87 | "\n", 88 | "Chronic Conditions: [List of chronic conditions with diagnosis dates]\n", 89 | "Major Illnesses: [List of major illnesses with dates]\n", 90 | "Surgical Procedures: [List of surgical procedures with dates]\n", 91 | "Other Significant Medical Events: [List with dates]\n", 92 | "Your interpretation: [Your interpretation of the medical records]\n", 93 | "\n", 94 | "3. Medication History\n", 95 | "[List each current medication with the following information]\n", 96 | "\n", 97 | "Name: [Medication Name]\n", 98 | "Dosage: [Dosage]\n", 99 | "Frequency: [Frequency]\n", 100 | "Start Date: [Start Date]\n", 101 | "Prescriber: [Prescriber Name]\n", 102 | "Purpose: [Brief description of why the medication is prescribed]\n", 103 | "\n", 104 | "[Include a brief list of significant past medications, if available]\n", 105 | "4. Allergies and Adverse Reactions\n", 106 | "\n", 107 | "Medication Allergies: [List or \"No known medication allergies\"]\n", 108 | "Other Allergies: [List or \"No known other allergies\"]\n", 109 | "Adverse Reactions: [List any significant adverse reactions to treatments or medications]\n", 110 | "\n", 111 | "5. 
Social History & Occupation\n", 112 | "\n", 113 | "Occupation: [Current or most recent occupation]\n", 114 | "Smoking Status: [Current smoker, former smoker, never smoker]\n", 115 | "Alcohol Use: [Description of alcohol use]\n", 116 | "Recreational Drug Use: [If applicable]\n", 117 | "Exercise Habits: [Brief description]\n", 118 | "Diet: [Any significant dietary information]\n", 119 | "Other Relevant Social Factors: [e.g., living situation, support system]\n", 120 | "Your interpretation: [Your interpretation of the social history]\n", 121 | "\n", 122 | "6. Physical Examination & Vital Signs\n", 123 | "Most Recent Vital Signs (Date: [Date of most recent vital signs])\n", 124 | "\n", 125 | "Blood Pressure: [BP]\n", 126 | "Heart Rate: [HR]\n", 127 | "Respiratory Rate: [RR]\n", 128 | "Temperature: [Temp]\n", 129 | "Oxygen Saturation: [O2 Sat]\n", 130 | "Weight: [Weight]\n", 131 | "Height: [Height]\n", 132 | "BMI: [BMI]\n", 133 | "Your interpretation: [Your interpretation of the vital signs]\n", 134 | "[Include any significant physical examination findings]\n", 135 | "\n", 136 | "7. Laboratory Results\n", 137 | "[List most recent significant laboratory tests with dates, results, and normal ranges]\n", 138 | "\n", 139 | "8. Imaging and Diagnostic Results\n", 140 | "[List recent imaging studies and other diagnostic tests with dates and summary of results]\n", 141 | "\n", 142 | "9. Treatment Plan and Interventions\n", 143 | "\n", 144 | "Current Treatment Plans: [List current treatments or interventions]\n", 145 | "Ongoing Therapies: [e.g., physical therapy, chemotherapy, dialysis]\n", 146 | "Recent Changes in Management: [Any recent significant changes in treatment]\n", 147 | "Your interpretation: [Your interpretation of the treatment plan]\n", 148 | "\n", 149 | "10. Immunizations\n", 150 | "[List relevant immunizations with dates]\n", 151 | "\n", 152 | "11. 
Upcoming Appointments and Follow-ups\n", 153 | "[List any scheduled appointments with dates, types, and locations]\n", 154 | "\n", 155 | "\n", 156 | "Answer: [Generate the report based on the template above, filling in the available information from the context]\n", 157 | "\n", 158 | "Answer: \"\"\"" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 24, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "name": "stderr", 168 | "output_type": "stream", 169 | "text": [ 170 | "2024-10-13 08:02:24,048 - nexusync.core.chat_engine - INFO - Chat engine initialized\n" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "# Initialize the Chat Engine Once\n", 176 | "ns.initialize_stream_chat(\n", 177 | " text_qa_template=text_qa_template, chat_mode=\"context\", similarity_top_k=3\n", 178 | ")\n" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 25, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "name": "stdout", 188 | "output_type": "stream", 189 | "text": [ 190 | "```markdown\n", 191 | "Here is the summary report for Patient ID P000000:\n", 192 | "\n", 193 | "### Patient Demographics\n", 194 | "- **First Name:** PatientP000000\n", 195 | "- **Last Name:** Doe\n", 196 | "- **Date of Birth:** November 1, 1966\n", 197 | "- **Gender:** Male\n", 198 | "- **Contact Information:**\n", 199 | " - **Address:** 139 Elm Street, Springfield, IL, 62704\n", 200 | " - **Phone:** +44 7911 671531\n", 201 | " - **Email:** patientP000000@example.com\n", 202 | "\n", 203 | "### Emergency Contact\n", 204 | "- **Name:** Spouse of PatientP000000\n", 205 | "- **Relationship:** Spouse\n", 206 | "- **Phone:** +44 7911 376933\n", 207 | "\n", 208 | "### Medical History\n", 209 | "- **Condition:** Leukemia\n", 210 | " - **Diagnosis Date:** July 8, 2018\n", 211 | " - **Stage:** IV\n", 212 | " - **Treatment:** Chemotherapy\n", 213 | " - **Notes:** Stage IV Leukemia diagnosed. 
Treatment plan includes Chemotherapy.\n", 214 | " - **Location Name:** Green Valley Cancer Institute\n", 215 | "\n", 216 | "### Medications\n", 217 | "1. **Name:** Imatinib\n", 218 | " - **Dosage:** 96 mg\n", 219 | " - **Frequency:** Every 3 weeks\n", 220 | " - **Start Date:** March 29, 2024\n", 221 | " - **Prescriber:** Dr. John Oncologist\n", 222 | " - **Location Name:** NHS Oncology Centre\n", 223 | "\n", 224 | "2. **Name:** Nilotinib\n", 225 | " - **Dosage:** 178 mg\n", 226 | " - **Frequency:** Every 3 weeks\n", 227 | " - **Start Date:** September 20, 2023\n", 228 | " - **Prescriber:** Dr. John Oncologist\n", 229 | " - **Location Name:** Green Valley Cancer Institute\n", 230 | "\n", 231 | "3. **Name:** Dasatinib\n", 232 | " - **Dosage:** 123 mg\n", 233 | " - **Frequency:** Every 3 weeks\n", 234 | " - **Start Date:** October 25, 2024\n", 235 | " - **Prescriber:** Dr. John Oncologist\n", 236 | " - **Location Name:** Riverbend Oncology Clinic\n", 237 | "\n", 238 | "### Immunizations\n", 239 | "- **Vaccine:** Influenza\n", 240 | " - **Date Administered:** September 8, 2024\n", 241 | " - **Administered By:** NHS Oncology Clinic\n", 242 | " - **Location Name:** Downtown Cancer Facility\n", 243 | "\n", 244 | "### Blood Test Results\n", 245 | "- **Test Name:** Complete Blood Count (CBC)\n", 246 | " - **Date:** October 22, 2024\n", 247 | " - **Results:**\n", 248 | " - Hemoglobin: 12.7 g/dL\n", 249 | " - White Blood Cells: 3.0 x10^3/µL\n", 250 | " - Platelets: 330 x10^3/µL\n", 251 | " - **Normal Ranges:**\n", 252 | " - Hemoglobin: 12-16 g/dL\n", 253 | " - White Blood Cells: 4-11 x10^3/µL\n", 254 | " - Platelets: 150-450 x10^3/µL\n", 255 | " - **Interpretation:** Values affected by ongoing cancer treatment.\n", 256 | "\n", 257 | "### Appointments\n", 258 | "- **Appointment ID:** AP9886\n", 259 | " - **Date & Time:** December 23, 2024, 18:14:39\n", 260 | " - **Type:** Oncology Follow-up\n", 261 | " - **With:** Dr. 
John Oncologist\n", 262 | " - **Location Name:** Riverside Oncology Clinic\n", 263 | " - **Status:** Scheduled\n", 264 | " - **Notes:** Review treatment progress and discuss next steps.\n", 265 | "\n", 266 | "This summary provides an overview of the patient's demographics, medical history, medications, immunizations, blood test results, and upcoming appointments. If you need more specific information or details, feel free to ask!```\n", 267 | "\n", 268 | "Full response:\n", 269 | "```markdown\n", 270 | "Here is the summary report for Patient ID P000000:\n", 271 | "\n", 272 | "### Patient Demographics\n", 273 | "- **First Name:** PatientP000000\n", 274 | "- **Last Name:** Doe\n", 275 | "- **Date of Birth:** November 1, 1966\n", 276 | "- **Gender:** Male\n", 277 | "- **Contact Information:**\n", 278 | " - **Address:** 139 Elm Street, Springfield, IL, 62704\n", 279 | " - **Phone:** +44 7911 671531\n", 280 | " - **Email:** patientP000000@example.com\n", 281 | "\n", 282 | "### Emergency Contact\n", 283 | "- **Name:** Spouse of PatientP000000\n", 284 | "- **Relationship:** Spouse\n", 285 | "- **Phone:** +44 7911 376933\n", 286 | "\n", 287 | "### Medical History\n", 288 | "- **Condition:** Leukemia\n", 289 | " - **Diagnosis Date:** July 8, 2018\n", 290 | " - **Stage:** IV\n", 291 | " - **Treatment:** Chemotherapy\n", 292 | " - **Notes:** Stage IV Leukemia diagnosed. Treatment plan includes Chemotherapy.\n", 293 | " - **Location Name:** Green Valley Cancer Institute\n", 294 | "\n", 295 | "### Medications\n", 296 | "1. **Name:** Imatinib\n", 297 | " - **Dosage:** 96 mg\n", 298 | " - **Frequency:** Every 3 weeks\n", 299 | " - **Start Date:** March 29, 2024\n", 300 | " - **Prescriber:** Dr. John Oncologist\n", 301 | " - **Location Name:** NHS Oncology Centre\n", 302 | "\n", 303 | "2. **Name:** Nilotinib\n", 304 | " - **Dosage:** 178 mg\n", 305 | " - **Frequency:** Every 3 weeks\n", 306 | " - **Start Date:** September 20, 2023\n", 307 | " - **Prescriber:** Dr. 
John Oncologist\n", 308 | " - **Location Name:** Green Valley Cancer Institute\n", 309 | "\n", 310 | "3. **Name:** Dasatinib\n", 311 | " - **Dosage:** 123 mg\n", 312 | " - **Frequency:** Every 3 weeks\n", 313 | " - **Start Date:** October 25, 2024\n", 314 | " - **Prescriber:** Dr. John Oncologist\n", 315 | " - **Location Name:** Riverbend Oncology Clinic\n", 316 | "\n", 317 | "### Immunizations\n", 318 | "- **Vaccine:** Influenza\n", 319 | " - **Date Administered:** September 8, 2024\n", 320 | " - **Administered By:** NHS Oncology Clinic\n", 321 | " - **Location Name:** Downtown Cancer Facility\n", 322 | "\n", 323 | "### Blood Test Results\n", 324 | "- **Test Name:** Complete Blood Count (CBC)\n", 325 | " - **Date:** October 22, 2024\n", 326 | " - **Results:**\n", 327 | " - Hemoglobin: 12.7 g/dL\n", 328 | " - White Blood Cells: 3.0 x10^3/µL\n", 329 | " - Platelets: 330 x10^3/µL\n", 330 | " - **Normal Ranges:**\n", 331 | " - Hemoglobin: 12-16 g/dL\n", 332 | " - White Blood Cells: 4-11 x10^3/µL\n", 333 | " - Platelets: 150-450 x10^3/µL\n", 334 | " - **Interpretation:** Values affected by ongoing cancer treatment.\n", 335 | "\n", 336 | "### Appointments\n", 337 | "- **Appointment ID:** AP9886\n", 338 | " - **Date & Time:** December 23, 2024, 18:14:39\n", 339 | " - **Type:** Oncology Follow-up\n", 340 | " - **With:** Dr. John Oncologist\n", 341 | " - **Location Name:** Riverside Oncology Clinic\n", 342 | " - **Status:** Scheduled\n", 343 | " - **Notes:** Review treatment progress and discuss next steps.\n", 344 | "\n", 345 | "This summary provides an overview of the patient's demographics, medical history, medications, immunizations, blood test results, and upcoming appointments. 
If you need more specific information or details, feel free to ask!\n", 346 | "```\n", 347 | "\n", 348 | "Metadata: {'sources': [{'source_text': 'file_path: /Users/zakkyang/local-projects/nexusync/notebooks/../sample_docs/healthcare_records.json\\n\\n[\\n {\\n \"demographics\": {\\n \"patient_id\": \"P000000\",\\n \"first_name\": \"PatientP000000\",\\n \"last_name\": \"Doe\",\\n \"date_of_birth\": \"1966-11-01 01:16:41\",\\n \"gender\": \"Male\",\\n \"contact_information\": {\\n \"address\": \"139 Elm Street, Springfield, IL, 62704\",\\n \"phone\": \"+44 7911 671531\",\\n \"email\": \"patientP000000@example.com\"\\n }\\n },\\n \"emergency_contact\": {\\n \"name\": \"Spouse of PatientP000000\",\\n \"relationship\": \"Spouse\",\\n \"phone\": \"+44 7911 376933\"\\n },\\n \"medical_history\": [\\n {\\n \"condition\": \"Leukemia\",\\n \"diagnosis_date\": \"2018-07-08 06:55:27\",\\n \"stage\": \"IV\",\\n \"treatment\": [\\n \"Chemotherapy\"\\n ],\\n \"notes\": \"Stage IV Leukemia diagnosed. Treatment plan includes Chemotherapy.\" John Oncologist\",\\n \"location_name\": \"Green Valley Cancer Institute\"\\n },\\n {\\n \"medication_id\": \"M6362\",\\n \"name\": \"Dasatinib\",\\n \"dosage\": \"123 mg\",\\n \"frequency\": \"Every 3 weeks\",\\n \"start_date\": \"2024-10-25 05:15:15\",\\n \"end_date\": null,\\n \"prescriber\": \"Dr. 
John Oncologist\",\\n \"location_name\": \"Riverbend Oncology Clinic\"\\n }\\n ],\\n \"immunizations\": [\\n {\\n \"immunization_id\": \"I4299\",\\n \"vaccine\": \"Influenza\",\\n \"date_administered\": \"2024-09-08 11:00:53\",\\n \"administered_by\": \"NHS Oncology Clinic\",\\n \"location_name\": \"Downtown Cancer Facility\"\\n }\\n ],\\n \"blood_test_results\": [\\n {\\n \"lab_id\": \"L1164\",\\n \"test_name\": \"Complete Blood Count (CBC)\",\\n \"date\": \"2024-10-22 08:11:43\",\\n \"results\": {\\n \"Hemoglobin\": \"12.7 g/dL\",\\n \"White Blood Cells\": \"3.0 x10^3/\\\\u00b5L\",\\n \"Platelets\": \"330 x10^3/\\\\u00b5L\"\\n },\\n \"normal_ranges\": {\\n \"Hemoglobin\": \"12-16 g/dL\",\\n \"White Blood Cells\": \"4-11 x10^3/\\\\u00b5L\",\\n \"Platelets\": \"150-450 x10^3/\\\\u00b5L\"\\n },\\n \"interpretation\": \"Values affected by ongoing cancer treatment.\" }\\n ],\\n \"appointments\": [\\n {\\n \"appointment_id\": \"AP9886\",\\n \"date_time\": \"2024-12-23 18:14:39\",\\n \"type\": \"Oncology Follow-up\",\\n \"with\": \"Dr. }\\n ],\\n \"appointments\": [\\n {\\n \"appointment_id\": \"AP9886\",\\n \"date_time\": \"2024-12-23 18:14:39\",\\n \"type\": \"Oncology Follow-up\",\\n \"with\": \"Dr. John Oncologist\",\\n \"location_name\": \"Riverside Oncology Clinic\",\\n \"status\": \"Scheduled\",\\n \"notes\": \"Review treatment progress and discuss next steps.\" }\\n ],\\n \"medications\": [\\n {\\n \"medication_id\": \"M6464\",\\n \"name\": \"Imatinib\",\\n \"dosage\": \"96 mg\",\\n \"frequency\": \"Every 3 weeks\",\\n \"start_date\": \"2024-03-29 12:36:55\",\\n \"end_date\": null,\\n \"prescriber\": \"Dr. John Oncologist\",\\n \"location_name\": \"NHS Oncology Centre\"\\n },\\n {\\n \"medication_id\": \"M8879\",\\n \"name\": \"Nilotinib\",\\n \"dosage\": \"178 mg\",\\n \"frequency\": \"Every 3 weeks\",\\n \"start_date\": \"2023-09-20 17:48:39\",\\n \"end_date\": null,\\n \"prescriber\": \"Dr. 
John Oncologist\",\\n \"location_name\": \"Green Valley Cancer Institute\"\\n },\\n {\\n \"medication_id\": \"M6362\",\\n \"name\": \"Dasatinib\",\\n \"dosage\": \"123 mg\",\\n \"frequency\": \"Every 3 weeks\",\\n \"start_date\": \"2024-10-25 05:15:15\",\\n \"end_date\": null,\\n \"prescriber\": \"Dr. John Oncologist\",\\n \"location_name\": \"Riverbend Oncology Clinic\"\\n }\\n ],\\n \"immunizations\": [\\n {\\n \"immunization_id\": \"I4299\",\\n \"vaccine\": \"Influenza\",\\n \"date_administered\": \"2024-09-08 11:00:53\",\\n \"administered_by\": \"NHS Oncology Clinic\",\\n \"location_name\": \"Downtown Cancer Facility\"\\n }\\n ],\\n \"blood_test_results\": [\\n {\\n \"lab_id\": \"L1164\",\\n \"test_name\": \"Complete Blood Count (CBC)\",\\n \"date\": \"2024-10-22 08:11:43\",\\n \"results\": {\\n \"Hemoglobin\": \"12.7 g/dL\",\\n \"White Blood Cells\": \"3.0 x10^3/\\\\u00b5L\",\\n \"Platelets\": \"330 x10^3/\\\\u00b5L\"\\n },\\n \"normal_ranges\": {\\n \"Hemoglobin\": \"12-16 g/dL\",\\n \"White Blood Cells\": \"4-11 x10^3/\\\\u00b5L\",\\n \"Platelets\": \"150-450 x10^3/\\\\u00b5L\"\\n },\\n \"interpretation\": \"Values affected by ongoing cancer treatment.\" }\\n ],\\n \"appointments\": [\\n {\\n \"appointment_id\": \"AP9886\",\\n \"date_time\": \"2024-12-23 18:14:39\",\\n \"type\": \"Oncology Follow-up\",\\n \"with\": \"Dr. John Oncologist\",\\n \"location_name\": \"Riverside Oncology Clinic\",\\n \"status\": \"Scheduled\",\\n \"notes\": \"Review treatment progress and discuss next steps.\"', 'metadata': {'file_path': '/Users/zakkyang/local-projects/nexusync/notebooks/../sample_docs/healthcare_records.json', 'file_name': 'healthcare_records.json', 'file_type': 'application/json', 'file_size': 42774, 'creation_date': '2024-10-12', 'last_modified_date': '2024-10-12'}}, {'source_text': 'file_path: /Users/zakkyang/local-projects/nexusync/notebooks/../sample_docs/healthcare_records.json\\n\\n\",\\n \"radiologist\": \"Dr. 
Emily Imaging\"\\n }\\n ]\\n },\\n {\\n \"demographics\": {\\n \"patient_id\": \"P000007\",\\n \"first_name\": \"PatientP000007\",\\n \"last_name\": \"Doe\",\\n \"date_of_birth\": \"1979-08-04 23:12:34\",\\n \"gender\": \"Male\",\\n \"contact_information\": {\\n \"address\": \"169 Elm Street, Springfield, IL, 62704\",\\n \"phone\": \"+44 7911 795798\",\\n \"email\": \"patientP000007@example.com\"\\n }\\n },\\n \"emergency_contact\": {\\n \"name\": \"Spouse of PatientP000007\",\\n \"relationship\": \"Spouse\",\\n \"phone\": \"+44 7911 236769\"\\n },\\n \"medical_history\": [\\n {\\n \"condition\": \"Prostate Cancer\",\\n \"diagnosis_date\": \"2023-12-19 07:43:10\",\\n \"stage\": \"IV\",\\n \"treatment\": [\\n \"Hormone therapy\"\\n ],\\n \"notes\": \"Stage IV Prostate Cancer diagnosed. Treatment plan includes Hormone therapy.\" Jane Endocrinologist\",\\n \"location_name\": \"Springfield Cancer Center\"\\n }\\n ],\\n \"immunizations\": [\\n {\\n \"immunization_id\": \"I8026\",\\n \"vaccine\": \"Influenza\",\\n \"date_administered\": \"2023-04-08 20:32:55\",\\n \"administered_by\": \"NHS Oncology Clinic\",\\n \"location_name\": \"Green Valley Cancer Institute\"\\n }\\n ],\\n \"blood_test_results\": [\\n {\\n \"lab_id\": \"L1407\",\\n \"test_name\": \"Complete Blood Count (CBC)\",\\n \"date\": \"2023-06-16 22:50:38\",\\n \"results\": {\\n \"Hemoglobin\": \"12.3 g/dL\",\\n \"White Blood Cells\": \"8.4 x10^3/\\\\u00b5L\",\\n \"Platelets\": \"108 x10^3/\\\\u00b5L\"\\n },\\n \"normal_ranges\": {\\n \"Hemoglobin\": \"12-16 g/dL\",\\n \"White Blood Cells\": \"4-11 x10^3/\\\\u00b5L\",\\n \"Platelets\": \"150-450 x10^3/\\\\u00b5L\"\\n },\\n \"interpretation\": \"Values affected by ongoing cancer treatment.\" },\\n {\\n \"lab_id\": \"L6043\",\\n \"test_name\": \"Tumor Marker Test\",\\n \"date\": \"2023-02-27 17:42:48\",\\n \"results\": {\\n \"CA-125\": null,\\n \"PSA\": \"5.3 ng/mL\",\\n \"CA 15-3\": null\\n },\\n \"normal_ranges\": {\\n \"CA-125\": \"<35 U/mL\",\\n \"PSA\": 
\"<4 ng/mL\",\\n \"CA 15-3\": \"<30 U/mL\"\\n },\\n \"interpretation\": \"Elevated levels may indicate disease activity or treatment response.\" }\\n ],\\n \"appointments\": [\\n {\\n \"appointment_id\": \"AP4016\",\\n \"date_time\": \"2024-07-22 20:43:56\",\\n \"type\": \"Oncology Follow-up\",\\n \"with\": \"Dr. }\\n ],\\n \"medications\": [\\n {\\n \"medication_id\": \"M6281\",\\n \"name\": \"Tamoxifen\",\\n \"dosage\": \"20 mg\",\\n \"frequency\": \"Once daily\",\\n \"start_date\": \"2023-06-30 05:10:26\",\\n \"end_date\": null,\\n \"prescriber\": \"Dr. Jane Endocrinologist\",\\n \"location_name\": \"Springfield Cancer Center\"\\n }\\n ],\\n \"immunizations\": [\\n {\\n \"immunization_id\": \"I8026\",\\n \"vaccine\": \"Influenza\",\\n \"date_administered\": \"2023-04-08 20:32:55\",\\n \"administered_by\": \"NHS Oncology Clinic\",\\n \"location_name\": \"Green Valley Cancer Institute\"\\n }\\n ],\\n \"blood_test_results\": [\\n {\\n \"lab_id\": \"L1407\",\\n \"test_name\": \"Complete Blood Count (CBC)\",\\n \"date\": \"2023-06-16 22:50:38\",\\n \"results\": {\\n \"Hemoglobin\": \"12.3 g/dL\",\\n \"White Blood Cells\": \"8.4 x10^3/\\\\u00b5L\",\\n \"Platelets\": \"108 x10^3/\\\\u00b5L\"\\n },\\n \"normal_ranges\": {\\n \"Hemoglobin\": \"12-16 g/dL\",\\n \"White Blood Cells\": \"4-11 x10^3/\\\\u00b5L\",\\n \"Platelets\": \"150-450 x10^3/\\\\u00b5L\"\\n },\\n \"interpretation\": \"Values affected by ongoing cancer treatment.\" },\\n {\\n \"lab_id\": \"L6043\",\\n \"test_name\": \"Tumor Marker Test\",\\n \"date\": \"2023-02-27 17:42:48\",\\n \"results\": {\\n \"CA-125\": null,\\n \"PSA\": \"5.3 ng/mL\",\\n \"CA 15-3\": null\\n },\\n \"normal_ranges\": {\\n \"CA-125\": \"<35 U/mL\",\\n \"PSA\": \"<4 ng/mL\",\\n \"CA 15-3\": \"<30 U/mL\"\\n },\\n \"interpretation\": \"Elevated levels may indicate disease activity or treatment response.\" file_path: /Users/zakkyang/local-projects/nexusync/notebooks/../sample_docs/healthcare_records.json\\n\\n\",\\n \"radiologist\": 
\"Dr. Emily Imaging\"\\n }\\n ]\\n },\\n {\\n \"demographics\": {\\n \"patient_id\": \"P000007\",\\n \"first_name\": \"PatientP000007\",\\n \"last_name\": \"Doe\",\\n \"date_of_birth\": \"1979-08-04 23:12:34\",\\n \"gender\": \"Male\",\\n \"contact_information\": {\\n \"address\": \"169 Elm Street, Springfield, IL, 62704\",\\n \"phone\": \"+44 7911 795798\",\\n \"email\": \"patientP000007@example.com\"\\n }\\n },\\n \"emergency_contact\": {\\n \"name\": \"Spouse of PatientP000007\",\\n \"relationship\": \"Spouse\",\\n \"phone\": \"+44 7911 236769\"\\n },\\n \"medical_history\": [\\n {\\n \"condition\": \"Prostate Cancer\",\\n \"diagnosis_date\": \"2023-12-19 07:43:10\",\\n \"stage\": \"IV\",\\n \"treatment\": [\\n \"Hormone therapy\"\\n ],\\n \"notes\": \"Stage IV Prostate Cancer diagnosed. }\\n ],\\n \"appointments\": [\\n {\\n \"appointment_id\": \"AP4016\",\\n \"date_time\": \"2024-07-22 20:43:56\",\\n \"type\": \"Oncology Follow-up\",\\n \"with\": \"Dr. John Oncologist\",\\n \"location_name\": \"NHS Oncology Centre\",\\n \"status\": \"Scheduled\",\\n \"notes\": \"Review treatment progress and discuss next steps.\"', 'metadata': {'file_path': '/Users/zakkyang/local-projects/nexusync/notebooks/../sample_docs/healthcare_records.json', 'file_name': 'healthcare_records.json', 'file_type': 'application/json', 'file_size': 42774, 'creation_date': '2024-10-12', 'last_modified_date': '2024-10-12'}}, {'source_text': 'Sarah Radiologist\",\\n \"location_name\": \"Downtown Cancer Facility\",\\n \"status\": \"Scheduled\",\\n \"notes\": \"Continued radiation treatment as per plan.\" }\\n ],\\n \"vital_signs\": [\\n {\\n \"vital_id\": \"V4595\",\\n \"date\": \"2024-09-26 11:33:59\",\\n \"blood_pressure\": \"128/62 mmHg\",\\n \"heart_rate\": \"69 bpm\",\\n \"respiratory_rate\": \"19 breaths/min\",\\n \"temperature\": \"98.8\\\\u00b0F\",\\n \"oxygen_saturation\": \"98%\",\\n \"weight\": \"74 kg\",\\n \"height\": \"183 cm\",\\n \"bmi\": \"23.7\"\\n }\\n ],\\n \"imaging_results\": 
[\\n {\\n \"imaging_id\": \"IM9534\",\\n \"date\": \"2024-05-09 08:42:59\",\\n \"type\": \"Prostate MRI\",\\n \"location_name\": \"Riverbend Oncology Clinic\",\\n \"results\": \"Follow-up Prostate MRI shows complete response. \",\\n \"radiologist\": \"Dr. file_path: /Users/zakkyang/local-projects/nexusync/notebooks/../sample_docs/healthcare_records.json\\n\\n},\\n {\\n \"lab_id\": \"L9689\",\\n \"test_name\": \"Tumor Marker Test\",\\n \"date\": \"2023-11-10 04:03:05\",\\n \"results\": {\\n \"CA-125\": null,\\n \"PSA\": \"3.4 ng/mL\",\\n \"CA 15-3\": null\\n },\\n \"normal_ranges\": {\\n \"CA-125\": \"<35 U/mL\",\\n \"PSA\": \"<4 ng/mL\",\\n \"CA 15-3\": \"<30 U/mL\"\\n },\\n \"interpretation\": \"Elevated levels may indicate disease activity or treatment response.\" }\\n ],\\n \"appointments\": [\\n {\\n \"appointment_id\": \"AP6208\",\\n \"date_time\": \"2024-08-08 21:34:49\",\\n \"type\": \"Oncology Follow-up\",\\n \"with\": \"Dr. \",\\n \"radiologist\": \"Dr. Emily Imaging\"\\n }\\n ]\\n },\\n {\\n \"demographics\": {\\n \"patient_id\": \"P000003\",\\n \"first_name\": \"PatientP000003\",\\n \"last_name\": \"Doe\",\\n \"date_of_birth\": \"1955-09-21 10:55:36\",\\n \"gender\": \"Male\",\\n \"contact_information\": {\\n \"address\": \"264 Elm Street, Springfield, IL, 62704\",\\n \"phone\": \"+44 7911 216070\",\\n \"email\": \"patientP000003@example.com\"\\n }\\n },\\n \"emergency_contact\": {\\n \"name\": \"Spouse of PatientP000003\",\\n \"relationship\": \"Spouse\",\\n \"phone\": \"+44 7911 729480\"\\n },\\n \"medical_history\": [\\n {\\n \"condition\": \"Melanoma\",\\n \"diagnosis_date\": \"2012-09-22 07:19:34\",\\n \"stage\": \"III\",\\n \"treatment\": [\\n \"Wide excision\",\\n \"Immunotherapy\",\\n \"Targeted therapy\"\\n ],\\n \"notes\": \"Stage III Melanoma diagnosed. 
Treatment plan includes Wide excision, Immunotherapy, Targeted therapy.\" }\\n ],\\n \"appointments\": [\\n {\\n \"appointment_id\": \"AP6208\",\\n \"date_time\": \"2024-08-08 21:34:49\",\\n \"type\": \"Oncology Follow-up\",\\n \"with\": \"Dr. John Oncologist\",\\n \"location_name\": \"Hillside Cancer Care\",\\n \"status\": \"Scheduled\",\\n \"notes\": \"Review treatment progress and discuss next steps.\" },\\n {\\n \"appointment_id\": \"AP8361\",\\n \"date_time\": \"2024-12-15 21:21:21\",\\n \"type\": \"Radiation Therapy Session\",\\n \"with\": \"Dr. },\\n {\\n \"appointment_id\": \"AP8361\",\\n \"date_time\": \"2024-12-15 21:21:21\",\\n \"type\": \"Radiation Therapy Session\",\\n \"with\": \"Dr. Sarah Radiologist\",\\n \"location_name\": \"Downtown Cancer Facility\",\\n \"status\": \"Scheduled\",\\n \"notes\": \"Continued radiation treatment as per plan.\" }\\n ],\\n \"vital_signs\": [\\n {\\n \"vital_id\": \"V4595\",\\n \"date\": \"2024-09-26 11:33:59\",\\n \"blood_pressure\": \"128/62 mmHg\",\\n \"heart_rate\": \"69 bpm\",\\n \"respiratory_rate\": \"19 breaths/min\",\\n \"temperature\": \"98.8\\\\u00b0F\",\\n \"oxygen_saturation\": \"98%\",\\n \"weight\": \"74 kg\",\\n \"height\": \"183 cm\",\\n \"bmi\": \"23.7\"\\n }\\n ],\\n \"imaging_results\": [\\n {\\n \"imaging_id\": \"IM9534\",\\n \"date\": \"2024-05-09 08:42:59\",\\n \"type\": \"Prostate MRI\",\\n \"location_name\": \"Riverbend Oncology Clinic\",\\n \"results\": \"Follow-up Prostate MRI shows complete response. 
file_path: /Users/zakkyang/local-projects/nexusync/notebooks/../sample_docs/healthcare_records.json\\n\\n},\\n {\\n \"lab_id\": \"L9689\",\\n \"test_name\": \"Tumor Marker Test\",\\n \"date\": \"2023-11-10 04:03:05\",\\n \"results\": {\\n \"CA-125\": null,\\n \"PSA\": \"3.4 ng/mL\",\\n \"CA 15-3\": null\\n },\\n \"normal_ranges\": {\\n \"CA-125\": \"<35 U/mL\",\\n \"PSA\": \"<4 ng/mL\",\\n \"CA 15-3\": \"<30 U/mL\"\\n },\\n \"interpretation\": \"Elevated levels may indicate disease activity or treatment response.\" }\\n ],\\n \"appointments\": [\\n {\\n \"appointment_id\": \"AP6208\",\\n \"date_time\": \"2024-08-08 21:34:49\",\\n \"type\": \"Oncology Follow-up\",\\n \"with\": \"Dr. John Oncologist\",\\n \"location_name\": \"Hillside Cancer Care\",\\n \"status\": \"Scheduled\",\\n \"notes\": \"Review treatment progress and discuss next steps.\"', 'metadata': {'file_path': '/Users/zakkyang/local-projects/nexusync/notebooks/../sample_docs/healthcare_records.json', 'file_name': 'healthcare_records.json', 'file_type': 'application/json', 'file_size': 42774, 'creation_date': '2024-10-12', 'last_modified_date': '2024-10-12'}}]}\n" 349 | ] 350 | } 351 | ], 352 | "source": [ 353 | "query = \"[P000000, summary_report]\"\n", 354 | "\n", 355 | "print(\"```markdown\")\n", 356 | "for item in ns.start_chat_stream(query):\n", 357 | " if isinstance(item, str):\n", 358 | " # This is a token, print or process as needed\n", 359 | " print(item, end='', flush=True)\n", 360 | " else:\n", 361 | " # This is the final response with metadata\n", 362 | " print(\"```\\n\")\n", 363 | " print(\"Full response:\")\n", 364 | " print(\"```markdown\")\n", 365 | " print(item['response'])\n", 366 | " print(\"```\\n\")\n", 367 | " print(\"Metadata:\", item['metadata'])\n", 368 | " break" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [], 376 | "source": [] 377 | } 378 | ], 379 | "metadata": { 380 | "kernelspec": { 381 | 
"display_name": "nhs_hackthon", 382 | "language": "python", 383 | "name": "python3" 384 | }, 385 | "language_info": { 386 | "codemirror_mode": { 387 | "name": "ipython", 388 | "version": 3 389 | }, 390 | "file_extension": ".py", 391 | "mimetype": "text/x-python", 392 | "name": "python", 393 | "nbconvert_exporter": "python", 394 | "pygments_lexer": "ipython3", 395 | "version": "3.10.15" 396 | } 397 | }, 398 | "nbformat": 4, 399 | "nbformat_minor": 2 400 | } 401 | -------------------------------------------------------------------------------- /sample_docs/healthcare_records.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "demographics": { 4 | "patient_id": "P000000", 5 | "first_name": "PatientP000000", 6 | "last_name": "Doe", 7 | "date_of_birth": "1966-11-01 01:16:41", 8 | "gender": "Male", 9 | "contact_information": { 10 | "address": "139 Elm Street, Springfield, IL, 62704", 11 | "phone": "+44 7911 671531", 12 | "email": "patientP000000@example.com" 13 | } 14 | }, 15 | "emergency_contact": { 16 | "name": "Spouse of PatientP000000", 17 | "relationship": "Spouse", 18 | "phone": "+44 7911 376933" 19 | }, 20 | "medical_history": [ 21 | { 22 | "condition": "Leukemia", 23 | "diagnosis_date": "2018-07-08 06:55:27", 24 | "stage": "IV", 25 | "treatment": [ 26 | "Chemotherapy" 27 | ], 28 | "notes": "Stage IV Leukemia diagnosed. Treatment plan includes Chemotherapy." 29 | } 30 | ], 31 | "medications": [ 32 | { 33 | "medication_id": "M6464", 34 | "name": "Imatinib", 35 | "dosage": "96 mg", 36 | "frequency": "Every 3 weeks", 37 | "start_date": "2024-03-29 12:36:55", 38 | "end_date": null, 39 | "prescriber": "Dr. John Oncologist", 40 | "location_name": "NHS Oncology Centre" 41 | }, 42 | { 43 | "medication_id": "M8879", 44 | "name": "Nilotinib", 45 | "dosage": "178 mg", 46 | "frequency": "Every 3 weeks", 47 | "start_date": "2023-09-20 17:48:39", 48 | "end_date": null, 49 | "prescriber": "Dr. 
John Oncologist", 50 | "location_name": "Green Valley Cancer Institute" 51 | }, 52 | { 53 | "medication_id": "M6362", 54 | "name": "Dasatinib", 55 | "dosage": "123 mg", 56 | "frequency": "Every 3 weeks", 57 | "start_date": "2024-10-25 05:15:15", 58 | "end_date": null, 59 | "prescriber": "Dr. John Oncologist", 60 | "location_name": "Riverbend Oncology Clinic" 61 | } 62 | ], 63 | "immunizations": [ 64 | { 65 | "immunization_id": "I4299", 66 | "vaccine": "Influenza", 67 | "date_administered": "2024-09-08 11:00:53", 68 | "administered_by": "NHS Oncology Clinic", 69 | "location_name": "Downtown Cancer Facility" 70 | } 71 | ], 72 | "blood_test_results": [ 73 | { 74 | "lab_id": "L1164", 75 | "test_name": "Complete Blood Count (CBC)", 76 | "date": "2024-10-22 08:11:43", 77 | "results": { 78 | "Hemoglobin": "12.7 g/dL", 79 | "White Blood Cells": "3.0 x10^3/\u00b5L", 80 | "Platelets": "330 x10^3/\u00b5L" 81 | }, 82 | "normal_ranges": { 83 | "Hemoglobin": "12-16 g/dL", 84 | "White Blood Cells": "4-11 x10^3/\u00b5L", 85 | "Platelets": "150-450 x10^3/\u00b5L" 86 | }, 87 | "interpretation": "Values affected by ongoing cancer treatment." 88 | } 89 | ], 90 | "appointments": [ 91 | { 92 | "appointment_id": "AP9886", 93 | "date_time": "2024-12-23 18:14:39", 94 | "type": "Oncology Follow-up", 95 | "with": "Dr. John Oncologist", 96 | "location_name": "Riverside Oncology Clinic", 97 | "status": "Scheduled", 98 | "notes": "Review treatment progress and discuss next steps." 
99 | } 100 | ], 101 | "vital_signs": [ 102 | { 103 | "vital_id": "V4963", 104 | "date": "2024-12-12 14:12:13", 105 | "blood_pressure": "124/77 mmHg", 106 | "heart_rate": "87 bpm", 107 | "respiratory_rate": "16 breaths/min", 108 | "temperature": "97.9\u00b0F", 109 | "oxygen_saturation": "98%", 110 | "weight": "70 kg", 111 | "height": "177 cm", 112 | "bmi": "27.8" 113 | } 114 | ], 115 | "imaging_results": [ 116 | { 117 | "imaging_id": "IM3488", 118 | "date": "2024-09-20 17:12:39", 119 | "type": "PET-CT", 120 | "location_name": "Riverside Oncology Clinic", 121 | "results": "Follow-up PET-CT shows complete response.", 122 | "radiologist": "Dr. Emily Imaging" 123 | } 124 | ] 125 | }, 126 | { 127 | "demographics": { 128 | "patient_id": "P000001", 129 | "first_name": "PatientP000001", 130 | "last_name": "Doe", 131 | "date_of_birth": "1977-06-05 18:13:55", 132 | "gender": "Male", 133 | "contact_information": { 134 | "address": "229 Elm Street, Springfield, IL, 62704", 135 | "phone": "+44 7911 468034", 136 | "email": "patientP000001@example.com" 137 | } 138 | }, 139 | "emergency_contact": { 140 | "name": "Spouse of PatientP000001", 141 | "relationship": "Spouse", 142 | "phone": "+44 7911 552032" 143 | }, 144 | "medical_history": [ 145 | { 146 | "condition": "Prostate Cancer", 147 | "diagnosis_date": "2023-11-11 06:27:54", 148 | "stage": "II", 149 | "treatment": [ 150 | "Radiation therapy", 151 | "Prostatectomy" 152 | ], 153 | "notes": "Stage II Prostate Cancer diagnosed. Treatment plan includes Radiation therapy, Prostatectomy." 
154 | } 155 | ], 156 | "medications": [], 157 | "immunizations": [ 158 | { 159 | "immunization_id": "I4609", 160 | "vaccine": "Influenza", 161 | "date_administered": "2023-10-03 01:24:17", 162 | "administered_by": "NHS Oncology Clinic", 163 | "location_name": "Green Valley Cancer Institute" 164 | } 165 | ], 166 | "blood_test_results": [ 167 | { 168 | "lab_id": "L8146", 169 | "test_name": "Complete Blood Count (CBC)", 170 | "date": "2024-12-11 21:37:09", 171 | "results": { 172 | "Hemoglobin": "13.8 g/dL", 173 | "White Blood Cells": "8.9 x10^3/\u00b5L", 174 | "Platelets": "304 x10^3/\u00b5L" 175 | }, 176 | "normal_ranges": { 177 | "Hemoglobin": "12-16 g/dL", 178 | "White Blood Cells": "4-11 x10^3/\u00b5L", 179 | "Platelets": "150-450 x10^3/\u00b5L" 180 | }, 181 | "interpretation": "Values affected by ongoing cancer treatment." 182 | }, 183 | { 184 | "lab_id": "L2406", 185 | "test_name": "Tumor Marker Test", 186 | "date": "2023-03-02 17:46:34", 187 | "results": { 188 | "CA-125": null, 189 | "PSA": "7.8 ng/mL", 190 | "CA 15-3": null 191 | }, 192 | "normal_ranges": { 193 | "CA-125": "<35 U/mL", 194 | "PSA": "<4 ng/mL", 195 | "CA 15-3": "<30 U/mL" 196 | }, 197 | "interpretation": "Elevated levels may indicate disease activity or treatment response." 198 | } 199 | ], 200 | "appointments": [ 201 | { 202 | "appointment_id": "AP7530", 203 | "date_time": "2024-12-09 02:42:18", 204 | "type": "Oncology Follow-up", 205 | "with": "Dr. John Oncologist", 206 | "location_name": "Pinewood Oncology", 207 | "status": "Scheduled", 208 | "notes": "Review treatment progress and discuss next steps." 209 | }, 210 | { 211 | "appointment_id": "AP8976", 212 | "date_time": "2024-06-02 17:54:13", 213 | "type": "Radiation Therapy Session", 214 | "with": "Dr. Sarah Radiologist", 215 | "location_name": "Green Valley Cancer Institute", 216 | "status": "Scheduled", 217 | "notes": "Continued radiation treatment as per plan." 
218 | } 219 | ], 220 | "vital_signs": [ 221 | { 222 | "vital_id": "V4781", 223 | "date": "2024-07-18 07:38:18", 224 | "blood_pressure": "120/64 mmHg", 225 | "heart_rate": "99 bpm", 226 | "respiratory_rate": "16 breaths/min", 227 | "temperature": "98.1\u00b0F", 228 | "oxygen_saturation": "96%", 229 | "weight": "72 kg", 230 | "height": "176 cm", 231 | "bmi": "19.4" 232 | } 233 | ], 234 | "imaging_results": [ 235 | { 236 | "imaging_id": "IM1047", 237 | "date": "2024-03-15 01:43:13", 238 | "type": "Prostate MRI", 239 | "location_name": "City Cancer Institute", 240 | "results": "Follow-up Prostate MRI shows stable disease.", 241 | "radiologist": "Dr. Emily Imaging" 242 | } 243 | ] 244 | }, 245 | { 246 | "demographics": { 247 | "patient_id": "P000002", 248 | "first_name": "PatientP000002", 249 | "last_name": "Doe", 250 | "date_of_birth": "1977-05-17 17:56:51", 251 | "gender": "Male", 252 | "contact_information": { 253 | "address": "485 Elm Street, Springfield, IL, 62704", 254 | "phone": "+44 7911 975759", 255 | "email": "patientP000002@example.com" 256 | } 257 | }, 258 | "emergency_contact": { 259 | "name": "Spouse of PatientP000002", 260 | "relationship": "Spouse", 261 | "phone": "+44 7911 645934" 262 | }, 263 | "medical_history": [ 264 | { 265 | "condition": "Prostate Cancer", 266 | "diagnosis_date": "2019-04-16 04:35:16", 267 | "stage": "I", 268 | "treatment": [ 269 | "Radiation therapy", 270 | "Hormone therapy" 271 | ], 272 | "notes": "Stage I Prostate Cancer diagnosed. Treatment plan includes Radiation therapy, Hormone therapy." 273 | } 274 | ], 275 | "medications": [ 276 | { 277 | "medication_id": "M6283", 278 | "name": "Letrozole", 279 | "dosage": "20 mg", 280 | "frequency": "Once daily", 281 | "start_date": "2024-07-06 16:22:22", 282 | "end_date": null, 283 | "prescriber": "Dr. 
Jane Endocrinologist", 284 | "location_name": "Riverside Oncology Clinic" 285 | } 286 | ], 287 | "immunizations": [ 288 | { 289 | "immunization_id": "I3475", 290 | "vaccine": "Influenza", 291 | "date_administered": "2024-01-27 11:18:27", 292 | "administered_by": "NHS Oncology Clinic", 293 | "location_name": "Riverbend Oncology Clinic" 294 | } 295 | ], 296 | "blood_test_results": [ 297 | { 298 | "lab_id": "L5832", 299 | "test_name": "Complete Blood Count (CBC)", 300 | "date": "2023-10-13 18:29:53", 301 | "results": { 302 | "Hemoglobin": "10.6 g/dL", 303 | "White Blood Cells": "5.1 x10^3/\u00b5L", 304 | "Platelets": "172 x10^3/\u00b5L" 305 | }, 306 | "normal_ranges": { 307 | "Hemoglobin": "12-16 g/dL", 308 | "White Blood Cells": "4-11 x10^3/\u00b5L", 309 | "Platelets": "150-450 x10^3/\u00b5L" 310 | }, 311 | "interpretation": "Values affected by ongoing cancer treatment." 312 | }, 313 | { 314 | "lab_id": "L9689", 315 | "test_name": "Tumor Marker Test", 316 | "date": "2023-11-10 04:03:05", 317 | "results": { 318 | "CA-125": null, 319 | "PSA": "3.4 ng/mL", 320 | "CA 15-3": null 321 | }, 322 | "normal_ranges": { 323 | "CA-125": "<35 U/mL", 324 | "PSA": "<4 ng/mL", 325 | "CA 15-3": "<30 U/mL" 326 | }, 327 | "interpretation": "Elevated levels may indicate disease activity or treatment response." 328 | } 329 | ], 330 | "appointments": [ 331 | { 332 | "appointment_id": "AP6208", 333 | "date_time": "2024-08-08 21:34:49", 334 | "type": "Oncology Follow-up", 335 | "with": "Dr. John Oncologist", 336 | "location_name": "Hillside Cancer Care", 337 | "status": "Scheduled", 338 | "notes": "Review treatment progress and discuss next steps." 339 | }, 340 | { 341 | "appointment_id": "AP8361", 342 | "date_time": "2024-12-15 21:21:21", 343 | "type": "Radiation Therapy Session", 344 | "with": "Dr. Sarah Radiologist", 345 | "location_name": "Downtown Cancer Facility", 346 | "status": "Scheduled", 347 | "notes": "Continued radiation treatment as per plan." 
348 | } 349 | ], 350 | "vital_signs": [ 351 | { 352 | "vital_id": "V4595", 353 | "date": "2024-09-26 11:33:59", 354 | "blood_pressure": "128/62 mmHg", 355 | "heart_rate": "69 bpm", 356 | "respiratory_rate": "19 breaths/min", 357 | "temperature": "98.8\u00b0F", 358 | "oxygen_saturation": "98%", 359 | "weight": "74 kg", 360 | "height": "183 cm", 361 | "bmi": "23.7" 362 | } 363 | ], 364 | "imaging_results": [ 365 | { 366 | "imaging_id": "IM9534", 367 | "date": "2024-05-09 08:42:59", 368 | "type": "Prostate MRI", 369 | "location_name": "Riverbend Oncology Clinic", 370 | "results": "Follow-up Prostate MRI shows complete response.", 371 | "radiologist": "Dr. Emily Imaging" 372 | } 373 | ] 374 | }, 375 | { 376 | "demographics": { 377 | "patient_id": "P000003", 378 | "first_name": "PatientP000003", 379 | "last_name": "Doe", 380 | "date_of_birth": "1955-09-21 10:55:36", 381 | "gender": "Male", 382 | "contact_information": { 383 | "address": "264 Elm Street, Springfield, IL, 62704", 384 | "phone": "+44 7911 216070", 385 | "email": "patientP000003@example.com" 386 | } 387 | }, 388 | "emergency_contact": { 389 | "name": "Spouse of PatientP000003", 390 | "relationship": "Spouse", 391 | "phone": "+44 7911 729480" 392 | }, 393 | "medical_history": [ 394 | { 395 | "condition": "Melanoma", 396 | "diagnosis_date": "2012-09-22 07:19:34", 397 | "stage": "III", 398 | "treatment": [ 399 | "Wide excision", 400 | "Immunotherapy", 401 | "Targeted therapy" 402 | ], 403 | "notes": "Stage III Melanoma diagnosed. Treatment plan includes Wide excision, Immunotherapy, Targeted therapy." 
404 | } 405 | ], 406 | "medications": [], 407 | "immunizations": [ 408 | { 409 | "immunization_id": "I7312", 410 | "vaccine": "Influenza", 411 | "date_administered": "2024-10-17 12:49:10", 412 | "administered_by": "NHS Oncology Clinic", 413 | "location_name": "City Cancer Institute" 414 | } 415 | ], 416 | "blood_test_results": [ 417 | { 418 | "lab_id": "L8673", 419 | "test_name": "Complete Blood Count (CBC)", 420 | "date": "2024-06-22 11:08:49", 421 | "results": { 422 | "Hemoglobin": "12.1 g/dL", 423 | "White Blood Cells": "4.7 x10^3/\u00b5L", 424 | "Platelets": "356 x10^3/\u00b5L" 425 | }, 426 | "normal_ranges": { 427 | "Hemoglobin": "12-16 g/dL", 428 | "White Blood Cells": "4-11 x10^3/\u00b5L", 429 | "Platelets": "150-450 x10^3/\u00b5L" 430 | }, 431 | "interpretation": "Values affected by ongoing cancer treatment." 432 | } 433 | ], 434 | "appointments": [ 435 | { 436 | "appointment_id": "AP5552", 437 | "date_time": "2024-03-18 16:19:45", 438 | "type": "Oncology Follow-up", 439 | "with": "Dr. John Oncologist", 440 | "location_name": "NHS Oncology Centre", 441 | "status": "Scheduled", 442 | "notes": "Review treatment progress and discuss next steps." 443 | } 444 | ], 445 | "vital_signs": [ 446 | { 447 | "vital_id": "V1842", 448 | "date": "2024-08-28 12:40:10", 449 | "blood_pressure": "134/90 mmHg", 450 | "heart_rate": "61 bpm", 451 | "respiratory_rate": "20 breaths/min", 452 | "temperature": "98.2\u00b0F", 453 | "oxygen_saturation": "99%", 454 | "weight": "72 kg", 455 | "height": "179 cm", 456 | "bmi": "26.7" 457 | } 458 | ], 459 | "imaging_results": [ 460 | { 461 | "imaging_id": "IM6696", 462 | "date": "2023-05-17 22:21:53", 463 | "type": "Skin and Lymph Node Ultrasound", 464 | "location_name": "Pinewood Oncology", 465 | "results": "Follow-up Skin and Lymph Node Ultrasound shows stable disease.", 466 | "radiologist": "Dr. 
Emily Imaging" 467 | } 468 | ] 469 | }, 470 | { 471 | "demographics": { 472 | "patient_id": "P000004", 473 | "first_name": "PatientP000004", 474 | "last_name": "Doe", 475 | "date_of_birth": "1979-05-17 14:31:47", 476 | "gender": "Male", 477 | "contact_information": { 478 | "address": "956 Elm Street, Springfield, IL, 62704", 479 | "phone": "+44 7911 806455", 480 | "email": "patientP000004@example.com" 481 | } 482 | }, 483 | "emergency_contact": { 484 | "name": "Spouse of PatientP000004", 485 | "relationship": "Spouse", 486 | "phone": "+44 7911 414021" 487 | }, 488 | "medical_history": [ 489 | { 490 | "condition": "Colorectal Cancer", 491 | "diagnosis_date": "2012-04-01 12:07:10", 492 | "stage": "III", 493 | "treatment": [ 494 | "Colectomy", 495 | "Chemotherapy", 496 | "Radiation therapy" 497 | ], 498 | "notes": "Stage III Colorectal Cancer diagnosed. Treatment plan includes Colectomy, Chemotherapy, Radiation therapy." 499 | } 500 | ], 501 | "medications": [ 502 | { 503 | "medication_id": "M6938", 504 | "name": "Oxaliplatin", 505 | "dosage": "170 mg", 506 | "frequency": "Every 3 weeks", 507 | "start_date": "2023-01-29 01:08:50", 508 | "end_date": null, 509 | "prescriber": "Dr. 
John Oncologist", 510 | "location_name": "Riverbend Oncology Clinic" 511 | } 512 | ], 513 | "immunizations": [ 514 | { 515 | "immunization_id": "I1234", 516 | "vaccine": "Influenza", 517 | "date_administered": "2023-12-07 02:42:39", 518 | "administered_by": "NHS Oncology Clinic", 519 | "location_name": "Green Valley Cancer Institute" 520 | } 521 | ], 522 | "blood_test_results": [ 523 | { 524 | "lab_id": "L8342", 525 | "test_name": "Complete Blood Count (CBC)", 526 | "date": "2023-02-19 17:47:20", 527 | "results": { 528 | "Hemoglobin": "11.7 g/dL", 529 | "White Blood Cells": "5.3 x10^3/\u00b5L", 530 | "Platelets": "370 x10^3/\u00b5L" 531 | }, 532 | "normal_ranges": { 533 | "Hemoglobin": "12-16 g/dL", 534 | "White Blood Cells": "4-11 x10^3/\u00b5L", 535 | "Platelets": "150-450 x10^3/\u00b5L" 536 | }, 537 | "interpretation": "Values affected by ongoing cancer treatment." 538 | } 539 | ], 540 | "appointments": [ 541 | { 542 | "appointment_id": "AP6696", 543 | "date_time": "2024-02-02 12:48:41", 544 | "type": "Oncology Follow-up", 545 | "with": "Dr. John Oncologist", 546 | "location_name": "Riverside Oncology Clinic", 547 | "status": "Scheduled", 548 | "notes": "Review treatment progress and discuss next steps." 549 | }, 550 | { 551 | "appointment_id": "AP5617", 552 | "date_time": "2024-10-27 21:38:55", 553 | "type": "Radiation Therapy Session", 554 | "with": "Dr. Sarah Radiologist", 555 | "location_name": "Maple Grove Oncology Center", 556 | "status": "Scheduled", 557 | "notes": "Continued radiation treatment as per plan." 
558 | } 559 | ], 560 | "vital_signs": [ 561 | { 562 | "vital_id": "V3543", 563 | "date": "2024-01-21 23:23:40", 564 | "blood_pressure": "140/88 mmHg", 565 | "heart_rate": "77 bpm", 566 | "respiratory_rate": "20 breaths/min", 567 | "temperature": "97.3\u00b0F", 568 | "oxygen_saturation": "97%", 569 | "weight": "58 kg", 570 | "height": "159 cm", 571 | "bmi": "20.8" 572 | } 573 | ], 574 | "imaging_results": [ 575 | { 576 | "imaging_id": "IM6696", 577 | "date": "2024-02-19 17:30:04", 578 | "type": "Abdominal CT", 579 | "location_name": "Green Valley Cancer Institute", 580 | "results": "Follow-up Abdominal CT shows partial response.", 581 | "radiologist": "Dr. Emily Imaging" 582 | } 583 | ] 584 | }, 585 | { 586 | "demographics": { 587 | "patient_id": "P000005", 588 | "first_name": "PatientP000005", 589 | "last_name": "Doe", 590 | "date_of_birth": "1967-02-17 02:56:48", 591 | "gender": "Male", 592 | "contact_information": { 593 | "address": "641 Elm Street, Springfield, IL, 62704", 594 | "phone": "+44 7911 279349", 595 | "email": "patientP000005@example.com" 596 | } 597 | }, 598 | "emergency_contact": { 599 | "name": "Spouse of PatientP000005", 600 | "relationship": "Spouse", 601 | "phone": "+44 7911 519193" 602 | }, 603 | "medical_history": [ 604 | { 605 | "condition": "Prostate Cancer", 606 | "diagnosis_date": "2011-10-24 02:34:53", 607 | "stage": "III", 608 | "treatment": [ 609 | "Prostatectomy", 610 | "Radiation therapy" 611 | ], 612 | "notes": "Stage III Prostate Cancer diagnosed. Treatment plan includes Prostatectomy, Radiation therapy." 
613 | } 614 | ], 615 | "medications": [], 616 | "immunizations": [ 617 | { 618 | "immunization_id": "I2525", 619 | "vaccine": "Influenza", 620 | "date_administered": "2024-04-22 11:16:59", 621 | "administered_by": "NHS Oncology Clinic", 622 | "location_name": "Springfield Cancer Center" 623 | } 624 | ], 625 | "blood_test_results": [ 626 | { 627 | "lab_id": "L9784", 628 | "test_name": "Complete Blood Count (CBC)", 629 | "date": "2024-09-21 23:04:29", 630 | "results": { 631 | "Hemoglobin": "9.5 g/dL", 632 | "White Blood Cells": "9.8 x10^3/\u00b5L", 633 | "Platelets": "176 x10^3/\u00b5L" 634 | }, 635 | "normal_ranges": { 636 | "Hemoglobin": "12-16 g/dL", 637 | "White Blood Cells": "4-11 x10^3/\u00b5L", 638 | "Platelets": "150-450 x10^3/\u00b5L" 639 | }, 640 | "interpretation": "Values affected by ongoing cancer treatment." 641 | }, 642 | { 643 | "lab_id": "L3153", 644 | "test_name": "Tumor Marker Test", 645 | "date": "2023-08-25 20:20:57", 646 | "results": { 647 | "CA-125": null, 648 | "PSA": "8.8 ng/mL", 649 | "CA 15-3": null 650 | }, 651 | "normal_ranges": { 652 | "CA-125": "<35 U/mL", 653 | "PSA": "<4 ng/mL", 654 | "CA 15-3": "<30 U/mL" 655 | }, 656 | "interpretation": "Elevated levels may indicate disease activity or treatment response." 657 | } 658 | ], 659 | "appointments": [ 660 | { 661 | "appointment_id": "AP3654", 662 | "date_time": "2024-02-09 17:39:14", 663 | "type": "Oncology Follow-up", 664 | "with": "Dr. John Oncologist", 665 | "location_name": "Riverbend Oncology Clinic", 666 | "status": "Scheduled", 667 | "notes": "Review treatment progress and discuss next steps." 668 | }, 669 | { 670 | "appointment_id": "AP1634", 671 | "date_time": "2024-08-19 21:15:07", 672 | "type": "Radiation Therapy Session", 673 | "with": "Dr. Sarah Radiologist", 674 | "location_name": "Green Valley Cancer Institute", 675 | "status": "Scheduled", 676 | "notes": "Continued radiation treatment as per plan." 
677 | } 678 | ], 679 | "vital_signs": [ 680 | { 681 | "vital_id": "V7186", 682 | "date": "2024-03-18 11:50:15", 683 | "blood_pressure": "125/86 mmHg", 684 | "heart_rate": "80 bpm", 685 | "respiratory_rate": "18 breaths/min", 686 | "temperature": "98.0\u00b0F", 687 | "oxygen_saturation": "99%", 688 | "weight": "80 kg", 689 | "height": "176 cm", 690 | "bmi": "22.6" 691 | } 692 | ], 693 | "imaging_results": [ 694 | { 695 | "imaging_id": "IM9572", 696 | "date": "2024-01-27 20:01:56", 697 | "type": "Prostate MRI", 698 | "location_name": "Green Valley Cancer Institute", 699 | "results": "Follow-up Prostate MRI shows complete response.", 700 | "radiologist": "Dr. Emily Imaging" 701 | } 702 | ] 703 | }, 704 | { 705 | "demographics": { 706 | "patient_id": "P000006", 707 | "first_name": "PatientP000006", 708 | "last_name": "Doe", 709 | "date_of_birth": "1969-12-19 11:45:49", 710 | "gender": "Male", 711 | "contact_information": { 712 | "address": "107 Elm Street, Springfield, IL, 62704", 713 | "phone": "+44 7911 884888", 714 | "email": "patientP000006@example.com" 715 | } 716 | }, 717 | "emergency_contact": { 718 | "name": "Spouse of PatientP000006", 719 | "relationship": "Spouse", 720 | "phone": "+44 7911 738108" 721 | }, 722 | "medical_history": [ 723 | { 724 | "condition": "Thyroid Cancer", 725 | "diagnosis_date": "2019-01-14 17:15:54", 726 | "stage": "III", 727 | "treatment": [ 728 | "Radioactive iodine therapy", 729 | "Targeted therapy" 730 | ], 731 | "notes": "Stage III Thyroid Cancer diagnosed. Treatment plan includes Radioactive iodine therapy, Targeted therapy." 
732 | } 733 | ], 734 | "medications": [], 735 | "immunizations": [ 736 | { 737 | "immunization_id": "I4453", 738 | "vaccine": "Influenza", 739 | "date_administered": "2024-08-05 13:22:33", 740 | "administered_by": "NHS Oncology Clinic", 741 | "location_name": "NHS Oncology Centre" 742 | } 743 | ], 744 | "blood_test_results": [ 745 | { 746 | "lab_id": "L3089", 747 | "test_name": "Complete Blood Count (CBC)", 748 | "date": "2024-10-29 21:08:00", 749 | "results": { 750 | "Hemoglobin": "10.5 g/dL", 751 | "White Blood Cells": "3.9 x10^3/\u00b5L", 752 | "Platelets": "215 x10^3/\u00b5L" 753 | }, 754 | "normal_ranges": { 755 | "Hemoglobin": "12-16 g/dL", 756 | "White Blood Cells": "4-11 x10^3/\u00b5L", 757 | "Platelets": "150-450 x10^3/\u00b5L" 758 | }, 759 | "interpretation": "Values affected by ongoing cancer treatment." 760 | } 761 | ], 762 | "appointments": [ 763 | { 764 | "appointment_id": "AP4886", 765 | "date_time": "2024-02-08 08:56:07", 766 | "type": "Oncology Follow-up", 767 | "with": "Dr. John Oncologist", 768 | "location_name": "Riverbend Oncology Clinic", 769 | "status": "Scheduled", 770 | "notes": "Review treatment progress and discuss next steps." 771 | } 772 | ], 773 | "vital_signs": [ 774 | { 775 | "vital_id": "V4214", 776 | "date": "2024-10-14 21:14:03", 777 | "blood_pressure": "119/61 mmHg", 778 | "heart_rate": "61 bpm", 779 | "respiratory_rate": "17 breaths/min", 780 | "temperature": "97.3\u00b0F", 781 | "oxygen_saturation": "96%", 782 | "weight": "64 kg", 783 | "height": "159 cm", 784 | "bmi": "21.4" 785 | } 786 | ], 787 | "imaging_results": [ 788 | { 789 | "imaging_id": "IM9087", 790 | "date": "2024-04-09 20:13:50", 791 | "type": "Thyroid Ultrasound", 792 | "location_name": "Downtown Cancer Facility", 793 | "results": "Follow-up Thyroid Ultrasound shows partial response.", 794 | "radiologist": "Dr. 
Emily Imaging" 795 | } 796 | ] 797 | }, 798 | { 799 | "demographics": { 800 | "patient_id": "P000007", 801 | "first_name": "PatientP000007", 802 | "last_name": "Doe", 803 | "date_of_birth": "1979-08-04 23:12:34", 804 | "gender": "Male", 805 | "contact_information": { 806 | "address": "169 Elm Street, Springfield, IL, 62704", 807 | "phone": "+44 7911 795798", 808 | "email": "patientP000007@example.com" 809 | } 810 | }, 811 | "emergency_contact": { 812 | "name": "Spouse of PatientP000007", 813 | "relationship": "Spouse", 814 | "phone": "+44 7911 236769" 815 | }, 816 | "medical_history": [ 817 | { 818 | "condition": "Prostate Cancer", 819 | "diagnosis_date": "2023-12-19 07:43:10", 820 | "stage": "IV", 821 | "treatment": [ 822 | "Hormone therapy" 823 | ], 824 | "notes": "Stage IV Prostate Cancer diagnosed. Treatment plan includes Hormone therapy." 825 | } 826 | ], 827 | "medications": [ 828 | { 829 | "medication_id": "M6281", 830 | "name": "Tamoxifen", 831 | "dosage": "20 mg", 832 | "frequency": "Once daily", 833 | "start_date": "2023-06-30 05:10:26", 834 | "end_date": null, 835 | "prescriber": "Dr. Jane Endocrinologist", 836 | "location_name": "Springfield Cancer Center" 837 | } 838 | ], 839 | "immunizations": [ 840 | { 841 | "immunization_id": "I8026", 842 | "vaccine": "Influenza", 843 | "date_administered": "2023-04-08 20:32:55", 844 | "administered_by": "NHS Oncology Clinic", 845 | "location_name": "Green Valley Cancer Institute" 846 | } 847 | ], 848 | "blood_test_results": [ 849 | { 850 | "lab_id": "L1407", 851 | "test_name": "Complete Blood Count (CBC)", 852 | "date": "2023-06-16 22:50:38", 853 | "results": { 854 | "Hemoglobin": "12.3 g/dL", 855 | "White Blood Cells": "8.4 x10^3/\u00b5L", 856 | "Platelets": "108 x10^3/\u00b5L" 857 | }, 858 | "normal_ranges": { 859 | "Hemoglobin": "12-16 g/dL", 860 | "White Blood Cells": "4-11 x10^3/\u00b5L", 861 | "Platelets": "150-450 x10^3/\u00b5L" 862 | }, 863 | "interpretation": "Values affected by ongoing cancer treatment." 
864 | }, 865 | { 866 | "lab_id": "L6043", 867 | "test_name": "Tumor Marker Test", 868 | "date": "2023-02-27 17:42:48", 869 | "results": { 870 | "CA-125": null, 871 | "PSA": "5.3 ng/mL", 872 | "CA 15-3": null 873 | }, 874 | "normal_ranges": { 875 | "CA-125": "<35 U/mL", 876 | "PSA": "<4 ng/mL", 877 | "CA 15-3": "<30 U/mL" 878 | }, 879 | "interpretation": "Elevated levels may indicate disease activity or treatment response." 880 | } 881 | ], 882 | "appointments": [ 883 | { 884 | "appointment_id": "AP4016", 885 | "date_time": "2024-07-22 20:43:56", 886 | "type": "Oncology Follow-up", 887 | "with": "Dr. John Oncologist", 888 | "location_name": "NHS Oncology Centre", 889 | "status": "Scheduled", 890 | "notes": "Review treatment progress and discuss next steps." 891 | } 892 | ], 893 | "vital_signs": [ 894 | { 895 | "vital_id": "V4113", 896 | "date": "2024-02-04 12:36:34", 897 | "blood_pressure": "120/73 mmHg", 898 | "heart_rate": "67 bpm", 899 | "respiratory_rate": "15 breaths/min", 900 | "temperature": "98.5\u00b0F", 901 | "oxygen_saturation": "100%", 902 | "weight": "59 kg", 903 | "height": "152 cm", 904 | "bmi": "22.8" 905 | } 906 | ], 907 | "imaging_results": [ 908 | { 909 | "imaging_id": "IM1050", 910 | "date": "2024-03-29 19:28:10", 911 | "type": "Prostate MRI", 912 | "location_name": "Hillside Cancer Care", 913 | "results": "Follow-up Prostate MRI shows progressive disease.", 914 | "radiologist": "Dr. 
Emily Imaging" 915 | } 916 | ] 917 | }, 918 | { 919 | "demographics": { 920 | "patient_id": "P000008", 921 | "first_name": "PatientP000008", 922 | "last_name": "Doe", 923 | "date_of_birth": "1972-03-09 04:40:52", 924 | "gender": "Female", 925 | "contact_information": { 926 | "address": "855 Elm Street, Springfield, IL, 62704", 927 | "phone": "+44 7911 796265", 928 | "email": "patientP000008@example.com" 929 | } 930 | }, 931 | "emergency_contact": { 932 | "name": "Spouse of PatientP000008", 933 | "relationship": "Spouse", 934 | "phone": "+44 7911 246038" 935 | }, 936 | "medical_history": [ 937 | { 938 | "condition": "Lung Cancer", 939 | "diagnosis_date": "2011-09-03 18:06:35", 940 | "stage": "II", 941 | "treatment": [ 942 | "Immunotherapy", 943 | "Lobectomy", 944 | "Chemotherapy" 945 | ], 946 | "notes": "Stage II Lung Cancer diagnosed. Treatment plan includes Immunotherapy, Lobectomy, Chemotherapy." 947 | } 948 | ], 949 | "medications": [ 950 | { 951 | "medication_id": "M7287", 952 | "name": "Pemetrexed", 953 | "dosage": "77 mg", 954 | "frequency": "Every 3 weeks", 955 | "start_date": "2024-09-09 12:45:53", 956 | "end_date": null, 957 | "prescriber": "Dr. John Oncologist", 958 | "location_name": "Springfield Cancer Center" 959 | }, 960 | { 961 | "medication_id": "M5325", 962 | "name": "Carboplatin", 963 | "dosage": "173 mg", 964 | "frequency": "Every 3 weeks", 965 | "start_date": "2024-11-08 12:30:13", 966 | "end_date": null, 967 | "prescriber": "Dr. John Oncologist", 968 | "location_name": "Maple Grove Oncology Center" 969 | }, 970 | { 971 | "medication_id": "M2591", 972 | "name": "Cisplatin", 973 | "dosage": "74 mg", 974 | "frequency": "Every 3 weeks", 975 | "start_date": "2024-07-29 04:43:06", 976 | "end_date": null, 977 | "prescriber": "Dr. 
John Oncologist", 978 | "location_name": "Riverside Oncology Clinic" 979 | } 980 | ], 981 | "immunizations": [ 982 | { 983 | "immunization_id": "I4821", 984 | "vaccine": "Influenza", 985 | "date_administered": "2023-06-27 11:46:48", 986 | "administered_by": "NHS Oncology Clinic", 987 | "location_name": "Hillside Cancer Care" 988 | } 989 | ], 990 | "blood_test_results": [ 991 | { 992 | "lab_id": "L6932", 993 | "test_name": "Complete Blood Count (CBC)", 994 | "date": "2023-02-22 03:43:11", 995 | "results": { 996 | "Hemoglobin": "10.9 g/dL", 997 | "White Blood Cells": "10.1 x10^3/\u00b5L", 998 | "Platelets": "217 x10^3/\u00b5L" 999 | }, 1000 | "normal_ranges": { 1001 | "Hemoglobin": "12-16 g/dL", 1002 | "White Blood Cells": "4-11 x10^3/\u00b5L", 1003 | "Platelets": "150-450 x10^3/\u00b5L" 1004 | }, 1005 | "interpretation": "Values affected by ongoing cancer treatment." 1006 | } 1007 | ], 1008 | "appointments": [ 1009 | { 1010 | "appointment_id": "AP2944", 1011 | "date_time": "2024-02-02 08:23:07", 1012 | "type": "Oncology Follow-up", 1013 | "with": "Dr. John Oncologist", 1014 | "location_name": "Pinewood Oncology", 1015 | "status": "Scheduled", 1016 | "notes": "Review treatment progress and discuss next steps." 1017 | } 1018 | ], 1019 | "vital_signs": [ 1020 | { 1021 | "vital_id": "V2995", 1022 | "date": "2024-06-11 00:51:21", 1023 | "blood_pressure": "134/88 mmHg", 1024 | "heart_rate": "92 bpm", 1025 | "respiratory_rate": "13 breaths/min", 1026 | "temperature": "98.2\u00b0F", 1027 | "oxygen_saturation": "96%", 1028 | "weight": "74 kg", 1029 | "height": "181 cm", 1030 | "bmi": "24.3" 1031 | } 1032 | ], 1033 | "imaging_results": [ 1034 | { 1035 | "imaging_id": "IM3589", 1036 | "date": "2023-10-16 18:32:58", 1037 | "type": "Chest CT", 1038 | "location_name": "Springfield Cancer Center", 1039 | "results": "Follow-up Chest CT shows stable disease.", 1040 | "radiologist": "Dr. 
Emily Imaging" 1041 | } 1042 | ] 1043 | }, 1044 | { 1045 | "demographics": { 1046 | "patient_id": "P000009", 1047 | "first_name": "PatientP000009", 1048 | "last_name": "Doe", 1049 | "date_of_birth": "1984-12-13 16:14:14", 1050 | "gender": "Female", 1051 | "contact_information": { 1052 | "address": "439 Elm Street, Springfield, IL, 62704", 1053 | "phone": "+44 7911 375598", 1054 | "email": "patientP000009@example.com" 1055 | } 1056 | }, 1057 | "emergency_contact": { 1058 | "name": "Spouse of PatientP000009", 1059 | "relationship": "Spouse", 1060 | "phone": "+44 7911 514976" 1061 | }, 1062 | "medical_history": [ 1063 | { 1064 | "condition": "Ovarian Cancer", 1065 | "diagnosis_date": "2015-04-13 04:49:58", 1066 | "stage": "III", 1067 | "treatment": [ 1068 | "Targeted therapy", 1069 | "Oophorectomy" 1070 | ], 1071 | "notes": "Stage III Ovarian Cancer diagnosed. Treatment plan includes Targeted therapy, Oophorectomy." 1072 | } 1073 | ], 1074 | "medications": [], 1075 | "immunizations": [ 1076 | { 1077 | "immunization_id": "I4059", 1078 | "vaccine": "Influenza", 1079 | "date_administered": "2023-08-07 10:15:51", 1080 | "administered_by": "NHS Oncology Clinic", 1081 | "location_name": "Riverbend Oncology Clinic" 1082 | } 1083 | ], 1084 | "blood_test_results": [ 1085 | { 1086 | "lab_id": "L2372", 1087 | "test_name": "Complete Blood Count (CBC)", 1088 | "date": "2024-05-11 01:55:56", 1089 | "results": { 1090 | "Hemoglobin": "10.7 g/dL", 1091 | "White Blood Cells": "3.9 x10^3/\u00b5L", 1092 | "Platelets": "203 x10^3/\u00b5L" 1093 | }, 1094 | "normal_ranges": { 1095 | "Hemoglobin": "12-16 g/dL", 1096 | "White Blood Cells": "4-11 x10^3/\u00b5L", 1097 | "Platelets": "150-450 x10^3/\u00b5L" 1098 | }, 1099 | "interpretation": "Values affected by ongoing cancer treatment." 
1100 | }, 1101 | { 1102 | "lab_id": "L2333", 1103 | "test_name": "Tumor Marker Test", 1104 | "date": "2023-11-22 00:01:45", 1105 | "results": { 1106 | "CA-125": "1.4 U/mL", 1107 | "PSA": null, 1108 | "CA 15-3": null 1109 | }, 1110 | "normal_ranges": { 1111 | "CA-125": "<35 U/mL", 1112 | "PSA": "<4 ng/mL", 1113 | "CA 15-3": "<30 U/mL" 1114 | }, 1115 | "interpretation": "Elevated levels may indicate disease activity or treatment response." 1116 | } 1117 | ], 1118 | "appointments": [ 1119 | { 1120 | "appointment_id": "AP8770", 1121 | "date_time": "2024-03-30 02:23:15", 1122 | "type": "Oncology Follow-up", 1123 | "with": "Dr. John Oncologist", 1124 | "location_name": "City Cancer Institute", 1125 | "status": "Scheduled", 1126 | "notes": "Review treatment progress and discuss next steps." 1127 | } 1128 | ], 1129 | "vital_signs": [ 1130 | { 1131 | "vital_id": "V8097", 1132 | "date": "2024-10-25 20:17:04", 1133 | "blood_pressure": "119/85 mmHg", 1134 | "heart_rate": "69 bpm", 1135 | "respiratory_rate": "12 breaths/min", 1136 | "temperature": "98.1\u00b0F", 1137 | "oxygen_saturation": "95%", 1138 | "weight": "71 kg", 1139 | "height": "181 cm", 1140 | "bmi": "24.9" 1141 | } 1142 | ], 1143 | "imaging_results": [ 1144 | { 1145 | "imaging_id": "IM1918", 1146 | "date": "2024-12-30 23:54:17", 1147 | "type": "Pelvic CT", 1148 | "location_name": "Green Valley Cancer Institute", 1149 | "results": "Follow-up Pelvic CT shows complete response.", 1150 | "radiologist": "Dr. Emily Imaging" 1151 | } 1152 | ] 1153 | } 1154 | ] --------------------------------------------------------------------------------